[med-svn] r163 - in trunk/packages: . clustalw clustalw/branches clustalw/branches/upstream clustalw/branches/upstream/current

Charles Plessy charles-guest at alioth.debian.org
Mon Dec 4 01:55:57 CET 2006


Author: charles-guest
Date: 2006-12-04 01:55:49 +0100 (Mon, 04 Dec 2006)
New Revision: 163

Added:
   trunk/packages/clustalw/
   trunk/packages/clustalw/branches/
   trunk/packages/clustalw/branches/upstream/
   trunk/packages/clustalw/branches/upstream/current/
   trunk/packages/clustalw/branches/upstream/current/README_W
   trunk/packages/clustalw/branches/upstream/current/README_X
   trunk/packages/clustalw/branches/upstream/current/alnscore.c
   trunk/packages/clustalw/branches/upstream/current/amenu.c
   trunk/packages/clustalw/branches/upstream/current/calcgapcoeff.c
   trunk/packages/clustalw/branches/upstream/current/calcprf1.c
   trunk/packages/clustalw/branches/upstream/current/calcprf2.c
   trunk/packages/clustalw/branches/upstream/current/calctree.c
   trunk/packages/clustalw/branches/upstream/current/clustalv.doc
   trunk/packages/clustalw/branches/upstream/current/clustalw.c
   trunk/packages/clustalw/branches/upstream/current/clustalw.doc
   trunk/packages/clustalw/branches/upstream/current/clustalw.h
   trunk/packages/clustalw/branches/upstream/current/clustalw.ms
   trunk/packages/clustalw/branches/upstream/current/clustalw.new
   trunk/packages/clustalw/branches/upstream/current/clustalw_help
   trunk/packages/clustalw/branches/upstream/current/clustalx.c
   trunk/packages/clustalw/branches/upstream/current/clustalx.html
   trunk/packages/clustalw/branches/upstream/current/clustalx_help
   trunk/packages/clustalw/branches/upstream/current/coldna.par
   trunk/packages/clustalw/branches/upstream/current/colprint.par
   trunk/packages/clustalw/branches/upstream/current/colprot.par
   trunk/packages/clustalw/branches/upstream/current/dayhoff.h
   trunk/packages/clustalw/branches/upstream/current/gcgcheck.c
   trunk/packages/clustalw/branches/upstream/current/general.h
   trunk/packages/clustalw/branches/upstream/current/globin.pep
   trunk/packages/clustalw/branches/upstream/current/gon90.bla
   trunk/packages/clustalw/branches/upstream/current/interface.c
   trunk/packages/clustalw/branches/upstream/current/makefile
   trunk/packages/clustalw/branches/upstream/current/makefile.alpha
   trunk/packages/clustalw/branches/upstream/current/makefile.linux
   trunk/packages/clustalw/branches/upstream/current/makefile.sgi
   trunk/packages/clustalw/branches/upstream/current/makefile.sun
   trunk/packages/clustalw/branches/upstream/current/malign.c
   trunk/packages/clustalw/branches/upstream/current/matrices.h
   trunk/packages/clustalw/branches/upstream/current/matrixseries.gon
   trunk/packages/clustalw/branches/upstream/current/pairalign.c
   trunk/packages/clustalw/branches/upstream/current/param.h
   trunk/packages/clustalw/branches/upstream/current/prfalign.c
   trunk/packages/clustalw/branches/upstream/current/random.c
   trunk/packages/clustalw/branches/upstream/current/readmat.c
   trunk/packages/clustalw/branches/upstream/current/sequence.c
   trunk/packages/clustalw/branches/upstream/current/showpair.c
   trunk/packages/clustalw/branches/upstream/current/trees.c
   trunk/packages/clustalw/branches/upstream/current/util.c
   trunk/packages/clustalw/branches/upstream/current/xcolor.c
   trunk/packages/clustalw/branches/upstream/current/xdisplay.c
   trunk/packages/clustalw/branches/upstream/current/xmenu.c
   trunk/packages/clustalw/branches/upstream/current/xmenu.h
   trunk/packages/clustalw/branches/upstream/current/xscore.c
   trunk/packages/clustalw/branches/upstream/current/xutils.c
   trunk/packages/clustalw/tags/
Log:
[svn-inject] Installing original source of clustalw

Added: trunk/packages/clustalw/branches/upstream/current/README_W
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/README_W	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/README_W	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,280 @@
+******************************************************************************
+
+               CLUSTAL W Multiple Sequence Alignment Program
+                        (version 1.83, Feb 2003)
+
+******************************************************************************
+
+
+Please send bug reports, comments etc. to one of:-
+	gibson at embl-heidelberg.de
+	thompson at igbmc.u-strasbg.fr
+	d.higgins at ucc.ie
+
+
+******************************************************************************
+
+                  POLICY ON COMMERCIAL DISTRIBUTION OF CLUSTAL W
+
+Clustal W is freely available to the user community. However, Clustal W is
+increasingly being distributed as part of commercial sequence analysis
+packages. To help us safeguard future maintenance and development, commercial
+distributors of Clustal W must take out a NON-EXCLUSIVE LICENCE. Anyone
+wishing to commercially distribute version 1.81 of Clustal W should contact the
+authors unless they have previously taken out a licence.
+
+******************************************************************************
+
+Clustal W is written in ANSI-C and can be run on any machine with an ANSI-C
+compiler. Executables are provided for several major platforms. 
+
+Changes since CLUSTAL W Version 1.82
+------------------------------------
+
+1. The FASTA format has been added to the list of alignment output options.
+
+2. It is now possible to save the residue ranges (appended after the sequence
+names) when saving a specified range of the alignment.
+
+3. The efficiency of the neighbour-joining algorithm has been improved. This
+work was done by Tadashi Koike at the Center for Information Biology and DNA Data
+Bank of Japan and FUJITSU Limited.
+
+Some example speedups are given below : (timings on a SPARC64 CPU)
+
+No. of sequences        original NJ     new NJ
+     200                0' 12"          0.1"
+     500                9' 19"          1.4"
+     1000               XXXX            0' 31"
+
+Changes since version 1.8 
+--------------------------
+
+1. ClustalW now returns error codes for some common errors when exiting. This
+may be useful for people who run clustalw automatically from within a script.
+Error codes are: 
+	1	bad command line option
+	2	cannot open sequence file
+	3	wrong format in sequence file
+	4	sequence file contains only 1 sequence (for multiple alignments)
+
+2. Alignments can now be saved in Nexus format, for compatibility with PAUP, 
+MacClade etc. For a description of the Nexus format, see:
+Maddison, D. R., D. L. Swofford and W. P. Maddison.  1997.
+NEXUS: an extensible file format for systematic information.
+Systematic Biology 46:590-621.
+
+3. Phylogenetic trees can also be saved in nexus format.
+
+4. A ClustalW icon has been designed for MAC and PC systems.
+
+
+Changes since version 1.74 
+--------------------------
+
+1. Some work has been done to automatically select the optimal parameters
+depending on the set of sequences to be aligned. The Gonnet series of residue
+comparison matrices are now used by default. The Blosum series remains as an
+option. The default gap extension penalty for proteins has been changed to 0.2
+(was 0.05). The 'delay divergent sequences' option has been changed to 30%
+residue identity (was 40%).
+
+2. The default parameters used when the 'Negative matrix' option is selected
+have been optimised. This option may help when the sequences to be aligned are
+not superposable over their whole lengths (e.g. in the presence of N/C terminal
+extensions).
+
+3. A bug in the calculation of phylogenetic trees for 2 sequences has been
+fixed.
+
+4. A command line option has been added to turn off the sequence weighting
+calculation.
+
+5. The phylogenetic tree calculation now ignores any ambiguity codes in the
+sequences. 
+
+6.  A bug in the memory access during the calculation of profiles has been
+fixed. (Thanks to Haruna Cofer at SGI).
+
+7. A bug has been fixed in the 'transition weight' option for nucleic acid
+sequences. (Thanks to Chanan Rubin at Compugen).
+
+8. An option has been added to read in a series of comparison matrices from a
+file. This option is only applicable for protein sequences. For details of the
+file format, see the on-line documentation.
+
+9. The MSF output file format has been changed. The sequence weights
+calculated by Clustal W are now included in the header.
+
+10. Two bugs in the FAST/APPROXIMATE pairwise alignments have been fixed. One
+involved the alignment of new sequences to an existing profile using the fast
+pairwise alignment option; the second was caused by changing the default
+options for the fast pairwise alignments.
+
+11. A bug in the alignment of a small number of sequences has been fixed.
+Previously a Guide Tree was not calculated for less than 4 sequences.
+
+
+Changes since version 1.6
+-------------------------
+
+1. The static arrays used by clustalw for storing the alignment data have been
+replaced by dynamically allocated memory. There is now no limit on the number
+or length of sequences which can be input.
+
+2. The alignment of DNA sequences now offers a new hard-coded matrix, as well
+as the identity matrix used previously. The new matrix is the default scoring
+matrix used by the BESTFIT program of the GCG package for the comparison of
+nucleic acid sequences. X's and N's are treated as matches to any IUB ambiguity
+symbol. All matches score 1.9; all mismatches for IUB symbols score 0.0.
+
+3. The transition weight option for aligning nucleotide sequences has been
+changed from an on/off toggle to a weight between 0 and 1.  A weight of zero
+means that the transitions are scored as mismatches; a weight of 1 gives 
+transitions the full match score. For distantly related DNA sequences, the
+weight should be near to zero; for closely related sequences it can be useful
+to assign a higher score.
+
+4. The RSF sequence alignment file format used by GCG Version 9 can now be
+read.
+
+5. The clustal sequence alignment file format has been changed to allow
+sequence names longer than 10 characters. The maximum length allowed is set in
+clustalw.h by the statement:
+#define MAXNAMES	10
+
+For the fasta format, the name is taken as the first string after the '>'
+character, stopping at the first white space. (Previously, the first 10
+characters were taken, replacing blanks by underscores).
+
+6. The bootstrap values written in the phylip tree file format can be assigned
+either to branches or nodes. The default is to write the values on the nodes,
+as this can be read by several commonly-used tree display programs. But note
+that this can lead to confusion if the tree is rooted and the bootstraps may
+be better attached to the internal branches: Software developers should ensure
+they can read the branch label format.
+
+7. The sequence weighting used during sequence to profile alignments has been
+changed. The tree weight is now multiplied by the percent identity of the
+new sequence compared with the most closely related sequence in the profile.
+
+8. The sequence weighting used during profile to profile alignments has been
+changed. A guide tree is now built for each profile separately and the
+sequence weights calculated from the two trees. The weights for each
+sequence are then multiplied by the percent identity of the sequence compared
+with the most closely related sequence in the opposite profile.
+
+9. The adjustment of the Gap Opening and Gap Extension Penalties for sequences
+of unequal length has been improved.
+
+10. The default order of the sequences in the output alignment file has been
+changed. Previously the default was to output the sequences in the same order
+as the input file. Now the default is to use the order in which the sequences
+were aligned (from the guide tree/dendrogram), thus automatically grouping
+closely related sequences.
+
+11. The option to 'Reset Gaps between alignments' has been switched off by
+default.
+
+12. The conservation line output in the clustal format alignment file has been
+changed. Three characters are now used:
+'*' indicates positions which have a single, fully conserved residue
+':' indicates that one of the following 'strong' groups is fully conserved:-
+                 STA
+                 NEQK
+                 NHQK
+                 NDEQ
+                 QHRK
+                 MILV
+                 MILF
+                 HY
+                 FYW
+
+'.' indicates that one of the following 'weaker' groups is fully conserved:-
+                 CSA
+                 ATV
+                 SAG
+                 STNK
+                 STPA
+                 SGND
+                 SNDEQK
+                 NDEQHK
+                 NEQHRK
+                 FVLIM
+                 HFY
+
+These are all the positively scoring groups that occur in the Gonnet Pam250
+matrix. The strong and weak groups are defined as strong score >0.5 and weak
+score =<0.5 respectively.
+
+13. A bug in the modification of the Myers and Miller alignment algorithm
+for residue-specific gap penalties has been fixed. This occasionally caused
+new gaps to be opened a few residues away from the optimal position.
+
+14. The GCG/MSF input format no longer needs the word PILEUP on the first
+line. Several versions can now be recognised:-
+      1.  The word PILEUP as the first word in the file
+      2.  The word !!AA_MULTIPLE_ALIGNMENT or !!NA_MULTIPLE_ALIGNMENT
+          as the first word in the file
+      3.  The characters MSF on the first line of the file, and the
+          characters .. at the end of that line.
+
+15. The standard command line separator for UNIX systems has been changed from
+'/' to '-'. ie. to give options on the command line, you now type
+
+     clustalw input.aln -gapopen=8.0
+
+instead of  clustalw input.aln /gapopen=8.0
+
+
+                      ATTENTION SOFTWARE DEVELOPERS!!
+                      -------------------------------
+
+The CLUSTAL sequence alignment output format was modified from version 1.7:
+
+1. Names longer than 10 chars are now allowed. (The maximum is specified in
+clustalw.h by '#define MAXNAMES'.)
+
+2. The consensus line now consists of three characters: '*',':' and '.'. (Only
+the '*' and '.' were previously used.)
+
+3. An option (not the default) has been added, allowing the user to print out
+sequence numbers at the end of each line of the alignment output.
+
+4. Both RNA bases (U) and base ambiguities are now supported in nucleic acid
+sequences. In the past, all characters (upper or lower case) other than
+a,c,g,t or u were converted to N. Now the following characters are recognised 
+and retained in the alignment output: ABCDGHKMNRSTUVWXY (upper or lower case).
+
+5. A blank line inadvertently added in the version 1.6 header has been taken
+out again.
+
+                              CLUSTAL REFERENCES
+                              ------------------
+
+Details of algorithms, implementation and useful tips on usage of Clustal
+programs can be found in the following publications:
+
+Jeanmougin,F., Thompson,J.D., Gouy,M., Higgins,D.G. and Gibson,T.J. (1998)
+Multiple sequence alignment with Clustal X. Trends Biochem Sci, 23, 403-5.
+
+Thompson,J.D., Gibson,T.J., Plewniak,F., Jeanmougin,F. and Higgins,D.G. (1997)
+The ClustalX windows interface: flexible strategies for multiple sequence 
+alignment aided by quality analysis tools. Nucleic Acids Research, 25:4876-4882.
+
+Higgins, D. G., Thompson, J. D. and Gibson, T. J. (1996) Using CLUSTAL for
+multiple sequence alignments. Methods Enzymol., 266, 383-402.
+
+Thompson, J.D., Higgins, D.G. and Gibson, T.J. (1994) CLUSTAL W: improving the
+sensitivity of progressive multiple sequence alignment through sequence
+weighting, position-specific gap penalties and weight matrix choice.  Nucleic
+Acids Research, 22:4673-4680.
+
+Higgins,D.G., Bleasby,A.J. and Fuchs,R. (1992) CLUSTAL V: improved software for
+multiple sequence alignment. CABIOS 8,189-191.
+
+Higgins,D.G. and Sharp,P.M. (1989) Fast and sensitive multiple sequence
+alignments on a microcomputer. CABIOS 5,151-153.
+
+Higgins,D.G. and Sharp,P.M. (1988) CLUSTAL: a package for performing multiple
+sequence alignment on a microcomputer. Gene 73,237-244.

Added: trunk/packages/clustalw/branches/upstream/current/README_X
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/README_X	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/README_X	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,392 @@
+******************************************************************************
+
+	        CLUSTAL X Multiple Sequence Alignment Program
+                         (version 1.83, Feb 2003)
+
+******************************************************************************
+
+This README contains notes on version CHANGES and help with INSTALLATION
+
+Clustal X provides a new window-based user interface to the Clustal W multiple
+alignment program. It uses the Vibrant multi-platform user interface
+development library, developed by the National Center for Biotechnology
+Information (Bldg 38A, NIH 8600 Rockville Pike,Bethesda, MD 20894) as part of
+their NCBI SOFTWARE DEVELOPMENT TOOLKIT. The toolkit is available by
+anonymous ftp from ncbi.nlm.nih.gov
+
+Please e-mail bug reports/complaints/suggestions (polite if possible) to
+	   Julie Thompson at julie at igbmc.u-strasbg.fr
+	or Toby Gibson at gibson at embl-heidelberg.de
+ 
+ 
+******************************************************************************
+
+            POLICY ON COMMERCIAL DISTRIBUTION OF CLUSTAL W and X
+
+Clustal W and X are freely available to the user community. However, Clustal W
+is increasingly being distributed as part of commercial sequence analysis
+packages. To help us safeguard future maintenance and development, commercial
+distributors of Clustal X must take out a non-exclusive licence. Anyone
+wishing to commercially distribute version 1.81 of Clustal X should contact the
+authors unless they have previously taken out a licence. 
+
+******************************************************************************
+
+Changes since CLUSTAL X Version 1.82
+------------------------------------
+
+1. The FASTA format has been added to the list of alignment output options.
+
+2. It is now possible to save the residue ranges (appended after the sequence
+names) when saving a specified range of the alignment.
+
+3. The efficiency of the neighbour-joining algorithm has been improved. This
+work was done by Tadashi Koike at the Center for Information Biology and DNA Data 
+Bank of Japan and FUJITSU Limited.
+
+Some example speedups are given below : (timings on a SPARC64 CPU)
+
+No. of sequences	original NJ	new NJ
+     200		0' 12"		0.1"
+     500		9' 19"		1.4"
+     1000		XXXX		0' 31"
+
+
+Changes since CLUSTAL X Version 1.8
+-----------------------------------
+
+1. ClustalX now returns error codes for some common errors when exiting. This
+may be useful for people who run clustalx automatically from within a script.
+Error codes are:
+        1       bad command line option
+        2       cannot open sequence file
+        3       wrong format in sequence file
+        4       sequence file contains only 1 sequence (for multiple alignments)
+
+2. Alignments can now be saved in Nexus format, for compatibility with PAUP,
+MacClade etc. For a description of the Nexus format, see:
+Maddison, D. R., D. L. Swofford and W. P. Maddison.  1997.
+NEXUS: an extensible file format for systematic information.
+Systematic Biology 46:590-621.
+
+3. Phylogenetic trees can also be saved in nexus format.
+
+4. A bug causing ClustalX to crash during cut-and-paste operations has been fixed.
+
+5. A bug on PC systems, causing an error message when writing to files with
+space characters in the filename has been fixed.
+
+6. The Quality Curve is now displayed as a bar chart, instead of a line plot.
+(Thanks to Michele Clamp, michele at ebi.ac.uk, who used this format in the JalView
+editor.)
+
+7. A bug in the 'Save Profile' option, causing the default profile filename to
+be lost has been fixed.
+
+8. A ClustalX icon has been designed for MAC and PC systems.
+
+
+Changes since CLUSTAL X Version 1.65b
+-------------------------------------
+
+1. Some work has been done to automatically select the optimal parameters
+depending on the set of sequences to be aligned. The Gonnet series of residue
+comparison matrices are now used by default. The Blosum series remains as an
+option. The default gap extension penalty for proteins has been changed to 0.2
+(was 0.05). The 'delay divergent sequences' option has been changed to 30%
+residue identity (was 40%).
+
+2. The default parameters used when the 'Negative matrix' option is selected
+have been optimised. This option may help when the sequences to be aligned are
+not superposable over their whole lengths (e.g. in the presence of N/C terminal
+extensions).
+
+3. An option has been added to save the quality scores displayed underneath the
+sequence window to a text file.
+
+4. The 'Hide Low-scoring segments' option has been moved from the Low-scoring
+parameter window to the Quality menu, and has been changed to 'Show Low-scoring
+segments'.
+
+5. An option has been added to allow the user to search for a string in the
+sequences.
+
+6. An option has been added to the postscript output to print on US Letter size
+paper.
+
+7. A bug in the display of the message at the bottom of the window causing the
+text to disappear when the window was resized has been fixed.
+
+8. The font for the Help window has been changed to Courier.
+
+9. A bug in the calculation of phylogenetic trees for 2 sequences has been
+fixed.
+
+10. A command line option has been added to turn off the sequence weighting
+calculation.
+
+11. The phylogenetic tree calculation now ignores any ambiguity codes in the
+sequences.
+
+12.  A bug in the memory access during the calculation of profiles has been
+fixed. (Thanks to Haruna Cofer at SGI).
+
+13. A bug has been fixed in the 'transition weight' option for nucleic acid
+sequences. (Thanks to Chanan Rubin at Compugen).
+
+14. An option has been added to allow the user to read in a series of residue
+comparison matrices from a file.
+
+15. The MSF output file format has been changed. The sequence weights
+calculated by ClustalX are now included in the header. 
+
+16. Two bugs in the FAST/APPROXIMATE pairwise alignments have been fixed. One
+involved the alignment of new sequences to an existing profile using the fast
+pairwise alignment option; the second was caused by changing the default
+options for the fast pairwise alignments.
+
+17. A bug in the alignment of a small number of sequences has been fixed.
+Previously a Guide Tree was not calculated for less than 4 sequences.
+
+18. Several bugs affecting use of secondary structure masks in Clustal X (but
+not in Clustal W) have been fixed. 
+
+
+Changes since Version 1.5b
+--------------------------
+
+1. The window displayed under MS Windows has previously been a fixed size. The
+window can now be resized by dragging the window frame.
+
+2. An option has been added to read in a series of comparison matrices from a
+file. This option is only applicable for protein sequences. For details of
+the file format, see the on-line documentation.
+
+3. A new DNA comparison matrix has been added. This is the default scoring 
+matrix used by BESTFIT for the comparison of nucleic acid sequences. X's and N's
+are treated as matches to any IUB ambiguity symbol. All matches score 1.9; all
+mismatches for IUB symbols score 0.
+The previous system used by ClustalW, in which matches score 1.0 and mismatches
+score 0 remains as an option. All matches for IUB symbols will also score 0.
+
+4. You can now read a comparison matrix for DNA sequences from a file. The
+matrix file should be in the same format as for the Blast program.
+
+5. The 'Reset gaps before alignment' has been changed to 'Reset new gaps
+before alignments'. A new option 'Reset ALL gaps before alignment' has been
+added.
+RESET NEW GAPS BEFORE ALIGNMENT will remove any new gaps introduced into the
+sequences during multiple alignment if you wish to change the parameters and
+try again.
+RESET ALL GAPS BEFORE ALIGNMENT will remove all gaps in the sequences including
+gaps which were read in from the sequence input file. 
+ 
+6. The 'Realign Residue Range' option has been changed. By default, gap
+opening and extension penalties are now applied to the ends of the alignment
+range in order to penalise terminal gaps. If the REALIGN SEGMENT END GAP
+PENALTIES option is switched off, gaps can be introduced at the ends of the
+residue range at no cost.
+
+7. The MSF output file format has been changed. The sequence weights calculated
+by ClustalX are now included in the header.
+
+8. Two bugs in the FAST/APPROXIMATE pairwise alignments have been fixed. One
+involved the alignment of new sequences to an existing profile using the
+fast pairwise alignment option; the second was caused by changing the default
+options for the fast pairwise alignments.
+
+9. A bug in the postscript output file has been fixed. The residue numbers
+printed at the right hand side of the alignment were not always correct.
+
+10. A bug in the alignment of a small number of sequences has been fixed.
+Previously a Guide Tree was not calculated for less than 4 sequences.
+
+11. A bug which occurred after frequent cut-and-paste operations has been
+fixed.
+
+12. A new file called clustalx.html contains an html'ised version of the
+on-line help. The file can be viewed using a World Wide Web viewer, such as
+Netscape.
+
+
+New Features since ClustalW
+---------------------------
+
+1. A subset of sequences in an alignment may be selected and realigned to a
+profile made from the unselected sequences. This may be useful when trying to
+align very divergent sequences which have been badly aligned in the initial
+full multiple alignment.
+
+
+2. A range of the sequence alignment can be selected for realignment. A new
+phylogenetic guide tree is built based only on the residue range selected.
+The selected residues are then aligned, and pasted back into the full sequence
+alignment. This may be useful for aligning small sections of the alignment
+which have been badly aligned in the full sequence alignment, or which have a
+very different guide tree structure from the tree built using the full
+sequences.
+
+
+3. Clustal X provides a versatile coloring scheme for the sequence alignment
+display. The sequences (or profiles) are colored automatically, when they are
+loaded. Sequences can be colored either by assigning a color to specific
+residues, or on the basis of an alignment consensus. In the latter case,
+the alignment consensus is calculated automatically, and the residues in each
+column are colored according to the consensus character assigned to the column.
+In this way, for example, conserved hydrophilic or hydrophobic positions can
+be highlighted.
+
+
+4. An 'Alignment Quality Score' is plotted below the alignment. This is an
+estimate of the conservation of each column in the alignment. Highly conserved
+columns will have a high quality score, less conserved positions will be
+marked by a low score.
+
+
+5. 'Exceptional' residues in the alignment that cause the low quality scores
+described above, can be highlighted. These can be expected to occur at a
+moderate frequency in all the sequences because of their steady divergence
+due to the natural processes of evolution. However, clustering of highlighted
+residues is a strong indication of misalignment.
+Occasionally, highlighted residues may also point to regions of some biological
+significance.
+
+6. Low-scoring segments in the alignment can be highlighted. The segments are
+defined as those regions which score negatively in a forward and backward
+summation of the alignment profile scores. See the online help for more
+details.
+
+7. The new GCG9 MSF,RSF formats are now recognised as input formats for
+clustalx.  The alignments cannot be written out in these formats however.
+
+The code has been tested on UNIX (SGI, SUN, DIGITAL) and Macintosh. Compiled
+executables are provided for these systems. If you wish to recompile the
+source files, you will first need to install the NCBI toolkit on your machine.
+Then, to compile the program on UNIX, edit the makefile to point to your NCBI
+include and library files, and type:
+
+     make -f makefile.sun
+or   make -f makefile.sgi
+or   make -f makefile.osf
+
+
+To run the program, type clustalx. A window is displayed with a pull-down menu
+bar which allows all functions to be selected and all alignment parameters
+to be modified, if desired.
+
+
+Documentation for ClustalW (clustalw.doc) is included in the directory. Online
+help is also available for most options of Clustal X by selecting HELP from
+the menu bar.
+
+Help is also available on the WWW at
+
+www-igbmc.u-strasbg.fr/BioInfo/ClustalX/
+www-igbmc.u-strasbg.fr/BioInfo/ClustalW/
+www.U.arizona.edu/~schluter/ClustalW/index.html
+
+
+INSTALLATION    (for Unix, PC and MAC)
+------------
+
+UNIX
+----
+
+Executables are provided in the appropriate archives for Digital UNIX 4.0 on
+Alphas, Sun OS 5.6, Silicon Graphics IRIX 6.2 and LINUX (libc6 must be
+installed). If you wish to run on another platform, you will need to recompile
+Clustal X for yourself.
+
+The executable file clustalx should be copied to one of the directories
+specified in your PATH environment variable. The files called *.par and
+clustalx_help should also be copied to the same directory.
+
+Recompiling ClustalX:
+
+First of all, you need the NCBI Vibrant toolkit installed on your machine. If
+this is not already done, you can get the toolkit by anonymous ftp to
+ncbi.nlm.nih.gov.
+You should then copy one of the makefiles supplied in the unix archives to
+'makefile' and edit it, changing the NCBI_INC and NCBI_LIB paths for your
+system.
+
+You make the program with:
+make -f makefile
+
+This produces the executable file clustalx. You can then proceed with the 
+installation as described above.
+
+
+MS WINDOWS
+----------
+
+We supply an executable file (clustalx.exe) which will run under MS Windows 
+(32 bit). The directory containing the executable (plus the files named *.par,
+and clustalx.hlp) should be added to your path defined in the autoexec.bat
+file.
+
+
+Recompiling ClustalX:
+
+First of all, you need the NCBI Vibrant toolkit installed on your machine. If
+this is not already done, you can get the toolkit by anonymous ftp to
+ncbi.nlm.nih.gov.
+
+A makefile is supplied which can be used as a guide for recompiling the
+ClustalX source code. You will need to edit it for your system. In 
+particular the NCBI_INC and NCBI_LIB paths should point to your installation.
+
+
+MAC
+---
+
+An executable program called clustalx is supplied for Power Macintoshes.
+For 68K machines, you will need to recompile the code yourself. The 
+program may need up to 10m of memory to run depending on the number and
+length of your sequences. The memory allocation can be adjusted with the
+Get Info (%I) command from the Finder if you have problems. Just double click 
+the executable file name or icon and off you go (we hope). The files *.par and
+clustalx_help should be stored in the same directory as the clustalx program.
+
+Recompiling ClustalX:
+
+First of all, you need the NCBI Vibrant toolkit installed on your machine. If
+this is not already done, you can get the toolkit by anonymous ftp to
+ncbi.nlm.nih.gov.
+
+We used the Metrowerks CodeWarrior C compiler to compile the ClustalX files,
+but another ANSI C compiler should work. You need to compile all the *.c
+files supplied in the archive, then link them together with the NCBI Toolkit
+libraries 'ncbi' and 'vibrant'.
+
+
+                            CLUSTAL REFERENCES
+                            ------------------
+
+Details of algorithms, implementation and useful tips on usage of Clustal
+programs can be found in the following publications:
+
+Jeanmougin,F., Thompson,J.D., Gouy,M., Higgins,D.G. and Gibson,T.J. (1998)
+Multiple sequence alignment with Clustal X. Trends Biochem Sci, 23, 403-5.
+
+Thompson,J.D., Gibson,T.J., Plewniak,F., Jeanmougin,F. and Higgins,D.G. (1997)
+The ClustalX windows interface: flexible strategies for multiple sequence 
+alignment aided by quality analysis tools. Nucleic Acids Research, 25:4876-4882.
+
+Higgins, D. G., Thompson, J. D. and Gibson, T. J. (1996) Using CLUSTAL for
+multiple sequence alignments. Methods Enzymol., 266, 383-402.
+
+Thompson, J.D., Higgins, D.G. and Gibson, T.J. (1994) CLUSTAL W: improving the
+sensitivity of progressive multiple sequence alignment through sequence
+weighting, position-specific gap penalties and weight matrix choice.  Nucleic
+Acids Research, 22:4673-4680.
+
+Higgins,D.G., Bleasby,A.J. and Fuchs,R. (1992) CLUSTAL V: improved software for
+multiple sequence alignment. CABIOS 8,189-191.
+
+Higgins,D.G. and Sharp,P.M. (1989) Fast and sensitive multiple sequence
+alignments on a microcomputer. CABIOS 5,151-153.
+
+Higgins,D.G. and Sharp,P.M. (1988) CLUSTAL: a package for performing multiple
+sequence alignment on a microcomputer. Gene 73,237-244.
+

Added: trunk/packages/clustalw/branches/upstream/current/alnscore.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/alnscore.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/alnscore.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,114 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "clustalw.h"
+
+#define MAX(a,b) ((a)>(b)?(a):(b))
+#define MIN(a,b) ((a)<(b)?(a):(b))
+
+/*
+ *       Prototypes
+ */
+
+static sint  count_gaps(sint s1, sint s2, sint l);
+
+/*
+ *       Global Variables
+ */
+
+extern float gap_open;
+extern sint   nseqs;
+extern sint   *seqlen_array;
+extern short   blosum45mt[];
+extern short   def_aa_xref[];
+extern sint   debug;
+extern sint   max_aa;
+extern char  **seq_array;
+
+
+/*
+ * aln_score: compute and report an overall score for the current alignment.
+ *
+ * A scoring table is built from blosum45mt/def_aa_xref via get_matrix().
+ * For every pair of sequences (s2 < s1) the table entries matrix[c1][c2]
+ * are summed over the compared columns, counting only columns in which
+ * both codes lie in 0..max_aa.  For each pair, 100 * gap_open * (number
+ * of gaps reported by count_gaps) is subtracted.  The total is divided by
+ * 100 and reported through info().
+ *
+ * Takes no arguments and returns nothing; it reads the global alignment
+ * (nseqs, seqlen_array, seq_array, max_aa, gap_open).  If get_matrix()
+ * returns 0 an error line is printed to stdout and no score is reported.
+ */
+void aln_score(void)
+{
+  /* static so that the NUMRES x NUMRES table is not placed on the stack */
+  static sint matrix[NUMRES][NUMRES];
+  short *matptr = blosum45mt;
+  short *mat_xref = def_aa_xref;
+  sint maxres;
+  sint s1, s2, c1, c2;
+  sint ngaps;
+  sint i, l1, l2;
+  lint score = 0;
+
+  maxres = get_matrix(matptr, mat_xref, matrix, TRUE, 100);
+  if (maxres == 0)
+    {
+       /* the table requested above is blosum45, so name that one */
+       fprintf(stdout,"Error: matrix blosum45 not found\n");
+       return;
+    }
+
+  for (s1=1;s1<=nseqs;s1++)
+   {
+    for (s2=1;s2<s1;s2++)
+      {
+        l1 = seqlen_array[s1];
+        l2 = seqlen_array[s2];
+
+        /* NOTE(review): columns are visited from 1 up to min(l1,l2)-1; if
+           sequences are stored in positions 1..length the last column is
+           never scored - confirm whether '<' should be '<=' */
+        for (i=1;i<l1 && i<l2;i++)
+          {
+            c1 = seq_array[s1][i];
+            c2 = seq_array[s2][i];
+            if ((c1>=0) && (c1<=max_aa) && (c2>=0) && (c2<=max_aa))
+                score += matrix[c1][c2];
+          }
+
+        /* NOTE(review): only l1 is passed, so count_gaps() indexes the
+           second sequence up to l1-1 as well - presumably both sequences
+           have the same aligned length; confirm */
+        ngaps = count_gaps(s1, s2, l1);
+
+        score -= 100 * gap_open * ngaps;
+      }
+   }
+
+  score /= 100;
+
+  info("Alignment Score %d", (pint)score);
+
+}
+
+/*
+ * count_gaps: count gap openings between sequences s1 and s2.
+ *
+ * Columns 1..l-1 of seq_array[s1] and seq_array[s2] are examined.  A code
+ * greater than max_aa is treated as a gap.  For every column the length
+ * of the gap run ending there is tracked for each sequence.  The count is
+ * incremented when exactly one sequence has a gap in the column and its
+ * run length up to the previous column does not exceed (for s1) / is not
+ * below (for s2) the other sequence's run length.
+ *
+ * s1, s2: indices into seq_array.
+ * l:      upper bound (exclusive) of the columns examined.
+ * Returns the number of gaps counted.
+ */
+static sint count_gaps(sint s1, sint s2, sint l)
+{
+    sint *run1, *run2;     /* gap-run length ending at each column */
+    sint gap1, gap2;       /* 1 when the sequence has a gap in this column */
+    sint col;
+    sint opened = 0;
+
+    run1 = (sint *)ckalloc((l+2) * sizeof(sint));
+    run2 = (sint *)ckalloc((l+2) * sizeof(sint));
+
+    run1[0] = 0;
+    run2[0] = 0;
+
+    for (col = 1; col < l; col++)
+      {
+         gap1 = (seq_array[s1][col] > max_aa) ? 1 : 0;
+         gap2 = (seq_array[s2][col] > max_aa) ? 1 : 0;
+
+         if (gap1 && !gap2)
+           {
+              if (run1[col-1] <= run2[col-1]) opened += 1;
+           }
+         else if (gap2 && !gap1)
+           {
+              if (run1[col-1] >= run2[col-1]) opened += 1;
+           }
+
+         /* extend the run while the gap continues, otherwise restart at 0 */
+         run1[col] = gap1 ? run1[col-1] + 1 : 0;
+         run2[col] = gap2 ? run2[col-1] + 1 : 0;
+      }
+
+    run1 = ckfree((void *)run1);
+    run2 = ckfree((void *)run2);
+
+    return(opened);
+}
+          
+

Added: trunk/packages/clustalw/branches/upstream/current/amenu.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/amenu.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/amenu.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,1317 @@
+/* Menus and command line interface for Clustal W  */
+/* DES was here MARCH. 1994 */
+/* DES was here SEPT.  1994 */
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <signal.h>
+#include <setjmp.h>
+#include "clustalw.h"
+
+/* Jump target shared by the menu functions: each one calls setjmp(jmpbuf)
+   before entering its loop, and jumper() longjmps back to it. */
+static jmp_buf jmpbuf;
+/* BADSIG is the value the menus compare signal()'s return against to
+   detect failure.  It is only defined here when neither VMS nor AIX is
+   defined - presumably those systems supply their own definition. */
+#ifndef VMS
+#ifndef AIX
+#define BADSIG (void (*)())-1
+#endif
+#endif
+
+static void jumper(int);
+
+/*
+ * jumper: SIGINT handler installed by the menus when UNIX is defined.
+ * The signal number argument is unused; control is transferred back to
+ * the most recent setjmp(jmpbuf), which then returns 1.
+ */
+static void jumper(int i)
+{
+        longjmp(jmpbuf,1);
+}
+
+
+/*
+*	Prototypes
+*/
+
+
+static void pair_menu(void);
+static void multi_menu(void);
+static void gap_penalties_menu(void);
+static void multiple_align_menu(void);          /* multiple alignments menu */
+static void profile_align_menu(void);           /* profile       "      "   */
+static void phylogenetic_tree_menu(void);       /* NJ trees/distances menu  */
+static void format_options_menu(void);          /* format of alignment output */
+static void tree_format_options_menu(void);     /* format of tree output */
+static void ss_options_menu(void);
+static sint secstroutput_options(void);
+static sint read_matrix(char *title,MatMenu menu, char *matnam, sint matn, short *mat, short *xref);
+
+/*
+*	 Global variables
+*/
+
+extern float    gap_open,      gap_extend;
+extern float  	dna_gap_open,  dna_gap_extend;
+extern float 	prot_gap_open, prot_gap_extend;
+extern float    pw_go_penalty,      pw_ge_penalty;
+extern float  	dna_pw_go_penalty,  dna_pw_ge_penalty;
+extern float 	prot_pw_go_penalty, prot_pw_ge_penalty;
+extern float	transition_weight;
+extern char 	revision_level[];
+extern sint    wind_gap,ktup,window,signif;
+extern sint    dna_wind_gap, dna_ktup, dna_window, dna_signif;
+extern sint    prot_wind_gap,prot_ktup,prot_window,prot_signif;
+extern sint	nseqs;
+extern sint 	divergence_cutoff;
+extern sint 	debug;
+extern Boolean 	neg_matrix;
+extern Boolean  quick_pairalign;
+extern Boolean	reset_alignments_new;		/* DES */
+extern Boolean	reset_alignments_all;		/* DES */
+extern sint 	gap_dist;
+extern Boolean 	no_var_penalties, no_hyd_penalties, no_pref_penalties;
+extern sint 	output_order;
+extern sint profile_no;
+extern short 	usermat[], pw_usermat[];
+extern short 	aa_xref[], pw_aa_xref[];
+extern short 	userdnamat[], pw_userdnamat[];
+extern short 	dna_xref[], pw_dna_xref[];
+
+extern Boolean 	lowercase; /* Flag for GDE output - set on comm. line*/
+extern Boolean 	cl_seq_numbers;
+extern Boolean seqRange;  /* to append sequence range with seq names, Ranu */
+
+extern Boolean 	output_clustal, output_nbrf, output_phylip, output_gcg, output_gde, output_nexus;
+extern Boolean output_fasta; /* Ramu */
+
+extern Boolean 	output_tree_clustal, output_tree_phylip, output_tree_distances,output_tree_nexus;
+extern sint     bootstrap_format;
+extern Boolean 	tossgaps, kimura;
+extern Boolean  percent;
+extern Boolean 	usemenu;
+extern Boolean 	showaln, save_parameters;
+extern Boolean	dnaflag;
+extern Boolean  use_ambiguities;
+
+
+extern char 	hyd_residues[];
+extern char 	mtrxname[], pw_mtrxname[];
+extern char 	dnamtrxname[], pw_dnamtrxname[];
+extern char	seqname[];
+
+extern sint output_struct_penalties;
+extern Boolean use_ss1, use_ss2;
+
+extern Boolean empty;
+extern Boolean profile1_empty, profile2_empty;   /* whether or not profiles   */
+
+extern char  	profile1_name[FILENAMELEN+1];
+extern char  	profile2_name[FILENAMELEN+1];
+
+extern Boolean         use_endgaps;
+extern sint        matnum,pw_matnum;
+extern sint        dnamatnum,pw_dnamatnum;
+
+extern sint        helix_penalty;
+extern sint        strand_penalty;
+extern sint        loop_penalty;
+extern sint        helix_end_minus;
+extern sint        helix_end_plus;
+extern sint        strand_end_minus;
+extern sint        strand_end_plus;
+extern sint        helix_end_penalty;
+extern sint        strand_end_penalty;
+
+extern MatMenu matrix_menu;
+extern MatMenu pw_matrix_menu;
+extern MatMenu dnamatrix_menu;
+
+/* File-name buffers handed to the alignment and tree routines
+   (align, make_tree, get_tree, phylogenetic_tree, bootstrap_tree, ...).
+   phylip_name, clustal_name, dist_name and nexus_name are reset to the
+   empty string whenever a new sequence file is read from a menu. */
+static char phylip_name[FILENAMELEN]="";
+static char clustal_name[FILENAMELEN]="";
+static char dist_name[FILENAMELEN]="";
+static char nexus_name[FILENAMELEN]="";
+static char fasta_name[FILENAMELEN]="";
+
+/* File-name buffers passed to profile_align() for profile 1 and profile 2 */
+static char p1_tree_name[FILENAMELEN]="";
+static char p2_tree_name[FILENAMELEN]="";
+
+/* Menu labels for the secondary-structure output setting; indexed by
+   output_struct_penalties (0..3). */
+static char *secstroutput_txt[] = {
+				"Secondary Structure",
+				"Gap Penalty Mask",
+				"Structure and Penalty Mask",
+				"None"	};
+				                
+
+static char *lin1, *lin2, *lin3;
+
+static int firstres =0;	/* range of alignment for saving as ... */
+static int lastres = 0;
+
+/*
+ * init_amenu: allocate the three input-line buffers (lin1, lin2, lin3)
+ * used by the menus.  Each buffer holds MAXLINE+1 characters and is
+ * obtained from ckalloc().
+ */
+void init_amenu(void)
+{
+	const size_t line_bytes = (MAXLINE+1) * sizeof (char);
+
+	lin1 = (char *)ckalloc( line_bytes );
+	lin2 = (char *)ckalloc( line_bytes );
+	lin3 = (char *)ckalloc( line_bytes );
+}
+
+/*
+ * main_menu: top-level interactive menu of the program.
+ *
+ * Sets up SIGINT handling, then loops forever: prints the menu, reads the
+ * user's choice into lin1 and dispatches on its first character
+ * (case-insensitive).  The function does not return; the program is left
+ * through exit(0) when the user chooses 'X' or 'Q'.
+ */
+void main_menu(void)
+{
+    int catchint;
+
+    /* signal() hands back the previous disposition: only take over SIGINT
+       when it was not already being ignored */
+    catchint = signal(SIGINT, SIG_IGN) != SIG_IGN;
+    if (catchint) {
+        /* a non-zero setjmp() result means jumper() brought us back here */
+        if (setjmp(jmpbuf) != 0)
+            fprintf(stdout,"\n.. Interrupt\n");
+        /* SIG_ERR is the standard failure return of signal() */
+#ifdef UNIX
+        if (signal(SIGINT,jumper) == SIG_ERR)
+            fprintf(stdout,"Error: signal\n");
+#else
+        if (signal(SIGINT,SIG_DFL) == SIG_ERR)
+            fprintf(stdout,"Error: signal\n");
+#endif
+    }
+
+    while(TRUE) {
+        fprintf(stdout,"\n\n\n");
+        fprintf(stdout," **************************************************************\n");
+        fprintf(stdout," ******** CLUSTAL %s Multiple Sequence Alignments  ********\n",revision_level);
+        fprintf(stdout," **************************************************************\n");
+        fprintf(stdout,"\n\n");
+
+        fprintf(stdout,"     1. Sequence Input From Disc\n");
+        fprintf(stdout,"     2. Multiple Alignments\n");
+        fprintf(stdout,"     3. Profile / Structure Alignments\n");
+        fprintf(stdout,"     4. Phylogenetic trees\n");
+        fprintf(stdout,"\n");
+        fprintf(stdout,"     S. Execute a system command\n");
+        fprintf(stdout,"     H. HELP\n");
+        fprintf(stdout,"     X. EXIT (leave program)\n\n\n");
+
+        getstr("Your choice",lin1);
+
+        /* toupper() is only defined for unsigned char values (and EOF) */
+        switch(toupper((unsigned char)*lin1)) {
+            case '1': seq_input(FALSE);
+                /* new sequences: clear the stored output file names */
+                phylip_name[0]=EOS;
+                clustal_name[0]=EOS;
+                dist_name[0]=EOS;
+                nexus_name[0]=EOS;
+                break;
+            case '2': multiple_align_menu();
+                break;
+            case '3': profile_align_menu();
+                break;
+            case '4': phylogenetic_tree_menu();
+                break;
+            case 'S': do_system();
+                break;
+            case '?':
+            case 'H': get_help('1');
+                break;
+            case 'Q':
+            case 'X': exit(0);
+                break;
+            default: fprintf(stdout,"\n\nUnrecognised Command\n\n");
+                break;
+        }
+    }
+}
+
+
+
+
+
+
+
+
+
+/*
+ * multiple_align_menu: interactive menu for multiple alignments.
+ *
+ * Sets up SIGINT handling, then repeatedly prints the menu (showing the
+ * current state of quick_pairalign, reset_alignments_new and showaln),
+ * reads a choice into lin1 and dispatches on its first character
+ * (case-insensitive).  Returns to the caller on an empty line, 'Q' or 'X'.
+ */
+static void multiple_align_menu(void)
+{
+    int catchint;
+
+    /* only take over SIGINT when it was not already being ignored */
+    catchint = signal(SIGINT, SIG_IGN) != SIG_IGN;
+    if (catchint) {
+        /* a non-zero setjmp() result means jumper() brought us back here */
+        if (setjmp(jmpbuf) != 0)
+            fprintf(stdout,"\n.. Interrupt\n");
+        /* SIG_ERR is the standard failure return of signal() */
+#ifdef UNIX
+        if (signal(SIGINT,jumper) == SIG_ERR)
+            fprintf(stdout,"Error: signal\n");
+#else
+        if (signal(SIGINT,SIG_DFL) == SIG_ERR)
+            fprintf(stdout,"Error: signal\n");
+#endif
+    }
+
+    while(TRUE)
+    {
+        fprintf(stdout,"\n\n\n");
+        fprintf(stdout,"****** MULTIPLE ALIGNMENT MENU ******\n");
+        fprintf(stdout,"\n\n");
+
+        fprintf(stdout,"    1.  Do complete multiple alignment now (%s)\n",
+                        (!quick_pairalign) ? "Slow/Accurate" : "Fast/Approximate");
+        fprintf(stdout,"    2.  Produce guide tree file only\n");
+        fprintf(stdout,"    3.  Do alignment using old guide tree file\n\n");
+        fprintf(stdout,"    4.  Toggle Slow/Fast pairwise alignments = %s\n\n",
+                        (!quick_pairalign) ? "SLOW" : "FAST");
+        fprintf(stdout,"    5.  Pairwise alignment parameters\n");
+        fprintf(stdout,"    6.  Multiple alignment parameters\n\n");
+        fprintf(stdout,"    7.  Reset gaps before alignment?");
+        if(reset_alignments_new)
+            fprintf(stdout," = ON\n");
+        else
+            fprintf(stdout," = OFF\n");
+        fprintf(stdout,"    8.  Toggle screen display          = %s\n",
+                        (!showaln) ? "OFF" : "ON");
+        fprintf(stdout,"    9.  Output format options\n");
+        fprintf(stdout,"\n");
+
+        fprintf(stdout,"    S.  Execute a system command\n");
+        fprintf(stdout,"    H.  HELP\n");
+        fprintf(stdout,"    or press [RETURN] to go back to main menu\n\n\n");
+
+        getstr("Your choice",lin1);
+        if(*lin1 == EOS) return;
+
+        /* toupper() is only defined for unsigned char values (and EOF) */
+        switch(toupper((unsigned char)*lin1))
+        {
+        case '1': align(phylip_name);
+            break;
+        case '2': make_tree(phylip_name);
+            break;
+        case '3': get_tree(phylip_name);
+            break;
+        case '4': quick_pairalign ^= TRUE;
+            break;
+        case '5': pair_menu();
+            break;
+        case '6': multi_menu();
+            break;
+        case '7': reset_alignments_new ^= TRUE;
+            /* switching this option on switches reset_alignments_all off */
+            if(reset_alignments_new==TRUE)
+                reset_alignments_all=FALSE;
+            break;
+        case '8': showaln ^= TRUE;
+            break;
+        case '9': format_options_menu();
+            break;
+        case 'S': do_system();
+            break;
+        case '?':
+        case 'H': get_help('2');
+            break;
+        case 'Q':
+        case 'X': return;
+
+        default: fprintf(stdout,"\n\nUnrecognised Command\n\n");
+            break;
+        }
+    }
+}
+
+
+
+
+
+
+
+
+
+/*
+ * profile_align_menu: interactive menu for profile and structure
+ * alignments.
+ *
+ * Sets up SIGINT handling, then repeatedly prints the menu (marking each
+ * profile as "(loaded)" when its *_empty flag is false), reads a choice
+ * into lin1 and dispatches on its first character (case-insensitive).
+ * Returns to the caller on an empty line, 'Q' or 'X'.
+ */
+static void profile_align_menu(void)
+{
+    int catchint;
+
+    /* only take over SIGINT when it was not already being ignored */
+    catchint = signal(SIGINT, SIG_IGN) != SIG_IGN;
+    if (catchint) {
+        /* a non-zero setjmp() result means jumper() brought us back here */
+        if (setjmp(jmpbuf) != 0)
+            fprintf(stdout,"\n.. Interrupt\n");
+        /* SIG_ERR is the standard failure return of signal() */
+#ifdef UNIX
+        if (signal(SIGINT,jumper) == SIG_ERR)
+            fprintf(stdout,"Error: signal\n");
+#else
+        if (signal(SIGINT,SIG_DFL) == SIG_ERR)
+            fprintf(stdout,"Error: signal\n");
+#endif
+    }
+
+    while(TRUE)
+    {
+        fprintf(stdout,"\n\n\n");
+        fprintf(stdout,"****** PROFILE AND STRUCTURE ALIGNMENT MENU ******\n");
+        fprintf(stdout,"\n\n");
+
+        fprintf(stdout,"    1.  Input 1st. profile             ");
+        if (!profile1_empty) fprintf(stdout,"(loaded)");
+        fprintf(stdout,"\n");
+        fprintf(stdout,"    2.  Input 2nd. profile/sequences   ");
+        if (!profile2_empty) fprintf(stdout,"(loaded)");
+        fprintf(stdout,"\n\n");
+        fprintf(stdout,"    3.  Align 2nd. profile to 1st. profile\n");
+        fprintf(stdout,"    4.  Align sequences to 1st. profile (%s)\n\n",
+                        (!quick_pairalign) ? "Slow/Accurate" : "Fast/Approximate");
+        fprintf(stdout,"    5.  Toggle Slow/Fast pairwise alignments = %s\n\n",
+                        (!quick_pairalign) ? "SLOW" : "FAST");
+        fprintf(stdout,"    6.  Pairwise alignment parameters\n");
+        fprintf(stdout,"    7.  Multiple alignment parameters\n\n");
+        fprintf(stdout,"    8.  Toggle screen display                = %s\n",
+                        (!showaln) ? "OFF" : "ON");
+        fprintf(stdout,"    9.  Output format options\n");
+        fprintf(stdout,"    0.  Secondary structure options\n");
+        fprintf(stdout,"\n");
+        fprintf(stdout,"    S.  Execute a system command\n");
+        fprintf(stdout,"    H.  HELP\n");
+        fprintf(stdout,"    or press [RETURN] to go back to main menu\n\n\n");
+
+        getstr("Your choice",lin1);
+        if(*lin1 == EOS) return;
+
+        /* toupper() is only defined for unsigned char values (and EOF) */
+        switch(toupper((unsigned char)*lin1))
+        {
+        case '1': profile_no = 1;      /* 1 => 1st profile */
+            profile_input();
+            strcpy(profile1_name, seqname);
+            break;
+        case '2': profile_no = 2;      /* 2 => 2nd profile */
+            profile_input();
+            strcpy(profile2_name, seqname);
+            break;
+        case '3': profile_align(p1_tree_name,p2_tree_name);       /* align the 2 alignments now */
+            break;
+        case '4': new_sequence_align(phylip_name);  /* align new sequences to profile 1 */
+            break;
+        case '5': quick_pairalign ^= TRUE;
+            break;
+        case '6': pair_menu();
+            break;
+        case '7': multi_menu();
+            break;
+        case '8': showaln ^= TRUE;
+            break;
+        case '9': format_options_menu();
+            break;
+        case '0': ss_options_menu();
+            break;
+        case 'S': do_system();
+            break;
+        case '?':
+        case 'H': get_help('6');
+            break;
+        case 'Q':
+        case 'X': return;
+
+        default: fprintf(stdout,"\n\nUnrecognised Command\n\n");
+            break;
+        }
+    }
+}
+
+
+/*
+ * ss_options_menu: interactive menu for the secondary-structure options.
+ *
+ * Sets up SIGINT handling, then repeatedly prints the current settings,
+ * reads a choice into lin2 and dispatches on its first character
+ * (case-insensitive).  Options 1-2 toggle use_ss1/use_ss2, option 3 asks
+ * secstroutput_options() for a new output_struct_penalties value and
+ * options 4-9 prompt for new numeric values via getint().  Returns to the
+ * caller on an empty line.
+ */
+static void ss_options_menu(void)
+{
+    int catchint;
+
+    /* only take over SIGINT when it was not already being ignored */
+    catchint = signal(SIGINT, SIG_IGN) != SIG_IGN;
+    if (catchint) {
+        /* a non-zero setjmp() result means jumper() brought us back here */
+        if (setjmp(jmpbuf) != 0)
+            fprintf(stdout,"\n.. Interrupt\n");
+        /* SIG_ERR is the standard failure return of signal() */
+#ifdef UNIX
+        if (signal(SIGINT,jumper) == SIG_ERR)
+            fprintf(stdout,"Error: signal\n");
+#else
+        if (signal(SIGINT,SIG_DFL) == SIG_ERR)
+            fprintf(stdout,"Error: signal\n");
+#endif
+    }
+
+    while(TRUE) {
+
+        fprintf(stdout,"\n\n\n");
+        fprintf(stdout," ********* SECONDARY STRUCTURE OPTIONS *********\n");
+        fprintf(stdout,"\n\n");
+
+        fprintf(stdout,"     1. Use profile 1 secondary structure / penalty mask  ");
+        if(use_ss1)
+            fprintf(stdout,"= YES\n");
+        else
+            fprintf(stdout,"= NO\n");
+        fprintf(stdout,"     2. Use profile 2 secondary structure / penalty mask  ");
+        if(use_ss2)
+            fprintf(stdout,"= YES\n");
+        else
+            fprintf(stdout,"= NO\n");
+        fprintf(stdout,"\n");
+        fprintf(stdout,"     3. Output in alignment  ");
+        fprintf(stdout,"= %s\n",secstroutput_txt[output_struct_penalties]);
+        fprintf(stdout,"\n");
+
+        fprintf(stdout,"     4. Helix gap penalty                     :%d\n",(pint)helix_penalty);
+        fprintf(stdout,"     5. Strand gap penalty                    :%d\n",(pint)strand_penalty);
+        fprintf(stdout,"     6. Loop gap penalty                      :%d\n",(pint)loop_penalty);
+
+        fprintf(stdout,"     7. Secondary structure terminal penalty  :%d\n",(pint)helix_end_penalty);
+        fprintf(stdout,"     8. Helix terminal positions       within :%d      outside :%d\n",
+                        (pint)helix_end_minus,(pint)helix_end_plus);
+        fprintf(stdout,"     9. Strand terminal positions      within :%d      outside :%d\n",
+                        (pint)strand_end_minus,(pint)strand_end_plus);
+
+        fprintf(stdout,"\n\n");
+        fprintf(stdout,"     H. HELP\n\n\n");
+
+        getstr("Enter number (or [RETURN] to exit)",lin2);
+        if( *lin2 == EOS) {
+            return;
+        }
+
+        /* toupper() is only defined for unsigned char values (and EOF) */
+        switch(toupper((unsigned char)*lin2)) {
+            case '1': use_ss1 ^= TRUE;
+                break;
+            case '2': use_ss2 ^= TRUE;
+                break;
+            case '3': output_struct_penalties = secstroutput_options();
+                break;
+            case '4':
+                fprintf(stdout,"Helix Penalty Currently: %d\n",(pint)helix_penalty);
+                helix_penalty=getint("Enter number",1,9,helix_penalty);
+                break;
+            case '5':
+                fprintf(stdout,"Strand Gap Penalty Currently: %d\n",(pint)strand_penalty);
+                strand_penalty=getint("Enter number",1,9,strand_penalty);
+                break;
+            case '6':
+                fprintf(stdout,"Loop Gap Penalty Currently: %d\n",(pint)loop_penalty);
+                loop_penalty=getint("Enter number",1,9,loop_penalty);
+                break;
+            case '7':
+                fprintf(stdout,"Secondary Structure Terminal Penalty Currently: %d\n",
+                        (pint)helix_end_penalty);
+                helix_end_penalty=getint("Enter number",1,9,helix_end_penalty);
+                /* one value is used for both the helix and the strand terminal penalty */
+                strand_end_penalty = helix_end_penalty;
+                break;
+            case '8':
+                fprintf(stdout,"Helix Terminal Positions Currently: \n");
+                fprintf(stdout,"        within helix: %d     outside helix: %d\n",
+                        (pint)helix_end_minus,(pint)helix_end_plus);
+                helix_end_minus=getint("Enter number of residues within helix",0,3,helix_end_minus);
+                helix_end_plus=getint("Enter number of residues outside helix",0,3,helix_end_plus);
+                break;
+            case '9':
+                fprintf(stdout,"Strand Terminal Positions Currently: \n");
+                fprintf(stdout,"        within strand: %d     outside strand: %d\n",
+                        (pint)strand_end_minus,(pint)strand_end_plus);
+                strand_end_minus=getint("Enter number of residues within strand",0,3,strand_end_minus);
+                strand_end_plus=getint("Enter number of residues outside strand",0,3,strand_end_plus);
+                break;
+            case '?':
+            case 'H':
+                get_help('B');
+                break;
+            default:
+                fprintf(stdout,"\n\nUnrecognised Command\n\n");
+                break;
+        }
+    }
+}
+
+
+/*
+ * secstroutput_options: ask which secondary-structure information is to
+ * be written with the alignment.
+ *
+ * Prints the four entries of secstroutput_txt[] and the current setting,
+ * then reads a choice into lin2.
+ *
+ * Returns the index (0..3) of the chosen entry.  When the user presses
+ * [RETURN], or leaves with 'Q' or 'X', the current value of
+ * output_struct_penalties is returned, so a caller that assigns the
+ * result back leaves the setting unchanged.  Asking for help ('H' or '?')
+ * shows the help text and then displays the menu again.
+ */
+static sint secstroutput_options(void)
+{
+
+    while(TRUE)
+    {
+        fprintf(stdout,"\n\n\n");
+        fprintf(stdout," ********* Secondary Structure Output Menu *********\n");
+        fprintf(stdout,"\n\n");
+
+        fprintf(stdout,"     1. %s\n",secstroutput_txt[0]);
+        fprintf(stdout,"     2. %s\n",secstroutput_txt[1]);
+        fprintf(stdout,"     3. %s\n",secstroutput_txt[2]);
+        fprintf(stdout,"     4. %s\n",secstroutput_txt[3]);
+        fprintf(stdout,"     H. HELP\n\n");
+        fprintf(stdout,
+"     -- Current output is %s ",secstroutput_txt[output_struct_penalties]);
+        fprintf(stdout,"--\n");
+
+        getstr("\n\nEnter number (or [RETURN] to exit)",lin2);
+        if(*lin2 == EOS) return(output_struct_penalties);
+
+        /* toupper() is only defined for unsigned char values (and EOF) */
+        switch(toupper((unsigned char)*lin2))
+        {
+            case '1': return(0);
+            case '2': return(1);
+            case '3': return(2);
+            case '4': return(3);
+            case '?':
+            case 'H': get_help('C');
+                /* stay in the menu: without this break, help fell through
+                   to the quit case and reset the setting to 0 */
+                break;
+            case 'Q':
+            case 'X':
+                /* quitting must not change the setting */
+                return(output_struct_penalties);
+
+            default: fprintf(stdout,"\n\nUnrecognised Command\n\n");
+                break;
+        }
+    }
+}
+
+
+
+
+
+
+
+/*
+ * phylogenetic_tree_menu: interactive menu for trees and distances.
+ *
+ * Sets up SIGINT handling, then repeatedly prints the menu (showing the
+ * current state of tossgaps and kimura), reads a choice into lin1 and
+ * dispatches on its first character (case-insensitive).  Returns to the
+ * caller on an empty line, 'Q' or 'X'.
+ */
+static void phylogenetic_tree_menu(void)
+{
+    int catchint;
+
+    /* only take over SIGINT when it was not already being ignored */
+    catchint = signal(SIGINT, SIG_IGN) != SIG_IGN;
+    if (catchint) {
+        /* a non-zero setjmp() result means jumper() brought us back here */
+        if (setjmp(jmpbuf) != 0)
+            fprintf(stdout,"\n.. Interrupt\n");
+        /* SIG_ERR is the standard failure return of signal() */
+#ifdef UNIX
+        if (signal(SIGINT,jumper) == SIG_ERR)
+            fprintf(stdout,"Error: signal\n");
+#else
+        if (signal(SIGINT,SIG_DFL) == SIG_ERR)
+            fprintf(stdout,"Error: signal\n");
+#endif
+    }
+
+    while(TRUE)
+    {
+        fprintf(stdout,"\n\n\n");
+        fprintf(stdout,"****** PHYLOGENETIC TREE MENU ******\n");
+        fprintf(stdout,"\n\n");
+
+        fprintf(stdout,"    1.  Input an alignment\n");
+        fprintf(stdout,"    2.  Exclude positions with gaps?        ");
+        if(tossgaps)
+            fprintf(stdout,"= ON\n");
+        else
+            fprintf(stdout,"= OFF\n");
+        fprintf(stdout,"    3.  Correct for multiple substitutions? ");
+        if(kimura)
+            fprintf(stdout,"= ON\n");
+        else
+            fprintf(stdout,"= OFF\n");
+        fprintf(stdout,"    4.  Draw tree now\n");
+        fprintf(stdout,"    5.  Bootstrap tree\n");
+        fprintf(stdout,"    6.  Output format options\n");
+        fprintf(stdout,"\n");
+        fprintf(stdout,"    S.  Execute a system command\n");
+        fprintf(stdout,"    H.  HELP\n");
+        fprintf(stdout,"    or press [RETURN] to go back to main menu\n\n\n");
+
+        getstr("Your choice",lin1);
+        if(*lin1 == EOS) return;
+
+        /* toupper() is only defined for unsigned char values (and EOF) */
+        switch(toupper((unsigned char)*lin1))
+        {
+            case '1': seq_input(FALSE);
+                /* new sequences: clear the stored output file names */
+                phylip_name[0]=EOS;
+                clustal_name[0]=EOS;
+                dist_name[0]=EOS;
+                nexus_name[0]=EOS;
+                break;
+            case '2': tossgaps ^= TRUE;
+                break;
+            case '3': kimura ^= TRUE;
+                break;
+            case '4': phylogenetic_tree(phylip_name,clustal_name,dist_name,nexus_name,"amenu.pim");
+                break;
+            case '5': bootstrap_tree(phylip_name,clustal_name,nexus_name);
+                break;
+            case '6': tree_format_options_menu();
+                break;
+            case 'S': do_system();
+                break;
+            case '?':
+            case 'H': get_help('7');
+                break;
+            case 'Q':
+            case 'X': return;
+
+            default: fprintf(stdout,"\n\nUnrecognised Command\n\n");
+                break;
+        }
+    }
+}
+
+
+
+
+
+
+/*
+ * tree_format_options_menu: interactive menu selecting the formats of
+ * tree output.
+ *
+ * Sets up SIGINT handling, then repeatedly prints the current state of
+ * the output_tree_* flags and of bootstrap_format, reads a choice into
+ * lin2 and dispatches on its first character (case-insensitive).
+ * Options 1-4 toggle one flag each; option 5 switches bootstrap_format
+ * between BS_NODE_LABELS and BS_BRANCH_LABELS.  Returns to the caller on
+ * an empty line.
+ */
+static void tree_format_options_menu(void)      /* format of tree output */
+{
+    int catchint;
+
+    /* only take over SIGINT when it was not already being ignored */
+    catchint = signal(SIGINT, SIG_IGN) != SIG_IGN;
+    if (catchint) {
+        /* a non-zero setjmp() result means jumper() brought us back here */
+        if (setjmp(jmpbuf) != 0)
+            fprintf(stdout,"\n.. Interrupt\n");
+        /* SIG_ERR is the standard failure return of signal() */
+#ifdef UNIX
+        if (signal(SIGINT,jumper) == SIG_ERR)
+            fprintf(stdout,"Error: signal\n");
+#else
+        if (signal(SIGINT,SIG_DFL) == SIG_ERR)
+            fprintf(stdout,"Error: signal\n");
+#endif
+    }
+
+    while(TRUE) {
+        fprintf(stdout,"\n\n\n");
+        fprintf(stdout," ****** Format of Phylogenetic Tree Output ******\n");
+        fprintf(stdout,"\n\n");
+        fprintf(stdout,"     1. Toggle CLUSTAL format tree output    =  %s\n",
+                        (!output_tree_clustal)  ? "OFF" : "ON");
+        fprintf(stdout,"     2. Toggle Phylip format tree output     =  %s\n",
+                        (!output_tree_phylip)   ? "OFF" : "ON");
+        fprintf(stdout,"     3. Toggle Phylip distance matrix output =  %s\n",
+                        (!output_tree_distances)? "OFF" : "ON");
+        fprintf(stdout,"     4. Toggle Nexus format tree output      =  %s\n\n",
+                        (!output_tree_nexus)? "OFF" : "ON");
+        fprintf(stdout,"     5. Toggle Phylip bootstrap positions    =  %s\n\n",
+                        (bootstrap_format==BS_NODE_LABELS) ? "NODE LABELS" : "BRANCH LABELS");
+        fprintf(stdout,"\n");
+        fprintf(stdout,"     H. HELP\n\n\n");
+
+        getstr("Enter number (or [RETURN] to exit)",lin2);
+        if(*lin2 == EOS) return;
+
+        /* toupper() is only defined for unsigned char values (and EOF) */
+        switch(toupper((unsigned char)*lin2)) {
+            case '1':
+                output_tree_clustal   ^= TRUE;
+                break;
+            case '2':
+                output_tree_phylip    ^= TRUE;
+                break;
+            case '3':
+                output_tree_distances ^= TRUE;
+                break;
+            case '4':
+                output_tree_nexus ^= TRUE;
+                break;
+            case '5':
+                if (bootstrap_format == BS_NODE_LABELS)
+                    bootstrap_format = BS_BRANCH_LABELS;
+                else
+                    bootstrap_format = BS_NODE_LABELS;
+                break;
+            case '?':
+            case 'H':
+                get_help('0');
+                break;
+            default:
+                fprintf(stdout,"\n\nUnrecognised Command\n\n");
+                break;
+        }
+    }
+}
+
+
+/*
+ * format_options_menu: interactive menu selecting the formats of
+ * alignment output.
+ *
+ * Sets up SIGINT handling, then repeatedly prints the current state of
+ * the output flags, reads a choice into lin2 and dispatches on its first
+ * character (case-insensitive).  Most options toggle a single flag;
+ * option 9 switches output_order between INPUT and ALIGNED; option 0
+ * writes the alignment output file(s) immediately, provided sequences
+ * are loaded.  Returns to the caller on an empty line.
+ */
+static void format_options_menu(void)      /* format of alignment output */
+{
+    char path[FILENAMELEN+1];
+    int catchint;
+
+    /* only take over SIGINT when it was not already being ignored */
+    catchint = signal(SIGINT, SIG_IGN) != SIG_IGN;
+    if (catchint) {
+        /* a non-zero setjmp() result means jumper() brought us back here */
+        if (setjmp(jmpbuf) != 0)
+            fprintf(stdout,"\n.. Interrupt\n");
+        /* SIG_ERR is the standard failure return of signal() */
+#ifdef UNIX
+        if (signal(SIGINT,jumper) == SIG_ERR)
+            fprintf(stdout,"Error: signal\n");
+#else
+        if (signal(SIGINT,SIG_DFL) == SIG_ERR)
+            fprintf(stdout,"Error: signal\n");
+#endif
+    }
+
+    while(TRUE) {
+        fprintf(stdout,"\n\n\n");
+        fprintf(stdout," ********* Format of Alignment Output *********\n");
+        fprintf(stdout,"\n\n");
+        fprintf(stdout,"     F. Toggle FASTA format output       =  %s\n\n",
+                        (!output_fasta) ? "OFF" : "ON");
+        fprintf(stdout,"     1. Toggle CLUSTAL format output     =  %s\n",
+                        (!output_clustal) ? "OFF" : "ON");
+        fprintf(stdout,"     2. Toggle NBRF/PIR format output    =  %s\n",
+                        (!output_nbrf) ? "OFF" : "ON");
+        fprintf(stdout,"     3. Toggle GCG/MSF format output     =  %s\n",
+                        (!output_gcg) ? "OFF" : "ON");
+        fprintf(stdout,"     4. Toggle PHYLIP format output      =  %s\n",
+                        (!output_phylip) ? "OFF" : "ON");
+        fprintf(stdout,"     5. Toggle NEXUS format output       =  %s\n",
+                        (!output_nexus) ? "OFF" : "ON");
+        fprintf(stdout,"     6. Toggle GDE format output         =  %s\n\n",
+                        (!output_gde) ? "OFF" : "ON");
+        fprintf(stdout,"     7. Toggle GDE output case           =  %s\n",
+                        (!lowercase) ? "UPPER" : "LOWER");
+
+        fprintf(stdout,"     8. Toggle CLUSTALW sequence numbers =  %s\n",
+                        (!cl_seq_numbers) ? "OFF" : "ON");
+        fprintf(stdout,"     9. Toggle output order              =  %s\n\n",
+                        (output_order==0) ? "INPUT FILE" : "ALIGNED");
+
+        fprintf(stdout,"     0. Create alignment output file(s) now?\n\n");
+        fprintf(stdout,"     T. Toggle parameter output          = %s\n",
+                        (!save_parameters) ? "OFF" : "ON");
+        fprintf(stdout,"     R. Toggle sequence range numbers =  %s\n",
+                        (!seqRange) ? "OFF" : "ON");
+        fprintf(stdout,"\n");
+        fprintf(stdout,"     H. HELP\n\n\n");
+
+        getstr("Enter number (or [RETURN] to exit)",lin2);
+        if(*lin2 == EOS) return;
+
+        /* toupper() is only defined for unsigned char values (and EOF) */
+        switch(toupper((unsigned char)*lin2)) {
+            case '1':
+                output_clustal ^= TRUE;
+                break;
+            case '2':
+                output_nbrf ^= TRUE;
+                break;
+            case '3':
+                output_gcg ^= TRUE;
+                break;
+            case '4':
+                output_phylip ^= TRUE;
+                break;
+            case '5':
+                output_nexus ^= TRUE;
+                break;
+            case '6':
+                output_gde ^= TRUE;
+                break;
+            case '7':
+                lowercase ^= TRUE;
+                break;
+            case '8':
+                cl_seq_numbers ^= TRUE;
+                break;
+            case '9':
+                if (output_order == INPUT) output_order = ALIGNED;
+                else output_order = INPUT;
+                break;
+            case 'F':
+                output_fasta ^= TRUE;
+                break;
+            case 'R':
+                seqRange ^= TRUE;
+                break;
+
+            case '0':		/* DES */
+                if(empty) {
+                    error("No sequences loaded");
+                    break;
+                }
+                /* path is filled in by get_path() from the sequence file name */
+                get_path(seqname,path);
+                if(!open_alignment_output(path)) break;
+                create_alignment_output(1,nseqs);
+                break;
+            case 'T': save_parameters ^= TRUE;
+                break;
+            case '?':
+            case 'H':
+                get_help('5');
+                break;
+            default:
+                fprintf(stdout,"\n\nUnrecognised Command\n\n");
+                break;
+        }
+    }
+}
+
+
+
+
+
+
+
+
+
+
+
+
+static void pair_menu(void)
+{
+        int catchint;
+
+        catchint = signal(SIGINT, SIG_IGN) != SIG_IGN;
+        if (catchint) {
+                if (setjmp(jmpbuf) != 0)
+                        fprintf(stdout,"\n.. Interrupt\n");
+#ifdef UNIX
+                if (signal(SIGINT,jumper) == BADSIG)
+                        fprintf(stdout,"Error: signal\n");
+#else
+                if (signal(SIGINT,SIG_DFL) == BADSIG)
+                        fprintf(stdout,"Error: signal\n");
+#endif
+        }
+
+
+        if(dnaflag) {
+                pw_go_penalty     = dna_pw_go_penalty;
+                pw_ge_penalty     = dna_pw_ge_penalty;
+                ktup       = dna_ktup;
+                window     = dna_window;
+                signif     = dna_signif;
+                wind_gap   = dna_wind_gap;
+
+        }
+        else {
+                pw_go_penalty     = prot_pw_go_penalty;
+                pw_ge_penalty     = prot_pw_ge_penalty;
+                ktup       = prot_ktup;
+                window     = prot_window;
+                signif     = prot_signif;
+                wind_gap   = prot_wind_gap;
+
+        }
+
+	while(TRUE) {
+	
+		fprintf(stdout,"\n\n\n");
+		fprintf(stdout," ********* PAIRWISE ALIGNMENT PARAMETERS *********\n");
+		fprintf(stdout,"\n\n");
+
+		fprintf(stdout,"     Slow/Accurate alignments:\n\n");
+
+		fprintf(stdout,"     1. Gap Open Penalty       :%4.2f\n",pw_go_penalty);
+		fprintf(stdout,"     2. Gap Extension Penalty  :%4.2f\n",pw_ge_penalty);
+		fprintf(stdout,"     3. Protein weight matrix  :%s\n" ,
+                                        matrix_menu.opt[pw_matnum-1].title);
+		fprintf(stdout,"     4. DNA weight matrix      :%s\n" ,
+                                        dnamatrix_menu.opt[pw_dnamatnum-1].title);
+		fprintf(stdout,"\n");
+
+		fprintf(stdout,"     Fast/Approximate alignments:\n\n");
+
+		fprintf(stdout,"     5. Gap penalty            :%d\n",(pint)wind_gap);
+		fprintf(stdout,"     6. K-tuple (word) size    :%d\n",(pint)ktup);
+		fprintf(stdout,"     7. No. of top diagonals   :%d\n",(pint)signif);
+		fprintf(stdout,"     8. Window size            :%d\n\n",(pint)window);
+
+                fprintf(stdout,"     9. Toggle Slow/Fast pairwise alignments ");
+                if(quick_pairalign)
+                      fprintf(stdout,"= FAST\n\n");
+                else
+                      fprintf(stdout,"= SLOW\n\n");
+
+
+		fprintf(stdout,"     H. HELP\n\n\n");
+		
+		getstr("Enter number (or [RETURN] to exit)",lin2);
+		if( *lin2 == EOS) {
+                        if(dnaflag) {
+                                dna_pw_go_penalty     = pw_go_penalty;
+                                dna_pw_ge_penalty     = pw_ge_penalty;
+                		dna_ktup       = ktup;
+                		dna_window     = window;
+                		dna_signif     = signif;
+                		dna_wind_gap   = wind_gap;
+
+                        }
+                        else {
+                                prot_pw_go_penalty     = pw_go_penalty;
+                                prot_pw_ge_penalty     = pw_ge_penalty;
+                		prot_ktup       = ktup;
+                		prot_window     = window;
+                		prot_signif     = signif;
+                		prot_wind_gap   = wind_gap;
+
+                        }
+ 
+			return;
+		}
+		
+		switch(toupper(*lin2)) {
+			case '1':
+				fprintf(stdout,"Gap Open Penalty Currently: %4.2f\n",pw_go_penalty);
+				pw_go_penalty=(float)getreal("Enter number",(double)0.0,(double)100.0,(double)pw_go_penalty);
+				break;
+			case '2':
+				fprintf(stdout,"Gap Extension Penalty Currently: %4.2f\n",pw_ge_penalty);
+				pw_ge_penalty=(float)getreal("Enter number",(double)0.0,(double)10.0,(double)pw_ge_penalty);
+				break;
+                        case '3':
+                                pw_matnum = read_matrix("PROTEIN",pw_matrix_menu,pw_mtrxname,pw_matnum,pw_usermat,pw_aa_xref);
+                                break;
+                        case '4':
+                                pw_dnamatnum = read_matrix("DNA",dnamatrix_menu,pw_dnamtrxname,pw_dnamatnum,pw_userdnamat,pw_dna_xref);
+                                break;
+			case '5':
+                                fprintf(stdout,"Gap Penalty Currently: %d\n",(pint)wind_gap);
+                                wind_gap=getint("Enter number",1,500,wind_gap);
+				break;
+			case '6':
+                                fprintf(stdout,"K-tuple Currently: %d\n",(pint)ktup);
+                                if(dnaflag)
+                                     ktup=getint("Enter number",1,4,ktup);
+                                else
+                                     ktup=getint("Enter number",1,2,ktup);                                     
+				break;
+			case '7':
+                                fprintf(stdout,"Top diagonals Currently: %d\n",(pint)signif);
+                                signif=getint("Enter number",1,50,signif);
+				break;
+			case '8':
+                                fprintf(stdout,"Window size Currently: %d\n",(pint)window);
+                                window=getint("Enter number",1,50,window);
+				break;
+                        case '9': quick_pairalign ^= TRUE;
+                                break;
+			case '?':
+			case 'H':
+				get_help('3');
+				break;
+			default:
+				fprintf(stdout,"\n\nUnrecognised Command\n\n");
+				break;
+		}
+	}
+}
+
+
+
+
+
+static void multi_menu(void)
+{
+        int catchint;
+        /* Interactive menu for the multiple alignment parameters; loops until [RETURN] is entered. */
+        catchint = signal(SIGINT, SIG_IGN) != SIG_IGN;
+        if (catchint) {
+                if (setjmp(jmpbuf) != 0)
+                        fprintf(stdout,"\n.. Interrupt\n");
+#ifdef UNIX
+                if (signal(SIGINT,jumper) == BADSIG)
+                        fprintf(stdout,"Error: signal\n");
+#else
+                if (signal(SIGINT,SIG_DFL) == BADSIG)
+                        fprintf(stdout,"Error: signal\n");
+#endif
+        }
+
+	/* Edit working copies of the gap penalties that belong to the current sequence type. */
+	if(dnaflag) {
+		gap_open   = dna_gap_open;
+		gap_extend = dna_gap_extend;
+	}
+	else {
+		gap_open   = prot_gap_open;
+		gap_extend = prot_gap_extend;
+	}
+
+	while(TRUE) {
+
+		fprintf(stdout,"\n\n\n");
+		fprintf(stdout," ********* MULTIPLE ALIGNMENT PARAMETERS *********\n");
+		fprintf(stdout,"\n\n");
+		
+		fprintf(stdout,"     1. Gap Opening Penalty              :%4.2f\n",gap_open);
+		fprintf(stdout,"     2. Gap Extension Penalty            :%4.2f\n",gap_extend);
+
+		fprintf(stdout,"     3. Delay divergent sequences        :%d %%\n\n",(pint)divergence_cutoff);
+
+                fprintf(stdout,"     4. DNA Transitions Weight           :%1.2f\n\n",transition_weight);
+                fprintf(stdout,"     5. Protein weight matrix            :%s\n"
+                                        	,matrix_menu.opt[matnum-1].title);
+                fprintf(stdout,"     6. DNA weight matrix                :%s\n"
+                                        	,dnamatrix_menu.opt[dnamatnum-1].title);
+		fprintf(stdout,"     7. Use negative matrix              :%s\n\n",(!neg_matrix) ? "OFF" : "ON");
+                fprintf(stdout,"     8. Protein Gap Parameters\n\n");
+		fprintf(stdout,"     H. HELP\n\n\n");		
+
+		getstr("Enter number (or [RETURN] to exit)",lin2);
+		/* Empty input: store the edited penalties back for this sequence type and leave. */
+		if(*lin2 == EOS) {
+			if(dnaflag) {
+				dna_gap_open    = gap_open;
+				dna_gap_extend  = gap_extend;
+			}
+			else {
+				prot_gap_open   = gap_open;
+				prot_gap_extend = gap_extend;
+			}
+			return;
+		}
+		/* ctype functions need an unsigned char value, hence the cast. */
+		switch(toupper((unsigned char)*lin2)) {
+			case '1':
+				fprintf(stdout,"Gap Opening Penalty Currently: %4.2f\n",gap_open);
+				gap_open=(float)getreal("Enter number",(double)0.0,(double)100.0,(double)gap_open);
+				break;
+			case '2':
+				fprintf(stdout,"Gap Extension Penalty Currently: %4.2f\n",gap_extend);
+				gap_extend=(float)getreal("Enter number",(double)0.0,(double)10.0,(double)gap_extend);
+				break;
+			case '3':
+				fprintf(stdout,"Min Identity Currently: %d\n",(pint)divergence_cutoff);
+				divergence_cutoff=getint("Enter number",0,100,divergence_cutoff);
+				break;
+			case '4':	/* %f needs a floating-point argument, so the weight is passed uncast */
+				fprintf(stdout,"Transition Weight Currently: %1.2f\n",transition_weight);
+				transition_weight=(float)getreal("Enter number",(double)0.0,(double)1.0,(double)transition_weight);
+				break;
+			case '5':
+                                matnum = read_matrix("PROTEIN",matrix_menu,mtrxname,matnum,usermat,aa_xref);
+				break;
+			case '6':
+                                dnamatnum = read_matrix("DNA",dnamatrix_menu,dnamtrxname,dnamatnum,userdnamat,dna_xref);
+				break;
+			case '7':
+				neg_matrix ^= TRUE;
+				break;
+			case '8':
+                                gap_penalties_menu();
+				break;
+			case '?':
+			case 'H':
+				get_help('4');
+				break;
+			default:
+				fprintf(stdout,"\n\nUnrecognised Command\n\n");
+				break;
+		}
+	}
+}
+
+
+
+
+
+
+static void gap_penalties_menu(void)
+{
+	char c;
+	sint i;
+        int catchint;
+        /* Interactive menu for the protein gap parameters; loops until [RETURN] is entered. */
+        catchint = signal(SIGINT, SIG_IGN) != SIG_IGN;
+        if (catchint) {
+                if (setjmp(jmpbuf) != 0)
+                        fprintf(stdout,"\n.. Interrupt\n");
+#ifdef UNIX
+                if (signal(SIGINT,jumper) == BADSIG)
+                        fprintf(stdout,"Error: signal\n");
+#else
+                if (signal(SIGINT,SIG_DFL) == BADSIG)
+                        fprintf(stdout,"Error: signal\n");
+#endif
+        }
+
+
+	while(TRUE) {
+
+		fprintf(stdout,"\n\n\n");
+		fprintf(stdout," ********* PROTEIN GAP PARAMETERS *********\n");
+		fprintf(stdout,"\n\n\n");
+
+		fprintf(stdout,"     1. Toggle Residue-Specific Penalties :%s\n\n",(no_pref_penalties) ? "OFF" : "ON");
+		fprintf(stdout,"     2. Toggle Hydrophilic Penalties      :%s\n",(no_hyd_penalties) ? "OFF" : "ON");
+		fprintf(stdout,"     3. Hydrophilic Residues              :%s\n\n"
+					,hyd_residues);
+		fprintf(stdout,"     4. Gap Separation Distance           :%d\n",(pint)gap_dist);
+		fprintf(stdout,"     5. Toggle End Gap Separation         :%s\n\n",(!use_endgaps) ? "OFF" : "ON");
+		fprintf(stdout,"     H. HELP\n\n\n");		
+
+		getstr("Enter number (or [RETURN] to exit)",lin2);
+
+		if(*lin2 == EOS) return;
+		/* ctype functions need an unsigned char value, hence the casts below. */
+		switch(toupper((unsigned char)*lin2)) {
+			case '1':
+				no_pref_penalties ^= TRUE;
+				break;
+			case '2':
+				no_hyd_penalties ^= TRUE;
+				break;
+			case '3':
+				fprintf(stdout,"Hydrophilic Residues Currently: %s\n",hyd_residues);
+				/* NOTE(review): the copy is bounded by the length of the current list, so a new list cannot be longer than the old one; presumably this guards the buffer size - confirm. */
+				getstr("Enter residues (or [RETURN] to quit)",lin1);
+                                if (*lin1 != EOS) {
+                                        for (i=0;i<strlen(hyd_residues) && i<26;i++) {
+                                        c = lin1[i];
+                                        if (isalpha((unsigned char)c))
+                                                hyd_residues[i] = (char)toupper((unsigned char)c);
+                                        else
+                                                break;
+                                        }
+                                        hyd_residues[i] = EOS;
+                                }
+                                break;
+			case '4':
+				fprintf(stdout,"Gap Separation Distance Currently: %d\n",(pint)gap_dist);
+				gap_dist=getint("Enter number",0,100,gap_dist);
+				break;
+			case '5':
+				use_endgaps ^= TRUE;
+				break;
+			case '?':
+			case 'H':
+				get_help('A');
+				break;
+			default:
+				fprintf(stdout,"\n\nUnrecognised Command\n\n");
+				break;
+		}
+	}
+}
+
+
+
+
+static sint read_matrix(char *title,MatMenu menu, char *matnam, sint matn, short *mat, short *xref)
+{       static char userfile[FILENAMELEN+1];
+	int i;
+	/* Show the weight matrix menu until [RETURN]; returns the 1-based number of the selected matrix. */
+        while(TRUE)
+        {
+                fprintf(stdout,"\n\n\n");
+                fprintf(stdout," ********* %s WEIGHT MATRIX MENU *********\n",title);
+                fprintf(stdout,"\n\n");
+
+		for(i=0;i<menu.noptions;i++)
+                	fprintf(stdout,"     %d. %s\n",i+1,menu.opt[i].title);
+                fprintf(stdout,"     H. HELP\n\n");
+                fprintf(stdout,
+"     -- Current matrix is the %s ",menu.opt[matn-1].title);
+                if(matn == menu.noptions) fprintf(stdout,"(file = %s)",userfile);
+                fprintf(stdout,"--\n");
+
+
+                getstr("\n\nEnter number (or [RETURN] to exit)",lin2);
+                if(*lin2 == EOS) return(matn);
+		/* Only the first input character is examined; the last menu entry loads a matrix through user_mat(). */
+                i=toupper((unsigned char)*lin2)-'0';
+		if(i>0 && i<menu.noptions) {
+                        strcpy(matnam,menu.opt[i-1].string);
+                        matn=i;
+		} else if (i==menu.noptions) {
+                        if(user_mat(userfile, mat, xref)) {
+                              strcpy(matnam,userfile);
+                              matn=i;
+                        }
+		}
+		else
+                switch(toupper((unsigned char)*lin2))  {
+                        case '?':
+                        case 'H':
+                                get_help('8');
+                                break;
+                        default:
+                                fprintf(stdout,"\n\nUnrecognised Command\n\n");
+                                break;
+                }
+        }
+}
+
+
+char prompt_for_yes_no(char *title,char *prompt)
+{
+	char line[80] = "";
+	char lin2[80];
+	/* Prints title, asks "<prompt>(y/n) ? [y]" and returns 'n' only if the answer starts with n/N, else 'y'. */
+	fprintf(stdout,"\n%s\n",title);
+	/* Truncate prompt so that prompt + suffix + terminator always fit in line[]. */
+	strncat(line,prompt,sizeof(line)-sizeof("(y/n) ? [y]"));
+	strcat(line, "(y/n) ? [y]");
+	getstr(line,lin2);
+	if ((*lin2 != 'n') && (*lin2 != 'N'))
+		return('y');
+	else
+		return('n');
+}
+
+
+/*
+*	fatal()
+*
+*	Prints error msg to stdout and exits.
+*	Variadic parameter list can be passed.
+*
+*	Return values:
+*		none
+*/
+
+void fatal( char *msg,...)
+{
+	va_list ap;
+	/* msg is handed to vfprintf as the format; ap supplies the arguments that follow it */
+	va_start(ap,msg);
+	fprintf(stdout,"\n\nFATAL ERROR: ");
+	vfprintf(stdout,msg,ap);
+	fprintf(stdout,"\n\n");
+	va_end(ap);
+	exit(1);
+}
+
+/*
+*	error()
+*
+*	Prints error msg to stdout.
+*	Variadic parameter list can be passed.
+*
+*	Return values:
+*		none
+*/
+
+void error( char *msg,...)
+{
+	va_list ap;
+	/* msg is handed to vfprintf as the format; ap supplies the arguments that follow it */
+	va_start(ap,msg);
+	fprintf(stdout,"\n\nERROR: ");
+	vfprintf(stdout,msg,ap);
+	fprintf(stdout,"\n\n");
+	va_end(ap);
+}
+
+/*
+*	warning()
+*
+*	Prints warning msg to stdout.
+*	Variadic parameter list can be passed.
+*
+*	Return values:
+*		none
+*/
+
+void warning( char *msg,...)
+{
+	va_list ap;
+	/* msg is handed to vfprintf as the format; ap supplies the arguments that follow it */
+	va_start(ap,msg);
+	fprintf(stdout,"\n\nWARNING: ");
+	vfprintf(stdout,msg,ap);
+	fprintf(stdout,"\n\n");
+	va_end(ap);
+}
+
+/*
+*	info()
+*
+*	Prints info msg to stdout.
+*	Variadic parameter list can be passed.
+*
+*	Return values:
+*		none
+*/
+
+void info( char *msg,...)
+{
+	va_list ap;
+	/* msg is handed to vfprintf as the format; only a leading newline is added, no trailing one */
+	va_start(ap,msg);
+	fprintf(stdout,"\n");
+	vfprintf(stdout,msg,ap);
+	va_end(ap);
+}

Added: trunk/packages/clustalw/branches/upstream/current/calcgapcoeff.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/calcgapcoeff.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/calcgapcoeff.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,497 @@
+#include <stdio.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include "clustalw.h"
+
+
+/*
+ *   Prototypes
+ */
+void calc_p_penalties(char **aln, sint n, sint fs, sint ls, sint *weight);
+void calc_h_penalties(char **aln, sint n, sint fs, sint ls, sint *weight);
+void calc_v_penalties(char **aln, sint n, sint fs, sint ls, sint *weight);
+sint local_penalty(sint penalty, sint n, sint *pweight, sint *hweight, sint *vweight);
+float percentid(char *s1, char *s2,sint length);
+/*
+ *   Global variables
+ */
+
+extern sint gap_dist;
+extern sint max_aa;
+extern sint debug;
+extern Boolean dnaflag;
+extern Boolean use_endgaps;
+extern Boolean endgappenalties;
+extern Boolean no_var_penalties, no_hyd_penalties, no_pref_penalties;
+extern char hyd_residues[];
+extern char *amino_acid_codes;
+
+/* vwindow is the number of residues used for a window for the variable zone penalties */
+/* vll is the lower limit for the variable zone penalties (vll < pen < 1.0) */
+int vll=50;
+int vwindow=5;
+/* vlut is indexed by two residue letters minus 'A'; as initialised it is 1 on the diagonal and 0 elsewhere */
+sint	vlut[26][26] = {
+/*	  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O  P  Q  R  S  T  U  V  W  X  Y  Z */
+/*A*/	  1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+/*B*/	  0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*C*/	  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*D*/	  0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*E*/	  0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*F*/	  0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*G*/	  0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*H*/	  0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*I*/	  0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*J*/	  0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*K*/	  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*L*/	  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*M*/	  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*N*/	  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*O*/	  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*P*/	  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*Q*/	  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*R*/	  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
+/*S*/	  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
+/*T*/	  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
+/*U*/	  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
+/*V*/	  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
+/*W*/	  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
+/*X*/	  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
+/*Y*/	  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
+/*Z*/	  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
+	};
+
+/* pascarella probabilities for opening a gap at specific residues; pr[] lists 20 residue letters and each table holds one value per letter */
+char   pr[] =     {'A' , 'C', 'D', 'E', 'F', 'G', 'H', 'K', 'I', 'L',
+                   'M' , 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'Y', 'W'};
+sint    pas_op[] = { 87, 87,104, 69, 80,139,100,104, 68, 79,
+                    71,137,126, 93,128,124,111, 75,100, 77};
+sint    pas_op2[] ={ 88, 57,111, 98, 75,126, 95, 97, 70, 90,
+                    60,122,110,107, 91,125,124, 81,106, 88};
+sint    pal_op[] = { 84, 69,128, 78, 88,176, 53, 95, 55, 49,
+                    52,148,147,100, 91,129,105, 51,128, 88};
+/* reduced_gap multiplies the opening penalty at columns that already contain gaps */
+float reduced_gap = 1.0;
+Boolean nvar_pen,nhyd_pen,npref_pen; /* working flags set from no_var_penalties, no_hyd_penalties, no_pref_penalties */
+sint gdist;                  /* local copy of gap_dist */
+
+void calc_gap_coeff(char **alignment, sint *gaps, sint **profile, Boolean struct_penalties,
+         char *gap_penalty_mask, sint first_seq, sint last_seq,
+         sint prf_length, sint gapcoef, sint lencoef)
+{
+/* Fills gaps[] with per-column gap counts and profile[][GAPCOL/LENCOL] with position-specific penalties. */
+   char c;
+   sint i, j;
+   sint is, ie;
+   static sint numseq,val,pcid;
+   static sint *gap_pos;
+   static sint *v_weight, *p_weight, *h_weight;
+   static float scale;
+/* Sequences first_seq .. last_seq-1 are used; percent identity is only computed for a pair. */
+   numseq = last_seq - first_seq;
+   if(numseq == 2)
+     {
+	pcid=percentid(alignment[first_seq],alignment[first_seq+1],prf_length);
+     }
+   else pcid=0;
+
+   for (j=0; j<prf_length; j++)
+        gaps[j] = 0;
+/*
+   Check for a gap penalty mask
+*/
+   if (struct_penalties != NONE)
+     {
+        nvar_pen = nhyd_pen = npref_pen = TRUE;
+        gdist = 0;
+     }
+   else if (no_var_penalties == FALSE && pcid > 60)
+     {
+if(debug>0) fprintf(stderr,"Using variable zones to set gap penalties (pcid = %d)\n",pcid);
+	nhyd_pen = npref_pen = TRUE;
+	nvar_pen = FALSE;
+     }
+   else
+     {
+	nvar_pen = TRUE;
+        nhyd_pen = no_hyd_penalties;
+        npref_pen = no_pref_penalties;
+        gdist = gap_dist;
+     }                  
+/* Count, per column, the sequences whose code is outside 0..max_aa (treated as a gap). */
+   for (i=first_seq; i<last_seq; i++)
+     {
+/*
+   Include end gaps as gaps ?
+*/
+        is = 0;
+        ie = prf_length;
+        if (use_endgaps == FALSE && endgappenalties==FALSE)
+        {
+          for (j=0; j<prf_length; j++)
+            {
+              c = alignment[i][j];
+              if ((c < 0) || (c > max_aa))
+                 is++;
+              else
+                 break;
+            }
+          for (j=prf_length-1; j>=0; j--)
+            {
+              c = alignment[i][j];
+              if ((c < 0) || (c > max_aa))
+                 ie--;
+              else
+                 break;
+            }
+        }
+
+        for (j=is; j<ie; j++)
+          {
+              if ((alignment[i][j] < 0) || (alignment[i][j] > max_aa))
+                 gaps[j]++;
+          }
+     }
+
+   if ((!dnaflag) && (nvar_pen == FALSE))
+     {
+        v_weight = (sint *) ckalloc( (prf_length+2) * sizeof (sint) );
+        calc_v_penalties(alignment, prf_length, first_seq, last_seq, v_weight);
+     }
+
+
+   if ((!dnaflag) && (npref_pen == FALSE))
+     {
+        p_weight = (sint *) ckalloc( (prf_length+2) * sizeof (sint) );
+        calc_p_penalties(alignment, prf_length, first_seq, last_seq, p_weight);
+     }
+
+   if ((!dnaflag) && (nhyd_pen == FALSE))
+     {
+        h_weight = (sint *) ckalloc( (prf_length+2) * sizeof (sint) );
+        calc_h_penalties(alignment, prf_length, first_seq, last_seq, h_weight);
+     }
+
+   gap_pos = (sint *) ckalloc( (prf_length+2) * sizeof (sint) );
+/*
+    mark the residues close to an existing gap (set gaps[i] = -ve)
+*/
+   if (dnaflag || (gdist <= 0))
+     {
+       for (i=0;i<prf_length;i++) gap_pos[i] = gaps[i];
+     }
+   else
+     {
+       i=0;
+       while (i<prf_length)
+         {
+            if (gaps[i] <= 0)
+              {
+                 gap_pos[i] = gaps[i];
+                 i++;
+              }
+            else 
+              {
+                 for (j = -gdist+1; j<0; j++)
+                  {
+                   if ((i+j>=0) && (i+j<prf_length) &&
+                       ((gaps[i+j] == 0) || (gaps[i+j] < j))) gap_pos[i+j] = j;
+                  }
+                 while ((i<prf_length) && (gaps[i] > 0))
+                    {
+                       /* the index is checked before gaps[i] is read, so the run stops at the profile end */
+                       gap_pos[i] = gaps[i];
+                       i++;
+                    }
+                 for (j = 0; j<gdist; j++)
+                  {
+                   if ((i+j>=prf_length) || (gaps[i+j] > 0)) break;
+                   if ((i+j>=0) && (i+j<prf_length) && 
+                       ((gaps[i+j] == 0) || (gaps[i+j] < -j))) gap_pos[i+j] = -j-1;
+                  }
+                 i += j;
+              }
+         }
+     }
+if (debug>1)
+{
+fprintf(stdout,"gap open %d gap ext %d\n",(pint)gapcoef,(pint)lencoef);
+fprintf(stdout,"gaps:\n");
+  for(i=0;i<prf_length;i++) fprintf(stdout,"%d ", (pint)gaps[i]);
+  fprintf(stdout,"\n");
+fprintf(stdout,"gap_pos:\n");
+  for(i=0;i<prf_length;i++) fprintf(stdout,"%d ", (pint)gap_pos[i]);
+  fprintf(stdout,"\n");
+}
+
+/* Set the opening (GAPCOL) and extension (LENCOL) penalty of each column; profile rows are offset by one. */
+   for (j=0;j<prf_length; j++)
+     {
+          
+        if (gap_pos[j] <= 0)
+          {
+/*
+    apply residue-specific and hydrophilic gap penalties.
+*/
+	     	if (!dnaflag) {
+              	profile[j+1][GAPCOL] = local_penalty(gapcoef, j,
+                                                   p_weight, h_weight, v_weight);
+              	profile[j+1][LENCOL] = lencoef;
+	     	}
+	     	else {
+              	profile[j+1][GAPCOL] = gapcoef;
+              	profile[j+1][LENCOL] = lencoef;
+	     	}
+
+/*
+    increase gap penalty near to existing gaps.
+*/
+             if (gap_pos[j] < 0)
+                {
+                    profile[j+1][GAPCOL] *= 2.0+2.0*(gdist+gap_pos[j])/gdist;
+                }
+
+
+          }
+        else
+          {
+             scale = ((float)(numseq-gaps[j])/(float)numseq) * reduced_gap;
+             profile[j+1][GAPCOL] = scale*gapcoef;
+             profile[j+1][LENCOL] = 0.5 * lencoef;
+          }
+/*
+    apply the gap penalty mask
+*/
+        if (struct_penalties != NONE)
+          {
+            val = gap_penalty_mask[j]-'0';
+            if (val > 0 && val < 10)
+              {
+                profile[j+1][GAPCOL] *= val;
+                profile[j+1][LENCOL] *= val;
+              }
+          }
+/*
+   make sure no penalty is zero - even for all-gap positions
+*/
+        if (profile[j+1][GAPCOL] <= 0) profile[j+1][GAPCOL] = 1;
+        if (profile[j+1][LENCOL] <= 0) profile[j+1][LENCOL] = 1;
+     }
+
+/* set the penalties at the beginning and end of the profile */
+   if(endgappenalties==TRUE)
+     {
+        profile[0][GAPCOL] = gapcoef;
+        profile[0][LENCOL] = lencoef;
+     }
+   else
+     {
+        profile[0][GAPCOL] = 0;
+        profile[0][LENCOL] = 0;
+        profile[prf_length][GAPCOL] = 0;
+        profile[prf_length][LENCOL] = 0;
+     }
+if (debug>0)
+{
+  fprintf(stdout,"Opening penalties:\n");
+  for(i=0;i<=prf_length;i++) fprintf(stdout," %d:%d ",i, (pint)profile[i][GAPCOL]);
+  fprintf(stdout,"\n");
+}
+if (debug>0)
+{
+  fprintf(stdout,"Extension penalties:\n");
+  for(i=0;i<=prf_length;i++) fprintf(stdout,"%d:%d ",i, (pint)profile[i][LENCOL]);
+  fprintf(stdout,"\n");
+}
+   if ((!dnaflag) && (nvar_pen == FALSE))
+        v_weight=ckfree((void *)v_weight);
+
+   if ((!dnaflag) && (npref_pen == FALSE))
+        p_weight=ckfree((void *)p_weight);
+
+   if ((!dnaflag) && (nhyd_pen == FALSE))
+        h_weight=ckfree((void *)h_weight);
+
+/* The weight arrays are released under the same conditions that allocated them. */
+   gap_pos=ckfree((void *)gap_pos);
+}              
+            
+void calc_v_penalties(char **aln, sint n, sint fs, sint ls, sint *weight)
+{
+  char ix1,ix2;
+  sint i,j,k,t;
+/* For each of the n columns, score sequences fs and fs+1 over columns i-vwindow .. i+vwindow-1; ls and k are unused. */
+  for (i=0;i<n;i++)
+    {
+      weight[i] = 0;
+	t=0;
+	for(j=i-vwindow;j<i+vwindow;j++)
+	{
+		if(j>=0 && j<n)
+		{
+                	ix1 = aln[fs][j];
+                	ix2 = aln[fs+1][j];
+                	if ((ix1 < 0) || (ix1 > max_aa) || (ix2< 0) || (ix2> max_aa)) continue;
+			weight[i] += vlut[amino_acid_codes[ix1]-'A'][amino_acid_codes[ix2]-'A'];
+			t++;
+		} 
+	}
+/* now we have a weight -t < w < t */
+      weight[i] +=t;
+      if(t>0)
+      	weight[i] = (weight[i]*100)/(2*t);
+      else
+      	weight[i] = 100;
+/* now we have a weight vll < w < 100 */
+      if (weight[i]<vll) weight[i]=vll;
+    }
+/* t counts the window columns where both sequences hold a residue code in 0..max_aa */
+
+}
+
+void calc_p_penalties(char **aln, sint n, sint fs, sint ls, sint *weight)
+{
+  char ix;
+  sint j,k,numseq;
+  sint i;
+/* weight[i] = average over sequences fs .. ls-1 of (180 - pas_op) for the residue found in column i. */
+  numseq = ls - fs;
+  for (i=0;i<n;i++)
+    {
+      weight[i] = 0;
+      for (k=fs;k<ls;k++)
+        {
+           for (j=0;j<(sint)sizeof(pr);j++)
+             {
+                ix = aln[k][i];
+                if ((ix < 0) || (ix > max_aa)) continue;
+                if (amino_acid_codes[ix] == pr[j])
+                  {
+                    weight[i] += (180-pas_op[j]);
+                    break;
+                  }
+             }
+        }
+      weight[i] /= numseq;
+    }
+/* The search is bounded by the size of pr[] so residues missing from the table never index past it. */
+}
+            
+void calc_h_penalties(char **aln, sint n, sint fs, sint ls, sint *weight)
+{
+/* For each column, weight[] becomes 100 * (sequences with a hydrophilic run there) / (ls - fs). */
+/*
+   weight[] is the length of the hydrophilic run of residues.
+*/
+  char ix;
+  sint nh,j,k;
+  sint i,e,s;
+  sint *hyd;
+  float scale;
+
+  hyd = (sint *)ckalloc((n+2) * sizeof(sint));
+  nh = (sint)strlen(hyd_residues);
+  for (i=0;i<n;i++)
+     weight[i] = 0;
+/* hyd[i] flags the columns of sequence k whose residue letter occurs in hyd_residues. */
+  for (k=fs;k<ls;k++)
+    {
+       for (i=0;i<n;i++)
+         {
+             hyd[i] = 0;
+             for (j=0;j<nh;j++)
+                {
+                   ix = aln[k][i];
+                   if ((ix < 0) || (ix > max_aa)) continue;
+                   if (amino_acid_codes[ix] == hyd_residues[j])
+                      {
+                         hyd[i] = 1;
+                         break;
+                      }
+                }
+          }
+       i = 0;
+       while (i < n)
+         {
+            if (hyd[i] == 0) i++;
+            else
+              {
+                 s = i;
+                 while ((i<n) && (hyd[i] != 0)) i++;	/* test the index before reading hyd[i] */
+                 e = i;
+                 if (e-s > 3)
+                    for (j=s; j<e; j++) weight[j] += 100;
+              }
+         }
+    }
+/* Only runs longer than three flagged columns contribute to weight[]. */
+  scale = ls - fs;
+  for (i=0;i<n;i++)
+     weight[i] /= scale;
+
+  hyd=ckfree((void *)hyd);
+
+if (debug>1)
+{
+  for(i=0;i<n;i++) fprintf(stdout,"%d ", (pint)weight[i]);
+  fprintf(stdout,"\n");
+}
+
+}
+            
+sint local_penalty(sint penalty, sint n, sint *pweight, sint *hweight, sint *vweight)
+{
+/* Returns penalty scaled by the weights enabled for column n, truncated to sint; returns 1 when dnaflag is set. */
+  Boolean h = FALSE;
+  float gw;
+
+  if (dnaflag) return(1);
+
+  gw = 1.0;
+  if (nvar_pen == FALSE)
+    {
+	gw *= (float)vweight[n]/100.0;
+    }
+/* A positive hydrophilic weight halves the factor and suppresses the residue-specific weight below. */
+  if (nhyd_pen == FALSE)
+    {
+        if (hweight[n] > 0)
+         {
+           gw *= 0.5;
+           h = TRUE;
+         }
+    }
+  if ((npref_pen == FALSE) && (h==FALSE))
+    {
+       gw *= ((float)pweight[n]/100.0);
+    }
+
+  gw *= penalty;
+  return((sint)gw);
+
+}
+
+float percentid(char *s1, char *s2,sint length)
+{
+   sint i;
+   sint count,total;
+   float score;
+/* Returns 100 * (positions where s1[i] == s2[i]) / (positions where s1 holds a code in 0..max_aa-1), or 0 if none. */
+   count = total = 0;
+   for (i=0;i<length;i++) {
+     if ((s1[i]>=0) && (s1[i]<max_aa)) {
+       total++;
+       if (s1[i] == s2[i]) count++;
+     }
+	if (s1[i]==(-3) || s2[i]==(-3)) break;
+/* NOTE(review): other routines in this file accept codes up to and including max_aa; here max_aa itself is excluded - confirm. */
+   }
+/* The scan stops after the first position where either sequence holds the code -3. */
+   if(total==0) score=0;
+   else
+   score = 100.0 * (float)count / (float)total;
+   return(score);
+
+}
+

Added: trunk/packages/clustalw/branches/upstream/current/calcprf1.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/calcprf1.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/calcprf1.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,99 @@
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "clustalw.h"
+
+
+/*
+ *   Prototypes
+ */
+
+/*
+ *   Global variables
+ */
+
+extern sint max_aa,gap_pos1,gap_pos2;
+
+void calc_prf1(sint **profile, char **alignment, sint *gaps,
+  sint matrix[NUMRES][NUMRES],
+  sint *seq_weight, sint prf_length, sint first_seq, sint last_seq)
+{
+/* Fills profile[pos+1][res] with weighted matrix scores for each column of sequences first_seq .. last_seq-1. */
+  sint **weighting, sum2, d, i, res; 
+  sint numseq;
+  sint r, pos;
+  int f;
+  float scale;
+
+  weighting = (sint **) ckalloc( (NUMRES+2) * sizeof (sint *) );
+  for (i=0;i<NUMRES+2;i++)
+    weighting[i] = (sint *) ckalloc( (prf_length+2) * sizeof (sint) );
+
+  numseq = last_seq-first_seq;
+/* sum2 is the total weight of the sequences in the profile. */
+  sum2 = 0;
+  for (i=first_seq; i<last_seq; i++)
+       sum2 += seq_weight[i];
+/* weighting[d][r] is the summed weight of the sequences that hold code d in column r. */
+  for (r=0; r<prf_length; r++)
+   {
+      for (d=0; d<=max_aa; d++)
+        {
+            weighting[d][r] = 0;
+            for (i=first_seq; i<last_seq; i++)
+               if (d == alignment[i][r]) weighting[d][r] += seq_weight[i];
+        }
+      weighting[gap_pos1][r] = 0;
+      for (i=first_seq; i<last_seq; i++)
+         if (gap_pos1 == alignment[i][r]) weighting[gap_pos1][r] += seq_weight[i];
+      weighting[gap_pos2][r] = 0;
+      for (i=first_seq; i<last_seq; i++)
+         if (gap_pos2 == alignment[i][r]) weighting[gap_pos2][r] += seq_weight[i];
+   }
+/* Columns where every sequence has a gap take the matrix scores against gap_pos1; others are weighted averages. */
+  for (pos=0; pos< prf_length; pos++)
+    {
+      if (gaps[pos] == numseq)
+        {
+           for (res=0; res<=max_aa; res++)
+             {
+                profile[pos+1][res] = matrix[res][gap_pos1];
+             }
+           profile[pos+1][gap_pos1] = matrix[gap_pos1][gap_pos1];
+           profile[pos+1][gap_pos2] = matrix[gap_pos2][gap_pos1];
+        }
+      else
+        {
+           scale = (float)(numseq-gaps[pos]) / (float)numseq;
+           for (res=0; res<=max_aa; res++)
+             {
+                f = 0;
+                for (d=0; d<=max_aa; d++)
+                     f += (weighting[d][pos] * matrix[d][res]);
+                f += (weighting[gap_pos1][pos] * matrix[gap_pos1][res]);
+                f += (weighting[gap_pos2][pos] * matrix[gap_pos2][res]);
+                profile[pos+1][res] = (sint  )(((float)f / (float)sum2)*scale);
+             }
+           f = 0;
+           for (d=0; d<=max_aa; d++)
+                f += (weighting[d][pos] * matrix[d][gap_pos1]);
+           f += (weighting[gap_pos1][pos] * matrix[gap_pos1][gap_pos1]);
+           f += (weighting[gap_pos2][pos] * matrix[gap_pos2][gap_pos1]);
+           profile[pos+1][gap_pos1] = (sint )(((float)f / (float)sum2)*scale);
+           f = 0;
+           for (d=0; d<=max_aa; d++)
+                f += (weighting[d][pos] * matrix[d][gap_pos2]);
+           f += (weighting[gap_pos1][pos] * matrix[gap_pos1][gap_pos2]);
+           f += (weighting[gap_pos2][pos] * matrix[gap_pos2][gap_pos2]);
+           profile[pos+1][gap_pos2] = (sint )(((float)f / (float)sum2)*scale);
+        }
+    }
+/* Release the temporary weighting table. */
+  for (i=0;i<NUMRES+2;i++)
+    weighting[i]=ckfree((void *)weighting[i]);
+  weighting=ckfree((void *)weighting);
+
+}
+
+

Added: trunk/packages/clustalw/branches/upstream/current/calcprf2.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/calcprf2.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/calcprf2.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,73 @@
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "clustalw.h"
+
+/*
+ *   Prototypes
+ */
+/*
+ *   Global variables
+ */
+
+extern sint max_aa,gap_pos1,gap_pos2;
+
+void calc_prf2(sint **profile, char **alignment,
+  sint *seq_weight,sint prf_length, sint first_seq, sint last_seq)
+{
+/* Fills profile[r+1][d] with 10 * (weight of sequences holding code d in column r) / (total weight). */
+  sint sum1, sum2;	
+  sint i, d;
+  sint   r;
+
+
+  for (r=0; r<prf_length; r++)
+    {
+/*
+   calculate sum2 = total weight of sequences first_seq .. last_seq-1 (the same value for every column)
+*/
+       sum2 = 0;
+       for (i=first_seq; i<last_seq; i++)
+         {
+            sum2 += seq_weight[i];
+         }
+/*
+   only include matrix comparison scores for those residue types found in this
+   column
+*/
+       if (sum2 == 0)
+         {
+           for (d=0; d<=max_aa; d++)
+             profile[r+1][d] = 0;
+           profile[r+1][gap_pos1] = 0;
+           profile[r+1][gap_pos2] = 0;
+         }
+       else
+         {
+           for (d=0; d<=max_aa; d++)
+             {
+                sum1 = 0;
+                for (i=first_seq; i<last_seq; i++)
+                 {
+                  if (d == alignment[i][r]) sum1 += seq_weight[i];
+                 }
+                profile[r+1][d] = (sint)(10 * (float)sum1 / (float)sum2);
+             }
+           sum1 = 0;
+           for (i=first_seq; i<last_seq; i++)
+            {
+             if (gap_pos1 == alignment[i][r]) sum1 += seq_weight[i];
+            }
+           profile[r+1][gap_pos1] = (sint)(10 * (float)sum1 / (float)sum2);
+           sum1 = 0;
+           for (i=first_seq; i<last_seq; i++)
+            {
+             if (gap_pos2 == alignment[i][r]) sum1 += seq_weight[i];
+            }
+           profile[r+1][gap_pos2] = (sint)(10 * (float)sum1 / (float)sum2);
+         }
+    }
+}
+
+

Added: trunk/packages/clustalw/branches/upstream/current/calctree.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/calctree.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/calctree.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,984 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include "clustalw.h"
+
+#define MAXERRS 10
+
+/*
+ *   Prototypes
+ */
+static void create_tree(treeptr ptree, treeptr parent);
+static void create_node(treeptr pptr, treeptr parent);
+static treeptr insert_node(treeptr pptr);
+static void skip_space(FILE *fd);
+static treeptr avail(void);
+static void set_info(treeptr p, treeptr parent, sint pleaf, char *pname, float pdist);
+static treeptr reroot(treeptr ptree, sint nseqs);
+static treeptr insert_root(treeptr p, float diff);
+static float calc_root_mean(treeptr root, float *maxdist);
+static float calc_mean(treeptr nptr, float *maxdist, sint nseqs);
+static void order_nodes(void);
+static sint calc_weight(sint leaf);
+static void group_seqs(treeptr p, sint *next_groups, sint nseqs);
+static void mark_group1(treeptr p, sint *groups, sint n);
+static void mark_group2(treeptr p, sint *groups, sint n);
+static void save_set(sint n, sint *groups);
+static void clear_tree_nodes(treeptr p);
+
+
+/*
+ *   Global variables
+ */
+extern Boolean interactive;
+extern Boolean distance_tree;
+extern Boolean usemenu;
+extern sint debug;
+extern double **tmat;
+extern sint **sets;
+extern sint nsets;
+extern char **names;
+extern sint *seq_weight;
+extern Boolean no_weights;
+
+char ch;	/* last character read from the tree file (see create_tree) */
+FILE *fd;	/* tree file opened by read_tree and parsed by create_tree */
+treeptr *lptr;	/* leaf nodes, in the order they are met in the tree file */
+treeptr *olptr;	/* leaf nodes, indexed by sequence position (filled in read_tree) */
+treeptr *nptr;	/* internal nodes, in creation order */
+treeptr *ptrs;	/* every node, leaves and internal, in creation order */
+sint nnodes = 0;	/* number of entries used in nptr */
+sint ntotal = 0;	/* number of entries used in ptrs */
+Boolean rooted_tree = TRUE;	/* cleared by create_tree when a node has a third branch */
+static treeptr seq_tree,root;	/* tree as parsed; root used afterwards (may come from reroot) */
+static sint *groups, numseq;	/* scratch array of create_sets; number of leaves parsed */
+
+void calc_seq_weights(sint first_seq, sint last_seq, sint *sweight)
+{
+  sint   i, nseqs;
+  sint   temp, sum, *weight;
+
+/* Fill sweight[first_seq..last_seq-1] with integer weights: tree-based or uniform. */
+/*
+  If there are at least two sequences, a distance tree, and weighting is on....
+*/
+  nseqs = last_seq-first_seq;
+   if ((nseqs >= 2) && (distance_tree == TRUE) && (no_weights == FALSE))
+     {
+/*
+  Calculate sequence weights based on Phylip tree.
+*/
+      weight = (sint *)ckalloc((last_seq+1) * sizeof(sint));
+
+      for (i=first_seq; i<last_seq; i++)
+           weight[i] = calc_weight(i);
+
+/*
+  Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR
+*/
+
+         sum = 0;
+         for (i=first_seq; i<last_seq; i++)
+            sum += weight[i];
+
+         if (sum == 0)
+          {
+            for (i=first_seq; i<last_seq; i++)
+               weight[i] = 1;
+            sum = nseqs;  /* every weight is 1, so the total is the sequence count */
+          }
+
+         for (i=first_seq; i<last_seq; i++)
+           {
+              sweight[i] = (weight[i] * INT_SCALE_FACTOR) / sum;
+              if (sweight[i] < 1) sweight[i] = 1;  /* never let a sequence drop to weight 0 */
+           }
+
+       weight=ckfree((void *)weight);
+
+     }
+
+   else
+     {
+/*
+  Otherwise, use identity weights.
+*/
+        temp = (nseqs > 0) ? INT_SCALE_FACTOR / nseqs : 0;  /* no division by zero for an empty range */
+        for (i=first_seq; i<last_seq; i++)
+           sweight[i] = temp;
+     }
+
+}
+
+void create_sets(sint first_seq, sint last_seq)
+{
+  sint   i, j, nseqs;
+
+  nsets = 0;  /* save_set() appends to the global sets[] starting from here */
+  nseqs = last_seq-first_seq;
+  if (nseqs >= 2)
+     {
+/*
+  Two or more sequences: derive the sets from the tree hanging off root.
+*/
+       groups = (sint *)ckalloc((nseqs+1) * sizeof(sint));
+       group_seqs(root, groups, nseqs);
+       groups=ckfree((void *)groups);
+
+     }
+
+   else
+     {
+       groups = (sint *)ckalloc((nseqs+1) * sizeof(sint));
+       for (i=0;i<nseqs-1;i++)  /* never iterates: nseqs < 2 in this branch */
+         {
+           for (j=0;j<nseqs;j++)
+              if (j<=i) groups[j] = 1;
+              else if (j==i+1) groups[j] = 2;
+              else groups[j] = 0;
+           save_set(nseqs, groups);
+         }
+       groups=ckfree((void *)groups);
+     }
+
+}
+
+sint read_tree(char *treefile, sint first_seq, sint last_seq)
+{
+/* Parse the tree in treefile and match its leaves to names[]; returns 1 on success, 0 on error. */
+  char c;
+  char name1[MAXNAMES+1], name2[MAXNAMES+1];
+  sint i, j, k;
+  Boolean found;
+
+  numseq = 0;
+  nnodes = 0;
+  ntotal = 0;
+  rooted_tree = TRUE;
+
+#ifdef VMS
+  if ((fd = fopen(treefile,"r","rat=cr","rfm=var")) == NULL)
+#else
+  if ((fd = fopen(treefile, "r")) == NULL)
+#endif
+    {
+      error("cannot open %s", treefile);
+      return((sint)0);
+    }
+
+  skip_space(fd);
+  ch = (char)getc(fd);
+  if (ch != '(')
+    {
+      error("Wrong format in tree file %s", treefile);
+      fclose(fd);  /* do not leak the stream on this early exit */
+      return((sint)0);
+    }
+  rewind(fd);
+
+  distance_tree = TRUE;  /* create_tree clears this if a leaf has no ':' distance */
+
+/*
+  Allocate memory for tree
+*/
+  nptr = (treeptr *)ckalloc(3*(last_seq-first_seq+1) * sizeof(treeptr));
+  ptrs = (treeptr *)ckalloc(3*(last_seq-first_seq+1) * sizeof(treeptr));
+  lptr = (treeptr *)ckalloc((last_seq-first_seq+1) * sizeof(treeptr));
+  olptr = (treeptr *)ckalloc((last_seq+1) * sizeof(treeptr));
+  
+  seq_tree = avail();
+  set_info(seq_tree, NULL, 0, "", 0.0);
+
+  create_tree(seq_tree,NULL);
+  fclose(fd);
+
+  if (numseq != last_seq-first_seq)
+     {
+         error("tree not compatible with alignment\n(%d sequences in alignment and %d in tree", (pint)(last_seq-first_seq),(pint)numseq);
+         return((sint)0);
+     }
+
+/*
+  If the tree is unrooted, reroot the tree - ie. minimise the difference
+  between the mean root->leaf distances for the left and right branches of
+  the tree.
+*/
+
+  if (distance_tree == FALSE)
+     {
+  	if (rooted_tree == FALSE)
+          {
+       	     error("input tree is unrooted and has no distances.\nCannot align sequences");
+             return((sint)0);
+          }
+     }
+
+  if (rooted_tree == FALSE)
+     {
+        root = reroot(seq_tree, last_seq-first_seq+1);
+     }
+  else
+     {
+        root = seq_tree;
+     }
+
+/*
+  calculate the 'order' of each node.
+*/
+  order_nodes();
+
+  if (numseq >= 2)
+     {
+/*
+  If there are two or more sequences....
+*/
+/*
+  assign the sequence nodes (in the same order as in the alignment file)
+*/
+      for (i=first_seq; i<last_seq; i++)
+       {
+         if (strlen(names[i+1]) > MAXNAMES)
+             warning("name %s is too long for PHYLIP tree format (max %d chars)", names[i+1],MAXNAMES);
+
+         for (k=0; k< strlen(names[i+1]) && k<MAXNAMES ; k++)
+           {
+             c = names[i+1][k];
+             if ((c>0x40) && (c<0x5b)) c=c | 0x20;  /* fold 'A'..'Z' to lower case */
+             if (c == ' ') c = '_';
+             name2[k] = c;
+           }
+         name2[k]='\0';
+         found = FALSE;
+         for (j=0; j<numseq; j++)
+           {
+            for (k=0; k< strlen(lptr[j]->name) && k<MAXNAMES ; k++)
+              {
+                c = lptr[j]->name[k];
+                if ((c>0x40) && (c<0x5b)) c=c | 0x20;  /* same case folding for the tree name */
+                name1[k] = c;
+              }
+            name1[k]='\0';
+            if (strcmp(name1, name2) == 0)
+              {
+                olptr[i] = lptr[j];  /* no break: the last matching leaf wins */
+                found = TRUE;
+              }
+           }
+         if (found == FALSE)
+           {
+             error("tree not compatible with alignment:\n%s not found", name2);
+             return((sint)0);
+           }
+       }
+
+     }
+   return((sint)1);
+}
+
+static void create_tree(treeptr ptree, treeptr parent)
+{
+   treeptr p;
+/* Recursively parse one node or leaf from fd into ptree; ch holds the lookahead character. */
+   sint i, type;
+   float dist;
+   char name[MAXNAMES+1];
+
+/*
+  is this a node or a leaf ?
+*/
+  skip_space(fd);
+  ch = (char)getc(fd);
+  if (ch == '(')
+    {  
+/*
+   this must be a node....
+*/
+      type = NODE;
+      name[0] = '\0';
+      ptrs[ntotal] = nptr[nnodes] = ptree;
+      nnodes++;
+      ntotal++;
+
+      create_node(ptree, parent);
+
+      p = ptree->left;
+      create_tree(p, ptree);
+           
+      if ( ch == ',')
+       {
+          p = ptree->right;
+          create_tree(p, ptree);
+          if ( ch == ',')  /* a third branch: the input tree is unrooted */
+            {
+               ptree = insert_node(ptree);
+               ptrs[ntotal] = nptr[nnodes] = ptree;
+               nnodes++;
+               ntotal++;
+               p = ptree->right;
+               create_tree(p, ptree);
+               rooted_tree = FALSE;
+            }
+       }
+
+      skip_space(fd);
+      ch = (char)getc(fd);
+    }
+/*
+   ...otherwise, this is a leaf
+*/
+  else
+    {
+      type = LEAF;
+      ptrs[ntotal++] = lptr[numseq++] = ptree;  /* NOTE(review): no bounds check against the sizes allocated in read_tree */
+/*
+   get the sequence name
+*/
+      name[0] = ch;
+      ch = (char)getc(fd);
+      i = 1;
+      while ((ch != ':') && (ch != ',') && (ch != ')') && !feof(fd) && !ferror(fd))  /* stop on a truncated file too */
+        {
+          if (i < MAXNAMES) name[i++] = ch;  /* characters beyond MAXNAMES are dropped */
+          ch = (char)getc(fd);
+        }
+      name[i] = '\0';
+      if (ch != ':')
+         {
+           distance_tree = FALSE;
+           dist = 0.0;
+         }
+    }
+
+/*
+   get the distance information
+*/
+  dist = 0.0;
+  if (ch == ':')
+     {
+       skip_space(fd);
+       fscanf(fd,"%f",&dist);  /* on a failed match dist keeps the 0.0 set above */
+       skip_space(fd);
+       ch = (char)getc(fd);
+     }
+   set_info(ptree, parent, type, name, dist);
+
+
+}
+
+static void create_node(treeptr pptr, treeptr parent)
+{
+  treeptr t;
+
+  pptr->parent = parent;
+  t = avail();  /* fresh, empty child for the left branch */
+  pptr->left = t;
+  t = avail();  /* and another for the right branch */
+  pptr->right = t;
+    
+}
+
+static treeptr insert_node(treeptr pptr)
+{
+/* Create a new node above pptr, with pptr as its left child, and return the new node. */
+   treeptr newnode;
+
+   newnode = avail();
+   create_node(newnode, pptr->parent);
+
+   newnode->left = pptr;  /* NOTE(review): overwrites the left child create_node just allocated */
+   pptr->parent = newnode;
+
+   set_info(newnode, pptr->parent, NODE, "", 0.0);  /* pptr->parent is newnode by now, so newnode->parent = newnode here */
+
+   return(newnode);
+}
+
+static void skip_space(FILE *fd)
+{
+  int   c;
+  /* Consume whitespace from fd and leave the first other character unread. */
+  do
+     c = getc(fd);
+  while(isspace(c));
+
+  ungetc(c, fd);  /* push back the character that ended the loop */
+}
+
+static treeptr avail(void)  /* allocate a tree node with all fields cleared */
+{
+   treeptr p;
+   p = ckalloc(sizeof(stree));  /* used without a NULL check; presumably ckalloc aborts on failure - confirm */
+   p->left = NULL;
+   p->right = NULL;
+   p->parent = NULL;
+   p->dist = 0.0;
+   p->leaf = 0;
+   p->order = 0;
+   p->name[0] = '\0';
+   return(p);
+}
+
+void clear_tree(treeptr p)
+{
+   clear_tree_nodes(p);  /* frees the nodes; a NULL p means "start at root" */
+   /* then release the four pointer tables allocated in read_tree */
+   nptr=ckfree((void *)nptr);
+   ptrs=ckfree((void *)ptrs);
+   lptr=ckfree((void *)lptr);
+   olptr=ckfree((void *)olptr);
+}
+
+static void clear_tree_nodes(treeptr p)  /* free p and everything below it */
+{
+   if (p==NULL) p = root;  /* NOTE(review): root itself is not checked for NULL */
+   if (p->left != NULL)
+     {
+       clear_tree_nodes(p->left);
+     }
+   if (p->right != NULL)
+     {
+       clear_tree_nodes(p->right);
+     }
+   p->left = NULL;
+   p->right = NULL;
+   p=ckfree((void *)p);   /* only the local copy of the pointer is reassigned */
+}
+
+static void set_info(treeptr p, treeptr parent, sint pleaf, char *pname, float pdist)  /* store node attributes */
+{
+   p->parent = parent;
+   p->leaf = pleaf;
+   p->dist = pdist;
+   p->order = 0;  /* recomputed later by order_nodes */
+   strcpy(p->name, pname);  /* NOTE(review): unbounded copy; callers in this file pass at most MAXNAMES chars */
+   if (p->leaf == TRUE)  /* tests against TRUE, not LEAF - presumably the same value; confirm in clustalw.h */
+     {
+        p->left = NULL;
+        p->right = NULL;
+     }
+}
+
+static treeptr reroot(treeptr ptree, sint nseqs)
+{
+/* Choose the branch on which to place a new root, insert it, and return the new root node. */
+   treeptr p, rootnode, rootptr;
+   float   diff, mindiff = 0.0, mindepth = 1.0, maxdist;
+   sint   i;
+   Boolean first = TRUE;
+
+/*
+  find the difference between the means of leaf->node
+  distances on the left and on the right of each node
+*/
+   rootptr = ptree;
+   for (i=0; i<ntotal; i++)
+     {
+        p = ptrs[i];
+        if (p->parent == NULL)
+           diff = calc_root_mean(p, &maxdist);
+        else
+           diff = calc_mean(p, &maxdist, nseqs);
+
+        if ((diff == 0) || ((diff > 0) && (diff < 2 * p->dist)))  /* candidate: diff is 0, or positive and below twice p's branch length */
+          {
+              if ((maxdist < mindepth) || (first == TRUE))  /* keep the candidate with the smallest maxdist; the first one always qualifies */
+                 {
+                    first = FALSE;
+                    rootptr = p;
+                    mindepth = maxdist;
+                    mindiff = diff;
+                 }
+           }
+
+     }
+
+/*
+  insert a new node as the ancestor of the node which produces the shallowest
+  tree.
+*/
+   if (rootptr == ptree)  /* nothing replaced the starting node: use its right branch instead */
+     {
+        mindiff = rootptr->left->dist + rootptr->right->dist;
+        rootptr = rootptr->right;
+     }
+   rootnode = insert_root(rootptr, mindiff);
+  
+   diff = calc_root_mean(rootnode, &maxdist);  /* result is not used afterwards */
+
+   return(rootnode);
+}
+
+static treeptr insert_root(treeptr p, float diff)
+{
+   treeptr newp, prev, q, t;
+   float dist, prevdist,td;
+/* Put a new parentless node above p, reverse the links from p's old parent upwards, drop the old top node. */
+   newp = avail();
+
+   t = p->parent;  /* p must have a parent: t is dereferenced without a check */
+   prevdist = t->dist;
+
+   p->parent = newp;
+
+   dist = p->dist;
+
+   p->dist = diff / 2;  /* split p's old branch between p and t, clamped to [0, dist] */
+   if (p->dist < 0.0) p->dist = 0.0;
+   if (p->dist > dist) p->dist = dist;
+
+   t->dist = dist - p->dist; 
+
+   newp->left = t;
+   newp->right = p;
+   newp->parent = NULL;
+   newp->dist = 0.0;
+   newp->leaf = NODE;
+
+   if (t->left == p) t->left = t->parent;  /* the slot that held p now points at t's former parent */
+   else t->right = t->parent;
+
+   prev = t;
+   q = t->parent;
+
+   t->parent = newp;
+
+   while (q != NULL)  /* walk up the old tree, turning each ancestor into a child of the node below it */
+     {
+        if (q->left == prev)
+           {
+              q->left = q->parent;
+              q->parent = prev;
+              td = q->dist;  /* branch lengths shift one step along the reversed path */
+              q->dist = prevdist;
+              prevdist = td;
+              prev = q;
+              q = q->left;
+           }
+        else
+           {
+              q->right = q->parent;
+              q->parent = prev;
+              td = q->dist;
+              q->dist = prevdist;
+              prevdist = td;
+              prev = q;
+              q = q->right;
+           }
+    }
+
+/*
+   remove the old root node
+*/
+   q = prev;  /* prev is the last node visited, i.e. the old top node */
+   if (q->left == NULL)
+      {
+         dist = q->dist;
+         q = q->right;  /* its single remaining child takes its place and absorbs its length */
+         q->dist += dist;
+         q->parent = prev->parent;
+         if (prev->parent->left == prev)
+            prev->parent->left = q;
+         else
+            prev->parent->right = q;
+         prev->right = NULL;  /* prev is detached here but not freed */
+      }
+   else
+      {
+         dist = q->dist;
+         q = q->left;
+         q->dist += dist;
+         q->parent = prev->parent;
+         if (prev->parent->left == prev)
+            prev->parent->left = q;
+         else
+            prev->parent->right = q;
+         prev->left = NULL;  /* prev is detached here but not freed */
+      }
+
+   return(newp);
+}
+
+static float calc_root_mean(treeptr root, float *maxdist)  /* returns left mean minus right mean of leaf distances */
+{
+   float dist , lsum = 0.0, rsum = 0.0, lmean,rmean,diff;
+   treeptr p;
+   sint i;
+   sint nl, nr;
+   sint direction;
+/*
+   for each leaf, determine whether the leaf is left or right of the root.
+*/
+   dist = (*maxdist) = 0;
+   nl = nr = 0;
+   for (i=0; i< numseq; i++)
+     {
+         p = lptr[i];
+         dist = 0.0;
+         while (p->parent != root)  /* climb until p is a direct child of root */
+           {
+               dist += p->dist;
+               p = p->parent;
+           }
+         if (p == root->left) direction = LEFT;
+         else direction = RIGHT;
+         dist += p->dist;  /* include the branch joining that child to root */
+
+         if (direction == LEFT)
+           {
+             lsum += dist;
+             nl++;
+           }
+         else
+           {
+             rsum += dist;
+             nr++;
+           }
+        if (dist > (*maxdist)) *maxdist = dist;  /* *maxdist ends as the largest leaf-to-root distance */
+     }
+
+   lmean = lsum / nl;  /* NOTE(review): nl or nr may be 0 if one side has no leaves */
+   rmean = rsum / nr;
+
+   diff = lmean - rmean;
+   return(diff);
+}
+
+
+static float calc_mean(treeptr nptr, float *maxdist, sint nseqs)  /* parameter nptr hides the global of the same name */
+{
+   float dist , lsum = 0.0, rsum = 0.0, lmean,rmean,diff;
+   treeptr p, *path2root;
+   float *dist2node;
+   sint depth = 0, i,j , n = 0;
+   sint nl , nr;
+   sint direction, found;
+
+	path2root = (treeptr *)ckalloc(nseqs * sizeof(treeptr));
+	dist2node = (float *)ckalloc(nseqs * sizeof(float));
+/*
+   determine all nodes between the selected node and the root;
+*/
+   depth = (*maxdist) = dist = 0;
+   nl = nr = 0;
+   p = nptr;
+   while (p != NULL)
+     {
+         path2root[depth] = p;  /* NOTE(review): depth is not checked against nseqs */
+         dist += p->dist;
+         dist2node[depth] = dist;  /* running total of dist over path2root[0..depth] */
+         p = p->parent;
+         depth++;
+     }
+ 
+/*
+   *nl = *nr = 0;
+   for each leaf, determine whether the leaf is left or right of the node.
+   (RIGHT = descendant, LEFT = not descendant)
+*/
+   for (i=0; i< numseq; i++)
+     {
+       p = lptr[i];
+       if (p == nptr)
+         {
+            direction = RIGHT;
+            dist = 0.0;
+         }
+       else
+         {
+         direction = LEFT;
+         dist = 0.0;
+/*
+   find the common ancestor.
+*/
+         found = FALSE;
+         n = 0;
+         while ((found == FALSE) && (p->parent != NULL))
+           {
+               for (j=0; j< depth; j++)
+                 if (p->parent == path2root[j])
+                    { 
+                      found = TRUE;
+                      n = j;  /* index of the common ancestor within path2root */
+                    }
+               dist += p->dist;
+               p = p->parent;
+           }
+         if (p == nptr) direction = RIGHT;  /* the climb ended on nptr itself: the leaf is a descendant */
+         }
+
+         if (direction == LEFT)
+           {
+             lsum += dist;
+             lsum += dist2node[n-1];  /* plus the total along path2root[0..n-1]; relies on n >= 1 here */
+             nl++;
+           }
+         else
+           {
+             rsum += dist;
+             nr++;
+           }
+
+        if (dist > (*maxdist)) *maxdist = dist;
+     }
+
+	dist2node=ckfree((void *)dist2node);
+	path2root=ckfree((void *)path2root);
+	
+   lmean = lsum / nl;  /* NOTE(review): nl or nr may be 0 */
+   rmean = rsum / nr;
+   
+   diff = lmean - rmean;
+   return(diff);
+}
+
+static void order_nodes(void)
+{
+   sint i;
+   treeptr p;
+   /* Add 1 to the order of every node on each leaf's path to the top, the leaf included. */
+   for (i=0; i<numseq; i++)
+     {
+        p = lptr[i];
+        while (p != NULL)
+          {
+             p->order++;  /* so order ends up as the number of leaves at or below the node */
+             p = p->parent;
+          }
+     }
+}
+
+
+static sint calc_weight(sint leaf)
+{
+/* Weight of sequence leaf: 100 * sum of dist/order along its path to the top, truncated. */
+  treeptr p;
+  float weight = 0.0;
+
+  p = olptr[leaf];
+  while (p->parent != NULL)  /* the top node itself contributes nothing */
+    {
+       weight += p->dist / p->order;  /* each branch length is divided by the node's order */
+       p = p->parent;
+    }
+
+  weight *= 100.0;
+
+  return((sint)weight);
+
+}
+
+static void group_seqs(treeptr p, sint *next_groups, sint nseqs)
+{
+    sint i;
+    sint *tmp_groups;
+/* Recursively record, via save_set, the two groups joined at p; next_groups returns p's membership. */
+    tmp_groups = (sint *)ckalloc((nseqs+1) * sizeof(sint));
+    for (i=0;i<nseqs;i++)
+         tmp_groups[i] = 0;
+
+    if (p->left != NULL)
+      {
+         if (p->left->leaf == NODE)
+            {
+               group_seqs(p->left, next_groups, nseqs);
+               for (i=0;i<nseqs;i++)
+                 if (next_groups[i] != 0) tmp_groups[i] = 1;  /* everything under the left child is group 1 */
+            }
+         else
+            {
+               mark_group1(p->left, tmp_groups, nseqs);
+            }
+               
+      }
+
+    if (p->right != NULL)
+      {
+         if (p->right->leaf == NODE)
+            {
+               group_seqs(p->right, next_groups, nseqs);
+               for (i=0;i<nseqs;i++)
+                    if (next_groups[i] != 0) tmp_groups[i] = 2;  /* everything under the right child is group 2 */
+            }
+         else 
+            {
+               mark_group2(p->right, tmp_groups, nseqs);
+            }
+         save_set(nseqs, tmp_groups);  /* a set is saved only when p has a right child */
+      }
+    for (i=0;i<nseqs;i++)
+      next_groups[i] = tmp_groups[i];  /* hand the combined membership back to the caller */
+
+    tmp_groups=ckfree((void *)tmp_groups);
+
+}
+
+static void mark_group1(treeptr p, sint *groups, sint n)
+{
+    sint i;
+    /* Set groups[i] to 1 for the sequence whose leaf is p and to 0 for every other. */
+    for (i=0;i<n;i++)
+       {
+         if (olptr[i] == p)
+              groups[i] = 1;
+         else
+              groups[i] = 0;
+       }
+}
+
+static void mark_group2(treeptr p, sint *groups, sint n)
+{
+    sint i;
+    /* Set groups[i] to 2 for the sequence whose leaf is p; any other non-zero entry becomes 1. */
+    for (i=0;i<n;i++)
+       {
+         if (olptr[i] == p)
+              groups[i] = 2;
+         else if (groups[i] != 0)
+              groups[i] = 1;
+       }
+}
+
+static void save_set(sint n, sint *groups)
+{
+    sint i;
+    /* Append groups[0..n-1] to the global sets table; both indices of sets are 1-based. */
+    for (i=0;i<n;i++)
+      sets[nsets+1][i+1] = groups[i];
+    nsets++;
+}
+
+
+
+sint calc_similarities(sint nseqs)
+{
+   sint depth = 0, i,j, k, n;
+   sint found;
+   sint nerrs, seq1[MAXERRS],seq2[MAXERRS];
+   treeptr p, *path2root;
+   float dist;
+   float *dist2node, bad_dist[MAXERRS];
+   double **dmat;
+   char err_mess[1024],err1[MAXLINE],reply[MAXLINE];
+/* Turn tree path lengths between each pair of leaves into percent similarities in tmat; returns 0 if the user declines to continue. */
+   path2root = (treeptr *)ckalloc((nseqs) * sizeof(treeptr));
+   dist2node = (float *)ckalloc((nseqs) * sizeof(float));
+   dmat = (double **)ckalloc((nseqs) * sizeof(double *));
+   for (i=0;i<nseqs;i++)
+     dmat[i] = (double *)ckalloc((nseqs) * sizeof(double));
+
+   if (nseqs >= 2)
+    {
+/*
+   for each leaf, determine all nodes between the leaf and the root;
+*/
+      for (i = 0;i<nseqs; i++)
+       { 
+          depth = dist = 0;
+          p = olptr[i];
+          while (p != NULL)
+            {
+                path2root[depth] = p;  /* NOTE(review): depth is not checked against nseqs */
+                dist += p->dist;
+                dist2node[depth] = dist;  /* running total of dist over path2root[0..depth] */
+                p = p->parent;
+                depth++;
+            }
+ 
+/*
+   for each pair....
+*/
+          for (j=0; j < i; j++)
+            {
+              p = olptr[j];
+              dist = 0.0;
+/*
+   find the common ancestor.
+*/
+              found = FALSE;
+              n = 0;
+              while ((found == FALSE) && (p->parent != NULL))
+                {
+                    for (k=0; k< depth; k++)
+                      if (p->parent == path2root[k])
+                         { 
+                           found = TRUE;
+                           n = k;  /* index of the common ancestor within path2root */
+                         }
+                    dist += p->dist;
+                    p = p->parent;
+                }
+   
+              dmat[i][j] = dist + dist2node[n-1];  /* both legs up to the common ancestor; relies on n >= 1 */
+            }
+        }
+
+		nerrs = 0;
+        for (i=0;i<nseqs;i++)
+          {
+             dmat[i][i] = 0.0;
+             for (j=0;j<i;j++)
+               {
+                  if (dmat[i][j] < 0.01) dmat[i][j] = 0.01;  /* clamp distances into [0.01, 1.0] */
+                  if (dmat[i][j] > 1.0) {
+                  	if (dmat[i][j] > 1.1 && nerrs<MAXERRS) {  /* remember up to MAXERRS pairs above 1.1 */
+                  		seq1[nerrs] = i;
+                  		seq2[nerrs] = j;
+                  		bad_dist[nerrs] = dmat[i][j];
+                  		nerrs++;
+                  	}
+                    dmat[i][j] = 1.0;
+                  }
+               }
+          }
+        if (nerrs>0) 
+          {
+             strcpy(err_mess,"The following sequences are too divergent to be aligned:\n");
+             for (i=0;i<nerrs && i<5;i++)  /* at most five pairs are listed in the message */
+              {
+             	sprintf(err1,"           %s and %s (distance %1.3f)\n",
+             	                        names[seq1[i]+1],
+					names[seq2[i]+1],bad_dist[i]);
+             	strcat(err_mess,err1);  /* NOTE(review): unbounded append into err_mess[1024] */
+              }
+	     strcat(err_mess,"(All distances should be between 0.0 and 1.0)\n");
+	     strcat(err_mess,"This may not be fatal but you have been warned!\n");
+             strcat(err_mess,"SUGGESTION: Remove one or more problem sequences and try again");
+             if(interactive) 
+             	    (*reply)=prompt_for_yes_no(err_mess,"Continue ");
+             else (*reply) = 'y';  /* when not interactive the message is not shown and processing continues */
+             if ((*reply != 'y') && (*reply != 'Y'))
+                    return((sint)0);  /* NOTE(review): skips the ckfree calls below for path2root, dist2node and dmat */
+          }
+     }
+   else
+     {
+        for (i=0;i<nseqs;i++)
+          {
+             for (j=0;j<i;j++)
+               {
+                  dmat[i][j] = tmat[i+1][j+1];
+               }
+          }
+     }
+
+   path2root=ckfree((void *)path2root);
+   dist2node=ckfree((void *)dist2node);
+   for (i=0;i<nseqs;i++)
+     {
+        tmat[i+1][i+1] = 0.0;
+        for (j=0;j<i;j++)
+          {
+             tmat[i+1][j+1] = 100.0 - (dmat[i][j]) * 100.0;  /* distance 0..1 becomes similarity 100..0 */
+             tmat[j+1][i+1] = tmat[i+1][j+1];  /* tmat is 1-based and kept symmetric */
+          }
+     }
+
+   for (i=0;i<nseqs;i++) dmat[i]=ckfree((void *)dmat[i]);
+   dmat=ckfree((void *)dmat);
+
+   return((sint)1);
+}
+

Added: trunk/packages/clustalw/branches/upstream/current/clustalv.doc
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/clustalv.doc	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/clustalv.doc	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,1978 @@
+
+
+
+		Clustal V  Multiple Sequence Alignments.
+
+		Documentation (Installation and Usage).
+
+		Des Higgins
+		European Molecular Biology Laboratory
+		Postfach 10.2209
+		D-6900 Heidelberg
+		Germany.
+
+		higgins at EMBL-Heidelberg.DE
+
+
+*******************************************************************
+
+
+		Contents.
+
+
+		1		Overview
+
+		2		Installation
+
+		3		Interactive usage
+
+		4		Command-line interface
+
+		5		Algorithms and references
+
+
+*******************************************************************
+
+		1.  Overview
+
+This document describes how to install and use ClustalV on various 
+machines.  ClustalV is a complete upgrade and rewrite of the Clustal 
+package of multiple alignment programs (Higgins and Sharp, 1988 and 
+1989).   The original programs were written in Fortran for 
+microcomputers running MSDOS.   You carried out a complete alignment 
+by running 3 programs in succession.   Later, these were merged into 
+a single menu driven program with on-line help, for VAX/VMS.  
+ClustalV was written in C and has all of the features of the old 
+programs plus many new ones.  It has been compiled and tested using 
+VAX/VMS C, Decstation ULTRIX C, Gnu C for Sun workstations, Turbo C 
+for IBM PC's and Think C for Apple Mac's.   The original Clustal was 
+written by Des Higgins while he was a Post-Doc in the lab of Paul 
+Sharp in the Genetics Department, Trinity College, Dublin 2, 
+Ireland. 
+
+The main feature of the old package was the ability to carry out 
+reliable multiple alignments of many sequences.  The sensitivity of 
+the program is as good as from any other program we have tried, with 
+the exception of the programs of Vingron and Argos (1991), while it 
+works in reasonable time on a microcomputer.  The programs of 
+Vingron and Argos are specialised for finding distant similarities 
+between proteins but require mainframes or workstations and are more 
+difficult to use.
+
+The main new features are: profile alignments (alignments of old 
+alignments); phylogenetic trees (Neighbor Joining trees calculated 
+after multiple alignment with a bootstrapping option); better 
+sequence input (automatically recognise and read NBRF/PIR, Pearson 
+(Fasta) or EMBL/SwissProt formats); flexible alignment output 
+(choose one of: old Clustal format, NBRF/PIR, GCG msf format or 
+Phylip format); full command line interface (everything that you can 
+do interactively can be specified on the command line).
+
+In version 7 of the GCG package, there is a program called PILEUP 
+which uses a very similar algorithm to the one in ClustalV.  There 
+are 2 main differences between the programs: 1) the metric used to 
+compare the sequences for the initial "guide tree" uses a full 
+global, optimal alignment in PILEUP instead of the fast, approximate 
+ones in ClustalV.  This makes PILEUP much slower for the comparison 
+of long sequences.  In principle, the distances calculated from 
+PILEUP will be more sensitive than ours, but in practice it will not 
+make much difference, except in difficult cases.  2)  During the 
+multiple alignment, terminal gaps are penalised in ClustalV but not 
+in PILEUP.  This will make the PILEUP alignments better when the 
+sequences are of very different lengths (has no effect if there are 
+no large terminal gaps).   
+
+
+This software may be distributed and used freely, provided that you 
+do not modify it or this documentation in any way without the 
+permission of the authors.  
+
+If you wish to refer to ClustalV, please cite: 
+Higgins,D.G. Bleasby,A.J. and Fuchs,R. (1991) CLUSTAL V: improved software
+for multiple sequence alignment. CABIOS, vol. 8, 189-191.  
+
+The overall multiple alignment algorithm was described in:
+Higgins,D.G. and Sharp,P.M. (1989).  Fast and sensitive multiple 
+sequence alignments on a microcomputer.  CABIOS, vol. 5, 151-153.
+
+
+ACKNOWLEDGEMENTS.
+
+D.H. would particularly like to thank Paul Sharp, in whose lab. this 
+work originated.  We also thank Manolo Gouy, Gene Myers, Peter Rice 
+and Martin Vingron for suggestions, bug-fixes and help.    
+
+Des Higgins and Rainer Fuchs, 
+EMBL Data Library, Heidelberg, Germany.
+
+Alan Bleasby,  
+Daresbury, UK.
+
+JUNE 1991
+*******************************************************************
+
+		2.  Installation.
+
+
+
+As far as possible, we have tried to make ClustalV portable to any 
+machine with a standard C compiler (proposed ANSI C standard).  The 
+source code, as supplied by us, has been compiled and tested using 
+the following compilers:
+
+VAX/VMS C
+Ultrix C (on a Decstation 2100)
+Gnu C on a Sun 4 workstation
+Think C on an Apple Macintosh SE
+Turbo C on an IBM AT.
+
+In each case, one must make 1 change to 1 line of code in 1 header 
+file.  This is described below.  The exact capacity of the program 
+(how many sequences of what length can be aligned) will depend of 
+course on available memory but can also be set in this header file.
+
+The package comes as 9 C source files; 3 header files; 1 file of on-
+line help; this documentation file; 3 make files:
+
+Source code:	clustalv.c, amenu.c, gcgcheck.c, myers.c, sequence.c, 
+			showpair.c, trees.c, upgma.c, util.c
+
+Header files:	clustalv.h, general.h, matrices.h
+
+On-Line help:	clustalv.hlp  (must be renamed or defined as 		
+			clustalv_help except on PC's)
+
+Documentation:	clustalv.doc (this file).
+
+Makefiles:	makefile.sun (gnu c on Sun), vmslink.com (vax/vms), 
+			makefile.ult (ultrix).
+
+
+
+
+
+
+
+Before compiling ClustalV you must look at and possibly change 
+clustalV.h, shown below.  
+
+/*******************CLUSTALV.H********************************/
+
+/*
+Main header file for ClustalV. Uncomment ONE of the following lines
+depending on which compiler you wish to use.
+*/
+
+#define VMS 1             /* VAX VMS */
+
+/*#define MAC 1           Think_C for MacIntosh */
+
+/*#define MSDOS 1         Turbo C for PC's */
+
+/*#define UNIX 1          Ultrix for Decstations or Gnu C for Sun */
+
+/*************************************************************/
+
+#include "general.h"
+
+#define MAXNAMES          10
+#define MAXTITLES         60
+#define FILENAMELEN      256
+
+#define UNKNOWN   0
+#define EMBLSWISS 1
+#define PIR       2
+#define PEARSON   3
+
+#define PAGE_LEN       22
+
+#if VMS
+#define DIRDELIM ']'
+#define MAXLEN          3000
+#define MAXN             150
+#define FSIZE          15000
+#define LINELENGTH        60
+#define GCG_LINELENGTH    50
+
+#elif MAC
+#define DIRDELIM ':'
+#define MAXLEN          2600
+#define MAXN              30
+#define FSIZE          10000
+#define LINELENGTH        50
+#define GCG_LINELENGTH    50
+
+#elif MSDOS
+#define DIRDELIM '\\'
+#define MAXLEN          1300
+#define MAXN              30
+#define FSIZE           5000
+#define LINELENGTH        50
+#define GCG_LINELENGTH    50
+
+#elif UNIX
+#define DIRDELIM '/'
+#define MAXLEN         3000
+#define MAXN             50
+#define FSIZE         15000
+#define LINELENGTH       60
+#define GCG_LINELENGTH   50
+#endif
+/*****************end*of*CLUSTALV.H***************************/
+
+
+
+First, you must remove the comments from one of the first 10 lines.  
+There are 4 'define' compiler directives here (e.g. #define VMS 1), 
+and you should use one of these, depending on which system you wish 
+to work. So choose one of these, remove its comments (if it is 
+already commented out) and put comments around any of the others 
+that are still active. If you wish to use a different system, you 
+will need to insert a new line with a new keyword (which you must 
+invent) to identify your system.  Most of the rest of this header 
+file is taken up with a block of 'define' statements for each system 
+type; e.g. the VAX/VMS block is:
+
+#if VMS
+#define DIRDELIM ']'
+#define MAXLEN          3000
+#define MAXN             150
+#define FSIZE          15000
+#define LINELENGTH        60
+#define GCG_LINELENGTH    50
+
+In this block, you can specify the maximum number of sequences to be 
+allowed (MAXN); the maximum sequence length, including gaps 
+(MAXLEN);  FSIZE declares the size of some workspace, used by the 
+fast 2 sequence comparison routines and should be APPROXIMATELY 4 
+times MAXLEN; LINELENGTH is the length of the blocks of alignment 
+output in the output files; GCG_LINELENGTH is the same but for the 
+GCG compatible output only.  Finally, DIRDELIM is the character used 
+to specify directories and subdirectories in file names.  It should 
+be the character used to separate the file name itself from the 
+directory name (e.g. in VMS, file names are like: 
+$drive:[dir1.dir2.dir3]filename.ext;2  so ']' is used as DIRDELIM).   
+
+So, if you want to use a system, not covered in Clustalv.h, you will 
+have to insert a new block, like the above one.  To compile and link 
+the program, we supply 3 makefiles: one each for VAX/VMS, Ultrix 
+and GNU C for Sun workstations. 
+
+ 
+
+VAX/VMS
+
+Compile and link the program with the 
+supplied makefile for vms: vmslink.com .
+
+$ @vmslink
+
+This will produce clustalv.exe (and a lot of .obj files which you can delete).  
+
+The on-line help file (clustalv.hlp) should be 'defined' as 
+clustalv_help as follows:
+
+$ def clustalv_help $drive:[dir1.dir2]clustalv.hlp 
+
+where $drive is the drive designation and [dir1.dir2] is the 
+directory where clustalv.hlp is kept.  
+
+To make use of the command-line interface, you must make clustalv a 
+'foreign' command with:
+
+$ clustalv :== $$drive:[dir1.dir2]clustalv
+
+where $drive is the drive designation and [dir1.dir2] is the 
+directory where clustalv.exe is kept.  
+
+
+
+IBM PC/MSDOS/TURBO C
+
+Create a makefile (something.prj) with the names of the source files 
+(clustalv.c, amenu.c etc.) and 'make' this using the HUGE memory 
+model.  You will get half a dozen warnings from the compiler about 
+pieces of code that look suspicious to it but ignore these.  The 
+help file should remain as clustalv.hlp .   To run the program using 
+the default settings in Clustalv.h, you need approximately 500k of 
+memory.  To reduce this, the main influence on memory usage is the 
+parameter MAXLEN; reduce MAXLEN to reduce memory usage.
+
+
+
+Apple Mac/THINK_C version 4.0.2
+
+This version of the program is not at all Mac like.  It runs in a 
+window, the inside of which looks just like a normal character based 
+terminal.  In the future we might put a proper Mac interface on it 
+but do not have the time right now.  With the default settings in 
+the header file ClustalV.h, you need just over 800k of memory to run 
+the program.  To reduce this, reduce MAXLEN; this is easily the 
+biggest influence on memory usage.  To compile the program and save 
+it as an application you need to 'set the application type'; here 
+you specify how much memory (in kilobytes (k)) the application will 
+need.  You should set this to 900k to run the application as it is 
+OR reduce MAXLEN in the header.  To compile the program you have to 
+create a 'project'; you 'add' the names of the 9 source files to the 
+project AND the name of the ANSI library.  The source code is too 
+large to compile in one compilation unit.  You will get a 'link 
+error: code segment too big' if you try to compile and link as is.  
+You should compile amenu.c (the biggest source file) as a separate 
+unit ..... you will have to read the manual/ask someone/mail me to 
+find out what this is.
+
+
+*******************************************************************
+
+		3.  Interactive usage.
+
+
+
+Interactive usage of Clustal V is completely menu driven.  On-line 
+help is provided, defaults are offered for all parameters and file 
+names.  With a little effort it should be completely self 
+explanatory.   The main menu, which appears when you run the 
+program, is shown below.  Each item brings you to a sub menu.
+
+
+
+Main menu for Clustal V:
+
+
+     1. Sequence Input From Disc
+     2. Multiple Alignments
+     3. Profile Alignments
+     4. Phylogenetic trees
+
+     S. Execute a system command
+     H. HELP
+     X. EXIT (leave program)
+
+
+Your choice: 
+
+
+
+The options S and H appear on all the main menus.  H will provide 
+help and if you type S you will be asked to enter a command, such as 
+DIR or LS, which will be sent to the system (does not work on 
+Mac's).  Before carrying out an alignment, you must use option 1 
+(sequence input); the format for sequences is explained below.  
+Under menu item 2 you will be able to automatically align your 
+sequences to each other.  Menu item 3 allows you to do profile 
+alignments.  These are alignments of old alignments.  This allows 
+you to build up a multiple alignment in stages or add a new sequence 
+to an old alignment.   You can calculate phylogenetic trees from 
+alignments using menu item 4.
+
+
+
+
+      ******************************
+      *       SEQUENCE INPUT.      *
+      ******************************
+
+
+All sequences should be in 1 file.  Three formats are automatically 
+recognised and used: NBRF/PIR, EMBL/SwissProt and FASTA (Pearson and 
+Lipman (1988) format).   
+
+***
+Users of the Wisconsin GCG package should use the command TONBRF 
+(recently changed to TOPIR) to reformat their sequences before use. 
+*** 
+
+Sequences can be in upper or lower case.  For proteins, the only 
+symbols recognised are:  A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y and 
+for DNA/RNA use: A,C,G and T (or U).  Any other letters of the 
+alphabet will be treated as X (proteins) or N (DNA/RNA) for unknown.  
+All other symbols (blanks, digits etc.) will be ignored EXCEPT for 
+the hyphen "-" which can be used to specify a gap.  This last point 
+is especially useful for 2 reasons: 1) you can fix the positions of 
+some gaps in advance; 2) the alignment output from this program can 
+be written out in NBRF format using "-"'s to specify gaps; these 
+alignments can be used again as input, either for profile alignments 
+or for phylogenetic trees.
+
+If you are using an editor to create sequence files, use the FASTA 
+format as it is by far the simplest (see below).  If you have access 
+to utility programs for generating/converting the NBRF/PIR format 
+then use it in preference.
+
+
+
+FASTA (PEARSON AND LIPMAN, 1988) FORMAT:     The sequences are 
+delimited by an angle bracket ">" in column 1.  The text immediately 
+after the ">" is used as a title.  Everything on the following line 
+until the next ">" or the end of the file is one sequence.
+
+e.g.
+
+> RABSTOUT   rabbit Guinness receptor
+   LKMHLMGHLKMGLKMGLKGMHLMHLKHMHLMTYTYTTYRRWPLWMWLPDFGHAS
+   ADSCVCAHGFAVCACFAHFDVCFGAVCFHAVCFAHVCFAAAVCFAVCAC
+> MUSNOSE   mouse nose drying factor
+    mhkmmhkgmkhmhgmhmhglhmkmhlkmgkhmgkmkytytytryrwtqtqwtwyt
+    fdgfdsgafdagfdgfsagdfavdfdvgavfsvfgvdfsvdgvagvfdv
+> HSHEAVEN    human Guinness receptor repeat
+ mhkmmhkgmkhmhgmhmhg   lhmkmhlkmgkhmgkmk  ytytytryrwtqtqwtwyt
+ fdgfdsgafdagfdgfsag   dfavdfdvgavfsvfgv  dfsvdgvagvfdv
+ mhkmmhkgmkhmhgmhmhg   lhmkmhlkmgkhmgkmk  ytytytryrwtqtqwtwyt
+ fdgfdsgafdagfdgfsag   dfavdfdvgavfsvfgv  dfsvdgvagvfdv
+
+
+
+NBRF/PIR FORMAT         is similar to FASTA format but immediately 
+after the ">", you find the characters "P1;" if the sequences are 
+protein or "DL;" if they are nucleic acid.  Clustalv looks for the 
+";" character as the third character after the ">".  If it finds one 
+it assumes that the format is NBRF; if not, FASTA format is assumed.  
+The text after the ";" is treated as a sequence name while the 
+entire next line is treated as a title.  The sequence is terminated 
+by a star "*" and the next sequence can then begin (with a >P1; etc 
+).  This is just the basic format description (there are other 
+variations and rules).
+
+ANY files/sequences in GCG format can be converted to this format 
+using the TONBRF command (now TOPIR) of the Wisconsin GCG package.
+
+
+e.g.
+
+>P1;RABSTOUT
+rabbit Guinness receptor
+LKMHLMGHLKMGLKMGLKGMHLMHLKHMHLMTYTYTTYRRWPLWMWLPDFGHAS
+ADSCVCAHGFAVCACFAHFDVCFGAVCFHAVCFAHVCFAAAVCFAVCAC*
+>P1;MUSNOSE   
+mouse nose drying factor
+mhkmmhkgmkhmhgmhmhglhmkmhlkmgkhmgkmkytytytryrwtqtqwtwyt
+fdgfdsgafdagfdgfsagdfavdfdvgavfsvfgvdfsvdgvagvfd
+*
+>P1;HSHEAVEN    
+human Guinness receptor repeat protein.
+mhkmmhkgmkhmhgmhmhg   lhmkmhlkmgkhmgkmk  ytytytryrwtqtqwtwyt
+fdgfdsgafdagfdgfsag   dfavdfdvgavfsvfgv  dfsvdgvagvfdv
+mhkmmhkgmkhmhgmhmhg   lhmkmhlkmgkhmgkmk  ytytytryrwtqtqwtwyt
+fdgfdsgafdagfdgfsag   dfavdfdvgavfsvfgv  dfsvdgvagvfdv*
+
+
+  
+
+EMBL/SWISSPROT FORMAT:       Do not try to create files with this 
+format unless you have utilities to help.  If you are just using an 
+editor, use one of the above formats.  If you do use this format, 
+the program will ignore everything between the ID line (line 
+beginning with the characters "ID") and the SQ line.  The sequence 
+is then read from between the SQ line and the "//" characters.
+
+
+
+It is critically important for the program to know whether or not it 
+is aligning DNA or protein sequences.  The input routines attempt to 
+guess which type of sequence is being used by counting the number of 
+A,C,G,T or U's in the sequences.  If the total is more than 85% of 
+the sequence length then DNA is assumed.  If you use very bizarre 
+sequences (proteins with really strange aa compositions or DNA 
+sequences with loads of strange ambiguity codes) you might confuse 
+the program.  It is difficult to do but be careful.
+
+
+
+
+
+      ******************************
+      *  MULTIPLE ALIGNMENT MENU.  *
+      ******************************
+
+The multiple alignment menu is shown below.  Before explaining how 
+to use it, you must be introduced briefly to the alignment strategy. 
+If you do not follow this, try using option 1 anyway; the entire 
+process will be carried out automatically.
+
+To do a complete multiple alignment, we need to know the approximate 
+relationships of the sequences to each other (which ones are most 
+similar to each other).  We do this by calculating a crude 
+phylogenetic tree which we call a dendrogram (to distinguish it from 
+the more sensitive trees available under the phylogenetic tree 
+menu).   This dendrogram is used as a guide to align bigger and 
+bigger groups of sequences during the multiple alignment.  The 
+dendrogram is calculated in 2 stages: 1) all pairs of sequence are 
+compared using the fast/approximate method of Wilbur and Lipman 
+(1983); the result of each comparison is a similarity score. 2) the 
+similarity scores are used to construct the dendrogram using the 
+UPGMA cluster analysis method of Sneath and Sokal (1973).  
+
+The construction of the dendrogram can be very time consuming if you 
+wish to align many sequences (e.g. for 100 sequences you need to 
+carry out 100x99/2 sequence comparisons = 4950). During every 
+multiple alignment, a dendrogram is constructed and saved to a file 
+(something.dnd).  These can be reused later.
+
+
+
+
+
+
+
+
+******Multiple*Alignment*Menu******
+
+
+    1.  Do complete multiple alignment now
+    2.  Produce dendrogram file only
+    3.  Use old dendrogram file
+    4.  Pairwise alignment parameters
+    5.  Multiple alignment parameters
+    6.  Output format options
+
+    S.  Execute a system command
+    H.  HELP
+    or press [RETURN] to go back to main menu
+
+
+Your choice: 
+
+
+So, if in doubt, and you have already loaded some sequences from the 
+main menu, just try option 1 and press the <Return> key in response 
+to any questions.  You will be prompted for 2 file names e.g. if the 
+sequence input file was called DRINK.PEP, you will be offered 
+DRINK.ALN as the file to contain the alignment and DRINK.DND for the 
+dendrogram.  
+
+If you wish to repeat a multiple alignment (e.g. to experiment with 
+different gap penalties) but do not wish to make a dendrogram all 
+over again use menu item 3 (providing you are using the same 
+sequences).  Similarly, menu item 2 allows you to produce the 
+dendrogram file only.
+
+
+
+
+PAIRWISE ALIGNMENT PARAMETERS:     
+
+The parameters that control the initial fast/approximate comparisons 
+can be set from menu item 4 which looks like:
+
+
+ ********* WILBUR/LIPMAN PAIRWISE ALIGNMENT PARAMETERS *********
+
+
+     1. Toggle Scoring Method  :Percentage
+     2. Gap Penalty            :3
+     3. K-tuple                :1
+     4. No. of top diagonals   :5
+     5. Window size            :5
+
+     H. HELP
+
+
+Enter number (or [RETURN] to exit): 
+
+
+
+The similarity scores are calculated from fast alignments generated 
+by the method of Wilbur and Lipman (1983).  These are 'hash' or 
+'word' or 'k-tuple' alignments carried out in 3 stages.  
+
+First you mark the positions of every fragment of sequence, K-tuple 
+long (for proteins, the default length is 1 residue, for DNA it is 2 
+bases) in both sequences.  Then you locate all k-tuple matches 
+between the 2 sequences.   At this stage you have to imagine a dot-
+matrix plot between the 2 sequences with each k-tuple match as a 
+dot.   You find those diagonals in the plot with most matches (you 
+take the "No. of top diagonals" best ones) and mark all diagonals 
+within "Window size" of each top diagonal.  This process will define 
+diagonal bands in the plot where you hope the most likely regions of 
+similarity will lie.  
+
+The final alignment stage is to find that head to tail arrangement 
+of k-tuple matches from these diagonal regions that will give the 
+highest score.  The score is calculated as the number of exactly 
+matching residues in this alignment minus a "gap penalty" for every 
+gap that was introduced.  When you toggle "Scoring method" you 
+choose between expressing these similarity scores as raw scores or 
+expressed as a percentage of the shorter sequence length.  
+
+K-TUPLE SIZE:   Can be 1 or 2 for proteins; 1 to 4 for DNA.  
+Increase this to increase speed; decrease to improve sensitivity.
+
+GAP PENALTY:    The number of matching residues that must be found 
+in order to introduce a gap.  This should be larger than K-Tuple 
+Size.  This has little effect on speed or sensitivity.
+
+NO. OF TOP DIAGONALS:    The number of best diagonals in the 
+imaginary dot-matrix plot that are considered.  Decrease (must be 
+greater than zero) to increase speed; increase to improve 
+sensitivity.
+
+WINDOW SIZE:    The number of diagonals around each "top" diagonal 
+that are considered.   Decrease for speed; increase for greater 
+sensitivity.
+
+SCORING METHOD: The similarity scores may be expressed as raw scores 
+(number of identical residues minus a "gap penalty" for each gap) or 
+as percentage scores.  If the sequences are of very different 
+lengths, percentage scores make more sense.
+
+
+
+CHANGING THE PAIRWISE ALIGNMENT PARAMETERS
+
+The main reason for wanting to change the above parameters is SPEED 
+(especially on microcomputers), NOT SENSITIVITY.   The dendrograms 
+that are produced can only show the relationships between the 
+sequences APPROXIMATELY because the similarity scores are calculated 
+from separate pairwise alignments; not from a multiple alignment 
+(that is what we eventually hope to produce).  If the groupings of 
+the sequences are "obvious", the above method should work well; if 
+the relationships are obscure or weakly represented by the data, it 
+will not make much difference playing with the parameters.  The main 
+factor influencing speed is the K-TUPLE SIZE followed by the WINDOW 
+SIZE.  
+
+The alignments are carried out in a small amount of memory.  
+Occasionally (it is hard to predict), you will run out of memory 
+while doing these alignments; when this happens, it will say on the 
+screen: "Sequences (a,b) partially aligned" (instead of "Sequences 
+(a,b) aligned").  This means that the alignment score for these 
+sequences will be approximate;  it is not a problem unless many of 
+the alignments do this.  It can be fixed by using less sensitive 
+parameters or increasing parameter FSIZE in clustalv.h .
+
+
+THE DENDROGRAM ITSELF
+
+The similarity scores generated by the fast comparison of all the 
+sequences are used to construct a dendrogram by the UPGMA method of 
+Sneath and Sokal (1973).  This is a form of cluster analysis and the 
+end result produces something that looks like a tree.  It represents 
+the similarity of the sequences as a hierarchy.  The dendrogram is 
+written to a file in a machine readable format and is shown below 
+for an example with 6 sequences.
+
+
+    91.0   0   0   2   012000         ! seq 2 joins seq 3 at 91% ID.
+    72.0   1   0   3   011200         ! seq 4 joins seqs 2,3 at 72%
+    71.1   0   0   2   000012         ! seq 5 joins seq 6 at 71%
+    35.5   0   2   4   122200         ! seq 1 joins seqs 2,3,4
+    21.7   4   3   6   111122         ! seqs 1,2,3,4 join seqs 5,6
+
+This LOOKS complicated but you do not normally need to care what is 
+in here.  Anyway, each row represents the joining together of 2 or 
+more sequences.  You progress from the top down, joining more and 
+more sequences until all are joined together; for N sequences you 
+have N-1 groupings hence there are 5 rows in the above file (there 
+were 6 sequences).  In each row, the first number is the similarity 
+score of this grouping; ignore the next three columns for the 
+moment; the last 6 digits in the line show which sequences are 
+grouped; there is one digit for each sequence (the first digit is 
+for the first sequence).  The rule is:  in each row, all of the "1"s 
+join all of the "2"s; the zero's do nothing.   
+
+Hence, in the first row, sequence 2 joins sequence 3 at a similarity 
+level of 91% identity; next, sequence 4 joins the previous grouping 
+of 2 plus 3 at a level of 72% etc.   This is shown diagrammatically 
+below.  Before leaving the dendrogram format, the other 3 columns of 
+numbers are: a pointer to the row from which the "1" sequences were 
+last joined (or zero if only one of them); a pointer to the row in 
+which the "2"s were last joined; the total number of sequences 
+joined in this line.
+
+
+
+
+                      I------ 2
+               I------I
+               I      I------ 3  Diagram of the sequence similarity 
+          I----I
+          I    I------------- 4  relationships shown in the above 
+       I--I
+       I  I------------------ 1  dendrogram file (branch lengths are
+   ----I
+       I       I------------- 5  not to scale).
+       I-------I
+               I------------- 6
+
+
+
+
+
+
+
+
+
+MULTIPLE ALIGNMENT PARAMETERS:
+
+
+Having calculated a dendrogram between a set of sequences, the final 
+multiple alignment is carried out by a series of alignments of 
+larger and larger groups of sequences.  The order is determined by 
+the dendrogram so that the most similar sequences get aligned first.  
+Any gaps that are introduced in the early alignments are fixed.  
+When two groups of sequences are aligned against each other, a full 
+protein weight matrix (such as a Dayhoff PAM 250) is used.  Two gap 
+penalties are offered: a "FIXED" penalty for opening up a gap and a 
+"FLOATING" penalty for extending a gap.  
+
+
+ ********* MULTIPLE ALIGNMENT PARAMETERS *********
+
+
+     1. Fixed Gap Penalty       :10
+     2. Floating Gap Penalty    :10
+     3. Toggle Transitions (DNA):Weighted
+     4. Protein weight matrix   :PAM 250
+
+     H. HELP
+
+
+Enter number (or [RETURN] to exit): 
+
+
+FIXED GAP PENALTY:   Reduce this to encourage gaps of all sizes; 
+increase it to discourage them.   Terminal gaps are penalised the same 
+as all others.  BEWARE of making this too small (approx 5 or so); if 
+the penalty is too small, the program may prefer to align each 
+sequence opposite one long gap.
+
+FLOATING GAP PENALTY:   Reduce this to encourage longer gaps; 
+increase it to shorten them.   Terminal gaps are penalised the same as 
+all others.  BEWARE of making this too small (approx 5 or so); if 
+the penalty is too small, the program may prefer to align each 
+sequence opposite one long gap.
+
+
+DNA TRANSITIONS = WEIGHTED or UNWEIGHTED:   By default, transitions 
+(A versus G; C versus T) are weighted more strongly than 
+transversions (an A aligned with a G will be preferred to an A 
+aligned with a C or a T).  You can make all pairs of nucleotide 
+equally weighted with this option.
+
+PROTEIN WEIGHT MATRIX:  For protein comparisons, a weight matrix is 
+used to differentially weight different pairs of aligned amino 
+acids.  The default is the well known Dayhoff PAM 250 matrix.  We 
+also offer a PAM 100 matrix, an identity matrix (all weights are the 
+same for exact matches) or allow you to give the name of a file with 
+your own matrix.  The weight matrices used by Clustal V are shown in 
+full in the Algorithms and References section of this documentation.  
+
+If you input a matrix from a file, it must be in the following 
+format.  Use a 20x20 matrix only (entries for the 20 "normal" amino 
+acids only; no ambiguity codes etc.).  Input the lower left triangle 
+of the matrix, INCLUDING the diagonal.  The order of the amino acids 
+(rows and columns) must be: CSTPAGNDEQHRKMILVFYW.  The values can be 
+in free format separated by spaces (not commas).  The PAM 250 matrix 
+is shown below in this format.
+
+  12 
+   0  2 
+  -2  1  3 
+  -3  1  0  6 
+  -2  1  1  1  2 
+  -3  1  0 -1  1  5 
+  -4  1  0 -1  0  0  2 
+  -5  0  0 -1  0  1  2  4 
+  -5  0  0 -1  0  0  1  3  4 
+  -5 -1 -1  0  0 -1  1  2  2  4 
+  -3 -1 -1  0 -1 -2  2  1  1  3  6 
+  -4  0 -1  0 -2 -3  0 -1 -1  1  2  6 
+  -5  0  0 -1 -1 -2  1  0  0  1  0  3  5 
+  -5 -2 -1 -2 -1 -3 -2 -3 -2 -1 -2  0  0  6 
+  -2 -1  0 -2 -1 -3 -2 -2 -2 -2 -2 -2 -2  2  5 
+  -6 -3 -2 -3 -2 -4 -3 -4 -3 -2 -2 -3 -3  4  2  6 
+  -2 -1  0 -1  0 -1 -2 -2 -2 -2 -2 -2 -2  2  4  2  4 
+  -4 -3 -3 -5 -4 -5 -4 -6 -5 -5 -2 -4 -5  0  1  2 -1  9 
+   0 -3 -3 -5 -3 -5 -2 -4 -4 -4  0 -4 -4 -2 -1 -1 -2  7 10 
+  -8 -2 -5 -6 -6 -7 -4 -7 -7 -5 -3  2 -3 -4 -5 -2 -6  0  0 17 
+
+Values must be integers and can be all positive or positive and 
+negative as above.  These are SIMILARITY values.  
+
+
+
+
+ALIGNMENT OUTPUT OPTIONS:
+      
+By default, the alignment goes to a file in a self explanatory 
+"blocked" alignment format.  This format is fine for displaying the 
+results but requires heavy editing if you wish to use the alignment 
+with other software.  To help, we provide 3 other formats which can 
+be turned on or off.  If you have a sequence data set or alignment 
+in memory, you can also ask for output files in whatever formats are 
+turned on, NOW.  The menu you use to choose format is shown below.
+ 
+*** 
+We draw your attention to NBRF/PIR format in particular.  This 
+format is EXACTLY the same as one of the input formats.  Therefore, 
+alignments written in this format can be used again as input (to the 
+profile alignments or phylogenetic trees).
+***
+
+
+ ********* Format of Alignment Output *********
+
+
+     1. Toggle CLUSTAL format output   =  ON
+     2. Toggle NBRF/PIR format output  =  OFF
+     3. Toggle GCG format output       =  OFF
+     4. Toggle PHYLIP format output    =  OFF
+
+     5. Create alignment output file(s) now?
+     H. HELP
+
+
+Enter number (or [RETURN] to exit): 
+
+
+
+CLUSTAL FORMAT:     This is a self explanatory alignment.  The 
+alignment is written out in blocks.  Identities are highlighted and 
+(if you use a PAM 250 matrix) positions in the alignment where all 
+of the residues are "similar" to each other (PAM 250 score of 8 or 
+more) are indicated.
+
+NBRF/PIR FORMAT:    This is the usual NBRF/PIR format with gaps 
+indicated by hyphens ("-"). As we have stressed before, this format 
+is EXACTLY compatible with the sequence input format.  Therefore you 
+can read in these alignments again for profile alignments or for 
+calculating phylogenetic trees.  
+
+GCG FORMAT:         In version 7 of the Wisconsin GCG package, a new 
+multiple sequence format was introduced.  This is the MSF (Multiple 
+Sequence Format) format.  It can be used as input to the GCG 
+sequence editor or any of the GCG programs that make use of multiple 
+alignments.   THIS FORMAT IS ONLY SUPPORTED IN VERSION 7 OF THE GCG 
+PACKAGE OR LATER.  
+
+PHYLIP FORMAT:      This format can be used by the Phylip package of 
+Joe Felsenstein (see the references/algorithms section for details 
+of how to get it).  Phylip allows you to do a huge range of 
+phylogenetic analyses (we just offer one method in this program) and 
+is probably the most widely used set of programs for drawing trees.
+It also works on just about every computer you can think of, 
+providing you have a decent Pascal compiler.
+
+
+
+
+
+      ******************************
+      *   PROFILE ALIGNMENT MENU.  *
+      ******************************
+
+
+
+This menu is for taking two old alignments (or single sequences) and 
+aligning them with each other.  The result is one bigger alignment.  
+The menu is very similar to the multiple alignment menu except that 
+there is no mention of dendrograms here (they are not needed) and 
+you need to input two sets of sequences.  The menu looks like this:
+
+
+
+******Profile*Alignment*Menu******
+
+
+    1.  Input 1st. profile/sequence
+    2.  Input 2nd. profile/sequence
+    3.  Do alignment now
+    4.  Alignment parameters
+    5.  Output format options
+
+    S.  Execute a system command
+    H.  HELP
+    or press [RETURN] to go back to main menu
+
+
+Your choice: 
+
+
+You must input profile number 1 first.   When both profiles are 
+loaded, use item 3 (Do alignment now) and the 2 profiles will be 
+aligned.  Items 4 and 5 (parameters and output options) are 
+identical to the equivalent options on the multiple alignment menu.  
+
+The same input routines that are used for general input are used 
+here i.e. sequences must be in NBRF/PIR, EMBL/SwissProt or FASTA 
+format, with gaps indicated by hyphens ("-").  This is why we have 
+continually drawn your attention to the NBRF/PIR format as a useful 
+output format.  
+
+Either profile can consist of just one sequence.   Therefore, if you 
+have a favourite alignment of sequences that you are working on and 
+wish to add a new sequence, you can use this menu, provided the 
+alignment is in the correct format.  
+
+The total number of sequences in the two profiles must be less 
+than or equal to the MAXN parameter set in the clustalv.h header 
+file.  
+
+
+
+
+
+
+
+
+
+
+
+      ******************************
+      *   PHYLOGENETIC TREE MENU.  *
+      ******************************
+
+
+This menu allows you to input an alignment and calculate a 
+phylogenetic tree.  You can also calculate a tree if you have just 
+carried out a multiple alignment and the alignment is still in 
+memory.  THE SEQUENCES MUST BE ALIGNED ALREADY!!!!!!   The tree will 
+look strange if the sequences are not already aligned.  You can also 
+"BOOTSTRAP" the tree to show confidence levels for groupings.  This 
+is SLOW on microcomputers but works fine on workstations or 
+mainframes.
+
+
+
+******Phylogenetic*tree*Menu******
+
+
+    1.  Input an alignment
+    2.  Exclude positions with gaps?        = OFF
+    3.  Correct for multiple substitutions? = OFF
+    4.  Draw tree now
+    5.  Bootstrap tree
+
+    S.  Execute a system command
+    H.  HELP
+    or press [RETURN] to go back to main menu
+
+
+Your choice: 
+
+
+
+
+The same input routine that is used for general input is used here 
+i.e. sequences must be in NBRF/PIR, EMBL/SwissProt or FASTA format, 
+with gaps indicated by hyphens ("-").  This is why we have 
+continually drawn your attention to the NBRF/PIR format as a useful 
+output format.  
+
+If you have input an alignment, then just use item 4 to draw a tree.  
+The method used is the Neighbor Joining method of Saitou and Nei 
+(1987).  This is a "distance method". First, percent divergence 
+figures are calculated between all pairs of sequence.  These 
+divergence figures are then used by the NJ method to give the tree.  
+Example trees will be shown below.  
+
+There are two options which can be used to control the way the 
+distances are calculated.  These are set by options 2 and 3 in the 
+menu.  
+
+EXCLUDE POSITIONS WITH GAPS?   This option allows you to ignore all 
+alignment positions (columns) where there is a gap in ANY sequence.  
+This guarantees that "like" is compared with "like" in all distances 
+i.e. the same positions are used to calculate all distances.  It 
+also means that the distances will be "metric".  The disadvantage of 
+using this option is that you throw away much of the data if there 
+are many gaps.  If the total number of gaps is small, it has little 
+effect.  
+ 
+CORRECT FOR MULTIPLE SUBSTITUTIONS?    As sequences diverge, 
+substitutions accumulate.  It becomes increasingly likely that more 
+than one substitution (as a result of a mutation) will have happened 
+at a site where you observe just one difference now.  This option 
+allows you to use formulae developed by Motoo Kimura to correct for 
+this effect.  It has the effect of stretching long branches in trees 
+while leaving short ones relatively untouched.  The desired effect 
+is to try and make distances proportional to time since divergence.  
+
+The tree is sent to a file called BLAH.NJ, where BLAH.SEQ is the 
+name of the input alignment file.  An example is shown below for 6 
+globin sequences.  
+
+
+
+ DIST   = percentage divergence (/100)
+ Length = number of sites used in comparison
+
+   1 vs.   2  DIST = 0.5683;  length =    139
+   1 vs.   3  DIST = 0.5540;  length =    139
+   1 vs.   4  DIST = 0.5315;  length =    111
+   1 vs.   5  DIST = 0.7447;  length =    141
+   1 vs.   6  DIST = 0.7571;  length =    140
+   2 vs.   3  DIST = 0.0897;  length =    145
+   2 vs.   4  DIST = 0.1391;  length =    115
+   2 vs.   5  DIST = 0.7517;  length =    145
+   2 vs.   6  DIST = 0.7431;  length =    144
+   3 vs.   4  DIST = 0.0957;  length =    115
+   3 vs.   5  DIST = 0.7379;  length =    145
+   3 vs.   6  DIST = 0.7361;  length =    144
+   4 vs.   5  DIST = 0.7304;  length =    115
+   4 vs.   6  DIST = 0.7368;  length =    114
+   5 vs.   6  DIST = 0.2697;  length =    152
+
+
+			Neighbor-joining Method
+
+ Saitou, N. and Nei, M. (1987) The Neighbor-joining Method:
+ A New Method for Reconstructing Phylogenetic Trees.
+ Mol. Biol. Evol., 4(4), 406-425
+
+
+ This is an UNROOTED tree
+
+ Numbers in parentheses are branch lengths
+
+
+ Cycle   1     =  SEQ:   5 (  0.13382) joins  SEQ:   6 (  0.13592)
+
+ Cycle   2     =  SEQ:   1 (  0.28142) joins Node:   5 (  0.33462)
+
+ Cycle   3     =  SEQ:   2 (  0.05879) joins  SEQ:   3 (  0.03086)
+
+ Cycle   4 (Last cycle, trichotomy):
+
+		 Node:   1 (  0.20798) joins
+		 Node:   2 (  0.02341) joins
+		  SEQ:   4 (  0.04915) 
+
+
+
+The output file first shows the percent divergence (distance) 
+figures between each pair of sequence.  Then a description of a NJ 
+tree is given.  This description shows which sequences (SEQ:) or 
+which groups of sequences (NODE: , a node is numbered using the 
+lowest sequence that belongs to it) join at each level of the tree.  
+
+This is an unrooted tree!! This means that the direction of 
+evolution through the tree is not shown.  This can only be inferred 
+in one of two ways:  
+1) assume a degree of constancy in the molecular clock and place the 
+root (bottom of the tree; the point where all the sequences radiate 
+from) half way along the longest branch.     **OR**
+2) use an "outgroup", a sequence from an organism that you "know" 
+must be outside of the rest of the sequences i.e. root the tree 
+manually, on biological grounds.
+
+The above tree can be represented diagrammatically as follows:
+
+
+                          SEQ 1       SEQ 4
+                           I           I
+          13.6             I 28.1      I 4.9          5.9
+  SEQ 6 ----------I        I           I          I--------- SEQ 2
+                  I        I           I          I
+                  I--------I-----------I----------I
+          13.4    I  33.5      20.8         2.3   I   3.1
+  SEQ 5 ----------I                               I--------- SEQ 3
+
+
+The figures along each branch are percent divergences along that 
+branch.  If you root the tree by placing the root along the longest 
+branch (33.5%) then you can draw it again as follows, this time 
+rooted:
+
+
+
+                        13.6
+                I-------------------- SEQ 6
+      I---------I       13.4
+      I         I-------------------- SEQ 5
+      I 33.5 
+ -----I                 28.1
+      I         I-------------------- SEQ 1
+      I         I
+      I---------I                4.9
+                I  20.8  I----------- SEQ 4
+                I--------I  
+                         I       5.9
+                         I 2.3 I----- SEQ 2
+                         I-----I 3.1
+                               I----- SEQ 3
+
+
+
+The longest branch (33.5% between 5,6 and 1,2,3,4) is split between 
+the 2 bottom branches of the tree.  As it happens in this particular 
+case, sequences 5 and 6 are myoglobins while sequences 1,2,3 and 4 
+are alpha and beta globins, so you could also justify the above 
+rooting on biological grounds.  If you do not have any particular 
+need or evidence for the position of the root, then LEAVE THE TREE 
+UNROOTED.  Unrooted trees do not look as pretty as rooted ones but 
+it is usual to leave them unrooted if you do not have any evidence 
+for the position of the root.
+
+
+BOOTSTRAPPING:    Different sets of sequences and different tree 
+drawing methods may give different topologies (branching orders) for 
+parts of a tree that are weakly supported by the data.  It is useful 
+to have an indication of the degree of error in the tree.  There are 
+several ways of doing this, some of them rather technical.  We 
+provide one general purpose method in this program, which makes use 
+of a technique called bootstrapping (see Felsenstein, 1985).
+
+In the case of sequence alignments, bootstrapping involves taking 
+random samples of positions from the alignment.  If the alignment 
+has N positions, each bootstrap sample consists of a random sample 
+of N positions, taken WITH REPLACEMENT i.e. in any given sample, 
+some sites may be sampled several times, others not at all.  Then, 
+with each sample of sites, you calculate a distance matrix as usual 
+and draw a tree.  If the data very strongly support just one tree 
+then the sample trees will be very similar to each other and to the 
+original tree, drawn without bootstrapping.  However, if parts of 
+the tree are not well supported, then the sample trees will vary 
+considerably in how they represent these parts.
+
+In practice, you should use a very large number of bootstrap 
+replicates (1000 is recommended, even if it means running the 
+program for an hour on a slow microcomputer; on a workstation it 
+will be MUCH faster).  For each grouping on the tree, you record the 
+number of times this grouping occurs in the sample trees.  For a 
+group to be considered "significant" at the 95% level (or P <= 0.05 
+in statistical terms) you expect the grouping to show up in >= 95% 
+of the sample trees.  If this happens, then you can say that the 
+grouping is significant, given the data set and the method used to 
+draw the tree.  
+
+So, when you use the bootstrap option, a NJ tree is drawn as before 
+and then you are asked to say how many bootstrap samples you want 
+(1000 is the default) and you are asked to give a seed number for 
+the random number generator.  If you give the same seed number in 
+future, you will get the same results (we hope).  Remember to give 
+different seed numbers if you wish to carry out genuinely different 
+bootstrap sampling experiments.  Below is the output file from using 
+the same data for the 6 globin sequences as used before.  The output 
+file has the same name as the input file with the extension ".njb".
+
+//
+STUFF DELETED  .... same as for the ordinary NJ output
+//
+			Bootstrap Confidence Limits
+
+
+ Random number generator seed =      99
+
+ Number of bootstrap trials   =    1000
+
+
+ Diagrammatic representation of the above tree: 
+
+ Each row represents 1 tree cycle; defining 2 groups.
+
+ Each column is 1 sequence; the stars in each line show 1 group; 
+ the dots show the other
+
+ Numbers show occurences in bootstrap samples.
+ 
+****..   1000              
+.***..   1000                <- This is the answer!!
+*..***    812 
+122311
+
+
+For an unrooted tree with N sequences, there are actually only N-3 
+genuinely different groupings that we can test (this is the number 
+of "internal branches"; each internal branch splits the sequences 
+into 2 groups).  In this example, we have 6 sequences with 3 
+internal branches in the reference tree.  In the bootstrap 
+resampling, we count how often each of these internal branches 
+occurs.  Here, we find that the branch which splits 1,2,3 and 4 
+versus 5 and 6 occurs in all 1000 samples; the branch which splits 
+2,3 and 4 versus 1,5 and 6 occurs in 1000; the branch which splits 2 
+and 3 versus 1,4,5 and 6 occurs in 812/1000 samples.  We can put 
+these figures on to the diagrammatic representation we made earlier 
+of our unrooted NJ tree as follows:
+
+
+
+                          SEQ 1       SEQ 4
+                           I           I
+                           I           I            
+  SEQ 6 ----------I        I           I          I--------- SEQ 2
+                  I  1000  I   1000    I   812    I
+                  I--------I-----------I----------I
+                  I                               I    
+  SEQ 5 ----------I                               I--------- SEQ 3
+
+
+
+You can equally put these confidence figures on the rooted tree (in 
+fact the interpretation is simpler with rooted trees).  With the 
+unrooted tree, the grouping of sequence 5 with 6 is significant (as 
+is the grouping of sequences 1,2,3 and 4).  Equally the grouping of 
+sequences 1,5 and 6 is significant (the same as saying that 2,3 and 
+4 group significantly).  However, the grouping of 2 and 3 is not 
+significant, although it is relatively strongly supported.  
+
+Unfortunately, there is a small complication in the interpretation 
+of these results.  In statistical hypothesis testing, it is not 
+valid to make multiple simultaneous tests and to treat the result of 
+each test completely independently.  In the above case, if you have 
+one particular test (grouping) that you wish to make in advance, it 
+is valid to test IT ALONE and to simply show the other bootstrap 
+figures for reference.  If you do not have any particular test in 
+mind before you do the bootstrapping, you can just show all of the 
+figures and use the 95% level as an ARBITRARY cut off to show those 
+groups that are very strongly supported; but not mention anything 
+about SIGNIFICANCE testing.  In the literature, it is common 
+practice to simply show the figures with a tree; they frequently 
+speak for themselves.  
+
+
+
+*******************************************************************
+
+		4.  Command Line Interface.
+
+
+
+You can do almost everything that can be done from the menus, using 
+a command line interface. In this mode, the program will take all of 
+its instructions as "switches" when you activate it; no questions 
+will be asked; if there are no errors, the program just does an 
+analysis and stops.   It does not work so well on the MAC but is 
+still possible.  To get you started we will show you the 2 simplest 
+uses of the command line as it looks on VAX/VMS.  On all other 
+machines (except the MAC) it works in the same way.
+
+$ clustalv /help           **OR**   $ clustalv /check
+
+Both of the above switches give you a one page summary of the 
+command line on the screen and then the program stops. 
+
+
+$ clustalv proteins.seq    **OR**   $ clustalv /infile=proteins.seq    
+
+This will read the sequences from the file 'proteins.seq' and do a 
+complete multiple alignment.  Default parameters will be used, the 
+program will try to tell whether or not the sequences are DNA or 
+protein and the output will go to a file called 'proteins.aln' . A 
+dendrogram file called 'proteins.dnd' will also be created.  Thus 
+the default action for the program, when it successfully reads in an 
+input file is to do a full multiple alignment.  Some further 
+examples of command line usage will be given later.
+
+Command line switches can be abbreviated but MAKE SURE YOU DO NOT 
+MAKE THEM AMBIGUOUS.  No attempt will be made to detect ambiguity.  
+Use enough characters to distinguish each switch uniquely.
+
+
+
+
+
+
+
+The full list of allowed switches is given below:
+
+
+                DATA (sequences)
+
+/INFILE=file.ext    :input sequences.  If you give an input file and 
+				nothing else as a switch, the default action is 
+				to do a complete multiple alignment.  The input 
+				file can also be specified by giving it as the 
+				first command line parameter with no "/" in 	
+				front of it e.g. $ clustalv file.ext  .
+
+/PROFILE1=file.ext	:You use these two switches to give the names of  
+/PROFILE2=file.ext	two profiles.  The default action is to align 
+			the two. You must give the names of both profile 
+				files. 
+
+
+
+                VERBS (do things)
+
+/HELP  		:list the command line parameters on the screen.
+/CHECK           
+                
+/ALIGN        	:do full multiple alignment.  This is the default 	
+			action if no other switches except for input files 
+			are given.
+
+/TREE      	:calculate NJ tree.  If this is the only action 	
+			specified (e.g. $ clustalv proteins.seq/tree ) it IS 
+			ASSUMED THAT THE SEQUENCES ARE ALREADY ALIGNED.  If 
+			the sequences are not already aligned, you should 	
+			also give the /ALIGN switch.  This will align the 	
+			sequences first, output an alignment file and 	
+			calculate the tree in memory. 
+
+/BOOTSTRAP(=n)	:bootstrap a NJ tree (n= number of bootstraps; 	
+			default = 1000).  If this is the only action 		
+			specified (e.g. $ clustalv proteins.seq/bootstrap ) 
+			it IS ASSUMED THAT THE SEQUENCES ARE ALREADY ALIGNED.  
+			If the sequences are not already aligned, you should 
+			also give the /ALIGN switch.  This will align the 	
+			sequences first, output an alignment file and 	
+			calculate the bootstraps in memory.  You can set the 
+			number of bootstrap trials here (e.g./bootstrap=500).  
+			You can set the seed number for the random number 	
+			generator with /seed=n.
+
+
+
+                PARAMETERS (set things)
+
+***Pairwise alignments:***
+
+/KTUP=n      	:word size              
+    
+/TOPDIAGS=n  	:number of best diagonals
+
+/WINDOW=n    	:window around best diagonals 
+ 
+/PAIRGAP=n   	:gap penalty
+
+
+
+***Multiple alignments:***
+
+/FIXEDGAP=n  	:fixed length gap pen.  
+    
+/FLOATGAP=n  	:variable length gap pen.
+
+/MATRIX=     	:PAM100 or ID or file name. The default weight matrix 
+			for proteins is PAM 250.
+
+/TYPE=p or d 	:type is protein or DNA.   This allows you to 	
+			explicitly override the program's attempt at guessing 
+			the type of the sequence.  It is only useful if you 
+			are using sequences with a VERY strange composition.
+
+/OUTPUT=     	:GCG or PHYLIP or PIR.  The default output is 	
+			Clustal format.
+    
+/TRANSIT     	:transitions not weighted.  The default is to weight 
+			transitions as more favourable than other mismatches 
+			in DNA alignments.  This switch makes all nucleotide 
+			mismatches equally weighted.
+
+
+***Trees:***                             
+
+/KIMURA      	:use Kimura's correction on distances.   
+
+/TOSSGAPS    	:ignore positions with a gap in ANY sequence.
+
+/SEED=n      	:seed number for bootstraps.
+
+
+
+
+EXAMPLES:
+
+These examples use the VAX/VMS $ prompt; otherwise, command-line 
+usage is the same on all machines except the Macintosh.
+
+ 
+$ clustalv proteins.seq      OR     $ clustalv /infile=proteins.seq
+
+Read whatever sequences are in the file "proteins.seq" and do a full 
+multiple alignment; output will go to the files: "proteins.dnd" 
+(dendrogram) and "proteins.aln" (alignment).
+
+
+$ clustalv proteins.seq/ktup=2/matrix=pam100/output=pir
+
+Same as last example but use K-Tuple size of 2; use a PAM 100 
+protein weight matrix; write the alignment out in NBRF/PIR format 
+(goes to a file called "proteins.pir").
+
+
+$ clustalv /profile1=proteins.seq/profile2=more.seq/type=p/fixed=11
+
+Take the alignment in "proteins.seq" and align it with "more.seq" 
+using default values for everything except the fixed gap penalty 
+which is set to 11.  The sequence type is explicitly set to 
+PROTEIN.
+
+
+$ clustalv proteins.pir/tree/kimura
+
+Take the sequences in proteins.pir (they MUST BE ALIGNED ALREADY) 
+and calculate a phylogenetic tree using Kimura's correction for 
+distances.  
+
+
+$ clustalv proteins.pir/align/tree/kimura
+
+Same as the previous example, EXCEPT THAT AN ALIGNMENT IS DONE 
+FIRST.
+
+
+$ clustalv proteins.seq/align/boot=500/seed=99/tossgaps/type=p
+
+Take the sequences in proteins.seq; they are explicitly set to be 
+protein; align them; bootstrap a tree using 500 samples and a seed 
+number of 99.
+
+
+*******************************************************************
+
+		5.  Algorithms and references.
+
+
+
+In this section, we will try to BRIEFLY describe the algorithms used 
+in ClustalV and give references.  The topics covered are:
+
+
+	-Multiple alignments
+
+	-Profile alignments
+
+	-Protein weight matrices
+
+	-Phylogenetic trees
+
+		-distances
+
+		-NJ method
+
+		-Bootstrapping
+
+		-Phylip
+
+	-References
+
+
+
+
+
+
+MULTIPLE ALIGNMENTS.
+
+The approach used in ClustalV is a modified version of the method of 
+Feng and Doolittle (1987) who aligned the sequences in larger and 
+larger groups according to the branching order in an initial 
+phylogenetic tree.  This approach allows a very useful combination 
+of computational tractability and sensitivity.  
+
+The positions of gaps that are generated in early alignments remain 
+through later stages.  This can be justified because gaps that arise 
+from the comparison of closely related sequences should not be moved 
+because of later alignment with more distantly related sequences.  
+At each alignment stage, you align two groups of already aligned 
+sequences.  This is done using a dynamic programming algorithm where 
+one allows the residues that occur in every sequence at each 
+alignment position to contribute to the alignment score.  A Dayhoff 
+(1978) PAM matrix is used in protein comparisons.
+
+The details of the algorithm used in ClustalV have been published in 
+Higgins and Sharp (1989).  This was an improved version of an 
+earlier algorithm published in Higgins and Sharp (1988).  First, you 
+calculate a crude similarity measure between every pair of sequences.  
+This is done using the fast, approximate alignment algorithm of 
+Wilbur and Lipman (1983).  Then, these scores are used to calculate 
+a "guide tree" or dendrogram, which will tell the multiple alignment 
+stage in which order to align the sequences for the final multiple 
+alignment.  This "guide tree" is calculated using the UPGMA method 
+of Sneath and Sokal (1973).  UPGMA is a fancy name for one type of 
+average linkage cluster analysis, invented by Sokal and Michener 
+(1958).  
+
+Having calculated the dendrogram, the sequences are aligned in 
+larger and larger groups.  At each alignment stage, we use the 
+algorithm of Myers and Miller (1988) for the optimal alignments.  
+This algorithm is a very memory efficient variation of Gotoh's 
+algorithm (Gotoh, 1982).  It is because of this algorithm that 
+ClustalV can work on microcomputers.   Each of these alignments 
+consists of aligning 2 alignments, using what we call "profile 
+alignments".
+
+
+PROFILE ALIGNMENTS.
+
+We use the term "profile alignment" to describe the alignment of 2 
+alignments.  We use this term because the method is a simple 
+extension of the profile method of Gribskov, et al. (1987) for 
+aligning 1 sequence with an alignment.  Normally, with a 2 sequence 
+alignment, you use a weight matrix (e.g. a PAM 250 matrix) to give a 
+score between the pairs of aligned residues.  The alignment is 
+considered "optimal" if it gives the best total score for aligned 
+residues minus penalties for any gaps (insertions or deletions) that 
+must be introduced.  
+
+Profile alignments are a simple extension of 2 sequence alignments 
+in that you can treat each of the two input alignments as single 
+sequences but you calculate the score at aligned positions as the 
+average weight matrix score of all the residues in one alignment 
+versus all those in the other e.g. if you have 2 alignments with I 
+and J sequences respectively; the score at any position is the 
+average of all the I times J scores of the residues compared 
+separately.  Any gaps that are introduced are placed in all of the 
+sequences of an alignment at the same position.  The profile 
+alignments offered in the "profile alignment menu" are also 
+calculated in this way.
+
+
+PROTEIN WEIGHT MATRICES.
+
+There are 3 built-in weight matrices used by clustalV.  These are 
+the PAM 100 and PAM 250 matrices of Dayhoff (1978) and an identity 
+matrix.  Each matrix is given as the bottom left half, including the 
+diagonal of a 20 by 20 matrix.  The order of the rows and columns is 
+CSTPAGNDEQHRKMILVFYW.
+
+
+PAM 250
+
+C  12 
+S   0  2 
+T  -2  1  3 
+P  -3  1  0  6 
+A  -2  1  1  1  2 
+G  -3  1  0 -1  1  5 
+N  -4  1  0 -1  0  0  2 
+D  -5  0  0 -1  0  1  2  4 
+E  -5  0  0 -1  0  0  1  3  4 
+Q  -5 -1 -1  0  0 -1  1  2  2  4 
+H  -3 -1 -1  0 -1 -2  2  1  1  3  6 
+R  -4  0 -1  0 -2 -3  0 -1 -1  1  2  6 
+K  -5  0  0 -1 -1 -2  1  0  0  1  0  3  5 
+M  -5 -2 -1 -2 -1 -3 -2 -3 -2 -1 -2  0  0  6 
+I  -2 -1  0 -2 -1 -3 -2 -2 -2 -2 -2 -2 -2  2  5 
+L  -6 -3 -2 -3 -2 -4 -3 -4 -3 -2 -2 -3 -3  4  2  6 
+V  -2 -1  0 -1  0 -1 -2 -2 -2 -2 -2 -2 -2  2  4  2  4 
+F  -4 -3 -3 -5 -4 -5 -4 -6 -5 -5 -2 -4 -5  0  1  2 -1  9 
+Y   0 -3 -3 -5 -3 -5 -2 -4 -4 -4  0 -4 -4 -2 -1 -1 -2  7 10 
+W  -8 -2 -5 -6 -6 -7 -4 -7 -7 -5 -3  2 -3 -4 -5 -2 -6  0  0 17 
+---------------------------------------------------------------- 
+    C  S  T  P  A  G  N  D  E  Q  H  R  K  M  I  L  V  F  Y  W
+
+
+IDENTITY MATRIX
+
+10 
+ 0  10 
+ 0  0  10 
+ 0  0  0  10 
+ 0  0  0  0  10 
+ 0  0  0  0  0  10 
+ 0  0  0  0  0  0  10 
+ 0  0  0  0  0  0  0  10 
+ 0  0  0  0  0  0  0  0  10 
+ 0  0  0  0  0  0  0  0  0  10 
+ 0  0  0  0  0  0  0  0  0  0  10 
+ 0  0  0  0  0  0  0  0  0  0  0  10 
+ 0  0  0  0  0  0  0  0  0  0  0  0  10 
+ 0  0  0  0  0  0  0  0  0  0  0  0  0  10 
+ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  10 
+ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  10 
+ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  10 
+ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  10 
+ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 10 
+ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 10
+
+
+
+
+
+PAM 100
+
+ 14 
+ -1  6 
+ -5  2   7 
+ -6  1  -1  10 
+ -5  2   2   1   6 
+ -8  1  -3  -3   1   8 
+ -8  2   0  -3  -1  -1  7 
+-11 -1  -2  -4  -1  -1  4   8 
+-11 -2  -3  -3   0  -2  1   5   8 
+-11 -3  -3  -1  -2  -5 -1   1   4   9 
+ -6 -4  -5  -2  -5  -7  2  -1  -2   4 11 
+ -6 -1  -4  -2  -5  -8 -3  -6  -5   1  1 10 
+-11 -2  -1  -4  -4  -5  1  -2  -2  -1 -3  3  8 
+-11 -4  -2  -6  -3  -8 -5  -8  -6  -2 -7 -2  1 13 
+ -5 -4  -1  -6  -3  -7 -4  -6  -5  -5 -7 -4  4  2  9 
+-12 -7  -5  -5  -5  -8 -6  -9  -7  -3 -5 -7 -6  4  2  9 
+ -4 -4  -1  -4   0  -4 -5  -6  -5  -5 -6 -6 -6  1  5  1  8 
+-10 -5  -6  -9  -7  -8 -6 -11 -11 -10 -4 -7-11 -2  0  0 -5 12 
+ -2 -6  -6 -11  -6 -11 -3  -9  -7  -9 -1-10-10 -8 -4 -5 -6  6 13 
+-13 -4 -10 -11 -11 -13 -8 -13 -14 -11 -7  1 -9-11-12 -7-14 -2 -2 19 
+
+
+
+
+PHYLOGENETIC TREES.
+
+There are two COMMONLY used approaches for inferring phylogenetic 
+trees from sequence data: parsimony and distance methods. There are 
+other approaches which are probably superior in theory but which are 
+yet to be used widely. This does not mean that they are no use; we 
+(the authors of this program at any rate) simply do not know enough 
+about them yet.  You should see the documentation accompanying the 
+Phylip package and some of the references there for an explanation 
+of the different methods and what assumptions are implied when you 
+use them.   
+
+There is a constant debate in the literature as to the merits of 
+different methods but unfortunately, a lot of what is said is 
+incomprehensible or inaccurate.  It is also a field that is prone to 
+having highly opinionated schools of thought.  This is a pity 
+because it prevents rational discussion of the pros and cons of 
+the different methods.  The approach adopted in ClustalV is to 
+supply just one method and to produce alignments in a format that 
+can be used by Phylip.  In simple cases, the trees produced will be 
+as "good" (reliable, robust) as those from ANY other method.  In 
+more complicated cases, there is no single magic recipe that we can 
+supply that will work well in even most situations.
+
+The method we provide is the Neighbor Joining method (NJ) of Saitou 
+and Nei (1987) which is a distance method.  We use this for three 
+reasons:  it is conceptually and computationally simple; it is fast; 
+it gives "good" trees in simple cases. It is difficult to prove that 
+one tree is "better" than another if you do not know the true 
+phylogeny; the few systematic surveys of methods show it to work 
+more or less as well as any other method ON AVERAGE.  Another reason 
+for using the NJ method is that it is very commonly used; THIS IS A 
+BAD REASON SCIENTIFICALLY but at least you will not feel lonely if 
+you use it.
+
+The NJ method works on a matrix of distances (the distance matrix) 
+between all pairs of sequences to be analysed.  These distances are 
+related to the degree of divergence between the sequences.  It is 
+normal to calculate the distances from the sequences after they are 
+multiply aligned.  If you calculate them from separate alignments 
+(as done for the dendrograms in another part of this program), you 
+may increase the error considerably.  
+
+
+DISTANCES
+
+The simplest measure of distance between sequences is percent 
+divergence (100% minus percent identity).  For two sequences, you 
+count how many positions differ between them (ignoring all positions 
+with a gap or an unknown residue) and divide by the number of 
+positions considered.  It is common practice to also ignore all 
+positions in the alignment where there is a GAP in ANY of the 
+sequences (Tossgaps ? option in the menu).  Usually, you express the 
+percent distance divided by 100 (gives distances between 0.0 and 
+1.0).
+
+This measure of distance is perfectly adequate (with some further 
+modification described below) for rRNA sequences. However it treats 
+all residues identically e.g. all amino acid substitutions are 
+equally weighted. It also treats all positions identically e.g. it 
+does not take account of different rates of substitution in 
+different positions of different codons in protein coding DNA 
+sequences; see Li et al (1985) for a distance measure that does.  
+Despite these shortcomings, these percent identity distances do work 
+well in practice in a wide variety of situations.  
+
+In a simple world, you would like a distance to be proportional to 
+the time since the sequences diverged.  If this were EXACTLY true, 
+then the calculation of the tree would be a simple matter of algebra 
+(UPGMA does this for you) and the branch lengths will be nice and 
+meaningful (times).  In practice this OBVIOUSLY depends on the 
+existence and quality of the "molecular clock", a subject of on-
+going debate.  However, even if there is a good clock, there is a 
+further problem with estimating divergences.  As sequences diverge, 
+they become "saturated" with mutations.  Sites can have 
+substitutions more than once.  Calculated distances will 
+underestimate actual divergence times; the greater the divergence, 
+the greater the discrepancy.  There are various methods for dealing 
+with this and we provide two commonly used ones, both due to Motoo 
+Kimura; one for proteins and one for DNA. 
+
+
+For distance K (percent divergence /100 ) ...
+
+Correction for Protein distances:  (Kimura, 1983).
+
+       Corrected K = -ln(1.0 - K - (K * K/5.0))
+
+
+
+Correction for nucleotide distances: Kimura's 2-parameter method 
+(Kimura, 1980).
+
+       Corrected K = 0.5*ln(a) + 0.25*ln(b)
+
+       where     a = 1/(1 - 2*P - Q)
+       and       b = 1/(1 - 2*Q)
+
+       P and Q are the proportions of transitions (A<-->G, C<-->T)
+       and transversions occurring between the sequences.  
+
+
+One paradoxical effect of these corrections, is that distances can 
+be corrected to have more than 100% divergence.  That is because, 
+for very highly diverged sequences of length N, you can estimate 
+that more than N substitutions have occurred by correcting the 
+observed distance in the above ways.  Don't panic!
+
+
+
+NEIGHBOR JOINING TREES.
+
+VERY briefly, the NJ method works as follows.  You start by placing 
+the sequences in a star topology (no internal branches).  You then 
+find that internal branch (take 2 sequences; join them; connect them 
+to the rest by the internal branch) which when added to the tree 
+will minimise the total branch length. The two joined sequences 
+(neighbours) are merged into a single sequence and the process is 
+repeated.  For an unrooted tree with N sequences, there are N-3 
+internal branches.  The above process is repeated N-3 times to give 
+the final tree.  The full details are given in Saitou and Nei 
+(1987).
+
+As explained elsewhere in the documentation, you can only root the 
+tree by one of two methods:
+
+1) assume a degree of constancy in the molecular clock and place the 
+root along the longest branch (internal or external).  Methods that 
+appear to produce rooted trees automatically are often just doing 
+this without letting you know; this is true of UPGMA.
+
+2) root the tree on biological grounds.  The usual method is to 
+include an "outgroup", a sequence that you are certain will branch 
+to the outside of the tree.  
+
+
+
+BOOTSTRAPPING.
+
+Bootstrapping is a general purpose technique that can be used for 
+placing confidence limits on statistics that you estimate without 
+any knowledge of the underlying distribution (e.g. a normal or 
+Poisson distribution).  In the case of phylogenetic trees, there are 
+several analytical methods for placing confidence limits on 
+groupings (actually on the internal branches) but these are either 
+restricted to particular tree drawing methods or only work on small 
+trees of 4 or 5 sequences.  Felsenstein (1985) showed how to use 
+bootstrapping to calculate confidence limits on trees.  His approach 
+is completely general and can be applied to any tree drawing method.  
+The main assumption of the method in this context is that the sites 
+in the alignment are independent; this will be true of some sequence 
+alignments (e.g. pseudogenes) but not others (e.g. rRNA's).  What 
+effect lack of independence will have on the results is not known.
+
+The method works by taking random samples of data from the complete 
+data set.  You compute the test statistic (tree in this case) on 
+each sample.   Variation in the statistic computed from the samples 
+gives a measure of variation in the statistic which can be used to 
+calculate confidence intervals.  Each random sample is the same size 
+as the complete data set and is taken WITH REPLACEMENT i.e. a data 
+point can be selected more than once (or not at all) in any given 
+sample.  
+
+In the case of an alignment N residues long, each random sample is a 
+random selection of N sites from the alignment.  For each sample, we 
+calculate a distance matrix and tree in the usual way.  Variation in 
+the sample trees compared to a tree calculated from the full data 
+set gives an indication of how well supported the tree is by the 
+data.  If the sample trees are very similar to each other and to the 
+full tree, then the tree is "strongly" supported; if the sample 
+trees show great variation, then the tree will be weakly supported.  
+In practice, you usually find some parts of a tree well supported, 
+others weakly.  This can be seen by counting how often each 
+monophyletic group in the full tree occurs in the sample trees.  
+
+For a particular grouping, one considers it to be significant at the 
+95% level (P <= 0.05) if it occurs in 95% of the bootstrap samples. 
+If a grouping is significant, it is significant with respect to the 
+particular data set and method used for drawing the tree.  
+Biological "significance" is another matter.
+
+
+PHYLIP.
+
+The Phylip package was written by Joe Felsenstein, University of 
+Washington, USA.  It provides Pascal source code for a large number 
+of programs for doing most types of phylogenetic analyses.  The 
+Phylip format alignments produced by this program can be used by all 
+of the Phylip programs, version 3.4 or later (March 1991).  It is 
+freely available from him as follows.
+
+
+
+================= PHYLIP information sheet =====================
+
+    PHYLIP - Phylogeny Inference Package (version 3.3)
+
+This is a FREE package of programs for inferring phylogenies and 
+carrying out certain related tasks.  At present it contains 28 
+programs, which carry out different algorithms on different kinds of 
+data.  The programs in the package are:
+
+      ---------- Programs for molecular sequence data ----------
+PROTPARS  Protein parsimony          
+DNAPARS   Parsimony method for DNA
+DNAMOVE   Interactive DNA parsimony  
+DNAPENNY  Branch and bound for DNA
+DNABOOT   Bootstraps DNA parsimony   
+DNACOMP   Compatibility for DNA
+DNAINVAR  Phylogenetic invariants    
+DNAML     Maximum likelihood method
+DNAMLK    DNAML with molecular clock 
+DNADIST   Distances from sequences
+RESTML    ML for restriction sites
+
+    ----------- Programs for distance matrix data ------------
+FITCH     Fitch-Margoliash and least-squares methods
+KITSCH    Fitch-Margoliash and least squares methods with
+          evolutionary clock
+
+    --- Programs for gene frequencies and continuous characters --
+CONTML    Maximum likelihood method  
+GENDIST   Computes genetic distances
+
+    ------------- Programs for discrete state data -----------
+MIX       Wagner, Camin-Sokal, and mixed parsimony criteria
+MOVE      Interactive Wagner, C-S, mixed parsimony program
+PENNY     Finds all most parsimonious trees by branch-and-bound
+BOOT      Bootstrap confidence interval on mixed parsimony methods
+DOLLOP, DOLMOVE, DOLPENNY, DOLBOOT   same as preceding four
+          programs, but for the Dollo and polymorphism parsimony 
+          criteria
+CLIQUE    Compatibility method       
+FACTOR    recode multistate characters
+
+    ---- Programs for plotting trees and consensus trees ----
+DRAWGRAM  Draws cladograms and phenograms on screens, plotters and 
+          printers
+DRAWTREE  Draws unrooted phylogenies on screens, plotters and 
+          printers
+CONSENSE  Majority-rule and strict consensus trees
+
+The package includes extensive documentation files that provide the 
+information necessary to use and modify the programs.
+
+COMPATIBILITY: The programs are written in a very standard subset of 
+Pascal, a language that is available on most computers (including 
+microcomputers). The programs require only trivial modifications to 
+run on most machines: for example they work with only minor 
+modifications with Turbo Pascal, and without modifications on VAX 
+VMS Pascal. Pascal source code is distributed in the regular version 
+of PHYLIP: compiled object code is not.  To use that version, you 
+must have a Pascal compiler.
+
+DISKETTE DISTRIBUTION: The package is distributed in a variety of 
+microcomputer diskette formats.   You should send FORMATTED 
+diskettes, which I will return with the package written on them. 
+Unfortunately, I cannot write any Apple formats.   See below for how 
+many diskettes to send.  The programs on the magnetic tape or 
+electronic network versions may of course also be moved to 
+microcomputers using a terminal program.
+
+PRECOMPILED VERSIONS: Precompiled executable programs for PCDOS 
+systems are available from me.  Specify the "PCDOS executable 
+version" and send the number of extra diskettes indicated below.   
+An Apple Macintosh version with precompiled code is available from 
+Willem Ellis, Instituut voor Taxonomische Zoologie, Zoologisch 
+Museum, Universiteit van Amsterdam, Plantage Middenlaan 64, 1018DH 
+Amsterdam, Netherlands, who asks that you send 5 800K diskettes.
+
+HOW MANY DISKETTES TO SEND: The following table shows for different 
+PCDOS formats how many diskettes to send, and how many extra 
+diskettes to send for the PCDOS executable version: 
+
+Diskette size   Density   For source code    For executables, send
+                                                in addition
+  3.5 inch      1.44 Mb          2                     1
+  5.25 inch      1.2 Mb          2                     2
+  3.5 inch       720 Kb          4                     2
+  5.25 inch      360 Kb          7                     4
+
+Some other formats are also available. You MUST tell me EXACTLY 
+which of these formats you need.  The diskettes MUST be formatted by 
+you before being sent to me. Sending an extra diskette may be 
+helpful.
+
+NETWORK DISTRIBUTION: The package is also available by distribution 
+of the files directly over electronic networks, and by anonymous ftp 
+from evolution.genetics.washington.edu.  Contact me by electronic 
+mail for details.
+
+TAPE DISTRIBUTION: The programs are also distributed on a magnetic 
+tape provided by you (which should be a small tape and need only be 
+able to hold two megabytes) in the following format: 9-track, ASCII, 
+odd parity, unlabelled, 6250 bpi (unless otherwise indicated).  
+Logical record: 80 bytes, physical record: 3200 bytes (i.e. blocking 
+factor 40). There are a total of 71 files. The first one describes 
+the contents of the package.
+
+POLICIES: The package is distributed free.  I do not make it 
+available or support it in South Africa.  The package will be 
+written on the diskettes or tape, which will be mailed back.  They 
+can be sent to:
+
+                                           Joe Felsenstein
+Electronic mail addresses:                 Department of Genetics SK-50
+ Internet:  joe at genetics.washington.edu    University of Washington
+ Bitnet/EARN:  felsenst at uwavm              Seattle, Washington 98195
+ UUCP:  uw-beaver!evolution.genetics!joe   U.S.A.
+
+
+===================== End of Phylip Info. Sheet ====================
+
+
+
+
+REFERENCES.
+
+Dayhoff, M.O., Schwartz, R.M. and Orcutt, B.C. (1978) in Atlas of 
+Protein Sequence and Structure, Vol. 5 supplement 3, Dayhoff, M.O. 
+(ed.), NBRF, Washington, p. 345.  
+
+Felsenstein, J. (1985)  Confidence limits on phylogenies: an 
+approach using the bootstrap.  Evolution 39, 783-791.
+
+Feng, D.-F. and Doolittle, R.F. (1987)  Progressive sequence 
+alignment as a prerequisite to correct phylogenetic trees.  
+J.Mol.Evol. 25, 351-360.
+
+Gotoh, O. (1982)  An improved algorithm for matching biological 
+sequences.  J.Mol.Biol. 162, 705-708.
+
+Gribskov, M., McLachlan, A.D. and Eisenberg, D. (1987) Profile 
+analysis: detection of distantly related proteins. PNAS USA 84, 
+4355-4358.
+
+Higgins, D.G. and Sharp, P.M. (1988)  CLUSTAL: a package for 
+performing multiple sequence alignments on a microcomputer.  Gene 
+73, 237-244.
+
+Higgins, D.G. and Sharp, P.M. (1989)  Fast and sensitive multiple 
+sequence alignments on a microcomputer.  CABIOS 5, 151-153.
+
+Kimura, M. (1980)   A simple method for estimating evolutionary 
+rates of base substitutions through comparative studies of 
+nucleotide sequences. J. Mol. Evol. 16, 111-120.
+
+Kimura, M. (1983)   The Neutral Theory of Molecular Evolution.  
+Cambridge University Press, Cambridge, England.
+
+Li, W.-H., Wu, C.-I. and Luo, C.-C. (1985)  A new method for 
+estimating synonymous and nonsynonymous rates of nucleotide 
+substitution considering the relative likelihood of nucleotide and 
+codon changes.  Mol.Biol.Evol. 2, 150-174.
+
+Myers, E.W. and Miller, W. (1988)  Optimal alignments in linear 
+space.  CABIOS 4, 11-17.
+
+Pearson, W.R. and Lipman, D.J. (1988)  Improved tools for biological 
+sequence comparison.  PNAS USA 85, 2444-2448.
+
+Saitou, N. and Nei, M. (1987)  The neighbor-joining method: a new 
+method for reconstructing phylogenetic trees.  Mol.Biol.Evol. 4, 
+406-425.
+
+Sneath, P.H.A. and Sokal, R.R. (1973)  Numerical Taxonomy.  Freeman, 
+San Francisco.
+
+Sokal, R.R. and Michener, C.D. (1958)  A statistical method for 
+evaluating systematic relationships.  Univ.Kansas Sci.Bull. 38, 
+1409-1438.
+
+Vingron, M. and Argos, P. (1991)  Motif recognition and alignment 
+for many sequences by comparison of dot matrices.  J.Mol.Biol. 218, 
+33-43.
+
+Wilbur, W.J. and Lipman, D.J. (1983)  Rapid similarity searches of 
+nucleic acid and protein data banks.  PNAS USA 80, 726-730.
+

Added: trunk/packages/clustalw/branches/upstream/current/clustalw.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/clustalw.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/clustalw.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,122 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#ifdef MAC
+#include <console.h>
+#endif
+#include "clustalw.h"
+
+/*
+*	Prototypes
+*/
+
+#ifdef MAC
+extern int ccommand(char ***);
+#endif
+
+extern void *ckalloc(size_t);
+extern void init_amenu(void);
+extern void init_interface(void);
+extern void init_matrix(void);
+extern void fill_chartab(void);
+extern void parse_params(Boolean);
+extern void main_menu(void);
+
+/*
+*	Global variables
+*/
+double **tmat;
+
+char revision_level[] = "W (1.83)";  /* JULIE  feb 2001*/
+
+Boolean interactive=FALSE;	/* set to TRUE by main() immediately before main_menu() is called */
+
+#ifdef MSDOS
+        char *help_file_name = "clustalw.hlp";	/* help file name used when MSDOS is defined */
+#else
+        char *help_file_name = "clustalw_help";	/* help file name used on every other build */
+#endif
+
+sint max_names; /* maximum length of names in current alignment file */
+
+float           gap_open,      gap_extend;
+float           pw_go_penalty, pw_ge_penalty;
+
+FILE *tree;
+FILE *clustal_outfile, *gcg_outfile, *nbrf_outfile, *phylip_outfile,
+     *gde_outfile, *nexus_outfile;
+FILE *fasta_outfile; /* Ramu */
+
+sint  *seqlen_array;
+sint max_aln_length;
+short usermat[NUMRES][NUMRES], pw_usermat[NUMRES][NUMRES];
+short def_aa_xref[NUMRES+1], aa_xref[NUMRES+1], pw_aa_xref[NUMRES+1];
+short userdnamat[NUMRES][NUMRES], pw_userdnamat[NUMRES][NUMRES];
+short def_dna_xref[NUMRES+1], dna_xref[NUMRES+1], pw_dna_xref[NUMRES+1];
+sint nseqs;
+sint nsets;
+sint *output_index;
+sint **sets;
+sint *seq_weight;
+sint max_aa;
+sint gap_pos1;
+sint gap_pos2;
+sint mat_avscore;
+sint profile_no;
+
+Boolean usemenu;	/* main() sets this FALSE before parse_params() and TRUE before main_menu() */
+Boolean dnaflag;
+Boolean distance_tree;
+
+char  **seq_array;
+char **names,**titles;
+char **args;	/* copies of argv[1..argc-1]; allocated and freed again in main() */
+char seqname[FILENAMELEN+1];
+
+char *gap_penalty_mask1 = NULL, *gap_penalty_mask2 = NULL;
+char *sec_struct_mask1 = NULL, *sec_struct_mask2 = NULL;
+sint struct_penalties;
+char *ss_name1 = NULL, *ss_name2 = NULL;
+
+Boolean user_series = FALSE;
+UserMatSeries matseries;
+short usermatseries[MAXMAT][NUMRES][NUMRES];
+short aa_xrefseries[MAXMAT][NUMRES+1];
+
+int main(int argc,char **argv)	/* entry point: initialise, process command-line arguments if any, then run the menu */
+{
+	sint i;
+	/* When MAC is defined, argc/argv are obtained from ccommand() instead of the caller. */
+#ifdef MAC
+	argc=ccommand(&argv);
+#endif
+
+    init_amenu();
+    init_interface();
+    init_matrix();
+	/* NOTE(review): the init_* routines and fill_chartab() are defined elsewhere; call order kept as-is. */
+	fill_chartab();
+
+	if(argc>1) {
+		args = (char **)ckalloc(argc * sizeof(char *));	/* argc slots allocated; only argc-1 are filled */
+	/* Copy argv[1..argc-1] into the global args[0..argc-2], i.e. without the program name. */
+		for(i=1;i<argc;++i) 
+		{
+			args[i-1]=(char *)ckalloc((strlen(argv[i])+1) * sizeof(char));	/* +1 for the terminating NUL */
+			strcpy(args[i-1],argv[i]);
+		}
+		usemenu=FALSE;	/* menu flag off while the command line is parsed */
+		parse_params(FALSE);
+
+		for(i=0;i<argc-1;i++) 	/* release the argc-1 copies made above */
+			ckfree(args[i]);
+		ckfree(args);
+	}
+	usemenu=TRUE;	/* reached with no arguments, or once parse_params() has returned */
+	interactive=TRUE;
+
+	main_menu();
+	/* Terminate with success status after main_menu() returns. */
+	exit(0);
+}
+

Added: trunk/packages/clustalw/branches/upstream/current/clustalw.doc
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/clustalw.doc	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/clustalw.doc	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,757 @@
+README for Clustal W version 1.7  June 1997
+
+             Clustal W version 1.7 Documentation
+
+This file provides some notes on the latest changes, installation and usage
+of the Clustal W multiple sequence alignment program.
+
+
+
+Julie Thompson (Thompson at EMBL-Heidelberg.DE)
+Toby Gibson    (Gibson at EMBL-Heidelberg.DE)
+
+European Molecular Biology Laboratory
+Meyerhofstrasse 1
+D 69117 Heidelberg
+Germany
+
+
+Des Higgins (Higgins at ucc.ie)
+
+University College Cork
+Cork
+Ireland
+
+
+Please e-mail bug reports/complaints/suggestions (polite if possible)
+to Toby Gibson or Des Higgins.  
+
+
+
+Thompson, J.D., Higgins, D.G. and Gibson, T.J. (1994)
+CLUSTAL W: improving the sensitivity of progressive multiple sequence alignment
+through sequence weighting, position-specific gap penalties and weight matrix
+choice.  Nucleic Acids Research, 22:4673-4680.
+
+--------------------------------------------------------------
+
+What's New (June 1997) in Version 1.7 (since version 1.6).
+
+
+1. The static arrays used by clustalw for storing the alignment data have been
+replaced by dynamically allocated memory. There is now no limit on the number
+or length of sequences which can be input.
+
+2. The alignment of DNA sequences now offers a new hard-coded matrix, as well
+as the identity matrix used previously. The new matrix is the default scoring
+matrix used by the BESTFIT program of the GCG package for the comparison of
+nucleic acid sequences. X's and N's are treated as matches to any IUB ambiguity
+symbol. All matches score 1.9; all mismatches for IUB symbols score 0.0.
+
+3. The transition weight option for aligning nucleotide sequences has been
+changed from an on/off toggle to a weight between 0 and 1.  A weight of zero
+means that the transitions are scored as mismatches; a weight of 1 gives 
+transitions the full match score. For distantly related DNA sequences, the
+weight should be near to zero; for closely related sequences it can be useful
+to assign a higher score.
+
+4. The RSF sequence alignment file format used by GCG Version 9 can now be
+read.
+
+5. The clustal sequence alignment file format has been changed to allow
+sequence names longer than 10 characters. The maximum length allowed is set in
+clustalw.h by the statement:
+#define MAXNAMES	10
+
+For the fasta format, the name is taken as the first string after the '>'
+character, stopping at the first white space. (Previously, the first 10
+characters were taken, replacing blanks by underscores).
+
+6. The bootstrap values written in the phylip tree file format can be assigned
+either to branches or nodes. The default is to write the values on the nodes,
+as this can be read by several commonly-used tree display programs. But note
+that this can lead to confusion if the tree is rooted and the bootstraps may
+be better attached to the internal branches: Software developers should ensure
+they can read the branch label format.
+
+7. The sequence weighting used during sequence to profile alignments has been
+changed. The tree weight is now multiplied by the percent identity of the
+new sequence compared with the most closely related sequence in the profile.
+
+8. The sequence weighting used during profile to profile alignments has been
+changed. A guide tree is now built for each profile separately and the
+sequence weights calculated from the two trees. The weights for each
+sequence are then multiplied by the percent identity of the sequence compared
+with the most closely related sequence in the opposite profile.
+
+9. The adjustment of the Gap Opening and Gap Extension Penalties for sequences
+of unequal length has been improved.
+
+10. The default order of the sequences in the output alignment file has been
+changed. Previously the default was to output the sequences in the same order
+as the input file. Now the default is to use the order in which the sequences
+were aligned (from the guide tree/dendrogram), thus automatically grouping
+closely related sequences.
+
+11. The option to 'Reset Gaps between alignments' has been switched off by
+default.
+
+12. The conservation line output in the clustal format alignment file has been
+changed. Three characters are now used:
+'*' indicates positions which have a single, fully conserved residue
+':' indicates that one of the following 'strong' groups is fully conserved:-
+                 STA
+                 NEQK
+                 NHQK
+                 NDEQ
+                 QHRK
+                 MILV
+                 MILF
+                 HY
+                 FYW
+
+'.' indicates that one of the following 'weaker' groups is fully conserved:-
+                 CSA
+                 ATV
+                 SAG
+                 STNK
+                 STPA
+                 SGND
+                 SNDEQK
+                 NDEQHK
+                 NEQHRK
+                 FVLIM
+                 HFY
+
+These are all the positively scoring groups that occur in the Gonnet Pam250
+matrix. The strong and weak groups are defined as strong score >0.5 and weak
+score =<0.5 respectively.
+
+13. A bug in the modification of the Myers and Miller alignment algorithm
+for residue-specific gap penalties has been fixed. This occasionally caused
+new gaps to be opened a few residues away from the optimal position.
+
+14. The GCG/MSF input format no longer needs the word PILEUP on the first
+line. Several versions can now be recognised:-
+      1.  The word PILEUP as the first word in the file
+      2.  The word !!AA_MULTIPLE_ALIGNMENT or !!NA_MULTIPLE_ALIGNMENT
+          as the first word in the file
+      3.  The characters MSF on the first line of the file, and the
+          characters .. at the end of the line.
+
+15. The standard command line separator for UNIX systems has been changed from
+'/' to '-'. ie. to give options on the command line, you now type
+
+     clustalw input.aln -gapopen=8.0
+
+instead of  clustalw input.aln /gapopen=8.0
+
+
+                      ATTENTION SOFTWARE DEVELOPERS!!
+                      -------------------------------
+
+The CLUSTAL sequence alignment output format has been modified:
+
+1. Names longer than 10 chars are now allowed. (The maximum is specified in
+clustalw.h by '#define MAXNAMES'.)
+
+2. The consensus line now consists of three characters: '*',':' and '.'. (Only
+the '*' and '.' were previously used.)
+
+3. An option (not the default) has been added, allowing the user to print out
+sequence numbers at the end of each line of the alignment output.
+
+4. Both RNA bases (U) and base ambiguities are now supported in nucleic acid
+sequences. In the past, all characters (upper or lower case) other than
+a,c,g,t or u were converted to N. Now the following characters are recognised 
+and retained in the alignment output: ABCDGHKMNRSTUVWXY (upper or lower case).
+
+5. A  Blank line inadvertently added in the version 1.6 header has been taken
+out again.
+
+
+--------------------------------------------------------------
+
+What's New (March 1996) in Version 1.6 (since version 1.5).
+
+
+1) Improved handling of sequences of unequal length.  Previously, we
+increased the gap extension penalties for both sequences if the two sequences
+(or groups of previously aligned sequences) were of different lengths.  
+Now, we increase the gap opening and extension penalties for the shorter 
+sequence only.   This helps prevent short sequences being stretched out
+along longer ones.
+
+2) Added the "Gonnet" series of weight matrices (from Gaston Gonnet and 
+co-workers at the ETH in Zurich).  Fixed a bug in the matrix
+choice menu; now PAM matrices can be selected ok.
+
+3) Added secondary structure/gap penalty masks.  These allow you to 
+include, in an alignment, a position specific set of gap penalties.  
+You can either set a gap opening penalty at each position or specify
+the secondary structure (if protein; alpha helix, beta strand or loop)
+and have gap penalties set automatically.   This, basically, is used to make 
+gaps harder to open inside helices or strands.  
+
+These masks are only used in the "profile alignment" menu.  They may be read in
+as part of an alignment in a special format (see the on-line help for
+details) or associated with each sequence, if the sequences are in Swiss Prot 
+format and secondary structure information is given.   All of the mask 
+parameters can be set from the profile alignment menu.  Basically, the
+mask is made up of a series of numbers between 1 and 9, one per position.
+The gap opening penalty at a position is calculated as the starting penalty
+multiplied by the mask value at that site. 
+
+4) Added command line options /profile and /sequences.
+These allow users to choose between normal profile alignment where the
+two profiles (pre-existing alignments specified in the files
+/profile1= and /profile2=) are merged/aligned with each other (/profile)
+and the case where the individual sequences in /profile2 are aligned
+sequentially with the alignment in /profile1 (/sequences).
+
+5) Fixed bug in modified Myers and Miller algorithm - gap penalty score
+was not always calculated properly for type 2 midpoints.  This is the core
+alignment algorithm.
+
+6) Only allows one output file format to be selected from command line
+- ie. multiple output alignment files are not allowed.
+
+7) Fixed 'bad calls to ckfree' error during calculation of phylip distance
+matrix.
+
+8) Fixed command line options /gapopen /gapext /type=protein /negative.
+
+9) Allowed user to change command line separator on UNIX from '/' to '-'.
+This allows unix users to use the more conventional '-' symbol
+for separating command line options.  "/" can then be used in unix
+file names on the command line.   The symbol that is used,
+is specified in the file clustalw.h which must be edited if you 
+wish to change it (and the program must then be recompiled).   Find the 
+block of code in clustalw.h that corresponds to the operating system you
+are using.  These blocks are started by one of the following:
+
+#ifdef VMS 
+#elif MAC
+#elif MSDOS
+#elif UNIX
+
+On the next line after each is the line:
+
+#define COMMANDSEP '/'
+
+Change this in the appropriate block of code (e.g. the UNIX block) to 
+
+#define COMMANDSEP '-'
+
+if you wish to use the "-" character as command separator.
+  
+
+       
+--------------------------------------------------------------
+
+What's New (April 1995) in Version 1.5 (since version 1.3).
+
+1) ported to MAC and PC.  These versions are quite slow unless you
+have a nice beefy machine.  On a Power Mac or a Pentium box
+it is nice and fast.  Two precompiled versions are supplied for Macs
+(Power mac and old mac versions).
+Mac:       1500 residues by 100 sequences
+Power Mac  3000    "     "   "     "
+PC         1500    "     "   "     "
+
+2) alignment of new sequences to an alignment.  Fixed a serious bug
+which assigned weights to the wrong sequences.  Now also, weights 
+sequences according to distance from the incoming sequence.  The
+new weights are: tree weights * similarity to incoming sequence.
+The tree weights are the old weights that we derive from the tree
+connecting all the sequences in the existing alignment.
+
+3) for all platforms, output linelength = 60.
+
+4) Bootstrap files (*.phb): the "final" node (arbitrary trichotomy
+at the end of the neighbor-joining process) is labelled as 
+TRICHOTOMY in the bootstrap output files.  This is to help
+link bootstrap figures with nodes when you reroot the tree.
+
+5) Command line /bootstrap option now more robust.
+
+--------------------------------------------------------------
+INTRODUCTION
+
+
+
+This document gives some BRIEF notes about usage of the Clustal W
+multiple alignment program for UNIX and VMS machines.  Clustal W
+is a major update and rewrite of the Clustal V program which 
+was described in:
+
+Higgins, D.G., Bleasby, A.J. and Fuchs, R. (1992)
+CLUSTAL V: improved software for multiple sequence alignment.
+Computer Applications in the Biosciences (CABIOS), 8(2):189-191.
+
+The main new features are a greatly improved (more sensitive)
+multiple alignment procedure for proteins and improved support
+for different file formats.  This software was described in:
+
+Thompson, J.D., Higgins, D.G. and Gibson, T.J. (1994)
+CLUSTAL W: improving the sensitivity of progressive multiple
+sequence alignment through sequence weighting, position specific
+gap penalties and weight matrix choice.
+Nucleic Acids Research, 22(22):4673-4680.
+
+
+The usage of Clustal W is largely the same as for
+Clustal V details of which are described in clustalv.doc.  Details of the
+new alignment algorithms are described in the manuscript by
+Thompson et al. above, an ascii/text version of which is included 
+(clustalw.ms). This file lists some of the details not covered by either 
+of the above documents.
+
+
+There are brief notes on the following topics:
+
+1) Installation for VMS and UNIX and MAC and PC
+2) File input
+3) file output
+4) changes to the alignment algorithms
+5) minor modifications to the phylogenetic tree and bootstrapping methods
+6) summary of the command line usage.
+
+-------------------------------------------------------------------
+
+1) INSTALLATION    (for Unix, VAX/VMS, PC and MAC)
+
+
+
+*****IMPORTANT*****
+If you wish to recompile the program (or compile it for the first
+time; you will have to do this with UNIX):
+first check the file CLUSTALW.H which needs to be changed if you
+move the code between unix and vms machines.  At the top
+of the file are four lines which define one of VMS, MSDOS, MAC or
+UNIX to be 1.  All of these EXCEPT one must be commented out
+using enclosed /* ... */.  
+*******************
+
+
+Unix
+-----
+
+Make files are supplied for unix machines.  The code was compiled and
+tested using Decstation (Ultrix), SUN (Gnu C compiler/gcc), Silicon
+Graphics (IRIX) and DEC/Alpha (OSF1).  We have not tested the code on any other
+systems.  Just use makefile to make on most systems.  For Sun, you need to
+have the GNU C (gcc) compiler installed ... use the file makefile.sun in this
+case.  You make the program with:
+make  (or make -f makefile.sun)
+
+This produces the file clustalw which can be run by typing clustalw and
+pressing return.  The help file is called clustalw_help
+
+
+VMS
+----
+
+There is a small DCL command file (VMSLINK.COM) to compile and link the
+code for VMS machines (vax or alpha).  This procedure just compiles the
+source files and links using default settings.  Run it using:
+$ @vmslink
+This produces Clustalw.exe which can be run using the run command:
+$ run clustalw
+
+The intermediate object files can be deleted with:
+$ del *.obj;
+
+There is an extensive command line facility.  To use this, you must
+create a symbol to run the program (and put this in your login.com file).
+e.g.
+$ clustalw :== $$drive:[dir.dir]clustalw
+where $drive is the drive on which the executable file is stored (clustalw.exe)
+and [dir.dir] is the full directory specification.  NOTE THE EXTRA DOLLAR SIGN.
+Then the program can be run using the command:
+$ clustalw
+
+
+PC
+__
+
+We supply an executable file (Clustalw.exe) which will run using MSDOS.
+It will also run under windows (as a DOS application) 
+*** IF you have a maths coprocessor***.  If you do not have a maths chip 
+(e.g. 80387), the program can only be run under MSDOS.  In the latter case, 
+you must have the file EMU387.exe in the same directory as CLUSTALW.EXE.  
+This file emulates a maths chip if you do not have one.  
+
+
+We generated the executable file using gnu c for MSDOS. 
+It will also compile (with about 10,000 warning messages)
+using Microsoft C but we have not tested it and there appear to be problems
+with the executable. 
+
+You will need to use a "memory extender" to allow the program to get at more 
+than 640kb of memory.
+
+
+
+MAC
+---
+
+The code compiles for Power Mac and older macs using Metrowerks CodeWarrior
+C compiler.  We supply 2 executable programs (one each for PowerMac and
+older mac: ClustalwPPC and Clustalw68k).  These need up to
+10mb of memory to run which needs to be adjusted with the Get Info (%I)
+command from the Finder if you have problems.  Just double click the 
+executable file name or icon and off you go (we hope).
+
+As a special treat for Mac users, we supply an executable and brief readme
+file for NJPLOT.   This is a really nice program by Manolo Gouy
+(University of Lyon, France) that allows you to import the trees
+made by Clustal W and display them/manipulate them.  It will properly
+display the bootstrap figures from the *.phb files.  It can export the
+trees in PICT format which can then be used by MacDraw for example.
+
+
+-------------------------------------------------------------------------
+
+2) FILE INPUT (sequences to be aligned)
+
+
+
+The sequences must all be in one file (or two files for a "profile alignment")
+in ONE of the following formats:
+
+FASTA (Pearson), NBRF/PIR, EMBL/Swiss Prot, GDE, CLUSTAL, GCG/MSF, GCG9/RSF.
+
+The program tries to "guess" which format is being used and whether
+the sequences are nucleic acid (DNA/RNA) or amino acid (proteins).  The
+format is recognised by the first characters in the file.  This is kind
+of stupid/crude but works most of the time and it is difficult
+to do reliably, any other way.
+
+
+Format           First non blank word or character in the file.
+...............................................................
+FASTA            >
+NBRF             >P1;  or >D1;
+EMBL/SWISS       ID
+GDE protein      % 
+GDE nucleotide   # 
+CLUSTAL          CLUSTAL (blocked multiple alignments)
+GCG/MSF          PILEUP  or !!AA_MULTIPLE_ALIGNMENT or !!NA_MULTIPLE_ALIGNMENT
+                 or MSF on the first line, and '..' at the end of line
+GCG9/RSF         !!RICH_SEQUENCE
+
+Note, that the only way of spotting that a file is MSF format is if
+the word PILEUP appears at the very beginning of the file.  If you 
+produce this format from software other than the GCG pileup program,
+then you will have to insert the word PILEUP at the start of the file.
+Similarly, if you use clustal format, the word CLUSTAL must appear first.
+
+All of these formats can be used to read in AN EXISTING FULL ALIGNMENT.
+With CLUSTAL format, this is just the same as the output format of this
+program and Clustal V.  If you use PILEUP or CLUSTAL format, all sequences
+must be the same length, INCLUDING GAPS ("-" in clustal format; "." in MSF).
+With the other formats, sequences can be gapped with "-" characters.  If you
+read in any gaps these are kept during any later alignments.  You can use
+this facility to read in an alignment in order to calculate a phylogenetic
+tree OR to output the same alignment in a different format (from the
+output format options menu of the multiple alignment menu) e.g. read
+in a GCG/MSF format alignment and output a PHYLIP format alignment. This is 
+also useful to read in one reference alignment and to add one or more new 
+sequences to it using the "profile alignment" facilities.
+
+DNA vs. PROTEIN:  the program will count the number of A,C,G,T,U and N
+characters.  If 85% or more of the characters in a sequence are as above,
+then DNA/RNA is assumed, protein otherwise.  
+
+-------------------------------------------------------------------------
+
+
+3) FILE OUTPUT 
+
+
+1) the alignments.
+
+In the multiple alignment and profile alignment menus, there is a menu
+item to control the output format(s).
+
+The alignment output format can be set to any (or all) of:
+CLUSTAL  (a self explanatory blocked alignment)
+NBRF/PIR (same as input format but with "-" characters for gaps)
+MSF      (the main GCG package multiple alignment format)
+PHYLIP   (Joe Felsenstein's phylogeny inference package.  Gaps are set to
+         "-" characters.  For some programs (e.g. PROTPARS/DNAPARS) these 
+         should be changed to "?" characters for unknown residues.)
+GDE      (Used by Steven Smith's GDE package)
+
+You can also choose between having the sequences in the same order as in 
+the input file or writing them out in an order that more closely matches the 
+order used to carry out the multiple alignment.
+
+
+2) The trees.
+
+Believe it or not, we now use the New Hampshire (nested parentheses)
+format as default for our trees.  This format is compatible with e.g. the
+PHYLIP package.  If you want to view a tree, you can use the RETREE or 
+DRAWGRAM/DRAWTREE programs of PHYLIP.  This format is used for all our 
+trees, even the initial guide trees for deciding the order of multiple
+alignment.  The output trees from the phylogenetic tree menu can also be
+requested in our old verbose/cryptic format.  This may be more useful
+if, for example, you wish to see the bootstrap figures.  The bootstrap
+trees in the default New Hampshire format give the bootstrap figures
+as extra labels which can be viewed very easily using TREETOOL which is
+available as part of the GDE package.  TREETOOL is available from the
+RDP project by ftp from rdp.life.uiuc.edu.  
+
+The New Hampshire format is only useful if you have software to display or
+manipulate the trees.  The PHYLIP package is highly recommended if you intend
+to do much work with trees and includes programs for doing this.  If you do
+not have such software, request the trees in the older clustal format
+and see the documentation for Clustal V (clustalv.doc).  WE DO NOT PROVIDE
+ANY DIRECT MEANS FOR VIEWING TREES GRAPHICALLY.
+
+-------------------------------------------------------------------------
+
+4) THE ALIGNMENT ALGORITHMS
+
+
+The basic algorithm is the same as for Clustal V and is described in some
+detail in clustalv.doc.  The new modifications are described in detail in 
+clustalw.ms.  Here we just list some notes to help answer some of the most
+obvious questions.
+
+
+Terminal Gaps
+
+In the original Clustal V program, terminal gaps were penalised the same
+as all other gaps.  This caused some ugly side effects e.g.
+
+acgtacgtacgtacgt                              acgtacgtacgtacgt
+a----cgtacgtacgt  gets the same score as      ----acgtacgtacgt
+
+NOW, terminal gaps are free.  This is better on average and stops silly
+effects like single residues jumping to the edge of the alignment.  However,
+it is not perfect.  It does mean that if there should be a gap near the end 
+of the alignment, the program may be reluctant to insert it i.e. 
+
+cccccgggccccc                                              cccccgggccccc
+ccccc---ccccc  may be considered worse (lower score) than  cccccccccc---
+
+In the right hand case above, the terminal gap is free and may score higher
+than the left hand alignment.  This can be prevented by lowering the gap
+opening and extension penalties.   It is difficult to get this right all the
+time.  Please watch the ends of your alignments. 
+
+
+
+Speed of the initial (pairwise) alignments (fast approximate/slow accurate)
+
+By default, the initial pairwise alignments are now carried out using a full
+dynamic programming algorithm.  This is more accurate than the older hash/
+k-tuple based alignments (Wilbur and Lipman) but is MUCH slower.  On a fast
+workstation you may not notice but on a slow box, the difference is extreme.
+You can set the alignment method from the menus easily to the older, faster
+method.
+
+
+
+Delaying alignment of distant sequences
+
+The user can set a cut off to delay the alignment of the most divergent
+sequences in a data set until all other sequences have been aligned.  By 
+default, this is set to 40% which means that if a sequence is less than 40%
+identical to any other sequence, its alignment will be delayed.  
+
+
+
+Iterative realignment/Reset gaps between alignments
+
+By default, if you align a set of sequences a second time (e.g. with changed
+gap penalties), the gaps from the first alignment are discarded.  You can
+set this from the menus so that older gaps will be kept between alignments.
+This can sometimes give better alignments by keeping the gaps (do not reset
+them) and doing the full multiple alignment a second time.  Sometimes, the
+alignment will converge on a better solution; sometimes the new alignment will
+be the same as the first.  There can be a strange side effect: you can get
+columns of nothing but gaps introduced.  
+
+Any gaps that are read in from the input file are always kept, regardless 
+of the setting of this switch.  If you read in a full multiple alignment, the "reset
+gaps" switch has no effect.  The old gaps will remain and if you carry out 
+a multiple alignment, any new gaps will be added in.  If you wish to carry out 
+a full new alignment of a set of sequences that are already aligned in a file
+you must input the sequences without gaps.
+
+
+
+Profile alignment
+
+By profile alignment, we simply mean the alignment of old alignments/sequences.
+In this context, a profile is just an existing alignment (or even a set of 
+unaligned sequences; see below).  This allows you to
+read in an old alignment (in any of the allowed input formats) and align
+one or more new sequences to it.  From the profile alignment menu, you
+are allowed to read in 2 profiles.  Either profile can be a full alignment
+OR a single sequence.  In the simplest mode, you simply align the two profiles
+to each other. This is useful if you want to gradually build up a full
+multiple alignment.  
+
+A second option is to align the sequences from the second profile, one at
+a time to the first profile.  This is done, taking the underlying tree between
+the sequences into account.  This is useful if you have a set of new sequences
+(not aligned) and you wish to add them all to an older alignment.
+
+----------------------------------------------------------------------------
+
+5) CHANGES TO THE PHYLOGENETIC TREE CALCULATIONS AND SOME HINTS.
+
+
+
+IMPROVED DISTANCE CALCULATIONS FOR PROTEIN TREES
+
+
+The phylogenetic trees in Clustal W (the real trees that you calculate
+AFTER alignment; not the guide trees used to decide the branching order
+for multiple alignment) use the Neighbor-Joining method of Saitou and
+Nei based on a matrix of "distances" between all sequences.  These distances
+can be corrected for "multiple hits".  This is normal practice when accurate
+trees are needed.  This correction stretches distances (especially large ones)
+to try to correct for the fact that OBSERVED distances (mean number of 
+differences per site) greatly underestimate the actual number that happened
+during evolution.  
+
+In Clustal V we used a simple formula to convert an observed distance to one
+that is corrected for multiple hits.  The observed distance is the mean number
+of differences per site in an alignment (ignoring sites with a gap) and is
+therefore always between 0.0 (for identical sequences) and 1.0 (no residues the
+same at any site).  These distances can be multiplied by 100 to give percent
+difference values.  100 minus percent difference gives percent identity.
+The formula we use to correct for multiple hits is from Motoo Kimura
+(Kimura, M. The neutral Theory of Molecular Evolution, Camb.Univ.Press, 1983,
+page 75) and is:
+
+K = -Ln(1 - D - (D.D)/5)  where D is the observed distance and K is       
+                              corrected distance.
+
+This formula gives mean number of estimated substitutions per site and, in
+contrast to D (the observed number), can be greater than 1 i.e. more than
+one substitution per site, on average.  For example, if you observe 0.8
+differences per site (80% difference; 20% identity), then the above formula
+predicts that there have been 2.5 substitutions per site over the course 
+of evolution since the 2 sequences diverged.  This can also be expressed in 
+PAM units by multiplying by 100 (mean number of substitutions per 100 residues).
+The PAM scale of evolution and its derivation/calculation comes from the
+work of Margaret Dayhoff and co-workers (the famous Dayhoff PAM series
+of weight matrices also came from this work).  Dayhoff et al constructed
+an elaborate model of protein evolution based on observed frequencies
+of substitution between very closely related proteins.  Using this model,
+they derived a table relating observed distances to predicted PAM distances.
+Kimura's formula, above, is just a "curve fitting" approximation to this table.
+It is very accurate in the range 0.75 > D > 0.0 but becomes increasingly
+inaccurate at high D (>0.75) and fails completely at around D = 0.85.
+
+To circumvent this problem, we calculated all the values for K corresponding
+to D above 0.75 directly using the Dayhoff model and store these in an 
+internal table, used by Clustal W.  This table is declared in the file dayhoff.h and
+gives values of K for all D between 0.75 and 0.93 in intervals of 0.001 i.e.
+for D = 0.750, 0.751, 0.752 ...... 0.929, 0.930.   For any observed D 
+higher than 0.930, we arbitrarily set K to 10.0.  This sounds drastic but
+with real sequences, distances of 0.93 (less than 7% identity) are rare.
+If your data set includes sequences with this degree of divergence, you
+will have great difficulty getting accurate trees by ANY method; the alignment
+itself will be very difficult (to construct and to evaluate).
+
+There are some important
+things to note.  Firstly, this formula works well if your sequences are
+of average amino acid composition and if the amino acids substitute according
+to the original Dayhoff model.  In other cases, it may be misleading.  Secondly,
+it is based only on observed percent distance i.e. it does not DIRECTLY
+take conservative substitutions into account.  Thirdly, the error on the
+estimated PAM distances may be VERY great for high distances; at very high
+distance (e.g. over 85%) it may give largely arbitrary corrected distances.
+In most cases, however, the correction is still worth using; the trees will
+be more accurate and the branch lengths will be more realistic.  
+
+A far more sophisticated distance correction based on a full Dayhoff
+model which DOES take conservative substitutions and actual amino acid
+composition into account, may be found in the PROTDIST program of the
+PHYLIP package.  For serious tree makers, this program is highly recommended. 
+
+
+
+TWO NOTES ON BOOTSTRAPPING...
+
+When you use the BOOTSTRAP in Clustal W to estimate the reliability of parts
+of a tree, many of the uncorrected distances may randomly exceed the arbitrary cut
+off of 0.93 (sequences only 7% identical) if the sequences are distantly
+related.  This will happen randomly i.e. even if none of the pairs of 
+sequences are less than 7% identical, the bootstrap samples may contain pairs
+of sequences that do exceed this cut off.
+If this happens, you will be warned.  In practice, this can
+happen with many data sets.  It is not a serious problem if it happens rarely.
+If it does happen (you are warned when it happens and told how often the
+problem occurs), you should consider removing the most distantly
+related sequences and/or using the PHYLIP package instead.
+
+
+A further problem arises in almost exactly the opposite situation: when
+you bootstrap a data set which contains 3 or more sequences that are identical
+or almost identical.  Here, the sets of identical sequences should be shown
+as a multifurcation (several sequences joining at the same part of the tree).
+Because the Neighbor-Joining method only gives strictly dichotomous trees
+(never more than 2 sequences join at one time), this cannot be exactly 
+represented.  In practice, this is NOT a problem as there will be some
+internal branches of zero length separating the sequences.  If you
+display the tree with all branch lengths, you will still see a multifurcation.  
+However, when you bootstrap
+the tree, only the branching orders are stored and counted.  In the case
+of multifurcations, the exact branching order is arbitrary but the program
+will always get the same branching order, depending only on the input order
+of the sequences.  In practice, this is only a problem in situations where
+you have a set of sequences where all of them are VERY similar.  In this case,
+you can find very high support for some groupings which will disappear if you
+run the analysis with a different input order.  Again, the PHYLIP package
+deals with this by offering a JUMBLE option to shuffle the input order
+of your sequences between each bootstrap sample.  
+
+----------------------------------------------------------------------------
+
+6) SUMMARY OF THE COMMAND LINE USAGE
+
+Clustal W is designed to be run interactively.  However, there are many 
+situations where it is convenient to run it from the command line, especially
+if you wish to run it from another piece of software (e.g. SeqApp or GDE).
+All parameters can be set from the command line by giving options after the
+clustalw command. On UNIX, options should be preceded by '-'; all other systems
+use the '/' character.
+
+If anything is put on the command line, the program will (attempt to) carry
+out whatever is requested and will exit.  If you wish to use the command
+line to set some parameters and then go into interactive mode, use the
+command line switch: interactive .... e.g.
+
+clustalw -quicktree -interactive    on UNIX
+or
+clustalw /quicktree /interactive    on VMS,MAC and PC
+
+will set the default initial alignment mode to fast/approximate and will then
+go to the main menu.
+
+
+To see a list of all the command line parameters, type: 
+
+clustalw -options           on UNIX
+or
+clustalw /options           on VMS,MAC and PC
+
+and you will see a list with no explanation.
+
+
+To get (VERY BRIEF) help on command line usage, use the /HELP or /CHECK
+(-help or -check on UNIX systems) options.  Otherwise, the command line
+usage is self explanatory or is explained in clustalv.doc.  The defaults
+for all parameters are set in the file param.h which can be changed easily 
+(remember to recompile the program afterwards :-).
+
+------------------------------------------------------------------------------


Property changes on: trunk/packages/clustalw/branches/upstream/current/clustalw.doc
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/clustalw/branches/upstream/current/clustalw.h
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/clustalw.h	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/clustalw.h	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,250 @@
+/*#include "/us1/user/julie/dmalloc/malloc.h"*/
+/*********************CLUSTALW.H*********************************************/
+/****************************************************************************/
+
+   /*
+   Main header file for ClustalW.  Uncomment ONE of the following 4 lines
+   depending on which compiler you wish to use.
+   */
+
+/*#define VMS 1                 VAX or ALPHA VMS */
+
+/*#define MAC 1                 Think_C for Macintosh */
+
+/*#define MSDOS 1               Turbo C for PC's */
+
+#define UNIX 1                /*Ultrix/Decstation, Gnu C for 
+                                Sun, IRIX/SGI, OSF1/ALPHA */
+
+/***************************************************************************/
+/***************************************************************************/
+
+
+#include "general.h"
+
+#define MAXNAMES		30	/* Max chars read for seq. names */
+#define MAXTITLES		60      /* Title length */
+#define FILENAMELEN 	256             /* Max. file name length */
+	
+#define UNKNOWN   0
+#define EMBLSWISS 1
+#define PIR 	  2
+#define PEARSON   3
+#define GDE    	  4
+#define CLUSTAL   5	/* DES */
+#define MSF       6 /* DES */
+#define RSF       7	/* JULIE */
+#define USER      8	/* DES */
+#define PHYLIP    9	/* DES */
+#define NEXUS    10/* DES */
+#define FASTA    11/* Ramu */
+
+#define NONE      0
+#define SECST     1
+#define GMASK     2
+
+#define PROFILE 0
+#define SEQUENCE 1
+
+#define BS_NODE_LABELS 2
+#define BS_BRANCH_LABELS 1
+
+#define PAGE_LEN       22   /* Number of lines of help sent to screen */
+
+#define PAGEWIDTH	80  /* maximum characters on output file page */
+#define LINELENGTH     	60  /* Output file line length */
+#define GCG_LINELENGTH 	50
+
+#ifdef VMS						/* Defaults for VAX VMS */
+#define COMMANDSEP '/'
+#define DIRDELIM ']'		/* Last character before file name in full file 
+							   specs */
+#define INT_SCALE_FACTOR 1000 /* Scaling factor to convert float to integer for profile scores */
+
+#elif MAC
+#define COMMANDSEP '/'
+#define DIRDELIM ':'
+#define INT_SCALE_FACTOR 100  /* Scaling factor to convert float to integer for profile scores */
+
+#elif MSDOS
+#define COMMANDSEP '/'
+#define DIRDELIM '\\'
+#define INT_SCALE_FACTOR 100  /* Scaling factor to convert float to integer for profile scores */
+
+#elif UNIX
+#define COMMANDSEP '-'
+#define DIRDELIM '/'
+#define INT_SCALE_FACTOR 1000 /* Scaling factor to convert float to integer for profile scores */
+#endif
+
+#define NUMRES 32		/* max size of comparison matrix */
+
+#define INPUT 0
+#define ALIGNED 1
+
+#define LEFT 1
+#define RIGHT 2
+
+#define NODE 0
+#define LEAF 1
+
+#define GAPCOL 32		/* position of gap open penalty in profile */
+#define LENCOL 33		/* position of gap extension penalty in profile */
+
+typedef struct node {		/* phylogenetic tree structure */
+        struct node *left;      /* link to the left child node */
+        struct node *right;     /* link to the right child node */
+        struct node *parent;    /* link to the parent node */
+        float dist;             /* presumably the branch length -- TODO confirm */
+        sint  leaf;             /* presumably NODE or LEAF (defined above) -- TODO confirm */
+        int order;
+        char name[64];          /* fixed 64-byte name buffer */
+} stree, *treeptr;              /* stree: the node type; treeptr: pointer to a node */
+
+typedef struct {		/* presumably one matrix-menu entry -- TODO confirm */
+	char title[30];		/* fixed 30-byte buffer */
+	char string[30];	/* fixed 30-byte buffer */
+} MatMenuEntry;
+
+typedef struct {
+	int noptions;		/* presumably the number of opt[] entries in use -- TODO confirm */
+	MatMenuEntry opt[10];	/* fixed capacity: at most 10 entries */
+} MatMenu;
+
+#define MAXMAT 10
+
+typedef struct {
+	int llimit;	/* presumably the lower bound of the range this matrix covers -- TODO confirm */
+	int ulimit;	/* presumably the upper bound of that range -- TODO confirm */
+	short *matptr;	/* pointer to short data; presumably the matrix values -- TODO confirm */
+	short *aa_xref;	/* pointer to short data; presumably a residue cross-reference -- TODO confirm */
+} SeriesMat;
+
+typedef struct {
+	int nmat;		/* presumably how many mat[] slots are in use -- TODO confirm */
+	SeriesMat mat[MAXMAT];	/* fixed-capacity array of MAXMAT SeriesMat entries */
+} UserMatSeries;
+	
+
+/*
+   Prototypes
+*/
+
+/* alnscore.c */
+void aln_score(void);
+/* interface.c */
+void parse_params(Boolean);
+void init_amenu(void);
+void init_interface(void);
+void 	main_menu(void);
+FILE 	*open_output_file(char *, char *, char *, char *);
+FILE    *open_explicit_file(char *);
+sint seq_input(Boolean);
+Boolean open_alignment_output(char *);
+void create_alignment_output(sint fseq,sint lseq);
+void align(char *phylip_name);
+void profile_align(char *p1_tree_name,char *p2_tree_name);/* Align 2 alignments */
+void make_tree(char *phylip_name);
+void get_tree(char *phylip_name);
+sint profile_input(void);                        /* read a profile */
+void new_sequence_align(char *phylip_name);
+Boolean user_mat(char *, short *, short *);
+Boolean user_mat_series(char *, short *, short *);
+void get_help(char);
+void clustal_out(FILE *, sint, sint, sint, sint);
+void nbrf_out(FILE *, sint, sint, sint, sint);
+void gcg_out(FILE *, sint, sint, sint, sint);
+void phylip_out(FILE *, sint, sint, sint, sint);
+void gde_out(FILE *, sint, sint, sint, sint);
+void nexus_out(FILE *, sint, sint, sint, sint);
+void fasta_out(FILE *, sint, sint, sint, sint);
+void print_sec_struct_mask(int prf_length,char *mask,char *struct_mask);
+void fix_gaps(void);
+
+
+/* calcgapcoeff.c */
+void calc_gap_coeff(char **alignment, sint *gaps, sint **profile, Boolean struct_penalties,
+                   char *gap_penalty_mask, sint first_seq, sint last_seq,
+                   sint prf_length, sint gapcoef, sint lencoef);
+/* calcprf1.c */
+void calc_prf1(sint **profile, char **alignment, sint *gaps, sint matrix[NUMRES ][NUMRES ], 
+               sint *seq_weight, sint prf_length, sint first_seq, sint last_seq);
+/* calcprf2.c */
+void calc_prf2(sint **profile, char **alignment, sint *seq_weight, sint prf_length,
+               sint first_seq, sint last_seq);
+/* calctree.c */
+void calc_seq_weights(sint first_seq, sint last_seq,sint *seq_weight);
+void create_sets(sint first_seq, sint last_seq);
+sint read_tree(char *treefile, sint first_seq, sint last_seq);
+void clear_tree(treeptr p);
+sint calc_similarities(sint nseqs);
+/* clustalw.c */
+int main(int argc, char **argv);
+/* gcgcheck.c */
+int SeqGCGCheckSum(char *seq, sint len);
+/* malign.c */
+sint malign(sint istart,char *phylip_name);
+sint seqalign(sint istart,char *phylip_name);
+sint palign1(void);
+float countid(sint s1, sint s2);
+sint palign2(char *p1_tree_name,char *p2_tree_name);
+/* pairalign.c */
+sint pairalign(sint istart, sint iend, sint jstart, sint jend);
+/* prfalign.c */
+lint prfalign(sint *group, sint *aligned);
+/* random.c */
+unsigned long linrand(unsigned long r);
+unsigned long addrand(unsigned long r);
+void addrandinit(unsigned long s);
+/* readmat.c */
+void init_matrix(void);
+sint get_matrix(short *matptr, short *xref, sint matrix[NUMRES ][NUMRES ], Boolean neg_flag,
+                sint scale);
+sint read_user_matrix(char *filename, short *usermat, short *xref);
+sint read_matrix_series(char *filename, short *usermat, short *xref);
+int getargs(char *inline1, char *args[], int max);
+/* sequence.c */
+void fill_chartab(void);
+sint readseqs(sint first_seq);
+/* showpair.c */
+void show_pair(sint istart, sint iend, sint jstart, sint jend);
+/* trees.c */
+void phylogenetic_tree(char *phylip_name,char *clustal_name,char *dist_name, char *nexus_name, char *pim_name);
+void bootstrap_tree(char *phylip_name,char *clustal_name, char *nexus_name);
+sint dna_distance_matrix(FILE *tree);
+sint prot_distance_matrix(FILE *tree);
+void guide_tree(FILE *tree,int first_seq,sint nseqs);
+
+void calc_percidentity(FILE *pfile);
+
+/* util.c */
+
+void alloc_aln(sint nseqs);
+void realloc_aln(sint first_seq,sint nseqs);
+void free_aln(sint nseqs);
+void alloc_seq(sint seq_no,sint length);
+void realloc_seq(sint seq_no,sint length);
+void free_seq(sint seq_no);
+
+void *ckalloc(size_t bytes);
+void *ckrealloc(void *ptr, size_t bytes);
+void *ckfree(void *ptr);
+char prompt_for_yes_no(char *title,char *prompt);
+void fatal(char *msg, ...);
+void error(char *msg, ...);
+void warning(char *msg, ...);
+void info(char *msg, ...);
+char *rtrim(char *str);
+char *blank_to_(char *str);
+char *upstr(char *str);
+char *lowstr(char *str);
+void getstr(char *instr, char *outstr);
+double getreal(char *instr, double minx, double maxx, double def);
+int getint(char *instr, int minx, int maxx, int def);
+void do_system(void);
+Boolean linetype(char *line, char *code);
+Boolean keyword(char *line, char *code);
+Boolean blankline(char *line);
+void get_path(char *str, char *path);
+
+

Added: trunk/packages/clustalw/branches/upstream/current/clustalw.ms
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/clustalw.ms	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/clustalw.ms	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,794 @@
+This is just an ASCII text version of the manuscript describing
+Clustal W, without the figures.  It was published:
+
+Nucleic Acids Research, 22(22):4673-4680.
+
+
+
+CLUSTAL W: improving the sensitivity of progressive multiple 
+sequence alignment through sequence weighting, position specific 
+gap penalties and weight matrix choice.
+
+
+
+Julie D. Thompson, Desmond G. Higgins1 and Toby J. Gibson*
+
+European Molecular Biology Laboratory
+Postfach 102209
+Meyerhofstrasse 1
+D-69012 Heidelberg
+Germany
+
+
+Phone:		+49-6221-387398
+Fax:		+49-6221-387306
+E-mail:		Gibson at EMBL-Heidelberg.DE
+		Des.Higgins at EBI.AC.UK
+		Thompson at EMBL-Heidelberg.DE
+
+
+Keywords:	Multiple alignment, phylogenetic tree, weight matrix, gap
+		penalty, dynamic programming, sequence weighting.
+
+
+1 Current address: 
+European Bioinformatics Institute
+Hinxton Hall
+Hinxton
+Cambridge CB10 1RQ
+UK.
+
+* To whom correspondence should be addressed
+
+
+ABSTRACT
+
+The sensitivity of the commonly used progressive multiple sequence 
+alignment method has been greatly improved for the alignment of divergent 
+protein sequences.   Firstly, individual weights are assigned to each sequence 
+in a partial alignment in order to downweight near-duplicate sequences and 
+upweight the most divergent ones.   Secondly, amino acid substitution 
+matrices are varied at different alignment stages according to the divergence 
+of the sequences to be aligned.    Thirdly, residue specific gap penalties and 
+locally reduced gap penalties in hydrophilic regions encourage new gaps in 
+potential loop regions rather than regular secondary structure.   Fourthly, 
+positions in early alignments where gaps have been opened receive locally 
+reduced gap penalties to encourage the opening up of new gaps at these 
+positions.  These modifications are incorporated into a new program, 
+CLUSTAL W which is freely available.  
+
+
+INTRODUCTION
+
+The simultaneous alignment of many nucleotide or amino acid sequences is 
+now an essential tool in molecular biology.  Multiple alignments are used to 
+find diagnostic patterns to characterise protein families; to detect or 
+demonstrate homology between new sequences and existing families of 
+sequences; to help predict the secondary and tertiary structures of new 
+sequences; to suggest oligonucleotide primers for PCR; as an essential prelude 
+to molecular evolutionary analysis.   The rate of appearance of new sequence 
+data is steadily increasing and the development of efficient and accurate 
+automatic methods for multiple alignment is, therefore, of major 
+importance.   The majority of automatic multiple alignments are now carried 
+out using the "progressive" approach of Feng and Doolittle (1).   In this paper, 
+we describe a number of improvements to the progressive multiple 
+alignment method which greatly improve the sensitivity without sacrificing 
+any of the speed and efficiency which makes this approach so practical.  The 
+new methods are made available in a program called CLUSTAL W which is 
+freely available and portable to a wide variety of computers and operating 
+systems.
+
+In order to align just two sequences, it is standard practice to use dynamic 
+programming (2).  This guarantees a mathematically optimal alignment, 
+given a table of scores for matches and mismatches between all amino acids 
+or nucleotides (e.g. the PAM250 matrix (3) or BLOSUM62 matrix (4)) and 
+penalties for insertions or deletions of different lengths.   Attempts at 
+generalising dynamic programming to multiple alignments are limited to 
+small numbers of short sequences (5).  For much more than eight or so 
+proteins of average length, the problem is uncomputable given current 
+computer power.  Therefore, all of the methods capable of handling larger 
+problems in practical timescales, make use of heuristics.    Currently, the most 
+widely used approach is to exploit the fact that homologous sequences are 
+evolutionarily related.  One can build up a multiple alignment progressively 
+by a series of pairwise alignments, following the branching order in a 
+phylogenetic tree (1).  One first aligns the most closely related sequences, 
+gradually adding in the more distant ones.   This approach is sufficiently fast 
+to allow alignments of virtually any size.   Further, in simple cases, the 
+quality of the alignments is excellent, as judged by the ability to correctly align 
+corresponding domains from sequences of known secondary or tertiary 
+structure (6).  In more difficult cases, the alignments give good starting points 
+for further automatic or manual refinement.
+
+This approach works well when the data set consists of sequences of different 
+degrees of divergence.   Pairwise alignment of very closely related sequences 
+can be carried out very accurately.   The correct answer may often be obtained 
+using a wide range of parameter values (gap penalties and weight matrix).  By 
+the time the most distantly related sequences are aligned, one already has a 
+sample of aligned sequences which gives important information about the 
+variability at each position.   The positions of the gaps that were introduced 
+during the early alignments of the closely related sequences are not changed 
+as new sequences are added.   This is justified because the placement of gaps 
+in alignments between closely related sequences is much more accurate than 
+between distantly related ones.   When all of the sequences are highly 
+divergent (e.g. less than approximately 25-30% identity between any pair of 
+sequences), this progressive approach becomes much less reliable.
+
+There are two major problems with the progressive approach:  the local 
+minimum problem and the choice of alignment parameters.   The local 
+minimum problem stems from the "greedy" nature of the alignment strategy.  
+The algorithm greedily adds sequences together, following the initial tree.  
+There is no guarantee that the global optimal solution, as defined by some 
+overall measure of multiple alignment quality (7,8), or anything close to it, 
+will be found.   More specifically, any mistakes (misaligned regions) made 
+early in the alignment process cannot be corrected later as new information 
+from other sequences is added.   This problem is frequently thought of as 
+mainly resulting from an incorrect branching order in the initial tree.  The 
+initial trees are derived from a matrix of distances between separately aligned 
+pairs of sequences and are much less reliable than trees from complete 
+multiple alignments.   In our experience, however, the real problem is caused 
+simply by errors in the initial alignments.  Even if the topology of the guide 
+tree is correct, each alignment step in the multiple alignment process may 
+have some percentage of the residues misaligned.   This percentage will be 
+very low on average for very closely related sequences but will increase as 
+sequences diverge.   It is these misalignments which carry through from the 
+early alignment steps that cause the local minimum problem.   The only way 
+to correct this is to use an iterative or stochastic sampling procedure (e.g. 
+7,9,10).   We do not directly address this problem in this paper.
+
+The alignment parameter choice problem is, in our view, at least as serious as 
+the local minimum problem.   Stochastic or iterative algorithms will be just 
+as badly affected as progressive ones if the parameters are inappropriate: they 
+will arrive at a false global minimum.  Traditionally, one chooses one weight 
+matrix and two gap penalties (one for opening a new gap and one for 
+extending an existing gap) and hopes that these will work well over all parts of 
+all the sequences in the data set.   When the sequences are all closely related, 
+this works.  The first reason is that virtually all residue weight matrices give 
+most weight to identities.   When identities dominate an alignment, almost 
+any weight matrix will find approximately the correct solution.   With very 
+divergent sequences, however, the scores given to non-identical residues will 
+become critically important; there will be more mismatches than identities.   
+Different weight matrices will be optimal at different evolutionary distances 
+or for different classes of proteins.  
+
+The second reason is that the range of gap penalty values that will find the 
+correct or best possible solution can be very broad for highly similar sequences 
+(11).   As more and more divergent sequences are used, however, the exact 
+values of the gap penalties become important for success.   In each case, there 
+may be a very narrow range of values which will deliver the best alignment.  
+Further, in protein alignments, gaps do not occur randomly (i.e. with equal 
+probability at all positions).  They occur far more often between the major 
+secondary structural elements of alpha helices and beta strands than within 
+(12).
+
+The major improvements described in this paper attempt to address the 
+alignment parameter choice problem.   We dynamically vary the gap 
+penalties in a position and residue specific manner. The observed relative 
+frequencies of gaps adjacent to each of the 20 amino acids (12) are used to 
+locally adjust the gap opening penalty after each residue.   Short stretches of 
+hydrophilic residues (e.g. 5 or more) usually indicate loop or random coil 
+regions and the gap opening penalties are locally reduced in these stretches.   
+In addition, the locations of the gaps found in the early alignments are also 
+given reduced gap opening penalties.  It has been observed in alignments 
+between sequences of known structure that gaps tend not to be closer than 
+roughly eight residues on average (12).   We increase the gap opening penalty 
+within eight residues of existing gaps.   The two main series of amino acid 
+weight matrices that are used today are the PAM series (3) and the BLOSUM 
+series (4).   In each case, there is a range of matrices to choose from.  Some 
+matrices are appropriate for aligning very closely related sequences where 
+most weight by far is given to identities, with only the most frequent 
+conservative substitutions receiving high scores.  Other matrices work better 
+at greater evolutionary distances where less importance is attached to 
+identities (13).  We choose different weight matrices, as the alignment 
+proceeds, depending on the estimated divergence of the sequences to be 
+aligned at each stage.  
+
+Sequences are weighted to correct for unequal sampling across all 
+evolutionary distances in the data set (14).   This downweights sequences that 
+are very similar to other sequences in the data set and upweights the most 
+divergent ones.  The weights are calculated directly from the branch lengths 
+in the initial guide tree (15).   Sequence weighting has already been shown to 
+be effective in improving the sensitivity of profile searches (15,16).  In the 
+original CLUSTAL programs (17-19), the initial guide trees, used to guide the 
+multiple alignment, were calculated using the UPGMA method (20).  We 
+now use the Neighbour-Joining method (21) which is more robust against the 
+effects of unequal evolutionary rates in different lineages and which gives 
+better estimates of individual branch lengths.  This is useful because it is these 
+branch lengths which are used to derive the sequence weights.  We also allow 
+users to choose between fast approximate alignments (22) or full dynamic 
+programming for the distance calculations used to make the guide tree. 
+
+The new improvements dramatically improve the sensitivity of the 
+progressive alignment method for difficult alignments involving highly 
+diverged sequences.  We show one very demanding test case of over 60 SH3 
+domains (23) which includes sequence pairs with as little as 12% identity and 
+where there is only one exactly conserved residue across all of the sequences.   
+Using default parameters, we can achieve an alignment that is almost exactly 
+correct, according to available structural information (24).   Using the program 
+in a wide variety of situations, we find that it will normally find the correct 
+alignment, in all but the most difficult and pathological of cases.  
+
+
+MATERIAL AND METHODS
+
+
+The basic alignment method
+
+The basic multiple alignment algorithm consists of three main stages: 1) all 
+pairs of sequences are aligned separately in order to calculate a distance matrix 
+giving the divergence of each pair of sequences; 2) a guide tree is calculated 
+from the distance matrix; 3) the sequences are progressively aligned according 
+to the branching order in the guide tree.   An example using 7 globin 
+sequences of known tertiary structure (25) is given in figure 1.
+
+
+1) The distance matrix/pairwise alignments
+
+In the original CLUSTAL programs, the pairwise distances were calculated 
+using a fast approximate method (22).   This allows very large numbers of 
+sequences to be aligned, even on a microcomputer.   The scores are calculated 
+as the number of k-tuple matches (runs of identical residues, typically 1 or 2 
+long for proteins or 2 to 4 long for nucleotide sequences) in the best alignment 
+between two sequences minus a fixed penalty for every gap.   We now offer a 
+choice between this method and the slower but more accurate scores from full 
+dynamic programming alignments using two gap penalties (for opening or 
+extending gaps) and a full amino acid weight matrix.   These scores are 
+calculated as the number of identities in the best alignment divided by the 
+number of residues compared (gap positions are excluded).   Both of these 
+scores are initially calculated as percent identity scores and are converted to 
+distances by dividing by 100 and subtracting from 1.0 to give number of 
+differences per site.   We do not correct for multiple substitutions in these 
+initial distances.   In figure 1 we give the 7x7 distance matrix between the 7 
+globin sequences calculated using the full dynamic programming method.
+
+
+2) The guide tree
+
+The trees used to guide the final multiple alignment process are calculated 
+from the distance matrix of step 1 using the Neighbour-Joining method (21).   
+This produces unrooted trees with branch lengths proportional to estimated 
+divergence along each branch.   The root is placed by a "mid-point" method 
+(15) at a position where the means of the branch lengths on either side of the 
+root are equal.   These trees are also used to derive a weight for each sequence 
+(15).   The weights are dependent upon the distance from the root of the tree 
+but sequences which have a common branch with other sequences share the 
+weight derived from the shared branch.   In the example in figure 1, the 
+leghaemoglobin (Lgb2_Luplu) gets a weight of 0.442 which is equal to the 
+length of the branch from the root to it.  The Human beta globin 
+(Hbb_Human) gets a weight consisting of the length of the branch leading to 
+it that is not shared with any other sequences (0.081) plus half the length of 
+the branch shared with the horse beta globin (0.226/2) plus one quarter the 
+length of the branch shared by all four haemoglobins (0.061/4) plus one fifth 
+the branch shared between the haemoglobins and the myoglobin (0.015/5) 
+plus one sixth the branch leading to all the vertebrate globins (0.062/6).  This 
+sums to a total of 0.221.  By contrast, in the normal progressive alignment 
+algorithm, all sequences would be equally weighted.  The rooted tree with 
+branch lengths and sequence weights for the 7 globins is given in figure 1.  
+
+
+3) Progressive alignment
+
+The basic procedure at this stage is to use a series of pairwise alignments to 
+align larger and larger groups of sequences, following the branching order in 
+the guide tree.   You proceed from the tips of the rooted tree towards the root.   
+In the globin example in figure 1 you align the sequences in the following 
+order: human vs. horse beta globin; human vs. horse alpha globin; the 2 
+alpha globins vs. the 2 beta globins; the myoglobin vs. the haemoglobins; the 
+cyanohaemoglobin vs the haemoglobins plus myoglobin; the leghaemoglobin 
+vs. all the rest.  At each stage a full dynamic programming (26,27) algorithm is 
+used with a residue weight matrix and penalties for opening and extending 
+gaps.   Each step consists of aligning two existing alignments or sequences.  
+Gaps that are present in older alignments remain fixed.  In the basic 
+algorithm, new gaps that are introduced at each stage get full gap opening and 
+extension penalties, even if they are introduced inside old gap positions (see 
+the section on gap penalties below for modifications to this rule).  In order to 
+calculate the score between a position from one sequence or alignment and 
+one from another, the average of all the pairwise weight matrix scores from 
+the amino acids in the two sets of sequences is used i.e. if you align 2 
+alignments with 2 and 4 sequences respectively, the score at each position is 
+the average of 8 (2x4) comparisons.   This is illustrated in figure 2.  If either set 
+of sequences contains one or more gaps in one of the positions being 
+considered, each gap versus a residue is scored as zero.   The default amino 
+acid weight matrices we use are rescored to have only positive values. 
+Therefore, this treatment of gaps treats the score of a residue versus a gap as 
+having the worst possible score.  When sequences are weighted (see 
+improvements to progressive alignment, below), each weight matrix value is 
+multiplied by the weights from the 2 sequences, as illustrated in figure 2.
+
+
+Improvements to progressive alignment
+
+All of the remaining modifications apply only to the final progressive 
+alignment stage.   Sequence weighting is relatively straightforward and is 
+already widely used in profile searches (15,16).   The treatment of gap penalties 
+is more complicated.   Initial gap penalties are calculated depending on the 
+weight matrix, the similarity of the sequences, and the length of the 
+sequences. Then, an attempt is made to derive sensible local gap opening 
+penalties at every position in each pre-aligned group of sequences that will 
+vary as new sequences are added.   The use of different weight matrices as the 
+alignment progresses is novel and largely by-passes the problem of initial 
+choice of weight matrix.   The final modification allows us to delay the 
+addition of very divergent sequences until the end of the alignment process 
+when all of the more closely related sequences have already been aligned.
+
+
+Sequence weighting
+
+Sequence weights are calculated directly from the guide tree.    The weights 
+are normalised such that the biggest one is set to 1.0 and the rest are all less 
+than one.  Groups of closely related sequences receive lowered weights 
+because they contain much duplicated information.  Highly divergent 
+sequences without any close relatives receive high weights.  These weights 
+are used as simple multiplication factors for scoring positions from different 
+sequences or prealigned groups of sequences.  The method is illustrated in 
+figure 2.  In the globin example in figure 1, the two alpha globins get 
+downweighted because they are almost duplicate sequences (as do the two 
+beta globins); they receive a combined weight of only slightly more than if a 
+single alpha globin was used.   
+
+
+Initial gap penalties
+
+Initially, two gap penalties are used: a gap opening penalty (GOP) which gives 
+the cost of opening a new gap of any length and a gap extension penalty (GEP) 
+which gives the cost of every item in a gap.  Initial values can be set by the 
+user from a menu.   The software then automatically attempts to choose 
+appropriate gap penalties for each sequence alignment, depending on the 
+following factors.
+
+1) Dependence on the weight matrix
+
+It has been shown (16,28) that varying the gap penalties used with different 
+weight matrices can improve the accuracy of sequence alignments. Here, we 
+use the average score for two mismatched residues (i.e. off-diagonal values in 
+the matrix) as a scaling factor for the GOP.
+
+2) Dependence on the similarity of the sequences
+
+The percent identity of the two (groups of) sequences to be aligned is used to 
+increase the GOP for closely related sequences and decrease it for more 
+divergent sequences on a linear scale.
+
+3) Dependence on the lengths of the sequences   
+
+The scores for both true and false sequence alignments grow with the length 
+of the sequences. We use the logarithm of the length of the shorter sequence 
+to increase the GOP with sequence length.
+
+Using these three modifications, the initial GOP calculated by the program is:
+
+GOP->(GOP+log(MIN(N,M))) * (average residue mismatch score) *
+                                                               (percent identity scaling factor)
+where N, M are the lengths of the two sequences.
+
+4) Dependence on the difference in the lengths of the sequences
+
+The GEP is modified depending on the difference between the lengths of the 
+two sequences to be aligned. If one sequence is much shorter than the other, 
+the GEP is increased to inhibit too many long gaps in the shorter sequence.
+The initial GEP calculated by the program is:
+
+GEP ->  GEP*(1.0+|log(N/M)|) 
+where N, M are the lengths of the two sequences.
+
+
+Position-specific gap penalties
+
+ In most dynamic programming applications, the initial gap opening and 
+extension penalties are applied equally at every position in the sequence, 
+regardless of the location of a gap, except for terminal gaps which are usually 
+allowed at no cost.   In CLUSTAL W, before any pair of sequences or 
+prealigned groups of sequences are aligned, we generate a table of gap opening 
+penalties for every position in the two (sets of) sequences.  An example is 
+shown in figure 3.  We manipulate the initial gap opening penalty in a 
+position specific manner, in order to make gaps more or less likely at different 
+positions.   
+
+The local gap penalty modification rules are applied in a hierarchical manner.   
+The exact details of each rule are given below.  Firstly, if there is a gap at a 
+position, the gap opening and gap extension penalties are lowered; the other 
+rules do not apply.   This makes gaps more likely at positions where there are 
+already gaps.  If there is no gap at a position, then the gap opening penalty is 
+increased if the position is within 8 residues of an existing gap.   This 
+discourages gaps that are too close together.  Finally, at any position within a 
+run of hydrophilic residues, the penalty is decreased.  These runs usually 
+indicate loop regions in protein structures.  If there is no run of hydrophilic 
+residues, the penalty is modified using a table of residue specific gap 
+propensities (12).   These propensities were derived by counting the frequency 
+of each residue at either end of gaps in alignments of proteins of known 
+structure.  An illustration of the application of these rules from one part of 
+the globin example, in figure 1, is given in figure 3.  
+
+1) Lowered gap penalties at existing gaps
+
+If there are already gaps at a position, then the GOP is reduced in proportion 
+to the number of sequences with a gap at this position and the GEP is lowered 
+by a half.  The new gap opening penalty is calculated as:
+
+GOP ->  GOP*0.3*(no. of sequences without a gap/no. of sequences).
+
+2) Increased gap penalties near existing gaps
+
+If a position does not have any gaps but is within 8 residues of an existing gap, 
+the GOP is increased by:
+
+GOP ->  GOP*(2+((8-distance from gap)*2)/8)
+
+3) Reduced gap penalties in hydrophilic stretches
+
+Any run of 5 hydrophilic residues is considered to be a hydrophilic stretch.  
+The residues that are to be considered hydrophilic may be set by the user but 
+are conservatively set to D, E, G, K, N, Q, P, R or S by default.   If, at any 
+position, there are no gaps and any of the sequences has such a stretch, the 
+GOP is reduced by one third.
+
+
+4) Residue specific penalties
+
+If there is no hydrophilic stretch and the position does not contain any gaps, 
+then the GOP is multiplied by one of the 20 numbers in table 1, depending on 
+the residue.  If there is a mixture of residues at a position, the multiplication 
+factor is the average of all the contributions from each sequence.  
+
+
+Weight matrices
+
+Two main series of weight matrices are offered to the user: the Dayhoff PAM 
+series (3) and the BLOSUM series (4).   The default is the BLOSUM series.  In 
+each case, there is a choice of matrix ranging from strict ones, useful for 
+comparing very closely related sequences to very "soft" ones that are useful 
+for comparing very distantly related sequences.   Depending on the distance 
+between the two sequences or groups of sequences to be compared, we switch 
+between 4 different matrices.  The distances are measured directly from the 
+guide tree.  The ranges of distances and tables used with the PAM series of 
+matrices are: 80-100%:PAM20, 60-80%:PAM60, 40-60%:PAM120, 0-40%:PAM350. 
+The range used with the BLOSUM series is:80-100%:BLOSUM80,
+60-80%:BLOSUM62, 30-60%:BLOSUM45, 0-30%:BLOSUM30.
+
+
+Divergent sequences
+
+The most divergent sequences (most different, on average from all of the 
+other sequences) are usually the most difficult to align correctly.  It is 
+sometimes better to delay the incorporation of these sequences until all of the 
+more easily aligned sequences are merged first.  This may give a better chance 
+of correctly placing the gaps and matching weakly conserved positions against 
+the rest of the sequences.   A choice is offered to set a cut off (default is 40% 
+identity or less with any other sequence) that will delay the alignment of the 
+divergent sequences until all of the rest have been aligned.  
+
+
+Software and Algorithms
+
+
+Dynamic Programming
+
+The most demanding part of the multiple alignment strategy, in terms of 
+computer processing and memory usage, is the alignment of two (groups of) 
+sequences at each step in the final progressive alignment.   To make it 
+possible to align very long sequences (e.g. dynein heavy chains at ~ 5,000 
+residues) in a reasonable amount of memory, we use the memory efficient 
+dynamic programming algorithm of Myers and Miller (26).   This sacrifices 
+some processing time but makes very large alignments practical in very little 
+memory.   One disadvantage of this algorithm is that it does not allow 
+different gap opening and extension penalties at each position.  We have 
+modified the algorithm so as to allow this and the details are described in a 
+separate paper (27).   
+
+
+
+Menus/file formats
+
+Six different sequence input formats are detected automatically and read by 
+the program:  EMBL/Swiss Prot, NBRF/PIR, Pearson/FASTA (29), GCG/MSF 
+(30), GDE (Steven Smith, Harvard University Genome Center) and CLUSTAL 
+format alignments.   The last three formats allow users to read in complete 
+alignments (e.g. for calculating phylogenetic trees or for addition of new 
+sequences to an existing alignment).   Alignment output may be requested in 
+standard CLUSTAL format (self-explanatory blocked alignments) or in 
+formats compatible with the GDE, PHYLIP (31) or GCG (30) packages.   The 
+program offers the user the ability to calculate Neighbour-Joining 
+phylogenetic trees from existing alignments with options to correct for 
+multiple hits (32,33) and to estimate confidence levels using a bootstrap 
+resampling procedure (34).   The trees may be output in the "New 
+Hampshire" format that is compatible with the PHYLIP package (31).
+
+Alignment to an alignment
+
+Profile alignment is used to align two existing alignments (either of which 
+may consist of just one sequence) or to add a series of new sequences to an 
+existing alignment.   This is useful because one may wish to build up a 
+multiple alignment gradually, choosing different parameters manually, or 
+correcting intermediate errors as the alignment proceeds.   Often, just a few 
+sequences cause misalignments in the progressive algorithm and these can be 
+removed from the process and then added at the end by profile alignment.  A 
+second use is where one has a high quality reference alignment and wishes to 
+keep it fixed while adding new sequences automatically.  
+
+
+Portability/Availability
+
+The full source code of the package is provided free to academic users.   The 
+program will run on any machine with a full ANSI conforming C compiler.  
+It has been tested on the following hardware/software combinations:  
+Decstation/Ultrix, Vax or ALPHA/VMS, Silicon Graphics/IRIX.   The source 
+code and documentation are available by E-mail from the EMBL file server 
+(send the words HELP and HELP SOFTWARE on two lines to the internet 
+address: 
+Netserv at EMBL-Heidelberg.DE) or by anonymous FTP from 
+FTP.EMBL-Heidelberg.DE.  Queries may be addressed by E-mail to 
+Des.Higgins at EBI.AC.UK or Gibson at EMBL-Heidelberg.DE.
+
+
+RESULTS AND DISCUSSION
+
+
+Alignment of SH3 Domains
+
+The ~60 residue SH3 domain was chosen to illustrate the performance of 
+CLUSTAL W, as there is a reference manual alignment (23) and the fold is 
+known (24).  SH3 domains, with a minimum similarity below 12% identity, 
+are poorly aligned by progressive alignment programs such as CLUSTAL V 
+and PILEUP: neither program can generate the correct blocks corresponding to 
+the secondary structure elements. 
+
+Figure 4 shows an alignment generated by CLUSTAL W of the example set of 
+SH3 domains. The alignment was generated in two steps. After progressive 
+alignment, five blocks were produced, corresponding to structural elements, 
+with gaps inserted exclusively in the known loop regions. The beta strands in 
+blocks 1, 4 and 5 were all correctly superposed. However, four sequences in 
+block 2 and one sequence in block 3 were misaligned by 1-2 residues 
+(underlined in figure 4). A second progressive alignment of the aligned 
+sequences, including the gaps, improved this alignment: A single misaligned 
+sequence, H_P55, remains in block 2 (boxed in figure 4), while block 3 is now 
+completely aligned.  This alignment corrects several errors (e.g. P85A, P85B 
+and FUS1) in the manual alignment (23).
+
+The SH3 alignment illustrates several features of CLUSTAL W usage. Firstly, 
+in a practical application involving divergent sequences, the initial 
+progressive alignment is likely to be a good but not perfect approximation to 
+the correct alignment. The alignment quality can be improved in a number of 
+ways. If the block structure of the alignment appears to be correct, realignment 
+of the alignment will usually improve most of the misaligned blocks: the 
+existing gaps allow the blocks to "float" cheaply to a locally optimal position 
+without disturbing the rest of the alignment. Remaining sequences which are 
+doubtfully aligned can then be individually tested by profile alignment to the 
+remainder: the misaligned H_P55 SH3 domain can be correctly aligned by 
+profile (with GOP <= 8). The indel regions in the final alignment can then be 
+manually cleaned up: Usually the exact alignment in the loop regions is not 
+determinable, and may have no meaning in structural terms. It is then 
+desirable to have a single gap per structural loop. CLUSTAL W achieved this 
+for two of the four SH3 loop regions (figure 4).
+
+If the block structure of the alignment appears suspect, greater intervention by 
+the user may be required. The most divergent sequences, especially if they 
+have large insertions (which can be discerned with the aid of dot matrix 
+plots), should be left out of the progressive alignment. If there are sets of 
+closely related sequences that are deeply diverged from other sets, these can be 
+separately aligned and then merged by profile alignment. Incorrectly 
+determined sequences, containing frameshifts, can also confound regions of 
+an alignment: these can be hard to detect but sometimes they have been 
+grouped within the excluded divergent sequences: then they may be revealed 
+when they are individually compared to the alignment as having apparently 
+nonsense segments with respect to the other sequences. 
+
+
+
+Finding the best alignment
+
+In cases where all of the sequences in a data set are very similar (e.g. no pair 
+less than 35% identical), CLUSTAL W will find an alignment which is 
+difficult to improve by eye.  In this sense, the alignment is optimal with 
+regard to the alternative of manual alignment.  Mathematically, this is vague 
+and can only be put on a more systematic footing by finding an objective 
+function (a measure of multiple alignment quality) that exactly mirrors the 
+information used by an "expert" to evaluate an alignment.  Nonetheless, if an 
+alignment is impossible to improve by eye, then the program has achieved a 
+very useful result.   
+
+In more difficult cases, as more divergent sequences are included, it becomes 
+increasingly difficult to find good alignments and to evaluate them.    What 
+we find with CLUSTAL W is that the basic block-like structure of the 
+alignment (corresponding to the major secondary structure elements) is 
+usually recovered, with some of the most divergent sequences misaligned in 
+small regions.  This is a very useful starting point for manual refinement as it 
+helps define the major blocks of similarity.   The problem sequences can be 
+removed from the analysis and realigned to the rest of the sequences 
+automatically or with different parameter settings.   An examination of the 
+tree used to guide the alignment will usually show which sequences will be 
+most unreliably placed (those that branch off closest to the root and/or those 
+that align to other single sequences at a very low level of sequence identity 
+rather than align to a group of pre-aligned sequences).  Finally, one can 
+simply iterate the multiple alignment process by feeding an output alignment 
+back into CLUSTAL W and repeating the multiple alignment process (using 
+the same or different parameters).   The SH3 domain alignment in figure 4 
+was derived in this way by 2 passes using default parameters.  In the second 
+pass, the local gap penalties are dominated by the placement of the initial 
+major gap positions.  The alignment will either remain unchanged or will 
+converge rapidly (after 1 or 2 extra passes) on a better solution.  If the 
+placement of the initial gaps is approximately correct but some of the 
+sequences are locally misaligned, this works well.  
+
+
+Comparison with other methods
+
+Recently, several papers have addressed the problem of position specific 
+parameters for multiple alignment.  In one case (35), local gap penalties are 
+increased in alpha helical and beta strand regions, when the 3-D structures of 
+one or more of the sequences are known.  In a second case (36), a hidden 
+Markov model was used to estimate position specific gap penalties and 
+residue substitution weight matrices when large numbers of examples of a 
+protein domain were known.  With CLUSTAL W, we attempt to derive the 
+same information purely from the set of sequences to be aligned.  Therefore, 
+we can apply the method to any set of sequences.  The success of this approach 
+will depend on the number of available sequences and their evolutionary 
+relationships.  It will also depend on the decision making process during 
+multiple alignment (e.g. when to change weight matrix) and the accuracy and 
+appropriateness of our parameterisation.  In the long term, this can only be 
+evaluated by exhaustive testing of sets of sequences where the correct 
+alignment (or parts of it) are known from structural information.   What is 
+clear, however, is that the modifications described here significantly improve 
+the sensitivity of the progressive multiple alignment approach.  This is 
+achieved with almost no sacrifice in speed and efficiency.  
+
+There are several areas where further improvements in sensitivity and 
+accuracy can be made.  Firstly, the residue weight matrices and gap settings 
+can be made more accurate as more and more data accumulate, while 
+matrices for specific sequence types can be derived (e.g. for transmembrane 
+regions (37)).  Secondly, stochastic or iterative optimisation methods can be 
+used to refine initial alignments (7,9,10).   CLUSTAL W could be run with 
+several sets of starting parameters and in each case, the alignments refined 
+according to an objective function.   The search for a good objective function, 
+that takes into account the sequence and position specific information used in 
+CLUSTAL W is a key area of research.   Finally, the average number of 
+examples of each protein domain or family is growing steadily.  It is not only 
+important that programs can cope with the large volumes of data that are 
+being generated, they should be able to exploit the new information to make 
+the alignments more and more accurate.   Globally optimal alignments 
+(according to an objective function) may not always be possible but the 
+problem may be avoided if sufficiently large volumes of data become 
+available.  CLUSTAL W is a step in this direction.
+
+ACKNOWLEDGEMENTS
+
+Numerous people have offered advice and suggestions for improvements to 
+earlier versions of the CLUSTAL programs.  D.H. wishes to apologise to all of 
+the irate CLUSTAL V users who had to live with the bugs and lack of facilities 
+for getting trees in the New Hampshire format.  We wish to specifically thank 
+Jeroen Coppieters who suggested using a series of weight matrices and Steven 
+Henikoff for advice on using the BLOSUM matrices.  We are grateful to Rein 
+Aasland, Peer Bork, Ariel Blocker and Bertrand Séraphin for providing 
+challenging alignment problems.   T.G. and J.T. thank Kevin Leonard for 
+support and encouragement.  Finally, we thank all of the people who were 
+involved with various CLUSTAL programs over the years, namely: Paul 
+Sharp, Rainer Fuchs and Alan Bleasby.
+
+
+REFERENCES
+
+ 1.Feng, D.-F. and Doolittle, R.F. (1987). J. Mol. Evol. 25, 351-360.
+ 2.Needleman, S.B. and Wunsch, C.D. (1970). J. Mol. Biol. 48, 443-453.
+ 3.Dayhoff, M.O., Schwartz, R.M. and Orcutt, B.C. (1978)  in Atlas of Protein 
+Sequence and Structure, vol. 5, suppl. 3 (Dayhoff, M.O., ed.), pp 345-352, 
+NBRF, Washington.
+ 4.Henikoff, S. and Henikoff, J.G. (1992). Proc. Natl. Acad. Sci. USA 89, 10915-
+10919.
+ 5.Lipman, D.J., Altschul, S.F. and Kececioglu, J.D. (1989). Proc. Natl. Acad. Sci. 
+USA 86, 4412-4415.
+ 6.Barton, G.J. and Sternberg, M.J.E. (1987). J. Mol. Biol. 198, 327-337.
+ 7.Gotoh, O. (1993). CABIOS 9, 361-370.
+ 8.Altschul, S.F. (1989). J. Theor. Biol. 138, 297-309.
+ 9.Lukashin, A.V., Engelbrecht, J. and Brunak, S. (1992). Nucl. Acids Res. 20, 
+2511-2516.
+10.Lawrence, C.E., Altschul, S.F., Boguski, M.S., Liu, J.S., Neuwald, A.F. and 
+Wooton, J.C. (1993). Science, 262, 208-214.
+11.Vingron, M. and Waterman, M.S. (1993). J. Mol. Biol. 234, 1-12.
+12.Pascarella, S. and Argos, P. (1992). J. Mol. Biol. 224, 461-471.
+13.Collins, J.F. and Coulson, A.F.W. (1987). In Nucleic acid and protein 
+sequence analysis a practical approach, Bishop, M.J. and Rawlings, C.J. ed., 
+chapter 13, pp. 323-358.
+14.Vingron, M. and Sibbald, P.R. (1993). Proc. Natl. Acad. Sci. USA, 90, 8777-
+8781.
+15.Thompson, J.D., Higgins, D.G. and Gibson, T.J. (1994). CABIOS, 10, 19-29.
+16.Lüthy, R., Xenarios, I. and Bucher, P. (1994). Protein Science, 3, 139-146.
+17.Higgins, D.G. and Sharp, P.M. (1988). Gene, 73, 237-244.
+18.Higgins, D.G. and Sharp, P.M. (1989). CABIOS, 5, 151-153.
+19.Higgins, D.G., Bleasby, A.J. and Fuchs, R. (1992). CABIOS, 8, 189-191.
+20.Sneath, P.H.A. and Sokal, R.R. (1973). Numerical Taxonomy, W.H. 
+Freeman, San Francisco.
+21.Saitou, N. and Nei, M. (1987). Mol. Biol. Evol. 4, 406-425.
+22.Wilbur, W.J. and Lipman, D.J. (1983). Proc. Natl. Acad. Sci. USA, 80, 726-
+730.
+23.Musacchio, A., Gibson, T., Lehto, V.-P. and Saraste, M. (1992). FEBS Lett. 
+307, 55-61.
+24.Musacchio, A., Noble, M., Pauptit, R., Wierenga, R. and Saraste, M. (1992). 
+Nature, 359, 851-855.
+25.Bashford, D., Chothia, C. and Lesk, A.M. (1987). J. Mol. Biol. 196, 199-216.
+26.Myers, E.W. and Miller, W. (1988). CABIOS, 4, 11-17.
+27.Thompson, J.D. (1994). CABIOS, (Submitted).
+28.Smith, T.F., Waterman, M.S. and Fitch, W.M. (1981). J. Mol. Evol. 18, 38-46.
+29.Pearson, W.R. and Lipman, D.J. (1988). Proc. Natl. Acad. Sci. USA. 85, 2444-
+2448.
+30.Devereux, J., Haeberli, P. and Smithies, O. (1984). Nucleic Acids Res. 12, 
+387-395.
+31.Felsenstein, J. (1989). Cladistics 5, 164-166.
+32.Kimura, M. (1980). J. Mol. Evol. 16, 111-120.
+33.Kimura, M. (1983). The Neutral Theory of Molecular Evolution.  
+Cambridge University Press, Cambridge.
+34.Felsenstein, J. (1985). Evolution 39, 783-791.
+35.Smith, R.F. and Smith, T.F. (1992) Protein Engineering 5, 35-41.
+36.Krogh, A., Brown, M., Mian, S., Sjölander, K. and Haussler, D. (1994) J. Mol. 
+Biol. 235, 1501-1531.
+37.Jones, D.T., Taylor, W.R. and Thornton, J.M.  (1994). FEBS Lett. 339, 269-275.
+38.Bairoch, A. and Böckmann, B. (1992) Nucleic Acids Res., 20, 2019-2022.
+39.Noble, M.E.M., Musacchio, A., Saraste, M., Courtneidge, S.A. and 
+Wierenga, R.K. (1993) EMBO J. 12, 2617-2624.
+40.Kabsch, W. and Sander, C. (1983) Biopolymers, 22, 2577-2637.
+
+FIGURE LEGENDS
+
+Figure 1.  The basic progressive alignment procedure, illustrated using a set of 
+7 globins of known tertiary structure.  The sequence names are from Swiss 
+Prot (38):  Hba_Horse: horse alpha globin; Hba_Human: human alpha globin; 
+Hbb_Horse: horse beta globin; Hbb_Human: human beta globin; Myg_Phyca: 
+sperm whale myoglobin; Glb5_Petma: lamprey cyanohaemoglobin; 
+Lgb2_Luplu: lupin leghaemoglobin.   In the distance matrix, the mean 
+number of differences per residue is given.  The unrooted tree shows all 
+branch lengths drawn to scale.  In the rooted tree, all branch lengths (mean 
+number of differences per residue along each branch) are given as well as 
+weights for each sequence.  In the multiple alignment, the approximate 
+positions of the 7 alpha helices, common to all 7 proteins are shown.  This 
+alignment was derived using CLUSTAL W with default parameters and the 
+PAM (3) series of weight matrices.  
+
+Figure 2.  The scoring scheme for comparing two positions from two 
+alignments.   Two sections of alignment with 4 and 2 sequences respectively 
+are shown.   The score of the position with amino acids T,L,K,K versus the 
+position with amino acids V and I is given with and without sequence 
+weights.  M(X,Y) is the weight matrix entry for amino acid X versus amino 
+acid Y.  Wn is the weight for sequence n.
+
+Figure 3.  The variation in local gap opening penalty is plotted for a section of 
+alignment.  The initial gap opening penalty is indicated by a dotted line. Two 
+hydrophilic stretches are underlined.  The lowest penalties correspond to the 
+ends of the alignment, the hydrophilic stretches and the two positions with 
+gaps.   The highest values are within 8 residues of the two gap positions.  The 
+rest of the variation is caused by the residue specific gap penalties (12).
+
+Figure 4.  CLUSTAL W Alignment of a set of SH3 domains taken from (23). 
+Secondary structure assignments for the solved Spectrin (24) and Fyn (39) 
+domains are according to DSSP (40). The alignment was generated in two 
+steps using default parameters. After full multiple alignment, the aligned 
+sequences were realigned. Segments which were correctly aligned in the 
+second pass are underlined. The single misaligned segment in H_P55 and the 
+misaligned residue in H_NCK/2 are boxed.
+
+The sequences are coloured to illustrate significant features. All G (orange) 
+and P (yellow) are coloured. Other residues matching a frequent occurrence of 
+a property in a column are coloured: hydrophobic = blue; hydrophobic 
+tendency = light blue; basic = red; acidic = purple; hydrophilic = green; White 
+= unconserved. The alignment figure was prepared with the GDE sequence 
+editor (S. Smith, Harvard University) and COLORMASK (J. Thompson, 
+EMBL).
+
+
+
+
+Table 1.  Pascarella and Argos residue specific gap modification factors.   
+-----------------------------------------------------------------------------------
+A	1.13		M	1.29
+C	1.13		N	0.63
+D	0.96		P	0.74
+E	1.31		Q	1.07
+F	1.20		R	0.72
+G	0.61		S	0.76
+H	1.00		T	0.89
+I	1.32		V	1.25
+K	0.96		Y	1.00
+L	1.21		W	1.23
+-----------------------------------------------------------------------------------
+The values are normalised around a mean value of 1.0 for H.  The lower the 
+value, the greater the chance of having an adjacent gap.  These are derived 
+from the original table of relative frequencies of gaps adjacent to each residue 
+(12) by subtraction from 2.0.
+
+


Property changes on: trunk/packages/clustalw/branches/upstream/current/clustalw.ms
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/clustalw/branches/upstream/current/clustalw.new
===================================================================
(Binary files differ)


Property changes on: trunk/packages/clustalw/branches/upstream/current/clustalw.new
___________________________________________________________________
Name: svn:executable
   + 
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/clustalw/branches/upstream/current/clustalw_help
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/clustalw_help	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/clustalw_help	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,697 @@
+
+This is the on-line help file for CLUSTAL W ( version 1.83).   
+
+It should be named or defined as: clustalw_help 
+except with MSDOS in which case it should be named CLUSTALW.HLP
+
+For full details of usage and algorithms, please read the CLUSTALW.DOC file.
+
+
+Toby  Gibson                         EMBL, Heidelberg, Germany.
+Des   Higgins                        UCC, Cork, Ireland.
+Julie Thompson                       IGBMC, Strasbourg, France.
+
+
+
+>>NEW <<
+
+  Fasta output
+  ===========
+
+  Write/Read sequence with range specified. The command line syntax
+   for range specification is flexible. You can use one of the following
+   syntax.
+
+       -range=n:m  
+       -range=n-m 
+       -range="n m"
+
+   where n is the starting position and m is the length of the sequence.
+
+  Range and range numbers.
+  =======================
+
+  Include range numbers in the output.
+
+       -seqno_range=on/off
+
+  The sequence range will be appended to the names of the sequences.
+
+
+  PIM: Percentage Identity Matrix
+  ===============================
+
+
+
+>>HELP 1 <<             General help for CLUSTAL W (1.81)
+
+Clustal W is a general purpose multiple alignment program for DNA or proteins.
+
+SEQUENCE INPUT:  all sequences must be in 1 file, one after another.  
+7 formats are automatically recognised: NBRF-PIR, EMBL-SWISSPROT, 
+Pearson (Fasta), Clustal (*.aln), GCG-MSF (Pileup), GCG9-RSF and GDE flat file.
+All non-alphabetic characters (spaces, digits, punctuation marks) are ignored
+except "-" which is used to indicate a GAP ("." in MSF-RSF).  
+
+To do a MULTIPLE ALIGNMENT on a set of sequences, use item 1 from this menu to 
+INPUT them; go to menu item 2 to do the multiple alignment.
+
+PROFILE ALIGNMENTS (menu item 3) are used to align 2 alignments.  Use this to
+add a new sequence to an old alignment, or to use secondary structure to guide 
+the alignment process.  GAPS in the old alignments are indicated using the "-" 
+character.   PROFILES can be input in ANY of the allowed formats; just 
+use "-" (or "." for MSF-RSF) for each gap position.
+
+PHYLOGENETIC TREES (menu item 4) can be calculated from old alignments (read in
+with "-" characters to indicate gaps) OR after a multiple alignment while the 
+alignment is still in memory.
+
+
+The program tries to automatically recognise the different file formats used
+and to guess whether the sequences are amino acid or nucleotide.  This is not
+always foolproof.
+
+FASTA and NBRF-PIR formats are recognised by having a ">" as the first 
+character in the file.  
+
+EMBL-Swiss Prot formats are recognised by the letters
+ID at the start of the file (the token for the entry name field).  
+
+CLUSTAL format is recognised by the word CLUSTAL at the beginning of the file.
+
+GCG-MSF format is recognised by one of the following:
+       - the word PileUp at the start of the file. 
+       - the word !!AA_MULTIPLE_ALIGNMENT or !!NA_MULTIPLE_ALIGNMENT
+         at the start of the file.
+       - the word MSF on the first line of the file, and the characters ..
+         at the end of this line.
+
+GCG-RSF format is recognised by the word !!RICH_SEQUENCE at the beginning of
+the file.
+
+
+If 85% or more of the characters in the sequence are from A,C,G,T,U or N, the
+sequence will be assumed to be nucleotide.  This works in 97.3% of cases
+but watch out!
+
+>>HELP 2 <<      Help for multiple alignments
+
+If you have already loaded sequences, use menu item 1 to do the complete
+multiple alignment.  You will be prompted for 2 output files: 1 for the 
+alignment itself; another to store a dendrogram that describes the similarity
+of the sequences to each other.
+
+Multiple alignments are carried out in 3 stages (automatically done from menu
+item 1 ...Do complete multiple alignments now):
+
+1) all sequences are compared to each other (pairwise alignments);
+
+2) a dendrogram (like a phylogenetic tree) is constructed, describing the
+approximate groupings of the sequences by similarity (stored in a file).
+
+3) the final multiple alignment is carried out, using the dendrogram as a guide.
+
+
+PAIRWISE ALIGNMENT parameters control the speed-sensitivity of the initial
+alignments.
+
+MULTIPLE ALIGNMENT parameters control the gaps in the final multiple alignments.
+
+
+RESET GAPS (menu item 7) will remove any new gaps introduced into the sequences
+during multiple alignment if you wish to change the parameters and try again.
+This only takes effect just before you do a second multiple alignment.  You
+can make phylogenetic trees after alignment whether or not this is ON.
+If you turn this OFF, the new gaps are kept even if you do a second multiple
+alignment. This allows you to iterate the alignment gradually.  Sometimes, the 
+alignment is improved by a second or third pass.
+
+SCREEN DISPLAY (menu item 8) can be used to send the output alignments to the 
+screen as well as to the output file.
+
+You can skip the first stages (pairwise alignments; dendrogram) by using an
+old dendrogram file (menu item 3); or you can just produce the dendrogram
+with no final multiple alignment (menu item 2).
+
+
+OUTPUT FORMAT: Menu item 9 (format options) allows you to choose from 7 
+different alignment formats (CLUSTAL, GCG, NBRF-PIR, PHYLIP, GDE, NEXUS, and FASTA).  
+
+
+>>HELP 3 <<      Help for pairwise alignment parameters
+A distance is calculated between every pair of sequences and these are used to
+construct the dendrogram which guides the final multiple alignment. The scores
+are calculated from separate pairwise alignments. These can be calculated using
+2 methods: dynamic programming (slow but accurate) or by the method of Wilbur
+and Lipman (extremely fast but approximate). 
+
+You can choose between the 2 alignment methods using menu option 8.  The
+slow-accurate method is fine for short sequences but will be VERY SLOW for 
+many (e.g. >100) long (e.g. >1000 residue) sequences.   
+
+SLOW-ACCURATE alignment parameters:
+	These parameters do not have any effect on the speed of the alignments. 
+They are used to give initial alignments which are then rescored to give percent
+identity scores.  These % scores are the ones which are displayed on the 
+screen.  The scores are converted to distances for the trees.
+
+1) Gap Open Penalty:      the penalty for opening a gap in the alignment.
+2) Gap extension penalty: the penalty for extending a gap by 1 residue.
+3) Protein weight matrix: the scoring table which describes the similarity
+                          of each amino acid to each other.
+4) DNA weight matrix:     the scores assigned to matches and mismatches 
+                          (including IUB ambiguity codes).
+
+
+FAST-APPROXIMATE alignment parameters:
+
+These similarity scores are calculated from fast, approximate, global align-
+ments, which are controlled by 4 parameters.   2 techniques are used to make
+these alignments very fast: 1) only exactly matching fragments (k-tuples) are
+considered; 2) only the 'best' diagonals (the ones with most k-tuple matches)
+are used.
+
+K-TUPLE SIZE:  This is the size of exactly matching fragment that is used. 
+INCREASE for speed (max= 2 for proteins; 4 for DNA), DECREASE for sensitivity.
+For longer sequences (e.g. >1000 residues) you may need to increase the default.
+
+GAP PENALTY:   This is a penalty for each gap in the fast alignments.  It has
+little effect on the speed or sensitivity except for extreme values.
+
+TOP DIAGONALS: The number of k-tuple matches on each diagonal (in an imaginary
+dot-matrix plot) is calculated.  Only the best ones (with most matches) are
+used in the alignment.  This parameter specifies how many.  Decrease for speed;
+increase for sensitivity.
+
+WINDOW SIZE:  This is the number of diagonals around each of the 'best' 
+diagonals that will be used.  Decrease for speed; increase for sensitivity.
+
+
+>>HELP 4 <<      Help for multiple alignment parameters
+
+These parameters control the final multiple alignment. This is the core of the
+program and the details are complicated. To fully understand the use of the
+parameters and the scoring system, you will have to refer to the documentation.
+
+Each step in the final multiple alignment consists of aligning two alignments 
+or sequences.  This is done progressively, following the branching order in 
+the GUIDE TREE.  The basic parameters to control this are two gap penalties and
+the scores for various identical-non-identical residues.  
+
+1) and 2) The GAP PENALTIES are set by menu items 1 and 2. These control the 
+cost of opening up every new gap and the cost of every item in a gap. 
+Increasing the gap opening penalty will make gaps less frequent. Increasing 
+the gap extension penalty will make gaps shorter. Terminal gaps are not 
+penalised.
+
+3) The DELAY DIVERGENT SEQUENCES switch delays the alignment of the most
+distantly related sequences until after the most closely related sequences have 
+been aligned.   The setting shows the percent identity level required to delay
+the addition of a sequence; sequences that are less identical than this level
+to any other sequences will be aligned later.
+
+
+
+4) The TRANSITION WEIGHT gives transitions (A <--> G or C <--> T 
+i.e. purine-purine or pyrimidine-pyrimidine substitutions) a weight between 0
+and 1; a weight of zero means that the transitions are scored as mismatches,
+while a weight of 1 gives the transitions the match score. For distantly related
+DNA sequences, the weight should be near to zero; for closely related sequences
+it can be useful to assign a higher score.
+
+
+5) PROTEIN WEIGHT MATRIX leads to a new menu where you are offered a choice of
+weight matrices. The default for proteins in version 1.8 is the PAM series 
+derived by Gonnet and colleagues. Note, a series is used! The actual matrix
+that is used depends on how similar the sequences to be aligned at this 
+alignment step are. Different matrices work differently at each evolutionary
+distance. 
+
+6) DNA WEIGHT MATRIX leads to a new menu where a single matrix (not a series)
+can be selected. The default is the matrix used by BESTFIT for comparison of
+nucleic acid sequences.
+
+Further help is offered in the weight matrix menu.
+
+
+7)  In the weight matrices, you can use negative as well as positive values if
+you wish, although the matrix will be automatically adjusted to all positive
+scores, unless the NEGATIVE MATRIX option is selected.
+
+8) PROTEIN GAP PARAMETERS displays a menu allowing you to set some Gap Penalty
+options which are only used in protein alignments.
+
+ 
+>>HELP A <<           Help for protein gap parameters.
+1) RESIDUE SPECIFIC PENALTIES are amino acid specific gap penalties that reduce
+or increase the gap opening penalties at each position in the alignment or
+sequence.  See the documentation for details.  As an example, positions that 
+are rich in glycine are more likely to have an adjacent gap than positions that
+are rich in valine.
+
+2) 3) HYDROPHILIC GAP PENALTIES are used to increase the chances of a gap within
+a run (5 or more residues) of hydrophilic amino acids; these are likely to
+be loop or random coil regions where gaps are more common.  The residues that 
+are "considered" to be hydrophilic are set by menu item 3.
+
+4) GAP SEPARATION DISTANCE tries to decrease the chances of gaps being too
+close to each other. Gaps that are less than this distance apart are penalised
+more than other gaps. This does not prevent close gaps; it makes them less
+frequent, promoting a block-like appearance of the alignment.
+
+5) END GAP SEPARATION treats end gaps just like internal gaps for the purposes
+of avoiding gaps that are too close (set by GAP SEPARATION DISTANCE above).
+If you turn this off, end gaps will be ignored for this purpose.  This is
+useful when you wish to align fragments where the end gaps are not biologically
+meaningful.
+>>HELP 5 <<      Help for output format options.
+
+Six output formats are offered. You can choose any (or all 6 if you wish).  
+
+CLUSTAL format output is a self explanatory alignment format.  It shows the
+sequences aligned in blocks.  It can be read in again at a later date to
+(for example) calculate a phylogenetic tree or add a new sequence with a 
+profile alignment.
+
+GCG output can be used by any of the GCG programs that can work on multiple
+alignments (e.g. PRETTY, PROFILEMAKE, PLOTALIGN).  It is the same as the GCG
+.msf format files (multiple sequence file); new in version 7 of GCG.
+
+PHYLIP format output can be used for input to the PHYLIP package of Joe 
+Felsenstein.  This is an extremely widely used package for doing every 
+imaginable form of phylogenetic analysis (MUCH more than the modest intro-
+duction offered by this program).
+
+NBRF-PIR:  this is the same as the standard PIR format with ONE ADDITION.  Gap
+characters "-" are used to indicate the positions of gaps in the multiple 
+alignment.  These files can be re-used as input in any part of clustal that
+allows sequences (or alignments or profiles) to be read in.  
+
+GDE:  this is the flat file format used by the GDE package of Steven Smith.
+
+NEXUS: the format used by several phylogeny programs, including PAUP and
+MacClade.
+
+GDE OUTPUT CASE: sequences in GDE format may be written in either upper or
+lower case.
+
+CLUSTALW SEQUENCE NUMBERS: residue numbers may be added to the end of the
+alignment lines in clustalw format.
+
+OUTPUT ORDER is used to control the order of the sequences in the output
+alignments.  By default, the order corresponds to the order in which the
+sequences were aligned (from the guide tree-dendrogram), thus automatically
+grouping closely related sequences. This switch can be used to set the order
+to the same as the input file.
+
+PARAMETER OUTPUT: This option allows you to save all your parameter settings
+in a parameter file. This file can be used subsequently to rerun Clustal W
+using the same parameters.
+
+>>HELP 6 <<      Help for profile and structure alignments
+   
+By PROFILE ALIGNMENT, we mean alignment using existing alignments. Profile 
+alignments allow you to store alignments of your favourite sequences and add
+new sequences to them in small bunches at a time. A profile is simply an
+alignment of one or more sequences (e.g. an alignment output file from CLUSTAL
+W). Each input can be a single sequence. One or both sets of input sequences
+may include secondary structure assignments or gap penalty masks to guide the
+alignment. 
+
+The profiles can be in any of the allowed input formats with "-" characters
+used to specify gaps (except for MSF-RSF where "." is used).
+
+You have to specify the 2 profiles by choosing menu items 1 and 2 and giving
+2 file names.  Then Menu item 3 will align the 2 profiles to each other. 
+Secondary structure masks in either profile can be used to guide the alignment.
+
+Menu item 4 will take the sequences in the second profile and align them to
+the first profile, 1 at a time.  This is useful to add some new sequences to
+an existing alignment, or to align a set of sequences to a known structure.  
+In this case, the second profile would not be pre-aligned.
+
+
+The alignment parameters can be set using menu items 5, 6 and 7. These are
+EXACTLY the same parameters as used by the general, automatic multiple
+alignment procedure. The general multiple alignment procedure is simply a
+series of profile alignments. Carrying out a series of profile alignments on
+larger and larger groups of sequences, allows you to manually build up a
+complete alignment, if necessary editing intermediate alignments.
+
+SECONDARY STRUCTURE OPTIONS. Menu Option 0 allows you to set 2D structure
+parameters. If a solved structure is available, it can be used to guide the 
+alignment by raising gap penalties within secondary structure elements, so 
+that gaps will preferentially be inserted into unstructured surface loops.
+Alternatively, a user-specified gap penalty mask can be supplied directly.
+
+A gap penalty mask is a series of numbers between 1 and 9, one per position in 
+the alignment. Each number specifies how much the gap opening penalty is to be 
+raised at that position (raised by multiplying the basic gap opening penalty
+by the number) i.e. a mask figure of 1 at a position means no change
+in gap opening penalty; a figure of 4 means that the gap opening penalty is
+four times greater at that position, making gaps 4 times harder to open.
+
+The format for gap penalty masks and secondary structure masks is explained
+in the help under option 0 (secondary structure options).
+>>HELP B <<      Help for secondary structure - gap penalty masks
+
+The use of secondary structure-based penalties has been shown to improve the
+accuracy of multiple alignment. Therefore CLUSTAL W now allows gap penalty 
+masks to be supplied with the input sequences. The masks work by raising gap 
+penalties in specified regions (typically secondary structure elements) so that
+gaps are preferentially opened in the less well conserved regions (typically 
+surface loops).
+
+Options 1 and 2 control whether the input secondary structure information or
+gap penalty masks will be used.
+
+Option 3 controls whether the secondary structure and gap penalty masks should
+be included in the output alignment.
+
+Options 4 and 5 provide the value for raising the gap penalty at core Alpha 
+Helical (A) and Beta Strand (B) residues. In CLUSTAL format, capital residues 
+denote the A and B core structure notation. The basic gap penalties are
+multiplied by the amount specified.
+
+Option 6 provides the value for the gap penalty in Loops. By default this 
+penalty is not raised. In CLUSTAL format, loops are specified by "." in the 
+secondary structure notation.
+
+Option 7 provides the value for setting the gap penalty at the ends of 
+secondary structures. Ends of secondary structures are observed to grow 
+and-or shrink in related structures. Therefore by default these are given 
+intermediate values, lower than the core penalties. All secondary structure 
+read in as lower case in CLUSTAL format gets the reduced terminal penalty.
+
+Options 8 and 9 specify the range of structure termini for the intermediate 
+penalties. In the alignment output, these are indicated as lower case. 
+For Alpha Helices, by default, the range spans the end helical turn. For 
+Beta Strands, the default range spans the end residue and the adjacent loop 
+residue, since sequence conservation often extends beyond the actual H-bonded
+Beta Strand.
+
+CLUSTAL W can read the masks from SWISS-PROT, CLUSTAL or GDE format input
+files. For many 3-D protein structures, secondary structure information is
+recorded in the feature tables of SWISS-PROT database entries. You should
+always check that the assignments are correct - some are quite inaccurate.
+CLUSTAL W looks for SWISS-PROT HELIX and STRAND assignments e.g.
+
+FT   HELIX       100    115
+FT   STRAND      118    119
+
+The structure and penalty masks can also be read from CLUSTAL alignment format 
+as comment lines beginning "!SS_" or "!GM_" e.g.
+
+!SS_HBA_HUMA    ..aaaAAAAAAAAAAaaa.aaaAAAAAAAAAAaaaaaaAaaa.........aaaAAAAAA
+!GM_HBA_HUMA    112224444444444222122244444444442222224222111111111222444444
+HBA_HUMA        VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGK
+
+Note that the mask itself is a set of numbers between 1 and 9 each of which is 
+assigned to the residue(s) in the same column below. 
+
+In GDE flat file format, the masks are specified as text and the names must
+begin with "SS_" or "GM_".
+
+Either a structure or penalty mask or both may be used. If both are included in
+an alignment, the user will be asked which is to be used.
+
+>>HELP C <<      Help for secondary structure - gap penalty mask output options
+   
+   The options in this menu let you choose whether or not to include the masks
+in the CLUSTAL W output alignments. Showing both is useful for understanding
+how the masks work. The secondary structure information is itself very useful
+in judging the alignment quality and in seeing how residue conservation
+patterns vary with secondary structure. 
+
+
+>>HELP 7 <<      Help for phylogenetic trees
+
+1) Before calculating a tree, you must have an ALIGNMENT in memory. This can be
+input in any format or you should have just carried out a full multiple
+alignment and the alignment is still in memory. 
+
+
+*************** Remember YOU MUST ALIGN THE SEQUENCES FIRST!!!! ***************
+
+
+The method used is the NJ (Neighbour Joining) method of Saitou and Nei. First
+you calculate distances (percent divergence) between all pairs of sequences from
+a multiple alignment; second you apply the NJ method to the distance matrix.
+
+2) EXCLUDE POSITIONS WITH GAPS? With this option, any alignment positions where
+ANY of the sequences have a gap will be ignored. This means that 'like' will be
+compared to 'like' in all distances, which is highly desirable. It also
+automatically throws away the most ambiguous parts of the alignment, which are
+concentrated around gaps (usually). The disadvantage is that you may throw away
+much of the data if there are many gaps (which is why it is difficult for us to
+make it the default).  
+
+
+
+3) CORRECT FOR MULTIPLE SUBSTITUTIONS? For small divergence (say <10%) this
+option makes no difference. For greater divergence, it corrects for the fact
+that observed distances underestimate actual evolutionary distances. This is
+because, as sequences diverge, more than one substitution will happen at many
+sites. However, you only see one difference when you look at the present day
+sequences. Therefore, this option has the effect of stretching branch lengths
+in trees (especially long branches). The corrections used here (for DNA or
+proteins) are both due to Motoo Kimura. See the documentation for details.  
+
+Where possible, this option should be used. However, for VERY divergent
+sequences, the distances cannot be reliably corrected. You will be warned if
+this happens. Even if none of the distances in a data set exceed the reliable
+threshold, if you bootstrap the data, some of the bootstrap distances may
+randomly exceed the safe limit.  
+
+4) To calculate a tree, use option 4 (DRAW TREE NOW). This gives an UNROOTED
+tree and all branch lengths. The root of the tree can only be inferred by
+using an outgroup (a sequence that you are certain branches at the outside
+of the tree .... certain on biological grounds) OR if you assume a degree
+of constancy in the 'molecular clock', you can place the root in the 'middle'
+of the tree (roughly equidistant from all tips).
+
+5) TOGGLE PHYLIP BOOTSTRAP POSITIONS
+By default, the bootstrap values are correctly placed on the tree branches of
+the phylip format output tree. The toggle allows them to be placed on the
+nodes, which is incorrect, but some display packages (e.g. TreeTool, TreeView
+and Phylowin) only support node labelling but not branch labelling. Care
+should be taken to note which branches and labels go together.
+
+6) OUTPUT FORMATS: four different formats are allowed. None of these displays
+the tree visually. Useful display programs accepting PHYLIP format include
+NJplot (from Manolo Gouy and supplied with Clustal W), TreeView (Mac-PC), and
+PHYLIP itself - OR get the PHYLIP package and use the tree drawing facilities
+there. (Get the PHYLIP package anyway if you are interested in trees). The
+NEXUS format can be read into PAUP or MacClade.
+
+>>HELP 8 <<      Help for choosing a weight matrix
+
+For protein alignments, you use a weight matrix to determine the similarity of
+non-identical amino acids.  For example, Tyr aligned with Phe is usually judged 
+to be 'better' than Tyr aligned with Pro.
+
+There are three 'in-built' series of weight matrices offered. Each consists of
+several matrices which work differently at different evolutionary distances. To
+see the exact details, read the documentation. Crudely, we store several
+matrices in memory, spanning the full range of amino acid distance (from almost
+identical sequences to highly divergent ones). For very similar sequences, it
+is best to use a strict weight matrix which only gives a high score to
+identities and the most favoured conservative substitutions. For more divergent
+sequences, it is appropriate to use "softer" matrices which give a high score
+to many other frequent substitutions.
+
+1) BLOSUM (Henikoff). These matrices appear to be the best available for 
+carrying out database similarity (homology searches). The matrices used are:
+Blosum 80, 62, 45 and 30. (BLOSUM was the default in earlier Clustal W
+versions)
+
+2) PAM (Dayhoff). These have been extremely widely used since the late '70s.
+We use the PAM 20, 60, 120 and 350 matrices.
+
+3) GONNET. These matrices were derived using almost the same procedure as the
+Dayhoff one (above) but are much more up to date and are based on a far larger
+data set. They appear to be more sensitive than the Dayhoff series. We use the
+GONNET 80, 120, 160, 250 and 350 matrices. This series is the default for
+Clustal W version 1.8.
+
+We also supply an identity matrix which gives a score of 1.0 to two identical 
+amino acids and a score of zero otherwise. This matrix is not very useful.
+Alternatively, you can read in your own (just one matrix, not a series).
+
+A new matrix can be read from a file on disk, if the filename consists only
+of lower case characters. The values in the new weight matrix must be integers
+and the scores should be similarities. You can use negative as well as positive
+values if you wish, although the matrix will be automatically adjusted to all
+positive scores.
+
+
+
+For DNA, a single matrix (not a series) is used. Two hard-coded matrices are 
+available:
+
+
+1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
+of nucleic acid sequences. X's and N's are treated as matches to any IUB
+ambiguity symbol. All matches score 1.9; all mismatches for IUB symbols score 0.
+ 
+ 
+2) CLUSTALW(1.6). The previous system used by Clustal W, in which matches score
+1.0 and mismatches score 0. All matches for IUB symbols also score 0.
+
+INPUT FORMAT  The format used for a new matrix is the same as the BLAST program.
+Any lines beginning with a # character are assumed to be comments. The first
+non-comment line should contain a list of amino acids in any order, using the
+1 letter code, followed by a * character. This should be followed by a square
+matrix of integer scores, with one row and one column for each amino acid. The
+last row and column of the matrix (corresponding to the * character) contain
+the minimum score over the whole matrix.
+
+>>HELP 9 <<      Help for command line parameters
+                DATA (sequences)
+
+-INFILE=file.ext                             :input sequences.
+-PROFILE1=file.ext  and  -PROFILE2=file.ext  :profiles (old alignment).
+
+
+                VERBS (do things)
+
+-OPTIONS	    :list the command line parameters
+-HELP  or -CHECK    :outline the command line params.
+-ALIGN              :do full multiple alignment 
+-TREE               :calculate NJ tree.
+-BOOTSTRAP(=n)      :bootstrap a NJ tree (n= number of bootstraps; def. = 1000).
+-CONVERT            :output the input sequences in a different file format.
+
+
+                PARAMETERS (set things)
+
+***General settings:****
+-INTERACTIVE :read command line, then enter normal interactive menus
+-QUICKTREE   :use FAST algorithm for the alignment guide tree
+-TYPE=       :PROTEIN or DNA sequences
+-NEGATIVE    :protein alignment with negative values in matrix
+-OUTFILE=    :sequence alignment file name
+-OUTPUT=     :GCG, GDE, PHYLIP, PIR or NEXUS
+-OUTORDER=   :INPUT or ALIGNED
+-CASE        :LOWER or UPPER (for GDE output only)
+-SEQNOS=     :OFF or ON (for Clustal output only)
+-SEQNO_RANGE=:OFF or ON (NEW: for all output formats) 
+-RANGE=m,n   :sequence range to write starting m to m+n. 
+
+***Fast Pairwise Alignments:***
+-KTUPLE=n    :word size
+-TOPDIAGS=n  :number of best diags.
+-WINDOW=n    :window around best diags.
+-PAIRGAP=n   :gap penalty
+-SCORE       :PERCENT or ABSOLUTE
+
+
+***Slow Pairwise Alignments:***
+-PWMATRIX=    :Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename
+-PWDNAMATRIX= :DNA weight matrix=IUB, CLUSTALW or filename
+-PWGAPOPEN=f  :gap opening penalty        
+-PWGAPEXT=f   :gap extension penalty
+
+
+***Multiple Alignments:***
+-NEWTREE=      :file for new guide tree
+-USETREE=      :file for old guide tree
+-MATRIX=       :Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename
+-DNAMATRIX=    :DNA weight matrix=IUB, CLUSTALW or filename
+-GAPOPEN=f     :gap opening penalty        
+-GAPEXT=f      :gap extension penalty
+-ENDGAPS       :no end gap separation pen. 
+-GAPDIST=n     :gap separation pen. range
+-NOPGAP        :residue-specific gaps off  
+-NOHGAP        :hydrophilic gaps off
+-HGAPRESIDUES= :list hydrophilic res.    
+-MAXDIV=n      :% ident. for delay
+-TYPE=         :PROTEIN or DNA
+-TRANSWEIGHT=f :transitions weighting
+
+
+***Profile Alignments:***
+-PROFILE      :Merge two alignments by profile alignment
+-NEWTREE1=    :file for new guide tree for profile1
+-NEWTREE2=    :file for new guide tree for profile2
+-USETREE1=    :file for old guide tree for profile1
+-USETREE2=    :file for old guide tree for profile2
+
+
+***Sequence to Profile Alignments:***
+-SEQUENCES   :Sequentially add profile2 sequences to profile1 alignment
+-NEWTREE=    :file for new guide tree
+-USETREE=    :file for old guide tree
+
+
+***Structure Alignments:***
+-NOSECSTR1     :do not use secondary structure-gap penalty mask for profile 1 
+-NOSECSTR2     :do not use secondary structure-gap penalty mask for profile 2
+-SECSTROUT=STRUCTURE or MASK or BOTH or NONE   :output in alignment file
+-HELIXGAP=n    :gap penalty for helix core residues 
+-STRANDGAP=n   :gap penalty for strand core residues
+-LOOPGAP=n     :gap penalty for loop regions
+-TERMINALGAP=n :gap penalty for structure termini
+-HELIXENDIN=n  :number of residues inside helix to be treated as terminal
+-HELIXENDOUT=n :number of residues outside helix to be treated as terminal
+-STRANDENDIN=n :number of residues inside strand to be treated as terminal
+-STRANDENDOUT=n:number of residues outside strand to be treated as terminal 
+
+
+***Trees:***
+-OUTPUTTREE=nj OR phylip OR dist OR nexus
+-SEED=n        :seed number for bootstraps.
+-KIMURA        :use Kimura's correction.   
+-TOSSGAPS      :ignore positions with gaps.
+-BOOTLABELS=node OR branch :position of bootstrap values in tree display
+
+>>HELP 0 <<           Help for tree output format options
+
+Four output formats are offered: 1) Clustal, 2) Phylip, 3) Just the distances
+4) Nexus
+
+None of these formats displays the results graphically. Many packages can
+display trees in the PHYLIP format 2) below. It can also be imported into
+the PHYLIP programs RETREE, DRAWTREE and DRAWGRAM for graphical display. 
+NEXUS format trees can be read by PAUP and MacClade.
+
+1) Clustal format output. 
+This format is verbose and lists all of the distances between the sequences and
+the number of alignment positions used for each. The tree is described at the
+end of the file. It lists the sequences that are joined at each alignment step
+and the branch lengths. After two sequences are joined, it is referred to later
+as a NODE. The number of a NODE is the number of the lowest sequence in that
+NODE.   
+
+2) Phylip format output.
+This format is the New Hampshire format, used by many phylogenetic analysis
+packages. It consists of a series of nested parentheses, describing the
+branching order, with the sequence names and branch lengths. It can be used by
+the RETREE, DRAWGRAM and DRAWTREE programs of the PHYLIP package to see the
+trees graphically. This is the same format used during multiple alignment for
+the guide trees. 
+
+Use this format with NJplot (Manolo Gouy), supplied with Clustal W. Some other
+packages that can read and display New Hampshire format are TreeView (Mac/PC),
+TreeTool (UNIX), and Phylowin.
+
+3) The distances only.
+This format just outputs a matrix of all the pairwise distances in a format
+that can be used by the Phylip package. It used to be useful when one could not
+produce distances from protein sequences in the Phylip package but is now
+redundant (Protdist of Phylip 3.5 now does this).
+
+4) NEXUS FORMAT TREE. This format is used by several popular phylogeny programs,
+including PAUP and MacClade. The format is described fully in:
+Maddison, D. R., D. L. Swofford and W. P. Maddison.  1997.
+NEXUS: an extensible file format for systematic information.
+Systematic Biology 46:590-621.
+
+5) TOGGLE PHYLIP BOOTSTRAP POSITIONS
+By default, the bootstrap values are placed on the nodes of the phylip format
+output tree. This is inaccurate as the bootstrap values should be associated
+with the tree branches and not the nodes. However, this format can be read and
+displayed by TreeTool, TreeView and Phylowin. An option is available to
+correctly place the bootstrap values on the branches with which they are
+associated.
+

Added: trunk/packages/clustalw/branches/upstream/current/clustalx.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/clustalx.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/clustalx.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,129 @@
+#include <string.h>
+#include <stdlib.h>
+#include <vibrant.h>
+
+#include "clustalw.h"
+
+/*
+*	Prototypes of functions called from this file but not defined in it
+*/
+
+extern void *ckalloc(size_t);
+extern void init_interface(void);
+extern void init_matrix(void);
+extern void fill_chartab(void);
+extern void parse_params(Boolean);
+extern void x_menu(void);
+
+/*
+*	Global variables defined in this file (none are static, so all have external linkage)
+*/
+ 
+double **tmat;
+
+char revision_level[] = "X (1.83)";  /* JULIE  feb 2001*/
+Boolean interactive=TRUE; /* Main() assigns TRUE again just before calling x_menu() */
+#ifdef MSDOS
+        char *help_file_name = "clustalx.hlp"; /* name used when MSDOS is defined */
+#else
+        char *help_file_name = "clustalx_help"; /* name used on all other builds */
+#endif
+
+sint max_names; /* maximum length of names in current alignment file */
+ 
+float		gap_open, gap_extend;
+float           pw_go_penalty, pw_ge_penalty;
+
+FILE *tree;
+FILE *clustal_outfile, *gcg_outfile, *nbrf_outfile, *phylip_outfile,
+     *gde_outfile, *nexus_outfile;
+FILE *fasta_outfile; /* Ramu */
+sint  *seqlen_array;
+sint max_aln_length;
+short usermat[NUMRES][NUMRES], pw_usermat[NUMRES][NUMRES];
+short score_matrix[NUMRES][NUMRES],score_dnamatrix[NUMRES][NUMRES];
+short segment_matrix[NUMRES][NUMRES],segment_dnamatrix[NUMRES][NUMRES];
+short def_aa_xref[NUMRES+1], aa_xref[NUMRES+1], pw_aa_xref[NUMRES+1];
+short userdnamat[NUMRES][NUMRES], pw_userdnamat[NUMRES][NUMRES];
+short def_dna_xref[NUMRES+1], dna_xref[NUMRES+1], pw_dna_xref[NUMRES+1];
+short score_aa_xref[NUMRES+1],score_dna_xref[NUMRES+1];
+short segment_aa_xref[NUMRES+1],segment_dna_xref[NUMRES+1];
+sint nseqs;
+sint nsets;
+sint *output_index;
+sint **sets;
+sint *seq_weight;
+sint max_aa;
+sint gap_pos1;
+sint gap_pos2;
+sint mat_avscore;
+sint profile_no;
+
+Boolean usemenu=FALSE; /* Main() assigns FALSE again before calling parse_params() */
+Boolean dnaflag;
+Boolean distance_tree;
+
+char  **seq_array;
+char **names,**titles;
+char **args; /* Main() fills this with copies of argv[1..argc-1], then frees it */
+char seqname[FILENAMELEN+1];
+
+char *gap_penalty_mask1 = NULL, *gap_penalty_mask2 = NULL;
+char *sec_struct_mask1 = NULL, *sec_struct_mask2 = NULL;
+sint struct_penalties;
+char *ss_name1 = NULL, *ss_name2 = NULL;
+
+Boolean user_series = FALSE;
+UserMatSeries matseries;
+short usermatseries[MAXMAT][NUMRES][NUMRES];
+short aa_xrefseries[MAXMAT][NUMRES+1];
+
+extern Int2 Main(void) /* Program entry: initialise, handle any command-line arguments, then run x_menu(); always returns 0. NOTE(review): presumably invoked by the Vibrant toolkit (<vibrant.h>) - confirm */
+ 
+{
+	int i; /* loop index; only used inside the #ifndef WIN_MAC section below */
+
+#ifndef WIN_MAC /* command-line arguments are only looked at when WIN_MAC is not defined */
+#ifdef GetArgc /* if a GetArgc macro exists, fetch the arguments through GetArgc()/GetArgv() */
+	int argc;
+	char **argv;
+	
+	argc=GetArgc();
+	argv=GetArgv();
+#else /* otherwise rely on argc/argv objects defined in some other translation unit */
+	extern int argc;
+	extern char **argv;
+#endif
+#endif
+
+    init_interface();
+    init_matrix();
+	
+	fill_chartab();
+
+#ifndef WIN_MAC
+	if(argc>1) { /* at least one argument besides argv[0] */
+               args = (char **)ckalloc(argc * sizeof(char *)); /* argc slots allocated; only argc-1 are filled below */
+
+                for(i=1;i<argc;++i) /* copy argv[1..argc-1] into args[0..argc-2], skipping argv[0] */
+                {
+                        args[i-1]=(char *)ckalloc((strlen(argv[i])+1) * sizeof(char)); /* +1 for the terminating NUL */
+                        strcpy(args[i-1],argv[i]);
+                }
+                usemenu=FALSE;
+                parse_params(TRUE); /* NOTE(review): presumably reads the global args array filled above - confirm in parse_params() */
+
+                for(i=0;i<argc-1;i++) /* release the argc-1 copied strings, then the pointer array itself */
+                        ckfree(args[i]);
+                ckfree(args);
+
+	}
+#endif
+	interactive=TRUE; /* set unconditionally, whether or not arguments were processed above */
+	x_menu();
+	
+	return 0; 
+	/*	exit(0); */
+}
+

Added: trunk/packages/clustalw/branches/upstream/current/clustalx.html
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/clustalx.html	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/clustalx.html	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,2112 @@
+<HEAD>
+<TITLE>ClustalX Help</TITLE>
+</HEAD>
+<BODY BGCOLOR=white>
+<CENTER><H1>ClustalX Help</H1></CENTER>
+<P>
+You can get the latest version of the ClustalX program here:
+</P>
+<DL><DD>
+<A HREF="ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalX/">
+ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalX/</A>
+</DL>
+<P>For full details of usage and algorithms, please read the <A HREF="clustalw.doc"><EM>ClustalW.Doc</EM></A> file.</P>
+<PRE><EM>
+Toby  Gibson                         EMBL, Heidelberg, Germany.
+Des   Higgins                        UCC, Cork, Ireland.
+Julie Thompson/Francois Jeanmougin   IGBMC, Strasbourg, France.
+</EM></PRE>
+<CENTER><H2><A NAME="Index">Index</A></H2></CENTER>
+<OL>
+<LI><A HREF="#G">                      General help for CLUSTAL X (1.8)
+</A></LI>
+<LI><A HREF="#F">                      Input / Output Files 
+</A></LI>
+<LI><A HREF="#E">                          Editing Alignments
+</A></LI>
+<LI><A HREF="#M">                          Multiple Alignments
+</A></LI>
+<LI><A HREF="#P">                   Profile and Structure Alignments
+</A></LI>
+<LI><A HREF="#B">            Secondary Structure / Gap Penalty Masks
+</A></LI>
+<LI><A HREF="#T">                            Phylogenetic Trees
+</A></LI>
+<LI><A HREF="#C">                               Colors
+</A></LI>
+<LI><A HREF="#Q">                       Alignment Quality Analysis
+</A></LI>
+<LI><A HREF="#9">              Command Line Parameters
+</A></LI>
+<LI><A HREF="#R">                             References
+</A></LI>
+</OL>
+<CENTER><H2><A NAME="G">                      General help for CLUSTAL X (1.8)
+</A></H2></CENTER>
+<P>
+</P>
+<P>
+Clustal X is a windows interface for the ClustalW multiple sequence alignment
+program. It provides an integrated environment for performing multiple sequence
+and profile alignments and analysing the results. The sequence alignment is
+displayed in a window on the screen. A versatile coloring scheme has been
+incorporated allowing you to highlight conserved features  in the alignment.
+The pull-down menus at the top of the window allow you to select all the
+options required for traditional multiple sequence and profile alignment.
+</P>
+<P>
+You can cut-and-paste sequences to change the order of the alignment; you can
+select a subset of sequences to be aligned; you can select a sub-range of the
+alignment to be realigned and inserted back into the original alignment.
+</P>
+<P>
+Alignment quality analysis can be performed and low-scoring segments or
+exceptional residues can be highlighted.
+</P>
+<P>
+ClustalX is available for a number of different platforms including: SUN
+Solaris, IRIX5.3 on Silicon Graphics, Digital UNIX on DECStations, Microsoft
+Windows (32 bit) for PC's, Linux ELF for x86 PC's and Macintosh PowerMac. (See
+the README file for Installation instructions.)
+</P>
+<P>
+</P>
+<P>
+<H4>
+SEQUENCE INPUT
+</H4>
+</P>
+<P>
+Sequences and profiles (a term for pre-existing alignments) are input using 
+the FILE menu. Invalid options will be disabled. All sequences must be included
+in 1 file. 7 formats are automatically recognised: NBRF/PIR, EMBL/SWISSPROT,
+Pearson (Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9 RSF and GDE flat file.
+All non-alphabetic characters (spaces, digits, punctuation marks) are ignored
+except "-" which is used to indicate a GAP ("." in MSF/RSF).  
+</P>
+<P>
+<H4>
+SEQUENCE / PROFILE ALIGNMENTS
+</H4>
+</P>
+<P>
+Clustal X has two modes which can be selected using the switch directly above
+the sequence display: MULTIPLE ALIGNMENT MODE and PROFILE ALIGNMENT MODE.
+</P>
+<P>
+To do a MULTIPLE ALIGNMENT on a set of sequences, make sure MULTIPLE ALIGNMENT
+MODE is selected. A single sequence data area is then displayed. The ALIGNMENT
+menu then allows you to either produce a guide tree for the alignment, or to do
+a multiple alignment following the guide tree, or to do a full multiple
+alignment.
+</P>
+<P>
+In PROFILE ALIGNMENT MODE, two sequence data areas are displayed, allowing you
+to align 2 alignments (termed profiles). Profiles are also used to add a new
+sequence to an old alignment, or to use secondary structure to guide the
+alignment process. GAPS in the old alignments are indicated using the "-" 
+character. PROFILES can be input in ANY of the allowed formats; just  use "-"
+(or "." for MSF/RSF) for each gap position. In Profile Alignment Mode, a button
+"Lock Scroll" is displayed which allows you to scroll the two profiles together
+using a single scroll bar. When the Lock Scroll is turned off, the two profiles
+can be scrolled independently.
+</P>
+<P>
+<H4>
+PHYLOGENETIC TREES
+</H4>
+</P>
+<P>
+Phylogenetic trees can be calculated from old alignments (read in with "-"
+characters to indicate gaps) OR after a multiple alignment while the alignment
+is still displayed.
+</P>
+<P>
+<H4>
+ALIGNMENT DISPLAY
+</H4>
+</P>
+<P>
+The alignment is displayed on the screen with the sequence names on the left
+hand side. The sequence alignment is for display only, it cannot be edited here
+(except for changing the sequence order by cutting-and-pasting on the sequence
+names). 
+</P>
+<P>
+A ruler is displayed below the sequences, starting at 1 for the first residue
+position (residue numbers in the sequence input file are ignored).
+</P>
+<P>
+A line above the alignment is used to mark strongly conserved positions. Three
+characters ('*', ':' and '.') are used:
+</P>
+<P>
+'*' indicates positions which have a single, fully conserved residue
+</P>
+<P>
+':' indicates that one of the following 'strong' groups is fully conserved:-
+<PRE>
+                 STA  
+                 NEQK  
+                 NHQK  
+                 NDEQ  
+                 QHRK  
+                 MILV  
+                 MILF  
+                 HY  
+                 FYW  
+</PRE>
+</P>
+<P>
+'.' indicates that one of the following 'weaker' groups is fully conserved:-
+<PRE>
+                 CSA  
+                 ATV  
+                 SAG  
+                 STNK  
+                 STPA  
+                 SGND  
+                 SNDEQK  
+                 NDEQHK  
+                 NEQHRK  
+                 FVLIM  
+                 HFY  
+</PRE>
+</P>
+<P>
+These are all the positively scoring groups that occur in the Gonnet Pam250
+matrix. The strong and weak groups are defined as strong score >0.5 and weak
+score &lt;=0.5 respectively.
+</P>
+<P>
+For profile alignments, secondary structure and gap penalty masks are displayed
+above the sequences, if any data is found in the profile input file.
+</P>
+<P>
+</P>
+<P>
+</P>
+<A HREF="#Index"> <EM>Back to Index</EM> </A>
+<CENTER><H2><A NAME="F">                      Input / Output Files 
+</A></H2></CENTER>
+<P>
+</P>
+<P>
+LOAD SEQUENCES reads sequences from one of 7 file formats, replacing any
+sequences that are already loaded. All sequences must be in 1 file. The formats
+that are automatically recognised are: NBRF/PIR, EMBL/SWISSPROT, Pearson
+(Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9/RSF and GDE flat file.  All
+non-alphabetic characters (spaces, digits, punctuation  marks) are ignored
+except "-" which is used to indicate a GAP ("." in MSF/RSF).
+</P>
+<P>
+The program tries to automatically recognise the different file formats used
+and to guess whether the sequences are amino acid or nucleotide.  This is not
+always foolproof.
+</P>
+<P>
+FASTA and NBRF/PIR formats are recognised by having a ">" as the first 
+character in the file.  
+</P>
+<P>
+EMBL/Swiss Prot formats are recognised by the letters "ID" at the start of the
+file (the token for the entry name field).  
+</P>
+<P>
+CLUSTAL format is recognised by the word CLUSTAL at the beginning of the file.
+</P>
+<P>
+GCG/MSF format is recognised by one of the following:
+<UL>
+<LI>
+       - the word PileUp at the start of the file.
+</LI><LI>
+       - the word !!AA_MULTIPLE_ALIGNMENT or !!NA_MULTIPLE_ALIGNMENT
+         at the start of the file.
+</LI><LI>
+       - the word MSF on the first line of the file, and the characters ..
+         at the end of this line.
+</LI>
+</UL>
+</P>
+<P> 
+GCG/RSF format is recognised by the word !!RICH_SEQUENCE at the beginning of
+the file.
+</P>
+<P>
+</P>
+<P>
+If 85% or more of the characters in the sequence are from A,C,G,T,U or N, the
+sequence will be assumed to be nucleotide.  This works in 97.3% of cases but
+watch out!
+</P>
+<P>
+APPEND SEQUENCES is only valid in MULTIPLE ALIGNMENT MODE. The input sequences
+do not replace those already loaded, but are appended at the end of the
+alignment.
+</P>
+<P>
+SAVE SEQUENCES AS... offers the user a choice of one of six output formats:
+CLUSTAL, NBRF/PIR, GCG/MSF, PHYLIP, NEXUS or GDE. All sequences are written
+to a single file. Options are available to save a range of the alignment, 
+switch between UPPER/LOWER case for GDE files, and to output SEQUENCE NUMBERING
+for CLUSTAL files.
+</P>
+<P>
+LOAD PROFILE 1 reads sequences in the same 7 file formats, replacing any
+sequences already loaded as Profile 1. This option will also remove any
+sequences which are loaded in Profile 2.
+</P>
+<P>
+LOAD PROFILE 2 reads sequences in the same 7 file formats, replacing any
+sequences already loaded as Profile 2.
+</P>
+<P>
+SAVE PROFILE 1 AS... is similar to the Save Sequences option except that only
+those sequences in Profile 1 will be written to the output file.
+</P>
+<P>
+SAVE PROFILE 2 AS... is similar to the Save Sequences option except that only
+those sequences in Profile 2 will be written to the output file.
+</P>
+<P>
+WRITE ALIGNMENT AS POSTSCRIPT will write the sequence display to a postscript
+format file. This will include any secondary structure / gap penalty mask 
+information and the consensus and ruler lines which are displayed on the
+screen. The Alignment Quality curve can be optionally included in the output
+file.
+</P>
+<P>
+WRITE PROFILE 1 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT
+except that only the profile 1 display will be printed.
+</P>
+<P>
+WRITE PROFILE 2 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT
+except that only the profile 2 display will be printed.
+</P>
+<P>
+</P>
+<P>
+<H4>
+POSTSCRIPT PARAMETERS
+</H4>
+</P>
+<P>
+A number of options are available to allow you to configure your postscript
+output file.
+</P>
+<P>
+PS COLORS FILE:
+</P>
+<P>
+The exact RGB values required to reproduce the colors used in the alignment
+window will vary from printer to printer. A PS colors file can be specified
+that contains the RGB values for all the colors required by each of your
+postscript printers.
+</P>
+<P>
+By default, Clustal X looks for a file called 'colprint.par' in the current
+directory (if you're running under UNIX, it then looks in your home directory,
+and finally in the directories in your PATH environment variable). If no PS
+colors file is found or a color used on the screen is not defined here, the
+screen RGB values (from the Color Parameter File) are used.
+</P>
+<P>
+The PS colors file consists of one line for each color to be defined, with the
+color name followed by the RGB values (on a scale of 0 to 1). For example,
+</P>
+<P>
+RED          0.9 0.1 0.1
+</P>
+<P>
+Blank lines and comments (lines beginning with a '#' character) are ignored.
+</P>
+<P>
+</P>
+<P>
+PAGE SIZE:  The alignment can be displayed on either A4, A3 or US Letter size
+pages.
+</P>
+<P>
+ORIENTATION: The alignment can be displayed on either a landscape or portrait
+page.
+</P>
+<P>
+PRINT HEADER: An optional header including the postscript filename, and
+creation date can be printed at the top of each page.
+</P>
+<P>
+PRINT QUALITY CURVE: The Alignment Quality curve which is displayed underneath
+the alignment on the screen can be included in the postscript output.
+</P>
+<P>
+PRINT RULER: The ruler which is displayed underneath the alignment on the 
+screen can be included in the postscript output.
+</P>
+<P>
+PRINT RESIDUE NUMBERS: Sequence residue numbers can be printed at the right
+hand side of the alignment.
+</P>
+<P>
+RESIZE TO FIT PAGE: By default, the alignment is scaled to fit the page size
+selected. This option can be turned off, in which case a font size of 10 will
+be used for the sequences.
+</P>
+<P>
+PRINT FROM POSITION/TO: A range of the alignment can be printed. The default
+is to print the full alignment. The first and last residues to be printed are
+specified here.
+</P>
+<P>
+USE BLOCK LENGTH: The alignment can be divided into blocks of residues. The
+number of residues in a block is specified here. More than one block may then
+be printed on a single page. This is useful for long alignments of a small
+number of sequences. If the block length is set to 0, the alignment will not
+be divided into blocks, but printed across a number of pages.
+</P>
+<P>
+</P>
+<A HREF="#Index"> <EM>Back to Index</EM> </A>
+<CENTER><H2><A NAME="E">                          Editing Alignments
+</A></H2></CENTER>
+<P>
+</P>
+<P>
+Clustal X allows you to change the order of the sequences in the alignment, by
+cutting-and-pasting the sequence names.
+</P>
+<P>
+To select a group of sequences to be moved, click on a sequence name and drag
+the cursor until all the required sequences are highlighted. Holding down the
+Shift key when clicking on the first name will add new sequences to those
+already selected.
+</P>
+<P>
+(Options are provided to Select All Sequences, Select Profile 1 or Select 
+Profile 2.)
+</P>
+<P>
+The selected sequences can be removed from the alignment by using the EDIT
+menu, CUT option.
+</P>
+<P>
+To add the cut sequences back into an alignment, select a sequence by clicking
+on the sequence name. The cut sequences will be added to the alignment,
+immediately following the selected sequence, by the EDIT menu, PASTE option.
+</P>
+<P>
+To add the cut sequences to an empty alignment (eg. when cutting sequences from
+Profile 1 and pasting them to Profile 2), click on the empty sequence name
+display area, and select the EDIT menu, PASTE option as before.
+</P>
+<P>
+The sequence selection and sequence range selection can be cleared using the
+EDIT menu, CLEAR SEQUENCE SELECTION and CLEAR RANGE SELECTION options
+respectively.
+</P>
+<P>
+To search for a string of residues in the sequences, select the sequences to be
+searched by clicking on the sequence names. You can then enter the string to
+search for by selecting the SEARCH FOR STRING option. If the string is found in
+any of the sequences selected, the sequence name and column number are printed
+below the sequence display.
+</P>
+<P>
+In PROFILE ALIGNMENT MODE, the two profiles can be merged (normally done after
+alignment) by selecting ADD PROFILE 2 TO PROFILE 1. The sequences currently
+displayed as Profile 2 will be appended to Profile 1. 
+</P>
+<P>
+The REMOVE ALL GAPS option will remove all gaps from the sequences currently
+selected.
+WARNING: This option removes ALL gaps, not only those introduced by ClustalX,
+but also those that were read from the input alignment file. Any secondary
+structure information associated with the alignment will NOT be automatically
+realigned.
+</P>
+<P>
+The REMOVE GAP-ONLY COLUMNS will remove those positions in the alignment which
+contain gaps in all sequences. This can occur as a result of removing divergent
+sequences from an alignment, or if an alignment has been realigned.
+</P>
+<P>
+</P>
+<A HREF="#Index"> <EM>Back to Index</EM> </A>
+<CENTER><H2><A NAME="M">                          Multiple Alignments
+</A></H2></CENTER>
+<P>
+</P>
+<P>
+Make sure MULTIPLE ALIGNMENT MODE is selected, using the switch directly above
+the sequence display area. Then, use the ALIGNMENT menu to do multiple
+alignments.
+</P>
+<P>
+Multiple alignments are carried out in 3 stages:
+</P>
+<P> 
+1) all sequences are compared to each other (pairwise alignments);
+</P>
+<P> 
+2) a dendrogram (like a phylogenetic tree) is constructed, describing the
+approximate groupings of the sequences by similarity (stored in a file).
+</P>
+<P> 
+3) the final multiple alignment is carried out, using the dendrogram as a guide.
+</P>
+<P>
+The 3 stages are carried out automatically by the DO COMPLETE ALIGNMENT option.
+You can skip the first stages (pairwise alignments; guide tree) by using an old
+guide tree file (DO ALIGNMENT FROM GUIDE TREE); or you can just produce the
+guide tree with no final multiple alignment (PRODUCE GUIDE TREE ONLY).
+</P>
+<P>
+</P>
+<P>
+REALIGN SELECTED SEQUENCES is used to realign badly aligned sequences in the
+alignment. Sequences can be selected by clicking on the sequence names - see
+Editing Alignments for more details. The unselected sequences are then 'fixed'
+and a profile is made including only the unselected sequences. Each of the
+selected sequences in turn is then realigned to this profile. The realigned
+sequences will be displayed as a group at the end of the alignment.
+</P>
+<P>
+</P>
+<P>
+REALIGN SELECTED SEQUENCE RANGE is used to realign a small region of the 
+alignment. A residue range can be selected by clicking on the sequence display
+area. A multiple alignment is then performed, following the 3 stages described
+above, but only using the selected residue range. Finally the new alignment of
+the range is pasted back into the full sequence alignment.
+</P>
+<P>
+By default, gap penalties are used at each end of the subrange in order to 
+penalise terminal gaps. If the REALIGN SEGMENT END GAP PENALTIES option is
+switched off, gaps can be introduced at the ends of the residue range at no
+cost.
+</P>
+<P>
+</P>
+<P>
+ALIGNMENT PARAMETERS displays a sub-menu with the following options:
+</P>
+<P>
+RESET NEW GAPS BEFORE ALIGNMENT will remove any new gaps introduced into the
+sequences during multiple alignment if you wish to change the parameters and
+try again. This only takes effect just before you do a second multiple
+alignment. You can make phylogenetic trees after alignment whether or not this
+is ON. If you turn this OFF, the new gaps are kept even if you do a second
+multiple alignment. This allows you to iterate the alignment gradually.
+Sometimes, the alignment is improved by a second or third pass.
+</P>
+<P>
+RESET ALL GAPS BEFORE ALIGNMENT will remove all gaps in the sequences including
+gaps which were read in from the sequence input file. This only takes effect
+just before you do a second multiple alignment.  You can make phylogenetic
+trees after alignment whether or not this is ON.  If you turn this OFF, all
+gaps are kept even if you do a second multiple alignment. This allows you to
+iterate the alignment gradually.  Sometimes, the alignment is improved by a
+second or third pass.
+</P>
+<P>
+</P>
+<P>
+PAIRWISE ALIGNMENT PARAMETERS control the speed/sensitivity of the initial
+alignments.
+</P>
+<P>
+MULTIPLE ALIGNMENT PARAMETERS control the gaps in the final multiple
+alignments.
+</P>
+<P>
+PROTEIN GAP PARAMETERS displays a temporary window which allows you to set
+various parameters only used in the alignment of protein sequences.
+</P>
+<P>
+(SECONDARY STRUCTURE PARAMETERS, for use with the Profile Alignment Mode only,
+allows you to set various parameters only used with gap penalty masks.)
+</P>
+<P>
+SAVE LOG FILE will write the alignment calculation scores to a file. The log
+filename is the same as the input sequence filename, with an extension .log
+appended.
+</P>
+<P>
+</P>
+<P>
+<H4>
+OUTPUT FORMAT OPTIONS
+</H4>
+</P>
+<P>
+You can choose from 6 different alignment formats (CLUSTAL, GCG, NBRF/PIR,
+PHYLIP, GDE and NEXUS).  You can choose more than one (or all 6 if you wish).  
+</P>
+<P>
+CLUSTAL format output is a self explanatory alignment format. It shows the
+sequences aligned in blocks. It can be read in again at a later date to (for
+example) calculate a phylogenetic tree or add in new sequences by profile
+alignment.
+</P>
+<P>
+GCG output can be used by any of the GCG programs that can work on multiple
+alignments (e.g. PRETTY, PROFILEMAKE, PLOTALIGN). It is the same as the GCG
+.msf format files (multiple sequence file); new in version 7 of GCG.
+</P>
+<P>
+NEXUS format is used by several phylogeny programs, including PAUP and
+MacClade.
+</P>
+<P>
+PHYLIP format output can be used for input to the PHYLIP package of Joe 
+Felsenstein.  This is a very widely used package for doing every imaginable
+form of phylogenetic analysis (MUCH more than the modest introduction
+offered by this program).
+</P>
+<P>
+NBRF/PIR: this is the same as the standard PIR format with ONE ADDITION. Gap
+characters "-" are used to indicate the positions of gaps in the multiple 
+alignment. These files can be re-used as input in any part of clustal that
+allows sequences (or alignments or profiles) to be read in.  
+</P>
+<P>
+GDE:  this format is used by the GDE package of Steven Smith and is understood
+by SEQLAB in GCG 9 or later.
+</P>
+<P>
+GDE OUTPUT CASE: sequences in GDE format may be written in either upper or
+lower case.
+</P>
+<P> 
+CLUSTALW SEQUENCE NUMBERS: residue numbers may be added to the end of the
+alignment lines in clustalw format.
+</P>
+<P>
+OUTPUT ORDER is used to control the order of the sequences in the output
+alignments. By default, it uses the order in which the sequences were aligned
+(from the guide tree/dendrogram), thus automatically grouping closely related
+sequences. It can be switched to be the same as the original input order.
+</P>
+<P>
+PARAMETER OUTPUT: This option will save all your parameter settings in a
+parameter file (suffix .par) during alignment. The file can be subsequently
+used to rerun ClustalW using the same parameters.
+</P>
+<P>
+</P>
+<P>
+<H3>
+ALIGNMENT PARAMETERS
+</H3>
+</P>
+<P>
+<STRONG>
+PAIRWISE ALIGNMENT PARAMETERS
+</STRONG>
+</P>
+<P>
+A distance is calculated between every pair of sequences and these are used to
+construct the phylogenetic tree which guides the final multiple alignment. The
+scores are calculated from separate pairwise alignments. These can be
+calculated using 2 methods: dynamic programming (slow but accurate) or by the
+method of Wilbur and Lipman (extremely fast but approximate).   
+</P>
+<P>
+You can choose between the 2 alignment methods using the PAIRWISE ALIGNMENTS
+option. The slow/accurate method is fast enough for short sequences but will be
+VERY SLOW for many (e.g. >100) long (e.g. >1000 residue) sequences.   
+</P>
+<P>
+</P>
+<P>
+<STRONG>
+SLOW-ACCURATE alignment parameters:
+</STRONG>
+</P>
+<P>
+These parameters do not have any effect on the speed of the alignments. They
+are used to give initial alignments which are then rescored to give percent
+identity scores. These % scores are the ones which are displayed on the 
+screen. The scores are converted to distances for the trees.
+</P>
+<P>
+Gap Open Penalty:      the penalty for opening a gap in the alignment.
+</P>
+<P>
+Gap Extension Penalty: the penalty for extending a gap by 1 residue.
+</P>
+<P>
+Protein Weight Matrix: the scoring table which describes the similarity of 
+each amino acid to each other.
+</P>
+<P>
+Load protein matrix: allows you to read in a comparison table from a file.
+</P>
+<P>
+DNA weight matrix: the scores assigned to matches and mismatches (including
+IUB ambiguity codes).
+</P>
+<P>
+Load DNA matrix: allows you to read in a comparison table from a file.
+</P>
+<P>
+See the Multiple alignment parameters, MATRIX option below for details of the
+matrix input format.
+</P>
+<P>
+</P>
+<P>
+<STRONG>
+FAST-APPROXIMATE alignment parameters:
+</STRONG>
+</P>
+<P>
+These similarity scores are calculated from fast, approximate, global align-
+ments, which are controlled by 4 parameters. 2 techniques are used to make
+these alignments very fast: 1) only exactly matching fragments (k-tuples) are
+considered; 2) only the 'best' diagonals (the ones with most k-tuple matches)
+are used.
+</P>
+<P>
+GAP PENALTY:   This is a penalty for each gap in the fast alignments. It has
+little effect on the speed or sensitivity except for extreme values.
+</P>
+<P>
+K-TUPLE SIZE:  This is the size of exactly matching fragment that is used. 
+INCREASE for speed (max= 2 for proteins; 4 for DNA), DECREASE for sensitivity.
+For longer sequences (e.g. >1000 residues) you may wish to increase the
+default.
+</P>
+<P>
+TOP DIAGONALS: The number of k-tuple matches on each diagonal (in an imaginary
+dot-matrix plot) is calculated. Only the best ones (with most matches) are used
+in the alignment. This parameter specifies how many. Decrease for speed;
+increase for sensitivity.
+</P>
+<P>
+WINDOW SIZE:  This is the number of diagonals around each of the 'best' 
+diagonals that will be used. Decrease for speed; increase for sensitivity.
+</P>
+<P>
+</P>
+<P>
+<STRONG>
+MULTIPLE ALIGNMENT PARAMETERS
+</STRONG>
+</P>
+<P>
+These parameters control the final multiple alignment. This is the core of the
+program and the details are complicated. To fully understand the use of the
+parameters and the scoring system, you will have to refer to the documentation.
+</P>
+<P>
+Each step in the final multiple alignment consists of aligning two alignments 
+or sequences. This is done progressively, following the branching order in the
+GUIDE TREE. The basic parameters to control this are two gap penalties and the
+scores for various identical/non-identical residues. 
+</P>
+<P>
+The GAP OPENING and EXTENSION PENALTIES can be set here. These control the 
+cost of opening up every new gap and the cost of every item in a gap.  
+Increasing the gap opening penalty will make gaps less frequent. Increasing 
+the gap extension penalty will make gaps shorter. Terminal gaps are not 
+penalised.
+</P>
+<P>
+The DELAY DIVERGENT SEQUENCES switch delays the alignment of the most distantly
+related sequences until after the most closely related sequences have  been
+aligned. The setting shows the percent identity level required to delay the
+addition of a sequence; sequences that are less identical than this level to
+any other sequences will be aligned later.
+</P>
+<P>
+The TRANSITION WEIGHT gives transitions (A<-->G or C<-->T i.e. purine-purine or
+pyrimidine-pyrimidine substitutions) a weight between 0 and 1; a weight of zero
+means that the transitions are scored as mismatches, while a weight of 1 gives
+the transitions the match score. For distantly related DNA sequences, the
+weight should be near to zero; for closely related sequences it can be useful
+to assign a higher score. The default is set to 0.5.
+</P>
+<P>
+</P>
+<P>
+The PROTEIN WEIGHT MATRIX option allows you to choose a series of weight
+matrices. For protein alignments, you use a weight matrix to determine the
+similarity of non-identical amino acids. For example, Tyr aligned with Phe is
+usually judged to be 'better' than Tyr aligned with Pro.
+</P>
+<P>
+There are three 'in-built' series of weight matrices offered. Each consists of
+several matrices which work differently at different evolutionary distances. To
+see the exact details, read the documentation. Crudely, we store several
+matrices in memory, spanning the full range of amino acid distance (from almost
+identical sequences to highly divergent ones). For very similar sequences, it
+is best to use a strict weight matrix which only gives a high score to
+identities and the most favoured conservative substitutions. For more divergent
+sequences, it is appropriate to use "softer" matrices which give a high score
+to many other frequent substitutions.
+</P>
+<P>
+1) BLOSUM (Henikoff). These matrices appear to be the best available for 
+carrying out data base similarity (homology) searches. The matrices currently
+used are: Blosum 80, 62, 45 and 30. BLOSUM was the default in earlier Clustal X
+versions.
+</P>
+<P>
+2) PAM (Dayhoff). These have been extremely widely used since the late '70s. We
+currently use the PAM 20, 60, 120, 350 matrices.
+</P>
+<P>
+3) GONNET. These matrices were derived using almost the same procedure as the
+Dayhoff one (above) but are much more up to date and are based on a far larger
+data set. They appear to be more sensitive than the Dayhoff series. We
+currently use the GONNET 80, 120, 160, 250 and 350 matrices. This series is the
+default for Clustal X version 1.8.
+</P>
+<P>
+We also supply an identity matrix which gives a score of 10 to two identical 
+amino acids and a score of zero otherwise. This matrix is not very useful.
+</P>
+<P>
+Load protein matrix: allows you to read in a comparison matrix from a file.
+This can be either a single matrix or a series of matrices (see below for
+format). 
+</P>
+<P>
+</P>
+<P>
+DNA WEIGHT MATRIX option allows you to select a single matrix (not a series)
+used for aligning nucleic acid sequences. Two hard-coded matrices are available:
+</P>
+<P>
+1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
+of nucleic acid sequences. X's and N's are treated as matches to any IUB
+ambiguity symbol. All matches score 1.9; all mismatches for IUB symbols score 0.
+</P>
+<P>
+2) CLUSTALW(1.6). A previous system used by ClustalW, in which matches score
+1.0 and mismatches score 0. All matches for IUB symbols also score 0.
+</P>
+<P>
+Load DNA matrix: allows you to read in a nucleic acid comparison matrix from a
+file (just one matrix, not a series).
+</P>
+<P>
+</P>
+<P>
+SINGLE MATRIX INPUT FORMAT
+The format used for a single matrix is the same as that of the BLAST program. The
+scores in the new weight matrix should be similarities. You can use negative as
+well as positive values if you wish, although the matrix will be automatically
+adjusted to all positive scores, unless the NEGATIVE MATRIX option is selected.
+Any lines beginning with a # character are assumed to be comments. The first
+non-comment line should contain a list of amino acids in any order, using the 1
+letter code, followed by a * character. This should be followed by a square
+matrix of scores, with one row and one column for each amino acid. The last row
+and column of the matrix (corresponding to the * character) contain the minimum
+score over the whole matrix.
+</P>
+<P>
+MATRIX SERIES INPUT FORMAT
+ClustalX uses different matrices depending on the mean percent identity of the
+sequences to be aligned. You can specify a series of matrices and the range of
+the percent identity for each matrix in a matrix series file. The file is
+automatically recognised by the word CLUSTAL_SERIES at the beginning of the
+file. Each matrix in the series is then specified on one line which should
+start with the word MATRIX. This is followed by the lower and upper limits of
+the sequence percent identities for which you want to apply the matrix. The
+final entry on the matrix line is the filename of a Blast format matrix file
+(see above for details of the single matrix file format).
+</P>
+<P>
+Example.
+</P>
+<P>
+CLUSTAL_SERIES
+</P>
+<P> 
+MATRIX 81 100 /us1/user/julie/matrices/blosum80
+MATRIX 61 80 /us1/user/julie/matrices/blosum62
+MATRIX 31 60 /us1/user/julie/matrices/blosum45
+MATRIX 0 30 /us1/user/julie/matrices/blosum30
+</P>
+<P>
+</P>
+<P>
+<STRONG>
+PROTEIN GAP PARAMETERS
+</STRONG>
+</P>
+<P>
+RESIDUE SPECIFIC PENALTIES are amino acid specific gap penalties that reduce or
+increase the gap opening penalties at each position in the alignment or 
+sequence. See the documentation for details. As an example, positions that are
+rich in glycine are more likely to have an adjacent gap than positions that are
+rich in valine.
+</P>
+<P>
+HYDROPHILIC GAP PENALTIES are used to increase the chances of a gap within a
+run (5 or more residues) of hydrophilic amino acids; these are likely to be
+loop or random coil regions where gaps are more common. The residues that are
+"considered" to be hydrophilic can be entered in HYDROPHILIC RESIDUES.
+</P>
+<P>
+GAP SEPARATION DISTANCE tries to decrease the chances of gaps being too close
+to each other. Gaps that are less than this distance apart are penalised more
+than other gaps. This does not prevent close gaps; it makes them less frequent,
+promoting a block-like appearance of the alignment.
+</P>
+<P>
+END GAP SEPARATION treats end gaps just like internal gaps for the purposes of
+avoiding gaps that are too close (set by GAP SEPARATION DISTANCE above). If you
+turn this off, end gaps will be ignored for this purpose. This is useful when
+you wish to align fragments where the end gaps are not biologically meaningful.
+</P>
+<P>
+</P>
+<P>
+</P>
+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
+<CENTER><H2><A NAME="P">                   Profile and Structure Alignments
+</A></H2></CENTER>
+<P>
+</P>
+<P>   
+By PROFILE ALIGNMENT, we mean alignment using existing alignments. Profile 
+alignments allow you to store alignments of your favourite sequences and add
+new sequences to them in small bunches at a time. A profile is simply an
+alignment of one or more sequences (e.g. an alignment output file from Clustal
+X). Each input can be a single sequence. One or both sets of input sequences
+may include secondary structure assignments or gap penalty masks to guide the
+alignment. 
+</P>
+<P>
+Make sure PROFILE ALIGNMENT MODE is selected, using the switch directly above
+the sequence display area. Then, use the ALIGNMENT menu to do profile and
+secondary structure alignments.
+</P>
+<P>
+The profiles can be in any of the allowed input formats with "-" characters
+used to specify gaps (except for GCG/MSF where "." is used).
+</P>
+<P>
+You have to load the 2 profiles by choosing FILE, LOAD PROFILE 1 and  LOAD
+PROFILE 2. Then ALIGNMENT, ALIGN PROFILE 2 TO PROFILE 1 will align the 2
+profiles to each other. Secondary structure masks in either profile can be used
+to guide the alignment. This option compares all the sequences in profile 1
+with all the sequences in profile 2 in order to build guide trees which will be
+used to calculate sequence weights, and select appropriate alignment parameters
+for the final profile alignment.
+</P>
+<P>
+You can skip the first stage (pairwise alignments; guide trees) by using old
+guide tree files (ALIGN PROFILES FROM GUIDE TREES). 
+</P>
+<P>
+The ALIGN SEQUENCES TO PROFILE 1 option will take the sequences in the second
+profile and align them to the first profile, 1 at a time.  This is useful to
+add some new sequences to an existing alignment, or to align a set of sequences
+to a known structure. In this case, the second profile set need not be
+pre-aligned.
+</P>
+<P>
+You can skip the first stage (pairwise alignments; guide tree) by using an old
+guide tree file (ALIGN SEQUENCES TO PROFILE 1 FROM TREE). 
+</P>
+<P>
+SAVE LOG FILE will write the alignment calculation scores to a file. The log
+filename is the same as the input sequence filename, with an extension .log
+appended.
+</P>
+<P>
+The alignment parameters can be set using the ALIGNMENT PARAMETERS menu,
+Pairwise Parameters, Multiple Parameters and Protein Gap Parameters options.
+These are EXACTLY the same parameters as used by the general, automatic
+multiple alignment procedure. The general multiple alignment procedure is
+simply a series of profile alignments. Carrying out a series of profile
+alignments on larger and larger groups of sequences allows you to manually
+build up a complete alignment, if necessary editing intermediate alignments.
+</P>
+<P>
+<STRONG>
+SECONDARY STRUCTURE PARAMETERS
+</STRONG>
+</P>
+<P>
+Use this menu to set secondary structure options. If a solved structure is
+known, it can be used to guide the alignment by raising gap penalties within
+secondary structure elements, so that gaps will preferentially be inserted into
+unstructured surface loop regions. Alternatively, a user-specified gap penalty
+mask can be supplied for a similar purpose.
+</P>
+<P>
+A gap penalty mask is a series of numbers between 1 and 9, one per position in 
+the alignment. Each number specifies how much the gap opening penalty is to be 
+raised at that position (raised by multiplying the basic gap opening penalty
+by the number) i.e. a mask figure of 1 at a position means no change
+in gap opening penalty; a figure of 4 means that the gap opening penalty is
+four times greater at that position, making gaps 4 times harder to open.
+</P>
+<P>
+The format for gap penalty masks and secondary structure masks is explained in
+a separate help section.
+</P>
+<P>
+</P>
+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
+<CENTER><H2><A NAME="B">            Secondary Structure / Gap Penalty Masks
+</A></H2></CENTER>
+<P>
+</P>
+<P>
+The use of secondary structure-based penalties has been shown to improve  the
+accuracy of sequence alignment. Clustal X now allows secondary structure/ gap
+penalty masks to be supplied with the input sequences used during profile
+alignment. (NB. The secondary structure information is NOT used during multiple
+sequence alignment). The masks work by raising gap penalties in specified
+regions (typically secondary structure elements) so that gaps are
+preferentially opened in the less well conserved regions (typically surface
+loops).
+</P>
+<P>
+The USE PROFILE 1(2) SECONDARY STRUCTURE / GAP PENALTY MASK options control
+whether the input 2D-structure information or gap penalty masks will be used
+during the profile alignment.
+</P>
+<P>
+The OUTPUT options control whether the secondary structure and gap penalty
+masks should be included in the Clustal X output alignments. Showing both is
+useful for understanding how the masks work. The 2D-structure information is
+itself useful in judging the alignment quality and in seeing how residue
+conservation patterns vary with secondary structure. 
+</P>
+<P>
+The HELIX and STRAND GAP PENALTY options provide the value for raising the gap
+penalty at core Alpha Helical (A) and Beta Strand (B) residues. In CLUSTAL
+format, capital residues denote the A and B core structure notation. Basic gap
+penalties are multiplied by the amount specified.
+</P>
+<P>
+The LOOP GAP PENALTY option provides the value for the gap penalty in Loops.
+By default this penalty is not raised. In CLUSTAL format, loops are specified
+by "." in the secondary structure notation.
+</P>
+<P>
+The SECONDARY STRUCTURE TERMINAL PENALTY provides the value for setting the gap
+penalty at the ends of secondary structures. Ends of secondary structures are
+known to grow or shrink when related structures are compared. Therefore, by default
+these are given intermediate values, lower than the core penalties. All
+secondary structure read in as lower case in CLUSTAL format gets the reduced
+terminal penalty.
+</P>
+<P>
+The HELIX and STRAND TERMINAL POSITIONS options specify the range of structure
+termini for the intermediate penalties. In the alignment output, these are
+indicated as lower case. For Alpha Helices, by default, the range spans the 
+end-helical turn (3 residues). For Beta Strands, the default range spans the
+end residue and the adjacent loop residue, since sequence conservation often
+extends beyond the actual H-bonded Beta Strand.
+</P>
+<P>
+Clustal X can read the masks from SWISS-PROT, CLUSTAL or GDE format input
+files. For many 3-D protein structures, secondary structure information is
+recorded in the feature tables of SWISS-PROT database entries. You should
+always check that the assignments are correct - some are quite inaccurate.
+Clustal X looks for SWISS-PROT HELIX and STRAND assignments e.g.
+</P>
+<P>
+</P>
+<P>
+<PRE>
+FT   HELIX       100    115
+FT   STRAND      118    119
+</PRE>
+</P>
+<P>
+The structure and penalty masks can also be read from CLUSTAL alignment format 
+as comment lines beginning "!SS_" or "!GM_" e.g.
+</P>
+<P>
+<PRE>
+!SS_HBA_HUMA    ..aaaAAAAAAAAAAaaa.aaaAAAAAAAAAAaaaaaaAaaa.........aaaAAAAAA
+!GM_HBA_HUMA    112224444444444222122244444444442222224222111111111222444444
+HBA_HUMA        VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGK
+</PRE>
+</P>
+<P>
+Note that the mask itself is a set of numbers between 1 and 9 each of which is 
+assigned to the residue(s) in the same column below. 
+</P>
+<P>
+In GDE flat file format, the masks are specified as text and the names must
+begin with "SS_" or "GM_".
+</P>
+<P>
+Either a structure or penalty mask or both may be used. If both are included
+in an alignment, the user will be asked which is to be used.
+</P>
+<P>
+</P>
+<P>
+</P>
+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
+<CENTER><H2><A NAME="T">                            Phylogenetic Trees
+</A></H2></CENTER>
+<P>
+</P>
+<P>
+Before calculating a tree, you must have an ALIGNMENT in memory. This can be
+input using the FILE menu, LOAD SEQUENCES option, or it can be the result of a
+full multiple alignment that you have just carried out and is still in memory.
+Remember YOU MUST ALIGN THE SEQUENCES FIRST!!!!
+</P>
+<P>
+The method used is the NJ (Neighbour Joining) method of Saitou and Nei. First
+you calculate distances (percent divergence) between all pairs of sequences from
+a multiple alignment; second you apply the NJ method to the distance matrix.
+</P>
+<P>
+To calculate a tree, use the DRAW N-J TREE option. This gives an UNROOTED tree
+and all branch lengths. The root of the tree can only be inferred by using an
+outgroup (a sequence that you are certain branches at the outside of the tree
+.... certain on biological grounds) OR if you assume a degree of constancy in
+the 'molecular clock', you can place the root in the 'middle' of the tree
+(roughly equidistant from all tips).
+</P>
+<P>
+BOOTSTRAP N-J TREE uses a method for deriving confidence values for the 
+groupings in a tree (first adapted for trees by Joe Felsenstein). It involves
+making N random samples of sites from the alignment (N should be LARGE, e.g.
+500 - 1000); drawing N trees (1 from each sample) and counting how many times
+each grouping from the original tree occurs in the sample trees. You can set N
+using the NUMBER OF BOOTSTRAP TRIALS option in the BOOTSTRAP TREE window. In
+practice, you should use a large number of bootstrap replicates (1000 is
+recommended, even if it means running the program for an hour on a slow 
+computer). You can also supply a seed number for the random number generator
+here. Different runs with the same seed will give the same answer. See the
+documentation for more details.
+</P>
+<P>
+EXCLUDE POSITIONS WITH GAPS? With this option, any alignment positions where
+ANY of the sequences have a gap will be ignored. This means that 'like' will
+be compared to 'like' in all distances, which is highly desirable. It also
+automatically throws away the most ambiguous parts of the alignment, which are
+concentrated around gaps (usually). The disadvantage is that you may throw away
+much of the data if there are many gaps (which is why it is difficult for us to
+make it the default).  
+</P>
+<P>
+CORRECT FOR MULTIPLE SUBSTITUTIONS? For small divergence (say <10%) this option
+makes no difference. For greater divergence, this option corrects for the fact
+that observed distances underestimate actual evolutionary distances. This is
+because, as sequences diverge, more than one substitution will happen at many
+sites. However, you only see one difference when you look at the present day
+sequences. Therefore, this option has the effect of stretching branch lengths
+in trees (especially long branches). The corrections used here (for DNA or
+proteins) are both due to Motoo Kimura. See the documentation for details.  
+</P>
+<P>
+Where possible, this option should be used. However, for VERY divergent
+sequences, the distances cannot be reliably corrected. You will be warned if
+this happens. Even if none of the distances in a data set exceed the reliable
+threshold, if you bootstrap the data, some of the bootstrap distances may
+randomly exceed the safe limit.  
+</P>
+<P>
+SAVE LOG FILE will write the tree calculation scores to a file. The log
+filename is the same as the input sequence filename, with an extension .log
+appended.
+</P>
+<P>
+<H4>
+OUTPUT FORMAT OPTIONS
+</H4>
+</P>
+<P>
+Four different formats are allowed. None of these displays the tree visually.
+You can display the tree using the NJPLOT program distributed with Clustal X
+OR get the PHYLIP package and use the tree drawing facilities there. 
+</P>
+<P> 
+1) CLUSTAL FORMAT TREE. This format is verbose and lists all of the distances
+between the sequences and the number of alignment positions used for each. The
+tree is described at the end of the file. It lists the sequences that are
+joined at each alignment step and the branch lengths. After two sequences are
+joined, it is referred to later as a NODE. The number of a NODE is the number
+of the lowest sequence in that NODE.   
+</P>
+<P>
+2) PHYLIP FORMAT TREE. This format is the New Hampshire format, used by many
+phylogenetic analysis packages. It consists of a series of nested parentheses,
+describing the branching order, with the sequence names and branch lengths. It
+can be read by the NJPLOT program distributed with ClustalX. It can also be
+used by the RETREE, DRAWGRAM and DRAWTREE programs of the PHYLIP package to see
+the trees graphically. This is the same format used during multiple alignment
+for the guide trees. Some other packages that can read and display New
+Hampshire format are TreeTool, TreeView, and Phylowin.
+</P>
+<P>
+3) PHYLIP DISTANCE MATRIX. This format just outputs a matrix of all the
+pairwise distances in a format that can be used by the PHYLIP package. It used
+to be useful when one could not produce distances from protein sequences in the
+Phylip package but is now redundant (PROTDIST of Phylip 3.5 now does this).
+</P>
+<P>
+4) NEXUS FORMAT TREE. This format is used by several popular phylogeny programs,
+including PAUP and MacClade. The format is described fully in:
+Maddison, D. R., D. L. Swofford and W. P. Maddison.  1997.
+NEXUS: an extensible file format for systematic information.
+Systematic Biology 46:590-621.
+</P>
+<P>
+BOOTSTRAP LABELS ON: By default, the bootstrap values are correctly placed on
+the tree branches of the phylip format output tree. The toggle allows them to
+be placed on the nodes, which is incorrect, but some display packages (e.g.
+TreeTool, TreeView and Phylowin) only support node labelling but not branch
+labelling. Care should be taken to note which branches and labels go together. 
+</P>
+<P>
+</P>
+<P>
+</P>
+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
+<CENTER><H2><A NAME="C">                               Colors
+</A></H2></CENTER>
+<P>
+</P>
+<P>
+Clustal X provides a versatile coloring scheme for the sequence alignment 
+display. The sequences (or profiles) are colored automatically, when they are
+loaded. Sequences can be colored either by assigning a color to specific
+residues, or on the basis of an alignment consensus. In the latter case, the
+alignment consensus is calculated automatically, and the residues in each
+column are colored according to the consensus character assigned to that
+column. In this way, you can choose to highlight, for example, conserved
+hydrophilic or hydrophobic positions in the alignment.
+</P>
+<P>
+The 'rules' used to color the alignment are specified in a COLOR PARAMETER
+FILE. Clustal X automatically looks for a file called 'colprot.par' for protein
+sequences or 'coldna.par' for DNA, in the current directory. (If you are running
+under UNIX, it then looks in your home directory, and finally in the
+directories in your PATH environment variable).
+</P>
+<P>
+By default, if no color parameter file is found, protein sequences are colored
+by residue as follows:
+</P>
+<P>
+<PRE>
+	Color			Residue Code
+</P>
+<P>
+	ORANGE			GPST
+	RED			HKR
+	BLUE			FWY
+	GREEN			ILMV
+</PRE>
+</P>
+<P>
+In the case of DNA sequences, the default colors are as follows:
+</P>
+<P>
+<PRE>
+	Color			Residue Code
+</P>
+<P>
+	ORANGE			A
+	RED			C
+	BLUE			T
+	GREEN			G
+</PRE>
+</P>
+<P>
+</P>
+<P>
+The default BACKGROUND COLORING option shows the sequence residues using a
+black character on a colored background. It can be switched off to show
+residues as a colored character on a white background. 
+</P>
+<P>
+Either BLACK AND WHITE or DEFAULT COLOR options can be selected. The Color
+option looks first for the color parameter file (as described above) and, if no
+file is found, uses the default residue-specific colors.
+</P>
+<P>
+You can specify your own coloring scheme by using the LOAD COLOR PARAMETER FILE
+option. The format of the color parameter file is described below.
+</P>
+<P>
+<H4>
+COLOR PARAMETER FILE
+</H4>
+</P>
+<P>
+This file is divided into 3 sections:
+</P>
+<P>
+1) the names and rgb values of the colors
+2) the rules for calculating the consensus
+3) the rules for assigning colors to the residues
+</P>
+<P> 
+An example file is given here.
+</P>
+<P>
+<PRE>
+ --------------------------------------------------------------------
+@rgbindex
+RED          0.9 0.1 0.1
+BLUE         0.1 0.1 0.9
+GREEN        0.1 0.9 0.1
+YELLOW       0.9 0.9 0.0
+</P>
+<P>
+@consensus
+% = 60% w:l:v:i:m:a:f:c:y:h:p
+# = 80% w:l:v:i:m:a:f:c:y:h:p
+- = 50% e:d
++ = 60% k:r
+q = 50% q:e
+p = 50% p
+n = 50% n
+t = 50% t:s
+</P>
+<P>
+@color
+g = RED
+p = YELLOW
+t = GREEN if t:%:#
+n = GREEN if n
+w = BLUE if %:#:p
+k = RED if +
+ --------------------------------------------------------------------
+</PRE>
+</P>
+<P>
+The first section is optional and is identified by the header @rgbindex. If
+this section exists, each color used in the file must be named and the rgb
+values specified (on a scale from 0 to 1). If the rgb index section is not
+found, the following set of hard-coded colors will be used.
+</P>
+<P>
+<PRE>
+RED          0.9 0.1 0.1
+BLUE         0.1 0.1 0.9
+GREEN        0.1 0.9 0.1
+ORANGE       0.9 0.7 0.3
+CYAN         0.1 0.9 0.9
+PINK         0.9 0.5 0.5
+MAGENTA      0.9 0.1 0.9
+YELLOW       0.9 0.9 0.0
+</PRE>
+</P>
+<P>
+The second section is optional and is identified by the header @consensus. It
+defines how the consensus is calculated.
+</P>
+<P> 
+The format of each consensus parameter is:-
+</P>
+<P> 
+<PRE>
+c = n% residue_list
+</P>
+<P> 
+        where
+              c             is a character used to identify the parameter.
+              n             is an integer value used as the percentage cutoff
+                            point.
+              residue_list  is a list of residues denoted by a single
+                            character, delimited by a colon (:).
+</PRE>
+</P>
+<P> 
+For example:   # = 60% w:l:v:i
+</P>
+<P>
+will assign a consensus character # to any column in the alignment which
+contains more than 60% of the residues w,l,v and i.
+</P>
+<P>        
+</P>
+<P> 
+The third section is identified by the header @color, and defines how colors
+are assigned to each residue in the alignment.
+</P>
+<P> 
+The color parameters can take one of two formats:
+</P>
+<P>
+<PRE>
+1) r = color
+2) r = color if consensus_list
+</P>
+<P> 
+        where
+              r             is a character used to denote a residue.
+              color         is one of the colors in the GDE color lookup table.
+              consensus_list is a list of consensus characters, each denoted
+                            by a single character, delimited by a colon (:).
+</PRE>
+</P>
+<P> 
+Examples:
+1) g = ORANGE
+</P>
+<P>
+will color all glycines ORANGE, regardless of the consensus.
+</P>
+<P>
+2) w = BLUE if w:%:#
+</P>
+<P>
+will color BLUE any tryptophan which is found in a column with a consensus of
+w, % or #.
+</P>
+<P> 
+</P>
+<P>
+</P>
+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
+<CENTER><H2><A NAME="Q">                       Alignment Quality Analysis
+</A></H2></CENTER>
+<P>
+</P>
+<P>
+<H3>
+QUALITY SCORES
+</H3>
+</P>
+<P>
+Clustal X provides an indication of the quality of an alignment by plotting
+a 'conservation score' for each column of the alignment. A high score indicates
+a well-conserved column; a low score indicates low conservation. The quality
+curve is drawn below the alignment.
+</P>
+<P>
+Two methods are also provided to indicate single residues or sequence segments
+which score badly in the alignment.
+</P>
+<P> 
+Low-scoring residues are expected to occur at a moderate frequency in all the
+sequences because of their steady divergence due to the natural processes of
+evolution. The most divergent sequences are likely to have the most outliers.
+However, the highlighted residues are especially useful in pointing to
+sequence misalignments. Note that clustering of highlighted residues is a
+strong indication of misalignment. This can arise due to various reasons, for
+example:
+</P>
+<P> 
+        1. Partial or total misalignments caused by a failure in the
+        alignment algorithm. Usually only in difficult alignment cases.
+</P>
+<P> 
+        2. Partial or total misalignments because at least one of the
+        sequences in the given set is partly or completely unrelated to the
+        other sequences. It is up to the user to check that the set of
+        sequences is alignable.
+</P>
+<P>
+        3. Frameshift translation errors in a protein sequence causing local
+        mismatched regions to be heavily highlighted. These are surprisingly
+        common in database entries. If suspected, a 3-frame translation of
+        the source DNA needs to be examined.
+</P>
+<P> 
+Occasionally, highlighted residues may point to regions of some biological
+significance. This might happen for example if a protein alignment contains a
+sequence which has acquired new functions relative to the main sequence set. It
+is important to exclude other explanations, such as error or the natural
+divergence of sequences, before invoking a biological explanation.
+</P>
+<P>
+</P>
+<P>
+<H3>
+LOW-SCORING SEGMENTS
+</H3>
+</P>
+<P>
+Unreliable regions in the alignment can be highlighted using the Low-Scoring
+Segments option. A sequence-weighted profile is used to indicate any segments
+in the sequences which score badly. Because the profile calculation may take
+some time, an option is provided to calculate LOW-SCORING SEGMENTS. The 
+segment display can then be toggled on or off without having to repeat the
+time-consuming calculations.
+</P>
+<P>
+For details of the low-scoring segment calculation, see the CALCULATION section
+below.
+</P>
+<P>
+</P>
+<P>
+<H4>
+LOW-SCORING SEGMENT PARAMETERS
+</H4>
+</P>
+<P>
+MINIMUM LENGTH OF SEGMENTS: short segments (or even single residues) can be
+hidden by increasing the minimum length of segments which will be displayed.
+</P>
+<P>
+DNA MARKING SCALE is used to remove less significant segments from the 
+highlighted display. Increase the scale to display more segments; decrease the
+scale to remove the least significant.
+</P>
+<P>
+</P>
+<P>
+PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of each
+amino acid to each other. The matrix is used to calculate the sequence-
+weighted profile scores. There are four 'in-built' Log-Odds matrices offered:
+the Gonnet PAM 80, 120, 250, 350 matrices. A more stringent matrix which only
+gives a high score to identities and the most favoured conservative
+substitutions, may be more suitable when the sequences are closely related. For
+more divergent sequences, it is appropriate to use "softer" matrices which give
+a high score to many other frequent substitutions. This  option automatically
+recalculates the low-scoring segments.
+</P>
+<P>
+</P>
+<P>
+DNA WEIGHT MATRIX: Two hard-coded matrices are available:
+</P>
+<P>
+1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
+of nucleic acid sequences. X's and N's are treated as matches to any IUB
+ambiguity symbol. All matches score 1.9; all mismatches for IUB symbols score
+0.
+</P>
+<P>
+2) CLUSTALW(1.6). The previous system used by ClustalW, in which matches score
+1.0 and mismatches score 0. All matches for IUB symbols also score 0. 
+</P>
+<P>
+A new matrix can be read from a file on disk, if the filename consists only
+of lower case characters. The values in the new weight matrix should be
+similarities and should be NEGATIVE for infrequent substitutions.
+</P>
+<P> 
+INPUT FORMAT. The format used for a new matrix is the same as that of the BLAST
+program. Any lines beginning with a # character are assumed to be comments. The
+first non-comment line should contain a list of amino acids in any order, using
+the 1 letter code, followed by a * character. This should be followed by a
+square matrix of scores, with one row and one column for each amino acid. The
+last row and column of the matrix (corresponding to the * character) contain
+the minimum score over the whole matrix.
+</P>
+<P>
+<H4>
+QUALITY SCORE PARAMETERS
+</H4>
+</P>
+<P>
+You can customise the column 'quality scores' plotted underneath the alignment
+display using the following options.
+</P>
+<P>
+SCORE PLOT SCALE: this is a scalar value from 1 to 10, which can be used to
+change the scale of the quality score plot. 
+</P>
+<P>
+RESIDUE EXCEPTION CUTOFF: this is a scalar value from 1 to 10, which can be
+used to change the number of residue exceptions which are highlighted in the
+alignment display. (For an explanation of this cutoff, see the CALCULATION OF
+RESIDUE EXCEPTIONS section below.)
+</P>
+<P>
+PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of 
+each amino acid to each other. 
+</P>
+<P> 
+DNA WEIGHT MATRIX: two hard-coded matrices are available: IUB and CLUSTALW(1.6).
+</P>
+<P>
+For more information about the weight matrices, see the help above for
+the Low-scoring Segments Weight Matrix.
+</P>
+<P>
+For details of the quality score calculations, see the CALCULATION section
+below.
+</P>
+<P>
+</P>
+<P>
+<STRONG>
+SHOW LOW-SCORING SEGMENTS
+</STRONG>
+</P>
+<P>                       
+The low-scoring segment display can be toggled on or off. This option does not
+recalculate the profile scores.
+</P>
+<P>
+</P>
+<P>
+<STRONG>
+SHOW EXCEPTIONAL RESIDUES
+</STRONG>
+</P>
+<P>                       
+This option highlights individual residues which score badly in the alignment
+quality calculations. Residues which score exceptionally low are highlighted by
+using a white character on a grey background.
+</P>
+<P>
+<STRONG>
+SAVE QUALITY SCORES TO FILE
+</STRONG>
+</P>
+<P>
+The quality scores that are plotted underneath the alignment display can also
+be saved in a text file. Each column in the alignment is written on one line in
+the output file, with the value of the quality score at the end of the line.
+Only the sequences currently selected in the display are written to the file.
+One use for quality scores is to color residues in a protein structure by
+sequence conservation. In this way conserved surface residues can be
+highlighted to locate functional regions such as ligand-binding sites.
+</P>
+<P>
+</P>
+<P>
+<H3>
+CALCULATION OF QUALITY SCORES
+</H3>
+</P>
+<P>
+Suppose we have an alignment of m sequences of length n. Then, the alignment
+can be written as:
+</P>
+<P>
+<PRE>
+        A11 A12 A13 .......... A1n
+        A21 A22 A23 .......... A2n
+        .
+        .
+        Am1 Am2 Am3 .......... Amn
+</PRE>
+</P>
+<P>
+We also have a residue comparison matrix of size R where C(i,j) is the score
+for aligning residue i with residue j.
+</P>
+<P>
+We want to calculate a score for the conservation of the jth position in the
+alignment.
+</P>
+<P>
+To do this, we define an R-dimensional sequence space. For the jth position in 
+the alignment, each sequence consists of a single residue which is assigned a
+point S in the space. S has R dimensions, and for sequence i, the rth dimension
+is defined as:
+</P>
+<P>
+<PRE>
+	Sr =    C(r,Aij)
+</PRE>
+</P>
+<P>
+We then calculate a consensus value for the jth position in the alignment. This
+value X also has R dimensions, and the rth dimension is defined as:
+</P>
+<P>
+<PRE>
+	Xr = (   SUM   (Fij * C(i,r)) ) / m
+               1<=i<=R
+</PRE>
+</P>
+<P>
+where Fij is the count of residues i at position j in the alignment.
+</P>
+<P>
+Now we can calculate the distance Di between each sequence i and the consensus 
+position X in the R-dimensional space.
+</P>
+<P>
+<PRE>
+	Di = SQRT   (   SUM   (Xr - Sr)(Xr - Sr) )
+                      1<=r<=R
+</P>
+<P>
+</PRE>
+</P>
+<P>
+The quality score for the jth position in the alignment is defined as the mean
+of the sequence distances Di.
+</P>
+<P>
+The score is normalised by multiplying by the percentage of sequences which
+have residues (and not gaps) at this position.
+</P>
+<P>
+<H3>
+CALCULATION OF RESIDUE EXCEPTIONS
+</H3>
+</P>
+<P>
+The jth residue of the ith sequence is considered as an exception if the
+distance Di of the sequence from the consensus value X is greater than (Upper
+Quartile + Inter Quartile Range * Cutoff). The value used as a cutoff for
+displaying exceptions can be set from the SCORE PARAMETERS menu. A high cutoff
+value will only display very significant exceptions; a low value will allow
+more, less significant, exceptions to be highlighted.
+</P>
+<P>
+(NB. Sequences which contain gaps at this position are not included in the
+exception calculation.)
+</P>
+<P>
+</P>
+<P>
+<H3>
+CALCULATION OF LOW-SCORING SEGMENTS
+</H3>
+</P>
+<P>
+Suppose we have an alignment of m sequences of length n. Then, the alignment
+can be written as:
+</P>
+<P>
+<PRE>
+        A11 A12 A13 .......... A1n
+        A21 A22 A23 .......... A2n
+        .
+        .
+        Am1 Am2 Am3 .......... Amn
+</PRE>
+</P>
+<P>
+We also have a residue comparison matrix of size R where C(i,j) is the score
+for aligning residue i with residue j.
+</P>
+<P>
+We calculate sequence weights by building a neighbour-joining tree, in which
+branch lengths are proportional to divergence. Summing the branches by branch
+ownership provides the weights. See Thompson et al., CABIOS, 10, 19 (1994) and
+Henikoff et al., JMB, 243, 574 (1994).
+</P>
+<P>
+To find the low-scoring segments in a sequence Si, we build a weighted profile
+of the remaining sequences in the alignment. Suppose we find residue r at 
+position j in the sequence; then the score for the jth position in the sequence
+is defined as
+</P>
+<P>
+<PRE>
+	Score(Si,j) = Profile(j,r)   where Profile(j,r) is the profile score
+                                       for residue r at position j in the
+                                       alignment.
+</PRE>
+</P>
+<P>
+These residue scores are summed along the sequence in both forward and backward
+directions. If the sum of the scores is positive, then it is reset to zero.
+Segments which score negatively in both directions are considered as 
+'low-scoring' and will be highlighted in the alignment display.
+</P>
+<P>
+</P>
+<P>
+</P>
+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
+<CENTER><H2><A NAME="9">              Command Line Parameters
+</A></H2></CENTER>
+<CENTER><H3>                DATA (sequences)
+</H3></CENTER>
+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
+<TR>
+<TD><STRONG>Parameter</STRONG></TD>
+<TD><STRONG><EM>Description</EM></STRONG></TD>
+</TR>
+<TR>
+<TD><TT>-PROFILE1=file.ext  and  -PROFILE2=file.ext  </TT></TD>
+<TD><EM>profiles (aligned sequences)</EM></TD>
+</TR>
+</TABLE></CENTER>
+<CENTER><H3>                VERBS (do things)
+</H3></CENTER>
+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
+<TR>
+<TD><STRONG>Parameter</STRONG></TD>
+<TD><STRONG><EM>Description</EM></STRONG></TD>
+</TR>
+<TR>
+<TD><TT>-HELP  or -CHECK    </TT></TD>
+<TD><EM>outline the command line parameters</EM></TD>
+</TR>
+<TR>
+<TD><TT>-ALIGN              </TT></TD>
+<TD><EM>do full multiple alignment </EM></TD>
+</TR>
+<TR>
+<TD><TT>-TREE               </TT></TD>
+<TD><EM>calculate NJ tree</EM></TD>
+</TR>
+<TR>
+<TD><TT>-BOOTSTRAP(=n)      </TT></TD>
+<TD><EM>bootstrap a NJ tree (n= number of bootstraps; def. = 1000)</EM></TD>
+</TR>
+<TR>
+<TD><TT>-CONVERT            </TT></TD>
+<TD><EM>output the input sequences in a different file format</EM></TD>
+</TR>
+</TABLE></CENTER>
+<CENTER><H3>                PARAMETERS (set things)
+</H3></CENTER>
+<CENTER><P><STRONG>***General settings:***
+</STRONG></P></CENTER>
+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
+<TR>
+<TD><STRONG>Parameter</STRONG></TD>
+<TD><STRONG><EM>Description</EM></STRONG></TD>
+</TR>
+<TR>
+<TD><TT>-INTERACTIVE </TT></TD>
+<TD><EM>read command line, then enter normal interactive menus</EM></TD>
+</TR>
+<TR>
+<TD><TT>-QUICKTREE   </TT></TD>
+<TD><EM>use FAST algorithm for the alignment guide tree</EM></TD>
+</TR>
+<TR>
+<TD><TT>-TYPE=       </TT></TD>
+<TD><EM>PROTEIN or DNA sequences</EM></TD>
+</TR>
+<TR>
+<TD><TT>-NEGATIVE    </TT></TD>
+<TD><EM>protein alignment with negative values in matrix</EM></TD>
+</TR>
+<TR>
+<TD><TT>-OUTFILE=    </TT></TD>
+<TD><EM>sequence alignment file name</EM></TD>
+</TR>
+<TR>
+<TD><TT>-OUTPUT=     </TT></TD>
+<TD><EM>GCG, GDE, PHYLIP, PIR or NEXUS</EM></TD>
+</TR>
+<TR>
+<TD><TT>-OUTORDER=   </TT></TD>
+<TD><EM>INPUT or ALIGNED</EM></TD>
+</TR>
+<TR>
+<TD><TT>-CASE=       </TT></TD>
+<TD><EM>LOWER or UPPER (for GDE output only)</EM></TD>
+</TR>
+<TR>
+<TD><TT>-SEQNOS=     </TT></TD>
+<TD><EM>OFF or ON (for Clustal output only)</EM></TD>
+</TR>
+</TABLE></CENTER>
+<CENTER><H3>***Fast Pairwise Alignments:***
+</H3></CENTER>
+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
+<TR>
+<TD><STRONG>Parameter</STRONG></TD>
+<TD><STRONG><EM>Description</EM></STRONG></TD>
+</TR>
+<TR>
+<TD><TT>-TOPDIAGS=n  </TT></TD>
+<TD><EM>number of best diags.</EM></TD>
+</TR>
+<TR>
+<TD><TT>-WINDOW=n    </TT></TD>
+<TD><EM>window around best diags.</EM></TD>
+</TR>
+<TR>
+<TD><TT>-PAIRGAP=n   </TT></TD>
+<TD><EM>gap penalty</EM></TD>
+</TR>
+<TR>
+<TD><TT>-SCORE=      </TT></TD>
+<TD><EM>PERCENT or ABSOLUTE</EM></TD>
+</TR>
+</TABLE></CENTER>
+<CENTER><H3>***Slow Pairwise Alignments:***
+</H3></CENTER>
+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
+<TR>
+<TD><STRONG>Parameter</STRONG></TD>
+<TD><STRONG><EM>Description</EM></STRONG></TD>
+</TR>
+<TR>
+<TD><TT>-PWDNAMATRIX= </TT></TD>
+<TD><EM>DNA weight matrix=IUB, CLUSTALW or filename</EM></TD>
+</TR>
+<TR>
+<TD><TT>-PWGAPOPEN=f  </TT></TD>
+<TD><EM>gap opening penalty</EM></TD>
+</TR>
+<TR>
+<TD><TT>-PWGAPEXT=f  </TT></TD>
+<TD><EM>gap extension penalty</EM></TD>
+</TR>
+</TABLE></CENTER>
+<CENTER><H3>***Multiple Alignments:***
+</H3></CENTER>
+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
+<TR>
+<TD><STRONG>Parameter</STRONG></TD>
+<TD><STRONG><EM>Description</EM></STRONG></TD>
+</TR>
+<TR>
+<TD><TT>-USETREE=    </TT></TD>
+<TD><EM>file for old guide tree</EM></TD>
+</TR>
+<TR>
+<TD><TT>-MATRIX=     </TT></TD>
+<TD><EM>Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename</EM></TD>
+</TR>
+<TR>
+<TD><TT>-DNAMATRIX=  </TT></TD>
+<TD><EM>DNA weight matrix=IUB, CLUSTALW or filename</EM></TD>
+</TR>
+<TR>
+<TD><TT>-GAPOPEN=f   </TT></TD>
+<TD><EM>gap opening penalty</EM></TD>
+</TR>
+<TR>
+<TD><TT>-GAPEXT=f  </TT></TD>
+<TD><EM>gap extension penalty</EM></TD>
+</TR>
+<TR>
+<TD><TT>-ENDGAPS     </TT></TD>
+<TD><EM>no end gap separation pen.</EM></TD>
+</TR>
+<TR>
+<TD><TT>-GAPDIST=n   </TT></TD>
+<TD><EM>gap separation pen. range</EM></TD>
+</TR>
+<TR>
+<TD><TT>-NOPGAP      </TT></TD>
+<TD><EM>residue-specific gaps off</EM></TD>
+</TR>
+<TR>
+<TD><TT>-NOHGAP    </TT></TD>
+<TD><EM>hydrophilic gaps off</EM></TD>
+</TR>
+<TR>
+<TD><TT>-HGAPRESIDUES= </TT></TD>
+<TD><EM>list hydrophilic res.</EM></TD>
+</TR>
+<TR>
+<TD><TT>-MAXDIV=n    </TT></TD>
+<TD><EM>% ident. for delay</EM></TD>
+</TR>
+<TR>
+<TD><TT>-TYPE=       </TT></TD>
+<TD><EM>PROTEIN or DNA</EM></TD>
+</TR>
+<TR>
+<TD><TT>-TRANSWEIGHT=f </TT></TD>
+<TD><EM>transitions weighting</EM></TD>
+</TR>
+</TABLE></CENTER>
+<CENTER><H3>***Profile Alignments:***
+</H3></CENTER>
+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
+<TR>
+<TD><STRONG>Parameter</STRONG></TD>
+<TD><STRONG><EM>Description</EM></STRONG></TD>
+</TR>
+<TR>
+<TD><TT>-NEWTREE1=    </TT></TD>
+<TD><EM>file for new guide tree for profile1</EM></TD>
+</TR>
+<TR>
+<TD><TT>-NEWTREE2=    </TT></TD>
+<TD><EM>file for new guide tree for profile2</EM></TD>
+</TR>
+<TR>
+<TD><TT>-USETREE1=    </TT></TD>
+<TD><EM>file for old guide tree for profile1</EM></TD>
+</TR>
+<TR>
+<TD><TT>-USETREE2=    </TT></TD>
+<TD><EM>file for old guide tree for profile2</EM></TD>
+</TR>
+</TABLE></CENTER>
+<CENTER><H3>***Sequence to Profile Alignments:***
+</H3></CENTER>
+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
+<TR>
+<TD><STRONG>Parameter</STRONG></TD>
+<TD><STRONG><EM>Description</EM></STRONG></TD>
+</TR>
+<TR>
+<TD><TT>-NEWTREE=    </TT></TD>
+<TD><EM>file for new guide tree</EM></TD>
+</TR>
+<TR>
+<TD><TT>-USETREE=    </TT></TD>
+<TD><EM>file for old guide tree</EM></TD>
+</TR>
+</TABLE></CENTER>
+<CENTER><H3>***Structure Alignments:***
+</H3></CENTER>
+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
+<TR>
+<TD><STRONG>Parameter</STRONG></TD>
+<TD><STRONG><EM>Description</EM></STRONG></TD>
+</TR>
+<TR>
+<TD><TT>-NOSECSTR2     </TT></TD>
+<TD><EM>do not use secondary structure/gap penalty mask for profile 2</EM></TD>
+</TR>
+<TR>
+<TD><TT>-SECSTROUT=STRUCTURE or MASK or BOTH or NONE  </TT></TD>
+<TD><EM>output in alignment file</EM></TD>
+</TR>
+<TR>
+<TD><TT>-HELIXGAP=n    </TT></TD>
+<TD><EM>gap penalty for helix core residues </EM></TD>
+</TR>
+<TR>
+<TD><TT>-STRANDGAP=n   </TT></TD>
+<TD><EM>gap penalty for strand core residues</EM></TD>
+</TR>
+<TR>
+<TD><TT>-LOOPGAP=n     </TT></TD>
+<TD><EM>gap penalty for loop regions</EM></TD>
+</TR>
+<TR>
+<TD><TT>-TERMINALGAP=n </TT></TD>
+<TD><EM>gap penalty for structure termini</EM></TD>
+</TR>
+<TR>
+<TD><TT>-HELIXENDIN=n  </TT></TD>
+<TD><EM>number of residues inside helix to be treated as terminal</EM></TD>
+</TR>
+<TR>
+<TD><TT>-HELIXENDOUT=n </TT></TD>
+<TD><EM>number of residues outside helix to be treated as terminal</EM></TD>
+</TR>
+<TR>
+<TD><TT>-STRANDENDIN=n </TT></TD>
+<TD><EM>number of residues inside strand to be treated as terminal</EM></TD>
+</TR>
+<TR>
+<TD><TT>-STRANDENDOUT=n</TT></TD>
+<TD><EM>number of residues outside strand to be treated as terminal </EM></TD>
+</TR>
+</TABLE></CENTER>
+<CENTER><H3>***Trees:***
+</H3></CENTER>
+<CENTER><TABLE ALIGN=ABSCENTER BORDER=1 CELLSPACING=1 CELLPADDING=5>
+<TR>
+<TD><STRONG>Parameter</STRONG></TD>
+<TD><STRONG><EM>Description</EM></STRONG></TD>
+</TR>
+<TR>
+<TD><TT>-SEED=n    </TT></TD>
+<TD><EM>seed number for bootstraps</EM></TD>
+</TR>
+<TR>
+<TD><TT>-KIMURA      </TT></TD>
+<TD><EM>use Kimura's correction</EM></TD>
+</TR>
+<TR>
+<TD><TT>-TOSSGAPS  </TT></TD>
+<TD><EM>ignore positions with gaps</EM></TD>
+</TR>
+<TR>
+<TD><TT>-BOOTLABELS=node OR branch </TT></TD>
+<TD><EM>position of bootstrap values in tree display</EM></TD>
+</TR>
+</TABLE></CENTER>
+</P>
+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>
+<CENTER><H2><A NAME="R">                             References
+</A></H2></CENTER>
+<P>
+</P>
+<P>
+<STRONG>
+The ClustalX program is described in the manuscript:
+</STRONG>
+</P>
+<P>
+Thompson,J.D., Gibson,T.J., Plewniak,F., Jeanmougin,F. and Higgins,D.G. (1997)
+The ClustalX windows interface: flexible strategies for multiple sequence 
+alignment aided by quality analysis tools. Nucleic Acids Research, 25:4876-4882.
+</P>
+<P>
+</P>
+<P>
+<STRONG>
+The ClustalW program is described in the manuscript:
+</STRONG>
+</P>
+<P>
+Thompson, J.D., Higgins, D.G. and Gibson, T.J. (1994) CLUSTAL W: improving the
+sensitivity of progressive multiple sequence alignment through sequence
+weighting, position-specific gap penalties and weight matrix choice.  Nucleic
+Acids Research, 22:4673-4680.
+</P>
+<P>
+</P>
+<P>
+<STRONG>
+The ClustalV program is described in the manuscript:
+</STRONG>
+</P>
+<P>
+Higgins,D.G., Bleasby,A.J. and Fuchs,R. (1992) CLUSTAL V: improved software for
+multiple sequence alignment. CABIOS 8,189-191.
+</P>
+<P>
+</P>
+<P>
+<STRONG>
+The original Clustal program is described in the manuscripts:
+</STRONG>
+</P>
+<P>
+Higgins,D.G. and Sharp,P.M. (1989) Fast and sensitive multiple sequence
+alignments on a microcomputer.
+CABIOS 5,151-153.
+</P>
+<P>
+Higgins,D.G. and Sharp,P.M. (1988) CLUSTAL: a package for performing multiple
+sequence alignment on a microcomputer. Gene 73,237-244.
+</P>
+<P>
+<STRONG>
+Some tips on using Clustal X:
+</STRONG>
+</P>
+<P>
+Jeanmougin,F., Thompson,J.D., Gouy,M., Higgins,D.G. and Gibson,T.J. (1998)
+Multiple sequence alignment with Clustal X. Trends Biochem Sci, 23, 403-5.
+</P>
+<P>
+<STRONG>
+Some tips on using Clustal W:
+</STRONG>
+</P>
+<P>
+Higgins, D. G., Thompson, J. D. and Gibson, T. J. (1996) Using CLUSTAL for
+multiple sequence alignments. Methods Enzymol., 266, 383-402.
+</P>
+<P>
+<STRONG>
+You can get the latest version of the ClustalX program by anonymous ftp to:
+</STRONG>
+</P>
+<P>
+ftp-igbmc.u-strasbg.fr
+ftp.embl-heidelberg.de
+ftp.ebi.ac.uk
+</P>
+<P>
+<STRONG>
+Or, have a look at the following WWW site:
+</STRONG>
+</P>
+<P>
+http://www-igbmc.u-strasbg.fr/BioInfo/
+</P>
+<P>
+</P>
+<A HREF="#INDEX"> <EM>Back to Index</EM> </A>

Added: trunk/packages/clustalw/branches/upstream/current/clustalx_help
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/clustalx_help	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/clustalx_help	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,1523 @@
+
+This is the on-line help file for Clustal X (version 1.83), using the NCBI
+Vibrant Toolkit.   
+
+It should be named or defined as: clustalx_help 
+except with MSDOS in which case it should be named ClustalX.HLP
+
+For full details of usage and algorithms, please read the CLUSTALW.DOC file.
+
+
+Toby  Gibson                         EMBL, Heidelberg, Germany.
+Des   Higgins                        UCC, Cork, Ireland.
+Julie Thompson/Francois Jeanmougin   IGBMC, Strasbourg, France.
+
+
+
+
+>>HELP G <<
+                      General help for CLUSTAL X (1.83)
+
+Clustal X is a windows interface for the ClustalW multiple sequence alignment
+program. It provides an integrated environment for performing multiple sequence
+and profile alignments and analysing the results. The sequence alignment is
+displayed in a window on the screen. A versatile coloring scheme has been
+incorporated allowing you to highlight conserved features  in the alignment.
+The pull-down menus at the top of the window allow you to select all the
+options required for traditional multiple sequence and profile alignment.
+
+You can cut-and-paste sequences to change the order of the alignment; you can
+select a subset of sequences to be aligned; you can select a sub-range of the
+alignment to be realigned and inserted back into the original alignment.
+
+Alignment quality analysis can be performed and low-scoring segments or
+exceptional residues can be highlighted.
+
+ClustalX is available for a number of different platforms including: SUN
+Solaris, IRIX5.3 on Silicon Graphics, Digital UNIX on DECStations, Microsoft
+Windows (32 bit) for PC's, Linux ELF for x86 PC's and Macintosh PowerMac. (See
+the README file for Installation instructions.)
+
+
+<H4>
+SEQUENCE INPUT
+</H4>
+
+Sequences and profiles (a term for pre-existing alignments) are input using 
+the FILE menu. Invalid options will be disabled. All sequences must be included
+in 1 file. 7 formats are automatically recognised: NBRF/PIR, EMBL/SWISSPROT,
+Pearson (Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9 RSF and GDE flat file.
+All non-alphabetic characters (spaces, digits, punctuation marks) are ignored
+except "-" which is used to indicate a GAP ("." in MSF/RSF).  
+
+<H4>
+SEQUENCE / PROFILE ALIGNMENTS
+</H4>
+
+Clustal X has two modes which can be selected using the switch directly above
+the sequence display: MULTIPLE ALIGNMENT MODE and PROFILE ALIGNMENT MODE.
+
+To do a MULTIPLE ALIGNMENT on a set of sequences, make sure MULTIPLE ALIGNMENT
+MODE is selected. A single sequence data area is then displayed. The ALIGNMENT
+menu then allows you to either produce a guide tree for the alignment, or to do
+a multiple alignment following the guide tree, or to do a full multiple
+alignment.
+
+In PROFILE ALIGNMENT MODE, two sequence data areas are displayed, allowing you
+to align 2 alignments (termed profiles). Profiles are also used to add a new
+sequence to an old alignment, or to use secondary structure to guide the
+alignment process. GAPS in the old alignments are indicated using the "-" 
+character. PROFILES can be input in ANY of the allowed formats; just  use "-"
+(or "." for MSF/RSF) for each gap position. In Profile Alignment Mode, a button
+"Lock Scroll" is displayed which allows you to scroll the two profiles together
+using a single scroll bar. When the Lock Scroll is turned off, the two profiles
+can be scrolled independently.
+
+<H4>
+PHYLOGENETIC TREES
+</H4>
+
+Phylogenetic trees can be calculated from old alignments (read in with "-"
+characters to indicate gaps) OR after a multiple alignment while the alignment
+is still displayed.
+
+<H4>
+ALIGNMENT DISPLAY
+</H4>
+
+The alignment is displayed on the screen with the sequence names on the left
+hand side. The sequence alignment is for display only, it cannot be edited here
+(except for changing the sequence order by cutting-and-pasting on the sequence
+names). 
+
+A ruler is displayed below the sequences, starting at 1 for the first residue
+position (residue numbers in the sequence input file are ignored).
+
+A line above the alignment is used to mark strongly conserved positions. Three
+characters ('*', ':' and '.') are used:
+
+'*' indicates positions which have a single, fully conserved residue
+
+':' indicates that one of the following 'strong' groups is fully conserved:-
+<PRE>
+                 STA  
+                 NEQK  
+                 NHQK  
+                 NDEQ  
+                 QHRK  
+                 MILV  
+                 MILF  
+                 HY  
+                 FYW  
+</PRE>
+
+'.' indicates that one of the following 'weaker' groups is fully conserved:-
+<PRE>
+                 CSA  
+                 ATV  
+                 SAG  
+                 STNK  
+                 STPA  
+                 SGND  
+                 SNDEQK  
+                 NDEQHK  
+                 NEQHRK  
+                 FVLIM  
+                 HFY  
+</PRE>
+
+These are all the positively scoring groups that occur in the Gonnet Pam250
+matrix. The strong and weak groups are defined as strong score >0.5 and weak
+score =<0.5 respectively.
+
+For profile alignments, secondary structure and gap penalty masks are displayed
+above the sequences, if any data is found in the profile input file.
+
+
+>>HELP F <<
+                      Input / Output Files 
+
+LOAD SEQUENCES reads sequences from one of 7 file formats, replacing any
+sequences that are already loaded. All sequences must be in 1 file. The formats
+that are automatically recognised are: NBRF/PIR, EMBL/SWISSPROT, Pearson
+(Fasta), Clustal (*.aln), GCG/MSF (Pileup), GCG9/RSF and GDE flat file.  All
+non-alphabetic characters (spaces, digits, punctuation  marks) are ignored
+except "-" which is used to indicate a GAP ("." in MSF/RSF).
+
+The program tries to automatically recognise the different file formats used
+and to guess whether the sequences are amino acid or nucleotide.  This is not
+always foolproof.
+
+FASTA and NBRF/PIR formats are recognised by having a ">" as the first 
+character in the file.  
+
+EMBL/Swiss Prot formats are recognised by the letters "ID" at the start of the
+file (the token for the entry name field).  
+
+CLUSTAL format is recognised by the word CLUSTAL at the beginning of the file.
+
+GCG/MSF format is recognised by one of the following:
+<UL>
+<LI>
+       - the word PileUp at the start of the file.
+</LI><LI>
+       - the word !!AA_MULTIPLE_ALIGNMENT or !!NA_MULTIPLE_ALIGNMENT
+         at the start of the file.
+</LI><LI>
+       - the word MSF on the first line of the file, and the characters ..
+         at the end of this line.
+</LI>
+</UL>
+ 
+GCG/RSF format is recognised by the word !!RICH_SEQUENCE at the beginning of
+the file.
+
+
+If 85% or more of the characters in the sequence are from A,C,G,T,U or N, the
+sequence will be assumed to be nucleotide.  This works in 97.3% of cases but
+watch out!
+
+APPEND SEQUENCES is only valid in MULTIPLE ALIGNMENT MODE. The input sequences
+do not replace those already loaded, but are appended at the end of the
+alignment.
+
+SAVE SEQUENCES AS... offers the user a choice of one of seven output formats:
+CLUSTAL, NBRF/PIR, GCG/MSF, PHYLIP, NEXUS, GDE or FASTA. All sequences are written
+to a single file. Options are available to save a range of the alignment, 
+switch between UPPER/LOWER case for GDE files, and to output SEQUENCE NUMBERING
+for CLUSTAL files. Users can also choose to include the residue range numbers
+by appending them to the sequence names.
+
+LOAD PROFILE 1 reads sequences in the same 7 file formats, replacing any
+sequences already loaded as Profile 1. This option will also remove any
+sequences which are loaded in Profile 2.
+
+LOAD PROFILE 2 reads sequences in the same 7 file formats, replacing any
+sequences already loaded as Profile 2.
+
+SAVE PROFILE 1 AS... is similar to the Save Sequences option except that only
+those sequences in Profile 1 will be written to the output file.
+
+SAVE PROFILE 2 AS... is similar to the Save Sequences option except that only
+those sequences in Profile 2 will be written to the output file.
+
+WRITE ALIGNMENT AS POSTSCRIPT will write the sequence display to a postscript
+format file. This will include any secondary structure / gap penalty mask 
+information and the consensus and ruler lines which are displayed on the
+screen. The Alignment Quality curve can be optionally included in the output
+file.
+
+WRITE PROFILE 1 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT
+except that only the profile 1 display will be printed.
+
+WRITE PROFILE 2 AS POSTSCRIPT is similar to WRITE ALIGNMENT AS POSTSCRIPT
+except that only the profile 2 display will be printed.
+
+
+<H4>
+POSTSCRIPT PARAMETERS
+</H4>
+
+A number of options are available to allow you to configure your postscript
+output file.
+
+PS COLORS FILE:
+
+The exact RGB values required to reproduce the colors used in the alignment
+window will vary from printer to printer. A PS colors file can be specified
+that contains the RGB values for all the colors required by each of your
+postscript printers.
+
+By default, Clustal X looks for a file called 'colprint.par' in the current
+directory (if you're running under UNIX, it then looks in your home directory,
+and finally in the directories in your PATH environment variable). If no PS
+colors file is found or a color used on the screen is not defined here, the
+screen RGB values (from the Color Parameter File) are used.
+
+The PS colors file consists of one line for each color to be defined, with the
+color name followed by the RGB values (on a scale of 0 to 1). For example,
+
+RED          0.9 0.1 0.1
+
+Blank lines and comments (lines beginning with a '#' character) are ignored.
+
+
+PAGE SIZE:  The alignment can be displayed on either A4, A3 or US Letter size
+pages.
+
+ORIENTATION: The alignment can be displayed on either a landscape or portrait
+page.
+
+PRINT HEADER: An optional header including the postscript filename, and
+creation date can be printed at the top of each page.
+
+PRINT QUALITY CURVE: The Alignment Quality curve which is displayed underneath
+the alignment on the screen can be included in the postscript output.
+
+PRINT RULER: The ruler which is displayed underneath the alignment on the 
+screen can be included in the postscript output.
+
+PRINT RESIDUE NUMBERS: Sequence residue numbers can be printed at the right
+hand side of the alignment.
+
+RESIZE TO FIT PAGE: By default, the alignment is scaled to fit the page size
+selected. This option can be turned off, in which case a font size of 10 will
+be used for the sequences.
+
+PRINT FROM POSITION/TO: A range of the alignment can be printed. The default
+is to print the full alignment. The first and last residues to be printed are
+specified here.
+
+USE BLOCK LENGTH: The alignment can be divided into blocks of residues. The
+number of residues in a block is specified here. More than one block may then
+be printed on a single page. This is useful for long alignments of a small
+number of sequences. If the block length is set to 0, the alignment will not
+be divided into blocks, but printed across a number of pages.
+
+>>HELP E <<
+                          Editing Alignments
+
+Clustal X allows you to change the order of the sequences in the alignment, by
+cutting-and-pasting the sequence names.
+
+To select a group of sequences to be moved, click on a sequence name and drag
+the cursor until all the required sequences are highlighted. Holding down the
+Shift key when clicking on the first name will add new sequences to those
+already selected.
+
+(Options are provided to Select All Sequences, Select Profile 1 or Select 
+Profile 2.)
+
+The selected sequences can be removed from the alignment by using the EDIT
+menu, CUT option.
+
+To add the cut sequences back into an alignment, select a sequence by clicking
+on the sequence name. The cut sequences will be added to the alignment,
+immediately following the selected sequence, by the EDIT menu, PASTE option.
+
+To add the cut sequences to an empty alignment (eg. when cutting sequences from
+Profile 1 and pasting them to Profile 2), click on the empty sequence name
+display area, and select the EDIT menu, PASTE option as before.
+
+The sequence selection and sequence range selection can be cleared using the
+EDIT menu, CLEAR SEQUENCE SELECTION and CLEAR RANGE SELECTION options
+respectively.
+
+To search for a string of residues in the sequences, select the sequences to be
+searched by clicking on the sequence names. You can then enter the string to
+search for by selecting the SEARCH FOR STRING option. If the string is found in
+any of the sequences selected, the sequence name and column number are printed
+below the sequence display.
+
+In PROFILE ALIGNMENT MODE, the two profiles can be merged (normally done after
+alignment) by selecting ADD PROFILE 2 TO PROFILE 1. The sequences currently
+displayed as Profile 2 will be appended to Profile 1. 
+
+The REMOVE ALL GAPS option will remove all gaps from the sequences currently
+selected.
+WARNING: This option removes ALL gaps, not only those introduced by ClustalX,
+but also those that were read from the input alignment file. Any secondary
+structure information associated with the alignment will NOT be automatically
+realigned.
+
+The REMOVE GAP-ONLY COLUMNS option will remove those positions in the alignment which
+contain gaps in all sequences. This can occur as a result of removing divergent
+sequences from an alignment, or if an alignment has been realigned.
+
+>>HELP M <<
+                          Multiple Alignments
+
+Make sure MULTIPLE ALIGNMENT MODE is selected, using the switch directly above
+the sequence display area. Then, use the ALIGNMENT menu to do multiple
+alignments.
+
+Multiple alignments are carried out in 3 stages:
+ 
+1) all sequences are compared to each other (pairwise alignments);
+ 
+2) a dendrogram (like a phylogenetic tree) is constructed, describing the
+approximate groupings of the sequences by similarity (stored in a file).
+ 
+3) the final multiple alignment is carried out, using the dendrogram as a guide.
+
+The 3 stages are carried out automatically by the DO COMPLETE ALIGNMENT option.
+You can skip the first stages (pairwise alignments; guide tree) by using an old
+guide tree file (DO ALIGNMENT FROM GUIDE TREE); or you can just produce the
+guide tree with no final multiple alignment (PRODUCE GUIDE TREE ONLY).
+
+
+REALIGN SELECTED SEQUENCES is used to realign badly aligned sequences in the
+alignment. Sequences can be selected by clicking on the sequence names - see
+Editing Alignments for more details. The unselected sequences are then 'fixed'
+and a profile is made including only the unselected sequences. Each of the
+selected sequences in turn is then realigned to this profile. The realigned
+sequences will be displayed as a group at the end of the alignment.
+
+
+REALIGN SELECTED SEQUENCE RANGE is used to realign a small region of the 
+alignment. A residue range can be selected by clicking on the sequence display
+area. A multiple alignment is then performed, following the 3 stages described
+above, but only using the selected residue range. Finally the new alignment of
+the range is pasted back into the full sequence alignment.
+
+By default, gap penalties are used at each end of the subrange in order to 
+penalise terminal gaps. If the REALIGN SEGMENT END GAP PENALTIES option is
+switched off, gaps can be introduced at the ends of the residue range at no
+cost.
+
+
+ALIGNMENT PARAMETERS displays a sub-menu with the following options:
+
+RESET NEW GAPS BEFORE ALIGNMENT will remove any new gaps introduced into the
+sequences during multiple alignment if you wish to change the parameters and
+try again. This only takes effect just before you do a second multiple
+alignment. You can make phylogenetic trees after alignment whether or not this
+is ON. If you turn this OFF, the new gaps are kept even if you do a second
+multiple alignment. This allows you to iterate the alignment gradually.
+Sometimes, the alignment is improved by a second or third pass.
+
+RESET ALL GAPS BEFORE ALIGNMENT will remove all gaps in the sequences including
+gaps which were read in from the sequence input file. This only takes effect
+just before you do a second multiple alignment.  You can make phylogenetic
+trees after alignment whether or not this is ON.  If you turn this OFF, all
+gaps are kept even if you do a second multiple alignment. This allows you to
+iterate the alignment gradually.  Sometimes, the alignment is improved by a
+second or third pass.
+
+
+PAIRWISE ALIGNMENT PARAMETERS control the speed/sensitivity of the initial
+alignments.
+
+MULTIPLE ALIGNMENT PARAMETERS control the gaps in the final multiple
+alignments.
+
+PROTEIN GAP PARAMETERS displays a temporary window which allows you to set
+various parameters only used in the alignment of protein sequences.
+
+(SECONDARY STRUCTURE PARAMETERS, for use with the Profile Alignment Mode only,
+allows you to set various parameters only used with gap penalty masks.)
+
+SAVE LOG FILE will write the alignment calculation scores to a file. The log
+filename is the same as the input sequence filename, with an extension .log
+appended.
+
+
+<H4>
+OUTPUT FORMAT OPTIONS
+</H4>
+
+You can choose from 7 different alignment formats (CLUSTAL, GCG, NBRF/PIR,
+PHYLIP, GDE, NEXUS, FASTA).  You can choose more than one (or all 7 if you wish).  
+
+CLUSTAL format output is a self explanatory alignment format. It shows the
+sequences aligned in blocks. It can be read in again at a later date to (for
+example) calculate a phylogenetic tree or add in new sequences by profile
+alignment.
+
+GCG output can be used by any of the GCG programs that can work on multiple
+alignments (e.g. PRETTY, PROFILEMAKE, PLOTALIGN). It is the same as the GCG
+.msf format files (multiple sequence file); new in version 7 of GCG.
+
+NEXUS format is used by several phylogeny programs, including PAUP and
+MacClade.
+
+PHYLIP format output can be used for input to the PHYLIP package of Joe 
+Felsenstein.  This is a very widely used package for doing every imaginable
+form of phylogenetic analysis (MUCH more than the modest introduction
+offered by this program).
+
+NBRF/PIR: this is the same as the standard PIR format with ONE ADDITION. Gap
+characters "-" are used to indicate the positions of gaps in the multiple 
+alignment. These files can be re-used as input in any part of clustal that
+allows sequences (or alignments or profiles) to be read in.  
+
+FASTA: this is included for compatibility with numerous sequence analysis programs.
+
+GDE:  this format is used by the GDE package of Steven Smith and is understood
+by SEQLAB in GCG 9 or later.
+
+GDE OUTPUT CASE: sequences in GDE format may be written in either upper or
+lower case.
+ 
+CLUSTALW SEQUENCE NUMBERS: residue numbers may be added to the end of the
+alignment lines in clustalw format.
+
+OUTPUT ORDER is used to control the order of the sequences in the output
+alignments. By default, it uses the order in which the sequences were aligned
+(from the guide tree/dendrogram), thus automatically grouping closely related
+sequences. It can be switched to be the same as the original input order.
+
+PARAMETER OUTPUT: This option will save all your parameter settings in a
+parameter file (suffix .par) during alignment. The file can be subsequently
+used to rerun ClustalW using the same parameters.
+
+
+<H3>
+ALIGNMENT PARAMETERS
+</H3>
+--------------------
+
+<STRONG>
+PAIRWISE ALIGNMENT PARAMETERS
+</STRONG>
+
+A distance is calculated between every pair of sequences and these are used to
+construct the phylogenetic tree which guides the final multiple alignment. The
+scores are calculated from separate pairwise alignments. These can be
+calculated using 2 methods: dynamic programming (slow but accurate) or by the
+method of Wilbur and Lipman (extremely fast but approximate).   
+
+You can choose between the 2 alignment methods using the PAIRWISE ALIGNMENTS
+option. The slow/accurate method is fast enough for short sequences but will be
+VERY SLOW for many (e.g. >100) long (e.g. >1000 residue) sequences.   
+
+
+<STRONG>
+SLOW-ACCURATE alignment parameters:
+</STRONG>
+
+These parameters do not have any effect on the speed of the alignments. They
+are used to give initial alignments which are then rescored to give percent
+identity scores. These % scores are the ones which are displayed on the 
+screen. The scores are converted to distances for the trees.
+
+Gap Open Penalty:      the penalty for opening a gap in the alignment.
+
+Gap Extension Penalty: the penalty for extending a gap by 1 residue.
+
+Protein Weight Matrix: the scoring table which describes the similarity of 
+each amino acid to each other.
+
+Load protein matrix: allows you to read in a comparison table from a file.
+
+DNA weight matrix: the scores assigned to matches and mismatches (including
+IUB ambiguity codes).
+
+Load DNA matrix: allows you to read in a comparison table from a file.
+
+See the Multiple alignment parameters, MATRIX option below for details of the
+matrix input format.
+
+
+<STRONG>
+FAST-APPROXIMATE alignment parameters:
+</STRONG>
+
+These similarity scores are calculated from fast, approximate, global align-
+ments, which are controlled by 4 parameters. 2 techniques are used to make
+these alignments very fast: 1) only exactly matching fragments (k-tuples) are
+considered; 2) only the 'best' diagonals (the ones with most k-tuple matches)
+are used.
+
+GAP PENALTY:   This is a penalty for each gap in the fast alignments. It has
+little effect on the speed or sensitivity except for extreme values.
+
+K-TUPLE SIZE:  This is the size of exactly matching fragment that is used. 
+INCREASE for speed (max= 2 for proteins; 4 for DNA), DECREASE for sensitivity.
+For longer sequences (e.g. >1000 residues) you may wish to increase the
+default.
+
+TOP DIAGONALS: The number of k-tuple matches on each diagonal (in an imaginary
+dot-matrix plot) is calculated. Only the best ones (with most matches) are used
+in the alignment. This parameter specifies how many. Decrease for speed;
+increase for sensitivity.
+
+WINDOW SIZE:  This is the number of diagonals around each of the 'best' 
+diagonals that will be used. Decrease for speed; increase for sensitivity.
+
+
+<STRONG>
+MULTIPLE ALIGNMENT PARAMETERS
+</STRONG>
+
+These parameters control the final multiple alignment. This is the core of the
+program and the details are complicated. To fully understand the use of the
+parameters and the scoring system, you will have to refer to the documentation.
+
+Each step in the final multiple alignment consists of aligning two alignments 
+or sequences. This is done progressively, following the branching order in the
+GUIDE TREE. The basic parameters to control this are two gap penalties and the
+scores for various identical/non-identical residues. 
+
+The GAP OPENING and EXTENSION PENALTIES can be set here. These control the 
+cost of opening up every new gap and the cost of every item in a gap.  
+Increasing the gap opening penalty will make gaps less frequent. Increasing 
+the gap extension penalty will make gaps shorter. Terminal gaps are not 
+penalised.
+
+The DELAY DIVERGENT SEQUENCES switch delays the alignment of the most distantly
+related sequences until after the most closely related sequences have  been
+aligned. The setting shows the percent identity level required to delay the
+addition of a sequence; sequences that are less identical than this level to
+any other sequences will be aligned later.
+
+The TRANSITION WEIGHT gives transitions (A<-->G or C<-->T i.e. purine-purine or
+pyrimidine-pyrimidine substitutions) a weight between 0 and 1; a weight of zero
+means that the transitions are scored as mismatches, while a weight of 1 gives
+the transitions the match score. For distantly related DNA sequences, the
+weight should be near to zero; for closely related sequences it can be useful
+to assign a higher score. The default is set to 0.5.
+
+
+The PROTEIN WEIGHT MATRIX option allows you to choose a series of weight
+matrices. For protein alignments, you use a weight matrix to determine the
+similarity of non-identical amino acids. For example, Tyr aligned with Phe is
+usually judged to be 'better' than Tyr aligned with Pro.
+
+There are three 'in-built' series of weight matrices offered. Each consists of
+several matrices which work differently at different evolutionary distances. To
+see the exact details, read the documentation. Crudely, we store several
+matrices in memory, spanning the full range of amino acid distance (from almost
+identical sequences to highly divergent ones). For very similar sequences, it
+is best to use a strict weight matrix which only gives a high score to
+identities and the most favoured conservative substitutions. For more divergent
+sequences, it is appropriate to use "softer" matrices which give a high score
+to many other frequent substitutions.
+
+1) BLOSUM (Henikoff). These matrices appear to be the best available for 
+carrying out data base similarity (homology searches). The matrices currently
+used are: Blosum 80, 62, 45 and 30. BLOSUM was the default in earlier Clustal X
+versions.
+
+2) PAM (Dayhoff). These have been extremely widely used since the late '70s. We
+currently use the PAM 20, 60, 120, 350 matrices.
+
+3) GONNET. These matrices were derived using almost the same procedure as the
+Dayhoff one (above) but are much more up to date and are based on a far larger
+data set. They appear to be more sensitive than the Dayhoff series. We
+currently use the GONNET 80, 120, 160, 250 and 350 matrices. This series is the
+default for Clustal X version 1.8.
+
+We also supply an identity matrix which gives a score of 10 to two identical 
+amino acids and a score of zero otherwise. This matrix is not very useful.
+
+Load protein matrix: allows you to read in a comparison matrix from a file.
+This can be either a single matrix or a series of matrices (see below for
+format). 
+
+
+DNA WEIGHT MATRIX option allows you to select a single matrix (not a series)
+used for aligning nucleic acid sequences. Two hard-coded matrices are available:
+
+1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
+of nucleic acid sequences. X's and N's are treated as matches to any IUB
+ambiguity symbol. All matches score 1.9; all mismatches for IUB symbols score 0.
+
+2) CLUSTALW(1.6). A previous system used by ClustalW, in which matches score
+1.0 and mismatches score 0. All matches for IUB symbols also score 0.
+
+Load DNA matrix: allows you to read in a nucleic acid comparison matrix from a
+file (just one matrix, not a series).
+
+
+SINGLE MATRIX INPUT FORMAT
+The format used for a single matrix is the same as the BLAST program. The
+scores in the new weight matrix should be similarities. You can use negative as
+well as positive values if you wish, although the matrix will be automatically
+adjusted to all positive scores, unless the NEGATIVE MATRIX option is selected.
+Any lines beginning with a # character are assumed to be comments. The first
+non-comment line should contain a list of amino acids in any order, using the 1
+letter code, followed by a * character. This should be followed by a square
+matrix of scores, with one row and one column for each amino acid. The last row
+and column of the matrix (corresponding to the * character) contain the minimum
+score over the whole matrix.
+
+MATRIX SERIES INPUT FORMAT
+ClustalX uses different matrices depending on the mean percent identity of the
+sequences to be aligned. You can specify a series of matrices and the range of
+the percent identity for each matrix in a matrix series file. The file is
+automatically recognised by the word CLUSTAL_SERIES at the beginning of the
+file. Each matrix in the series is then specified on one line which should
+start with the word MATRIX. This is followed by the lower and upper limits of
+the sequence percent identities for which you want to apply the matrix. The
+final entry on the matrix line is the filename of a Blast format matrix file
+(see above for details of the single matrix file format).
+
+Example.
+
+CLUSTAL_SERIES
+ 
+MATRIX 81 100 /us1/user/julie/matrices/blosum80
+MATRIX 61 80 /us1/user/julie/matrices/blosum62
+MATRIX 31 60 /us1/user/julie/matrices/blosum45
+MATRIX 0 30 /us1/user/julie/matrices/blosum30
+
+
+<STRONG>
+PROTEIN GAP PARAMETERS
+</STRONG>
+
+RESIDUE SPECIFIC PENALTIES are amino acid specific gap penalties that reduce or
+increase the gap opening penalties at each position in the alignment or 
+sequence. See the documentation for details. As an example, positions that are
+rich in glycine are more likely to have an adjacent gap than positions that are
+rich in valine.
+
+HYDROPHILIC GAP PENALTIES are used to increase the chances of a gap within a
+run (5 or more residues) of hydrophilic amino acids; these are likely to be
+loop or random coil regions where gaps are more common. The residues that are
+"considered" to be hydrophilic can be entered in HYDROPHILIC RESIDUES.
+
+GAP SEPARATION DISTANCE tries to decrease the chances of gaps being too close
+to each other. Gaps that are less than this distance apart are penalised more
+than other gaps. This does not prevent close gaps; it makes them less frequent,
+promoting a block-like appearance of the alignment.
+
+END GAP SEPARATION treats end gaps just like internal gaps for the purposes of
+avoiding gaps that are too close (set by GAP SEPARATION DISTANCE above). If you
+turn this off, end gaps will be ignored for this purpose. This is useful when
+you wish to align fragments where the end gaps are not biologically meaningful.
+
+
+>>HELP P <<
+                   Profile and Structure Alignments
+   
+By PROFILE ALIGNMENT, we mean alignment using existing alignments. Profile 
+alignments allow you to store alignments of your favourite sequences and add
+new sequences to them in small bunches at a time. A profile is simply an
+alignment of one or more sequences (e.g. an alignment output file from Clustal
+X). Each input can be a single sequence. One or both sets of input sequences
+may include secondary structure assignments or gap penalty masks to guide the
+alignment. 
+
+Make sure PROFILE ALIGNMENT MODE is selected, using the switch directly above
+the sequence display area. Then, use the ALIGNMENT menu to do profile and
+secondary structure alignments.
+
+The profiles can be in any of the allowed input formats with "-" characters
+used to specify gaps (except for GCG/MSF where "." is used).
+
+You have to load the 2 profiles by choosing FILE, LOAD PROFILE 1 and  LOAD
+PROFILE 2. Then ALIGNMENT, ALIGN PROFILE 2 TO PROFILE 1 will align the 2
+profiles to each other. Secondary structure masks in either profile can be used
+to guide the alignment. This option compares all the sequences in profile 1
+with all the sequences in profile 2 in order to build guide trees which will be
+used to calculate sequence weights, and select appropriate alignment parameters
+for the final profile alignment.
+
+You can skip the first stage (pairwise alignments; guide trees) by using old
+guide tree files (ALIGN PROFILES FROM GUIDE TREES). 
+
+The ALIGN SEQUENCES TO PROFILE 1 option will take the sequences in the second
+profile and align them to the first profile, 1 at a time.  This is useful to
+add some new sequences to an existing alignment, or to align a set of sequences
+to a known structure. In this case, the second profile set need not be
+pre-aligned.
+
+You can skip the first stage (pairwise alignments; guide tree) by using an old
+guide tree file (ALIGN SEQUENCES TO PROFILE 1 FROM TREE). 
+
+SAVE LOG FILE will write the alignment calculation scores to a file. The log
+filename is the same as the input sequence filename, with an extension .log
+appended.
+
+The alignment parameters can be set using the ALIGNMENT PARAMETERS menu,
+Pairwise Parameters, Multiple Parameters and Protein Gap Parameters options.
+These are EXACTLY the same parameters as used by the general, automatic
+multiple alignment procedure. The general multiple alignment procedure is
+simply a series of profile alignments. Carrying out a series of profile
+alignments on larger and larger groups of sequences, allows you to manually
+build up a complete alignment, if necessary editing intermediate alignments.
+
+<STRONG>
+SECONDARY STRUCTURE PARAMETERS
+</STRONG>
+
+Use this menu to set secondary structure options. If a solved structure is
+known, it can be used to guide the alignment by raising gap penalties within
+secondary structure elements, so that gaps will preferentially be inserted into
+unstructured surface loop regions. Alternatively, a user-specified gap penalty
+mask can be supplied for a similar purpose.
+
+A gap penalty mask is a series of numbers between 1 and 9, one per position in 
+the alignment. Each number specifies how much the gap opening penalty is to be 
+raised at that position (raised by multiplying the basic gap opening penalty
+by the number) i.e. a mask figure of 1 at a position means no change
+in gap opening penalty; a figure of 4 means that the gap opening penalty is
+four times greater at that position, making gaps 4 times harder to open.
+
+The format for gap penalty masks and secondary structure masks is explained in
+a separate help section.
+
+>>HELP B << 
+            Secondary Structure / Gap Penalty Masks
+
+The use of secondary structure-based penalties has been shown to improve  the
+accuracy of sequence alignment. Clustal X now allows secondary structure/ gap
+penalty masks to be supplied with the input sequences used during profile
+alignment. (NB. The secondary structure information is NOT used during multiple
+sequence alignment). The masks work by raising gap penalties in specified
+regions (typically secondary structure elements) so that gaps are
+preferentially opened in the less well conserved regions (typically surface
+loops).
+
+The USE PROFILE 1(2) SECONDARY STRUCTURE / GAP PENALTY MASK options control
+whether the input 2D-structure information or gap penalty masks will be used
+during the profile alignment.
+
+The OUTPUT options control whether the secondary structure and gap penalty
+masks should be included in the Clustal X output alignments. Showing both is
+useful for understanding how the masks work. The 2D-structure information is
+itself useful in judging the alignment quality and in seeing how residue
+conservation patterns vary with secondary structure. 
+
+The HELIX and STRAND GAP PENALTY options provide the value for raising the gap
+penalty at core Alpha Helical (A) and Beta Strand (B) residues. In CLUSTAL
+format, capital residues denote the A and B core structure notation. Basic gap
+penalties are multiplied by the amount specified.
+
+The LOOP GAP PENALTY option provides the value for the gap penalty in Loops.
+By default this penalty is not raised. In CLUSTAL format, loops are specified
+by "." in the secondary structure notation.
+
+The SECONDARY STRUCTURE TERMINAL PENALTY provides the value for setting the gap
+penalty at the ends of secondary structures. Ends of secondary structures are
+known to grow or shrink, comparing related structures. Therefore by default
+these are given intermediate values, lower than the core penalties. All
+secondary structure read in as lower case in CLUSTAL format gets the reduced
+terminal penalty.
+
+The HELIX and STRAND TERMINAL POSITIONS options specify the range of structure
+termini for the intermediate penalties. In the alignment output, these are
+indicated as lower case. For Alpha Helices, by default, the range spans the 
+end-helical turn (3 residues). For Beta Strands, the default range spans the
+end residue and the adjacent loop residue, since sequence conservation often
+extends beyond the actual H-bonded Beta Strand.
+
+Clustal X can read the masks from SWISS-PROT, CLUSTAL or GDE format input
+files. For many 3-D protein structures, secondary structure information is
+recorded in the feature tables of SWISS-PROT database entries. You should
+always check that the assignments are correct - some are quite inaccurate.
+Clustal X looks for SWISS-PROT HELIX and STRAND assignments e.g.
+
+
+<PRE>
+FT   HELIX       100    115
+FT   STRAND      118    119
+</PRE>
+
+The structure and penalty masks can also be read from CLUSTAL alignment format 
+as comment lines beginning "!SS_" or "!GM_" e.g.
+
+<PRE>
+!SS_HBA_HUMA    ..aaaAAAAAAAAAAaaa.aaaAAAAAAAAAAaaaaaaAaaa.........aaaAAAAAA
+!GM_HBA_HUMA    112224444444444222122244444444442222224222111111111222444444
+HBA_HUMA        VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGK
+</PRE>
+
+Note that the mask itself is a set of numbers between 1 and 9 each of which is 
+assigned to the residue(s) in the same column below. 
+
+In GDE flat file format, the masks are specified as text and the names must
+begin with "SS_ or "GM_.
+
+Either a structure or penalty mask or both may be used. If both are included
+in an alignment, the user will be asked which is to be used.
+
+
+>>HELP T <<
+                            Phylogenetic Trees
+
+Before calculating a tree, you must have an ALIGNMENT in memory. This can be
+input using the FILE menu, LOAD SEQUENCES option or you should have just
+carried out a full multiple alignment and the alignment is still in memory.
+Remember YOU MUST ALIGN THE SEQUENCES FIRST!!!!
+
+The method used is the NJ (Neighbour Joining) method of Saitou and Nei. First
+you calculate distances (percent divergence) between all pairs of sequences from
+a multiple alignment; second you apply the NJ method to the distance matrix.
+
+To calculate a tree, use the DRAW N-J TREE option. This gives an UNROOTED tree
+and all branch lengths. The root of the tree can only be inferred by using an
+outgroup (a sequence that you are certain branches at the outside of the tree
+.... certain on biological grounds) OR if you assume a degree of constancy in
+the 'molecular clock', you can place the root in the 'middle' of the tree
+(roughly equidistant from all tips).
+
+BOOTSTRAP N-J TREE uses a method for deriving confidence values for the 
+groupings in a tree (first adapted for trees by Joe Felsenstein). It involves
+making N random samples of sites from the alignment (N should be LARGE, e.g.
+500 - 1000); drawing N trees (1 from each sample) and counting how many times
+each grouping from the original tree occurs in the sample trees. You can set N
+using the NUMBER OF BOOTSTRAP TRIALS option in the BOOTSTRAP TREE window. In
+practice, you should use a large number of bootstrap replicates (1000 is
+recommended, even if it means running the program for an hour on a slow 
+computer). You can also supply a seed number for the random number generator
+here. Different runs with the same seed will give the same answer. See the
+documentation for more details.
+
+EXCLUDE POSITIONS WITH GAPS? With this option, any alignment positions where
+ANY of the sequences have a gap will be ignored. This means that 'like' will
+be compared to 'like' in all distances, which is highly desirable. It also
+automatically throws away the most ambiguous parts of the alignment, which are
+concentrated around gaps (usually). The disadvantage is that you may throw away
+much of the data if there are many gaps (which is why it is difficult for us to
+make it the default).  
+
+CORRECT FOR MULTIPLE SUBSTITUTIONS? For small divergence (say <10%) this option
+makes no difference. For greater divergence, this option corrects for the fact
+that observed distances underestimate actual evolutionary distances. This is
+because, as sequences diverge, more than one substitution will happen at many
+sites. However, you only see one difference when you look at the present day
+sequences. Therefore, this option has the effect of stretching branch lengths
+in trees (especially long branches). The corrections used here (for DNA or
+proteins) are both due to Motoo Kimura. See the documentation for details.  
+
+Where possible, this option should be used. However, for VERY divergent
+sequences, the distances cannot be reliably corrected. You will be warned if
+this happens. Even if none of the distances in a data set exceed the reliable
+threshold, if you bootstrap the data, some of the bootstrap distances may
+randomly exceed the safe limit.  
+
+SAVE LOG FILE will write the tree calculation scores to a file. The log
+filename is the same as the input sequence filename, with an extension .log
+appended.
+
+<H4>
+OUTPUT FORMAT OPTIONS
+</H4>
+
+Four different formats are allowed. None of these displays the tree visually.
+You can display the tree using the NJPLOT program distributed with Clustal X
+OR get the PHYLIP package and use the tree drawing facilities there. 
+ 
+1) CLUSTAL FORMAT TREE. This format is verbose and lists all of the distances
+between the sequences and the number of alignment positions used for each. The
+tree is described at the end of the file. It lists the sequences that are
+joined at each alignment step and the branch lengths. After two sequences are
+joined, the pair is referred to later as a NODE. The number of a NODE is the number
+of the lowest sequence in that NODE.   
+
+2) PHYLIP FORMAT TREE. This format is the New Hampshire format, used by many
+phylogenetic analysis packages. It consists of a series of nested parentheses,
+describing the branching order, with the sequence names and branch lengths. It
+can be read by the NJPLOT program distributed with ClustalX. It can also be
+used by the RETREE, DRAWGRAM and DRAWTREE programs of the PHYLIP package to see
+the trees graphically. This is the same format used during multiple alignment
+for the guide trees. Some other packages that can read and display New
+Hampshire format are TreeTool, TreeView, and Phylowin.
+
+3) PHYLIP DISTANCE MATRIX. This format just outputs a matrix of all the
+pairwise distances in a format that can be used by the PHYLIP package. It used
+to be useful when one could not produce distances from protein sequences in the
+Phylip package but is now redundant (PROTDIST of Phylip 3.5 now does this).
+
+4) NEXUS FORMAT TREE. This format is used by several popular phylogeny programs,
+including PAUP and MacClade. The format is described fully in:
+Maddison, D. R., D. L. Swofford and W. P. Maddison.  1997.
+NEXUS: an extensible file format for systematic information.
+Systematic Biology 46:590-621.
+
+BOOTSTRAP LABELS ON: By default, the bootstrap values are correctly placed on
+the tree branches of the phylip format output tree. The toggle allows them to
+be placed on the nodes, which is incorrect, but some display packages (e.g.
+TreeTool, TreeView and Phylowin) only support node labelling but not branch
+labelling. Care should be taken to note which branches and labels go together. 
+
+
+>>HELP C <<
+                               Colors
+
+Clustal X provides a versatile coloring scheme for the sequence alignment 
+display. The sequences (or profiles) are colored automatically, when they are
+loaded. Sequences can be colored either by assigning a color to specific
+residues, or on the basis of an alignment consensus. In the latter case, the
+alignment consensus is calculated automatically, and the residues in each
+column are colored according to the consensus character assigned to that
+column. In this way, you can choose to highlight, for example, conserved
+hydrophilic or hydrophobic positions in the alignment.
+
+The 'rules' used to color the alignment are specified in a COLOR PARAMETER
+FILE. Clustal X automatically looks for a file called 'colprot.par' for protein
+sequences or 'coldna.par' for DNA, in the current directory. (If you are running
+under UNIX, it then looks in your home directory, and finally in the
+directories in your PATH environment variable).
+
+By default, if no color parameter file is found, protein sequences are colored
+by residue as follows:
+
+<PRE>
+	Color			Residue Code
+
+	ORANGE			GPST
+	RED			HKR
+	BLUE			FWY
+	GREEN			ILMV
+</PRE>
+
+In the case of DNA sequences, the default colors are as follows:
+
+<PRE>
+	Color			Residue Code
+
+	ORANGE			A
+	RED			C
+	BLUE			T
+	GREEN			G
+</PRE>
+
+
+The default BACKGROUND COLORING option shows the sequence residues using a
+black character on a colored background. It can be switched off to show
+residues as a colored character on a white background. 
+
+Either BLACK AND WHITE or DEFAULT COLOR options can be selected. The Color
+option looks first for the color parameter file (as described above) and, if no
+file is found, uses the default residue-specific colors.
+
+You can specify your own coloring scheme by using the LOAD COLOR PARAMETER FILE
+option. The format of the color parameter file is described below.
+
+<H4>
+COLOR PARAMETER FILE
+</H4>
+
+This file is divided into 3 sections:
+
+1) the names and rgb values of the colors
+2) the rules for calculating the consensus
+3) the rules for assigning colors to the residues
+ 
+An example file is given here.
+
+<PRE>
+ --------------------------------------------------------------------
+@rgbindex
+RED          0.9 0.1 0.1
+BLUE         0.1 0.1 0.9
+GREEN        0.1 0.9 0.1
+YELLOW       0.9 0.9 0.0
+
+@consensus
+% = 60% w:l:v:i:m:a:f:c:y:h:p
+# = 80% w:l:v:i:m:a:f:c:y:h:p
+- = 50% e:d
++ = 60% k:r
+q = 50% q:e
+p = 50% p
+n = 50% n
+t = 50% t:s
+
+@color
+g = RED
+p = YELLOW
+t = GREEN if t:%:#
+n = GREEN if n
+w = BLUE if %:#:p
+k = RED if +
+ --------------------------------------------------------------------
+</PRE>
+
+The first section is optional and is identified by the header @rgbindex. If
+this section exists, each color used in the file must be named and the rgb
+values specified (on a scale from 0 to 1). If the rgb index section is not
+found, the following set of hard-coded colors will be used.
+
+<PRE>
+RED          0.9 0.1 0.1
+BLUE         0.1 0.1 0.9
+GREEN        0.1 0.9 0.1
+ORANGE       0.9 0.7 0.3
+CYAN         0.1 0.9 0.9
+PINK         0.9 0.5 0.5
+MAGENTA      0.9 0.1 0.9
+YELLOW       0.9 0.9 0.0
+</PRE>
+
+The second section is optional and is identified by the header @consensus. It
+defines how the consensus is calculated.
+ 
+The format of each consensus parameter is:-
+ 
+<PRE>
+c = n% residue_list
+ 
+        where
+              c             is a character used to identify the parameter.
+              n             is an integer value used as the percentage cutoff
+                            point.
+              residue_list  is a list of residues denoted by a single
+                            character, delimited by a colon (:).
+</PRE>
+ 
+For example:   # = 60% w:l:v:i
+
+will assign a consensus character # to any column in the alignment which
+contains more than 60% of the residues w,l,v and i.
+        
+ 
+The third section is identified by the header @color, and defines how colors
+are assigned to each residue in the alignment.
+ 
+The color parameters can take one of two formats:
+
+<PRE>
+1) r = color
+2) r = color if consensus_list
+ 
+        where
+              r             is a character used to denote a residue.
+              color         is one of the colors in the GDE color lookup table.
+              consensus_list is a list of consensus characters, each a single
+                            character, delimited by a colon (:).
+</PRE>
+ 
+Examples:
+1) g = ORANGE
+
+will color all glycines ORANGE, regardless of the consensus.
+
+2) w = BLUE if w:%:#
+
+will color BLUE any tryptophan which is found in a column with a consensus of
+w, % or #.
+ 
+
+>>HELP Q <<
+                       Alignment Quality Analysis
+
+<H3>
+QUALITY SCORES
+</H3>
+--------------
+
+Clustal X provides an indication of the quality of an alignment by plotting
+a 'conservation score' for each column of the alignment. A high score indicates
+a well-conserved column; a low score indicates low conservation. The quality
+curve is drawn below the alignment.
+
+Two methods are also provided to indicate single residues or sequence segments
+which score badly in the alignment.
+ 
+Low-scoring residues are expected to occur at a moderate frequency in all the
+sequences because of their steady divergence due to the natural processes of
+evolution. The most divergent sequences are likely to have the most outliers.
+However, the highlighted residues are especially useful in pointing to
+sequence misalignments. Note that clustering of highlighted residues is a
+strong indication of misalignment. This can arise due to various reasons, for
+example:
+ 
+        1. Partial or total misalignments caused by a failure in the
+        alignment algorithm. Usually only in difficult alignment cases.
+ 
+        2. Partial or total misalignments because at least one of the
+        sequences in the given set is partly or completely unrelated to the
+        other sequences. It is up to the user to check that the set of
+        sequences is alignable.
+
+        3. Frameshift translation errors in a protein sequence causing local
+        mismatched regions to be heavily highlighted. These are surprisingly
+        common in database entries. If suspected, a 3-frame translation of
+        the source DNA needs to be examined.
+ 
+Occasionally, highlighted residues may point to regions of some biological
+significance. This might happen for example if a protein alignment contains a
+sequence which has acquired new functions relative to the main sequence set. It
+is important to exclude other explanations, such as error or the natural
+divergence of sequences, before invoking a biological explanation.
+
+
+<H3>
+LOW-SCORING SEGMENTS
+</H3>
+--------------------
+
+Unreliable regions in the alignment can be highlighted using the Low-Scoring
+Segments option. A sequence-weighted profile is used to indicate any segments
+in the sequences which score badly. Because the profile calculation may take
+some time, an option is provided to calculate LOW-SCORING SEGMENTS. The 
+segment display can then be toggled on or off without having to repeat the
+time-consuming calculations.
+
+For details of the low-scoring segment calculation, see the CALCULATION section
+below.
+
+
+<H4>
+LOW-SCORING SEGMENT PARAMETERS
+</H4>
+------------------------------
+
+MINIMUM LENGTH OF SEGMENTS: short segments (or even single residues) can be
+hidden by increasing the minimum length of segments which will be displayed.
+
+DNA MARKING SCALE is used to remove less significant segments from the 
+highlighted display. Increase the scale to display more segments; decrease the
+scale to remove the least significant.
+
+
+PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of each
+amino acid to each other. The matrix is used to calculate the sequence-
+weighted profile scores. There are four 'in-built' Log-Odds matrices offered:
+the Gonnet PAM 80, 120, 250, 350 matrices. A more stringent matrix which only
+gives a high score to identities and the most favoured conservative
+substitutions, may be more suitable when the sequences are closely related. For
+more divergent sequences, it is appropriate to use "softer" matrices which give
+a high score to many other frequent substitutions. This  option automatically
+recalculates the low-scoring segments.
+
+
+DNA WEIGHT MATRIX: Two hard-coded matrices are available:
+
+1) IUB. This is the default scoring matrix used by BESTFIT for the comparison
+of nucleic acid sequences. X's and N's are treated as matches to any IUB
+ambiguity symbol. All matches score 1.0; all mismatches for IUB symbols score
+0.9.
+
+2) CLUSTALW(1.6). The previous system used by ClustalW, in which matches score
+1.0 and mismatches score 0. All matches for IUB symbols also score 0. 
+
+A new matrix can be read from a file on disk, if the filename consists only
+of lower case characters. The values in the new weight matrix should be
+similarities and should be NEGATIVE for infrequent substitutions.
+ 
+INPUT FORMAT. The format used for a new matrix is the same as the BLAST
+program. Any lines beginning with a # character are assumed to be comments. The
+first non-comment line should contain a list of amino acids in any order, using
+the 1 letter code, followed by a * character. This should be followed by a
+square matrix of scores, with one row and one column for each amino acid. The
+last row and column of the matrix (corresponding to the * character) contain
+the minimum score over the whole matrix.
+
+<H4>
+QUALITY SCORE PARAMETERS
+</H4>
+------------------------
+
+You can customise the column 'quality scores' plotted underneath the alignment
+display using the following options.
+
+SCORE PLOT SCALE: this is a scalar value from 1 to 10, which can be used to
+change the scale of the quality score plot. 
+
+RESIDUE EXCEPTION CUTOFF: this is a scalar value from 1 to 10, which can be
+used to change the number of residue exceptions which are highlighted in the
+alignment display. (For an explanation of this cutoff, see the CALCULATION OF
+RESIDUE EXCEPTIONS section below.)
+
+PROTEIN WEIGHT MATRIX: the scoring table which describes the similarity of 
+each amino acid to each other. 
+ 
+DNA WEIGHT MATRIX: two hard-coded matrices are available: IUB and CLUSTALW(1.6).
+
+For more information about the weight matrices, see the help above for
+the Low-scoring Segments Weight Matrix.
+
+For details of the quality score calculations, see the CALCULATION section
+below.
+
+
+<STRONG>
+SHOW LOW-SCORING SEGMENTS
+</STRONG>
+                       
+The low-scoring segment display can be toggled on or off. This option does not
+recalculate the profile scores.
+
+
+<STRONG>
+SHOW EXCEPTIONAL RESIDUES
+</STRONG>
+                       
+This option highlights individual residues which score badly in the alignment
+quality calculations. Residues which score exceptionally low are highlighted by
+using a white character on a grey background.
+
+<STRONG>
+SAVE QUALITY SCORES TO FILE
+</STRONG>
+
+The quality scores that are plotted underneath the alignment display can also
+be saved in a text file. Each column in the alignment is written on one line in
+the output file, with the value of the quality score at the end of the line.
+Only the sequences currently selected in the display are written to the file.
+One use for quality scores is to color residues in a protein structure by
+sequence conservation. In this way conserved surface residues can be
+highlighted to locate functional regions such as ligand-binding sites.
+
+
+<H3>
+CALCULATION OF QUALITY SCORES
+</H3>
+-----------------------------
+
+Suppose we have an alignment of m sequences of length n. Then, the alignment
+can be written as:
+
+<PRE>
+        A11 A12 A13 .......... A1n
+        A21 A22 A23 .......... A2n
+        .
+        .
+        Am1 Am2 Am3 .......... Amn
+</PRE>
+
+We also have a residue comparison matrix of size R where C(i,j) is the score
+for aligning residue i with residue j.
+
+We want to calculate a score for the conservation of the jth position in the
+alignment.
+
+To do this, we define an R-dimensional sequence space. For the jth position in 
+the alignment, each sequence consists of a single residue which is assigned a
+point S in the space. S has R dimensions, and for sequence i, the rth dimension
+is defined as:
+
+<PRE>
+	Sr =    C(r,Aij)
+</PRE>
+
+We then calculate a consensus value for the jth position in the alignment. This
+value X also has R dimensions, and the rth dimension is defined as:
+
+<PRE>
+	Xr = (   SUM   (Fij * C(i,r)) ) / m
+               1<=i<=R
+</PRE>
+
+where Fij is the count of residues i at position j in the alignment.
+
+Now we can calculate the distance Di between each sequence i and the consensus 
+position X in the R-dimensional space.
+
+<PRE>
+	Di = SQRT   (   SUM   (Xr - Sr)(Xr - Sr) )
+                      1<=r<=R
+
+</PRE>
+
+The quality score for the jth position in the alignment is defined as the mean
+of the sequence distances Di.
+
+The score is normalised by multiplying by the percentage of sequences which
+have residues (and not gaps) at this position.
+
+<H3>
+CALCULATION OF RESIDUE EXCEPTIONS
+</H3>
+---------------------------------
+
+The jth residue of the ith sequence is considered as an exception if the
+distance Di of the sequence from the consensus value X is greater than (Upper
+Quartile + Inter Quartile Range * Cutoff). The value used as a cutoff for
+displaying exceptions can be set from the SCORE PARAMETERS menu. A high cutoff
+value will only display very significant exceptions; a low value will allow
+more, less significant, exceptions to be highlighted.
+
+(NB. Sequences which contain gaps at this position are not included in the
+exception calculation.)
+
+
+<H3>
+CALCULATION OF LOW-SCORING SEGMENTS
+</H3>
+-----------------------------------
+
+Suppose we have an alignment of m sequences of length n. Then, the alignment
+can be written as:
+
+<PRE>
+        A11 A12 A13 .......... A1n
+        A21 A22 A23 .......... A2n
+        .
+        .
+        Am1 Am2 Am3 .......... Amn
+</PRE>
+
+We also have a residue comparison matrix of size R where C(i,j) is the score
+for aligning residue i with residue j.
+
+We calculate sequence weights by building a neighbour-joining tree, in which
+branch lengths are proportional to divergence. Summing the branches by branch
+ownership provides the weights. See Thompson et al., CABIOS, 10, 19 (1994) and
+Henikoff et al., JMB, 243, 574 (1994).
+
+To find the low-scoring segments in a sequence Si, we build a weighted profile
+of the remaining sequences in the alignment. Suppose we find residue r at 
+position j in the sequence; then the score for the jth position in the sequence
+is defined as
+
+<PRE>
+	Score(Si,j) = Profile(j,r)   where Profile(j,r) is the profile score
+                                       for residue r at position j in the
+                                       alignment.
+</PRE>
+
+These residue scores are summed along the sequence in both forward and backward
+directions. If the sum of the scores is positive, then it is reset to zero.
+Segments which score negatively in both directions are considered as 
+'low-scoring' and will be highlighted in the alignment display.
+
+
+>>HELP 9 <<
+              Command Line Parameters
+
+                DATA (sequences)
+
+-INFILE=file.ext                             :input sequences
+-PROFILE1=file.ext  and  -PROFILE2=file.ext  :profiles (aligned sequences)
+
+
+                VERBS (do things)
+
+-OPTIONS	    :list the command line parameters
+-HELP  or -CHECK    :outline the command line parameters
+-ALIGN              :do full multiple alignment 
+-TREE               :calculate NJ tree
+-BOOTSTRAP(=n)      :bootstrap a NJ tree (n= number of bootstraps; def. = 1000)
+-CONVERT            :output the input sequences in a different file format
+
+
+                PARAMETERS (set things)
+
+***General settings:****
+-INTERACTIVE :read command line, then enter normal interactive menus
+-QUICKTREE   :use FAST algorithm for the alignment guide tree
+-TYPE=       :PROTEIN or DNA sequences
+-NEGATIVE    :protein alignment with negative values in matrix
+-OUTFILE=    :sequence alignment file name
+-OUTPUT=     :CLUSTAL, GCG, GDE, PHYLIP, PIR, NEXUS, FASTA
+-OUTORDER=   :INPUT or ALIGNED
+-CASE=       :LOWER or UPPER (for GDE output only)
+-SEQNOS=     :OFF or ON (for Clustal output only)
+
+
+***Fast Pairwise Alignments:***
+-KTUPLE=n      :word size
+-TOPDIAGS=n  :number of best diags.
+-WINDOW=n    :window around best diags.
+-PAIRGAP=n   :gap penalty
+-SCORE=      :PERCENT or ABSOLUTE
+
+
+***Slow Pairwise Alignments:***
+-PWMATRIX=    :Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename
+-PWDNAMATRIX= :DNA weight matrix=IUB, CLUSTALW or filename
+-PWGAPOPEN=f  :gap opening penalty
+-PWGAPEXT=f  :gap extension penalty
+ 
+
+***Multiple Alignments:***
+-NEWTREE=    :file for new guide tree
+-USETREE=    :file for old guide tree
+-MATRIX=     :Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename
+-DNAMATRIX=  :DNA weight matrix=IUB, CLUSTALW or filename
+-GAPOPEN=f   :gap opening penalty
+-GAPEXT=f  :gap extension penalty
+-ENDGAPS     :no end gap separation pen.
+-GAPDIST=n   :gap separation pen. range
+-NOPGAP      :residue-specific gaps off
+-NOHGAP    :hydrophilic gaps off
+-HGAPRESIDUES= :list hydrophilic res.
+-MAXDIV=n    :% ident. for delay
+-TYPE=       :PROTEIN or DNA
+-TRANSWEIGHT=f :transitions weighting
+
+
+***Profile Alignments:***
+-PROFILE      :Merge two alignments by profile alignment
+-NEWTREE1=    :file for new guide tree for profile1
+-NEWTREE2=    :file for new guide tree for profile2
+-USETREE1=    :file for old guide tree for profile1
+-USETREE2=    :file for old guide tree for profile2
+
+
+***Sequence to Profile Alignments:***
+-SEQUENCES   :Sequentially add profile2 sequences to profile1 alignment
+-NEWTREE=    :file for new guide tree
+-USETREE=    :file for old guide tree
+
+
+***Structure Alignments:***
+-NOSECSTR1     :do not use secondary structure/gap penalty mask for profile 1 
+-NOSECSTR2     :do not use secondary structure/gap penalty mask for profile 2
+-SECSTROUT=STRUCTURE or MASK or BOTH or NONE  :output in alignment file
+-HELIXGAP=n    :gap penalty for helix core residues 
+-STRANDGAP=n   :gap penalty for strand core residues
+-LOOPGAP=n     :gap penalty for loop regions
+-TERMINALGAP=n :gap penalty for structure termini
+-HELIXENDIN=n  :number of residues inside helix to be treated as terminal
+-HELIXENDOUT=n :number of residues outside helix to be treated as terminal
+-STRANDENDIN=n :number of residues inside strand to be treated as terminal
+-STRANDENDOUT=n:number of residues outside strand to be treated as terminal 
+
+
+***Trees:***
+-OUTPUTTREE=nj OR phylip OR dist OR nexus
+-SEED=n    :seed number for bootstraps
+-KIMURA      :use Kimura's correction
+-TOSSGAPS  :ignore positions with gaps
+-BOOTLABELS=node OR branch :position of bootstrap values in tree display
+
+
+>>HELP R <<
+                             References
+
+<STRONG>
+The ClustalX program is described in the manuscript:
+</STRONG>
+
+Thompson,J.D., Gibson,T.J., Plewniak,F., Jeanmougin,F. and Higgins,D.G. (1997)
+The ClustalX windows interface: flexible strategies for multiple sequence 
+alignment aided by quality analysis tools. Nucleic Acids Research, 25:4876-4882.
+
+
+<STRONG>
+The ClustalW program is described in the manuscript:
+</STRONG>
+
+Thompson, J.D., Higgins, D.G. and Gibson, T.J. (1994) CLUSTAL W: improving the
+sensitivity of progressive multiple sequence alignment through sequence
+weighting, position-specific gap penalties and weight matrix choice.  Nucleic
+Acids Research, 22:4673-4680.
+
+
+<STRONG>
+The ClustalV program is described in the manuscript:
+</STRONG>
+
+Higgins,D.G., Bleasby,A.J. and Fuchs,R. (1992) CLUSTAL V: improved software for
+multiple sequence alignment. CABIOS 8,189-191.
+
+
+<STRONG>
+The original Clustal program is described in the manuscripts:
+</STRONG>
+
+Higgins,D.G. and Sharp,P.M. (1989) Fast and sensitive multiple sequence
+alignments on a microcomputer.
+CABIOS 5,151-153.
+
+Higgins,D.G. and Sharp,P.M. (1988) CLUSTAL: a package for performing multiple
+sequence alignment on a microcomputer. Gene 73,237-244.
+
+-------------------------------------------------------------------------------
+<STRONG>
+Some tips on using Clustal X:
+</STRONG>
+
+Jeanmougin,F., Thompson,J.D., Gouy,M., Higgins,D.G. and Gibson,T.J. (1998)
+Multiple sequence alignment with Clustal X. Trends Biochem Sci, 23, 403-5.
+
+<STRONG>
+Some tips on using Clustal W:
+</STRONG>
+
+Higgins, D. G., Thompson, J. D. and Gibson, T. J. (1996) Using CLUSTAL for
+multiple sequence alignments. Methods Enzymol., 266, 383-402.
+
+-------------------------------------------------------------------------------
+<STRONG>
+You can get the latest version of the ClustalX program by anonymous ftp to:
+</STRONG>
+
+ftp-igbmc.u-strasbg.fr
+ftp.embl-heidelberg.de
+ftp.ebi.ac.uk
+
+<STRONG>
+Or, have a look at the following WWW site:
+</STRONG>
+
+http://www-igbmc.u-strasbg.fr/BioInfo/
+

Added: trunk/packages/clustalw/branches/upstream/current/coldna.par
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/coldna.par	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/coldna.par	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,16 @@
+# color lookup table - this is optional, if no rgbindex is specified, 8
+# hardcoded colors will be used.
+# A maximum of 16 colors can be specified - any more will be ignored!
+@rgbindex
+RED          0.9 0.2 0.1
+BLUE         0.1 0.5 0.9
+GREEN        0.1 0.8 0.1
+ORANGE       0.9 0.6 0.3
+ 
+
+@color
+a = RED
+c = BLUE
+g = ORANGE
+t = GREEN
+u = GREEN

Added: trunk/packages/clustalw/branches/upstream/current/colprint.par
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/colprint.par	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/colprint.par	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,15 @@
+WHITE        1.0 1.0 1.0
+YELLOW       1.0 1.0 0.0
+VIOLET       0.4 0.1 0.9
+RED          0.9 0.5 0.4
+BLUE         0.4 0.9 0.9
+PURPLE       0.7 0.6 0.9
+BLACK        0.0 0.0 0.0
+GREY         0.6 0.7 0.7
+PINK         0.8 0.3 0.8
+ORANGE       0.9 0.7 0.3
+CYAN         0.1 0.7 0.7
+PINK         0.9 0.5 0.5
+MAGENTA      0.8 0.3 0.8
+ORANGE       0.9 0.6 0.3
+

Added: trunk/packages/clustalw/branches/upstream/current/colprot.par
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/colprot.par	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/colprot.par	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,66 @@
+# color lookup table - this is optional, if no rgbindex is specified, 8
+# hardcoded colors will be used.
+# A maximum of 16 colors can be specified - any more will be ignored!
+@rgbindex
+RED          0.9 0.2 0.1
+BLUE         0.1 0.5 0.9
+GREEN        0.1 0.8 0.1
+CYAN         0.1 0.7 0.7
+PINK         0.9 0.5 0.5
+MAGENTA      0.8 0.3 0.8
+YELLOW       0.8 0.8 0.0
+ORANGE       0.9 0.6 0.3
+
+@consensus
+% = 60% w:l:v:i:m:a:f:c:y:h:p
+# = 80% w:l:v:i:m:a:f:c:y:h:p
+- = 50% e:d
++ = 60% k:r
+g = 50% g
+n = 50% n
+q = 50% q:e
+p = 50% p
+t = 50% t:s
+A = 85% a
+C = 85% c
+D = 85% d
+E = 85% e
+F = 85% f
+G = 85% g
+H = 85% h
+I = 85% i
+K = 85% k
+L = 85% l
+M = 85% m
+N = 85% n
+P = 85% p
+Q = 85% q
+R = 85% r
+S = 85% s
+T = 85% t
+V = 85% v
+W = 85% w
+Y = 85% y
+
+@color
+g = ORANGE
+p = YELLOW
+t = GREEN if t:S:T:%:#
+s = GREEN if t:S:T:#
+n = GREEN if n:N:D
+q = GREEN if q:Q:E:+:K:R
+w = BLUE if %:#:A:C:F:H:I:L:M:V:W:Y:P:p
+l = BLUE if %:#:A:C:F:H:I:L:M:V:W:Y:P:p
+v = BLUE if %:#:A:C:F:H:I:L:M:V:W:Y:P:p
+i = BLUE if %:#:A:C:F:H:I:L:M:V:W:Y:P:p
+m = BLUE if %:#:A:C:F:H:I:L:M:V:W:Y:P:p
+a = BLUE if %:#:A:C:F:H:I:L:M:V:W:Y:P:p:T:S:s:G
+f = BLUE if %:#:A:C:F:H:I:L:M:V:W:Y:P:p
+c = BLUE if %:#:A:F:H:I:L:M:V:W:Y:S:P:p
+c = PINK if C
+h = CYAN if %:#:A:C:F:H:I:L:M:V:W:Y:P:p
+y = CYAN if %:#:A:C:F:H:I:L:M:V:W:Y:P:p
+e = MAGENTA if -:D:E:q:Q
+d = MAGENTA if -:D:E:n:N
+k = RED if +:K:R:Q
+r = RED if +:K:R:Q

Added: trunk/packages/clustalw/branches/upstream/current/dayhoff.h
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/dayhoff.h	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/dayhoff.h	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,45 @@
+/* DAYHOFF.H
+   
+   Table of estimated PAMS (actual no. of substitutions per 100 residues)
+   for a range of observed amino acid distances from 75.0% (the first entry
+   in the array), in 0.1% increments, up to 93.0%.  
+
+   The array holds 181 entries; entry i corresponds to an observed distance
+   of (75.0 + 0.1*i)%, so entry 0 is 75.0% and entry 180 is 93.0%.
+
+   These values are used to correct for multiple hits in protein alignments.  
+   The values below are for observed distances above 74.9%.  For values above 
+   93%, an arbitrary value of 1000 PAMS (1000% substitution) is used.  
+
+   These values are derived from a Dayhoff model (1978) of amino acid 
+   substitution and assume average amino acid composition and that amino 
+   acids replace each other at the same rate as in the original Dayhoff model.
+
+   Up to 75% observed distance, use Kimura's empirical formula to derive
+   the correction.  For 75% or greater, use this table.  Kimura's formula
+   is accurate up to about 75% and fails completely above 85%.
+*/
+
+int dayhoff_pams[]={
+  195,   /* 75.0% observed d; 195 PAMs estimated = 195% estimated d */
+  196,   /* 75.1% observed d; 196 PAMs estimated */
+                  197,    198,    199,    200,    200,    201,    202,  203,    
+  204,    205,    206,    207,    208,    209,    209,    210,    211,  212,    
+  213,    214,    215,    216,    217,    218,    219,    220,    221,  222,    
+  223,    224,    226,    227,    228,    229,    230,    231,    232,  233,    
+  234,    236,    237,    238,    239,    240,    241,    243,    244,  245,    
+  246,    248,    249,    250,    /* 250 PAMs = 80.3% observed d */          
+                                  252,    253,    254,    255,    257,  258,    
+  260,    261,    262,    264,    265,    267,    268,    270,    271,  273,    
+  274,    276,    277,    279,    281,    282,    284,    285,    287,  289,    
+  291,    292,    294,    296,    298,    299,    301,    303,    305,  307,    
+  309,    311,    313,    315,    317,    319,    321,    323,    325,  328,    
+  330,    332,    335,    337,    339,    342,    344,    347,    349,  352,    
+  354,    357,    360,    362,    365,    368,    371,    374,    377,  380,    
+  383,    386,    389,    393,    396,    399,    403,    407,    410,  414,    
+  418,    422,    426,    430,    434,    438,    442,    447,    451,  456,    
+  461,    466,    471,    476,    482,    487,    493,    498,    504,  511,    
+  517,    524,    531,    538,    545,    553,    560,    569,    577,  586,    
+  595,    605,    615,    626,    637,    649,    661,    675,    688,  703,    
+  719,    736,    754,    775,    796,    819,    845,    874,    907,  945,
+         /* 92.9% observed; 945 PAMs */    
+  988    /* 93.0% observed; 988 PAMs */
+};
+

Added: trunk/packages/clustalw/branches/upstream/current/gcgcheck.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/gcgcheck.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/gcgcheck.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,15 @@
+#include <ctype.h>   /* because of toupper() */
+int SeqGCGCheckSum(char *seq, int len);
+
+/*
+ * SeqGCGCheckSum - compute the GCG-style checksum of a sequence.
+ *
+ * seq : pointer to the first of `len` sequence characters; exactly `len`
+ *       characters are read, so the buffer need not be NUL-terminated.
+ * len : number of characters to include; a value <= 0 yields 0.
+ *
+ * Each character is upper-cased and multiplied by a position weight that
+ * cycles through 1..57; the weighted sum modulo 10000 is returned.
+ */
+int SeqGCGCheckSum(char *seq, int len)
+{
+	int  i;
+	long check = 0;
+
+	for (i = 0; i < len; i++) {
+		/* <ctype.h> functions require an argument representable as
+		   unsigned char (or EOF); passing a negative plain char is
+		   undefined behaviour, so convert before calling toupper(). */
+		check += ((i % 57) + 1) * toupper((unsigned char)seq[i]);
+	}
+
+	return (int)(check % 10000);
+}
+
+

Added: trunk/packages/clustalw/branches/upstream/current/general.h
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/general.h	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/general.h	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,50 @@
+/* General purpose header file - rf 12/90
+ *
+ * Portability definitions shared by the sources that include it:
+ * the pint/sint/lint integer aliases, the Boolean type, TRUE/FALSE,
+ * EOS and MAXLINE, plus keyword work-arounds for the MAC and VMS builds.
+ */
+
+#ifndef _H_general
+#define _H_general
+
+
+
+/* Macintosh specific */
+#ifdef MAC					/* rf 12/9/94 */
+
+#define const					/* THINK C doesn't know about these keywords, so they are defined away */
+#define signed
+#define volatile
+#define int long				/* every 'int' below this point becomes 'long' on MAC builds; NOTE(review): presumably because THINK C's int is too narrow - confirm */
+#ifndef Boolean
+#define Boolean char
+#endif
+#define pint short			/* cast ints in printf statements as pint */
+#define sint int			/* cast ints for sequence lengths */
+#define lint int			/* cast ints for profile scores */
+
+#else 							/* not Macintosh */
+
+#define pint int			/* cast ints in printf statements as pint */
+#define sint int			/* cast ints for sequence lengths */
+#define lint int 			/* cast ints for profile scores */
+#ifndef Boolean
+#define Boolean char
+#endif
+
+#endif 							/* ifdef MAC */
+
+/* definitions for all machines */
+
+#undef TRUE						/* Boolean values; first undef them, just in case */
+#undef FALSE
+#define TRUE 1
+#define FALSE 0
+
+#define EOS '\0'				/* End-Of-String */
+#define MAXLINE 512			/* Max. line length */
+
+
+#ifdef VMS
+#define signed				/* 'signed' is defined away on VMS builds as well */
+#endif
+
+
+#endif /* ifndef _H_general */
+

Added: trunk/packages/clustalw/branches/upstream/current/globin.pep
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/globin.pep	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/globin.pep	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,86 @@
+>P1;HBB_HUMAN
+Sw:Hbb_Human => HBB_HUMAN
+          VHLTPEEKSA VTALWGKVNV DEVGGEALGR LLVVYPWTQR FFESFGDLST 
+          PDAVMGNPKV KAHGKKVLGA FSDGLAHLDN LKGTFATLSE LHCDKLHVDP 
+          ENFRLLGNVL VCVLAHHFGK EFTPPVQAAY QKVVAGVANA LAHKYH*
+C;ID   HBB_HUMAN      STANDARD;      PRT;   146 AA.
+C;AC   P02023;
+C;DT   21-JUL-1986 (REL. 01, CREATED)
+C;DT   21-JUL-1986 (REL. 01, LAST SEQUENCE UPDATE)
+C;DT   01-APR-1993 (REL. 25, LAST ANNOTATION UPDATE)
+C;DE   HEMOGLOBIN BETA CHAIN. . . . 
+
+>P1;HBB_HORSE
+Sw:Hbb_Horse => HBB_HORSE
+          VQLSGEEKAA VLALWDKVNE EEVGGEALGR LLVVYPWTQR FFDSFGDLSN 
+          PGAVMGNPKV KAHGKKVLHS FGEGVHHLDN LKGTFAALSE LHCDKLHVDP 
+          ENFRLLGNVL VVVLARHFGK DFTPELQASY QKVVAGVANA LAHKYH*
+C;ID   HBB_HORSE      STANDARD;      PRT;   146 AA.
+C;AC   P02062;
+C;DT   21-JUL-1986 (REL. 01, CREATED)
+C;DT   21-JUL-1986 (REL. 01, LAST SEQUENCE UPDATE)
+C;DT   01-MAR-1992 (REL. 21, LAST ANNOTATION UPDATE)
+C;DE   HEMOGLOBIN BETA CHAIN. . . . 
+
+>P1;HBA_HUMAN
+Sw:Hba_Human => HBA_HUMAN
+          VLSPADKTNV KAAWGKVGAH AGEYGAEALE RMFLSFPTTK TYFPHFDLSH 
+          GSAQVKGHGK KVADALTNAV AHVDDMPNAL SALSDLHAHK LRVDPVNFKL 
+          LSHCLLVTLA AHLPAEFTPA VHASLDKFLA SVSTVLTSKY R*
+C;ID   HBA_HUMAN      STANDARD;      PRT;   141 AA.
+C;AC   P01922;
+C;DT   21-JUL-1986 (REL. 01, CREATED)
+C;DT   21-JUL-1986 (REL. 01, LAST SEQUENCE UPDATE)
+C;DT   01-FEB-1994 (REL. 28, LAST ANNOTATION UPDATE)
+C;DE   HEMOGLOBIN ALPHA CHAIN. . . . 
+
+>P1;HBA_HORSE
+Sw:Hba_Horse => HBA_HORSE
+          VLSAADKTNV KAAWSKVGGH AGEYGAEALE RMFLGFPTTK TYFPHFDLSH 
+          GSAQVKAHGK KVGDALTLAV GHLDDLPGAL SNLSDLHAHK LRVDPVNFKL 
+          LSHCLLSTLA VHLPNDFTPA VHASLDKFLS SVSTVLTSKY R*
+C;ID   HBA_HORSE      STANDARD;      PRT;   141 AA.
+C;AC   P01958;
+C;DT   21-JUL-1986 (REL. 01, CREATED)
+C;DT   21-JUL-1986 (REL. 01, LAST SEQUENCE UPDATE)
+C;DT   01-MAR-1992 (REL. 21, LAST ANNOTATION UPDATE)
+C;DE   HEMOGLOBIN ALPHA CHAINS (SLOW AND FAST). . . . 
+
+>P1;MYG_PHYCA
+Sw:Myg_Phyca => MYG_PHYCA
+          VLSEGEWQLV LHVWAKVEAD VAGHGQDILI RLFKSHPETL EKFDRFKHLK 
+          TEAEMKASED LKKHGVTVLT ALGAILKKKG HHEAELKPLA QSHATKHKIP 
+          IKYLEFISEA IIHVLHSRHP GDFGADAQGA MNKALELFRK DIAAKYKELG 
+          YQG*
+C;ID   MYG_PHYCA      STANDARD;      PRT;   153 AA.
+C;AC   P02185;
+C;DT   21-JUL-1986 (REL. 01, CREATED)
+C;DT   21-JUL-1986 (REL. 01, LAST SEQUENCE UPDATE)
+C;DT   01-MAY-1992 (REL. 22, LAST ANNOTATION UPDATE)
+C;DE   MYOGLOBIN. . . . 
+
+>P1;GLB5_PETMA
+Sw:Glb5_Petma => GLB5_PETMA
+          PIVDTGSVAP LSAAEKTKIR SAWAPVYSTY ETSGVDILVK FFTSTPAAQE 
+          FFPKFKGLTT ADQLKKSADV RWHAERIINA VNDAVASMDD TEKMSMKLRD 
+          LSGKHAKSFQ VDPQYFKVLA AVIADTVAAG DAGFEKLMSM ICILLRSAY* 
+C;ID   GLB5_PETMA     STANDARD;      PRT;   149 AA.
+C;AC   P02208;
+C;DT   21-JUL-1986 (REL. 01, CREATED)
+C;DT   21-JUL-1986 (REL. 01, LAST SEQUENCE UPDATE)
+C;DT   01-MAR-1992 (REL. 21, LAST ANNOTATION UPDATE)
+C;DE   GLOBIN V. . . . 
+
+>P1;LGB2_LUPLU
+Sw:Lgb2_Luplu => LGB2_LUPLU
+          GALTESQAAL VKSSWEEFNA NIPKHTHRFF ILVLEIAPAA KDLFSFLKGT 
+          SEVPQNNPEL QAHAGKVFKL VYEAAIQLQV TGVVVTDATL KNLGSVHVSK 
+          GVADAHFPVV KEAILKTIKE VVGAKWSEEL NSAWTIAYDE LAIVIKKEMN 
+          DAA*
+C;ID   LGB2_LUPLU     STANDARD;      PRT;   153 AA.
+C;AC   P02240;
+C;DT   21-JUL-1986 (REL. 01, CREATED)
+C;DT   01-NOV-1988 (REL. 09, LAST SEQUENCE UPDATE)
+C;DT   01-MAR-1992 (REL. 21, LAST ANNOTATION UPDATE)
+C;DE   LEGHEMOGLOBIN II. . . . 
+


Property changes on: trunk/packages/clustalw/branches/upstream/current/globin.pep
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/clustalw/branches/upstream/current/gon90.bla
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/gon90.bla	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/gon90.bla	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,24 @@
+#
+#
+    C     S     T     P      A     G     N      D     E     Q     H     R     K    M      I      L     V     F      Y     W     *
+ 15.10 -1.20-3.00 -8.50  -0.70 -5.60  -5.10 -8.60  -8.60 -7.00-3.90 -5.50 -8.10 -3.50 -4.80  -5.10  -1.80 -3.50 -2.80 -3.90 0.0
+ -1.20  7.302.60  -1.10  1.50  -1.20  0.70  -1.10  -1.60 -1.20-2.10 -2.30 -1.70 -3.70 -5.70  -5.90  -3.90 -7.10 -4.30 -6.80 0.0
+ -3.00 2.60  7.70 -1.70  -0.30 -4.70  -0.50 -2.00  -2.30 -1.50-2.10 -2.40 -1.20 -2.20 -2.40  -4.40  -0.70 -5.80 -5.30 -8.10 0.0
+ -8.50 -1.10-1.70  11.20 -1.30 -5.30  -4.30 -3.80  -3.10 -2.10-4.00 -4.10 -3.30 -7.10 -6.90  -5.40  -5.30 -8.70 -7.10 -10.50 0.0
+ -0.70 1.50 -0.30 -1.30   7.10 -0.80  -2.90 -2.70  -1.40 -1.90-3.50 -3.20 -2.70 -2.30 -3.80  -3.90  -0.40 -5.90 -6.00 -8.20 0.0
+ -5.60 -1.20-4.70 -5.30  -0.80   9.50 -1.40 -2.30  -4.20 -4.00-4.80 -3.90 -4.50 -7.70 -10.60 -9.80  -8.00 -10.80-8.80 -7.60 0.0
+ -5.10 0.70 -0.50 -4.30  -2.90 -1.40   9.30  2.30  -0.90 -0.401.10  -1.90 0.10  -5.70 -6.70  -7.40  -6.30 -7.00 -3.60 -8.10 0.0
+ -8.60 -1.10-2.00 -3.80  -2.70 -2.30  2.30    9.30 3.30  -0.60-1.40 -3.90 -1.70 -8.10 -9.80  -10.10 -8.10 -10.40-6.40 -11.60 0.0
+ -8.60 -1.60-2.30 -3.10  -1.40 -4.20  -0.90  3.30   8.40 2.40 -1.40 -1.90 0.70  -4.90 -6.80  -7.00  -4.70 -9.50 -6.90 -9.20 0.0
+ -7.00 -1.20-1.50 -2.10  -1.90 -4.00  -0.40 -0.60  2.40   8.901.60  1.30  1.80  -1.80 -5.50  -3.60  -4.70 -6.50 -4.90 -5.80 0.0
+ -3.90 -2.10-2.10 -4.00  -3.50 -4.80  1.10  -1.40  -1.40 1.60 12.30 -0.70 -0.90 -3.50 -5.90  -5.30  -6.00 -2.20 2.30  -3.80 0.0
+ -5.50 -2.30-2.40 -4.10  -3.20 -3.90  -1.90 -3.90  -1.90 1.30 -0.70  9.30 3.50  -4.80 -6.50  -5.40  -5.50 -8.40 -4.60 -3.80 0.0
+ -8.10 -1.70-1.20 -3.30  -2.70 -4.50  0.10  -1.70  0.70  1.80 -0.90 3.50   8.10 -3.40 -5.50  -5.40  -4.90 -8.30 -5.50 -8.30 0.0
+ -3.50 -3.70-2.20 -7.10  -2.30 -7.70  -5.70 -8.10  -4.90 -1.80-3.50 -4.80 -3.40 11.10  2.70   3.20  0.40   0.60 -3.30 -4.10 0.0
+ -4.80 -5.70-2.40 -6.90  -3.80 -10.60 -6.70 -9.80  -6.80 -5.50-5.90 -6.50 -5.50 2.70    8.20  2.40  4.20  -1.10 -4.20 -5.80 0.0
+ -5.10 -5.90-4.40 -5.40  -3.90 -9.80  -7.40 -10.10 -7.00 -3.60-5.30 -5.40 -5.40 3.20   2.40    7.40 0.60   1.00 -3.10 -3.90 0.0
+ -1.80 -3.90-0.70 -5.30  -0.40 -8.00  -6.30 -8.10  -4.70 -4.70-6.00 -5.50 -4.90 0.40   4.20   0.60   7.60 -2.70 -4.30 -7.30 0.0
+ -3.50 -7.10-5.80 -8.70  -5.90 -10.80 -7.00 -10.40 -9.50 -6.50-2.20 -8.40 -8.30 0.60  -1.10   1.00  -2.70  11.105.10   2.00 0.0
+ -2.80 -4.30-5.30 -7.10  -6.00 -8.80  -3.60 -6.40  -6.90 -4.902.30  -4.60 -5.50 -3.30 -4.20  -3.10  -4.30  5.10 12.00  2.60 0.0
+ -3.90 -6.80-8.10 -10.50 -8.20 -7.60  -8.10 -11.60 -9.20 -5.80-3.80 -3.80 -8.30 -4.10 -5.80  -3.90  -7.30  2.00 2.60   17.10 0.0
+0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 

Added: trunk/packages/clustalw/branches/upstream/current/interface.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/interface.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/interface.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,4391 @@
+/* command line interface for Clustal W  */
+/* DES was here MARCH. 1994 */
+/* DES was here SEPT.  1994 */
+/* Fixed memory allocation bug in check_param() . Alan Bleasby Dec 2002 */
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <setjmp.h>
+#include "clustalw.h"
+#include "param.h"
+
+/*
+*	Prototypes
+*/
+
+#ifdef UNIX
+FILE    *open_path(char *);
+#endif
+
+
+char *nameonly(char *s) ;
+
+static sint check_param(char **args,char *params[], char *param_arg[]);
+static void set_optional_param(void);
+static sint find_match(char *probe, char *list[], sint n);
+static void show_aln(void);
+static void create_parameter_output(void);
+static void reset_align(void);
+static void reset_prf1(void);
+static void reset_prf2(void);
+static void calc_gap_penalty_mask(int prf_length,char *struct_mask,char *gap_mask);
+void print_sec_struct_mask(int prf_length,char *mask,char *struct_mask);
+/*
+*	 Global variables
+*/
+
+extern sint max_names;
+
+extern Boolean interactive;
+
+extern double  **tmat;
+extern float    gap_open,      gap_extend;
+extern float  	dna_gap_open,  dna_gap_extend;
+extern float 	prot_gap_open, prot_gap_extend;
+extern float    pw_go_penalty,      pw_ge_penalty;
+extern float  	dna_pw_go_penalty,  dna_pw_ge_penalty;
+extern float 	prot_pw_go_penalty, prot_pw_ge_penalty;
+extern char 	revision_level[];
+extern sint    wind_gap,ktup,window,signif;
+extern sint    dna_wind_gap, dna_ktup, dna_window, dna_signif;
+extern sint    prot_wind_gap,prot_ktup,prot_window,prot_signif;
+extern sint	boot_ntrials;		/* number of bootstrap trials */
+extern sint	nseqs;
+extern sint	new_seq;
+extern sint 	*seqlen_array;
+extern sint 	divergence_cutoff;
+extern sint 	debug;
+extern Boolean 	no_weights;
+extern Boolean 	neg_matrix;
+extern Boolean  quick_pairalign;
+extern Boolean	reset_alignments_new;		/* DES */
+extern Boolean	reset_alignments_all;		/* DES */
+extern sint 	gap_dist;
+extern Boolean 	no_hyd_penalties, no_pref_penalties;
+extern sint 	max_aa;
+extern sint 	gap_pos1, gap_pos2;
+extern sint  	max_aln_length;
+extern sint 	*output_index, output_order;
+extern sint profile_no;
+extern short 	usermat[], pw_usermat[];
+extern short 	aa_xref[], pw_aa_xref[];
+extern short 	userdnamat[], pw_userdnamat[];
+extern short 	dna_xref[], pw_dna_xref[];
+extern sint	*seq_weight;
+
+extern Boolean 	lowercase; /* Flag for GDE output - set on comm. line*/
+extern Boolean 	cl_seq_numbers;
+
+extern Boolean seqRange; /*Ramu */
+
+extern Boolean 	output_clustal, output_nbrf, output_phylip, output_gcg, output_gde, output_nexus, output_fasta;
+extern Boolean 	output_tree_clustal, output_tree_phylip, output_tree_distances, output_tree_nexus;
+extern sint     bootstrap_format;
+extern Boolean 	tossgaps, kimura;
+extern Boolean  percent;
+extern Boolean 	explicit_dnaflag;  /* Explicit setting of sequence type on comm.line*/
+extern Boolean 	usemenu;
+extern Boolean 	showaln, save_parameters;
+extern Boolean	dnaflag;
+extern float	transition_weight;
+extern unsigned sint boot_ran_seed;
+
+
+extern FILE 	*tree;
+extern FILE 	*clustal_outfile, *gcg_outfile, *nbrf_outfile, *phylip_outfile, *nexus_outfile;
+extern FILE     *fasta_outfile; /* Ramu */
+extern FILE 	*gde_outfile;
+
+extern char 	hyd_residues[];
+extern char 	*amino_acid_codes;
+extern char 	**args;
+extern char	seqname[];
+
+extern char 	**seq_array;
+extern char 	**names, **titles;
+
+extern char *gap_penalty_mask1,*gap_penalty_mask2;
+extern char *sec_struct_mask1,*sec_struct_mask2;
+extern sint struct_penalties,struct_penalties1,struct_penalties2;
+extern sint output_struct_penalties;
+extern Boolean use_ss1, use_ss2;
+extern char *ss_name1,*ss_name2;
+
+
+/*
+*	File-scope state defined by the command line interface.
+*	NOTE(review): the roles of the first three pointers are inferred from
+*	their names only (secondary structure name / mask, gap penalty mask) -
+*	confirm against their users.
+*/
+char *ss_name = NULL;
+char *sec_struct_mask = NULL;
+char *gap_penalty_mask = NULL;
+
+/* presumably the file names of the two profiles - TODO confirm */
+char  	profile1_name[FILENAMELEN+1];
+char  	profile2_name[FILENAMELEN+1];
+
+/* all three flags are set to TRUE by init_interface() */
+Boolean empty;
+Boolean profile1_empty, profile2_empty;   /* whether or not the profiles are still empty (not yet filled) */
+
+/* receives the argument of the /outfile= option in set_optional_param() */
+char 	outfile_name[FILENAMELEN+1]="";
+
+/* per-format output file names; the tree/pim names start out empty */
+static char 	clustal_outname[FILENAMELEN+1], gcg_outname[FILENAMELEN+1];
+static char  	phylip_outname[FILENAMELEN+1],nbrf_outname[FILENAMELEN+1];
+static char  	gde_outname[FILENAMELEN+1], nexus_outname[FILENAMELEN+1];
+static char     fasta_outname[FILENAMELEN+1];  /* Ramu */
+char     clustal_tree_name[FILENAMELEN+1]="";
+char     dist_tree_name[FILENAMELEN+1]="";
+char 	phylip_tree_name[FILENAMELEN+1]="";
+char 	nexus_tree_name[FILENAMELEN+1]="";
+char 	p1_tree_name[FILENAMELEN+1]="";
+char 	p2_tree_name[FILENAMELEN+1]="";
+
+char pim_name[FILENAMELEN+1]=""; /* Ramu */
+
+/*
+*	Parsed command line, filled in by check_param(): params[i] is the
+*	i-th keyword (text before any '='), param_arg[i] the text after the
+*	'=' or NULL when the keyword had no argument.
+*/
+static char *params[MAXARGS];
+static char *param_arg[MAXARGS];
+
+/* NOTE(review): looks like display suffixes, one per argument type
+   (none, integer, float, string, file name) - confirm where it is printed */
+static char *cmd_line_type[] = {
+                " ",
+                "=n ",
+                "=f ",
+                "=string ",
+                "=filename ",
+                ""};
+
+static sint numparams;
+static Boolean check_tree = TRUE;
+
+sint 	profile1_nseqs;	/* the no. of seqs in profile 1 */
+Boolean use_tree_file = FALSE,new_tree_file = FALSE;
+Boolean use_tree1_file = FALSE, use_tree2_file = FALSE;
+Boolean new_tree1_file = FALSE, new_tree2_file = FALSE;
+
+/* scratch line buffer of MAXLINE+1 chars, allocated in init_interface();
+   get_help() passes it to getstr() to hold the user's reply */
+static char *lin2;
+
+/*
+*	Matrix menus: an entry count followed by pairs of strings.
+*	NOTE(review): MatMenu is declared elsewhere; each pair is presumably
+*	(menu title, keyword).  The keywords and their 1-based positions
+*	agree with the names and matrix numbers that set_optional_param()
+*	accepts for /dnamatrix, /matrix and /pwmatrix; the last entry of
+*	each table is the user-supplied matrix.
+*/
+MatMenu dnamatrix_menu = {3,
+                "IUB","iub",
+                "CLUSTALW(1.6)","clustalw",
+                "User defined",""
+		};
+
+MatMenu matrix_menu = {5,
+                "BLOSUM series","blosum",
+                "PAM series","pam",
+                "Gonnet series","gonnet",
+                "Identity matrix","id",
+                "User defined",""
+		};
+ 
+MatMenu pw_matrix_menu = {5,
+                "BLOSUM 30","blosum",
+                "PAM 350","pam",
+                "Gonnet 250","gonnet",
+                "Identity matrix","id",
+                "User defined",""
+		};
+
+
+/*
+*	init_interface()
+*
+*	Puts the interface into its start-up state: marks the sequence set
+*	and both profiles as empty, and allocates the MAXLINE+1 character
+*	scratch buffer lin2.
+*/
+void init_interface(void)
+{
+	/* nothing has been read in yet */
+	empty          = TRUE;
+	profile1_empty = TRUE;
+	profile2_empty = TRUE;
+
+	lin2 = (char *)ckalloc((MAXLINE + 1) * sizeof *lin2);
+}
+
+
+
+
+/*
+*	check_param()
+*
+*	Splits the raw command line words in args[] (NULL terminated) into
+*	keywords and arguments and matches every keyword against the tables
+*	cmd_line_para, cmd_line_file and cmd_line_verb.
+*
+*	args      - command line words; every word except possibly the first
+*	            is expected to start with the option separator, which is
+*	            dropped.  A first word that does not start with
+*	            COMMANDSEP is taken as the input file name and copied
+*	            into seqname.
+*	params    - out: params[i] receives the keyword of word i (for
+*	            "keyword=value" words the keyword is lower-cased).
+*	param_arg - out: param_arg[i] receives the text after '=' or NULL.
+*
+*	For every recognised keyword the table entry's flag is set to the
+*	index of the word, so callers can find its argument in param_arg[].
+*
+*	Returns the number of words processed, 0 when args[] is empty, or -1
+*	when there are too many words.  Prints a message and calls exit(1)
+*	on an unknown keyword or a missing mandatory argument.
+*
+*	NOTE(review): keywords given without '=' are not lower-cased here;
+*	that is the historical behaviour and has been kept.
+*	NOTE(review): seqname is declared "extern char seqname[]", so its
+*	capacity is not visible here and the copy into it cannot be bounded
+*	in this function - confirm the caller limits the file name length.
+*/
+static sint check_param(char **args,char *params[], char *param_arg[])
+{
+	sint	i,j,k,s,n,match[MAXARGS];
+	sint	ajb;
+	size_t	len;
+	char	*eq;
+	Boolean	name1 = FALSE;
+	Boolean	keep_case;
+
+	/* an empty command line holds no parameters */
+	if(args[0]==NULL) return(0);
+
+	/* no keyword has been recognised yet */
+	for (i=0;i<MAXARGS;i++) match[i] = -1;
+
+	params[0]=(char *)ckalloc((strlen(args[0])+1)*sizeof(char));
+	if (args[0][0]!=COMMANDSEP)
+	{
+		name1 = TRUE;
+		strcpy(params[0],args[0]);
+	}
+	else
+		strcpy(params[0],&args[0][1]);
+
+	/* remaining words: drop the leading separator and any unprintable characters */
+	for (i=1;i<MAXARGS;i++) {
+		if(args[i]==NULL) break;
+		len = strlen(args[i]);
+		params[i]=(char *)ckalloc((len+1)*sizeof(char));
+		ajb=0;
+		/* starting at 1 is safe for an empty word: the loop simply does not run */
+		for(j=1;(size_t)j<len;j++)
+			if(isprint((unsigned char)args[i][j])) params[i][ajb++]=args[i][j];
+		params[i][ajb]='\0';
+	}
+
+	if (i==MAXARGS) {
+		fprintf(stdout,"Error: too many command line arguments\n");
+		return(-1);
+	}
+/*
+    special case - first parameter is input filename
+  */
+	s = 0;
+	if(name1 == TRUE) {
+		strcpy(seqname, params[0]);
+		/*  JULIE
+		    convert to lower case now (file names keep their case on UNIX)
+		*/
+#ifndef UNIX
+		for(k=0;seqname[k]!=EOS;++k)
+			seqname[k]=tolower((unsigned char)seqname[k]);
+#endif
+		s++;
+	}
+
+	/* split "keyword=value" words; s skips the input file name, if any */
+	n = i;
+	for (i=s;i<n;i++) {
+		param_arg[i] = NULL;
+		eq = strchr(params[i],'=');
+		if (eq == NULL) continue;
+
+		len = strlen(eq+1);
+		param_arg[i] = (char *)ckalloc((len+1)*sizeof(char));
+		strcpy(param_arg[i],eq+1);
+		*eq = EOS;
+		/*  JULIE
+		    convert keywords to lower case now
+		*/
+		for(k=0;params[i][k]!=EOS;++k)
+			params[i][k]=tolower((unsigned char)params[i][k]);
+	}
+
+  /*
+    for each parameter given on the command line, first search the list of recognised optional 
+    parameters....
+  */
+	for (i=s;i<n;i++) {
+		for (j=0;cmd_line_para[j].str[0] != '\0';j++) {
+			if (strcmp(params[i],cmd_line_para[j].str) != 0) continue;
+			match[i] = j;
+			*cmd_line_para[j].flag = i;
+			if ((cmd_line_para[j].type != NOARG) &&
+			    (param_arg[i] == NULL)) {
+				fprintf(stdout,
+					"Error: parameter required for /%s\n",params[i]);
+				exit(1);
+			}
+			/*  JULIE
+			    convert parameters to lower case now, unless the parameter is a filename
+			*/
+#ifdef UNIX
+			keep_case = (cmd_line_para[j].type == FILARG);
+#else
+			keep_case = FALSE;
+#endif
+			if (!keep_case && param_arg[i] != NULL)
+				for(k=0;param_arg[i][k]!=EOS;++k)
+					param_arg[i][k]=tolower((unsigned char)param_arg[i][k]);
+			break;
+		}
+	}
+  /*
+    ....then the list of recognised input files,.... 
+*/
+	for (i=s;i<n;i++) {
+		if (match[i] != -1) continue;
+		for (j=0;cmd_line_file[j].str[0] != '\0';j++) {
+			if (strcmp(params[i],cmd_line_file[j].str) != 0) continue;
+			match[i] = j;
+			*cmd_line_file[j].flag = i;
+			if ((cmd_line_file[j].type != NOARG) &&
+			    (param_arg[i] == NULL)) {
+				fprintf(stdout,
+					"Error: parameter required for /%s\n",params[i]);
+				exit(1);
+			}
+			break;
+		}
+	}
+/*
+	....and finally the recognised verbs. 
+*/
+	for (i=s;i<n;i++) {
+		if (match[i] != -1) continue;
+		for (j=0;cmd_line_verb[j].str[0] != '\0';j++) {
+			if (strcmp(params[i],cmd_line_verb[j].str) != 0) continue;
+			match[i] = j;
+			*cmd_line_verb[j].flag = i;
+			if ((cmd_line_verb[j].type != NOARG) &&
+			    (param_arg[i] == NULL)) {
+				fprintf(stdout,
+					"Error: parameter required for /%s\n",params[i]);
+				exit(1);
+			}
+			break;
+		}
+	}
+
+/*
+	check for any unrecognised parameters (the input file name, if
+	present, is not a keyword and is never matched, so it is skipped).
+*/
+	for (i=s;i<n;i++) {
+		if (match[i] == -1) {
+			fprintf(stdout,
+				"Error: unknown option %c%s\n",COMMANDSEP,params[i]);
+			exit(1);
+		}
+	}
+	return(n);
+}
+
+/*
+*	Helpers for set_optional_param().  An option "slot" is the index of
+*	the option's word in param_arg[], or -1 when the option was not given.
+*/
+
+/* non-zero when the option in this slot was given with a non-empty argument */
+static int clopt_given(int idx)
+{
+	return idx != -1 && param_arg[idx] != NULL && param_arg[idx][0] != '\0';
+}
+
+/* integer argument of a given option; 0 (after a message naming the option)
+   when the argument is empty or not a number */
+static int clopt_int(int idx, const char *name)
+{
+	int value = 0;
+
+	if (clopt_given(idx) && sscanf(param_arg[idx],"%d",&value) != 1) {
+		fprintf(stdout,"Bad option for /%s (must be integer)\n",name);
+		value = 0;
+	}
+	return value;
+}
+
+/* real-valued argument of a given option; 0.0 (after a message naming the
+   option) when the argument is empty or not a number */
+static float clopt_float(int idx, const char *name)
+{
+	float value = 0.0;
+
+	if (clopt_given(idx) && sscanf(param_arg[idx],"%f",&value) != 1) {
+		fprintf(stdout,"Bad option for /%s (must be real number)\n",name);
+		value = 0.0;
+	}
+	return value;
+}
+
+/* lower-cased copy of src in dst, truncated to fit dstsize (>= 1) characters */
+static void clopt_lower(char *dst, size_t dstsize, const char *src)
+{
+	size_t i;
+
+	for (i = 0; i + 1 < dstsize && src[i] != '\0'; i++)
+		dst[i] = (char)tolower((unsigned char)src[i]);
+	dst[i] = '\0';
+}
+
+/* 1-based position of name in known[], or 0 when it is not listed */
+static int clopt_builtin(const char *name, const char *const known[], int nknown)
+{
+	int i;
+
+	for (i = 0; i < nknown; i++)
+		if (strcmp(name,known[i]) == 0) return i + 1;
+	return 0;
+}
+
+/*
+*	set_optional_param()
+*
+*	Applies every optional setting found on the command line (gap
+*	penalties, k-tuple size, matrices, output formats ...) to the
+*	corresponding global.  Each set... variable holds the slot of its
+*	option in param_arg[], or -1 when the option was not given.
+*	NOTE(review): the set... slots are declared outside this file
+*	section; presumably they are the flags written by check_param().
+*
+*	Unknown keyword values are reported on stdout and otherwise
+*	ignored; an unreadable user matrix file or an over-long /outfile
+*	name ends the program with exit(1).
+*/
+static void set_optional_param(void)
+{
+	static const char *const prot_series[] = { "blosum", "pam", "gonnet", "id" };
+	static const char *const dna_series[]  = { "iub", "clustalw" };
+	int i,temp;
+	int from,to;
+	int c;
+	size_t nres;
+	float ftemp;
+	char tstr[100];
+
+	/****************************************************************************/
+	/* look for parameters on command line  e.g. gap penalties, k-tuple etc.    */
+	/****************************************************************************/
+
+	/*** ? /score=percent or /score=absolute */
+	if (clopt_given(setscore)) {
+		temp = find_match(param_arg[setscore],score_arg,2);
+		if (temp == 0)
+			percent = TRUE;
+		else if (temp == 1)
+			percent = FALSE;
+		else
+			fprintf(stdout,"\nUnknown SCORE type: %s\n",
+				param_arg[setscore]);
+	}
+
+	/*** ? /seed=n */
+	if (setseed != -1) {
+		temp = clopt_int(setseed,"seed");
+		if (temp > 0) boot_ran_seed = temp;
+		fprintf(stdout,"\ntemp = %d; seed = %u;\n",(pint)temp,boot_ran_seed);
+	}
+
+	/*** ? /output=GCG, GDE, PIR, PHYLIP, NEXUS or FASTA */
+	if (clopt_given(setoutput)) {
+		temp = find_match(param_arg[setoutput],output_arg,6);
+		if (temp >= 0 && temp <= 5) {
+			/* a recognised format replaces whatever was selected before */
+			output_clustal = FALSE;
+			output_gcg     = FALSE;
+			output_phylip  = FALSE;
+			output_nbrf    = FALSE;
+			output_gde     = FALSE;
+			output_nexus   = FALSE;
+			output_fasta   = FALSE;
+		}
+		switch (temp) {
+			case 0: /* GCG */
+				output_gcg     = TRUE;
+				break;
+			case 1: /* GDE */
+				output_gde     = TRUE;
+				break;
+			case 2: /* PIR */
+				output_nbrf    = TRUE;
+				break;
+			case 3: /* PHYLIP */
+				output_phylip  = TRUE;
+				break;
+			case 4: /* NEXUS */
+				output_nexus   = TRUE;
+				break;
+			case 5: /* FASTA */
+				output_fasta   = TRUE;
+				break;
+			default:
+				fprintf(stdout,"\nUnknown OUTPUT type: %s\n",
+					param_arg[setoutput]);
+		}
+	}
+
+	/*** ? /outputtree=NJ or PHYLIP or DIST or NEXUS */
+	if (clopt_given(setoutputtree)) {
+		temp = find_match(param_arg[setoutputtree],outputtree_arg,4);
+		switch (temp) {
+			case 0: /* NJ */
+				output_tree_clustal = TRUE;
+				break;
+			case 1: /* PHYLIP */
+				output_tree_phylip  = TRUE;
+				break;
+			case 2: /* DIST */
+				output_tree_distances = TRUE;
+				break;
+			case 3: /* NEXUS */
+				output_tree_nexus = TRUE;
+				break;
+			default:
+				fprintf(stdout,"\nUnknown OUTPUT TREE type: %s\n",
+					param_arg[setoutputtree]);
+		}
+	}
+
+	/*** ? /profile (sets type of second input file to profile) */
+	if (setprofile != -1)
+		profile_type = PROFILE;
+
+	/*** ? /sequences (sets type of second input file to list of sequences)  */
+	if (setsequences != -1)
+		profile_type = SEQUENCE;
+
+	/*** ? /ktuple=n  (at most 4 for DNA, 2 for protein) */
+	if (setktuple != -1) {
+		temp = clopt_int(setktuple,"ktuple");
+		if (temp > 0) {
+			if (dnaflag) {
+				if (temp <= 4) {
+					ktup         = temp;
+					dna_ktup     = ktup;
+					wind_gap     = ktup + 4;
+					dna_wind_gap = wind_gap;
+				}
+			}
+			else {
+				if (temp <= 2) {
+					ktup          = temp;
+					prot_ktup     = ktup;
+					wind_gap      = ktup + 3;
+					prot_wind_gap = wind_gap;
+				}
+			}
+		}
+	}
+
+	/*** ? /pairgap=n  (must exceed the k-tuple size) */
+	if (setpairgap != -1) {
+		temp = clopt_int(setpairgap,"pairgap");
+		if (temp > 0 && temp > ktup) {
+			wind_gap = temp;
+			if (dnaflag) dna_wind_gap  = wind_gap;
+			else         prot_wind_gap = wind_gap;
+		}
+	}
+
+	/*** ? /topdiags=n   */
+	if (settopdiags != -1) {
+		temp = clopt_int(settopdiags,"topdiags");
+		if (temp > 0 && temp > ktup) {
+			signif = temp;
+			if (dnaflag) dna_signif  = signif;
+			else         prot_signif = signif;
+		}
+	}
+
+	/*** ? /window=n  */
+	if (setwindow != -1) {
+		temp = clopt_int(setwindow,"window");
+		if (temp > 0 && temp > ktup) {
+			window = temp;
+			if (dnaflag) dna_window  = window;
+			else         prot_window = window;
+		}
+	}
+
+	/*** ? /kimura */
+	if (setkimura != -1)
+		kimura = TRUE;
+
+	/*** ? /tossgaps */
+	if (settossgaps != -1)
+		tossgaps = TRUE;
+
+	/*** ? /negative  */
+	if (setnegative != -1)
+		neg_matrix = TRUE;
+
+	/*** ? /noweights */
+	if (setnoweights != -1)
+		no_weights = TRUE;
+
+	/*
+	   The four matrix options share one scheme: a built-in series name
+	   (compared case-insensitively) selects matrix number 1..n, anything
+	   else is treated as the name of a user matrix file.  tstr is only
+	   used for the comparison, so truncating an over-long name in it is
+	   harmless: no built-in name is that long.
+	*/
+
+	/*** ? /pwmatrix=ID (user's file)  */
+	if (clopt_given(setpwmatrix)) {
+		clopt_lower(tstr,sizeof tstr,param_arg[setpwmatrix]);
+		temp = clopt_builtin(tstr,prot_series,4);
+		if (temp > 0) {
+			strcpy(pw_mtrxname, tstr);
+			pw_matnum = temp;
+		}
+		else if (user_mat(param_arg[setpwmatrix], pw_usermat, pw_aa_xref)) {
+			strcpy(pw_mtrxname,param_arg[setpwmatrix]);
+			strcpy(pw_usermtrxname,param_arg[setpwmatrix]);
+			pw_matnum=5;
+		}
+		else exit(1);
+	}
+
+	/*** ? /matrix=ID (user's file)  */
+	if (clopt_given(setmatrix)) {
+		clopt_lower(tstr,sizeof tstr,param_arg[setmatrix]);
+		temp = clopt_builtin(tstr,prot_series,4);
+		if (temp > 0) {
+			strcpy(mtrxname, tstr);
+			matnum = temp;
+		}
+		else if (user_mat_series(param_arg[setmatrix], usermat, aa_xref)) {
+			strcpy(mtrxname,param_arg[setmatrix]);
+			strcpy(usermtrxname,param_arg[setmatrix]);
+			matnum=5;
+		}
+		else exit(1);
+	}
+
+	/*** ? /pwdnamatrix=ID (user's file)  */
+	if (clopt_given(setpwdnamatrix)) {
+		clopt_lower(tstr,sizeof tstr,param_arg[setpwdnamatrix]);
+		temp = clopt_builtin(tstr,dna_series,2);
+		if (temp > 0) {
+			strcpy(pw_dnamtrxname, tstr);
+			pw_dnamatnum = temp;
+		}
+		else if (user_mat(param_arg[setpwdnamatrix], pw_userdnamat, pw_dna_xref)) {
+			strcpy(pw_dnamtrxname,param_arg[setpwdnamatrix]);
+			strcpy(pw_dnausermtrxname,param_arg[setpwdnamatrix]);
+			pw_dnamatnum=3;
+		}
+		else exit(1);
+	}
+
+	/*** ? /dnamatrix=ID (user's file)  */
+	if (clopt_given(setdnamatrix)) {
+		clopt_lower(tstr,sizeof tstr,param_arg[setdnamatrix]);
+		temp = clopt_builtin(tstr,dna_series,2);
+		if (temp > 0) {
+			strcpy(dnamtrxname, tstr);
+			dnamatnum = temp;
+		}
+		else if (user_mat(param_arg[setdnamatrix], userdnamat, dna_xref)) {
+			strcpy(dnamtrxname,param_arg[setdnamatrix]);
+			strcpy(dnausermtrxname,param_arg[setdnamatrix]);
+			dnamatnum=3;
+		}
+		else exit(1);
+	}
+
+	/*** ? /maxdiv= n */
+	if (setmaxdiv != -1) {
+		temp = clopt_int(setmaxdiv,"maxdiv");
+		if (temp >= 0)
+			divergence_cutoff = temp;
+	}
+
+	/*** ? /gapdist= n */
+	if (setgapdist != -1) {
+		temp = clopt_int(setgapdist,"gapdist");
+		if (temp >= 0)
+			gap_dist = temp;
+	}
+
+	/*** ? /debug= n */
+	if (setdebug != -1) {
+		temp = clopt_int(setdebug,"debug");
+		if (temp >= 0)
+			debug = temp;
+	}
+
+	/*** ? /outfile= (user's file)  */
+	if (clopt_given(setoutfile)) {
+		/* outfile_name holds FILENAMELEN characters plus the terminator */
+		if (strlen(param_arg[setoutfile]) > (size_t)FILENAMELEN) {
+			fprintf(stdout,"Error: file name given for /outfile is too long\n");
+			exit(1);
+		}
+		strcpy(outfile_name, param_arg[setoutfile]);
+	}
+
+	/*** ? /case= lower/upper  */
+	if (clopt_given(setcase)) {
+		temp = find_match(param_arg[setcase],case_arg,2);
+		if (temp == 0)
+			lowercase = TRUE;
+		else if (temp == 1)
+			lowercase = FALSE;
+		else
+			fprintf(stdout,"\nUnknown case %s\n",
+				param_arg[setcase]);
+	}
+
+	/*** ? /seqnos=off/on  */
+	if (clopt_given(setseqno)) {
+		temp = find_match(param_arg[setseqno],seqno_arg,2);
+		if (temp == 0)
+			cl_seq_numbers = FALSE;
+		else if (temp == 1)
+			cl_seq_numbers = TRUE;
+		else
+			fprintf(stdout,"\nUnknown SEQNO option %s\n",
+				param_arg[setseqno]);
+	}
+
+	/*** ? sequence range numbering off/on */
+	if (clopt_given(setseqno_range)) {
+		temp = find_match(param_arg[setseqno_range],seqno_range_arg,2);
+		printf("\n comparing  ");
+		printf("\nparam_arg[setseqno_range]= %s", param_arg[setseqno_range]);
+		printf("\n comparing \n ");
+
+		if (temp == 0)
+			seqRange = FALSE;
+		else if (temp == 1)
+			seqRange = TRUE;
+		else
+			fprintf(stdout,"\nUnknown Sequence range  option %s\n",
+				param_arg[setseqno_range]);
+	}
+
+	/*** ? /range=n:m  (only the syntax is checked here; the values are not stored) */
+	if (clopt_given(setrange)) {
+		if (sscanf(param_arg[setrange],"%d:%d",&from,&to) != 2)
+			fprintf(stdout,"setrange:  Syntax Error: Cannot set range, should be from:to \n");
+	}
+
+	/*** ? /gapopen=n  */
+	if (setgapopen != -1) {
+		ftemp = clopt_float(setgapopen,"gapopen");
+		if (ftemp >= 0.0) {
+			gap_open = ftemp;
+			if (dnaflag) dna_gap_open  = gap_open;
+			else         prot_gap_open = gap_open;
+		}
+	}
+
+	/*** ? /gapext=n   */
+	if (setgapext != -1) {
+		ftemp = clopt_float(setgapext,"gapext");
+		if (ftemp >= 0) {
+			gap_extend = ftemp;
+			if (dnaflag) dna_gap_extend  = gap_extend;
+			else         prot_gap_extend = gap_extend;
+		}
+	}
+
+	/*** ? /transweight=n*/
+	if (settransweight != -1)
+		transition_weight = clopt_float(settransweight,"transweight");
+
+	/*** ? /pwgapopen=n  */
+	if (setpwgapopen != -1) {
+		ftemp = clopt_float(setpwgapopen,"pwgapopen");
+		if (ftemp >= 0.0) {
+			pw_go_penalty = ftemp;
+			if (dnaflag) dna_pw_go_penalty  = pw_go_penalty;
+			else         prot_pw_go_penalty = pw_go_penalty;
+		}
+	}
+
+	/*** ? /pwgapext=n   */
+	if (setpwgapext != -1) {
+		ftemp = clopt_float(setpwgapext,"pwgapext");
+		if (ftemp >= 0) {
+			pw_ge_penalty = ftemp;
+			if (dnaflag) dna_pw_ge_penalty  = pw_ge_penalty;
+			else         prot_pw_ge_penalty = pw_ge_penalty;
+		}
+	}
+
+	/*** ? /outorder=n  */
+	if (clopt_given(setoutorder)) {
+		temp = find_match(param_arg[setoutorder],outorder_arg,2);
+		if (temp == 0)
+			output_order   = INPUT;
+		else if (temp == 1)
+			output_order   = ALIGNED;
+		else
+			fprintf(stdout,"\nUnknown OUTPUT ORDER type %s\n",
+				param_arg[setoutorder]);
+	}
+
+	/*** ? /bootlabels=n  */
+	if (clopt_given(setbootlabels)) {
+		temp = find_match(param_arg[setbootlabels],bootlabels_arg,2);
+		if (temp == 0)
+			bootstrap_format   = BS_NODE_LABELS;
+		else if (temp == 1)
+			bootstrap_format   = BS_BRANCH_LABELS;
+		else
+			fprintf(stdout,"\nUnknown bootlabels type %s\n",
+				param_arg[setbootlabels]);
+	}
+
+	/*** ? /endgaps */
+	if (setuseendgaps != -1)
+		use_endgaps = FALSE;
+
+	/*** ? /nopgap  */
+	if (setnopgap != -1)
+		no_pref_penalties = TRUE;
+
+	/*** ? /nohgap  */
+	if (setnohgap != -1)
+		no_hyd_penalties = TRUE;
+
+	/*** ? /novgap  */
+	if (setnovgap != -1)
+		no_var_penalties = FALSE;
+
+	/*** ? /hgapresidues="string"  */
+	if (clopt_given(sethgapres)) {
+		/* overwrite the existing list in place, never growing it; copying
+		   stops at the first non-letter (which includes the terminator) */
+		nres = strlen(hyd_residues);
+		for (i = 0; (size_t)i < nres && i < 26; i++) {
+			c = (unsigned char)param_arg[sethgapres][i];
+			if (!isalpha(c))
+				break;
+			hyd_residues[i] = (char)toupper(c);
+		}
+	}
+
+	/*** ? /nosecstr1  */
+	if (setsecstr1 != -1)
+		use_ss1 = FALSE;
+
+	/*** ? /nosecstr2  */
+	if (setsecstr2 != -1)
+		use_ss2 = FALSE;
+
+	/*** ? /secstroutput  */
+	if (clopt_given(setsecstroutput)) {
+		temp = find_match(param_arg[setsecstroutput],outputsecstr_arg,4);
+		if (temp >= 0 && temp <= 3)
+			output_struct_penalties = temp;
+		else
+			fprintf(stdout,"\nUnknown case %s\n",
+				param_arg[setsecstroutput]);
+	}
+
+	/*** ? /helixgap= n  (1..9) */
+	if (sethelixgap != -1) {
+		temp = clopt_int(sethelixgap,"helixgap");
+		if (temp >= 1 && temp <= 9)
+			helix_penalty = temp;
+	}
+
+	/*** ? /strandgap= n  (1..9) */
+	if (setstrandgap != -1) {
+		temp = clopt_int(setstrandgap,"strandgap");
+		if (temp >= 1 && temp <= 9)
+			strand_penalty = temp;
+	}
+
+	/*** ? /loopgap= n  (1..9) */
+	if (setloopgap != -1) {
+		temp = clopt_int(setloopgap,"loopgap");
+		if (temp >= 1 && temp <= 9)
+			loop_penalty = temp;
+	}
+
+	/*** ? /terminalgap= n  (1..9, applies to helix and strand ends) */
+	if (setterminalgap != -1) {
+		temp = clopt_int(setterminalgap,"terminalgap");
+		if (temp >= 1 && temp <= 9) {
+			helix_end_penalty = temp;
+			strand_end_penalty = temp;
+		}
+	}
+
+	/*** ? /helixendin= n  (0..3) */
+	if (sethelixendin != -1) {
+		temp = clopt_int(sethelixendin,"helixendin");
+		if (temp >= 0 && temp <= 3)
+			helix_end_minus = temp;
+	}
+
+	/*** ? /helixendout= n  (0..3) */
+	if (sethelixendout != -1) {
+		temp = clopt_int(sethelixendout,"helixendout");
+		if (temp >= 0 && temp <= 3)
+			helix_end_plus = temp;
+	}
+
+	/*** ? /strandendin= n  (0..3) */
+	if (setstrandendin != -1) {
+		temp = clopt_int(setstrandendin,"strandendin");
+		if (temp >= 0 && temp <= 3)
+			strand_end_minus = temp;
+	}
+
+	/*** ? /strandendout= n  (0..3) */
+	if (setstrandendout != -1) {
+		temp = clopt_int(setstrandendout,"strandendout");
+		if (temp >= 0 && temp <= 3)
+			strand_end_plus = temp;
+	}
+
+}
+ 
+#ifdef UNIX
+/*
+*	open_path()
+*
+*	Opens fname read-only, looking for it in every directory of the PATH
+*	environment variable and then in the two shared data directories
+*	/usr/share/clustalx and /usr/local/share/clustalx.
+*
+*	Candidates whose full name (directory + '/' + fname) is longer than
+*	Mxdir characters are skipped.
+*
+*	Returns the stream of the first candidate that can be opened, or
+*	NULL when none can (or when memory for the search list is not
+*	available).  The caller closes the stream.
+*/
+FILE *open_path(char *fname)  /* to open in read-only file fname searching for 
+				 it through all path directories */
+{
+#define Mxdir 70
+	/* added for File System Standards  - Francois */
+	static const char extra_dirs[] = "/usr/share/clustalx:/usr/local/share/clustalx";
+	char dir[Mxdir+1];
+	const char *path, *deb, *fin;
+	char *path1;
+	FILE *fich = NULL;
+	size_t lp, lf, ltot;
+
+	path=getenv("PATH"); 	/* get the list of path directories, 
+					separated by :
+    				*/
+	if (path == NULL) path = "";
+	lp = strlen(path);
+
+	/* room for PATH, one separator and extra_dirs (sizeof counts its terminator) */
+	path1 = malloc(lp + 1 + sizeof extra_dirs);
+	if (path1 == NULL) return NULL;
+	strcpy(path1,path);
+	/* keep the last PATH entry and the first extra directory apart */
+	if (lp > 0 && path[lp-1] != ':') strcat(path1,":");
+	strcat(path1,extra_dirs);
+
+	lf = strlen(fname);
+	for (deb = path1; deb != NULL; deb = (fin != NULL) ? fin + 1 : NULL)
+		{
+		fin = strchr(deb,':');
+		ltot = (fin != NULL) ? (size_t)(fin - deb) : strlen(deb);
+		/* check the length before anything is copied into dir */
+		if (ltot + lf + 1 > Mxdir) continue;
+		memcpy(dir,deb,ltot);
+		dir[ltot]='/';
+		strcpy(dir+ltot+1,fname); /* now dir is appended with filename */
+		if ((fich = fopen(dir,"r")) != NULL) break;
+		}
+
+	free(path1);
+	return fich;
+}
+#endif
+
+
+/*
+ * Print one section of the help file named by help_file_name.
+ *
+ * A section starts after a line containing ">>HELP"; the first character
+ * from the digits table found at offsets 6..7 of that line is the section
+ * token.  The section whose token equals help_pointer is copied to stdout
+ * (lines starting with '<' are not printed) until the next ">>HELP" line
+ * or end of file.  When usemenu is set the output is paged every PAGE_LEN
+ * lines and the user may type X to stop.
+ *
+ * Reports an error if the file cannot be opened or no section matches.
+ * The help file is closed exactly once on every path.
+ */
+void get_help(char help_pointer)    /* Help procedure */
+{	
+	FILE *help_file;
+	sint  i, nlines;
+	char temp[MAXLINE+1];
+	char token = '\0';
+	char *digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+	char *help_marker    = ">>HELP";
+
+	extern char *help_file_name;
+
+#ifdef VMS
+        if((help_file=fopen(help_file_name,"r","rat=cr","rfm=var"))==NULL) {
+            error("Cannot open help file [%s]",help_file_name);
+            return;
+        }
+#else
+
+#ifdef UNIX
+        /* search the path directories first, then fall back to the bare name */
+        if((help_file=open_path(help_file_name))==NULL) {
+             if((help_file=fopen(help_file_name,"r"))==NULL) {
+                  error("Cannot open help file [%s]",help_file_name);
+                  return;
+             }
+        }
+        
+#else
+        if((help_file=fopen(help_file_name,"r"))==NULL) {
+            error("Cannot open help file [%s]",help_file_name);
+            return;
+        }
+#endif
+
+#endif
+	nlines = 0;
+
+	while(TRUE) {
+		if(fgets(temp,MAXLINE+1,help_file) == NULL) {
+			/* end of file reached without meeting the wanted section */
+			error("No help found in help file");
+			fclose(help_file);
+			return;
+		}
+		if(strstr(temp,help_marker)) {
+                        token = ' ';
+			for(i=strlen(help_marker); i<8; i++)
+				if(strchr(digits, temp[i])) {
+					token = temp[i];
+					break;
+				}
+		}
+		if(token == help_pointer) {
+			while(fgets(temp,MAXLINE+1,help_file)) {
+				if(strstr(temp, help_marker)){
+					/* next section header: the wanted section is complete */
+				  	if(usemenu) {
+						fprintf(stdout,"\n");
+				    		getstr("Press [RETURN] to continue",lin2);
+				  	}
+					fclose(help_file);
+					return;
+				}
+				if(temp[0]!='<') {
+			       		fputs(temp,stdout);
+			       		++nlines;
+				}
+				if(usemenu && nlines >= PAGE_LEN) {
+					fprintf(stdout,"\n");
+					getstr("Press [RETURN] to continue or  X  to stop",lin2);
+					if(toupper(*lin2) == 'X') {
+						fclose(help_file);
+						return;
+					}
+					nlines = 0;
+				}
+			}
+			/* the wanted section ran to the end of the file */
+			if(usemenu) {
+				fprintf(stdout,"\n");
+				getstr("Press [RETURN] to continue",lin2);
+			}
+			fclose(help_file);
+			/* stop here: the stream is closed and must not be read again */
+			return;
+		}
+	}
+}
+
+/*
+ * Page the alignment output file to stdout.
+ *
+ * The file shown is the first one whose output flag is set, tested in the
+ * order clustal, nbrf, gcg, phylip, gde, nexus, fasta.  If no flag is set
+ * the name stays empty and the open fails with an error message.  After
+ * every PAGE_LEN lines the user is prompted and may type X to stop.
+ */
+static void show_aln(void)         /* Alignment screen display procedure */
+{
+        FILE *file;
+        sint  nlines;
+        char temp[MAXLINE+1];
+        char file_name[FILENAMELEN+1];
+
+        /* start from an empty name so fopen() never sees uninitialised bytes */
+        file_name[0] = EOS;
+
+        if(output_clustal) strcpy(file_name,clustal_outname);
+        else if(output_nbrf) strcpy(file_name,nbrf_outname);
+        else if(output_gcg) strcpy(file_name,gcg_outname);
+        else if(output_phylip) strcpy(file_name,phylip_outname);
+        else if(output_gde) strcpy(file_name,gde_outname);
+        else if(output_nexus) strcpy(file_name,nexus_outname);
+        else if(output_fasta) strcpy(file_name,fasta_outname);
+
+#ifdef VMS
+        if((file=fopen(file_name,"r","rat=cr","rfm=var"))==NULL) {
+#else
+        if((file=fopen(file_name,"r"))==NULL) {
+#endif
+                error("Cannot open file [%s]",file_name);
+                return;
+        }
+
+        fprintf(stdout,"\n\n");
+        nlines = 0;
+
+        while(fgets(temp,MAXLINE+1,file)) {
+                fputs(temp,stdout);
+                ++nlines;
+                if(nlines >= PAGE_LEN) {
+                        fprintf(stdout,"\n");
+                        getstr("Press [RETURN] to continue or  X  to stop",lin2);
+                        if(toupper(*lin2) == 'X') {
+                                fclose(file);
+                                return;
+                        }
+                        else
+                                nlines = 0;
+                }
+        }
+        fclose(file);
+        fprintf(stdout,"\n");
+        getstr("Press [RETURN] to continue",lin2);
+}
+
+/*
+ * Interpret the command line and, unless running interactively, carry out
+ * the requested action.
+ *
+ * xmenus: when TRUE the start-up banner is not printed and, in interactive
+ *         mode, usemenu is left untouched.
+ *
+ * Steps, in order:
+ *   - check_param() fills params/param_arg; a negative count exits with 1.
+ *   - the help and options switches print their text and exit with 1.
+ *   - the type switch forces protein (match 0) or DNA (match 1).
+ *   - the infile sequences are read with readseqs(); fewer than two
+ *     sequences exits with code 2 (negative count), 3 (none) or 4 (one).
+ *   - set_optional_param() is called, then profile 1 and profile 2 are read.
+ *   - the action switches (tree, bootstrap, align, convert, usetree*,
+ *     newtree*) are validated and turned into do_* flags; plain alignment
+ *     is the default when sequences were loaded and nothing else was asked.
+ *   - gap and pairwise parameters are copied from the DNA or protein set.
+ *   - in interactive mode the function returns here; otherwise the first
+ *     applicable action is run and the process ends with exit(0).
+ */
+void parse_params(Boolean xmenus)
+{
+	sint i,j,len,temp;
+	static sint cl_error_code=0;
+        char path[FILENAMELEN];
+
+
+	Boolean do_align, do_convert, do_align_only, do_tree_only, do_tree, do_boot, do_profile, do_something;
+
+	if (!xmenus)
+	{
+		fprintf(stdout,"\n\n\n");
+		fprintf(stdout," CLUSTAL %s Multiple Sequence Alignments\n\n\n",revision_level);
+	}
+
+	do_align = do_convert = do_align_only = do_tree_only = do_tree = do_boot = do_profile = do_something = FALSE;
+
+	*seqname=EOS;
+
+/* JULIE 
+	len=(sint)strlen(paramstr);
+   Stop converting command line to lower case - unix, mac, pc are case sensitive
+	for(i=0;i<len;++i) paramstr[i]=tolower(paramstr[i]);
+*/
+
+    numparams = check_param(args, params, param_arg);
+	if (numparams <0) exit(1);
+	/* help and the option listing both end the program with status 1 */
+	if(sethelp != -1) {
+		get_help('9');
+		exit(1);
+	}
+
+	if(setoptions != -1) {
+		fprintf(stdout,"clustalw option list:-\n");
+		for (i=0;cmd_line_verb[i].str[0] != '\0';i++) {
+			fprintf(stdout,"\t\t%c%s%s",COMMANDSEP,cmd_line_verb[i].str,cmd_line_type[cmd_line_verb[i].type]);
+			if (cmd_line_verb[i].type == OPTARG) {
+				if (cmd_line_verb[i].arg[0][0] != '\0')
+					fprintf(stdout,"=%s",cmd_line_verb[i].arg[0]);
+				for (j=1;cmd_line_verb[i].arg[j][0] != '\0';j++)
+					fprintf(stdout," OR %s",cmd_line_verb[i].arg[j]);
+			}
+			fprintf(stdout,"\n");
+		}
+		for (i=0;cmd_line_file[i].str[0] != '\0';i++) {
+			fprintf(stdout,"\t\t%c%s%s",COMMANDSEP,cmd_line_file[i].str,cmd_line_type[cmd_line_file[i].type]);
+			if (cmd_line_file[i].type == OPTARG) {
+				if (cmd_line_file[i].arg[0][0] != '\0')
+					fprintf(stdout,"=%s",cmd_line_file[i].arg[0]);
+				for (j=1;cmd_line_file[i].arg[j][0] != '\0';j++)
+					fprintf(stdout," OR %s",cmd_line_file[i].arg[j]);
+			}
+			fprintf(stdout,"\n");
+		}
+		for (i=0;cmd_line_para[i].str[0] != '\0';i++) {
+			fprintf(stdout,"\t\t%c%s%s",COMMANDSEP,cmd_line_para[i].str,cmd_line_type[cmd_line_para[i].type]);
+			if (cmd_line_para[i].type == OPTARG) {
+				if (cmd_line_para[i].arg[0][0] != '\0')
+					fprintf(stdout,"=%s",cmd_line_para[i].arg[0]);
+				for (j=1;cmd_line_para[i].arg[j][0] != '\0';j++)
+					fprintf(stdout," OR %s",cmd_line_para[i].arg[j]);
+			}
+			fprintf(stdout,"\n");
+		}
+		exit(1);
+	}
+
+
+/*****************************************************************************/
+/*  Check to see if sequence type is explicitely stated..override ************/
+/* the automatic checking (DNA or Protein).   /type=d or /type=p *************/
+/*****************************************************************************/
+	if(settype != -1)
+		if(strlen(param_arg[settype])>0) {
+			temp = find_match(param_arg[settype],type_arg,2);
+			if(temp == 0) {
+				dnaflag = FALSE;
+				explicit_dnaflag = TRUE;
+				info("Sequence type explicitly set to Protein");
+			}
+			else if(temp == 1) {
+				info("Sequence type explicitly set to DNA");
+				dnaflag = TRUE;
+				explicit_dnaflag = TRUE;
+			}
+			else
+				fprintf(stdout,"\nUnknown sequence type %s\n",
+				param_arg[settype]);
+		}
+
+
+/***************************************************************************
+*   check to see if 1st parameter does not start with '/' i.e. look for an *
+*   input file as first parameter.   The input file can also be specified  *
+*   by /infile=fname.                                                      *
+****************************************************************************/
+/* JULIE - moved to check_param()
+	if(paramstr[0] != '/') {
+		strcpy(seqname, params[0]);
+	}
+*/
+
+/**************************************************/
+/*  Look for /infile=file.ext on the command line */
+/**************************************************/
+
+	if(setinfile != -1) {
+		if(strlen(param_arg[setinfile]) <= 0) {
+			error("Bad sequence file name");
+			exit(1);
+		}
+		strcpy(seqname, param_arg[setinfile]);
+	}
+	/* a sequence file was named: load it; at least two sequences are required */
+	if(*seqname != EOS) {
+		profile_no = 0;
+		nseqs = readseqs((sint)1);
+		if(nseqs < 2) {
+			if(nseqs < 0) cl_error_code = 2;
+			else if(nseqs == 0) cl_error_code = 3;
+			else cl_error_code = 4;
+                	fprintf(stdout,
+			"\nNo. of seqs. read = %d. No alignment!\n",(pint)nseqs);
+			exit(cl_error_code);
+		}
+		for(i = 1; i<=nseqs; i++) 
+			info("Sequence %d: %-*s   %6.d %s",
+			(pint)i,max_names,names[i],(pint)seqlen_array[i],dnaflag?"bp":"aa");
+		empty = FALSE;
+		do_something = TRUE;
+	}
+
+	set_optional_param();
+
+/*********************************************************/
+/* Look for /profile1=file.ext  AND  /profile2=file2.ext */
+/* You must give both file names OR neither.             */
+/*********************************************************/
+
+	if(setprofile1 != -1) {
+		if(strlen(param_arg[setprofile1]) <= 0) {
+			error("Bad profile 1 file name");
+			exit(1);
+		}
+		strcpy(seqname, param_arg[setprofile1]);
+		profile_no = 1;
+		profile_input();
+		if(nseqs <= 0) {
+			if(nseqs<0) cl_error_code=2;
+			else if(nseqs==0) cl_error_code=3;
+			exit(cl_error_code);
+		}
+		strcpy(profile1_name,seqname);
+	}
+
+	if(setprofile2 != -1) {
+		if(strlen(param_arg[setprofile2]) <= 0) {
+			error("Bad profile 2 file name");
+			exit(1);
+		}
+		if(profile1_empty) {
+			error("Only 1 profile file (profile 2) specified.");
+			exit(1);
+		}
+		strcpy(seqname, param_arg[setprofile2]);
+		profile_no = 2;
+		profile_input();
+		if(nseqs > profile1_nseqs) 
+			do_something = do_profile = TRUE;
+		else {
+			if(nseqs<0) cl_error_code=2;
+			else if(nseqs==0) cl_error_code=3;
+			error("No sequences read from profile 2");
+			exit(cl_error_code);
+		}
+		strcpy(profile2_name,seqname);
+	}
+
+/*************************************************************************/
+/* Look for /tree or /bootstrap or /align or /usetree ******************/
+/*************************************************************************/
+	/* when both switches are given, the interactive test runs last and wins */
+	if (setbatch != -1)
+		interactive=FALSE;
+
+	if (setinteractive != -1)
+		interactive=TRUE;
+	/* interactive mode ignores these action switches (newtree1/newtree2 are not reset here) */
+	if (interactive) {
+		settree = -1;
+		setbootstrap = -1;
+		setalign = -1;
+		setusetree = -1;
+		setusetree1 = -1;
+		setusetree2 = -1;
+		setnewtree = -1;
+		setconvert = -1;
+	}
+	/* NOTE: in the unbraced tests below each "else" pairs with the inner if(empty) / if(profileN_empty) test */
+	if(settree != -1 )
+		if(empty) {
+			error("Cannot draw tree.  No input alignment file");
+			exit(1);
+		}
+		else 
+			do_tree = TRUE;
+
+	if(setbootstrap != -1)
+		if(empty) {
+			error("Cannot bootstrap tree. No input alignment file");
+			exit(1);
+		}
+		else {
+			temp = 0;
+			if(param_arg[setbootstrap] != NULL)
+				 if (sscanf(param_arg[setbootstrap],"%d",&temp)!=1) {
+                         fprintf(stdout,"Bad option for /bootstrap (must be integer)\n");
+                         temp = 0;
+                    };
+			if(temp > 0)          boot_ntrials = temp;
+			do_boot = TRUE;
+		}
+
+	if(setalign != -1)
+		if(empty) {
+			error("Cannot align sequences.  No input file");
+			exit(1);
+		}
+		else 
+			do_align = TRUE;
+
+	if(setconvert != -1)
+		if(empty) {
+			error("Cannot convert sequences.  No input file");
+			exit(1);
+		}
+		else 
+			do_convert = TRUE;
+ 
+	if(setusetree != -1)
+		if(empty) {
+			error("Cannot align sequences.  No input file");
+			exit(1);
+		}
+		else  {
+		        if(strlen(param_arg[setusetree]) == 0) {
+				error("Cannot align sequences.  No tree file specified");
+				exit(1);
+		        }
+                        else {
+			        strcpy(phylip_tree_name, param_arg[setusetree]);
+		        }
+		        use_tree_file = TRUE;
+		        do_align_only = TRUE;
+		}
+
+	if(setnewtree != -1)
+		if(empty) {
+			error("Cannot align sequences.  No input file");
+			exit(1);
+		}
+		else  {
+		        if(strlen(param_arg[setnewtree]) == 0) {
+				error("Cannot align sequences.  No tree file specified");
+				exit(1);
+		        }
+                        else {
+			        strcpy(phylip_tree_name, param_arg[setnewtree]);
+		        }
+		    new_tree_file = TRUE;
+			do_tree_only = TRUE;
+		}
+ 
+	if(setusetree1 != -1)
+		if(profile1_empty) {
+			error("Cannot align profiles.  No input file");
+			exit(1);
+		}
+		else if(profile_type == SEQUENCE) {
+			error("Invalid option /usetree1.");
+			exit(1);
+		}
+		else  {
+		        if(strlen(param_arg[setusetree1]) == 0) {
+				error("Cannot align profiles.  No tree file specified");
+				exit(1);
+		        }
+                        else {
+			        strcpy(p1_tree_name, param_arg[setusetree1]);
+		        }
+		        use_tree1_file = TRUE;
+		        do_align_only = TRUE;
+		}
+
+	if(setnewtree1 != -1)
+		if(profile1_empty) {
+			error("Cannot align profiles.  No input file");
+			exit(1);
+		}
+		else if(profile_type == SEQUENCE) {
+			error("Invalid option /newtree1.");
+			exit(1);
+		}
+		else  {
+		        if(strlen(param_arg[setnewtree1]) == 0) {
+				error("Cannot align profiles.  No tree file specified");
+				exit(1);
+		        }
+                        else {
+			        strcpy(p1_tree_name, param_arg[setnewtree1]);
+		        }
+		    new_tree1_file = TRUE;
+		}
+ 
+	if(setusetree2 != -1)
+		if(profile2_empty) {
+			error("Cannot align profiles.  No input file");
+			exit(1);
+		}
+		else if(profile_type == SEQUENCE) {
+			error("Invalid option /usetree2.");
+			exit(1);
+		}
+		else  {
+		        if(strlen(param_arg[setusetree2]) == 0) {
+				error("Cannot align profiles.  No tree file specified");
+				exit(1);
+		        }
+                        else {
+			        strcpy(p2_tree_name, param_arg[setusetree2]);
+		        }
+		        use_tree2_file = TRUE;
+		        do_align_only = TRUE;
+		}
+
+	if(setnewtree2 != -1)
+		if(profile2_empty) {
+			error("Cannot align profiles.  No input file");
+			exit(1);
+		}
+		else if(profile_type == SEQUENCE) {
+			error("Invalid option /newtree2.");
+			exit(1);
+		}
+		else  {
+		        if(strlen(param_arg[setnewtree2]) == 0) {
+				error("Cannot align profiles.  No tree file specified");
+				exit(1);
+		        }
+                        else {
+			        strcpy(p2_tree_name, param_arg[setnewtree2]);
+		        }
+		    new_tree2_file = TRUE;
+		}
+ 
+	/* default action: align, when sequences are loaded and no other action was selected */
+	if( (!do_tree) && (!do_boot) && (!empty) && (!do_profile) && (!do_align_only) && (!do_tree_only) && (!do_convert)) 
+		do_align = TRUE;
+
+/*** ? /quicktree  */
+        if(setquicktree != -1)
+		quick_pairalign = TRUE;
+
+	if(dnaflag) {
+		gap_open   = dna_gap_open;
+		gap_extend = dna_gap_extend;
+		pw_go_penalty  = dna_pw_go_penalty;
+		pw_ge_penalty  = dna_pw_ge_penalty;
+                ktup       = dna_ktup;
+                window     = dna_window;
+                signif     = dna_signif;
+                wind_gap   = dna_wind_gap;
+
+	}
+	else {
+		gap_open   = prot_gap_open;
+		gap_extend = prot_gap_extend;
+		pw_go_penalty  = prot_pw_go_penalty;
+		pw_ge_penalty  = prot_pw_ge_penalty;
+                ktup       = prot_ktup;
+                window     = prot_window;
+                signif     = prot_signif;
+                wind_gap   = prot_wind_gap;
+	}
+	
+	if(interactive) {
+		if (!xmenus) usemenu = TRUE;
+		return;
+	}
+
+	/* batch mode from here on: something must have been loaded */
+	if(!do_something) {
+		error("No input file(s) specified");
+		exit(1);
+	}
+
+
+
+
+/****************************************************************************/
+/* Now do whatever has been requested ***************************************/
+/****************************************************************************/
+
+
+	if(do_profile) {
+		if (profile_type == PROFILE) profile_align(p1_tree_name,p2_tree_name);
+		else new_sequence_align(phylip_tree_name);
+	}
+
+	else if(do_align)
+		align(phylip_tree_name);
+
+        else if(do_convert) {
+                get_path(seqname,path);
+                if(!open_alignment_output(path)) exit(1);
+                create_alignment_output(1,nseqs);
+        }
+
+        else if (do_align_only)
+                get_tree(phylip_tree_name);
+
+	else if(do_tree_only)
+		make_tree(phylip_tree_name);
+
+	else if(do_tree)
+		phylogenetic_tree(phylip_tree_name,clustal_tree_name,dist_tree_name,nexus_tree_name,pim_name);
+
+	else if(do_boot)
+		bootstrap_tree(phylip_tree_name,clustal_tree_name,nexus_tree_name);
+
+	fprintf(stdout,"\n");
+	exit(0);
+
+/*******whew!***now*go*home****/
+}
+
+
+/*
+ * Load a user-supplied comparison matrix.
+ *
+ * str:  matrix file name; in menu mode the name is asked for instead and
+ *       written back into str.
+ * mat, xref: passed through to read_user_matrix().
+ *
+ * Returns TRUE when read_user_matrix() reports a positive residue count,
+ * FALSE when the name is empty, the file cannot be opened or the read
+ * fails.
+ */
+Boolean user_mat(char *str, short *mat, short *xref)
+{
+        sint maxres;
+
+        FILE *infile;
+
+        if(usemenu)
+                getstr("Enter name of the matrix file",lin2);
+        else
+                strcpy(lin2,str);
+
+        if(*lin2 == EOS) return FALSE;
+
+        if((infile=fopen(lin2,"r"))==NULL) {
+                error("Cannot find matrix file [%s]",lin2);
+                return FALSE;
+        }
+        /* the open was only an existence check; the reader is given the name */
+        fclose(infile);
+
+	strcpy(str, lin2);
+
+	maxres = read_user_matrix(str, mat, xref);
+        if (maxres <= 0) return FALSE;
+
+	return TRUE;
+}
+
+/*
+ * Load a user-supplied series of comparison matrices.
+ *
+ * str:  matrix file name; in menu mode the name is asked for instead and
+ *       written back into str.
+ * mat, xref: passed through to read_matrix_series().
+ *
+ * Returns TRUE when read_matrix_series() reports a positive residue count,
+ * FALSE when the name is empty, the file cannot be opened or the read
+ * fails.
+ */
+Boolean user_mat_series(char *str, short *mat, short *xref)
+{
+        sint maxres;
+
+        FILE *infile;
+
+        if(usemenu)
+                getstr("Enter name of the matrix file",lin2);
+        else
+                strcpy(lin2,str);
+
+        if(*lin2 == EOS) return FALSE;
+
+        if((infile=fopen(lin2,"r"))==NULL) {
+                error("Cannot find matrix file [%s]",lin2);
+                return FALSE;
+        }
+        /* the open was only an existence check; the reader is given the name */
+        fclose(infile);
+
+	strcpy(str, lin2);
+
+	maxres = read_matrix_series(str, mat, xref);
+        if (maxres <= 0) return FALSE;
+
+	return TRUE;
+}
+
+
+
+
+
+
+/*
+ * Read sequences with readseqs() and update the global sequence state.
+ *
+ * append: when TRUE the new sequences are numbered after the nseqs already
+ *         held and nseqs grows by the number read; otherwise numbering
+ *         starts at 1 and nseqs is replaced.
+ *
+ * Returns the readseqs() result: negative when the file could not be
+ * opened, 0 when it held no sequences (an error is reported), otherwise
+ * the number of sequences read.  On success the structure penalty masks
+ * and names of both profiles are released, the gap penalties are taken
+ * from the DNA or protein set and empty is cleared.
+ */
+sint seq_input(Boolean append)
+{
+	sint first_seq, n_read, k;
+
+	if(usemenu) {
+		fprintf(stdout,"\n\nSequences should all be in 1 file.\n");
+		fprintf(stdout,"\n7 formats accepted: \n");
+		fprintf(stdout,
+		"NBRF/PIR, EMBL/SwissProt, Pearson (Fasta), GDE, Clustal, GCG/MSF, RSF.\n\n\n");
+	}
+
+	/* 1 is the first seq to be read unless we are appending */
+	first_seq = append ? nseqs+(sint)1 : (sint)1;
+	n_read = readseqs(first_seq);
+
+	if(n_read < 0)			/* file could not be opened */
+		return n_read;
+
+	if(n_read == 0) {		/* no sequences */
+		error("No sequences in file!  Bad format?");
+		return n_read;
+	}
+
+	/* success: any structure information of earlier profiles is stale */
+	struct_penalties1 = struct_penalties2 = NONE;
+	if (sec_struct_mask1 != NULL) sec_struct_mask1=ckfree(sec_struct_mask1);
+	if (sec_struct_mask2 != NULL) sec_struct_mask2=ckfree(sec_struct_mask2);
+	if (gap_penalty_mask1 != NULL) gap_penalty_mask1=ckfree(gap_penalty_mask1);
+	if (gap_penalty_mask2 != NULL) gap_penalty_mask2=ckfree(gap_penalty_mask2);
+	if (ss_name1 != NULL) ss_name1=ckfree(ss_name1);
+	if (ss_name2 != NULL) ss_name2=ckfree(ss_name2);
+
+	if(append)
+		nseqs += n_read;
+	else
+		nseqs = n_read;
+
+	info("Sequences assumed to be %s", dnaflag?"DNA":"PROTEIN");
+
+	if (usemenu) {
+		fprintf(stdout,"\n\n");
+		for(k=1; k<=nseqs; k++)
+			info("Sequence %d: %-*s   %6.d %s",
+			(pint)k,max_names,names[k],(pint)seqlen_array[k],dnaflag?"bp":"aa");
+	}
+
+	if(dnaflag) {
+		gap_open   = dna_gap_open;
+		gap_extend = dna_gap_extend;
+	}
+	else {
+		gap_open   = prot_gap_open;
+		gap_extend = prot_gap_extend;
+	}
+	empty=FALSE;
+
+	return n_read;
+}
+
+
+
+
+
+
+/*
+ * Read profile number profile_no (1 or 2) with readseqs().
+ *
+ * Profile 1 is read starting at sequence 1; profile 2 is read starting at
+ * sequence profile1_nseqs+1 and requires profile 1 to be loaded already.
+ * When the global struct_penalties is not NONE after the read, the masks
+ * and name held in sec_struct_mask, gap_penalty_mask and ss_name are copied
+ * into the per-profile variables (suffix 1 or 2).  On success the shared
+ * copies are released before returning.
+ *
+ * Returns 0 if profile 2 is requested while profile 1 is empty, the
+ * readseqs() result when it is <= 0, and otherwise the new total nseqs.
+ */
+sint profile_input(void)   /* read a profile   */
+{                                           /* profile_no is 1 or 2  */
+        sint local_nseqs, i;
+	
+        if(profile_no == 2 && profile1_empty) 
+           {
+             error("You must read in profile number 1 first");
+             return 0;
+           }
+
+    if(profile_no == 1)     /* for the 1st profile */
+      {
+       local_nseqs = readseqs((sint)1); /* (1) means 1st seq to be read = no. 1 */
+       if(local_nseqs < 0)               /* file could not be opened */
+           { 
+		return local_nseqs;
+           }
+       else if(local_nseqs == 0)         /* no sequences  */
+           {
+	       error("No sequences in file!  Bad format?");
+		return local_nseqs;
+           }
+       else if (local_nseqs > 0)
+           { 				/* success; found some seqs. */
+		struct_penalties1 = NONE;
+		if (sec_struct_mask1 != NULL) sec_struct_mask1=ckfree(sec_struct_mask1);
+		if (gap_penalty_mask1 != NULL) gap_penalty_mask1=ckfree(gap_penalty_mask1);
+		if (ss_name1 != NULL) ss_name1=ckfree(ss_name1);
+                if (struct_penalties != NONE) /* feature table / mask in alignment */
+                	{
+					struct_penalties1 = struct_penalties;
+					if (struct_penalties == SECST) {
+						sec_struct_mask1 = (char *)ckalloc((max_aln_length) * sizeof (char));
+						for (i=0;i<max_aln_length;i++)
+							sec_struct_mask1[i] = sec_struct_mask[i];
+					}
+					gap_penalty_mask1 = (char *)ckalloc((max_aln_length) * sizeof (char));
+					for (i=0;i<max_aln_length;i++)
+						gap_penalty_mask1[i] = gap_penalty_mask[i];
+        				ss_name1 = (char *)ckalloc( (MAXNAMES+1) * sizeof (char));
+
+					strcpy(ss_name1,ss_name);
+if (debug>0) {
+for (i=0;i<seqlen_array[1];i++)
+	fprintf(stdout,"%c",gap_penalty_mask1[i]);
+fprintf(stdout,"\n");
+}
+					}
+                nseqs = profile1_nseqs = local_nseqs;
+				info("No. of seqs=%d",(pint)nseqs);
+				profile1_empty=FALSE;
+				profile2_empty=TRUE;	/* loading profile 1 marks profile 2 as empty */
+	   }
+      }
+    else
+      {			        /* first seq to be read = profile1_nseqs + 1 */
+       local_nseqs = readseqs(profile1_nseqs+(sint)1); 
+       if(local_nseqs < 0)               /* file could not be opened */
+           { 
+		return local_nseqs;
+           }
+       else if(local_nseqs == 0)         /* no sequences */
+           {
+	       error("No sequences in file!  Bad format?");
+		return local_nseqs;
+           }
+       else if(local_nseqs > 0)
+           {
+		struct_penalties2 = NONE;
+		if (sec_struct_mask2 != NULL) sec_struct_mask2=ckfree(sec_struct_mask2);
+		if (gap_penalty_mask2 != NULL) gap_penalty_mask2=ckfree(gap_penalty_mask2);
+		if (ss_name2 != NULL) ss_name2=ckfree(ss_name2);
+                if (struct_penalties != NONE) /* feature table / mask in alignment */
+                	{
+					struct_penalties2 = struct_penalties;
+					if (struct_penalties == SECST) {
+						sec_struct_mask2 = (char *)ckalloc((max_aln_length) * sizeof (char));
+						for (i=0;i<max_aln_length;i++)
+							sec_struct_mask2[i] = sec_struct_mask[i];
+					}
+					gap_penalty_mask2 = (char *)ckalloc((max_aln_length) * sizeof (char));
+					for (i=0;i<max_aln_length;i++)
+						gap_penalty_mask2[i] = gap_penalty_mask[i];
+        				ss_name2 = (char *)ckalloc( (MAXNAMES+1) * sizeof (char));
+					strcpy(ss_name2,ss_name);
+if (debug>0) {
+for (i=0;i<seqlen_array[profile1_nseqs+1];i++)
+	fprintf(stdout,"%c",gap_penalty_mask2[i]);
+fprintf(stdout,"\n");
+}
+					}
+				info("No. of seqs in profile=%d",(pint)local_nseqs);
+                nseqs = profile1_nseqs + local_nseqs;
+                info("Total no. of seqs     =%d",(pint)nseqs);
+				profile2_empty=FALSE;
+				empty = FALSE;
+	   }
+
+      }
+	if (sec_struct_mask != NULL) sec_struct_mask=ckfree(sec_struct_mask);
+	if (gap_penalty_mask != NULL) gap_penalty_mask=ckfree(gap_penalty_mask);
+	if (ss_name != NULL) ss_name=ckfree(ss_name);
+
+	if(local_nseqs<=0) return local_nseqs;	/* NOTE(review): both branches above already return in this case */
+	
+	info("Sequences assumed to be %s",
+		dnaflag?"DNA":"PROTEIN");
+	if (usemenu) fprintf(stdout,"\n\n");
+        for(i=profile2_empty?1:profile1_nseqs+1; i<=nseqs; i++) {	/* list only the sequences just read */
+                info("Sequence %d: %-*s   %6.d %s",
+                   (pint)i,max_names,names[i],(pint)seqlen_array[i],dnaflag?"bp":"aa");
+        }	
+	if(dnaflag) {
+		gap_open   = dna_gap_open;
+		gap_extend = dna_gap_extend;
+	}
+	else {
+		gap_open   = prot_gap_open;
+		gap_extend = prot_gap_extend;
+	}
+
+	return nseqs;
+}
+
+
+
+/*
+ * Convert a secondary structure mask into a gap penalty mask.
+ *
+ * prf_length: number of positions to process.
+ * mask:       input; 'a'/'A' or '$' mark helix positions, 'b'/'B' or '%'
+ *             mark strand positions.  A '$' or '%' is stored as 'A' or 'B'
+ *             and ends the current run.
+ * gap_mask:   output, prf_length characters, each a penalty value + '0'.
+ *
+ * A scratch copy struct_mask is built first.  For every helix run the
+ * helix_end_plus positions before and after it, and the first and last
+ * helix_end_minus positions inside it, are marked 'a'; the remaining
+ * positions are copied from mask unchanged.  Strands are handled the same
+ * way with 'b' and the strand_end_* settings.  Finally 'A', 'a', 'B', 'b'
+ * map to helix_penalty, helix_end_penalty, strand_penalty and
+ * strand_end_penalty; anything else maps to loop_penalty.
+ */
+static void calc_gap_penalty_mask(int prf_length, char *mask, char *gap_mask)
+{
+	int i,j;
+	char *struct_mask;
+
+	struct_mask = (char *)ckalloc((prf_length+1) * sizeof(char));
+	/* positions never marked below must read as "loop": clear explicitly
+	   rather than depend on what the allocator returns */
+	for (i=0;i<=prf_length;i++)
+		struct_mask[i] = '\0';
+/*
+    calculate the gap penalty mask from the secondary structures
+*/
+	i=0;
+	while (i<prf_length) {
+		if (tolower(mask[i]) == 'a' || mask[i] == '$') {
+			for (j = -helix_end_plus; j<0; j++) {
+				if ((i+j>=0) && (tolower(struct_mask[i+j]) != 'a')
+				             && (tolower(struct_mask[i+j]) != 'b'))
+					struct_mask[i+j] = 'a';
+			}
+			for (j = 0; j<helix_end_minus; j++) {
+				if (i+j>=prf_length || (tolower(mask[i+j]) != 'a'
+				                    && mask[i+j] != '$')) break;
+				struct_mask[i+j] = 'a';
+			}
+			i += j;
+			/* test the bound before reading mask[i] */
+			while (i<prf_length && (tolower(mask[i]) == 'a'
+				                    || mask[i] == '$')) {
+				if (mask[i] == '$') {
+					struct_mask[i] = 'A';
+					i++;
+					break;
+				}
+				else struct_mask[i] = mask[i];
+				i++;
+			}
+			for (j = 0; j<helix_end_minus; j++) {
+				if ((i-j-1>=0) && (tolower(mask[i-j-1]) == 'a'
+				                    || mask[i-j-1] == '$'))
+					struct_mask[i-j-1] = 'a';
+			}
+			for (j = 0; j<helix_end_plus; j++) {
+				if (i+j>=prf_length) break;
+				struct_mask[i+j] = 'a';
+			}
+		}
+	 	else if (tolower(mask[i]) == 'b' || mask[i] == '%') {
+			for (j = -strand_end_plus; j<0; j++) {
+				if ((i+j>=0) && (tolower(struct_mask[i+j]) != 'a')
+				             && (tolower(struct_mask[i+j]) != 'b'))
+					struct_mask[i+j] = 'b';
+			}
+			for (j = 0; j<strand_end_minus; j++) {
+				if (i+j>=prf_length || (tolower(mask[i+j]) != 'b'
+				                    && mask[i+j] != '%')) break;
+				struct_mask[i+j] = 'b';
+			}
+			i += j;
+			/* test the bound before reading mask[i] */
+			while (i<prf_length && (tolower(mask[i]) == 'b'
+				                    || mask[i] == '%')) {
+				if (mask[i] == '%') {
+					struct_mask[i] = 'B';
+					i++;
+					break;
+				}
+				else struct_mask[i] = mask[i];
+				i++;
+			}
+			for (j = 0; j<strand_end_minus; j++) {
+				if ((i-j-1>=0) && (tolower(mask[i-j-1]) == 'b'
+				                    || mask[i-j-1] == '%'))
+					struct_mask[i-j-1] = 'b';
+			}
+			for (j = 0; j<strand_end_plus; j++) {
+				if (i+j>=prf_length) break;
+ 				struct_mask[i+j] = 'b';
+			}
+		}
+		else i++;
+	}
+
+	for(i=0;i<prf_length;i++) {
+		switch (struct_mask[i]) {
+			case 'A':
+				gap_mask[i] = helix_penalty+'0';
+				break;
+			case 'a':
+				gap_mask[i] = helix_end_penalty+'0';
+				break;
+			case 'B':
+				gap_mask[i] = strand_penalty+'0';
+				break;
+			case 'b':
+				gap_mask[i] = strand_end_penalty+'0';
+				break;
+			default:
+				gap_mask[i] = loop_penalty+'0';
+				break;
+		}
+	}
+
+	struct_mask=ckfree(struct_mask);
+	
+}
+
+/*
+ * Build the secondary structure mask used for display.
+ *
+ * prf_length:  number of positions to process.
+ * mask:        input; 'a'/'A' or '$' mark helix positions, 'b'/'B' or '%'
+ *              mark strand positions.  A '$' or '%' is stored as 'A' or
+ *              'B' and ends the current run.
+ * struct_mask: output buffer supplied by the caller.  Only helix and
+ *              strand positions are written; other positions are left as
+ *              the caller set them.
+ *
+ * Inside each helix run the first and last helix_end_minus positions are
+ * marked 'a' and the rest are copied from mask unchanged; strands use 'b'
+ * and strand_end_minus.
+ */
+void print_sec_struct_mask(int prf_length, char *mask, char *struct_mask)
+{
+	int i,j;
+
+/*
+    mark the ends and the core of every helix and strand
+*/
+	i=0;
+	while (i<prf_length) {
+		if (tolower(mask[i]) == 'a' || mask[i] == '$') {
+			for (j = 0; j<helix_end_minus; j++) {
+				if (i+j>=prf_length || (tolower(mask[i+j]) != 'a'
+				                    && mask[i+j] != '$')) break;
+				struct_mask[i+j] = 'a';
+			}
+			i += j;
+			/* test the bound before reading mask[i] */
+			while (i<prf_length && (tolower(mask[i]) == 'a'
+				                    || mask[i] == '$')) {
+				if (mask[i] == '$') {
+					struct_mask[i] = 'A';
+					i++;
+					break;
+				}
+				else struct_mask[i] = mask[i];
+				i++;
+			}
+			for (j = 0; j<helix_end_minus; j++) {
+				if ((i-j-1>=0) && (tolower(mask[i-j-1]) == 'a'
+				                    || mask[i-j-1] == '$'))
+					struct_mask[i-j-1] = 'a';
+			}
+		}
+	 	else if (tolower(mask[i]) == 'b' || mask[i] == '%') {
+			for (j = 0; j<strand_end_minus; j++) {
+				if (i+j>=prf_length || (tolower(mask[i+j]) != 'b'
+				                    && mask[i+j] != '%')) break;
+				struct_mask[i+j] = 'b';
+			}
+			i += j;
+			/* test the bound before reading mask[i] */
+			while (i<prf_length && (tolower(mask[i]) == 'b'
+				                    || mask[i] == '%')) {
+				if (mask[i] == '%') {
+					struct_mask[i] = 'B';
+					i++;
+					break;
+				}
+				else struct_mask[i] = mask[i];
+				i++;
+			}
+			for (j = 0; j<strand_end_minus; j++) {
+				if ((i-j-1>=0) && (tolower(mask[i-j-1]) == 'b'
+				                    || mask[i-j-1] == '%'))
+				struct_mask[i-j-1] = 'b';
+			}
+		}
+		else i++;
+	}
+}
+
+
+
+/*
+ * Build an output file name from path + file_extension, optionally let the
+ * user replace it, and open it for writing.
+ *
+ * prompt:         text shown before the proposed name in menu mode.
+ * path:           leading part of the default name.
+ * file_name:      receives the name actually used (caller's buffer).
+ * file_extension: appended to path to form the default name.
+ *
+ * If the default name equals seqname a warning is issued and, in menu
+ * mode, a new name is requested.  An empty reply keeps the default.
+ * Returns the open stream, or NULL (after an error message) on failure.
+ */
+FILE *  open_output_file(char *prompt,      char *path, 
+				char *file_name,   char *file_extension)
+ 
+{	static char temp[FILENAMELEN+1];
+	const char *question = NULL;	/* text to ask with; NULL means do not ask */
+	char *eol;
+	int ch;
+	FILE * file_handle;
+
+	strcpy(file_name,path);
+	strcat(file_name,file_extension);
+
+	if(strcmp(file_name,seqname)==0) {
+		warning("Output file name is the same as input file.");
+		if (usemenu)
+			question = "\n\nEnter new name to avoid overwriting ";
+	}
+	else if (usemenu)
+		question = prompt;
+
+	if (question != NULL) {
+		/* literal format: the prompt text is never interpreted as a format */
+		fprintf(stdout,"%s [%s]: ",question,file_name);
+		/* bounded read in place of gets() */
+		if (fgets(temp,sizeof(temp),stdin) == NULL)
+			*temp = EOS;
+		else if ((eol = strchr(temp,'\n')) != NULL)
+			*eol = EOS;
+		else {
+			/* over-long reply: discard the rest of the input line */
+			while ((ch = getchar()) != EOF && ch != '\n')
+				;
+		}
+		if(*temp != EOS) strcpy(file_name,temp);
+	}
+
+#ifdef VMS
+	if((file_handle=fopen(file_name,"w","rat=cr","rfm=var"))==NULL) {
+#else
+	if((file_handle=fopen(file_name,"w"))==NULL) {
+#endif
+		error("Cannot open output file [%s]",file_name);
+		return NULL;
+	}
+	return file_handle;
+}
+
+
+
+/*
+ * Open file_name for writing without asking the user anything.
+ *
+ * Returns the open stream, or NULL after reporting an error when the name
+ * is empty or the file cannot be opened.
+ */
+FILE *  open_explicit_file(char *file_name)
+{
+	FILE *out;
+
+	if (file_name[0] == EOS) {
+		error("Bad output file [%s]",file_name);
+		return NULL;
+	}
+
+#ifdef VMS
+	out = fopen(file_name,"w","rat=cr","rfm=var");
+#else
+	out = fopen(file_name,"w");
+#endif
+	if (out != NULL)
+		return out;
+
+	error("Cannot open output file [%s]",file_name);
+	return NULL;
+}
+
+
+/* Ramu void */
+/*
+ * Run a complete multiple alignment of the sequences in memory.
+ *
+ * phylip_name: guide tree file name.  If it is empty on entry a name is
+ *              obtained through open_output_file() using the path of
+ *              seqname and the extension "dnd".  It is reset to empty when
+ *              the alignment completes.
+ *
+ * Order of work: release any profile structure masks, open the alignment
+ * output (menu or batch mode only), open the guide tree file when there
+ * are at least two sequences, optionally write the parameter file and
+ * reset gaps, load the DNA or protein parameter set, run the pairwise
+ * stage (show_pair() when quick_pairalign is set, pairalign() otherwise),
+ * build the guide tree, call malign() and write the alignment.
+ * Returns early, without output, if a file cannot be opened or malign()
+ * returns a count <= 0.
+ */
+void align(char *phylip_name)
+{ 
+	char path[FILENAMELEN+1];
+	FILE *tree;
+	sint count;
+	
+	if(empty && usemenu) {
+		error("No sequences in memory. Load sequences first.");
+		return;
+	}
+
+	   struct_penalties1 = struct_penalties2 = NONE;
+	   if (sec_struct_mask1 != NULL) sec_struct_mask1=ckfree(sec_struct_mask1);
+	   if (sec_struct_mask2 != NULL) sec_struct_mask2=ckfree(sec_struct_mask2);
+	   if (gap_penalty_mask1 != NULL) gap_penalty_mask1=ckfree(gap_penalty_mask1);
+	   if (gap_penalty_mask2 != NULL) gap_penalty_mask2=ckfree(gap_penalty_mask2);
+	   if (ss_name1 != NULL) ss_name1=ckfree(ss_name1);
+	   if (ss_name2 != NULL) ss_name2=ckfree(ss_name2);
+
+
+        get_path(seqname,path);
+/* DES DEBUG 
+	fprintf(stdout,"\n\n Seqname = %s  \n Path = %s \n\n",seqname,path);
+*/
+	if(usemenu || !interactive) {
+        	if(!open_alignment_output(path)) return;
+	}
+
+	if (nseqs >= 2) {	/* tree is only assigned, and later only used, under this same test */
+
+        	get_path(seqname,path);
+        	if (phylip_name[0]!=EOS) {
+                	if((tree = open_explicit_file(
+                	phylip_name))==NULL) return;
+        	}
+        	else {
+                 	if((tree = open_output_file(
+                	"\nEnter name for new GUIDE TREE           file  ",path,
+                	phylip_name,"dnd")) == NULL) return;
+        	}
+	}
+
+	if (save_parameters) create_parameter_output();
+
+	if(reset_alignments_new || reset_alignments_all) reset_align();
+
+        info("Start of Pairwise alignments");
+        info("Aligning...");
+        if(dnaflag) {
+                gap_open   = dna_gap_open;
+                gap_extend = dna_gap_extend;
+                pw_go_penalty  = dna_pw_go_penalty;
+                pw_ge_penalty  = dna_pw_ge_penalty;
+                ktup       = dna_ktup;
+                window     = dna_window;
+                signif     = dna_signif;
+                wind_gap   = dna_wind_gap;
+
+        }
+        else {
+                gap_open   = prot_gap_open;
+                gap_extend = prot_gap_extend;
+                pw_go_penalty  = prot_pw_go_penalty;
+                pw_ge_penalty  = prot_pw_ge_penalty;
+                ktup       = prot_ktup;
+                window     = prot_window;
+                signif     = prot_signif;
+                wind_gap   = prot_wind_gap;
+
+        }
+
+        if (quick_pairalign)
+           show_pair((sint)0,nseqs,(sint)0,nseqs);
+        else
+           pairalign((sint)0,nseqs,(sint)0,nseqs);
+
+	if (nseqs >= 2) {
+
+		guide_tree(tree,1,nseqs);	/* NOTE(review): tree is not closed in this function - presumably guide_tree() does; confirm */
+		info("Guide tree        file created:   [%s]",
+                phylip_name);
+	}
+
+	
+	count = malign((sint)0,phylip_name);
+	
+	if (count <= 0) return;
+
+	if (usemenu) fprintf(stdout,"\n\n\n");
+	
+	create_alignment_output(1,nseqs);
+        if (showaln && usemenu) show_aln();
+	phylip_name[0]=EOS;
+	return ;
+}
+
+
+
+
+
+/*
+ * new_sequence_align
+ *
+ * Command driver that computes pairwise distances, obtains a guide tree
+ * (an existing "<path>dnd" file, or a freshly written one) and then calls
+ * seqalign() starting at sequence index new_seq-2, where new_seq is
+ * profile1_nseqs+1.
+ *
+ *   phylip_name  in/out: guide tree file name.  If non-empty on entry it is
+ *                handed to open_explicit_file(); otherwise
+ *                open_output_file() fills it in.  It is overwritten with
+ *                the existing tree file name when the user accepts that
+ *                file, and reset to "" after a successful alignment.
+ *
+ * use_ss2 is switched off while a list of sequences is processed and is
+ * restored to its saved value on every exit path after that point.
+ */
+void new_sequence_align(char *phylip_name)
+{
+    char path[FILENAMELEN+1];
+    char tree_name[FILENAMELEN+1],temp[MAXLINE+1];
+    Boolean use_tree;
+    FILE *tree = NULL;
+    sint i,j,count;
+    float dscore;
+    Boolean save_ss2;
+
+    if(profile1_empty && usemenu) {
+        error("No profile in memory. Input 1st profile first.");
+        return;
+    }
+
+    if(profile2_empty && usemenu) {
+        error("No sequences in memory. Input sequences first.");
+        return;
+    }
+
+    get_path(profile2_name,path);
+
+    if(usemenu || !interactive) {
+        if(!open_alignment_output(path)) return;
+    }
+
+    new_seq = profile1_nseqs+1;
+
+/* check for secondary structure information for list of sequences */
+
+    save_ss2 = use_ss2;
+    if (struct_penalties2 != NONE && use_ss2 == TRUE && (nseqs - profile1_nseqs > 1)) {
+        if (struct_penalties2 == SECST)
+            warning("Warning: ignoring secondary structure for a list of sequences");
+        else if (struct_penalties2 == GMASK)
+            warning("Warning: ignoring gap penalty mask for a list of sequences");
+        use_ss2 = FALSE;
+    }
+
+    /* distance = fraction of non-identical positions, stored symmetrically */
+    for (i=1;i<=new_seq;i++) {
+        for (j=i+1;j<=new_seq;j++) {
+            dscore = countid(i,j);
+            tmat[i][j] = ((double)100.0 - (double)dscore)/(double)100.0;
+            tmat[j][i] = tmat[i][j];
+        }
+    }
+
+    tree_name[0] = EOS;
+    use_tree = FALSE;
+    if (nseqs >= 2) {
+        if (check_tree && usemenu) {
+            /* only look for "<path>dnd" when the name fits in tree_name */
+            if (strlen(path) + 3 <= (size_t)FILENAMELEN) {
+                strcpy(tree_name,path);
+                strcat(tree_name,"dnd");
+#ifdef VMS
+                tree=fopen(tree_name,"r","rat=cr","rfm=var");
+#else
+                tree=fopen(tree_name,"r");
+#endif
+                if(tree!=NULL) {
+                    fprintf(stdout,"\nUse the existing GUIDE TREE file,  %s  (y/n) ? [y]: ",
+                                           tree_name);
+                    /* bounded read; end of input counts as an empty answer (= yes) */
+                    if (fgets(temp,sizeof(temp),stdin) == NULL) temp[0] = EOS;
+                    if(*temp != 'n' && *temp != 'N') {
+                        strcpy(phylip_name,tree_name);
+                        use_tree = TRUE;
+                    }
+                    fclose(tree);
+                }
+            }
+        }
+        else if (!usemenu && use_tree_file) {
+            use_tree = TRUE;
+        }
+    }
+
+    if (save_parameters) create_parameter_output();
+
+    if(reset_alignments_new || reset_alignments_all) {
+/*
+        reset_prf1();
+*/
+        reset_prf2();
+    }
+    else fix_gaps();
+
+    if (struct_penalties1 == SECST)
+        calc_gap_penalty_mask(seqlen_array[1],sec_struct_mask1,gap_penalty_mask1);
+
+    if (struct_penalties2 == SECST)
+        calc_gap_penalty_mask(seqlen_array[profile1_nseqs+1],sec_struct_mask2,gap_penalty_mask2);
+
+/* create the new tree file, if necessary */
+
+    if (use_tree == FALSE) {
+
+        if (nseqs >= 2) {
+            get_path(profile2_name,path);
+            if (phylip_name[0]!=EOS) {
+                tree = open_explicit_file(phylip_name);
+            }
+            else {
+                tree = open_output_file(
+                    "\nEnter name for new GUIDE TREE           file  ",path,
+                    phylip_name,"dnd");
+            }
+            if (tree == NULL) {
+                use_ss2 = save_ss2;
+                return;
+            }
+        }
+        info("Start of Pairwise alignments");
+        info("Aligning...");
+        if(dnaflag) {
+            gap_open   = dna_gap_open;
+            gap_extend = dna_gap_extend;
+            pw_go_penalty  = dna_pw_go_penalty;
+            pw_ge_penalty  = dna_pw_ge_penalty;
+            ktup       = dna_ktup;
+            window     = dna_window;
+            signif     = dna_signif;
+            wind_gap   = dna_wind_gap;
+        }
+        else {
+            gap_open   = prot_gap_open;
+            gap_extend = prot_gap_extend;
+            pw_go_penalty  = prot_pw_go_penalty;
+            pw_ge_penalty  = prot_pw_ge_penalty;
+            ktup       = prot_ktup;
+            window     = prot_window;
+            signif     = prot_signif;
+            wind_gap   = prot_wind_gap;
+        }
+
+        if (quick_pairalign)
+            show_pair((sint)0,nseqs,new_seq-2,nseqs);
+        else
+            pairalign((sint)0,nseqs,new_seq-2,nseqs);
+
+        if (nseqs >= 2) {
+            guide_tree(tree,1,nseqs);
+            info("Guide tree        file created:   [%s]",
+                 phylip_name);
+        }
+    }
+
+    if (new_tree_file) {
+        use_ss2 = save_ss2;
+        return;
+    }
+
+    count = seqalign(new_seq-2,phylip_name);
+
+    use_ss2 = save_ss2;
+
+    if (count <= 0) return;
+
+    if (usemenu) fprintf(stdout,"\n\n\n");
+
+    create_alignment_output(1,nseqs);
+    if (showaln && usemenu) show_aln();
+
+    phylip_name[0]=EOS;
+
+}
+
+
+
+
+
+/*
+ * make_tree
+ *
+ * Runs the pairwise alignment stage over all loaded sequences and passes
+ * the opened tree file to guide_tree().
+ *
+ *   phylip_name  in/out: tree file name.  Given to open_explicit_file()
+ *                when non-empty, otherwise filled in by open_output_file();
+ *                set back to "" once the tree has been written.
+ */
+void make_tree(char *phylip_name)
+{
+    FILE *tree;
+    char path[FILENAMELEN+1];
+
+    if (empty) {
+        error("No sequences in memory. Load sequences first.");
+        return;
+    }
+
+    /* drop any structure / gap penalty masks and their names */
+    struct_penalties2 = NONE;
+    struct_penalties1 = struct_penalties2;
+    if (sec_struct_mask1 != NULL) { sec_struct_mask1 = ckfree(sec_struct_mask1); }
+    if (sec_struct_mask2 != NULL) { sec_struct_mask2 = ckfree(sec_struct_mask2); }
+    if (gap_penalty_mask1 != NULL) { gap_penalty_mask1 = ckfree(gap_penalty_mask1); }
+    if (gap_penalty_mask2 != NULL) { gap_penalty_mask2 = ckfree(gap_penalty_mask2); }
+    if (ss_name1 != NULL) { ss_name1 = ckfree(ss_name1); }
+    if (ss_name2 != NULL) { ss_name2 = ckfree(ss_name2); }
+
+    if (reset_alignments_new || reset_alignments_all) {
+        reset_align();
+    }
+
+    get_path(seqname,path);
+
+    if (nseqs < 2) {
+        error("Less than 2 sequences in memory. Phylogenetic tree cannot be built.");
+        return;
+    }
+
+    if (save_parameters) {
+        create_parameter_output();
+    }
+
+    info("Start of Pairwise alignments");
+    info("Aligning...");
+
+    /* load the working parameters from the protein or the DNA set */
+    if (!dnaflag) {
+        gap_open      = prot_gap_open;
+        gap_extend    = prot_gap_extend;
+        pw_go_penalty = prot_pw_go_penalty;
+        pw_ge_penalty = prot_pw_ge_penalty;
+        ktup          = prot_ktup;
+        window        = prot_window;
+        signif        = prot_signif;
+        wind_gap      = prot_wind_gap;
+    }
+    else {
+        gap_open      = dna_gap_open;
+        gap_extend    = dna_gap_extend;
+        pw_go_penalty = dna_pw_go_penalty;
+        pw_ge_penalty = dna_pw_ge_penalty;
+        ktup          = dna_ktup;
+        window        = dna_window;
+        signif        = dna_signif;
+        wind_gap      = dna_wind_gap;
+    }
+
+    if (!quick_pairalign) {
+        pairalign((sint)0,nseqs,(sint)0,nseqs);
+    }
+    else {
+        show_pair((sint)0,nseqs,(sint)0,nseqs);
+    }
+
+    if (nseqs >= 2) {
+        get_path(seqname,path);
+        if (phylip_name[0] == EOS) {
+            tree = open_output_file(
+                "\nEnter name for new GUIDE TREE           file  ",path,
+                phylip_name,"dnd");
+        }
+        else {
+            tree = open_explicit_file(phylip_name);
+        }
+        if (tree == NULL) {
+            return;
+        }
+
+        guide_tree(tree,1,nseqs);
+        info("Guide tree        file created:   [%s]",
+             phylip_name);
+    }
+
+    if (reset_alignments_new || reset_alignments_all) {
+        reset_align();
+    }
+
+    phylip_name[0] = EOS;
+}
+
+
+
+
+
+
+
+
+
+/*
+ * get_tree
+ *
+ * Runs malign() using a guide tree file named by the caller or, in menu
+ * mode, by the user.
+ *
+ *   phylip_name  in/out: guide tree file name.  In menu mode it is first
+ *                set to "<path>dnd" and then replaced by whatever the user
+ *                types, if anything.  Reset to "" after a successful
+ *                alignment.
+ *
+ * With fewer than two sequences no tree file is opened and the pairwise
+ * stage is run instead.
+ *
+ * NOTE(review): `tree` is not declared in this function, so it must resolve
+ * to a file-scope variable, and the stream opened below is not closed here.
+ * Confirm whether the open is only an existence check and should be
+ * followed by fclose().
+ */
+void get_tree(char *phylip_name)
+{
+    char path[FILENAMELEN+1],temp[MAXLINE+1];
+    char *nl;
+    sint count;
+
+    if(empty) {
+        error("No sequences in memory. Load sequences first.");
+        return;
+    }
+    struct_penalties1 = struct_penalties2 = NONE;
+    if (sec_struct_mask1 != NULL) sec_struct_mask1=ckfree(sec_struct_mask1);
+    if (sec_struct_mask2 != NULL) sec_struct_mask2=ckfree(sec_struct_mask2);
+    if (gap_penalty_mask1 != NULL) gap_penalty_mask1=ckfree(gap_penalty_mask1);
+    if (gap_penalty_mask2 != NULL) gap_penalty_mask2=ckfree(gap_penalty_mask2);
+    if (ss_name1 != NULL) ss_name1=ckfree(ss_name1);
+    if (ss_name2 != NULL) ss_name2=ckfree(ss_name2);
+
+    get_path(seqname,path);
+
+    if(usemenu || !interactive) {
+        if(!open_alignment_output(path)) return;
+    }
+
+    if(reset_alignments_new || reset_alignments_all) reset_align();
+
+    get_path(seqname,path);
+
+    if (nseqs >= 2) {
+
+        if(usemenu) {
+            strcpy(phylip_name,path);
+            strcat(phylip_name,"dnd");
+
+            fprintf(stdout,"\nEnter a name for the guide tree file [%s]: ",
+                                           phylip_name);
+            /* bounded read; end of input counts as an empty answer */
+            if (fgets(temp,sizeof(temp),stdin) == NULL) temp[0] = EOS;
+            /* fgets() keeps the newline that gets() used to drop */
+            nl = strchr(temp,'\n');
+            if (nl != NULL) *nl = EOS;
+            if(*temp != EOS)
+                strcpy(phylip_name,temp);
+        }
+
+        if(usemenu || !interactive) {
+#ifdef VMS
+            tree=fopen(phylip_name,"r","rat=cr","rfm=var");
+#else
+            tree=fopen(phylip_name,"r");
+#endif
+            if(tree==NULL) {
+                error("Cannot open tree file [%s]",phylip_name);
+                return;
+            }
+        }
+    }
+    else {
+        info("Start of Pairwise alignments");
+        info("Aligning...");
+        if(dnaflag) {
+            gap_open   = dna_gap_open;
+            gap_extend = dna_gap_extend;
+            pw_go_penalty  = dna_pw_go_penalty;
+            pw_ge_penalty  = dna_pw_ge_penalty;
+            ktup       = dna_ktup;
+            window     = dna_window;
+            signif     = dna_signif;
+            wind_gap   = dna_wind_gap;
+        }
+        else {
+            gap_open   = prot_gap_open;
+            gap_extend = prot_gap_extend;
+            pw_go_penalty  = prot_pw_go_penalty;
+            pw_ge_penalty  = prot_pw_ge_penalty;
+            ktup       = prot_ktup;
+            window     = prot_window;
+            signif     = prot_signif;
+            wind_gap   = prot_wind_gap;
+        }
+
+        if (quick_pairalign)
+            show_pair((sint)0,nseqs,(sint)0,nseqs);
+        else
+            pairalign((sint)0,nseqs,(sint)0,nseqs);
+    }
+
+    if (save_parameters) create_parameter_output();
+
+    count = malign(0,phylip_name);
+    if (count <= 0) return;
+
+    if (usemenu) fprintf(stdout,"\n\n\n");
+
+    create_alignment_output(1,nseqs);
+    if (showaln && usemenu) show_aln();
+
+    phylip_name[0]=EOS;
+}
+
+
+
+/*
+ * profile_align
+ *
+ * Obtains one guide tree per profile (reusing an existing "<path>dnd" file
+ * or writing a new one with guide_tree()), then runs palign1() followed by
+ * palign2() and writes the alignment output.
+ *
+ *   p1_tree_name  in/out: guide tree file name for profile 1.
+ *   p2_tree_name  in/out: guide tree file name for profile 2.
+ *
+ * Both names are reset to "" after a successful alignment.  The function
+ * returns after the tree stage when new_tree1_file or new_tree2_file is set.
+ */
+void profile_align(char *p1_tree_name,char *p2_tree_name)
+{
+    char path[FILENAMELEN+1];
+    char tree_name[FILENAMELEN+1];
+    char temp[MAXLINE+1];
+    Boolean use_tree1,use_tree2;
+    FILE *tree;
+    sint count,i,j,dscore;
+
+    if(profile1_empty || profile2_empty) {
+        error("No sequences in memory. Load sequences first.");
+        return;
+    }
+
+    get_path(profile1_name,path);
+
+    if(usemenu || !interactive) {
+        if(!open_alignment_output(path)) return;
+    }
+
+    if(reset_alignments_new || reset_alignments_all) {
+        reset_prf1();
+        reset_prf2();
+    }
+    else fix_gaps();
+
+    /* profile 1: offer an existing tree file, if there is one */
+    tree_name[0] = EOS;
+    use_tree1 = FALSE;
+    if (profile1_nseqs >= 2) {
+        if (check_tree && usemenu) {
+            /* only look for "<path>dnd" when the name fits in tree_name */
+            if (strlen(path) + 3 <= (size_t)FILENAMELEN) {
+                strcpy(tree_name,path);
+                strcat(tree_name,"dnd");
+#ifdef VMS
+                tree=fopen(tree_name,"r","rat=cr","rfm=var");
+#else
+                tree=fopen(tree_name,"r");
+#endif
+                if(tree!=NULL) {
+                    fprintf(stdout,"\nUse the existing GUIDE TREE file for Profile 1,  %s  (y/n) ? [y]: ",
+                                           tree_name);
+                    /* bounded read; end of input counts as an empty answer (= yes) */
+                    if (fgets(temp,sizeof(temp),stdin) == NULL) temp[0] = EOS;
+                    if(*temp != 'n' && *temp != 'N') {
+                        strcpy(p1_tree_name,tree_name);
+                        use_tree1 = TRUE;
+                    }
+                    fclose(tree);
+                }
+            }
+        }
+        else if (!usemenu && use_tree1_file) {
+            use_tree1 = TRUE;
+        }
+    }
+
+    /* profile 2: same question */
+    tree_name[0] = EOS;
+    use_tree2 = FALSE;
+    get_path(profile2_name,path);
+    if (nseqs-profile1_nseqs >= 2) {
+        if (check_tree && usemenu) {
+            if (strlen(path) + 3 <= (size_t)FILENAMELEN) {
+                strcpy(tree_name,path);
+                strcat(tree_name,"dnd");
+#ifdef VMS
+                tree=fopen(tree_name,"r","rat=cr","rfm=var");
+#else
+                tree=fopen(tree_name,"r");
+#endif
+                if(tree!=NULL) {
+                    fprintf(stdout,"\nUse the existing GUIDE TREE file for Profile 2,  %s  (y/n) ? [y]: ",
+                                           tree_name);
+                    if (fgets(temp,sizeof(temp),stdin) == NULL) temp[0] = EOS;
+                    if(*temp != 'n' && *temp != 'N') {
+                        strcpy(p2_tree_name,tree_name);
+                        use_tree2 = TRUE;
+                    }
+                    fclose(tree);
+                }
+            }
+        }
+        else if (!usemenu && use_tree2_file) {
+            use_tree2 = TRUE;
+        }
+    }
+
+    if (save_parameters) create_parameter_output();
+
+    if (struct_penalties1 == SECST)
+        calc_gap_penalty_mask(seqlen_array[1],sec_struct_mask1,gap_penalty_mask1);
+
+    if (struct_penalties2 == SECST)
+        calc_gap_penalty_mask(seqlen_array[profile1_nseqs+1],sec_struct_mask2,gap_penalty_mask2);
+
+    /* write a new tree for profile 1 from the identities inside the profile */
+    if (use_tree1 == FALSE) {
+        if (profile1_nseqs >= 2) {
+            for (i=1;i<=profile1_nseqs;i++) {
+                for (j=i+1;j<=profile1_nseqs;j++) {
+                    dscore = countid(i,j);
+                    tmat[i][j] = (100.0 - dscore)/100.0;
+                    tmat[j][i] = tmat[i][j];
+                }
+            }
+            get_path(profile1_name,path);
+            if (p1_tree_name[0]!=EOS) {
+                if((tree = open_explicit_file(p1_tree_name))==NULL) return;
+            }
+            else {
+                if((tree = open_output_file(
+                    "\nEnter name for new GUIDE TREE file for profile 1 ",path,
+                    p1_tree_name,"dnd")) == NULL) return;
+            }
+
+            guide_tree(tree,1,profile1_nseqs);
+            info("Guide tree        file created:   [%s]",
+                 p1_tree_name);
+        }
+    }
+
+    /* and likewise for profile 2 */
+    if (use_tree2 == FALSE) {
+        if(nseqs-profile1_nseqs >= 2) {
+            for (i=1+profile1_nseqs;i<=nseqs;i++) {
+                for (j=i+1;j<=nseqs;j++) {
+                    dscore = countid(i,j);
+                    tmat[i][j] = (100.0 - dscore)/100.0;
+                    tmat[j][i] = tmat[i][j];
+                }
+            }
+            if (p2_tree_name[0]!=EOS) {
+                if((tree = open_explicit_file(p2_tree_name))==NULL) return;
+            }
+            else {
+                get_path(profile2_name,path);
+                if((tree = open_output_file(
+                    "\nEnter name for new GUIDE TREE file for profile 2 ",path,
+                    p2_tree_name,"dnd")) == NULL) return;
+            }
+            guide_tree(tree,profile1_nseqs+1,nseqs-profile1_nseqs);
+            info("Guide tree        file created:   [%s]",
+                 p2_tree_name);
+        }
+    }
+
+    if (new_tree1_file || new_tree2_file) return;
+
+/* do an initial alignment to get the pairwise identities between the two
+profiles - used to set parameters for the final alignment */
+    count = palign1();
+    if (count == 0) return;
+
+    reset_prf1();
+    reset_prf2();
+
+    count = palign2(p1_tree_name,p2_tree_name);
+
+    if (count == 0) return;
+
+    if(usemenu) fprintf(stdout,"\n\n\n");
+
+    create_alignment_output(1,nseqs);
+    if (showaln && usemenu) show_aln();
+
+    p1_tree_name[0]=EOS;
+    p2_tree_name[0]=EOS;
+}
+
+
+
+
+
+
+ typedef struct rangeNum {   /* residue range written after a sequence name as "/start-end" */
+   int start;                /* first number of the range; filled in by fillrange() */
+   int end;                  /* last number of the range; filled in by fillrange() */
+ } rangeNum;
+ 
+
+/*
+ * fillrange
+ *
+ * Computes the "start-end" numbers that the output routines print after a
+ * sequence name for one window of the alignment.
+ *
+ *   INPUT:   rnum  receives the result (start and end are both written)
+ *            fres  first alignment column of the window; columns are
+ *                  counted from 1
+ *            len   number of columns in the window
+ *            fseq  position in output_index[] of the sequence to examine
+ *
+ *   RETURNS: nothing; the range is stored in *rnum.
+ *
+ * If the stored name has the form "name/start-end", the start parsed from
+ * it is used as an offset for the computed start.  Trace output is written
+ * to stdout with printf().
+ */
+void fillrange(rangeNum *rnum, sint fres, sint len, sint fseq)
+{
+    sint val;
+    sint i;
+    sint j;
+    char *slash;
+
+    int istart = 0;
+    int iend = 0;      /* to print sequence start-end with names */
+    int found = 0;
+    int ngaps = 0;     /* gap codes inside the window */
+    int tmpStart = 0;
+    int tmpEnd = 0;
+    int ntermgaps = 0;
+    int pregaps = 0;   /* gap codes in columns 1 .. fres-1 */
+    int tmpk = 0;
+    int isRange = 0;
+    int formula = 0;
+
+    i = output_index[fseq];
+
+    /*
+     * Look for "name/start-end".  The name part is not needed, so it is
+     * skipped with strchr() instead of being scanned into a fixed buffer
+     * with an unbounded %[^/].  As before, a name that begins with '/' or
+     * has no '/' is not treated as a range.
+     */
+    slash = strchr(names[i],'/');
+    if (slash != NULL && slash != names[i]) {
+        if (sscanf(slash+1,"%d-%d",&tmpStart,&tmpEnd) == 2) {
+            isRange = 1;
+        }
+    }
+
+    for(tmpk=1; tmpk<fres; tmpk++) { /* do this irrespective of above sscanf */
+        val = seq_array[i][tmpk];
+        if ((val < 0) || (val > max_aa)) { /*it is gap */
+            pregaps++;
+        }
+    }
+
+    for(j=fres; j<fres+len; j++) {
+        val = seq_array[i][j];
+        if((val == -3) || (val == 253))
+            break;
+        else if((val < 0) || (val > max_aa)) {
+            /* residue = '-'; */
+            ngaps++;
+        }
+        else {
+            /* residue = amino_acid_codes[val]; */
+            found = j;
+        }
+        /* remember the first residue column and the gaps seen before it */
+        if ( found && (istart == 0) ) {
+            istart = found;
+            ntermgaps = ngaps;
+        }
+    }
+
+    if( seqRange) {
+        printf("Name : %s ",names[i]);
+        printf("\n  fres = %d ",(pint)fres);
+        printf("   len = %d ",(pint)len);
+        printf("\n  istart = %d ",istart);
+        printf("\n  tmpStart = %d ",tmpStart);
+        printf("\n  ngaps = %d ",ngaps);
+        printf("\n  pregaps = %d ",pregaps);
+        if (!isRange)
+            formula = istart - pregaps;
+        else
+            formula = istart - pregaps +  ( tmpStart == 1 ? 0: tmpStart-1) ;
+
+        printf("\n\nsuggestion  istart - pregaps + tmpStart - ntermgaps = %d - %d + %d - %d",istart,
+               pregaps,tmpStart,ntermgaps);
+        printf(" formula %d ",formula);
+    }
+    else {
+        printf("\n no range found .... strange,  istart = %d",istart);
+        formula = 1;
+    }
+
+    if (pregaps == fres-1) /* all gaps -  now the conditions........ */
+        formula = tmpStart ; /* keep the previous start... */
+    formula = (formula <= 0) ? 1: formula;
+    if (pregaps ==0 && tmpStart == 0) {
+        formula = fres;
+    }
+    iend = formula + len - ngaps -1;
+
+    rnum->start = formula;
+    rnum->end = iend;
+    printf("\n check... %s %d - %d",names[i],rnum->start,rnum->end);
+    printf(" Done checking.........");
+}
+
+
+/*
+ * fasta_out
+ *
+ * Writes sequences fseq..lseq (positions in output_index[]) to fastaout as
+ * ">name" header lines followed by the residues of columns fres ..
+ * fres+len-1, wrapped at line_length characters.  Codes outside 0..max_aa
+ * are written as '-', and a code of -3 or 253 ends the sequence.  When
+ * seqRange is set, "/start-end" from fillrange() is appended to each name.
+ */
+void fasta_out(FILE *fastaout, sint fres, sint len, sint fseq, sint lseq)
+{
+
+    char *seq, residue;
+    sint val;
+    sint i,ii;
+    sint j,slen;
+    sint line_length;
+
+    /* automatic storage: nothing to allocate, check or free */
+    rangeNum rnum;
+
+    seq = (char *)ckalloc((len+1) * sizeof(char));
+
+    line_length=PAGEWIDTH-max_names;
+    line_length=line_length-line_length % 10; /* round to a multiple of 10*/
+    if (line_length > LINELENGTH) line_length=LINELENGTH;
+
+    for(ii=fseq; ii<=lseq; ii++) {
+        i = output_index[ii];
+        slen = 0;
+        for(j=fres; j<fres+len; j++) {
+            val = seq_array[i][j];
+            if((val == -3) || (val == 253))
+                break;
+            else if((val < 0) || (val > max_aa)) {
+                residue = '-';
+            }
+            else {
+                residue = amino_acid_codes[val];
+            }
+            if (lowercase)
+                seq[j-fres] = (char)tolower((int)residue);
+            else
+                seq[j-fres] = residue;
+            slen++;
+        }
+        fprintf(fastaout, ">%-s",nameonly(names[i]));
+        if(seqRange) {
+            fillrange(&rnum,fres, len, ii);
+            fprintf(fastaout,"/%d-%d",rnum.start, rnum.end);
+        }
+        fprintf(fastaout,"\n");
+        /* residues are always written in upper case */
+        for(j=1; j<=slen; j++) {
+            fprintf(fastaout,"%c",toupper(seq[j-1]));
+            if((j % line_length == 0) || (j == slen))
+                fprintf(fastaout,"\n");
+        }
+    }
+    seq=ckfree((void *)seq);
+
+    /* just try and see
+    printf("\n Now....  calculating percentage identity....\n\n");
+    calc_percidentity();*/
+
+}
+
+
+/*
+ * clustal_out
+ *
+ * Writes columns fres .. fres+len-1 of sequences fseq..lseq to clusout in
+ * CLUSTAL format: a header line, then blocks of at most line_length
+ * columns.  Each block holds the optional structure / gap penalty mask
+ * lines, one line per sequence, and a line marking identical ('*') and
+ * conserved (':' and '.') columns.
+ *
+ * When seqRange is set each name is written as "name/start-end", using
+ * fillrange().  When cl_seq_numbers is set the running residue count is
+ * appended to every sequence line that contains at least one residue.
+ *
+ * The stream is used as passed in; an earlier version re-opened it here,
+ * which created duplicate files on VMS (DES).
+ */
+void clustal_out(FILE *clusout, sint fres, sint len, sint fseq, sint lseq)
+{
+    char *seq1;
+    sint *seq_no;
+    sint *print_seq_no;
+    char *ss_mask1 = NULL, *ss_mask2 = NULL;
+    char  temp[MAXLINE];
+    char c;
+    sint val;
+    sint ii,lv1,catident1[NUMRES],catident2[NUMRES],ident,chunks;
+    sint i,j,k,l;
+    sint pos,ptr;
+    sint line_length;
+
+    /* automatic storage: nothing to allocate, check or free */
+    rangeNum rnum;
+    /* room for the name, '/', two full-width ints, '-' and the terminator */
+    char tmpStr[FILENAMELEN+32];
+
+    seq_no = (sint *)ckalloc((nseqs+1) * sizeof(sint));
+    print_seq_no = (sint *)ckalloc((nseqs+1) * sizeof(sint));
+    for (i=fseq;i<=lseq;i++) {
+        print_seq_no[i] = seq_no[i] = 0;
+        for(j=1;j<fres;j++) {
+            val = seq_array[i][j];
+            /* count residues only: a residue code lies inside 0..max_aa */
+            if((val >=0) && (val <=max_aa)) seq_no[i]++;
+        }
+    }
+
+    seq1 = (char *)ckalloc((max_aln_length+1) * sizeof(char));
+
+    if (struct_penalties1 == SECST && use_ss1 == TRUE) {
+        ss_mask1 = (char *)ckalloc((seqlen_array[1]+10) * sizeof(char));
+        for (i=0;i<seqlen_array[1];i++)
+            ss_mask1[i] = sec_struct_mask1[i];
+        print_sec_struct_mask(seqlen_array[1],sec_struct_mask1,ss_mask1);
+    }
+    if (struct_penalties2 == SECST && use_ss2 == TRUE) {
+        ss_mask2 = (char *)ckalloc((seqlen_array[profile1_nseqs+1]+10) * sizeof(char));
+        for (i=0;i<seqlen_array[profile1_nseqs+1];i++)
+            ss_mask2[i] = sec_struct_mask2[i];
+        print_sec_struct_mask(seqlen_array[profile1_nseqs+1],sec_struct_mask2,ss_mask2);
+    }
+
+    fprintf(clusout,"CLUSTAL %s multiple sequence alignment\n\n",
+            revision_level);
+
+    /* decide the line length for this alignment - maximum is LINELENGTH */
+    line_length=PAGEWIDTH-max_names;
+    line_length=line_length-line_length % 10; /* round to a multiple of 10*/
+    if (line_length > LINELENGTH) line_length=LINELENGTH;
+
+    chunks = len/line_length;
+    if(len % line_length != 0)
+        ++chunks;
+
+    for(lv1=1;lv1<=chunks;++lv1) {
+        /* pos..ptr are the window-relative columns of this block */
+        pos = ((lv1-1)*line_length)+1;
+        ptr = (len<pos+line_length-1) ? len : pos+line_length-1;
+
+        fprintf(clusout,"\n");
+
+        if (output_struct_penalties == 0 || output_struct_penalties == 2) {
+            if (struct_penalties1 == SECST && use_ss1 == TRUE) {
+                for(i=pos;i<=ptr;++i) {
+                    val=ss_mask1[i+fres-2];
+                    if (val == gap_pos1 || val == gap_pos2)
+                        temp[i-pos]='-';
+                    else
+                        temp[i-pos]=val;
+                }
+                temp[ptr-pos+1]=EOS;
+                if(seqRange) /*Ramu*/
+                    fprintf(clusout,"!SS_%-*s  %s\n",max_names+15,ss_name1,temp);
+                else
+                    fprintf(clusout,"!SS_%-*s  %s\n",max_names,ss_name1,temp);
+            }
+        }
+        if (output_struct_penalties == 1 || output_struct_penalties == 2) {
+            if (struct_penalties1 != NONE && use_ss1 == TRUE) {
+                for(i=pos;i<=ptr;++i) {
+                    val=gap_penalty_mask1[i+fres-2];
+                    if (val == gap_pos1 || val == gap_pos2)
+                        temp[i-pos]='-';
+                    else
+                        temp[i-pos]=val;
+                }
+                temp[ptr-pos+1]=EOS;
+                fprintf(clusout,"!GM_%-*s  %s\n",max_names,ss_name1,temp);
+            }
+        }
+        if (output_struct_penalties == 0 || output_struct_penalties == 2) {
+            if (struct_penalties2 == SECST && use_ss2 == TRUE) {
+                for(i=pos;i<=ptr;++i) {
+                    val=ss_mask2[i+fres-2];
+                    if (val == gap_pos1 || val == gap_pos2)
+                        temp[i-pos]='-';
+                    else
+                        temp[i-pos]=val;
+                }
+                temp[ptr-pos+1]=EOS;
+                if (seqRange )
+                    fprintf(clusout,"!SS_%-*s  %s\n",max_names+15,ss_name2,temp);
+                else
+                    fprintf(clusout,"!SS_%-*s  %s\n",max_names,ss_name2,temp);
+            }
+        }
+        if (output_struct_penalties == 1 || output_struct_penalties == 2) {
+            if (struct_penalties2 != NONE && use_ss2 == TRUE) {
+                for(i=pos;i<=ptr;++i) {
+                    val=gap_penalty_mask2[i+fres-2];
+                    if (val == gap_pos1 || val == gap_pos2)
+                        temp[i-pos]='-';
+                    else
+                        temp[i-pos]=val;
+                }
+                temp[ptr-pos+1]=EOS;
+                fprintf(clusout,"!GM_%-*s  %s\n",max_names,ss_name2,temp);
+            }
+        }
+
+        /* one line per sequence */
+        for(ii=fseq;ii<=lseq;++ii) {
+            i=output_index[ii];
+            print_seq_no[i] = 0;
+            for(j=pos;j<=ptr;++j) {
+                if (j+fres-1<=seqlen_array[i])
+                    val = seq_array[i][j+fres-1];
+                else val = -3;
+                if((val == -3) || (val == 253)) break;
+                else if((val < 0) || (val > max_aa)){
+                    seq1[j]='-';
+                }
+                else {
+                    seq1[j]=amino_acid_codes[val];
+                    seq_no[i]++;
+                    print_seq_no[i]=1;
+                }
+            }
+            /* pad a sequence that ended early */
+            for(;j<=ptr;++j) seq1[j]='-';
+            strncpy(temp,&seq1[pos],ptr-pos+1);
+            temp[ptr-pos+1]=EOS;
+            if (!seqRange) {
+                fprintf(clusout,"%-*s",max_names+5,names[i]);
+            }
+            else {
+                fillrange(&rnum,fres, len, ii);
+                sprintf(tmpStr,"%s/%d-%d", nameonly(names[i]), rnum.start, rnum.end);
+                fprintf(clusout,"%-*s",max_names+15,tmpStr);
+            }
+            fprintf(clusout," %s",temp);
+            if (cl_seq_numbers && print_seq_no[i])
+                fprintf(clusout," %d",(pint)seq_no[i]);
+            fprintf(clusout,"\n");
+        }
+
+        /* conservation line: compare every sequence with sequence fseq */
+        for(i=pos;i<=ptr;++i) {
+            seq1[i]=' ';
+            ident=0;
+            for(j=1;res_cat1[j-1]!=NULL;j++) catident1[j-1] = 0;
+            for(j=1;res_cat2[j-1]!=NULL;j++) catident2[j-1] = 0;
+            for(j=fseq;j<=lseq;++j) {
+                if((seq_array[fseq][i+fres-1] >=0) &&
+                   (seq_array[fseq][i+fres-1] <= max_aa)) {
+                    if(seq_array[fseq][i+fres-1] == seq_array[j][i+fres-1])
+                        ++ident;
+                    for(k=1;res_cat1[k-1]!=NULL;k++) {
+                        for(l=0;(c=res_cat1[k-1][l]);l++) {
+                            if (amino_acid_codes[seq_array[j][i+fres-1]]==c)
+                            {
+                                catident1[k-1]++;
+                                break;
+                            }
+                        }
+                    }
+                    for(k=1;res_cat2[k-1]!=NULL;k++) {
+                        for(l=0;(c=res_cat2[k-1][l]);l++) {
+                            if (amino_acid_codes[seq_array[j][i+fres-1]]==c)
+                            {
+                                catident2[k-1]++;
+                                break;
+                            }
+                        }
+                    }
+                }
+            }
+            if(ident==lseq-fseq+1)
+                seq1[i]='*';
+            else if (!dnaflag) {
+                for(k=1;res_cat1[k-1]!=NULL;k++) {
+                    if (catident1[k-1]==lseq-fseq+1) {
+                        seq1[i]=':';
+                        break;
+                    }
+                }
+                if(seq1[i]==' ')
+                    for(k=1;res_cat2[k-1]!=NULL;k++) {
+                        if (catident2[k-1]==lseq-fseq+1) {
+                            seq1[i]='.';
+                            break;
+                        }
+                    }
+            }
+        }
+        strncpy(temp,&seq1[pos],ptr-pos+1);
+        temp[ptr-pos+1]=EOS;
+        for(k=0;k<max_names+6;k++) fprintf(clusout," ");
+        if(seqRange) /*<ramu>*/
+            fprintf(clusout,"          "); /*</ramu>*/
+        fprintf(clusout,"%s\n",temp);
+    }
+
+    seq1=ckfree((void *)seq1);
+    /* the two counters were never released before */
+    seq_no=ckfree((void *)seq_no);
+    print_seq_no=ckfree((void *)print_seq_no);
+    if (struct_penalties1 == SECST && use_ss1 == TRUE) ckfree(ss_mask1);
+    if (struct_penalties2 == SECST && use_ss2 == TRUE) ckfree(ss_mask2);
+    /* DES	ckfree(output_index); */
+}
+
+
+
+
+/*
+ * gcg_out
+ *
+ * Writes columns fres .. fres+len-1 of sequences fseq..lseq to gcgout in
+ * GCG MSF ("PileUp") format: the header with per-sequence checksums and
+ * weights, then blocks of GCG_LINELENGTH columns with a space after every
+ * tenth column.  Gaps and positions past the end of a sequence are written
+ * as '.'.  When seqRange is set each name in the blocks is written as
+ * "name/start-end", using fillrange().
+ */
+void gcg_out(FILE *gcgout, sint fres, sint len, sint fseq, sint lseq)
+{
+    /*        static char *aacids = "XCSTPAGNDEQHRKMILVFYW";*/
+    /*	static char *nbases = "XACGT";	*/
+    char *seq, residue;
+    sint val;
+    sint *all_checks;
+    sint i,ii,chunks,block;
+    sint j,pos1,pos2;
+    long grand_checksum;
+
+    /*<ramu>*/
+    /* automatic storage: nothing to allocate, check or free */
+    rangeNum rnum;
+    /* room for the name, '/', two full-width ints, '-' and the terminator */
+    char tmpStr[FILENAMELEN+32];
+
+    seq = (char *)ckalloc((max_aln_length+1) * sizeof(char));
+    all_checks = (sint *)ckalloc((lseq+1) * sizeof(sint));
+
+    for(i=fseq; i<=lseq; i++) {
+        for(j=fres; j<=fres+len-1; j++) {
+            val = seq_array[i][j];
+            if((val == -3) || (val == 253)) break;
+            else if((val < 0) || (val > max_aa))
+                residue = '.';
+            else {
+                residue = amino_acid_codes[val];
+            }
+            seq[j-fres+1] = residue;
+        }
+        /* pad any short sequences with gaps, to make all sequences the same length */
+        for(; j<=fres+len-1; j++)
+            seq[j-fres+1] = '.';
+        all_checks[i] = SeqGCGCheckSum(seq+1, (int)len);
+    }
+
+    /*
+     * Sum only the checksums computed above.  Running this loop over
+     * 1..nseqs read entries that were never set, or lay past the end of
+     * all_checks, whenever the caller asked for a sub-range.
+     */
+    grand_checksum = 0;
+    for(ii=fseq; ii<=lseq; ii++) grand_checksum += all_checks[output_index[ii]];
+    grand_checksum = grand_checksum % 10000;
+    fprintf(gcgout,"PileUp\n\n");
+    fprintf(gcgout,"\n\n   MSF:%5d  Type: ",(pint)len);
+    if(dnaflag)
+        fprintf(gcgout,"N");
+    else
+        fprintf(gcgout,"P");
+    fprintf(gcgout,"    Check:%6ld   .. \n\n", (long)grand_checksum);
+    for(ii=fseq; ii<=lseq; ii++)  {
+        i = output_index[ii];
+        fprintf(gcgout,
+                " Name: %s oo  Len:%5d  Check:%6ld  Weight:  %.1f\n",
+                names[i],(pint)len,(long)all_checks[i],(float)seq_weight[i-1]*100.0/(float)INT_SCALE_FACTOR);
+    }
+    fprintf(gcgout,"\n//\n");
+
+    chunks = len/GCG_LINELENGTH;
+    if(len % GCG_LINELENGTH != 0) ++chunks;
+
+    for(block=1; block<=chunks; block++) {
+        fprintf(gcgout,"\n\n");
+        pos1 = ((block-1) * GCG_LINELENGTH) + 1;
+        pos2 = (len<pos1+GCG_LINELENGTH-1)? len : pos1+GCG_LINELENGTH-1;
+        for(ii=fseq; ii<=lseq; ii++) {
+            i = output_index[ii];
+            if (!seqRange) {
+                fprintf(gcgout,"\n%-*s ",max_names+5,names[i]);
+            }
+            else {
+                fillrange(&rnum,fres, len, ii);
+                sprintf(tmpStr,"%s/%d-%d",nameonly(names[i]),rnum.start,rnum.end);
+                fprintf(gcgout,"\n%-*s",max_names+15,tmpStr);
+            }
+            for(j=pos1; j<=pos2; j++) {
+                /*
+                  JULIE -
+                  check for short sequences - pad out with '.' characters to end of alignment
+                */
+                if (j+fres-1<=seqlen_array[i])
+                    val = seq_array[i][j+fres-1];
+                else val = -3;
+                if((val == -3) || (val == 253))
+                    residue = '.';
+                else if((val < 0) || (val > max_aa))
+                    residue = '.';
+                else {
+                    residue = amino_acid_codes[val];
+                }
+                fprintf(gcgout,"%c",residue);
+                if(j % 10 == 0) fprintf(gcgout," ");
+            }
+        }
+    }
+    /* DES	ckfree(output_index); */
+
+    seq=ckfree((void *)seq);
+    all_checks=ckfree((void *)all_checks);
+    fprintf(gcgout,"\n\n");
+}
+
+
+/* <Ramu> */
+/************************************************************************
+ *
+ *    Removes the sequence range from a sequence name.
+ *
+ *    INPUT:   sequence name (e.g. finc_rat/1-200)
+ *
+ *    RETURNS: pointer to a static buffer holding the characters of the
+ *             name that precede the first '/' (the whole name if there is
+ *             no '/').  At most FILENAMELEN characters are copied, so a
+ *             longer name is truncated instead of overrunning the buffer.
+ *             The buffer is overwritten by the next call.
+ */
+
+char *nameonly(char *s)
+{
+    static char tmp[FILENAMELEN+1];
+    int i =0;
+
+    while (*s != '/' && *s != '\0' && i < FILENAMELEN) {
+	tmp[i++] = *s++;
+    }
+    tmp[i] = '\0';
+    return &tmp[0];
+}
+
+
+/*
+ * startFind
+ *
+ * Debug helper: prints s, then walks it and returns how many of its
+ * characters hold a code outside 0..max_aa.  A '-' is printed for each such
+ * code, which is how the output routines in this file render them.
+ *
+ * The pointer now advances on every iteration (it used to advance only for
+ * out-of-range codes, so any in-range code looped forever), and
+ * amino_acid_codes[] is no longer indexed with a code already known to be
+ * out of range.
+ */
+int startFind(char *s)
+{
+    int i = 0;
+    sint val;
+    printf("\n Debug.....\n %s",s);
+
+    while( *s ) {
+	val = *s++;
+	if ( (val <0 ) || (val > max_aa)) {
+	    i++;
+	    printf("%c",'-');
+	}
+    }
+    return i;
+}
+
+/*
+void fasta_out(FILE *fastaout, sint fres, sint len, sint fseq, sint lseq)
+{
+	char residue;
+	sint val;
+	sint i,ii;
+	sint j,k;	
+	
+	for(ii=fseq; ii<=lseq; ii++)  {
+	    i = output_index[ii];
+	    fprintf(fastaout,">%-s",names[i],len);
+	    j = 1;
+	    while(j<len) {
+		if ( ! (j%80) ) {
+		    fprintf(fastaout,"\n");
+			}
+		val = seq_array[i][j];
+		if((val < 0) || (val > max_aa))
+		    residue = '-';
+		else {
+		    residue = amino_acid_codes[val];
+		}
+		fprintf(fastaout,"%c",residue);
+		j++;
+	    }
+	    fprintf(fastaout,"\n");
+	}
+
+}
+*/
+
+/* </Ramu> */
+
+/*
+ * nexus_out
+ *
+ * Writes columns fres .. fres+len-1 of sequences fseq..lseq to nxsout as an
+ * interleaved NEXUS data block, GCG_LINELENGTH columns per block.  Codes
+ * outside 0..max_aa are written as '-', and a sequence line stops at the
+ * end of the sequence.  When seqRange is set each name is written as
+ * "name/start-end", using fillrange().
+ *
+ * NOTE(review): ntax is written as nseqs although the matrix holds
+ * lseq-fseq+1 sequences; the two agree only when the whole set is written.
+ */
+void nexus_out(FILE *nxsout, sint fres, sint len, sint fseq, sint lseq)
+{
+/*      static char *aacids = "XCSTPAGNDEQHRKMILVFYW";*/
+/*		static char *nbases = "XACGT";	*/
+    char residue;
+    sint val;
+    sint i,ii,chunks,block;
+    sint j,pos1,pos2;
+
+    /*<ramu>*/
+    /* automatic storage: nothing to allocate, check or free */
+    rangeNum rnum;
+    /* room for the name, '/', two full-width ints, '-' and the terminator */
+    char tmpStr[FILENAMELEN+32];
+
+    chunks = len/GCG_LINELENGTH;
+    if(len % GCG_LINELENGTH != 0) ++chunks;
+
+    fprintf(nxsout,"#NEXUS\n");
+    fprintf(nxsout,"BEGIN DATA;\n");
+    fprintf(nxsout,"dimensions ntax=%d nchar=%d;\n",(pint)nseqs,(pint)len);
+    fprintf(nxsout,"format missing=?\n");
+    fprintf(nxsout,"symbols=\"");
+    for(i=0;i<=max_aa;i++)
+        fprintf(nxsout,"%c",amino_acid_codes[i]);
+    fprintf(nxsout,"\"\n");
+    fprintf(nxsout,"interleave datatype=");
+    /* plain text, so written with fputs() instead of as a format string */
+    fputs(dnaflag ? "DNA " : "PROTEIN ", nxsout);
+    fprintf(nxsout,"gap= -;\n");
+    fprintf(nxsout,"\nmatrix");
+
+    for(block=1; block<=chunks; block++) {
+        pos1 = ((block-1) * GCG_LINELENGTH)+1;
+        pos2 = (len<pos1+GCG_LINELENGTH-1)? len : pos1+GCG_LINELENGTH-1;
+        for(ii=fseq; ii<=lseq; ii++)  {
+            i = output_index[ii];
+            if (!seqRange) {
+                fprintf(nxsout,"\n%-*s ",max_names+1,names[i]);
+            }
+            else {
+                fillrange(&rnum,fres, len, ii);
+                sprintf(tmpStr,"%s/%d-%d",nameonly(names[i]),rnum.start,rnum.end);
+                fprintf(nxsout,"\n%-*s",max_names+15,tmpStr);
+            }
+            for(j=pos1; j<=pos2; j++) {
+                if (j+fres-1<=seqlen_array[i])
+                    val = seq_array[i][j+fres-1];
+                else val = -3;
+                if((val == -3) || (val == 253))
+                    break;
+                else if((val < 0) || (val > max_aa))
+                    residue = '-';
+                else {
+                    residue = amino_acid_codes[val];
+                }
+                fprintf(nxsout,"%c",residue);
+            }
+        }
+        fprintf(nxsout,"\n");
+    }
+    fprintf(nxsout,";\nend;\n");
+    /* DES	ckfree(output_index); */
+
+}
+
+
+
+
+void phylip_out(FILE *phyout, sint fres, sint len, sint fseq, sint lseq)
+{
+  /*
+   * Write sequences fseq..lseq (visited through output_index) to phyout
+   * in interleaved PHYLIP format, GCG_LINELENGTH columns per block with a
+   * space after every 10th column.  Names are cut to 10 characters; a
+   * warning is issued when any name is longer, and a stronger one when
+   * the truncated names collide.  With seqRange set, the first block
+   * labels each sequence "name/start-end" instead.
+   */
+  char residue;
+  sint val;
+  sint i,ii,chunks,block;
+  sint j,pos1,pos2;
+  sint name_len;
+  Boolean warn;
+  char **snames;
+
+  /*<ramu>*/
+  rangeNum *rnum = NULL;
+  /* room for the name, '/', two decimal integers, '-' and the terminator */
+  char tmpStr[FILENAMELEN+32];
+
+
+  if(seqRange) {
+    rnum = (struct rangeNum *) malloc(sizeof(struct rangeNum));
+    if ( rnum ==NULL ) {
+      /* rnum is dereferenced below; do not continue with NULL */
+      printf("cannot alloc memory for rnum");
+      return;
+    }
+  }
+
+  /* snames is indexed by absolute sequence number (fseq..lseq), so it
+     needs lseq+1 slots, not just lseq-fseq+1 */
+  snames=(char **)ckalloc((lseq+2)*sizeof(char *));
+  name_len=0;
+  for(i=fseq; i<=lseq; i++)  {
+    snames[i]=(char *)ckalloc((11)*sizeof(char));
+    ii=strlen(names[i]);
+    strncpy(snames[i],names[i],10);
+    snames[i][10]=EOS;   /* strncpy does not terminate a truncated copy */
+    if(name_len<ii) name_len=ii;
+  }
+  if(name_len>10) {
+    warn=FALSE;
+    for(i=fseq; i<=lseq; i++)  {
+      for(j=i+1;j<=lseq;j++) {
+        if (strcmp(snames[i],snames[j]) == 0)
+          warn=TRUE;
+      }
+    }
+    if(warn)
+      warning("Truncating sequence names to 10 characters for PHYLIP output.\n"
+	      "Names in the PHYLIP format file are NOT unambiguous.");
+    else
+      warning("Truncating sequence names to 10 characters for PHYLIP output.");
+  }
+
+
+  /* number of interleaved blocks, rounding up */
+  chunks = len/GCG_LINELENGTH;
+  if(len % GCG_LINELENGTH != 0) ++chunks;
+
+  fprintf(phyout,"%6d %6d",(pint)nseqs,(pint)len);
+
+  for(block=1; block<=chunks; block++) {
+    pos1 = ((block-1) * GCG_LINELENGTH)+1;
+    pos2 = (len<pos1+GCG_LINELENGTH-1)? len : pos1+GCG_LINELENGTH-1;
+    for(ii=fseq; ii<=lseq; ii++)  {
+      i = output_index[ii];
+      if(block == 1)  {
+        /* names appear in the first block only */
+        if(!seqRange) {
+          fprintf(phyout,"\n%-10s ",snames[i]);
+        }
+        else
+          {
+            fillrange(rnum,fres, len, ii);
+            sprintf(tmpStr,"%s/%d-%d",nameonly(names[i]),rnum->start,rnum->end);
+            fprintf(phyout,"\n%-*s",max_names+15,tmpStr);
+          }
+      }
+      else
+        fprintf(phyout,"\n           ");
+      for(j=pos1; j<=pos2; j++) {
+        /* positions past the end of the sequence count as end-of-data */
+        if (j+fres-1<=seqlen_array[i])
+          val = seq_array[i][j+fres-1];
+        else val = -3;
+        if((val == -3) || (val == 253))
+          break;
+        else if((val < 0) || (val > max_aa))
+          residue = '-';
+        else {
+          residue = amino_acid_codes[val];
+        }
+        fprintf(phyout,"%c",residue);
+        if(j % 10 == 0) fprintf(phyout," ");
+      }
+    }
+    fprintf(phyout,"\n");
+  }
+  /* DES	ckfree(output_index); */
+
+  for(i=fseq;i<=lseq;i++)
+    ckfree(snames[i]);
+  ckfree(snames);
+
+  free(rnum);   /* NULL when seqRange is off; free(NULL) is a no-op */
+
+}
+
+
+
+
+
+void nbrf_out(FILE *nbout, sint fres, sint len, sint fseq, sint lseq)
+{
+  /*
+   * Write sequences fseq..lseq (visited through output_index) to nbout in
+   * NBRF/PIR format: a ">DL;" or ">P1;" header with the name, the title
+   * line, the residues for columns fres..fres+len-1 wrapped at
+   * line_length, and a closing "*".  With seqRange set the name is
+   * written as "name/start-end".
+   */
+  char *seq, residue;
+  sint val;
+  sint i,ii;
+  sint j,slen;
+  sint line_length;
+
+
+  /*<ramu>*/
+  rangeNum *rnum = NULL;
+  /* room for the name, '/', two decimal integers, '-' and the terminator */
+  char tmpStr[FILENAMELEN+32];
+
+  if(seqRange) {
+    rnum = (struct rangeNum *) malloc(sizeof(struct rangeNum));
+    if ( rnum ==NULL ) {
+      /* rnum is dereferenced below; do not continue with NULL */
+      printf("cannot alloc memory for rnum");
+      return;
+    }
+  }
+
+  seq = (char *)ckalloc((max_aln_length+1) * sizeof(char));
+
+  /* decide the line length for this alignment - maximum is LINELENGTH */
+  line_length=PAGEWIDTH-max_names;
+  line_length=line_length-line_length % 10; /* round to a multiple of 10*/
+  if (line_length > LINELENGTH) line_length=LINELENGTH;
+
+  for(ii=fseq; ii<=lseq; ii++) {
+    i = output_index[ii];
+    fprintf(nbout, dnaflag ? ">DL;" : ">P1;");
+    if (!seqRange) {
+      fprintf(nbout, "%s\n%s\n", names[i], titles[i]);
+    }
+    else {
+      fillrange(rnum,fres, len, ii);
+      sprintf(tmpStr,"%s/%d-%d",nameonly(names[i]),rnum->start,rnum->end);
+      fprintf(nbout,"%s\n%s\n",tmpStr,titles[i]);
+    }
+    /* translate the codes into characters first, stopping at the
+       end-of-sequence markers */
+    slen = 0;
+    for(j=fres; j<fres+len; j++) {
+      val = seq_array[i][j];
+      if((val == -3) || (val == 253))
+        break;
+      else if((val < 0) || (val > max_aa))
+        residue = '-';
+      else {
+        residue = amino_acid_codes[val];
+      }
+      seq[j-fres] = residue;
+      slen++;
+    }
+    /* then print them, breaking the line every line_length characters */
+    for(j=1; j<=slen; j++) {
+      fprintf(nbout,"%c",seq[j-1]);
+      if((j % line_length == 0) || (j == slen))
+        fprintf(nbout,"\n");
+    }
+    fprintf(nbout,"*\n");
+  }
+  /* DES	ckfree(output_index);  */
+
+  seq=ckfree((void *)seq);
+
+  free(rnum);   /* NULL when seqRange is off; free(NULL) is a no-op */
+
+}
+
+
+void gde_out(FILE *gdeout, sint fres, sint len, sint fseq, sint lseq)
+{
+  /*
+   * Write sequences fseq..lseq (visited through output_index) to gdeout
+   * in GDE format: a "#" (DNA) or "%" (protein) name line followed by the
+   * residues for columns fres..fres+len-1, wrapped at line_length and
+   * lower-cased when `lowercase` is set.  Depending on
+   * output_struct_penalties, the secondary structure masks ("SS_ lines)
+   * and gap penalty masks ("GM_ lines) of the two profiles are appended.
+   */
+  char *seq, residue;
+  sint val;
+  char *ss_mask1, *ss_mask2;
+  sint i,ii;
+  sint j,slen;
+  sint line_length;
+
+
+  /*<ramu>*/
+  rangeNum *rnum = NULL;
+
+  if(seqRange) {
+    rnum = (struct rangeNum *) malloc(sizeof(struct rangeNum));
+    if ( rnum ==NULL ) {
+      /* rnum is dereferenced below; do not continue with NULL */
+      printf("cannot alloc memory for rnum");
+      return;
+    }
+  }
+
+  seq = (char *)ckalloc((max_aln_length+1) * sizeof(char));
+
+  /* decide the line length for this alignment - maximum is LINELENGTH */
+  line_length=PAGEWIDTH-max_names;
+  line_length=line_length-line_length % 10; /* round to a multiple of 10*/
+  if (line_length > LINELENGTH) line_length=LINELENGTH;
+
+  /* working copies of the secondary structure masks, passed to
+     print_sec_struct_mask() as its third argument */
+  if (struct_penalties1 == SECST && use_ss1 == TRUE) {
+    ss_mask1 = (char *)ckalloc((seqlen_array[1]+10) * sizeof(char));
+    for (i=0;i<seqlen_array[1];i++)
+      ss_mask1[i] = sec_struct_mask1[i];
+    print_sec_struct_mask(seqlen_array[1],sec_struct_mask1,ss_mask1);
+  }
+  if (struct_penalties2 == SECST && use_ss2 == TRUE) {
+    ss_mask2 = (char *)ckalloc((seqlen_array[profile1_nseqs+1]+10) *
+			       sizeof(char));
+    for (i=0;i<seqlen_array[profile1_nseqs+1];i++)
+      ss_mask2[i] = sec_struct_mask2[i];
+    print_sec_struct_mask(seqlen_array[profile1_nseqs+1],sec_struct_mask2,ss_mask2);
+  }
+
+
+  for(ii=fseq; ii<=lseq; ii++) {
+    i = output_index[ii];
+    fprintf(gdeout, dnaflag ? "#" : "%%");
+    if(!seqRange) {
+      fprintf(gdeout, "%s\n", names[i]);
+    }
+    else {
+      fillrange(rnum,fres, len, ii);
+      fprintf(gdeout,"%s/%d-%d\n",nameonly(names[i]),rnum->start,rnum->end);
+    }
+    slen = 0;
+    for(j=fres; j<fres+len; j++) {
+      val = seq_array[i][j];
+      if((val == -3) || (val == 253))
+        break;
+      else if((val < 0) || (val > max_aa))
+        residue = '-';
+      else {
+        residue = amino_acid_codes[val];
+      }
+      if (lowercase)
+        seq[j-fres] = (char)tolower((int)residue);
+      else
+        seq[j-fres] = residue;
+      slen++;
+    }
+    for(j=1; j<=slen; j++) {
+      fprintf(gdeout,"%c",seq[j-1]);
+      if((j % line_length == 0) || (j == slen))
+        fprintf(gdeout,"\n");
+    }
+  }
+  /* DES	ckfree(output_index); */
+
+  /* In every mask section below seq[] is filled at offsets 0..len-1, so
+     the terminator belongs at seq[i-fres] (== seq[len]) once the loop has
+     finished.  Writing it at seq[i] would land fres bytes further on and
+     can fall outside the buffer. */
+  if (output_struct_penalties == 0 || output_struct_penalties == 2) {
+    if (struct_penalties1 == SECST && use_ss1 == TRUE) {
+      fprintf(gdeout,"\"SS_%-*s\n",max_names,ss_name1);
+      for(i=fres; i<fres+len; i++) {
+        val=ss_mask1[i-1];
+        if (val == gap_pos1 || val == gap_pos2)
+          seq[i-fres]='-';
+        else
+          seq[i-fres]=val;
+      }
+      seq[i-fres]=EOS;
+      for(i=1; i<=len; i++) {
+        fprintf(gdeout,"%c",seq[i-1]);
+        if((i % line_length == 0) || (i == len))
+          fprintf(gdeout,"\n");
+      }
+    }
+
+    if (struct_penalties2 == SECST && use_ss2 == TRUE) {
+      fprintf(gdeout,"\"SS_%-*s\n",max_names,ss_name2);
+      for(i=fres; i<fres+len; i++) {
+        val=ss_mask2[i-1];
+        if (val == gap_pos1 || val == gap_pos2)
+          seq[i-fres]='-';
+        else
+          seq[i-fres]=val;
+      }
+      seq[i-fres]=EOS;
+      for(i=1; i<=len; i++) {
+        fprintf(gdeout,"%c",seq[i-1]);
+        if((i % line_length == 0) || (i == len))
+          fprintf(gdeout,"\n");
+      }
+    }
+  }
+  if (output_struct_penalties == 1 || output_struct_penalties == 2) {
+    if (struct_penalties1 != NONE && use_ss1 == TRUE) {
+      fprintf(gdeout,"\"GM_%-*s\n",max_names,ss_name1);
+      for(i=fres; i<fres+len; i++) {
+        val=gap_penalty_mask1[i-1];
+        if (val == gap_pos1 || val == gap_pos2)
+          seq[i-fres]='-';
+        else
+          seq[i-fres]=val;
+      }
+      seq[i-fres]=EOS;
+      for(i=1; i<=len; i++) {
+        fprintf(gdeout,"%c",seq[i-1]);
+        if((i % line_length == 0) || (i == len))
+          fprintf(gdeout,"\n");
+      }
+    }
+    if (struct_penalties2 != NONE && use_ss2 == TRUE) {
+      fprintf(gdeout,"\"GM_%-*s\n",max_names,ss_name2);
+      for(i=fres; i<fres+len; i++) {
+        val=gap_penalty_mask2[i-1];
+        if (val == gap_pos1 || val == gap_pos2)
+          seq[i-fres]='-';
+        else
+          seq[i-fres]=val;
+      }
+      seq[i-fres]=EOS;
+      for(i=1; i<=len; i++) {
+        fprintf(gdeout,"%c",seq[i-1]);
+        if((i % line_length == 0) || (i == len))
+          fprintf(gdeout,"\n");
+      }
+    }
+  }
+
+  if (struct_penalties1 == SECST && use_ss1 == TRUE) ckfree(ss_mask1);
+  if (struct_penalties2 == SECST && use_ss2 == TRUE) ckfree(ss_mask2);
+  seq=ckfree((void *)seq);
+
+
+  free(rnum);   /* NULL when seqRange is off; free(NULL) is a no-op */
+
+}
+
+
+/* Open one alignment output stream.  When an explicit output file name
+   was given (outfile_name is not empty) it is copied into outname and
+   opened directly; otherwise open_output_file() is called with the
+   prompt, the path and the default extension.  Returns whatever the
+   opener returned (NULL on failure). */
+static FILE *open_format_outfile(char *outname, char *prompt, char *path,
+                                 char *ext)
+{
+  if (outfile_name[0] != EOS) {
+    strcpy(outname, outfile_name);
+    return open_explicit_file(outname);
+  }
+  return open_output_file(prompt, path, outname, ext);
+}
+
+/* Open a file for every selected alignment output format, in the order
+   CLUSTAL, NBRF/PIR, GCG, PHYLIP, GDE, NEXUS, FASTA.  Returns FALSE if no
+   format is selected or as soon as one file cannot be opened, TRUE
+   otherwise. */
+Boolean open_alignment_output(char *path)
+{
+  if (!(output_clustal || output_nbrf || output_gcg || output_phylip ||
+        output_gde || output_nexus || output_fasta)) {
+    error("You must select an alignment output format");
+    return FALSE;
+  }
+
+  if (output_clustal) {
+    clustal_outfile = open_format_outfile(clustal_outname,
+        "\nEnter a name for the CLUSTAL output file ", path, "aln");
+    if (clustal_outfile == NULL) return FALSE;
+  }
+
+  if (output_nbrf) {
+    nbrf_outfile = open_format_outfile(nbrf_outname,
+        "\nEnter a name for the NBRF/PIR output file", path, "pir");
+    if (nbrf_outfile == NULL) return FALSE;
+  }
+
+  if (output_gcg) {
+    gcg_outfile = open_format_outfile(gcg_outname,
+        "\nEnter a name for the GCG output file     ", path, "msf");
+    if (gcg_outfile == NULL) return FALSE;
+  }
+
+  if (output_phylip) {
+    phylip_outfile = open_format_outfile(phylip_outname,
+        "\nEnter a name for the PHYLIP output file  ", path, "phy");
+    if (phylip_outfile == NULL) return FALSE;
+  }
+
+  if (output_gde) {
+    gde_outfile = open_format_outfile(gde_outname,
+        "\nEnter a name for the GDE output file     ", path, "gde");
+    if (gde_outfile == NULL) return FALSE;
+  }
+
+  if (output_nexus) {
+    nexus_outfile = open_format_outfile(nexus_outname,
+        "\nEnter a name for the NEXUS output file   ", path, "nxs");
+    if (nexus_outfile == NULL) return FALSE;
+  }
+
+  /* Ramu */
+  if (output_fasta) {
+    fasta_outfile = open_format_outfile(fasta_outname,
+        "\nEnter a name for the Fasta output file   ", path, "fasta");
+    if (fasta_outfile == NULL) return FALSE;
+  }
+
+  return TRUE;
+}
+
+
+
+
+void create_alignment_output(sint fseq, sint lseq)
+{
+  /*
+   * Write the alignment of sequences fseq..lseq to every output file that
+   * is enabled, closing each file afterwards.  The residue range defaults
+   * to 1..(longest sequence); when setrange is set, the range is parsed
+   * from param_arg[setrange] as "first<sep>last" with <sep> made of
+   * ' ', ':', ',' or '-'.
+   */
+  sint i,length;
+
+  sint ifres; /* starting sequence range - Ramu          */
+  sint ilres; /* ending sequence range */
+  Boolean rangeOK;
+
+  length=0;
+
+  ifres = 1;
+  ilres = 0;
+  rangeOK = FALSE;
+  for (i=fseq;i<=lseq;i++)
+    if (length < seqlen_array[i])
+      length = seqlen_array[i];
+  ilres=length;
+
+
+  if (setrange != -1 ) {
+    /* printf("\n ==================== seqRange is set \n"); */
+    /* %*[...] skips the separator without storing it: storing a %[ match
+       needs room for the characters plus a terminator, which a single
+       char cannot provide.  Two conversions are therefore expected.  A
+       start below 1 is rejected as well, since it would index before the
+       first residue. */
+    if ( sscanf(param_arg[setrange],"%d%*[ :,-]%d",&ifres,&ilres) !=2
+         || ifres < 1 ) {
+      info("seqrange numers are not set properly, using default....");
+      ifres = 1;
+      ilres = length;
+    }
+    else
+      rangeOK = TRUE;
+  }
+  if ( rangeOK && ilres > length ) {
+    /* report the requested value before it is clamped */
+    info("Seqrange %d is more than the %d  setting it to %d ",
+	 (pint)ilres,(pint)length,(pint)length);
+    ilres = length; /* if asked for more, set the limit, Ramui */
+  }
+
+  /* if (usemenu) info("Consensus length = %d",(pint)length);*/
+
+  if (usemenu) info("Consensus length = %d",(pint)ilres);  /* Ramu */
+
+  /*
+  printf("\n creating output ....... normal.... setrange = %d \n",setrange);
+  printf(" ---------> %d   %d \n\n ",ifres,ilres);
+  printf(" ---------> %d  \n\n ",length);
+  */
+
+  if(output_clustal) {
+    clustal_out(clustal_outfile, ifres, ilres,  fseq, lseq);
+    fclose(clustal_outfile);
+    info("CLUSTAL-Alignment file created  [%s]",clustal_outname);
+  }
+  if(output_nbrf)  {
+    nbrf_out(nbrf_outfile, ifres, ilres, /*1, length */ fseq, lseq);
+    fclose(nbrf_outfile);
+    info("NBRF/PIR-Alignment file created [%s]",nbrf_outname);
+  }
+  if(output_gcg)  {
+    gcg_out(gcg_outfile, ifres, ilres, /*1, length */ fseq, lseq);
+    fclose(gcg_outfile);
+    info("GCG-Alignment file created      [%s]",gcg_outname);
+  }
+  if(output_phylip)  {
+    phylip_out(phylip_outfile, ifres, ilres, /*1, length */ fseq, lseq);
+    fclose(phylip_outfile);
+    info("PHYLIP-Alignment file created   [%s]",phylip_outname);
+  }
+  if(output_gde)  {
+    gde_out(gde_outfile, ifres, ilres /*1, length */, fseq, lseq);
+    fclose(gde_outfile);
+    info("GDE-Alignment file created      [%s]",gde_outname);
+  }
+  if(output_nexus)  {
+    nexus_out(nexus_outfile, ifres, ilres /*1, length */, fseq, lseq);
+    fclose(nexus_outfile);
+    info("NEXUS-Alignment file created    [%s]",nexus_outname);
+  }
+  /*  Ramu */
+  if(output_fasta)  {
+    fasta_out(fasta_outfile, ifres, ilres /*1, length */, fseq, lseq);
+    fclose(fasta_outfile);
+    info("Fasta-Alignment file created    [%s]",fasta_outname);
+  }
+}
+
+
+/* Remove gaps from all sequences, compacting each one in place and
+   updating its length.  Gaps from older alignments (code gap_pos1) are
+   dropped when reset_alignments_new or reset_alignments_all is set; gaps
+   that were input with the sequences (code gap_pos2) are dropped only
+   when reset_alignments_all is set. */
+static void reset_align(void)
+{
+  sint seq, src, kept;
+
+  for (seq = 1; seq <= nseqs; seq++) {
+    kept = 0;
+    for (src = 1; src <= seqlen_array[seq]; src++) {
+      if ((seq_array[seq][src] == gap_pos1 &&
+           (reset_alignments_new || reset_alignments_all)) ||
+          (seq_array[seq][src] == gap_pos2 && reset_alignments_all)) {
+        continue;               /* gap to be removed */
+      }
+      kept++;
+      seq_array[seq][kept] = seq_array[seq][src];
+    }
+    seqlen_array[seq] = kept;
+  }
+}
+
+
+
+/* Remove gaps from profile 1: its gap penalty mask, its secondary
+   structure mask and sequences 1..profile1_nseqs are compacted in place.
+   Gaps from older alignments (code gap_pos1) are dropped when
+   reset_alignments_new or reset_alignments_all is set; gaps that were
+   input with the sequences (code gap_pos2) only when reset_alignments_all
+   is set.  The masks are handled first because their loops run over
+   seqlen_array[1], which the sequence pass then rewrites. */
+static void reset_prf1(void)
+{
+  sint seq, src, kept;
+
+  /* gap penalty mask (0-based) */
+  if (struct_penalties1 != NONE) {
+    kept = 0;
+    for (src = 0; src < seqlen_array[1]; src++) {
+      if ((gap_penalty_mask1[src] == gap_pos1 &&
+           (reset_alignments_new || reset_alignments_all)) ||
+          (gap_penalty_mask1[src] == gap_pos2 && reset_alignments_all)) {
+        continue;
+      }
+      gap_penalty_mask1[kept] = gap_penalty_mask1[src];
+      kept++;
+    }
+  }
+
+  /* secondary structure mask (0-based) */
+  if (struct_penalties1 == SECST) {
+    kept = 0;
+    for (src = 0; src < seqlen_array[1]; src++) {
+      if ((sec_struct_mask1[src] == gap_pos1 &&
+           (reset_alignments_new || reset_alignments_all)) ||
+          (sec_struct_mask1[src] == gap_pos2 && reset_alignments_all)) {
+        continue;
+      }
+      sec_struct_mask1[kept] = sec_struct_mask1[src];
+      kept++;
+    }
+  }
+
+  /* the sequences themselves (1-based) */
+  for (seq = 1; seq <= profile1_nseqs; seq++) {
+    kept = 0;
+    for (src = 1; src <= seqlen_array[seq]; src++) {
+      if ((seq_array[seq][src] == gap_pos1 &&
+           (reset_alignments_new || reset_alignments_all)) ||
+          (seq_array[seq][src] == gap_pos2 && reset_alignments_all)) {
+        continue;
+      }
+      kept++;
+      seq_array[seq][kept] = seq_array[seq][src];
+    }
+    seqlen_array[seq] = kept;
+  }
+}
+
+
+
+/* Remove gaps from profile 2: its gap penalty mask, its secondary
+   structure mask and sequences profile1_nseqs+1..nseqs are compacted in
+   place.  Gaps from older alignments (code gap_pos1) are dropped when
+   reset_alignments_new or reset_alignments_all is set; gaps that were
+   input with the sequences (code gap_pos2) only when reset_alignments_all
+   is set.  The masks are handled first because their loops run over
+   seqlen_array[profile1_nseqs+1], which the sequence pass then
+   rewrites. */
+static void reset_prf2(void)
+{
+  sint seq, src, kept;
+
+  /* gap penalty mask (0-based) */
+  if (struct_penalties2 != NONE) {
+    kept = 0;
+    for (src = 0; src < seqlen_array[profile1_nseqs+1]; src++) {
+      if ((gap_penalty_mask2[src] == gap_pos1 &&
+           (reset_alignments_new || reset_alignments_all)) ||
+          (gap_penalty_mask2[src] == gap_pos2 && reset_alignments_all)) {
+        continue;
+      }
+      gap_penalty_mask2[kept] = gap_penalty_mask2[src];
+      kept++;
+    }
+  }
+
+  /* secondary structure mask (0-based) */
+  if (struct_penalties2 == SECST) {
+    kept = 0;
+    for (src = 0; src < seqlen_array[profile1_nseqs+1]; src++) {
+      if ((sec_struct_mask2[src] == gap_pos1 &&
+           (reset_alignments_new || reset_alignments_all)) ||
+          (sec_struct_mask2[src] == gap_pos2 && reset_alignments_all)) {
+        continue;
+      }
+      sec_struct_mask2[kept] = sec_struct_mask2[src];
+      kept++;
+    }
+  }
+
+  /* the sequences themselves (1-based) */
+  for (seq = profile1_nseqs+1; seq <= nseqs; seq++) {
+    kept = 0;
+    for (src = 1; src <= seqlen_array[seq]; src++) {
+      if ((seq_array[seq][src] == gap_pos1 &&
+           (reset_alignments_new || reset_alignments_all)) ||
+          (seq_array[seq][src] == gap_pos2 && reset_alignments_all)) {
+        continue;
+      }
+      kept++;
+      seq_array[seq][kept] = seq_array[seq][src];
+    }
+    seqlen_array[seq] = kept;
+  }
+}
+
+
+
+/* Fix the gaps introduced in older alignments: every gap_pos1 code in the
+   profile 1 gap penalty mask, the profile 1 secondary structure mask and
+   in all nseqs sequences is rewritten as gap_pos2. */
+void fix_gaps(void)
+{
+  sint seq, pos;
+
+  if (struct_penalties1 != NONE) {
+    for (pos = 0; pos < seqlen_array[1]; pos++) {
+      if (gap_penalty_mask1[pos] != gap_pos1) continue;
+      gap_penalty_mask1[pos] = gap_pos2;
+    }
+  }
+
+  if (struct_penalties1 == SECST) {
+    for (pos = 0; pos < seqlen_array[1]; pos++) {
+      if (sec_struct_mask1[pos] != gap_pos1) continue;
+      sec_struct_mask1[pos] = gap_pos2;
+    }
+  }
+
+  /* sequences are stored 1-based */
+  for (seq = 1; seq <= nseqs; seq++) {
+    for (pos = 1; pos <= seqlen_array[seq]; pos++) {
+      if (seq_array[seq][pos] != gap_pos1) continue;
+      seq_array[seq][pos] = gap_pos2;
+    }
+  }
+}
+
+/* Look probe up in the n strings of list.  The probe is walked character
+   by character; at each position i the entries whose i-th character
+   equals probe[i] are counted.  The first position with exactly one such
+   entry decides the result (its index is returned); a position with none
+   returns -1, as does running out of probe characters while several
+   entries still match. */
+static sint find_match(char *probe, char *list[], sint n)
+{
+  sint i,j,len;
+  sint count,match=0;
+
+  len = (sint)strlen(probe);
+  for (i=0;i<len;i++) {
+    count = 0;
+    for (j=0;j<n;j++) {
+      /* an entry shorter than i characters has no position i: skip it
+         instead of reading past its terminator */
+      if ((sint)strlen(list[j]) <= i) continue;
+      if (probe[i] == list[j][i]) {
+        match = j;
+        count++;
+      }
+    }
+    if (count == 0) return((sint)-1);
+    if (count == 1) return(match);
+  }
+  return((sint)-1);
+}
+
+/* Write the current settings to a parameter file as a "clustalw" command
+   line, one option per line joined by backslash continuations.  The
+   default file name is the path of seqname followed by "par"; in menu
+   mode the user may type another name. */
+static void create_parameter_output(void)
+{
+  /* parname must hold path plus the "par" suffix appended below */
+  char parname[FILENAMELEN+4], temp[FILENAMELEN+1];
+  char path[FILENAMELEN+1];
+  FILE *parout;
+  size_t tlen;
+  int ch;
+
+  get_path(seqname,path);
+  strcpy(parname,path);
+  strcat(parname,"par");
+
+  if(usemenu) {
+    fprintf(stdout,"\nEnter a name for the parameter output file [%s]: ",
+	    parname);
+    /* bounded read: gets() cannot limit the input to the size of temp */
+    if (fgets(temp, sizeof(temp), stdin) == NULL)
+      temp[0] = EOS;
+    tlen = strlen(temp);
+    if (tlen > 0 && temp[tlen-1] != '\n') {
+      /* over-long line: discard the rest so it is not read as the
+         answer to a later prompt */
+      while ((ch = getchar()) != EOF && ch != '\n')
+        ;
+    }
+    while (tlen > 0 && (temp[tlen-1] == '\n' || temp[tlen-1] == '\r'))
+      temp[--tlen] = EOS;
+    if(*temp != EOS)
+      strcpy(parname,temp);
+  }
+
+/* remove any existing file of that name before creating the new one */
+  remove(parname);
+  /*
+    fd = creat(parname, 0777);
+    close(fd);
+  */
+
+  if((parout = open_explicit_file(parname))==NULL) return;
+
+  fprintf(parout,"clustalw \\\n");
+  if (!empty && profile1_empty) fprintf(parout,"-infile=%s \\\n",seqname);
+  /* the space before the backslash keeps this option separate from the
+     next one once the shell joins the continued lines */
+  if (!profile1_empty) fprintf(parout,"-profile1=%s \\\n",profile1_name);
+  if (!profile2_empty) fprintf(parout,"-profile2=%s \\\n",profile2_name);
+  if (dnaflag == TRUE)
+    fprintf(parout,"-type=dna \\\n");
+  else
+    fprintf(parout,"-type=protein \\\n");
+
+  /* pairwise alignment settings */
+  if (quick_pairalign) {
+    fprintf(parout,"-quicktree \\\n");
+    fprintf(parout,"-ktuple=%d \\\n",(pint)ktup);
+    fprintf(parout,"-window=%d \\\n",(pint)window);
+    fprintf(parout,"-pairgap=%d \\\n",(pint)wind_gap);
+    fprintf(parout,"-topdiags=%d \\\n",(pint)signif);
+    if (percent) fprintf(parout,"-score=percent \\\n");
+    else
+      fprintf(parout,"-score=absolute \\\n");
+  }
+  else {
+    if (!dnaflag) {
+      fprintf(parout,"-pwmatrix=%s \\\n",pw_mtrxname);
+      fprintf(parout,"-pwgapopen=%.2f \\\n",prot_pw_go_penalty);
+      fprintf(parout,"-pwgapext=%.2f \\\n",prot_pw_ge_penalty);
+    }
+    else {
+      fprintf(parout,"-pwgapopen=%.2f \\\n",pw_go_penalty);
+      fprintf(parout,"-pwgapext=%.2f \\\n",pw_ge_penalty);
+    }
+  }
+
+  /* multiple alignment settings */
+  if (!dnaflag) {
+    fprintf(parout,"-matrix=%s \\\n",mtrxname);
+    fprintf(parout,"-gapopen=%.2f \\\n",prot_gap_open);
+    fprintf(parout,"-gapext=%.2f \\\n",prot_gap_extend);
+  }
+  else {
+    fprintf(parout,"-gapopen=%.2f \\\n",dna_gap_open);
+    fprintf(parout,"-gapext=%.2f \\\n",dna_gap_extend);
+  }
+
+  fprintf(parout,"-maxdiv=%d \\\n",(pint)divergence_cutoff);
+  if (!use_endgaps) fprintf(parout,"-endgaps \\\n");
+
+  if (!dnaflag) {
+    if (neg_matrix) fprintf(parout,"-negative \\\n");
+    if (no_pref_penalties) fprintf(parout,"-nopgap \\\n");
+    if (no_hyd_penalties) fprintf(parout,"-nohgap \\\n");
+    if (no_var_penalties) fprintf(parout,"-novgap \\\n");
+    fprintf(parout,"-hgapresidues=%s \\\n",hyd_residues);
+    fprintf(parout,"-gapdist=%d \\\n",(pint)gap_dist);
+  }
+  else {
+    fprintf(parout,"-transweight=%.2f \\\n",transition_weight);
+  }
+
+  /* output settings */
+  if (output_gcg) fprintf(parout,"-output=gcg \\\n");
+  else if (output_gde) fprintf(parout,"-output=gde \\\n");
+  else if (output_nbrf) fprintf(parout,"-output=pir \\\n");
+  else if (output_phylip) fprintf(parout,"-output=phylip \\\n");
+  else if (output_nexus) fprintf(parout,"-output=nexus \\\n");
+  if (outfile_name[0]!=EOS) fprintf(parout,"-outfile=%s \\\n",outfile_name);
+  if (output_order==ALIGNED) fprintf(parout,"-outorder=aligned \\\n");
+  else                      fprintf(parout,"-outorder=input \\\n");
+  if (output_gde) {
+    if (lowercase) fprintf(parout,"-case=lower \\\n");
+    else           fprintf(parout,"-case=upper \\\n");
+  }
+
+
+  fprintf(parout,"-interactive\n");
+
+  /*
+    if (kimura) fprintf(parout,"-kimura \\\n");
+    if (tossgaps) fprintf(parout,"-tossgaps \\\n");
+    fprintf(parout,"-seed=%d \\\n",(pint)boot_ran_seed);
+    fprintf(parout,"-bootstrap=%d \\\n",(pint)boot_ntrials);
+  */
+  fclose(parout);
+}
+
+
+/* isgap: the code is outside the residue table 0..max_aa.
+   isend: the code is one of the end-of-sequence markers.
+   Arguments are parenthesised so that expressions can be passed. */
+#define isgap(val1) ( ((val1) < 0) || ((val1) > max_aa) )
+#define isend(val1) ( ((val1) == -3) || ((val1) == 253) )
+
+/* Compute the pairwise percent identity of all nseqs sequences and write
+   it to pfile as a matrix with one row per sequence.  For each pair, the
+   columns where either sequence has a gap are skipped; the identity is
+   the number of equal columns divided by the number of columns compared,
+   times 100. */
+void calc_percidentity(FILE *pfile)
+{
+  double **pmat;
+
+  float ident;
+  int nmatch;
+
+  sint val1, val2;
+
+  sint i,j,k;
+
+  /* (nseqs+1) x (nseqs+1) so that the 1-based sequence numbers can be
+     used as indices directly */
+  pmat = (double **)ckalloc((nseqs+1) * sizeof(double *));
+  for (i=0;i<=nseqs;i++)
+    pmat[i] = (double *)ckalloc((nseqs+1) * sizeof(double));
+  for (i = 0; i <= nseqs; i++)
+    for (j = 0; j <= nseqs; j++)
+      pmat[i][j] = 0.0;
+
+  for (i=1; i <= nseqs; i++) {
+    /*printf("\n %5d:  comparing %s with  ",i,names[i]); */
+    for (j=i; j<=nseqs ;  j++) {
+      printf("\n           %s ",names[j]);
+      ident = 0;
+      nmatch = 0;
+      /* never read past the recorded length of either sequence */
+      for(k=1;  k<=seqlen_array[i] && k<=seqlen_array[j]; k++) {
+        val1 = seq_array[i][k];
+        val2 = seq_array[j][k];
+        if ( isend(val1) || isend(val2)) break;  /* end of sequence ????? */
+        if ( isgap(val1)  || isgap(val2) ) continue; /* residue = '-'; */
+        if (val1 == val2)
+          ident++ ;
+        nmatch++;
+      }
+      /* no comparable column at all: report 0 instead of dividing by 0 */
+      if (nmatch > 0)
+        ident = ident/nmatch * 100.0 ;
+      else
+        ident = 0;
+      pmat[i][j] = ident;
+      pmat[j][i]= ident;
+    }
+
+  }
+  fprintf(pfile,"#\n#\n#  Percent Identity  Matrix - created by Clustal%s \n#\n#\n",revision_level);
+  for(i=1;i<=nseqs;i++) {
+    fprintf(pfile,"\n %5d: %-*s",i,max_names,names[i]);
+    for(j=1;j<=nseqs;j++) {
+      fprintf(pfile,"%8.0f",pmat[i][j]);
+    }
+  }
+  fprintf(pfile,"\n");
+
+  /* rows 0..nseqs were allocated, so all of them are released */
+  for (i=0;i<=nseqs;i++)
+    pmat[i]=ckfree((void *)pmat[i]);
+  pmat=ckfree((void *)pmat);
+
+}

Added: trunk/packages/clustalw/branches/upstream/current/makefile
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/makefile	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/makefile	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,61 @@
+# Builds clustalw and clustalx.  clustalx is additionally compiled with
+# CXFLAGS and linked with LXFLAGS (NCBI vibrant/ncbi and the X11/Motif
+# libraries).
+install: clustalx clustalw
+
+# -f: do not fail when there are no object files to remove
+clean:
+	rm -f *.o
+
+OBJECTS = interface.o sequence.o showpair.o malign.o \
+  	util.o trees.o gcgcheck.o prfalign.o pairalign.o \
+  	calcgapcoeff.o calcprf1.o calcprf2.o calctree.o \
+        readmat.o alnscore.o random.o 
+
+XOBJECTS = xutils.o xmenu.o xcolor.o xdisplay.o xscore.o
+
+HEADERS = general.h clustalw.h
+
+CC	= cc
+CFLAGS  = -c -O
+LFLAGS	= -O -lm 
+NCBI_INC  = /dec/biolo/ncbi/include
+NCBI_LIB	= /dec/biolo/ncbi/lib
+CXFLAGS  = -DWIN_MOTIF -I$(NCBI_INC)
+LXFLAGS	= -L$(NCBI_LIB) -lvibrant -lncbi -lpthread -lXm -lXmu -lXt -lX11 -lm 
+
+clustalw : $(OBJECTS) amenu.o clustalw.o
+	$(CC) -o $@ $(OBJECTS) amenu.o clustalw.o $(LFLAGS)
+
+interface.o : interface.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $*.c
+
+amenu.o : amenu.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $*.c
+
+clustalx : $(OBJECTS) $(XOBJECTS) clustalx.o
+	$(CC) -o $@ $(OBJECTS) $(XOBJECTS) clustalx.o $(LFLAGS) $(LXFLAGS)
+
+clustalx.o : clustalx.c $(HEADERS)
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xmenu.o : xmenu.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xdisplay.o : xdisplay.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xutils.o : xutils.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xcolor.o : xcolor.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xscore.o : xscore.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+readmat.o : readmat.c $(HEADERS) matrices.h
+	$(CC) $(CFLAGS) $*.c
+
+trees.o : trees.c $(HEADERS) dayhoff.h
+	$(CC) $(CFLAGS) $*.c
+
+# $< is the source file of the inference rule; $? would also name any
+# other out-of-date prerequisite
+.c.o :
+	$(CC) $(CFLAGS) $<
+

Added: trunk/packages/clustalw/branches/upstream/current/makefile.alpha
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/makefile.alpha	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/makefile.alpha	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,65 @@
+# Builds clustalw and clustalx.  clustalx is additionally compiled with
+# CXFLAGS and linked with LXFLAGS (NCBI vibrant/ncbi and the X11/Motif
+# libraries).  clustalt links the same objects as clustalw under another
+# name.
+install: clustalx clustalw
+
+# -f: do not fail when there are no object files to remove
+clean:
+	rm -f *.o
+
+OBJECTS = interface.o sequence.o showpair.o malign.o \
+  	util.o trees.o gcgcheck.o prfalign.o pairalign.o \
+  	calcgapcoeff.o calcprf1.o calcprf2.o calctree.o \
+        readmat.o alnscore.o random.o 
+
+XOBJECTS = xutils.o xmenu.o xcolor.o xdisplay.o xscore.o
+
+HEADERS = general.h clustalw.h
+
+CC	= cc
+CFLAGS  = -c -O
+LFLAGS	= -O -lm 
+NCBI_INC  = /dec/biolo/ncbi/include
+NCBI_LIB	= /dec/biolo/ncbi/lib
+CXFLAGS  = -DWIN_MOTIF -I$(NCBI_INC)
+LXFLAGS	= -L$(NCBI_LIB) -lvibrant -lncbi -lpthread -lXm -lXmu -lXt -lX11 -lm 
+
+clustalw : $(OBJECTS) amenu.o clustalw.o
+	$(CC) -o $@ $(OBJECTS) amenu.o clustalw.o $(LFLAGS)
+
+clustalt : $(OBJECTS) amenu.o clustalw.o
+	$(CC) -o clustalt $(OBJECTS) amenu.o clustalw.o $(LFLAGS)
+
+
+interface.o : interface.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $*.c
+
+amenu.o : amenu.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $*.c
+
+clustalx : $(OBJECTS) $(XOBJECTS) clustalx.o
+	$(CC) -o $@ $(OBJECTS) $(XOBJECTS) clustalx.o $(LFLAGS) $(LXFLAGS)
+
+clustalx.o : clustalx.c $(HEADERS)
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xmenu.o : xmenu.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xdisplay.o : xdisplay.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xutils.o : xutils.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xcolor.o : xcolor.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xscore.o : xscore.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+readmat.o : readmat.c $(HEADERS) matrices.h
+	$(CC) $(CFLAGS) $*.c
+
+trees.o : trees.c $(HEADERS) dayhoff.h
+	$(CC) $(CFLAGS) $*.c
+
+# $< is the source file of the inference rule; $? would also name any
+# other out-of-date prerequisite
+.c.o :
+	$(CC) $(CFLAGS) $<
+

Added: trunk/packages/clustalw/branches/upstream/current/makefile.linux
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/makefile.linux	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/makefile.linux	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,58 @@
+# Builds clustalw and clustalx.  clustalx is additionally compiled with
+# CXFLAGS and linked with LXFLAGS (NCBI vibrant/ncbi and the X11/Motif
+# libraries).  The "static" target links clustalx.static with -static and
+# also builds clustalw.
+install: clustalx clustalw
+
+# -f: do not fail when there are no object files to remove
+clean:
+	rm -f *.o
+
+OBJECTS = interface.o sequence.o showpair.o malign.o util.o trees.o gcgcheck.o prfalign.o pairalign.o calcgapcoeff.o calcprf1.o calcprf2.o calctree.o readmat.o alnscore.o random.o 
+
+XOBJECTS = xutils.o xmenu.o xcolor.o xdisplay.o xscore.o
+
+HEADERS = general.h clustalw.h
+
+CC	= cc
+CFLAGS  = -c -O
+LFLAGS	= -O -lm
+CXFLAGS  = -DWIN_MOTIF -I/usr/bio/src/ncbi/include
+LXFLAGS	= -L/usr/bio/src/ncbi/lib -L/usr/ccs/lib -L/usr/X11R6/lib -lvibrant -lncbi -lXm -lXmu -lXpm -lXt -lX11 -lm
+
+
+static:	$(OBJECTS) amenu.o clustalw.o $(XOBJECTS) clustalx.o
+	$(CC) -o clustalx.static $(OBJECTS) $(XOBJECTS) clustalx.o $(LFLAGS) $(LXFLAGS) -lXext   -lX11 -lSM -static /usr/X11R6/lib/libICE.a 
+	$(CC) -o clustalw $(OBJECTS) amenu.o clustalw.o $(LFLAGS)
+
+clustalw : $(OBJECTS) amenu.o clustalw.o
+	$(CC) -o $@ $(OBJECTS) amenu.o clustalw.o $(LFLAGS)
+
+amenu.o : amenu.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $*.c
+
+clustalx : $(OBJECTS) $(XOBJECTS) clustalx.o
+	$(CC) -o $@ $(OBJECTS) $(XOBJECTS) clustalx.o $(LFLAGS) $(LXFLAGS)
+
+clustalx.o : clustalx.c $(HEADERS)
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xmenu.o : xmenu.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xdisplay.o : xdisplay.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xutils.o : xutils.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xcolor.o : xcolor.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xscore.o : xscore.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+readmat.o : readmat.c $(HEADERS) matrices.h
+	$(CC) $(CFLAGS) $*.c
+
+trees.o : trees.c $(HEADERS) dayhoff.h
+	$(CC) $(CFLAGS) $*.c
+
+# $< is the source file of the inference rule; $? would also name any
+# other out-of-date prerequisite
+.c.o :
+	$(CC) $(CFLAGS) $<
+

Added: trunk/packages/clustalw/branches/upstream/current/makefile.sgi
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/makefile.sgi	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/makefile.sgi	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,58 @@
+install: clustalx clustalw
+# "install" is the first target, so a bare "make" builds both programs.
+clean:
+	rm -f *.o
+
+OBJECTS = interface.o sequence.o showpair.o malign.o \
+  	util.o trees.o gcgcheck.o prfalign.o pairalign.o \
+  	calcgapcoeff.o calcprf1.o calcprf2.o calctree.o \
+        readmat.o alnscore.o random.o
+# XOBJECTS are linked only into clustalx.
+XOBJECTS = xutils.o xmenu.o xcolor.o xdisplay.o xscore.o
+
+HEADERS = general.h clustalw.h
+# Site-specific settings: point NCBI_INC / NCBI_LIB at the local ncbi/vibrant headers and libraries.
+CC	= cc
+CFLAGS  = -c -O
+LFLAGS	= -O -lm
+NCBI_INC  = /biolo/ncbi/include
+NCBI_LIB  = /biolo/ncbi/lib
+CXFLAGS  = -DWIN_MOTIF -I$(NCBI_INC)
+LXFLAGS	= -L$(NCBI_LIB) -L/usr/ccs/lib/ -lvibrant -lncbi -lXm -lXt -lX11 -lXmu -lm
+
+clustalw : $(OBJECTS) amenu.o clustalw.o
+	$(CC) -o $@ $(OBJECTS) amenu.o clustalw.o $(LFLAGS)
+
+amenu.o : amenu.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $*.c
+
+clustalx : $(OBJECTS) $(XOBJECTS) clustalx.o
+	$(CC) -o $@ $(OBJECTS) $(XOBJECTS) clustalx.o $(LFLAGS) $(LXFLAGS)
+
+clustalx.o : clustalx.c $(HEADERS)
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xmenu.o : xmenu.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xdisplay.o : xdisplay.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xutils.o : xutils.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xcolor.o : xcolor.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xscore.o : xscore.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+readmat.o : readmat.c $(HEADERS) matrices.h
+	$(CC) $(CFLAGS) $*.c
+
+trees.o : trees.c $(HEADERS) dayhoff.h
+	$(CC) $(CFLAGS) $*.c
+# Suffix rule for every object without an explicit rule; $< is the one .c file being compiled.
+.c.o :
+	$(CC) $(CFLAGS) $<
+

Added: trunk/packages/clustalw/branches/upstream/current/makefile.sun
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/makefile.sun	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/makefile.sun	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,61 @@
+install: clustalx clustalw
+# "install" is the first target, so a bare "make" builds both programs.
+clean:
+	rm -f *.o
+
+OBJECTS = interface.o sequence.o showpair.o malign.o \
+  	util.o trees.o gcgcheck.o prfalign.o pairalign.o \
+  	calcgapcoeff.o calcprf1.o calcprf2.o calctree.o \
+        readmat.o alnscore.o random.o
+# XOBJECTS are linked only into clustalx.
+XOBJECTS = xutils.o xmenu.o xcolor.o xdisplay.o xscore.o
+
+HEADERS = general.h clustalw.h
+# Site-specific settings: point NCBI_INC / NCBI_LIB at the local ncbi/vibrant headers and libraries.
+CC	= cc
+CFLAGS  = -c -O
+LFLAGS	= -O -lm
+NCBI_INC = /workbench/include/ncbi
+NCBI_LIB = /workbench/lib/ncbi
+CXFLAGS  = -DWIN_MOTIF -I$(NCBI_INC) -I/opt/SUNWmotif/include
+LXFLAGS	= -L$(NCBI_LIB) -L/usr/ccs/lib/ -L/opt/SUNWmotif/lib -Bstatic -lvibrant -lncbi -Bdynamic -lXm -lXmu -Bdynamic -lXt -lX11 -lgen
+
+clustalw : $(OBJECTS) amenu.o clustalw.o
+	$(CC) -o $@ $(OBJECTS) amenu.o clustalw.o $(LFLAGS)
+
+interface.o : interface.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $*.c
+
+amenu.o : amenu.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $*.c
+
+clustalx : $(OBJECTS) $(XOBJECTS) clustalx.o
+	$(CC) -o $@ $(OBJECTS) $(XOBJECTS) clustalx.o $(LFLAGS) $(LXFLAGS)
+
+clustalx.o : clustalx.c $(HEADERS)
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xmenu.o : xmenu.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xdisplay.o : xdisplay.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xutils.o : xutils.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xcolor.o : xcolor.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+xscore.o : xscore.c $(HEADERS) param.h
+	$(CC) $(CFLAGS) $(CXFLAGS) $*.c
+
+readmat.o : readmat.c $(HEADERS) matrices.h
+	$(CC) $(CFLAGS) $*.c
+
+trees.o : trees.c $(HEADERS) dayhoff.h
+	$(CC) $(CFLAGS) $*.c
+# Suffix rule for every object without an explicit rule; $< is the one .c file being compiled.
+.c.o :
+	$(CC) $(CFLAGS) $<
+

Added: trunk/packages/clustalw/branches/upstream/current/malign.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/malign.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/malign.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,654 @@
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include "clustalw.h"
+
+
+/*
+ *       Prototypes
+ */
+
+/*
+ *       Global Variables
+ */
+
+extern double  **tmat;              /* pairwise matrix, indexed [1..nseqs][1..nseqs] by the code in this file */
+extern Boolean no_weights;          /* when FALSE, malign rescales seq_weight before each second-pass alignment */
+extern sint     debug;              /* values > 1 make seqalign/palign2 print the weights to stdout */
+extern sint     max_aa;             /* countid scores only residue codes in the range 0..max_aa-1 */
+extern sint     nseqs;              /* number of sequences; upper bound of the 1-based loops in this file */
+extern sint     profile1_nseqs;     /* sequences 1..profile1_nseqs are placed in group 1 by palign1/palign2 */
+extern sint     nsets;              /* number of groups reported by malign after create_sets() */
+extern sint     **sets;             /* allocated in malign as (nseqs+1) rows of (nseqs+1); sets[set][i] != 0 selects sequence i */
+extern sint     divergence_cutoff;  /* malign's first pass only takes sequences whose maxid exceeds this value */
+extern sint     *seq_weight;        /* per-sequence weights, indexed 0..nseqs-1 */
+extern sint     output_order, *output_index;  /* output_order is compared with INPUT; output_index is filled from index 1 */
+extern Boolean distance_tree;       /* set to FALSE by palign1 */
+extern char    seqname[];           /* not referenced by the functions in this part of the file */
+extern sint     *seqlen_array;      /* sequence lengths, read by countid as seqlen_array[s] */
+extern char    **seq_array;         /* residue codes, read by countid as seq_array[s][1..len] */
+
+/* Full progressive alignment of all nseqs sequences, guided by the tree file phylip_name. */
+/* istart==0: align from scratch; otherwise sequences 1..istart+1 are treated as already aligned. */
+/* Returns nseqs on completion, 0 if read_tree/calc_similarities returns 0, -1 on a negative first-pass score. */
+sint malign(sint istart,char *phylip_name)
+{
+   static 	sint *aligned;
+   static 	sint *group;
+   static 	sint ix;
+
+   sint 	*maxid, max, sum;
+   sint		*tree_weight;
+   sint 		i,j,set,iseq=0;
+   sint 		status,entries;
+   lint		score = 0;
+
+   info("Start of Multiple Alignment");
+/* get the phylogenetic tree from *.ph */
+
+   if (nseqs >= 2) 
+     {
+       status = read_tree(phylip_name, (sint)0, nseqs);
+       if (status == 0) return((sint)0);
+     }
+
+/* calculate sequence weights according to branch lengths of the tree -
+   weights in global variable seq_weight normalised to sum to 100 */
+
+   calc_seq_weights((sint)0, nseqs, seq_weight);
+
+/* recalculate tmat matrix as percent similarity matrix */
+
+   status = calc_similarities(nseqs);
+   if (status == 0) return((sint)0);
+
+/* for each sequence, find the most closely related sequence */
+
+   maxid = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
+   for (i=1;i<=nseqs;i++)
+     {
+         maxid[i] = -1;
+         for (j=1;j<=nseqs;j++) 
+           if (j!=i && maxid[i] < tmat[i][j]) maxid[i] = tmat[i][j];
+     }
+
+/* group the sequences according to their relative divergence */
+
+   if (istart == 0)
+     {
+        sets = (sint **) ckalloc( (nseqs+1) * sizeof (sint *) );
+        for(i=0;i<=nseqs;i++)
+           sets[i] = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );
+
+        create_sets((sint)0,nseqs);
+        info("There are %d groups",(pint)nsets);
+
+/* clear the memory used for the phylogenetic tree */
+
+        if (nseqs >= 2)
+             clear_tree(NULL);
+
+/* start the multiple alignments.........  */
+
+        info("Aligning...");
+
+/* first pass, align closely related sequences first.... */
+
+        ix = 0;
+        aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );
+        for (i=0;i<=nseqs;i++) aligned[i] = 0;
+
+        for(set=1;set<=nsets;++set)
+         {
+          entries=0;
+          for (i=1;i<=nseqs;i++)
+            {
+               if ((sets[set][i] != 0) && (maxid[i] > divergence_cutoff))
+                 {
+                    entries++;
+                    if  (aligned[i] == 0)
+                       {
+                          if (output_order==INPUT)
+                            {
+                              ++ix;
+                              output_index[i] = i;
+                            }
+                          else output_index[++ix] = i;
+                          aligned[i] = 1;
+                       }
+                 }
+            }
+
+          if(entries > 0) score = prfalign(sets[set], aligned);
+          else score=0;
+/* negative score means fatal error... release what this call allocated, then exit now!  */
+          if (score < 0) 
+             {
+                for (i=0;i<=nseqs;i++) sets[i]=ckfree((void *)sets[i]);
+                sets=ckfree(sets);
+                aligned=ckfree((void *)aligned); maxid=ckfree((void *)maxid);
+                return(-1);
+             }
+          if ((entries > 0) && (score > 0))
+             info("Group %d: Sequences:%4d      Score:%d",
+             (pint)set,(pint)entries,(pint)score);
+          else
+             info("Group %d:                     Delayed",
+             (pint)set);
+        }
+
+        for (i=0;i<=nseqs;i++)
+          sets[i]=ckfree((void *)sets[i]);
+        sets=ckfree(sets);
+     }
+   else
+     {
+/* clear the memory used for the phylogenetic tree */
+
+        if (nseqs >= 2)
+             clear_tree(NULL);
+
+        aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );
+        ix = 0;
+        for (i=1;i<=istart+1;i++)
+         {
+           aligned[i] = 1;
+           ++ix;
+           output_index[i] = i;
+         }
+        for (i=istart+2;i<=nseqs;i++) aligned[i] = 0;
+     }
+
+/* second pass - align remaining, more divergent sequences..... */
+
+/* if not all sequences were aligned, for each unaligned sequence,
+   find its closest pair amongst the aligned sequences.  */
+
+    group = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
+    tree_weight = (sint *) ckalloc( (nseqs) * sizeof(sint) );
+    for (i=0;i<nseqs;i++)
+   		tree_weight[i] = seq_weight[i];
+
+/* if we haven't aligned any sequences, in the first pass - align the
+two most closely related sequences now */
+   if(ix==0)
+     {
+        max = -1;
+	iseq = 0;
+        for (i=1;i<=nseqs;i++)
+          {
+             for (j=i+1;j<=nseqs;j++)
+               {
+                  if (max < tmat[i][j])
+		  {
+                     max = tmat[i][j];
+                     iseq = i;
+                  }
+              }
+          }
+        aligned[iseq]=1;
+        if (output_order == INPUT)
+          {
+            ++ix;
+            output_index[iseq] = iseq;
+          }
+         else
+            output_index[++ix] = iseq;
+     }
+
+    while (ix < nseqs)
+      {
+             for (i=1;i<=nseqs;i++) {
+                if (aligned[i] == 0)
+                  {
+                     maxid[i] = -1;
+                     for (j=1;j<=nseqs;j++) 
+                        if ((maxid[i] < tmat[i][j]) && (aligned[j] != 0))
+                            maxid[i] = tmat[i][j];
+                  }
+              }
+/* find the most closely related sequence to those already aligned */
+
+            max = -1;
+	    iseq = 0;
+            for (i=1;i<=nseqs;i++)
+              {
+                if ((aligned[i] == 0) && (maxid[i] > max))
+                  {
+                     max = maxid[i];
+                     iseq = i;
+                  }
+              }
+
+/* align this sequence to the existing alignment */
+/* weight sequences with percent identity with profile*/
+/* OR...., multiply sequence weights from tree by percent identity with new sequence */
+   if(no_weights==FALSE) {
+   for (j=0;j<nseqs;j++)
+       if (aligned[j+1] != 0)
+              seq_weight[j] = tree_weight[j] * tmat[j+1][iseq];
+/*
+  Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR
+*/
+
+         sum = 0;
+         for (j=0;j<nseqs;j++)
+           if (aligned[j+1] != 0)
+            sum += seq_weight[j];
+         if (sum == 0)
+          {
+           for (j=0;j<nseqs;j++)
+                seq_weight[j] = 1;
+           sum = j;
+          }
+         for (j=0;j<nseqs;j++)
+           if (aligned[j+1] != 0)
+             {
+               seq_weight[j] = (seq_weight[j] * INT_SCALE_FACTOR) / sum;
+               if (seq_weight[j] < 1) seq_weight[j] = 1;
+             }
+	}
+
+         entries = 0;
+         for (j=1;j<=nseqs;j++)
+           if (aligned[j] != 0)
+              {
+                 group[j] = 1;
+                 entries++;
+              }
+           else if (iseq==j)
+              {
+                 group[j] = 2;
+                 entries++;
+              }
+         aligned[iseq] = 1;
+
+         score = prfalign(group, aligned);   /* NOTE(review): unlike the first pass, a negative score is not checked here */
+         info("Sequence:%d     Score:%d",(pint)iseq,(pint)score);
+         if (output_order == INPUT)
+          {
+            ++ix;
+            output_index[iseq] = iseq;
+          }
+         else
+            output_index[++ix] = iseq;
+      }
+
+   group=ckfree((void *)group);
+   aligned=ckfree((void *)aligned);
+   maxid=ckfree((void *)maxid);
+   tree_weight=ckfree((void *)tree_weight);
+
+   aln_score();
+
+/* make the rest (output stuff) into routine clustal_out in file amenu.c */
+
+   return(nseqs);
+
+}
+
+/* Align new sequences to an existing profile: sequences 1..istart+1 are taken as aligned, the rest are added one at a time. */
+/* Weights come from the tree file phylip_name and are rescaled by similarity to each incoming sequence. */
+/* Returns nseqs on completion, or 0 if read_tree or calc_similarities returns 0. */
+sint seqalign(sint istart,char *phylip_name)
+{
+   static 	sint *aligned, *tree_weight;
+   static 	sint *group;
+   static 	sint ix;
+
+   sint 	*maxid, max;
+   sint 		i,j,status,iseq=0;   /* starts at 0, as in malign, so it is never read uninitialised */
+   sint 		sum,entries;
+   lint		score = 0;
+
+   info("Start of Multiple Alignment");
+/* get the phylogenetic tree from *.ph */
+
+   if (nseqs >= 2) 
+     {
+       status = read_tree(phylip_name, (sint)0, nseqs);
+       if (status == 0) return(0);
+     }
+
+/* calculate sequence weights according to branch lengths of the tree -
+   weights in global variable seq_weight normalised to sum to 100 */
+
+   calc_seq_weights((sint)0, nseqs, seq_weight);
+
+   tree_weight = (sint *) ckalloc( (nseqs) * sizeof(sint) );
+   for (i=0;i<nseqs;i++)
+   		tree_weight[i] = seq_weight[i];
+
+/* recalculate tmat matrix as percent similarity matrix */
+
+   status = calc_similarities(nseqs);
+   if (status == 0) { tree_weight=ckfree((void *)tree_weight); return((sint)0); }
+
+/* for each sequence, find the most closely related sequence */
+
+   maxid = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
+   for (i=1;i<=nseqs;i++)
+     {
+         maxid[i] = -1;
+         for (j=1;j<=nseqs;j++) 
+           if (maxid[i] < tmat[i][j]) maxid[i] = tmat[i][j];
+     }
+
+/* clear the memory used for the phylogenetic tree */
+
+        if (nseqs >= 2)
+             clear_tree(NULL);
+
+        aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );
+        ix = 0;
+        for (i=1;i<=istart+1;i++)
+         {
+           aligned[i] = 1;
+           ++ix;
+           output_index[i] = i;
+         }
+        for (i=istart+2;i<=nseqs;i++) aligned[i] = 0;
+
+/* for each unaligned sequence, find its closest pair amongst the
+   aligned sequences.  */
+
+    group = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
+
+    while (ix < nseqs)
+      {
+        if (ix > 0) 
+          {
+             for (i=1;i<=nseqs;i++) {
+                if (aligned[i] == 0)
+                  {
+                     maxid[i] = -1;
+                     for (j=1;j<=nseqs;j++) 
+                        if ((maxid[i] < tmat[i][j]) && (aligned[j] != 0))
+                            maxid[i] = tmat[i][j];
+                  }
+              }
+          }
+
+/* find the most closely related sequence to those already aligned */
+
+         max = -1;
+         for (i=1;i<=nseqs;i++)
+           {
+             if ((aligned[i] == 0) && (maxid[i] > max))
+               {
+                  max = maxid[i];
+                  iseq = i;
+               }
+           }
+
+/* align this sequence to the existing alignment */
+
+         entries = 0;
+         for (j=1;j<=nseqs;j++)
+           if (aligned[j] != 0)
+              {
+                 group[j] = 1;
+                 entries++;
+              }
+           else if (iseq==j)
+              {
+                 group[j] = 2;
+                 entries++;
+              }
+         aligned[iseq] = 1;
+
+/* EITHER....., set sequence weights equal to percent identity with new sequence */
+/*
+           for (j=0;j<nseqs;j++)
+              seq_weight[j] = tmat[j+1][iseq];
+*/
+/* OR...., multiply sequence weights from tree by percent identity with new sequence */
+           for (j=0;j<nseqs;j++)
+              seq_weight[j] = tree_weight[j] * tmat[j+1][iseq];
+if (debug>1)         
+  for (j=0;j<nseqs;j++) if (group[j+1] == 1)fprintf (stdout,"sequence %d: %d\n", j+1,tree_weight[j]);
+/*
+  Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR
+*/
+
+         sum = 0;
+         for (j=0;j<nseqs;j++)
+           if (group[j+1] == 1) sum += seq_weight[j];
+         if (sum == 0)
+          {
+           for (j=0;j<nseqs;j++)
+                seq_weight[j] = 1;
+           sum = j;
+          }
+         for (j=0;j<nseqs;j++)
+             {
+               seq_weight[j] = (seq_weight[j] * INT_SCALE_FACTOR) / sum;
+               if (seq_weight[j] < 1) seq_weight[j] = 1;
+             }
+
+if (debug > 1) {
+  fprintf(stdout,"new weights\n");
+  for (j=0;j<nseqs;j++) if (group[j+1] == 1)fprintf( stdout,"sequence %d: %d\n", j+1,seq_weight[j]);
+}
+
+         score = prfalign(group, aligned);
+         info("Sequence:%d     Score:%d",(pint)iseq,(pint)score);
+         if (output_order == INPUT)
+          {
+            ++ix;
+            output_index[iseq] = iseq;
+          }
+         else
+            output_index[++ix] = iseq;
+      }
+
+   group=ckfree((void *)group);
+   aligned=ckfree((void *)aligned);
+   maxid=ckfree((void *)maxid);
+   tree_weight=ckfree((void *)tree_weight);   /* reallocated on every call, so release it here */
+
+   aln_score();
+/* make the rest (output stuff) into routine clustal_out in file amenu.c */
+
+   return(nseqs);
+
+}
+
+
+/* Initial profile alignment: every sequence gets the same weight, then tmat is refilled from the result. */
+sint palign1(void)
+{
+   sint   row, col, k, flat_weight, entries;
+   sint   *group, *aligned;
+   float  dscore;
+   lint   score;
+
+   info("Start of Initial Alignment");
+
+/* no tree is consulted here: INT_SCALE_FACTOR is shared out evenly over the nseqs sequences */
+   flat_weight = INT_SCALE_FACTOR/nseqs;
+   k = 0;
+   while (k < nseqs)
+     seq_weight[k++] = flat_weight;
+
+   distance_tree = FALSE;
+
+/* sequences 1..profile1_nseqs form group 1, the remainder group 2 */
+   group = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
+   k = 1;
+   while (k <= profile1_nseqs) group[k++] = 1;
+   k = profile1_nseqs+1;
+   while (k <= nseqs) group[k++] = 2;
+   entries = nseqs;
+
+/* every sequence is flagged in the aligned[] array passed to prfalign */
+   aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );
+   k = 1;
+   while (k <= nseqs) aligned[k++] = 1;
+
+   score = prfalign(group, aligned);
+   info("Sequences:%d      Score:%d",(pint)entries,(pint)score);
+   group=ckfree((void *)group);
+   aligned=ckfree((void *)aligned);
+
+/* store (100 - countid)/100 for each pair, mirrored across the diagonal */
+   for (row=1; row<=nseqs; row++)
+     for (col=row+1; col<=nseqs; col++) {
+       dscore = countid(row,col);
+       tmat[row][col] = ((double)100.0 - (double)dscore)/(double)100.0;
+       tmat[col][row] = tmat[row][col];
+     }
+
+   return(nseqs);
+}
+
+/* Percent identity of sequences s1 and s2, compared position by position over their common length. */
+/* Only positions where the s1 code lies in 0..max_aa-1 are scored; the result is 0 when there are none. */
+float countid(sint s1, sint s2)
+{
+   sint pos, limit;
+   sint matches = 0, scored = 0;
+   char r1, r2;
+   float score;
+
+/* stop at the end of the shorter sequence */
+   limit = seqlen_array[s1];
+   if (seqlen_array[s2] < limit) limit = seqlen_array[s2];
+
+   for (pos=1; pos<=limit; pos++) {
+     r1 = seq_array[s1][pos];
+     r2 = seq_array[s2][pos];
+     if ((r1 < 0) || (r1 >= max_aa)) continue;
+     scored++;
+     if (r1 == r2) matches++;
+   }
+
+   score = (scored == 0) ? 0 : 100.0 * (float)matches / (float)scored;
+   return(score);
+}
+
+/* Profile-to-profile alignment weighted by one guide tree per profile. */
+/* Profile 1 is sequences 1..profile1_nseqs (tree p1_tree_name); profile 2 is the rest (tree p2_tree_name). */
+/* A tree is only read for a profile of two or more sequences. Returns nseqs, or 0 if read_tree returns 0. */
+sint palign2(char *p1_tree_name,char *p2_tree_name)
+{
+   sint 	i,j,sum,entries,status;
+   lint 		score;
+   sint 	*aligned, *group;
+   sint		*maxid,*p1_weight,*p2_weight;
+
+   info("Start of Multiple Alignment");
+
+/* get the phylogenetic trees from *.ph */
+
+   if (profile1_nseqs >= 2)
+     {
+        status = read_tree(p1_tree_name, (sint)0, profile1_nseqs);
+        if (status == 0) return(0);
+     }
+
+/* calculate sequence weights according to branch lengths of the tree -
+   weights in global variable seq_weight normalised to sum to 100 */
+
+   p1_weight = (sint *) ckalloc( (profile1_nseqs) * sizeof(sint) );
+
+   calc_seq_weights((sint)0, profile1_nseqs, p1_weight);
+
+/* clear the memory for the phylogenetic tree */
+
+   if (profile1_nseqs >= 2)
+        clear_tree(NULL);
+
+   if (nseqs-profile1_nseqs >= 2)
+     {
+        status = read_tree(p2_tree_name, profile1_nseqs, nseqs);
+        if (status == 0) { p1_weight=ckfree((void *)p1_weight); return(0); }
+     }
+
+   p2_weight = (sint *) ckalloc( (nseqs) * sizeof(sint) );
+
+   calc_seq_weights(profile1_nseqs,nseqs, p2_weight);
+
+/* clear the memory for the phylogenetic tree */
+
+   if (nseqs-profile1_nseqs >= 2)
+        clear_tree(NULL);
+
+/* convert tmat distances to similarities */
+
+   for (i=1;i<nseqs;i++)
+        for (j=i+1;j<=nseqs;j++) {
+            tmat[i][j]=100.0-tmat[i][j]*100.0;
+            tmat[j][i]=tmat[i][j];
+        }
+
+
+/* weight sequences with max percent identity with other profile*/
+
+   maxid = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
+   for (i=0;i<profile1_nseqs;i++) {
+         maxid[i] = 0;
+         for (j=profile1_nseqs+1;j<=nseqs;j++) 
+                      if(maxid[i]<tmat[i+1][j]) maxid[i] = tmat[i+1][j];
+         seq_weight[i] = maxid[i]*p1_weight[i];
+   }
+
+   for (i=profile1_nseqs;i<nseqs;i++) {
+         maxid[i] = -1;
+         for (j=1;j<=profile1_nseqs;j++)
+                      if(maxid[i]<tmat[i+1][j]) maxid[i] = tmat[i+1][j];
+         seq_weight[i] = maxid[i]*p2_weight[i];
+   }
+/*
+  Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR
+*/
+
+         sum = 0;
+         for (j=0;j<nseqs;j++)
+            sum += seq_weight[j];
+         if (sum == 0)
+          {
+           for (j=0;j<nseqs;j++)
+                seq_weight[j] = 1;
+           sum = j;
+          }
+         for (j=0;j<nseqs;j++)
+             {
+               seq_weight[j] = (seq_weight[j] * INT_SCALE_FACTOR) / sum;
+               if (seq_weight[j] < 1) seq_weight[j] = 1;
+             }
+if (debug > 1) {
+  fprintf(stdout,"new weights\n");
+  for (j=0;j<nseqs;j++) fprintf( stdout,"sequence %d: %d\n", j+1,seq_weight[j]);
+}
+
+/* do the alignment.........  */
+
+   info("Aligning...");
+
+   group = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
+
+   for(i=1; i<=profile1_nseqs; ++i)
+         group[i] = 1;
+   for(i=profile1_nseqs+1; i<=nseqs; ++i)
+         group[i] = 2;
+   entries = nseqs;
+
+   aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );
+   for (i=1;i<=nseqs;i++) aligned[i] = 1;
+
+   score = prfalign(group, aligned);
+   info("Sequences:%d      Score:%d",(pint)entries,(pint)score);
+   group=ckfree((void *)group);
+   p1_weight=ckfree((void *)p1_weight);
+   p2_weight=ckfree((void *)p2_weight);
+   aligned=ckfree((void *)aligned);
+   maxid=ckfree((void *)maxid);
+
+/* DES   output_index = (int *)ckalloc( (nseqs+1) * sizeof (int)); */
+   for (i=1;i<=nseqs;i++) output_index[i] = i;
+
+   return(nseqs);
+}
+
+

Added: trunk/packages/clustalw/branches/upstream/current/matrices.h
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/matrices.h	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/matrices.h	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,854 @@
+char *amino_acid_order = "ABCDEFGHIKLMNPQRSTVWXYZ"; /* 23 one-letter codes; presumably the row/column order of the 23-row tables below - TODO confirm in readmat.c */
+
+short blosum30mt[]={ /* 23 rows, row k holds k values (lower triangle incl. diagonal); presumably in amino_acid_order order - confirm */
+  4,
+  0,  5,
+ -3, -2, 17,
+  0,  5, -3,  9,
+  0,  0,  1,  1,  6,
+ -2, -3, -3, -5, -4, 10,
+  0,  0, -4, -1, -2, -3,  8,
+ -2, -2, -5, -2,  0, -3, -3, 14,
+  0, -2, -2, -4, -3,  0, -1, -2,  6,
+  0,  0, -3,  0,  2, -1, -1, -2, -2,  4,
+ -1, -1,  0, -1, -1,  2, -2, -1,  2, -2,  4,
+  1, -2, -2, -3, -1, -2, -2,  2,  1,  2,  2,  6,
+  0,  4, -1,  1, -1, -1,  0, -1,  0,  0, -2,  0,  8,
+ -1, -2, -3, -1,  1, -4, -1,  1, -3,  1, -3, -4, -3, 11,
+  1, -1, -2, -1,  2, -3, -2,  0, -2,  0, -2, -1, -1,  0,  8,
+ -1, -2, -2, -1, -1, -1, -2, -1, -3,  1, -2,  0, -2, -1,  3,  8,
+  1,  0, -2,  0,  0, -1,  0, -1, -1,  0, -2, -2,  0, -1, -1, -1,  4,
+  1,  0, -2, -1, -2, -2, -2, -2,  0, -1,  0,  0,  1,  0,  0, -3,  2,  5,
+  1, -2, -2, -2, -3,  1, -3, -3,  4, -2,  1,  0, -2, -4, -3, -1, -1,  1,  5,
+ -5, -5, -2, -4, -1,  1,  1, -5, -3, -2, -2, -3, -7, -3, -1,  0, -3, -5, -3, 20,
+  0, -1, -2, -1, -1, -1, -1, -1,  0,  0,  0,  0,  0, -1,  0, -1,  0,  0,  0, -2, -1,
+ -4, -3, -6, -1, -2,  3, -3,  0, -1, -1,  3, -1, -4, -2, -1,  0, -2, -1,  1,  5, -1,  9,
+  0,  0,  0,  0,  5, -4, -2,  0, -3,  1, -1, -1, -1,  0,  4,  0, -1, -1, -3, -1,  0, -2,  4};
+
+/*
+short blosum35mt[]={
+  5,
+ -1,  5,
+ -2, -2, 15,
+ -1,  5, -3,  8,
+ -1,  0, -1,  2,  6,
+ -2, -2, -4, -3, -3,  8,
+  0,  0, -3, -2, -2, -3,  7,
+ -2,  0, -4,  0, -1, -3, -2, 12,
+ -1, -2, -4, -3, -3,  1, -3, -3,  5,
+  0,  0, -2, -1,  1, -1, -1, -2, -2,  5,
+ -2, -2, -2, -2, -1,  2, -3, -2,  2, -2,  5,
+  0, -2, -4, -3, -2,  0, -1,  1,  1,  0,  3,  6,
+ -1,  4, -1,  1, -1, -1,  1,  1, -1,  0, -2, -1,  7,
+ -2, -1, -4, -1,  0, -4, -2, -1, -1,  0, -3, -3, -2, 10,
+  0,  0, -3, -1,  2, -4, -2, -1, -2,  0, -2, -1,  1,  0,  7,
+ -1, -1, -3, -1, -1, -1, -2, -1, -3,  2, -2,  0, -1, -2,  2,  8,
+  1,  0, -3, -1,  0, -1,  1, -1, -2,  0, -2, -1,  0, -2,  0, -1,  4,
+  0, -1, -1, -1, -1, -1, -2, -2, -1,  0,  0,  0,  0,  0,  0, -2,  2,  5,
+  0, -2, -2, -2, -2,  1, -3, -4,  4, -2,  2,  1, -2, -3, -3, -1, -1,  1,  5,
+ -2, -3, -5, -3, -1,  1, -1, -4, -1,  0,  0,  1, -2, -4, -1,  0, -2, -2, -2, 16,
+  0, -1, -2, -1, -1, -1, -1, -1,  0,  0,  0,  0,  0, -1, -1, -1,  0,  0,  0, -1, -1,
+ -1, -2, -5, -2, -1,  3, -2,  0,  0, -1,  0,  0, -2, -3,  0,  0, -1, -2,  0,  3, -1,  8,
+ -1,  0, -2,  1,  5, -3, -2, -1, -3,  1, -2, -2,  0,  0,  4,  0,  0, -1, -2, -1,  0, -1,  4};
+*/
+short blosum40mt[]={ /* 23 rows, row k holds k values (lower triangle incl. diagonal); presumably in amino_acid_order order - confirm */
+  5,
+ -1,  5,
+ -2, -2, 16,
+ -1,  6, -2,  9,
+ -1,  1, -2,  2,  7,
+ -3, -3, -2, -4, -3,  9,
+  1, -1, -3, -2, -3, -3,  8,
+ -2,  0, -4,  0,  0, -2, -2, 13,
+ -1, -3, -4, -4, -4,  1, -4, -3,  6,
+ -1,  0, -3,  0,  1, -3, -2, -1, -3,  6,
+ -2, -3, -2, -3, -2,  2, -4, -2,  2, -2,  6,
+ -1, -3, -3, -3, -2,  0, -2,  1,  1, -1,  3,  7,
+ -1,  4, -2,  2, -1, -3,  0,  1, -2,  0, -3, -2,  8,
+ -2, -2, -5, -2,  0, -4, -1, -2, -2, -1, -4, -2, -2, 11,
+  0,  0, -4, -1,  2, -4, -2,  0, -3,  1, -2, -1,  1, -2,  8,
+ -2, -1, -3, -1, -1, -2, -3,  0, -3,  3, -2, -1,  0, -3,  2,  9,
+  1,  0, -1,  0,  0, -2,  0, -1, -2,  0, -3, -2,  1, -1,  1, -1,  5,
+  0,  0, -1, -1, -1, -1, -2, -2, -1,  0, -1, -1,  0,  0, -1, -2,  2,  6,
+  0, -3, -2, -3, -3,  0, -4, -4,  4, -2,  2,  1, -3, -3, -3, -2, -1,  1,  5,
+ -3, -4, -6, -5, -2,  1, -2, -5, -3, -2, -1, -2, -4, -4, -1, -2, -5, -4, -3, 19,
+  0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1,  0, -1, -2, -1, -1,  0,  0, -1, -2, -1,
+ -2, -3, -4, -3, -2,  4, -3,  2,  0, -1,  0,  1, -2, -3, -1, -1, -2, -1, -1,  3, -1,  9,
+ -1,  2, -3,  1,  5, -4, -2,  0, -4,  1, -2, -2,  0, -1,  4,  0,  0, -1, -3, -2, -1, -2,  5};
+
+short blosum45mt[]={ /* 23 rows, row k holds k values (lower triangle incl. diagonal); presumably in amino_acid_order order - confirm */
+  5,
+ -1,  4,
+ -1, -2, 12,
+ -2,  5, -3,  7,
+ -1,  1, -3,  2,  6,
+ -2, -3, -2, -4, -3,  8,
+  0, -1, -3, -1, -2, -3,  7,
+ -2,  0, -3,  0,  0, -2, -2, 10,
+ -1, -3, -3, -4, -3,  0, -4, -3,  5,
+ -1,  0, -3,  0,  1, -3, -2, -1, -3,  5,
+ -1, -3, -2, -3, -2,  1, -3, -2,  2, -3,  5,
+ -1, -2, -2, -3, -2,  0, -2,  0,  2, -1,  2,  6,
+ -1,  4, -2,  2,  0, -2,  0,  1, -2,  0, -3, -2,  6,
+ -1, -2, -4, -1,  0, -3, -2, -2, -2, -1, -3, -2, -2,  9,
+ -1,  0, -3,  0,  2, -4, -2,  1, -2,  1, -2,  0,  0, -1,  6,
+ -2, -1, -3, -1,  0, -2, -2,  0, -3,  3, -2, -1,  0, -2,  1,  7,
+  1,  0, -1,  0,  0, -2,  0, -1, -2, -1, -3, -2,  1, -1,  0, -1,  4,
+  0,  0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1,  0, -1, -1, -1,  2,  5,
+  0, -3, -1, -3, -3,  0, -3, -3,  3, -2,  1,  1, -3, -3, -3, -2, -1,  0,  5,
+ -2, -4, -5, -4, -3,  1, -2, -3, -2, -2, -2, -2, -4, -3, -2, -2, -4, -3, -3, 15,
+  0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0,  0, -1, -2, -1,
+ -2, -2, -3, -2, -2,  3, -3,  2,  0, -1,  0,  0, -2, -3, -1, -1, -2, -1, -1,  3, -1,  8,
+ -1,  2, -3,  1,  4, -3, -2,  0, -3,  1, -2, -1,  0, -1,  4,  0,  0, -1, -3, -2, -1, -2,  4};
+
+/*
+short blosum50mt[]={
+  5,
+ -2,  5,
+ -1, -3, 13,
+ -2,  5, -4,  8,
+ -1,  1, -3,  2,  6,
+ -3, -4, -2, -5, -3,  8,
+  0, -1, -3, -1, -3, -4,  8,
+ -2,  0, -3, -1,  0, -1, -2, 10,
+ -1, -4, -2, -4, -4,  0, -4, -4,  5,
+ -1,  0, -3, -1,  1, -4, -2,  0, -3,  6,
+ -2, -4, -2, -4, -3,  1, -4, -3,  2, -3,  5,
+ -1, -3, -2, -4, -2,  0, -3, -1,  2, -2,  3,  7,
+ -1,  4, -2,  2,  0, -4,  0,  1, -3,  0, -4, -2,  7,
+ -1, -2, -4, -1, -1, -4, -2, -2, -3, -1, -4, -3, -2, 10,
+ -1,  0, -3,  0,  2, -4, -2,  1, -3,  2, -2,  0,  0, -1,  7,
+ -2, -1, -4, -2,  0, -3, -3,  0, -4,  3, -3, -2, -1, -3,  1,  7,
+  1,  0, -1,  0, -1, -3,  0, -1, -3,  0, -3, -2,  1, -1,  0, -1,  5,
+  0,  0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1,  0, -1, -1, -1,  2,  5,
+  0, -4, -1, -4, -3, -1, -4, -4,  4, -3,  1,  1, -3, -3, -3, -3, -2,  0,  5,
+ -3, -5, -5, -5, -3,  1, -3, -3, -3, -3, -2, -1, -4, -4, -1, -3, -4, -3, -3, 15,
+ -1, -1, -2, -1, -1, -2, -2, -1, -1, -1, -1, -1, -1, -2, -1, -1, -1,  0, -1, -3, -1,
+ -2, -3, -3, -3, -2,  4, -3,  2, -1, -2, -1,  0, -2, -3, -1, -1, -2, -2, -1,  2, -1,  8,
+ -1,  2, -3,  1,  5, -4, -2,  0, -3,  1, -3, -1,  0, -1,  4,  0,  0, -1, -3, -2, -1, -2,  5};
+
+short blosum55mt[]={
+  5,
+ -2,  5,
+  0, -4, 13,
+ -2,  5, -4,  8,
+ -1,  1, -4,  2,  7,
+ -3, -5, -3, -5, -4,  9,
+  0, -1, -3, -2, -3, -4,  8,
+ -2,  0, -4, -1, -1, -1, -2, 11,
+ -2, -4, -2, -4, -4,  0, -5, -4,  6,
+ -1,  0, -4, -1,  1, -4, -2,  0, -4,  6,
+ -2, -4, -2, -5, -4,  1, -5, -3,  2, -3,  6,
+ -1, -3, -2, -4, -3,  0, -3, -2,  2, -2,  3,  8,
+ -2,  4, -3,  2,  0, -4,  0,  1, -4,  0, -4, -3,  8,
+ -1, -2, -3, -2, -1, -5, -3, -3, -3, -1, -4, -3, -2, 10,
+ -1,  0, -4,  0,  2, -4, -2,  1, -4,  2, -3,  0,  0, -1,  7,
+ -2, -1, -4, -2,  0, -3, -3,  0, -4,  3, -3, -2, -1, -3,  1,  8,
+  2,  0, -1,  0,  0, -3,  0, -1, -3,  0, -3, -2,  1, -1,  0, -1,  5,
+  0, -1, -1, -1, -1, -3, -2, -2, -1, -1, -2, -1,  0, -1, -1, -1,  2,  6,
+  0, -4, -1, -4, -3, -1, -4, -4,  4, -3,  1,  1, -4, -3, -3, -3, -2,  0,  5,
+ -4, -5, -4, -5, -3,  2, -3, -3, -3, -4, -3, -2, -5, -5, -2, -3, -4, -3, -4, 15,
+ -1, -1, -2, -2, -1, -2, -2, -1, -1, -1, -1, -1, -1, -2, -1, -1, -1, -1, -1, -3, -1,
+ -2, -3, -3, -3, -2,  4, -4,  2, -1, -2, -1, -1, -2, -4, -1, -2, -2, -2, -2,  3, -1,  9,
+ -1,  2, -4,  1,  5, -4, -3,  0, -4,  1, -3, -2,  0, -1,  4,  0,  0, -1, -3, -3, -1, -2,  5};
+
+
+short blosum62mt[]={
+  4,
+ -2,  4,
+  0, -3,  9,
+ -2,  4, -3,  6,
+ -1,  1, -4,  2,  5,
+ -2, -3, -2, -3, -3,  6,
+  0, -1, -3, -1, -2, -3,  6,
+ -2,  0, -3, -1,  0, -1, -2,  8,
+ -1, -3, -1, -3, -3,  0, -4, -3,  4,
+ -1,  0, -3, -1,  1, -3, -2, -1, -3,  5,
+ -1, -4, -1, -4, -3,  0, -4, -3,  2, -2,  4,
+ -1, -3, -1, -3, -2,  0, -3, -2,  1, -1,  2,  5,
+ -2,  3, -3,  1,  0, -3,  0,  1, -3,  0, -3, -2,  6,
+ -1, -2, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2,  7,
+ -1,  0, -3,  0,  2, -3, -2,  0, -3,  1, -2,  0,  0, -1,  5,
+ -1, -1, -3, -2,  0, -3, -2,  0, -3,  2, -2, -1,  0, -2,  1,  5,
+  1,  0, -1,  0,  0, -2,  0, -1, -2,  0, -2, -1,  1, -1,  0, -1,  4,
+  0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1,  0, -1, -1, -1,  1,  5,
+  0, -3, -1, -3, -2, -1, -3, -3,  3, -2,  1,  1, -3, -2, -2, -3, -2,  0,  4,
+ -3, -4, -2, -4, -3,  1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11,
+  0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1,  0,  0, -1, -2, -1,
+ -2, -3, -2, -3, -2,  3, -3,  2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1,  2, -1,  7,
+ -1,  1, -3,  1,  4, -3, -2,  0, -3,  1, -3, -1,  0, -1,  3,  0,  0, -1, -2, -3, -1, -2,  4};
+*/
+
+short blosum62mt2[]={ /* same 23-row triangular layout; values appear to be exactly 2x the commented-out blosum62mt above */
+  8,
+ -4,  8,
+  0, -6, 18,
+ -4,  8, -6, 12,
+ -2,  2, -8,  4, 10,
+ -4, -6, -4, -6, -6, 12,
+  0, -2, -6, -2, -4, -6, 12,
+ -4,  0, -6, -2,  0, -2, -4, 16,
+ -2, -6, -2, -6, -6,  0, -8, -6,  8,
+ -2,  0, -6, -2,  2, -6, -4, -2, -6, 10,
+ -2, -8, -2, -8, -6,  0, -8, -6,  4, -4,  8,
+ -2, -6, -2, -6, -4,  0, -6, -4,  2, -2,  4, 10,
+ -4,  6, -6,  2,  0, -6,  0,  2, -6,  0, -6, -4, 12,
+ -2, -4, -6, -2, -2, -8, -4, -4, -6, -2, -6, -4, -4, 14,
+ -2,  0, -6,  0,  4, -6, -4,  0, -6,  2, -4,  0,  0, -2, 10,
+ -2, -2, -6, -4,  0, -6, -4,  0, -6,  4, -4, -2,  0, -4,  2, 10,
+  2,  0, -2,  0,  0, -4,  0, -2, -4,  0, -4, -2,  2, -2,  0, -2,  8,
+  0, -2, -2, -2, -2, -4, -4, -4, -2, -2, -2, -2,  0, -2, -2, -2,  2, 10,
+  0, -6, -2, -6, -4, -2, -6, -6,  6, -4,  2,  2, -6, -4, -4, -6, -4,  0,  8,
+ -6, -8, -4, -8, -6,  2, -4, -4, -6, -6, -4, -2, -8, -8, -4, -6, -6, -4, -6, 22,
+  0, -2, -4, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -4, -2, -2,  0,  0, -2, -4, -2,
+ -4, -6, -4, -6, -4,  6, -6,  4, -2, -4, -2, -2, -4, -6, -2, -4, -4, -4, -2,  4, -2, 14,
+ -2,  2, -6,  2,  8, -6, -4,  0, -6,  2, -6, -2,  0, -2,  6,  0,  0, -2, -4, -6, -2, -4,  8};
+
+/*
+short blosum65mt[]={
+  4,
+ -2,  4,
+  0, -3,  9,
+ -2,  4, -4,  6,
+ -1,  1, -4,  2,  5,
+ -2, -3, -2, -4, -3,  6,
+  0, -1, -3, -1, -2, -3,  6,
+ -2,  0, -3, -1,  0, -1, -2,  8,
+ -1, -3, -1, -3, -3,  0, -4, -3,  4,
+ -1,  0, -3, -1,  1, -3, -2, -1, -3,  5,
+ -2, -4, -1, -4, -3,  0, -4, -3,  2, -3,  4,
+ -1, -3, -2, -3, -2,  0, -3, -2,  1, -2,  2,  6,
+ -2,  3, -3,  1,  0, -3, -1,  1, -3,  0, -4, -2,  6,
+ -1, -2, -3, -2, -1, -4, -2, -2, -3, -1, -3, -3, -2,  8,
+ -1,  0, -3,  0,  2, -3, -2,  1, -3,  1, -2,  0,  0, -1,  6,
+ -1, -1, -4, -2,  0, -3, -2,  0, -3,  2, -2, -2,  0, -2,  1,  6,
+  1,  0, -1,  0,  0, -2,  0, -1, -2,  0, -3, -2,  1, -1,  0, -1,  4,
+  0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1,  0, -1, -1, -1,  1,  5,
+  0, -3, -1, -3, -3, -1, -3, -3,  3, -2,  1,  1, -3, -2, -2, -3, -2,  0,  4,
+ -3, -4, -2, -5, -3,  1, -3, -2, -2, -3, -2, -2, -4, -4, -2, -3, -3, -3, -3, 10,
+ -1, -1, -2, -1, -1, -2, -2, -1, -1, -1, -1, -1, -1, -2, -1, -1, -1, -1, -1, -2, -1,
+ -2, -3, -2, -3, -2,  3, -3,  2, -1, -2, -1, -1, -2, -3, -2, -2, -2, -2, -1,  2, -1,  7,
+ -1,  1, -4,  1,  4, -3, -2,  0, -3,  1, -3, -2,  0, -1,  3,  0,  0, -1, -2, -3, -1, -2,  4};
+
+short blosum70mt[]={
+  4,
+ -2,  4,
+ -1, -4,  9,
+ -2,  4, -4,  6,
+ -1,  1, -4,  1,  5,
+ -2, -4, -2, -4, -4,  6,
+  0, -1, -3, -2, -2, -4,  6,
+ -2, -1, -4, -1,  0, -1, -2,  8,
+ -2, -4, -1, -4, -4,  0, -4, -4,  4,
+ -1, -1, -4, -1,  1, -3, -2, -1, -3,  5,
+ -2, -4, -2, -4, -3,  0, -4, -3,  2, -3,  4,
+ -1, -3, -2, -3, -2,  0, -3, -2,  1, -2,  2,  6,
+ -2,  3, -3,  1,  0, -3, -1,  0, -4,  0, -4, -2,  6,
+ -1, -2, -3, -2, -1, -4, -3, -2, -3, -1, -3, -3, -2,  8,
+ -1,  0, -3, -1,  2, -3, -2,  1, -3,  1, -2,  0,  0, -2,  6,
+ -2, -1, -4, -2,  0, -3, -3,  0, -3,  2, -3, -2, -1, -2,  1,  6,
+  1,  0, -1,  0,  0, -3, -1, -1, -3,  0, -3, -2,  0, -1,  0, -1,  4,
+  0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -2, -1,  0, -1, -1, -1,  1,  5,
+  0, -3, -1, -4, -3, -1, -4, -3,  3, -3,  1,  1, -3, -3, -2, -3, -2,  0,  4,
+ -3, -4, -3, -5, -4,  1, -3, -2, -3, -3, -2, -2, -4, -4, -2, -3, -3, -3, -3, 11,
+ -1, -1, -2, -2, -1, -2, -2, -1, -1, -1, -1, -1, -1, -2, -1, -1, -1, -1, -1, -3, -1,
+ -2, -3, -3, -4, -3,  3, -4,  2, -1, -2, -1, -1, -2, -3, -2, -2, -2, -2, -2,  2, -2,  7,
+ -1,  0, -4,  1,  4, -4, -2,  0, -3,  1, -3, -2,  0, -1,  3,  0,  0, -1, -3, -3, -1, -2,  4};
+
+short blosum75mt[]={
+  4,
+ -2,  4,
+ -1, -4,  9,
+ -2,  4, -4,  6,
+ -1,  1, -5,  1,  5,
+ -3, -4, -2, -4, -4,  6,
+  0, -1, -3, -2, -3, -4,  6,
+ -2, -1, -4, -1,  0, -2, -2,  8,
+ -2, -4, -1, -4, -4,  0, -5, -4,  4,
+ -1, -1, -4, -1,  1, -4, -2, -1, -3,  5,
+ -2, -4, -2, -4, -4,  0, -4, -3,  1, -3,  4,
+ -1, -3, -2, -4, -2,  0, -3, -2,  1, -2,  2,  6,
+ -2,  3, -3,  1, -1, -4, -1,  0, -4,  0, -4, -3,  6,
+ -1, -2, -4, -2, -1, -4, -3, -2, -3, -1, -3, -3, -3,  8,
+ -1,  0, -3, -1,  2, -4, -2,  1, -3,  1, -3,  0,  0, -2,  6,
+ -2, -1, -4, -2,  0, -3, -3,  0, -3,  2, -3, -2, -1, -2,  1,  6,
+  1,  0, -1, -1,  0, -3, -1, -1, -3,  0, -3, -2,  0, -1,  0, -1,  5,
+  0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -2, -1,  0, -1, -1, -1,  1,  5,
+  0, -4, -1, -4, -3, -1, -4, -4,  3, -3,  1,  1, -3, -3, -2, -3, -2,  0,  4,
+ -3, -5, -3, -5, -4,  1, -3, -2, -3, -4, -2, -2, -4, -5, -2, -3, -3, -3, -3, 11,
+ -1, -2, -2, -2, -1, -2, -2, -1, -2, -1, -1, -1, -1, -2, -1, -1, -1, -1, -1, -3, -1,
+ -2, -3, -3, -4, -3,  3, -4,  2, -2, -2, -1, -2, -3, -4, -2, -2, -2, -2, -2,  2, -2,  7,
+ -1,  0, -4,  1,  4, -4, -2,  0, -4,  1, -3, -2,  0, -2,  3,  0,  0, -1, -3, -3, -1, -3,  4};
+*/
+
+short blosum80mt[]={  /* 23-row lower-triangular score table stored row by row (row i has i entries, ending on the diagonal); residue order is not stated here - presumably the amino-acid order string defined elsewhere, TODO confirm. */
+  7,
+ -3,  6,
+ -1, -6, 13,
+ -3,  6, -7, 10,
+ -2,  1, -7,  2,  8,
+ -4, -6, -4, -6, -6, 10,
+  0, -2, -6, -3, -4, -6,  9,
+ -3, -1, -7, -2,  0, -2, -4, 12,
+ -3, -6, -2, -7, -6, -1, -7, -6,  7,
+ -1, -1, -6, -2,  1, -5, -3, -1, -5,  8,
+ -3, -7, -3, -7, -6,  0, -7, -5,  2, -4,  6,
+ -2, -5, -3, -6, -4,  0, -5, -4,  2, -3,  3,  9,
+ -3,  5, -5,  2, -1, -6, -1,  1, -6,  0, -6, -4,  9,
+ -1, -4, -6, -3, -2, -6, -5, -4, -5, -2, -5, -4, -4, 12,
+ -2, -1, -5, -1,  3, -5, -4,  1, -5,  2, -4, -1,  0, -3,  9,
+ -3, -2, -6, -3, -1, -5, -4,  0, -5,  3, -4, -3, -1, -3,  1,  9,
+  2,  0, -2, -1, -1, -4, -1, -2, -4, -1, -4, -3,  1, -2, -1, -2,  7,
+  0, -1, -2, -2, -2, -4, -3, -3, -2, -1, -3, -1,  0, -3, -1, -2,  2,  8,
+ -1, -6, -2, -6, -4, -2, -6, -5,  4, -4,  1,  1, -5, -4, -4, -4, -3,  0,  7,
+ -5, -8, -5, -8, -6,  0, -6, -4, -5, -6, -4, -3, -7, -7, -4, -5, -6, -5, -5, 16,
+ -1, -3, -4, -3, -2, -3, -3, -2, -2, -2, -2, -2, -2, -3, -2, -2, -1, -1, -2, -5, -2,
+ -4, -5, -5, -6, -5,  4, -6,  3, -3, -4, -2, -3, -4, -6, -3, -4, -3, -3, -3,  3, -3, 11,
+ -2,  0, -7,  1,  6, -6, -4,  0, -6,  1, -5, -3, -1, -2,  5,  0, -1, -2, -4, -5, -1, -4,  6};
+
+/*
+short blosum85mt[]={
+  5,
+ -2,  4,
+ -1, -4,  9,
+ -2,  4, -5,  7,
+ -1,  0, -5,  1,  6,
+ -3, -4, -3, -4, -4,  7,
+  0, -1, -4, -2, -3, -4,  6,
+ -2, -1, -5, -2, -1, -2, -3,  8,
+ -2, -5, -2, -5, -4, -1, -5, -4,  5,
+ -1, -1, -4, -1,  0, -4, -2, -1, -3,  6,
+ -2, -5, -2, -5, -4,  0, -5, -3,  1, -3,  4,
+ -2, -4, -2, -4, -3, -1, -4, -3,  1, -2,  2,  7,
+ -2,  4, -4,  1, -1, -4, -1,  0, -4,  0, -4, -3,  7,
+ -1, -3, -4, -2, -2, -4, -3, -3, -4, -2, -4, -3, -3,  8,
+ -1, -1, -4, -1,  2, -4, -3,  1, -4,  1, -3,  0,  0, -2,  6,
+ -2, -2, -4, -2, -1, -4, -3,  0, -4,  2, -3, -2, -1, -2,  1,  6,
+  1,  0, -2, -1, -1, -3, -1, -1, -3, -1, -3, -2,  0, -1, -1, -1,  5,
+  0, -1, -2, -2, -1, -3, -2, -2, -1, -1, -2, -1,  0, -2, -1, -2,  1,  5,
+ -1, -4, -1, -4, -3, -1, -4, -4,  3, -3,  0,  0, -4, -3, -3, -3, -2,  0,  5,
+ -3, -5, -4, -6, -4,  0, -4, -3, -3, -5, -3, -2, -5, -5, -3, -4, -4, -4, -3, 11,
+ -1, -2, -3, -2, -1, -2, -2, -2, -2, -1, -2, -1, -2, -2, -1, -2, -1, -1, -1, -3, -2,
+ -3, -4, -3, -4, -4,  3, -5,  2, -2, -3, -2, -2, -3, -4, -2, -3, -2, -2, -2,  2, -2,  7,
+ -1,  0, -5,  1,  4, -4, -3,  0, -4,  1, -4, -2, -1, -2,  4,  0, -1, -1, -3, -4, -1, -3,  4};
+
+short blosum90mt[]={
+  5,
+ -2,  4,
+ -1, -4,  9,
+ -3,  4, -5,  7,
+ -1,  0, -6,  1,  6,
+ -3, -4, -3, -5, -5,  7,
+  0, -2, -4, -2, -3, -5,  6,
+ -2, -1, -5, -2, -1, -2, -3,  8,
+ -2, -5, -2, -5, -4, -1, -5, -4,  5,
+ -1, -1, -4, -1,  0, -4, -2, -1, -4,  6,
+ -2, -5, -2, -5, -4,  0, -5, -4,  1, -3,  5,
+ -2, -4, -2, -4, -3, -1, -4, -3,  1, -2,  2,  7,
+ -2,  4, -4,  1, -1, -4, -1,  0, -4,  0, -4, -3,  7,
+ -1, -3, -4, -3, -2, -4, -3, -3, -4, -2, -4, -3, -3,  8,
+ -1, -1, -4, -1,  2, -4, -3,  1, -4,  1, -3,  0,  0, -2,  7,
+ -2, -2, -5, -3, -1, -4, -3,  0, -4,  2, -3, -2, -1, -3,  1,  6,
+  1,  0, -2, -1, -1, -3, -1, -2, -3, -1, -3, -2,  0, -2, -1, -1,  5,
+  0, -1, -2, -2, -1, -3, -3, -2, -1, -1, -2, -1,  0, -2, -1, -2,  1,  6,
+ -1, -4, -2, -5, -3, -2, -5, -4,  3, -3,  0,  0, -4, -3, -3, -3, -2, -1,  5,
+ -4, -6, -4, -6, -5,  0, -4, -3, -4, -5, -3, -2, -5, -5, -3, -4, -4, -4, -3, 11,
+ -1, -2, -3, -2, -2, -2, -2, -2, -2, -1, -2, -1, -2, -2, -1, -2, -1, -1, -2, -3, -2,
+ -3, -4, -4, -4, -4,  3, -5,  1, -2, -3, -2, -2, -3, -4, -3, -3, -3, -2, -3,  2, -2,  8,
+ -1,  0, -5,  0,  4, -4, -3,  0, -4,  1, -4, -2, -1, -2,  4,  0, -1, -1, -3, -4, -1, -3,  4};
+*/
+
+short pam20mt[]={  /* 23-row lower-triangular score table stored row by row (row i has i entries, ending on the diagonal); residue order is not stated here - presumably the amino-acid order string defined elsewhere, TODO confirm. */
+  6,
+ -5,  6,
+ -8,-14, 10,
+ -4,  6,-16,  8,
+ -3,  0,-16,  2,  8,
+ -9,-12,-15,-17,-16,  9,
+ -3, -4,-11, -4, -5,-10,  7,
+ -8, -2, -8, -5, -6, -7,-10,  9,
+ -6, -7, -7, -9, -6, -3,-13,-11,  9,
+ -8, -3,-16, -6, -5,-16, -8, -8, -7,  7,
+ -7,-10,-17,-15,-10, -4,-12, -7, -2, -9,  7,
+ -6,-12,-16,-13, -8, -5,-10,-13, -2, -3,  0, 11,
+ -5,  6,-13,  1, -3,-10, -4, -1, -6, -2, -8,-11,  8,
+ -2, -8, -9, -9, -7,-11, -7, -5,-10, -8, -8, -9, -7,  8,
+ -5, -4,-16, -4,  0,-15, -8,  0, -9, -4, -6, -5, -5, -4,  9,
+ -8, -9, -9,-12,-11,-10,-11, -3, -6, -1,-10, -5, -7, -5, -2,  9,
+ -1, -2, -4, -5, -5, -7, -3, -7, -8, -5, -9, -6, -1, -3, -6, -4,  7,
+ -1, -4, -9, -6, -7,-10, -7, -8, -3, -4, -8, -5, -3, -5, -7, -8,  0,  7,
+ -3, -9, -7, -9, -8, -9, -7, -7,  1,-10, -3, -2, -9, -7, -8, -9, -8, -4,  7,
+-16,-11,-18,-17,-19, -6,-17, -8,-16,-14, -7,-15, -9,-16,-15, -3, -6,-15,-18, 13,
+ -4, -6,-11, -7, -6, -9, -6, -6, -6, -6, -7, -6, -4, -6, -6, -7, -4, -5, -6,-13, -6,
+ -9, -7, -5,-13, -9,  1,-16, -4, -7,-10, -8,-13, -5,-16,-14,-11, -8, -7, -8, -6, -9, 10,
+ -4, -1,-16,  0,  6,-16, -6, -2, -7, -5, -8, -6, -4, -5,  7, -5, -6, -7, -8,-17, -6,-11,  6};
+
+short pam60mt[]={  /* 23-row lower-triangular score table stored row by row (row i has i entries, ending on the diagonal); residue order is not stated here - presumably the amino-acid order string defined elsewhere, TODO confirm. */
+  5,
+ -2,  5,
+ -5, -9,  9,
+ -2,  5,-10,  7,
+ -1,  2,-10,  3,  7,
+ -6, -8, -9,-11,-10,  8,
+  0, -2, -7, -2, -2, -7,  6,
+ -5,  0, -6, -2, -3, -4, -6,  8,
+ -3, -4, -4, -5, -4, -1, -7, -6,  7,
+ -5, -1,-10, -2, -3,-10, -5, -4, -4,  6,
+ -4, -7,-11, -9, -7, -1, -8, -4,  0, -6,  6,
+ -3, -6,-10, -7, -5, -2, -6, -7,  1,  0,  2, 10,
+ -2,  5, -7,  2,  0, -6, -1,  1, -4,  0, -5, -6,  6,
+  0, -4, -6, -5, -3, -7, -4, -2, -6, -4, -5, -6, -4,  7,
+ -3, -1,-10, -1,  2, -9, -5,  2, -5, -1, -3, -2, -2, -1,  7,
+ -5, -5, -6, -6, -6, -7, -7,  0, -4,  2, -6, -2, -3, -2,  0,  8,
+  1,  0, -1, -2, -2, -5,  0, -4, -4, -2, -6, -4,  1,  0, -3, -2,  5,
+  1, -2, -5, -3, -4, -6, -3, -5, -1, -2, -5, -2, -1, -2, -4, -4,  1,  6,
+ -1, -5, -4, -6, -4, -5, -4, -5,  3, -6, -1,  0, -5, -4, -5, -5, -4, -1,  6,
+-10, -8,-12,-11,-12, -3,-11, -5,-10, -8, -4, -9, -6,-10, -9,  0, -4, -9,-11, 13,
+ -2, -3, -6, -3, -3, -5, -3, -3, -3, -3, -4, -3, -2, -3, -3, -4, -2, -2, -3, -8, -3,
+ -6, -5, -2, -8, -7,  3,-10, -2, -4, -7, -5, -7, -3,-10, -8, -8, -5, -5, -5, -3, -5,  9,
+ -2,  1,-10,  2,  5,-10, -3,  0, -4, -2, -5, -4, -1, -2,  6, -2, -3, -4, -5,-11, -3, -7,  5};
+
+short pam120mt[]={  /* 23-row lower-triangular score table stored row by row (row i has i entries, ending on the diagonal); residue order is not stated here - presumably the amino-acid order string defined elsewhere, TODO confirm. */
+  3,
+  0,  4,
+ -3, -6,  9,
+  0,  4, -7,  5,
+  0,  3, -7,  3,  5,
+ -4, -5, -6, -7, -7,  8,
+  1,  0, -4,  0, -1, -5,  5,
+ -3,  1, -4,  0, -1, -3, -4,  7,
+ -1, -3, -3, -3, -3,  0, -4, -4,  6,
+ -2,  0, -7, -1, -1, -7, -3, -2, -3,  5,
+ -3, -4, -7, -5, -4,  0, -5, -3,  1, -4,  5,
+ -2, -4, -6, -4, -3, -1, -4, -4,  1,  0,  3,  8,
+ -1,  3, -5,  2,  1, -4,  0,  2, -2,  1, -4, -3,  4,
+  1, -2, -4, -3, -2, -5, -2, -1, -3, -2, -3, -3, -2,  6,
+ -1,  0, -7,  1,  2, -6, -3,  3, -3,  0, -2, -1,  0,  0,  6,
+ -3, -2, -4, -3, -3, -5, -4,  1, -2,  2, -4, -1, -1, -1,  1,  6,
+  1,  0,  0,  0, -1, -3,  1, -2, -2, -1, -4, -2,  1,  1, -2, -1,  3,
+  1,  0, -3, -1, -2, -4, -1, -3,  0, -1, -3, -1,  0, -1, -2, -2,  2,  4,
+  0, -3, -3, -3, -3, -3, -2, -3,  3, -4,  1,  1, -3, -2, -3, -3, -2,  0,  5,
+ -7, -6, -8, -8, -8, -1, -8, -3, -6, -5, -3, -6, -4, -7, -6,  1, -2, -6, -8, 12,
+ -1, -1, -4, -2, -1, -3, -2, -2, -1, -2, -2, -2, -1, -2, -1, -2, -1, -1, -1, -5, -2,
+ -4, -3, -1, -5, -5,  4, -6, -1, -2, -5, -2, -4, -2, -6, -5, -5, -3, -3, -3, -2, -3,  8,
+ -1,  2, -7,  3,  4, -6, -2,  1, -3, -1, -3, -2,  0, -1,  4, -1, -1, -2, -3, -7, -1, -5,  4};
+
+/*
+short pam160mt[]={
+  2,
+  0,  3,
+ -2, -4,  9,
+  0,  3, -5,  4,
+  0,  2, -5,  3,  4,
+ -3, -4, -5, -6, -5,  7,
+  1,  0, -3,  0,  0, -4,  4,
+ -2,  1, -3,  0,  0, -2, -3,  6,
+ -1, -2, -2, -3, -2,  0, -3, -3,  5,
+ -2,  0, -5,  0, -1, -5, -2, -1, -2,  4,
+ -2, -4, -6, -4, -3,  1, -4, -2,  2, -3,  5,
+ -1, -3, -5, -3, -2,  0, -3, -3,  2,  0,  3,  7,
+  0,  2, -4,  2,  1, -3,  0,  2, -2,  1, -3, -2,  3,
+  1, -1, -3, -2, -1, -4, -1, -1, -2, -2, -3, -2, -1,  5,
+ -1,  1, -5,  1,  2, -5, -2,  2, -2,  0, -2, -1,  0,  0,  5,
+ -2, -1, -3, -2, -2, -4, -3,  1, -2,  3, -3, -1, -1, -1,  1,  6,
+  1,  0,  0,  0,  0, -3,  1, -1, -2, -1, -3, -2,  1,  1, -1, -1,  2,
+  1,  0, -2, -1, -1, -3, -1, -2,  0,  0, -2, -1,  0,  0, -1, -1,  1,  3,
+  0, -2, -2, -3, -2, -2, -2, -2,  3, -3,  1,  1, -2, -2, -2, -3, -1,  0,  4,
+ -5, -5, -7, -6, -7, -1, -7, -3, -5, -4, -2, -4, -4, -5, -5,  1, -2, -5, -6, 12,
+  0, -1, -3, -1, -1, -3, -1, -1, -1, -1, -2, -1,  0, -1, -1, -1,  0,  0, -1, -4, -1,
+ -3, -3,  0, -4, -4,  5, -5,  0, -2, -4, -2, -3, -2, -5, -4, -4, -3, -3, -3, -1, -3,  8,
+  0,  2, -5,  2,  3, -5, -1,  1, -2,  0, -3, -2,  1, -1,  3,  0, -1, -1, -2, -6, -1, -4,  3};
+
+short pam250mt[]={
+  2,
+  0,  3,
+ -2, -4, 12,
+  0,  3, -5,  4,
+  0,  3, -5,  3,  4,
+ -3, -4, -4, -6, -5,  9,
+  1,  0, -3,  1,  0, -5,  5,
+ -1,  1, -3,  1,  1, -2, -2,  6,
+ -1, -2, -2, -2, -2,  1, -3, -2,  5,
+ -1,  1, -5,  0,  0, -5, -2,  0, -2,  5,
+ -2, -3, -6, -4, -3,  2, -4, -2,  2, -3,  6,
+ -1, -2, -5, -3, -2,  0, -3, -2,  2,  0,  4,  6,
+  0,  2, -4,  2,  1, -3,  0,  2, -2,  1, -3, -2,  2,
+  1, -1, -3, -1, -1, -5,  0,  0, -2, -1, -3, -2,  0,  6,
+  0,  1, -5,  2,  2, -5, -1,  3, -2,  1, -2, -1,  1,  0,  4,
+ -2, -1, -4, -1, -1, -4, -3,  2, -2,  3, -3,  0,  0,  0,  1,  6,
+  1,  0,  0,  0,  0, -3,  1, -1, -1,  0, -3, -2,  1,  1, -1,  0,  2,
+  1,  0, -2,  0,  0, -3,  0, -1,  0,  0, -2, -1,  0,  0, -1, -1,  1,  3,
+  0, -2, -2, -2, -2, -1, -1, -2,  4, -2,  2,  2, -2, -1, -2, -2, -1,  0,  4,
+ -6, -5, -8, -7, -7,  0, -7, -3, -5, -3, -2, -4, -4, -6, -5,  2, -2, -5, -6, 17,
+  0, -1, -3, -1, -1, -2, -1, -1, -1, -1, -1, -1,  0, -1, -1, -1,  0,  0, -1, -4, -1,
+ -3, -3,  0, -4, -4,  7, -5,  0, -1, -4, -1, -2, -2, -5, -4, -4, -3, -3, -2,  0, -2, 10,
+  0,  2, -5,  3,  3, -5,  0,  2, -2,  0, -3, -2,  1,  0,  3,  0,  0, -1, -2, -6, -1, -4,  3};
+*/
+short pam350mt[]={  /* 23-row lower-triangular score table stored row by row (row i has i entries, ending on the diagonal); residue order is not stated here - presumably the amino-acid order string defined elsewhere, TODO confirm. */
+  2,
+  1,  3,
+ -2, -5, 18,
+  1,  3, -6,  4,
+  1,  3, -6,  4,  4,
+ -4, -5, -5, -6, -6, 13,
+  2,  1, -4,  1,  1, -6,  5,
+ -1,  1, -4,  1,  1, -2, -2,  7,
+  0, -2, -3, -2, -2,  2, -2, -2,  5,
+ -1,  1, -6,  1,  0, -6, -1,  1, -2,  5,
+ -2, -4, -7, -4, -4,  3, -4, -2,  4, -3,  8,
+ -1, -2, -6, -3, -2,  1, -3, -2,  3,  0,  5,  6,
+  0,  2, -4,  2,  2, -4,  1,  2, -2,  1, -3, -2,  2,
+  1,  0, -3,  0,  0, -5,  0,  0, -2, -1, -3, -2,  0,  6,
+  0,  2, -6,  2,  3, -5, -1,  3, -2,  1, -2, -1,  1,  1,  4,
+ -1,  0, -4, -1,  0, -5, -2,  2, -2,  4, -3,  0,  1,  0,  2,  7,
+  1,  1,  0,  1,  0, -4,  1, -1, -1,  0, -3, -2,  1,  1,  0,  0,  1,
+  1,  0, -2,  0,  0, -3,  1, -1,  0,  0, -2, -1,  1,  1,  0, -1,  1,  2,
+  0, -2, -2, -2, -2, -1, -1, -2,  4, -2,  3,  2, -2, -1, -2, -3, -1,  0,  5,
+ -7, -6,-10, -8, -8,  1, -8, -3, -6, -4, -2, -5, -5, -7, -5,  4, -3, -6, -7, 27,
+  0,  0, -3, -1,  0, -2, -1,  0,  0, -1, -1,  0,  0,  0,  0, -1,  0,  0,  0, -5, -1,
+ -4, -4,  1, -5, -5, 11, -6,  0,  0, -5,  0, -2, -3, -6, -5, -5, -3, -3, -2,  1, -2, 14,
+  0,  2, -6,  3,  3, -6,  0,  2, -2,  1, -3, -2,  2,  0,  3,  1,  0,  0, -2, -7,  0, -5,  3};
+
+/*
+short md_40mt[]={
+  9,
+  0,  0,
+ -7,  0, 16,
+ -6,  0,-13, 11,
+ -5,  0,-15,  3, 11,
+-11,  0, -5,-15,-16, 13,
+ -3,  0, -7, -4, -4,-15, 10,
+ -9,  0, -6, -4, -8, -7,-10, 14,
+ -6,  0,-11,-12,-12, -5,-13,-11, 11,
+ -8,  0,-12, -8, -3,-16, -9, -6,-11, 11,
+ -9,  0,-10,-14,-13, -1,-14, -7, -1,-12,  9,
+ -6,  0, -9,-12,-11, -7,-12, -9,  1, -7,  1, 14,
+ -6,  0, -8,  1, -5,-12, -5,  0, -8, -1,-12, -9, 12,
+ -2,  0,-11,-11,-11,-11, -9, -4,-11,-10, -5,-10, -9, 12,
+ -7,  0,-12, -6,  0,-14, -9,  2,-12, -1, -6, -8, -5, -3, 12,
+ -7,  0, -5,-10, -8,-15, -4,  0,-10,  3, -9, -8, -6, -6,  0, 11,
+  0,  0, -2, -6, -8, -6, -2, -6, -8, -7, -7, -8,  1, -1, -7, -5,  9,
+  1,  0, -7, -8, -8,-11, -7, -7, -2, -5, -9, -2, -2, -4, -7, -6,  1, 10,
+ -1,  0, -7, -9, -8, -6, -8,-12,  4,-12, -2,  0,-10, -9,-11,-11, -7, -4, 10,
+-14,  0, -4,-15,-15, -7, -7,-13,-13,-13, -8,-11,-14,-14,-11, -4, -9,-12,-10, 18,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+-13,  0, -2, -8,-14,  2,-13,  2, -9,-13, -9,-11, -6,-13, -9,-10, -7,-10,-11, -6,  0, 14,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0};
+
+short md_120mt[]={
+  6,
+  0,  0,
+ -3,  0, 14,
+ -2,  0, -7,  8,
+ -2,  0, -8,  5,  8,
+ -6,  0, -2, -9,-10, 11,
+  0,  0, -3,  0, -1, -9,  8,
+ -4,  0, -2, -1, -3, -2, -4, 11,
+ -1,  0, -5, -7, -7, -1, -6, -6,  7,
+ -4,  0, -6, -2,  0, -9, -4, -1, -6,  8,
+ -4,  0, -5, -8, -8,  2, -8, -4,  2, -6,  7,
+ -2,  0, -5, -7, -6, -2, -6, -5,  3, -4,  3, 10,
+ -1,  0, -3,  3, -1, -6, -1,  2, -4,  1, -6, -5,  8,
+  0,  0, -5, -5, -5, -5, -4, -1, -5, -4, -2, -5, -3,  9,
+ -3,  0, -6, -1,  2, -7, -4,  4, -6,  2, -3, -4, -1,  0,  9,
+ -3,  0, -2, -4, -3, -8, -1,  2, -6,  4, -5, -4, -2, -2,  2,  8,
+  2,  0,  0, -2, -3, -3,  0, -2, -3, -3, -3, -3,  2,  1, -3, -2,  5,
+  2,  0, -3, -3, -4, -6, -2, -3,  0, -2, -4,  0,  1,  0, -3, -3,  2,  6,
+  1,  0, -3, -5, -5, -2, -4, -6,  5, -6,  1,  2, -5, -4, -6, -6, -3,  0,  7,
+ -8,  0,  0, -9, -9, -3, -3, -6, -7, -6, -4, -6, -8, -8, -6, -1, -5, -7, -6, 17,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+ -7,  0,  2, -4, -7,  5, -8,  4, -5, -7, -4, -6, -2, -7, -4, -5, -3, -6, -6, -2,  0, 12,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0};
+
+short md_250mt[]={
+  2,
+  0,  0,
+ -1,  0, 11,
+ -1,  0, -3,  5,
+ -1,  0, -4,  4,  5,
+ -3,  0,  0, -5, -5,  8,
+  1,  0, -1,  1,  1, -5,  5,
+ -2,  0,  0,  0,  0,  0, -2,  6,
+  0,  0, -2, -3, -3,  0, -3, -3,  4,
+ -1,  0, -3,  0,  1, -5, -1,  1, -3,  5,
+ -1,  0, -2, -4, -4,  2, -4, -2,  2, -3,  5,
+  0,  0, -2, -3, -3,  0, -3, -2,  3, -2,  3,  6,
+  0,  0, -1,  2,  1, -3,  0,  1, -2,  1, -3, -2,  3,
+  1,  0, -2, -2, -2, -2, -1,  0, -2, -1,  0, -2, -1,  6,
+ -1,  0, -3,  0,  2, -4, -1,  3, -3,  2, -2, -2,  0,  0,  5,
+ -1,  0, -1, -1,  0, -4,  0,  2, -3,  4, -3, -2,  0, -1,  2,  5,
+  1,  0,  1,  0, -1, -2,  1, -1, -1, -1, -2, -1,  1,  1, -1, -1,  2,
+  2,  0, -1, -1, -1, -2,  0, -1,  1, -1, -1,  0,  1,  1, -1, -1,  1,  2,
+  1,  0, -2, -3, -2,  0, -2, -3,  4, -3,  2,  2, -2, -1, -3, -3, -1,  0,  4,
+ -4,  0,  1, -5, -5, -1, -1, -3, -4, -3, -2, -3, -4, -4, -3,  0, -3, -4, -3, 15,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+ -3,  0,  2, -2, -4,  5, -4,  4, -2, -3, -1, -3, -1, -3, -2, -2, -1, -3, -3,  0,  0,  9,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0};
+
+short md_350mt[]={
+  1,
+  0,  0,
+  0,  0,  9,
+  0,  0, -2,  3,
+  0,  0, -2,  3,  3,
+ -2,  0,  1, -3, -4,  6,
+  1,  0,  0,  1,  1, -3,  4,
+ -1,  0,  0,  0,  0,  0, -1,  3,
+  0,  0, -1, -2, -2,  1, -2, -2,  3,
+ -1,  0, -1,  0,  1, -3,  0,  1, -2,  3,
+ -1,  0, -1, -3, -3,  2, -2, -1,  2, -2,  3,
+  0,  0, -1, -2, -2,  1, -2, -1,  2, -2,  2,  3,
+  0,  0, -1,  1,  1, -2,  0,  1, -1,  1, -2, -1,  2,
+  1,  0, -1, -1, -1, -2, -1,  0, -1, -1,  0, -1,  0,  4,
+ -1,  0, -2,  1,  1, -2,  0,  2, -2,  2, -1, -1,  0,  0,  3,
+ -1,  0,  0,  0,  0, -3,  0,  1, -2,  3, -2, -1,  0,  0,  2,  3,
+  1,  0,  0,  0,  0, -1,  1,  0, -1,  0, -1, -1,  1,  1,  0,  0,  1,
+  1,  0,  0,  0, -1, -1,  0, -1,  0,  0, -1,  0,  0,  1, -1,  0,  1,  1,
+  0,  0, -1, -2, -2,  0, -1, -2,  2, -2,  1,  2, -1, -1, -2, -2,  0,  0,  2,
+ -3,  0,  1, -4, -3,  0, -1, -2, -3, -2, -1, -2, -3, -3, -2,  0, -2, -3, -2, 14,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+ -2,  0,  2, -2, -2,  5, -3,  3, -1, -2,  0, -1, -1, -2, -1, -1, -1, -2, -2,  0,  0,  7,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0};
+*/
+
+short idmat[]={  /* identity table: 23-row lower triangle stored row by row, 10 on every diagonal entry and 0 everywhere else. */
+10,
+ 0, 10,
+ 0, 0, 10,
+ 0, 0, 0, 10,
+ 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,10};
+
+short gon40mt[]={  /* 23-row lower-triangular score table stored row by row (row i has i entries, ending on the diagonal); positions 2, 21 and 23 are zero against everything. Residue order is not stated here - presumably the amino-acid order string defined elsewhere, TODO confirm. */
+  92,
+   0,   0,
+ -31,   0, 163,
+ -56,   0,-135, 111,
+ -37,   0,-140,  16, 105,
+ -92,   0, -64,-152,-143, 126,
+ -32,   0, -91, -51, -76,-152, 105,
+ -65,   0, -67, -41, -40, -50, -81, 145,
+ -76,   0, -87,-150,-106, -39,-158, -94, 104,
+ -54,   0,-132, -47, -13,-127, -79, -34, -86, 103,
+ -68,   0, -85,-155,-108, -13,-141, -85,   5, -85,  89,
+ -45,   0, -63,-130, -80, -16,-114, -60,  10, -57,  16, 140,
+ -62,   0, -83,   6, -38,-104, -40,  -7, -99, -20,-112, -91, 115,
+ -37,   0,-137, -69, -60,-128, -87, -71,-108, -62, -83,-119, -78, 124,
+ -43,   0,-113, -32,  10,-100, -71,   0, -91,   2, -60, -35, -25, -46, 118,
+ -61,   0, -86, -77, -50,-130, -69, -31,-103,  19, -84, -81, -47, -73,  -6, 112,
+   0,   0, -35, -36, -41,-111, -37, -48, -95, -43, -95, -64, -11, -35, -35, -51,  99,
+ -25,   0, -59, -47, -52, -90, -85, -46, -51, -34, -78, -44, -27, -42, -39, -52,  13, 100,
+ -22,   0, -43,-133, -74, -58,-122, -98,  28, -82, -18, -22,-103, -86, -79, -88, -74, -25,  97,
+-120,   0, -68,-171,-131,  -6,-108, -70, -93,-127, -71, -72,-119,-149, -87, -63, -98,-120,-115, 181,
+   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+ -95,   0, -56, -98,-107,  31,-129,   5, -76, -88, -64, -66, -62,-106, -81, -75, -69, -87, -73,   1,   0, 135,
+   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0};
+
+short gon80mt[]={  /* 23-row lower-triangular score table stored row by row (row i has i entries, ending on the diagonal); positions 2, 21 and 23 are zero against everything. Residue order is not stated here - presumably the amino-acid order string defined elsewhere, TODO confirm. */
+  75,
+   0,   0,
+ -10,   0, 154,
+ -31,   0, -93,  96,
+ -17,   0, -94,  31,  88,
+ -64,   0, -39,-111,-102, 114,
+ -11,   0, -61, -26, -47,-115,  97,
+ -39,   0, -43, -17, -17, -26, -53, 127,
+ -43,   0, -54,-106, -73, -15,-114, -64,  86,
+ -30,   0, -88, -21,   4, -89, -50, -12, -59,  85,
+ -43,   0, -55,-109, -75,   7,-104, -57,  22, -58,  77,
+ -26,   0, -39, -88, -53,   3, -83, -38,  25, -37,  31, 117,
+ -34,   0, -55,  21, -13, -75, -18,   9, -71,  -2, -79, -62,  97,
+ -16,   0, -93, -42, -35, -93, -58, -45, -75, -37, -58, -78, -48, 114,
+ -22,   0, -76,  -9,  23, -70, -44,  14, -60,  17, -39, -19,  -6, -24,  95,
+ -36,   0, -60, -44, -23, -90, -43, -10, -71,  33, -58, -53, -22, -45,  11,  97,
+  14,   0, -15, -14, -19, -77, -16, -25, -62, -20, -64, -41,   5, -14, -15, -27,  78,
+  -5,   0, -34, -24, -27, -62, -52, -24, -28, -15, -49, -25,  -7, -20, -18, -27,  25,  81,
+  -6,   0, -21, -89, -51, -31, -86, -65,  41, -54,   3,   1, -69, -57, -51, -60, -43,  -9,  80,
+ -87,   0, -43,-124, -98,  16, -81, -43, -63, -89, -44, -45, -86,-112, -62, -41, -72, -87, -80, 173,
+   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+ -65,   0, -32, -69, -74,  49, -94,  21, -47, -60, -35, -37, -39, -76, -53, -50, -46, -58, -47,  23,   0, 123,
+   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0};
+
+short gon120mt[]={  /* 23-row lower-triangular score table stored row by row (row i has i entries, ending on the diagonal); positions 2, 21 and 23 are zero against everything. Residue order is not stated here - presumably the amino-acid order string defined elsewhere, TODO confirm. */
+  59,
+   0,   0,
+  -1,   0, 144,
+ -18,   0, -69,  82,
+  -9,   0, -68,  35,  72,
+ -48,   0, -26, -87, -78, 102,
+  -3,   0, -45, -14, -31, -92,  90,
+ -26,   0, -31,  -7,  -6, -14, -37, 110,
+ -27,   0, -36, -80, -55,  -3, -87, -48,  72,
+ -19,   0, -64,  -8,  11, -67, -34,  -2, -44,  69,
+ -30,   0, -39, -82, -57,  15, -82, -42,  28, -44,  66,
+ -17,   0, -26, -64, -40,  11, -65, -28,  29, -27,  34,  95,
+ -20,   0, -41,  26,  -1, -58,  -7,  14, -55,   5, -61, -46,  80,
+  -6,   0, -68, -28, -22, -72, -41, -31, -56, -24, -44, -56, -32, 105,
+ -12,   0, -56,   1,  25, -53, -30,  17, -43,  20, -30, -14,   1, -14,  74,
+ -23,   0, -45, -27, -10, -68, -30,  -1, -53,  36, -44, -38, -10, -30,  16,  83,
+  16,   0,  -7,  -5,  -9, -58,  -6, -14, -44, -10, -47, -29,  10,  -5,  -7, -15,  60,
+   2,   0, -21, -13, -15, -47, -35, -14, -17,  -6, -34, -16,   0, -10,  -9, -16,  26,  64,
+   0,   0, -11, -65, -38, -17, -65, -47,  42, -39,  13,  10, -50, -42, -36, -44, -28,  -3,  65,
+ -68,   0, -29, -96, -78,  27, -66, -28, -46, -68, -29, -31, -68, -89, -49, -30, -57, -67, -59, 166,
+   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+ -48,   0, -20, -53, -56,  55, -74,  26, -31, -44, -20, -22, -28, -59, -38, -37, -35, -42, -33,  33,   0, 111,
+   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0};
+
+short gon160mt[]={  /* 23-row lower-triangular score table stored row by row (row i has i entries, ending on the diagonal); positions 2, 21 and 23 are zero against everything. Residue order is not stated here - presumably the amino-acid order string defined elsewhere, TODO confirm. */
+  46,
+   0,   0,
+   3,   0, 135,
+ -11,   0, -53,  70,
+  -4,   0, -52,  34,  59,
+ -38,   0, -18, -70, -62,  91,
+   2,   0, -34,  -7, -21, -76,  82,
+ -18,   0, -23,  -1,  -1,  -7, -27,  93,
+ -18,   0, -25, -62, -43,   3, -70, -37,  59,
+ -12,   0, -48,  -1,  13, -53, -24,   2, -35,  55,
+ -22,   0, -29, -65, -45,  19, -67, -32,  30, -34,  57,
+ -12,   0, -19, -50, -31,  14, -52, -21,  29, -21,  34,  76,
+ -12,   0, -31,  26,   5, -47,  -2,  15, -44,   8, -48, -36,  65,
+  -1,   0, -52, -19, -14, -58, -30, -22, -43, -16, -35, -42, -22,  96,
+  -7,   0, -42,   6,  23, -41, -21,  17, -32,  20, -24, -12,   5,  -8,  56,
+ -16,   0, -35, -16,  -3, -53, -21,   3, -41,  35, -35, -29,  -4, -21,  17,  71,
+  16,   0,  -2,   0,  -3, -45,  -1,  -8, -33,  -4, -36, -23,  11,   0,  -2,  -9,  44,
+   5,   0, -14,  -6,  -8, -36, -24,  -8, -12,  -2, -24, -11,   3,  -4,  -4,  -9,  23,  50,
+   1,   0,  -6, -49, -30,  -8, -52, -35,  40, -30,  17,  14, -38, -32, -27, -34, -20,   0,  53,
+ -55,   0, -21, -78, -64,  32, -55, -19, -34, -54, -20, -22, -55, -74, -40, -24, -47, -54, -45, 158,
+   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+ -37,   0, -13, -42, -44,  56, -60,  27, -20, -35, -11, -13, -22, -48, -29, -29, -28, -32, -24,  38,   0, 100,
+   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0};
+
+short gon250mt[]={  /* 23-row lower-triangular score table stored row by row (row i has i entries, ending on the diagonal); positions 2, 21 and 23 are zero against everything. Residue order is not stated here - presumably the amino-acid order string defined elsewhere, TODO confirm. */
+  24,
+   0,   0,
+   5,   0, 115,
+  -3,   0, -32,  47,
+   0,   0, -30,  27,  36,
+ -23,   0,  -8, -45, -39,  70,
+   5,   0, -20,   1,  -8, -52,  66,
+  -8,   0, -13,   4,   4,  -1, -14,  60,
+  -8,   0, -11, -38, -27,  10, -45, -22,  40,
+  -4,   0, -28,   5,  12, -33, -11,   6, -21,  32,
+ -12,   0, -15, -40, -28,  20, -44, -19,  28, -21,  40,
+  -7,   0,  -9, -30, -20,  16, -35, -13,  25, -14,  28,  43,
+  -3,   0, -18,  22,   9, -31,   4,  12, -28,   8, -30, -22,  38,
+   3,   0, -31,  -7,  -5, -38, -16, -11, -26,  -6, -23, -24,  -9,  76,
+  -2,   0, -24,   9,  17, -26, -10,  12, -19,  15, -16, -10,   7,  -2,  27,
+  -6,   0, -22,  -3,   4, -32, -10,   6, -24,  27, -22, -17,   3,  -9,  15,  47,
+  11,   0,   1,   5,   2, -28,   4,  -2, -18,   1, -21, -14,   9,   4,   2,  -2,  22,
+   6,   0,  -5,   0,  -1, -22, -11,  -3,  -6,   1, -13,  -6,   5,   1,   0,  -2,  15,  25,
+   1,   0,   0, -29, -19,   1, -33, -20,  31, -17,  18,  16, -22, -18, -15, -20, -10,   0,  34,
+ -36,   0, -10, -52, -43,  36, -40,  -8, -18, -35,  -7, -10, -36, -50, -27, -16, -33, -35, -26, 142,
+   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+ -22,   0,  -5, -28, -27,  51, -40,  22,  -7, -21,   0,  -2, -14, -31, -17, -18, -19, -19, -11,  41,   0,  78,
+   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0};
+
+short gon300mt[]={  /* 23-row lower-triangular score table stored row by row (row i has i entries, ending on the diagonal); positions 2, 21 and 23 are zero against everything. Residue order is not stated here - presumably the amino-acid order string defined elsewhere, TODO confirm. */
+  16,
+   0,   0,
+   5,   0, 104,
+  -1,   0, -24,  37,
+   1,   0, -23,  23,  27,
+ -18,   0,  -5, -37, -31,  60,
+   5,   0, -15,   3,  -4, -42,  58,
+  -6,   0, -10,   5,   4,   0, -10,  45,
+  -6,   0,  -7, -30, -21,  11, -36, -16,  33,
+  -2,   0, -21,   6,  11, -26,  -7,   5, -17,  24,
+  -9,   0, -10, -32, -22,  19, -36, -14,  25, -17,  33,
+  -5,   0,  -6, -24, -16,  15, -28, -10,  22, -11,  24,  31,
+  -1,   0, -14,  18,   9, -25,   5,  10, -22,   8, -24, -17,  27,
+   3,   0, -23,  -4,  -2, -30, -11,  -8, -20,  -3, -18, -19,  -6,  66,
+  -1,   0, -18,   9,  14, -20,  -6,   9, -15,  13, -13,  -8,   7,  -1,  18,
+  -4,   0, -17,   0,   5, -25,  -6,   6, -19,  22, -18, -13,   4,  -6,  13,  37,
+   8,   0,   1,   5,   3, -22,   4,  -1, -14,   2, -17, -11,   7,   4,   2,   0,  15,
+   5,   0,  -3,   1,   1, -17,  -7,  -1,  -4,   2,  -9,  -5,   4,   2,   1,  -1,  11,  17,
+   0,   0,   1, -23, -15,   4, -26, -15,  26, -13,  17,  15, -17, -14, -12, -15,  -8,   0,  26,
+ -29,   0,  -7, -42, -36,  36, -34,  -5, -13, -28,  -4,  -6, -30, -41, -23, -14, -27, -28, -19, 132,
+   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+ -17,   0,  -3, -22, -22,  46, -33,  18,  -3, -17,   3,   1, -12, -25, -14, -14, -15, -15,  -7,  40,   0,  67,
+   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0};
+
+short gon350mt[]={  /* 23-row lower-triangular score table stored row by row (row i has i entries, ending on the diagonal); positions 2, 21 and 23 are zero against everything. Residue order is not stated here - presumably the amino-acid order string defined elsewhere, TODO confirm. */
+  10,
+   0,   0,
+   4,   0,  93,
+   0,   0, -19,  29,
+   1,   0, -17,  19,  20,
+ -14,   0,  -3, -30, -25,  51,
+   5,   0, -12,   4,  -2, -35,  51,
+  -4,   0,  -8,   5,   4,   1,  -7,  33,
+  -4,   0,  -5, -24, -17,  11, -29, -13,  27,
+  -1,   0, -16,   6,   9, -21,  -4,   5, -13,  18,
+  -7,   0,  -7, -25, -18,  18, -30, -11,  22, -14,  28,
+  -4,   0,  -4, -19, -13,  14, -23,  -8,  19,  -9,  21,  23,
+   0,   0, -11,  15,   9, -20,   5,   8, -18,   7, -19, -14,  20,
+   3,   0, -18,  -2,   0, -25,  -7,  -5, -16,  -2, -15, -14,  -3,  56,
+   0,   0, -14,   8,  11, -16,  -4,   7, -11,  10, -11,  -7,   6,   0,  12,
+  -2,   0, -13,   2,   6, -20,  -4,   6, -15,  18, -14, -11,   4,  -4,  10,  28,
+   6,   0,   1,   5,   3, -18,   5,   0, -11,   2, -13,  -9,   6,   4,   2,   1,  10,
+   4,   0,  -2,   2,   1, -13,  -5,  -1,  -3,   2,  -7,  -4,   4,   2,   1,   0,   8,  11,
+   0,   0,   2, -18, -12,   5, -21, -11,  22, -10,  16,  14, -13, -11,  -9, -12,  -6,   0,  21,
+ -24,   0,  -4, -35, -29,  35, -30,  -3,  -9, -23,  -1,  -3, -24, -34, -19, -12, -22, -23, -14, 124,
+   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+ -14,   0,  -1, -18, -17,  42, -27,  15,  -1, -14,   5,   2, -10, -20, -11, -12, -12, -12,  -4,  39,   0,  57,
+   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0};
+
+
+char *nucleic_acid_order = "ABCDGHKMNRSTUVWXY";  /* 17 nucleotide codes; the two DNA score tables that follow also have 17 rows, so they are presumably indexed in this order - confirm against the code that reads them. */
+ 
+short clustalvdnamt[]={  /* 17-row lower-triangular DNA score table stored row by row; only the diagonal entries of rows 1, 3, 5, 12 and 13 are 10 (A, C, G, T, U if rows follow nucleic_acid_order - confirm), every other entry is 0. */
+ 10,
+  0,  0,
+  0,  0, 10,
+  0,  0,  0,  0,
+  0,  0,  0,  0, 10,
+  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 10,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 10,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0};
+
+short swgapdnamt[]={  /* 17-row lower-triangular DNA score table stored row by row; every entry is either 10 or -9 and the whole diagonal is 10. Rows presumably follow nucleic_acid_order - confirm. */
+ 10,
+ -9, 10,
+ -9, 10, 10,
+ 10, 10, -9, 10,
+ -9, 10, -9, 10, 10,
+ 10, 10, 10, 10, -9, 10,
+ -9, 10, -9, 10, 10, 10, 10,
+ 10, 10, 10, 10, -9, 10, -9, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, -9, 10, 10, 10, 10, 10, 10, 10,
+ -9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ -9, 10, -9, 10, -9, 10, 10, -9, 10, -9, -9, 10,
+ -9, 10, -9, 10, -9, 10, 10, -9, 10, -9, -9, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, -9, -9, 10,
+ 10, 10, -9, 10, -9, 10, 10, 10, 10, 10, -9, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ -9, 10, 10, 10, -9, 10, 10, 10, 10, -9, 10, 10, 10, 10, 10, 10, 10};
+

Added: trunk/packages/clustalw/branches/upstream/current/matrixseries.gon
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/matrixseries.gon	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/matrixseries.gon	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,7 @@
+CLUSTAL_SERIES
+
+
+MATRIX 61 100 /us1/user/julie/matrices/gon80.bla
+MATRIX 41 60 /us1/user/julie/matrices/gon120.bla
+MATRIX 21 40 /us1/user/julie/matrices/gon250.bla
+MATRIX 0 40 /us1/user/julie/matrices/gon350.bla

Added: trunk/packages/clustalw/branches/upstream/current/pairalign.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/pairalign.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/pairalign.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,615 @@
+/* Change int h to int gh everywhere  DES June 1994 */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "clustalw.h"
+
+#define MIN(a,b) ((a)<(b)?(a):(b))	/* smaller of a and b; each argument is evaluated twice, so pass no side effects */
+#define MAX(a,b) ((a)>(b)?(a):(b))	/* larger of a and b; same double-evaluation caveat */
+
+#define gap(k)  ((k) <= 0 ? 0 : g + gh * (k))	/* cost of a gap of k residues: g once plus gh per residue; 0 when k <= 0 */
+#define tbgap(k)  ((k) <= 0 ? 0 : tb + gh * (k))	/* like gap(), but the one-off cost is the variable tb in scope at the point of use */
+#define tegap(k)  ((k) <= 0 ? 0 : te + gh * (k))	/* like gap(), but the one-off cost is the variable te in scope at the point of use */
+
+/*
+ *	Prototypes
+ */
+static void add(sint v);
+static sint calc_score(sint iat, sint jat, sint v1, sint v2);
+static float tracepath(sint tsb1,sint tsb2);
+static void forward_pass(char *ia, char *ib, sint n, sint m);
+static void reverse_pass(char *ia, char *ib);
+static sint diff(sint A, sint B, sint M, sint N, sint tb, sint te);
+static void del(sint k);
+
+/*
+ *   Global variables
+ */
+#ifdef MAC
+#define pwint   short	/* type of the DP work cells (HH, DD, RR, SS, maxscore) when MAC is defined */
+#else
+#define pwint   int	/* type of the DP work cells on every other build */
+#endif
+static sint		int_scale;	/* scale factor set in pairalign(); passed to get_matrix() and multiplied into the gap penalties */
+
+extern double   **tmat;
+extern float    pw_go_penalty;
+extern float    pw_ge_penalty;
+extern float	transition_weight;
+extern sint 	nseqs;
+extern sint 	max_aa;
+extern sint 	gap_pos1,gap_pos2;
+extern sint  	max_aln_length;
+extern sint 	*seqlen_array;
+extern sint 	debug;
+extern sint  	mat_avscore;
+extern short 	blosum30mt[],pam350mt[],idmat[],pw_usermat[],pw_userdnamat[];
+extern short    clustalvdnamt[],swgapdnamt[];
+extern short    gon250mt[];
+extern short 	def_dna_xref[],def_aa_xref[],pw_dna_xref[],pw_aa_xref[];
+extern Boolean  dnaflag;
+extern char 	**seq_array;
+extern char 	*amino_acid_codes;
+extern char 	pw_mtrxname[];
+extern char 	pw_dnamtrxname[];
+
+static float 	mm_score;	/* identity score of the current pair: tracepath() result divided by the shorter ungapped length */
+static sint 	print_ptr,last_print;	/* next free slot in displ[]; last_print < 0 tells add()/del() that a deletion was written last */
+static sint 	*displ;	/* edit script read by tracepath(): 0 = aligned pair, k>0 = k residues of seq2 against a gap, k<0 = -k residues of seq1 against a gap */
+static pwint 	*HH, *DD, *RR, *SS;	/* DP work rows; pairalign() allocates max_aln_length cells for each */
+static sint 	g, gh;	/* scaled gap-opening (g) and gap-extension (gh) penalties for the current pair */
+static sint   	seq1, seq2;	/* 1-based seq_array indices of the two sequences being aligned */
+static sint     matrix[NUMRES][NUMRES];	/* residue scoring matrix filled in by get_matrix() */
+static pwint    maxscore;	/* best score found by forward_pass(); pairalign() later overwrites it with diff()'s result */
+static sint    	sb1, sb2, se1, se2;	/* start (sb) and end (se) positions of the chosen alignment in seq1 / seq2 */
+
+
+/* Align every pair (si,sj), si<sj, with si in [istart,iend) and sj in
+ * (jstart,jend) (0-based, both capped at nseqs) and store the distance
+ * 1 - (fraction of identical residues) symmetrically in tmat (1-based).
+ * Returns 1 on success, -1 if get_matrix() yields no scoring matrix; the
+ * work arrays are allocated only after that check so nothing leaks. */
+sint pairalign(sint istart, sint iend, sint jstart, sint jend)
+{
+  short	 *mat_xref;
+  static sint    si, sj, i;
+  static sint    n,m,len1,len2;
+  static sint    maxres;
+  static short    *matptr;
+  static char   c;
+  static float gscale,ghscale;
+
+#ifdef MAC
+  int_scale = 10;
+#else
+  int_scale = 100;
+#endif
+  gscale=ghscale=1.0;
+  if (dnaflag)
+    {
+      if (debug>1) fprintf(stdout,"matrix %s\n",pw_dnamtrxname);
+      if (strcmp(pw_dnamtrxname, "iub") == 0)
+	{ 
+	  matptr = swgapdnamt;
+	  mat_xref = def_dna_xref;
+	}
+      else if (strcmp(pw_dnamtrxname, "clustalw") == 0)
+	{ 
+	  matptr = clustalvdnamt;
+	  mat_xref = def_dna_xref;
+	  gscale=0.6667;
+	  ghscale=0.751;
+	}
+      else
+	{
+	  matptr = pw_userdnamat;
+	  mat_xref = pw_dna_xref;
+	}
+      maxres = get_matrix(matptr, mat_xref, matrix, TRUE, int_scale);
+      if (maxres == 0) return((sint)-1);
+
+      matrix[0][4]=transition_weight*matrix[0][0];	/* NOTE(review): raw indices here, whereas prfalign() indexes these cells through mat_xref - confirm this is intended for user matrices */
+      matrix[4][0]=transition_weight*matrix[0][0];
+      matrix[2][11]=transition_weight*matrix[0][0];
+      matrix[11][2]=transition_weight*matrix[0][0];
+      matrix[2][12]=transition_weight*matrix[0][0];
+      matrix[12][2]=transition_weight*matrix[0][0];
+    }
+  else
+    {
+      if (debug>1) fprintf(stdout,"matrix %s\n",pw_mtrxname);
+      if (strcmp(pw_mtrxname, "blosum") == 0)
+	{
+	  matptr = blosum30mt;
+	  mat_xref = def_aa_xref;
+	}
+      else if (strcmp(pw_mtrxname, "pam") == 0)
+	{
+	  matptr = pam350mt;
+	  mat_xref = def_aa_xref;
+	}
+      else if (strcmp(pw_mtrxname, "gonnet") == 0)
+	{
+	  matptr = gon250mt;
+	  int_scale /= 10;
+	  mat_xref = def_aa_xref;
+	}
+      else if (strcmp(pw_mtrxname, "id") == 0)
+	{
+	  matptr = idmat;
+	  mat_xref = def_aa_xref;
+	}
+      else
+	{
+	  matptr = pw_usermat;
+	  mat_xref = pw_aa_xref;
+	}
+
+      maxres = get_matrix(matptr, mat_xref, matrix, TRUE, int_scale);
+      if (maxres == 0) return((sint)-1);
+    }
+
+  /* allocate the work arrays only now: the early returns above own nothing */
+  displ = (sint *)ckalloc((2*max_aln_length+1) * sizeof(sint));
+  HH = (pwint *)ckalloc((max_aln_length) * sizeof(pwint));
+  DD = (pwint *)ckalloc((max_aln_length) * sizeof(pwint));
+  RR = (pwint *)ckalloc((max_aln_length) * sizeof(pwint));
+  SS = (pwint *)ckalloc((max_aln_length) * sizeof(pwint));
+
+  for (si=MAX(0,istart);si<nseqs && si<iend;si++)
+    {
+      n = seqlen_array[si+1];
+      len1 = 0;	/* len1: residues of sequence si that are not gap codes */
+      for (i=1;i<=n;i++) {
+	c = seq_array[si+1][i];
+	if ((c!=gap_pos1) && (c != gap_pos2)) len1++;
+      }
+
+      for (sj=MAX(si+1,jstart+1);sj<nseqs && sj<jend;sj++)
+	{
+	  m = seqlen_array[sj+1];
+	  if(n==0 || m==0) {	/* an empty sequence gets the maximum distance */
+	    tmat[si+1][sj+1]=1.0;
+	    tmat[sj+1][si+1]=1.0;
+	    continue;
+	  }
+	  len2 = 0;	/* len2: residues of sequence sj that are not gap codes */
+	  for (i=1;i<=m;i++) {
+	    c = seq_array[sj+1][i];
+	    if ((c!=gap_pos1) && (c != gap_pos2)) len2++;
+	  }
+
+	  if (dnaflag) {
+	    g = 2 * (float)pw_go_penalty * int_scale*gscale;
+	    gh = pw_ge_penalty * int_scale*ghscale;
+	  }
+	  else {
+	    if (mat_avscore <= 0)
+              g = 2 * (float)(pw_go_penalty + log((double)(MIN(n,m))))*int_scale;
+	    else
+              g = 2 * mat_avscore * (float)(pw_go_penalty +
+					    log((double)(MIN(n,m))))*gscale;
+	    gh = pw_ge_penalty * int_scale;
+	  }
+
+	  if (debug>1) fprintf(stdout,"go %d ge %d\n",(pint)g,(pint)gh);
+
+	  /*
+	    align the sequences
+	  */
+	  seq1 = si+1;
+        seq2 = sj+1;
+
+        forward_pass(&seq_array[seq1][0], &seq_array[seq2][0],
+           n, m);
+
+        reverse_pass(&seq_array[seq1][0], &seq_array[seq2][0]);
+
+        last_print = 0;
+	print_ptr = 1;
+
+/* use Myers and Miller to align two sequences */
+
+        maxscore = diff(sb1-1, sb2-1, se1-sb1+1, se2-sb2+1, 
+        (sint)0, (sint)0);
+ 
+/* calculate percentage residue identity */
+
+        mm_score = tracepath(sb1,sb2);
+
+		if(len1==0 || len2==0) mm_score=0;
+		else
+			mm_score /= (float)MIN(len1,len2);
+
+        tmat[si+1][sj+1] = ((float)100.0 - mm_score)/(float)100.0;
+        tmat[sj+1][si+1] = ((float)100.0 - mm_score)/(float)100.0;
+
+if (debug>1)
+{
+        fprintf(stdout,"Sequences (%d:%d) Aligned. Score: %d CompScore:  %d\n",
+                           (pint)si+1,(pint)sj+1, 
+                           (pint)mm_score, 	/* next argument: report 0 rather than divide by zero for an all-gap sequence */
+                           (len1==0 || len2==0) ? (pint)0 : (pint)maxscore/(MIN(len1,len2)*100));
+}
+else
+{
+        info("Sequences (%d:%d) Aligned. Score:  %d",
+                                      (pint)si+1,(pint)sj+1, 
+                                      (pint)mm_score);
+}
+
+   }
+  }
+   displ=ckfree((void *)displ);
+   HH=ckfree((void *)HH);
+   DD=ckfree((void *)DD);
+   RR=ckfree((void *)RR);
+   SS=ckfree((void *)SS);
+
+
+  return((sint)1);
+}
+
+static void add(sint v)	/* append "v residues of seq2 against a gap" to the edit script displ[] */
+{
+        if (last_print >= 0) {	/* no deletion pending: write v and remember it */
+                last_print = displ[print_ptr] = v;
+        } else {		/* deletion written last: slide it up one slot and put v before it */
+                displ[print_ptr-1] = v;
+                displ[print_ptr] = last_print;
+        }
+        print_ptr++;
+}
+
+static sint calc_score(sint iat,sint jat,sint v1,sint v2)	/* matrix score of residue v1+iat of seq1 against residue v2+jat of seq2 */
+{
+        /* positions inside the two sequences currently selected by seq1/seq2 */
+        const sint row_pos = iat + v1;
+        const sint col_pos = jat + v2;
+
+        /* residue codes found there index the scoring matrix directly */
+        const int row = (int)seq_array[seq1][row_pos];
+        const int col = (int)seq_array[seq2][col_pos];
+
+        return(matrix[row][col]);
+}
+
+
+/* Walk the edit script displ[1..print_ptr-1], starting at residue tsb1 of seq1
+ * and tsb2 of seq2, and return 100 * (number of aligned pairs that are equal
+ * and not gap codes).  With debug>0 the two aligned rows are printed as well. */
+static float tracepath(sint tsb1,sint tsb2)
+{
+	char c1,c2;
+    sint  i1,i2,r;
+    sint i,k,pos,to_do;
+	sint count;
+	float score;
+	char *s1 = NULL, *s2 = NULL;	/* debug-only text of the two aligned rows */
+
+        to_do=print_ptr-1;
+        i1 = tsb1;
+        i2 = tsb2;
+	if (debug>0) {
+	    /* size the rows from the script itself: the former fixed 600-char arrays overflowed on longer alignments */
+	    for (i=1,pos=0;i<=to_do;++i) pos += (displ[i]<0) ? -displ[i] : ((displ[i]>0) ? displ[i] : 1);
+	    s1 = (char *)ckalloc((pos+1) * sizeof(char));
+	    s2 = (char *)ckalloc((pos+1) * sizeof(char));
+	  }
+	pos = 0;
+	count = 0;
+        for(i=1;i<=to_do;++i) {
+	  if (debug>1) fprintf(stdout,"%d ",(pint)displ[i]);
+	  if(displ[i]==0) {	/* aligned pair: consume one residue of each sequence */
+	    c1 = seq_array[seq1][i1];
+	    c2 = seq_array[seq2][i2];
+	    if (debug>0)
+	      {
+		if (c1>max_aa) s1[pos] = '-';
+		else s1[pos]=amino_acid_codes[c1];
+		if (c2>max_aa) s2[pos] = '-';
+		else s2[pos]=amino_acid_codes[c2];
+	      }
+	    if ((c1!=gap_pos1) && (c1 != gap_pos2) &&
+		(c1 == c2)) count++;
+	    ++i1;
+	    ++i2;
+	    ++pos;
+	  }
+	  else {
+	    if((k=displ[i])>0) {	/* k residues of seq2 opposite a gap in seq1 */
+	      if (debug>0)
+		for (r=0;r<k;r++)
+		  {
+		    s1[pos+r]='-';
+		    if (seq_array[seq2][i2+r]>max_aa) s2[pos+r] = '-';
+		    else s2[pos+r]=amino_acid_codes[seq_array[seq2][i2+r]];
+		  }
+	      i2 += k;
+	      pos += k;
+	    }
+	    else {			/* k<0: -k residues of seq1 opposite a gap in seq2 */
+	      if (debug>0)
+		for (r=0;r<(-k);r++)
+		  {
+		    s2[pos+r]='-';
+		    if (seq_array[seq1][i1+r]>max_aa) s1[pos+r] = '-';
+		    else s1[pos+r]=amino_acid_codes[seq_array[seq1][i1+r]];
+		  }
+	      i1 -= k;
+	      pos -= k;
+	    }
+	  }
+        }
+	if (debug>0) 
+	  {
+	    fprintf(stdout,"\n");
+	    for (i=0;i<pos;i++) fprintf(stdout,"%c",s1[i]);
+	    fprintf(stdout,"\n");
+	    for (i=0;i<pos;i++) fprintf(stdout,"%c",s2[i]);
+	    fprintf(stdout,"\n");
+	    s1=ckfree((void *)s1);
+	    s2=ckfree((void *)s2);
+	  }
+	score = 100.0 * (float)count;
+	return(score);
+}
+
+
+static void forward_pass(char *ia, char *ib, sint n, sint m)	/* local-alignment scan of ia[1..n] against ib[1..m]; leaves the best cell score in maxscore and its position in se1,se2 */
+{
+
+  sint i,j;
+  pwint f,hh,p,t;	/* f: best score ending in a gap along the row; hh: current cell; p: diagonal predecessor; t: scratch */
+
+  maxscore = 0;
+  se1 = se2 = 0;
+  for (i=0;i<=m;i++)	/* row 0: every cell scores 0, a gap state starts at -g */
+    {
+       HH[i] = 0;
+       DD[i] = -g;
+    }
+
+  for (i=1;i<=n;i++)
+     {
+        hh = p = 0;	/* column 0 of row i and of row i-1 both score 0 */
+		f = -g;
+
+        for (j=1;j<=m;j++)
+           {
+
+              f -= gh; 	/* extend the gap along the row ... */
+              t = hh - g - gh;	/* ... or open one from the cell to the left (hh still holds column j-1) */
+              if (f<t) f = t;
+
+              DD[j] -= gh;	/* extend the gap along the column ... */
+              t = HH[j] - g - gh;	/* ... or open one from the cell above (HH[j] still holds row i-1) */
+              if (DD[j]<t) DD[j] = t;
+
+              hh = p + matrix[(int)ia[i]][(int)ib[j]];	/* diagonal move: pair ia[i] with ib[j] */
+              if (hh<f) hh = f;
+              if (hh<DD[j]) hh = DD[j];
+              if (hh<0) hh = 0;	/* clamp at 0 so an alignment may start at any cell */
+
+              p = HH[j];	/* becomes the diagonal predecessor of column j+1 */
+              HH[j] = hh;
+
+              if (hh > maxscore)	/* strict '>' keeps the first cell that reaches the maximum */
+                {
+                   maxscore = hh;
+                   se1 = i;
+                   se2 = j;
+                }
+           }
+     }
+
+}
+
+
+static void reverse_pass(char *ia, char *ib)	/* scan backwards from (se1,se2) until the score reaches maxscore; leaves the start of the alignment in sb1,sb2 */
+{
+
+  sint i,j;
+  pwint f,hh,p,t;	/* same roles as in forward_pass(): row gap, current cell, diagonal predecessor, scratch */
+  pwint cost;	/* best score seen so far in this backward scan */
+
+  cost = 0;
+  sb1 = sb2 = 1;	/* defaults kept if no cell scores above 0 */
+  for (i=se2;i>0;i--)
+    {
+       HH[i] = -1;
+       DD[i] = -1;
+    }
+
+  for (i=se1;i>0;i--)
+     {
+        hh = f = -1;
+        if (i == se1) p = 0;	/* only the path through (se1,se2) starts from score 0 */
+        else p = -1;
+
+        for (j=se2;j>0;j--)
+           {
+
+              f -= gh; 	/* extend the gap along the row ... */
+              t = hh - g - gh;	/* ... or open one from the previously visited cell (column j+1) */
+              if (f<t) f = t;
+
+              DD[j] -= gh;	/* extend the gap along the column ... */
+              t = HH[j] - g - gh;	/* ... or open one from row i+1 */
+              if (DD[j]<t) DD[j] = t;
+
+              hh = p + matrix[(int)ia[i]][(int)ib[j]];	/* diagonal move; note there is no clamp at 0 here */
+              if (hh<f) hh = f;
+              if (hh<DD[j]) hh = DD[j];
+
+              p = HH[j];
+              HH[j] = hh;
+
+              if (hh > cost)
+                {
+                   cost = hh;
+                   sb1 = i;
+                   sb2 = j;
+                   if (cost >= maxscore) break;	/* forward score reproduced: (sb1,sb2) is the start */
+                }
+           }
+        if (cost >= maxscore) break;	/* also leave the outer loop */
+     }
+
+}
+
+/* Myers and Miller divide and conquer: align the M residues of seq1 that
+ * follow offset A with the N residues of seq2 that follow offset B, appending
+ * the edit script to displ[] through add()/del().  tb and te are the one-off
+ * gap costs used at the start and at the end of this sub-problem.  Returns
+ * the score as sint, matching the prototype (the definition used to say int). */
+static sint diff(sint A,sint B,sint M,sint N,sint tb,sint te)
+{
+  sint type;
+  sint midi,midj,i,j;
+  int midh;
+  static pwint f, hh, e, s, t;	/* shared by all recursion levels; none is read after the recursive calls */
+  
+  if(N<=0)  {	/* nothing left in seq2: delete whatever remains of seq1 */
+    if(M>0) {
+      del(M);
+    }
+    return(-(int)tbgap(M));
+  }
+  
+  if(M<=1) {
+    if(M<=0) {	/* nothing left in seq1: insert all N residues of seq2 */
+      add(N);
+      return(-(int)tbgap(N));
+    }
+    
+    midh = -(tb+gh) - tegap(N);	/* M==1: delete the single residue, then insert all N ... */
+    hh = -(te+gh) - tbgap(N);	/* ... or insert all N first and delete afterwards */
+    if (hh>midh) midh = hh;
+    midj = 0;
+    for(j=1;j<=N;j++) {	/* ... or pair it with residue j of seq2 */
+      hh = calc_score(1,j,A,B)
+	- tegap(N-j) - tbgap(j-1);
+      if(hh>midh) {
+	midh = hh;
+	midj = j;
+      }
+    }
+    
+    if(midj==0) {
+      del(1);
+      add(N);
+    }
+    else {
+      if(midj>1)
+	add(midj-1);
+      displ[print_ptr++] = last_print = 0;	/* 0 in the script = one aligned pair */
+      if(midj<N)
+	add(N-midj);
+    }
+    return midh;
+  }
+  
+/* Divide: Find optimum midpoint (midi,midj) of cost midh */
+  
+  midi = M / 2;
+  HH[0] = 0;
+  t = -tb;
+  for(j=1;j<=N;j++) {
+    HH[j] = t = t-gh;
+    DD[j] = t-g;
+  }
+  
+  t = -tb;
+  for(i=1;i<=midi;i++) {	/* forward phase over rows 1..midi */
+    s=HH[0];
+    HH[0] = hh = t = t-gh;
+    f = t-g;
+    for(j=1;j<=N;j++) {
+      if ((hh=hh-g-gh) > (f=f-gh)) f=hh;
+      if ((hh=HH[j]-g-gh) > (e=DD[j]-gh)) e=hh;
+      hh = s + calc_score(i,j,A,B);
+      if (f>hh) hh = f;
+      if (e>hh) hh = e;
+      
+      s = HH[j];
+      HH[j] = hh;
+      DD[j] = e;
+    }
+  }
+  
+  DD[0]=HH[0];
+  
+  RR[N]=0;
+  t = -te;
+  for(j=N-1;j>=0;j--) {
+    RR[j] = t = t-gh;
+    SS[j] = t-g;
+  }
+  
+  t = -te;
+  for(i=M-1;i>=midi;i--) {	/* reverse phase over rows M-1..midi */
+    s = RR[N];
+    RR[N] = hh = t = t-gh;
+    f = t-g;
+    for(j=N-1;j>=0;j--) {
+      if ((hh=hh-g-gh) > (f=f-gh)) f=hh;
+      if ((hh=RR[j]-g-gh) > (e=SS[j]-gh)) e=hh;
+      hh = s + calc_score(i+1,j+1,A,B);
+      if (f>hh) hh = f;
+      if (e>hh) hh = e;
+      
+      s = RR[j];
+      RR[j] = hh;
+      SS[j] = e;
+    }
+  }
+  
+  SS[N]=RR[N];
+  
+  midh=HH[0]+RR[0];
+  midj=0;
+  type=1;
+  for(j=0;j<=N;j++) {	/* type 1: the two halves meet at a cell boundary */
+    hh = HH[j] + RR[j];
+    if(hh>=midh)
+      if(hh>midh || (HH[j]!=DD[j] && RR[j]==SS[j])) {
+	midh=hh;
+	midj=j;
+      }
+  }
+  
+  for(j=N;j>=0;j--) {	/* type 2: a deletion spans the midpoint; +g refunds the gap opening counted in both halves */
+    hh = DD[j] + SS[j] + g;
+    if(hh>midh) {
+      midh=hh;
+      midj=j;
+      type=2;
+    }
+  }
+  
+  /* Conquer recursively around midpoint  */
+  
+  if(type==1) {             /* Type 1 gaps  */
+    diff(A,B,midi,midj,tb,g);
+    diff(A+midi,B+midj,M-midi,N-midj,g,te);
+  }
+  else {
+    diff(A,B,midi-1,midj,tb,(sint)0);
+    del(2);
+    diff(A+midi+1,B+midj,M-midi-1,N-midj,(sint)0,te);
+  }
+  
+  return midh;       /* Return the score of the best alignment */
+}
+
+static void del(sint k)	/* record "k residues of seq1 against a gap" as a negative entry in the edit script displ[] */
+{
+  sint *entry;			/* script slot that receives this deletion */
+  if (last_print<0) { entry = &displ[print_ptr-1]; *entry -= k; }	/* deletion written last: make it k longer */
+  else { entry = &displ[print_ptr++]; *entry = -(k); }		/* otherwise open a new entry */
+  last_print = *entry;
+}
+
+

Added: trunk/packages/clustalw/branches/upstream/current/param.h
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/param.h	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/param.h	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,383 @@
+#define MAXARGS 100
+
+typedef struct {	/* one row of the cmd_line_* switch tables defined further down in this header */
+	char *str;	/* switch name; the tables end with an entry whose name is "" */
+	sint *flag;	/* address of the set* variable tied to this switch (NULL in the end entry) */
+	int type;	/* argument kind, NOARG..OPTARG; -1 in the end entry */
+	char **arg;	/* for OPTARG rows the ""-terminated list of option names, otherwise NULL */
+} cmd_line_data;
+
+/* 
+   command line switches
+*/
+sint setoptions = -1;	/* one flag per command-line switch, all initialised to -1; the cmd_line_* tables below store their addresses */
+sint sethelp = -1;	/* shared by the "help" and "check" verbs */
+sint setinteractive = -1;
+sint setbatch = -1;
+sint setgapopen = -1;
+sint setgapext = -1;
+sint setpwgapopen = -1;
+sint setpwgapext = -1;
+sint setoutorder = -1;
+sint setbootlabels = -1;
+sint setpwmatrix = -1;
+sint setmatrix = -1;
+sint setpwdnamatrix = -1;
+sint setdnamatrix = -1;
+sint setnegative = -1;
+sint setnoweights = -1;
+sint setoutput = -1;
+sint setoutputtree = -1;
+sint setquicktree = -1;
+sint settype = -1;
+sint setcase = -1;
+sint setseqno = -1;
+
+sint setseqno_range = -1;
+sint setrange = -1;
+
+sint settransweight = -1;
+sint setseed = -1;
+sint setscore = -1;
+sint setwindow = -1;
+sint setktuple = -1;
+sint setkimura = -1;
+sint settopdiags = -1;
+sint setpairgap = -1;
+sint settossgaps = -1;
+sint setnopgap = -1;
+sint setnohgap = -1;
+sint setnovgap = -1;
+sint sethgapres = -1;
+sint setvgapres = -1;	/* NOTE(review): no table row in this header refers to this flag - confirm it is used elsewhere */
+sint setuseendgaps = -1;
+sint setmaxdiv = -1;
+sint setgapdist = -1;
+sint setdebug = -1;
+sint setoutfile = -1;
+sint setinfile = -1;
+sint setprofile1 = -1;
+sint setprofile2 = -1;
+sint setalign = -1;
+sint setconvert = -1;
+sint setnewtree = -1;
+sint setusetree = -1;
+sint setnewtree1 = -1;
+sint setusetree1 = -1;
+sint setnewtree2 = -1;
+sint setusetree2 = -1;
+sint setbootstrap = -1;
+sint settree = -1;
+sint setprofile = -1;
+sint setsequences = -1;
+sint setsecstr1 = -1;	/* tied to the "nosecstr1" switch */
+sint setsecstr2 = -1;	/* tied to the "nosecstr2" switch */
+sint setsecstroutput = -1;
+sint sethelixgap = -1;
+sint setstrandgap = -1;
+sint setloopgap = -1;
+sint setterminalgap = -1;
+sint sethelixendin = -1;
+sint sethelixendout = -1;
+sint setstrandendin = -1;
+sint setstrandendout = -1;
+
+/*
+   multiple alignment parameters
+*/
+float 		dna_gap_open = 15.0,  dna_gap_extend = 6.66;	/* default gap penalties for DNA */
+float 		prot_gap_open = 10.0, prot_gap_extend = 0.2;	/* default gap penalties for protein */
+sint		profile_type = PROFILE;
+sint 		gap_dist = 4;
+sint 		output_order   = ALIGNED;
+sint    	divergence_cutoff = 30;
+sint	    matnum = 3;
+char 		mtrxname[FILENAMELEN+1] = "gonnet";	/* default protein matrix name */
+sint	    dnamatnum = 1;
+char 		dnamtrxname[FILENAMELEN+1] = "iub";	/* default DNA matrix name */
+char 		hyd_residues[] = "GPSNDQEKR";
+Boolean 	no_weights = FALSE;
+Boolean 	neg_matrix = FALSE;
+Boolean		no_hyd_penalties = FALSE;
+Boolean		no_var_penalties = TRUE;
+Boolean		no_pref_penalties = FALSE;
+Boolean		use_endgaps = FALSE;
+Boolean		endgappenalties = FALSE;
+Boolean		reset_alignments_new  = FALSE;		/* DES */
+Boolean		reset_alignments_all  = FALSE;		/* DES */
+sint		output_struct_penalties = 0;
+sint        struct_penalties1 = NONE;
+sint        struct_penalties2 = NONE;
+Boolean		use_ss1 = TRUE;
+Boolean		use_ss2 = TRUE;
+sint        helix_penalty = 4;
+sint        strand_penalty = 4;
+sint        loop_penalty = 1;
+sint        helix_end_minus = 3;
+sint        helix_end_plus = 0;
+sint        strand_end_minus = 1;
+sint        strand_end_plus = 1;
+sint        helix_end_penalty = 2;
+sint        strand_end_penalty = 2;
+Boolean	    use_ambiguities = FALSE;
+
+/*
+   pairwise alignment parameters
+*/
+float  		dna_pw_go_penalty = 15.0,  dna_pw_ge_penalty = 6.66;	/* default pairwise gap open / extend penalties for DNA */
+float 		prot_pw_go_penalty = 10.0, prot_pw_ge_penalty = 0.1;	/* default pairwise gap open / extend penalties for protein */
+sint	    pw_matnum = 3;
+char 		pw_mtrxname[FILENAMELEN+1] = "gonnet";	/* pairalign() compares this with "blosum", "pam", "gonnet" and "id" */
+sint	    pw_dnamatnum = 1;
+char 		pw_dnamtrxname[FILENAMELEN+1] = "iub";	/* pairalign() compares this with "iub" and "clustalw" */
+char     usermtrxname[FILENAMELEN+1], pw_usermtrxname[FILENAMELEN+1];
+char     dnausermtrxname[FILENAMELEN+1], pw_dnausermtrxname[FILENAMELEN+1];
+
+Boolean  	quick_pairalign = FALSE;
+float		transition_weight = 0.5;	/* pairalign() multiplies matrix[0][0] by this for six DNA matrix cells */
+sint		new_seq;
+
+/*
+   quick pairwise alignment parameters
+*/
+sint   	     	dna_ktup      = 2;   /* default parameters for DNA */
+sint    	    	dna_wind_gap  = 5;
+sint    	    	dna_signif    = 4;
+sint    	    	dna_window    = 4;
+
+sint        	prot_ktup     = 1;   /* default parameters for proteins */
+sint        	prot_wind_gap = 3;
+sint        	prot_signif   = 5;
+sint        	prot_window   = 5;
+Boolean         percent=TRUE;
+Boolean		tossgaps = FALSE;
+Boolean		kimura = FALSE;
+
+
+sint	        boot_ntrials  = 1000;
+unsigned sint    boot_ran_seed = 111;	/* NOTE(review): "unsigned sint" only compiles if sint is a macro, not a typedef - confirm in the project headers */
+
+
+sint    		debug = 0;	/* pairalign.c prints extra output to stdout when this is > 0 or > 1 */
+
+Boolean        	explicit_dnaflag = FALSE; /* Explicit setting of sequence type on comm.line*/
+Boolean        	lowercase = TRUE; /* Flag for GDE output - set on comm. line*/
+Boolean        	cl_seq_numbers = FALSE;
+
+Boolean        	seqRange = FALSE; /* Ramu */
+
+Boolean        	output_clustal = TRUE;	/* of the output_* format flags only this one defaults to TRUE */
+Boolean        	output_gcg     = FALSE;
+Boolean        	output_phylip  = FALSE;
+Boolean        	output_nbrf    = FALSE;
+Boolean        	output_gde     = FALSE;
+Boolean        	output_nexus   = FALSE;
+Boolean        	output_fasta   = FALSE;
+
+Boolean         showaln        = TRUE;
+Boolean         save_parameters = FALSE;
+
+/* DES */
+Boolean        	output_tree_clustal   = FALSE;
+Boolean        	output_tree_phylip    = TRUE;	/* of the output_tree_* flags only this one defaults to TRUE */
+Boolean        	output_tree_distances = FALSE;
+Boolean        	output_tree_nexus = FALSE;
+Boolean        	output_pim = FALSE;
+
+
+sint		bootstrap_format      = BS_BRANCH_LABELS;
+
+/*These are all the positively scoring groups that occur in the Gonnet Pam250
+matrix. There are strong and weak groups, defined as strong score >0.5 and
+weak score =<0.5. Strong matching columns to be assigned ':' and weak matches
+assigned '.' in the clustal output format.
+*/
+
+char *res_cat1[] = {	/* NULL-terminated list of residue groups; presumably the "strong" groups of the comment above, given that it comes first - confirm */
+                "STA",
+                "NEQK",
+                "NHQK",
+                "NDEQ",
+                "QHRK",
+                "MILV",
+                "MILF",
+                "HY",
+                "FYW",
+                NULL };
+
+char *res_cat2[] = {	/* NULL-terminated list of residue groups; presumably the "weak" groups of the comment above - confirm */
+                "CSA",
+                "ATV",
+                "SAG",
+                "STNK",
+                "STPA",
+                "SGND",
+                "SNDEQK",
+                "NDEQHK",
+                "NEQHRK",
+                "FVLIM",
+                "HFY",
+                NULL };
+
+
+
+static char *type_arg[] = {	/* option names accepted by the "type" switch; like every *_arg list here it ends with "" */
+                "protein",
+                "dna",
+		""};
+
+static char *bootlabels_arg[] = {	/* option names for "bootlabels" */
+                "node",
+                "branch",
+		""};
+
+static char *outorder_arg[] = {	/* option names for "outorder" */
+                "input",
+                "aligned",
+		""};
+
+static char *case_arg[] = {	/* option names for "case" */
+                "lower",
+                "upper",
+		""};
+
+static char *seqno_arg[] = {	/* option names for "seqnos" */
+                "off",
+                "on",
+		""};
+
+static char *seqno_range_arg[] = {	/* option names for "seqno_range" */
+                "off",
+                "on",
+		""};
+
+static char *score_arg[] = {	/* option names for "score" */
+                "percent",
+                "absolute",
+		""};
+
+static char *output_arg[] = {	/* option names for "output" */
+                "gcg",
+                "gde",
+                "pir",
+                "phylip",
+                "nexus",
+                "fasta",
+		""};
+
+static char *outputtree_arg[] = {	/* option names for "outputtree" */
+                "nj",
+                "phylip",
+                "dist",
+                "nexus",
+		""};
+
+static char *outputsecstr_arg[] = {	/* option names for "secstrout" */
+                "structure",
+                "mask",
+                "both",
+                "none",
+		""};
+
+/*
+     command line initialisation
+
+     type = 0    no argument
+     type = 1    integer argument
+     type = 2    float argument
+     type = 3    string argument
+     type = 4    filename
+     type = 5    opts
+*/
+#define NOARG 0	/* switch takes no argument */
+#define INTARG 1	/* integer argument */
+#define FLTARG 2	/* float argument */
+#define STRARG 3	/* string argument */
+#define FILARG 4	/* filename argument */
+#define OPTARG 5	/* argument must be one of the names in the row's arg list */
+
+
+/* command line switches for DATA       **************************/
+cmd_line_data cmd_line_file[] = {	/* rows are {name, flag address, argument kind, option list}, written without inner braces */
+     "infile",		&setinfile,		FILARG,	NULL,
+     "profile1",	&setprofile1,		FILARG,	NULL,
+     "profile2",	&setprofile2,		FILARG,	NULL,
+     "",		NULL,			-1};	/* end marker: empty name, type -1; the omitted arg member is zero-initialised */
+/* command line switches for VERBS      **************************/
+cmd_line_data cmd_line_verb[] = {	/* same row layout as cmd_line_file */
+     "help",		&sethelp,		NOARG,	NULL,
+     "check",       &sethelp,    		NOARG,	NULL,	/* sets the same flag as "help" */
+     "options",		&setoptions,		NOARG,	NULL,
+     "align",		&setalign,		NOARG,	NULL,
+     "newtree",		&setnewtree,		FILARG,	NULL,
+     "usetree",		&setusetree,		FILARG,	NULL,
+     "newtree1",	&setnewtree1,		FILARG,	NULL,
+     "usetree1",	&setusetree1,		FILARG,	NULL,
+     "newtree2",	&setnewtree2,		FILARG,	NULL,
+     "usetree2",	&setusetree2,		FILARG,	NULL,
+     "bootstrap",	&setbootstrap,		NOARG,	NULL,	/* NOTE(review): NOARG here while "seed" in cmd_line_para takes the integer - confirm a trial count is not expected */
+     "tree",		&settree, 		NOARG,	NULL,
+     "quicktree",	&setquicktree,		NOARG,	NULL,
+     "convert",		&setconvert,		NOARG,	NULL,
+     "interactive",	&setinteractive,	NOARG,	NULL,
+     "batch",		&setbatch,		NOARG,	NULL,
+     "",		NULL,			-1};	/* end marker */
+/* command line switches for PARAMETERS **************************/
+cmd_line_data cmd_line_para[] = {	/* same row layout as cmd_line_file; OPTARG rows carry one of the *_arg lists */
+     "type",		&settype,		OPTARG,	type_arg,
+     "profile",	&setprofile,	NOARG,	NULL,
+     "sequences",	&setsequences,	NOARG,	NULL,
+     "matrix",		&setmatrix,		FILARG,	NULL,
+     "dnamatrix",	&setdnamatrix,		FILARG,	NULL,
+     "negative",	&setnegative,		NOARG,	NULL,
+     "noweights",	&setnoweights,		NOARG,	NULL,
+     "gapopen", 	&setgapopen,		FLTARG,	NULL,
+     "gapext",		&setgapext,		FLTARG,	NULL,
+     "endgaps",		&setuseendgaps,		NOARG,	NULL,
+     "nopgap",		&setnopgap,		NOARG,	NULL,
+     "nohgap",		&setnohgap,		NOARG,	NULL,
+     "novgap",		&setnovgap,		NOARG,	NULL,
+     "hgapresidues",	&sethgapres,		STRARG,	NULL,
+     "maxdiv",		&setmaxdiv,		INTARG,	NULL,
+
+     "gapdist",		&setgapdist,		INTARG,	NULL,
+     "pwmatrix",	&setpwmatrix,		FILARG,	NULL,
+     "pwdnamatrix",	&setpwdnamatrix,	FILARG,	NULL,
+     "pwgapopen",	&setpwgapopen,		FLTARG,	NULL,
+     "pwgapext",	&setpwgapext,		FLTARG,	NULL,
+     "ktuple",		&setktuple,		INTARG,	NULL,
+     "window",		&setwindow,		INTARG,	NULL,
+     "pairgap",		&setpairgap,		INTARG,	NULL,
+     "topdiags",	&settopdiags,		INTARG,	NULL,
+     "score",		&setscore,		OPTARG,	score_arg,
+     "transweight",	&settransweight,	FLTARG,	NULL,
+     "seed",		&setseed,		INTARG,	NULL,
+     "kimura",		&setkimura,		NOARG,	NULL,
+     "tossgaps",	&settossgaps,		NOARG,	NULL,
+     "bootlabels",	&setbootlabels,		OPTARG,	bootlabels_arg,
+     "debug",		&setdebug,		INTARG,	NULL,
+     "output",		&setoutput,		OPTARG,	output_arg,
+     "outputtree",	&setoutputtree,		OPTARG,	outputtree_arg,
+     "outfile",		&setoutfile,		FILARG,	NULL,
+     "outorder",	&setoutorder,		OPTARG,	outorder_arg,
+     "case",		&setcase,		OPTARG,	case_arg,
+     "seqnos",		&setseqno,		OPTARG,	seqno_arg,
+
+     "seqno_range",	&setseqno_range,	OPTARG,	seqno_range_arg, /* this one should be on/off  and */
+     "range",           &setrange,             STRARG, NULL,  /* this one should be like 10:20  ,   messy option settings */
+
+     "nosecstr1",   &setsecstr1,		NOARG, NULL,
+     "nosecstr2",   &setsecstr2,		NOARG, NULL,
+     "secstrout",   &setsecstroutput,	OPTARG,  outputsecstr_arg,
+     "helixgap",    &sethelixgap,		INTARG, NULL,
+     "strandgap",   &setstrandgap,		INTARG, NULL,
+     "loopgap",     &setloopgap,		INTARG, NULL,
+     "terminalgap", &setterminalgap,	INTARG, NULL,
+     "helixendin",  &sethelixendin,		INTARG, NULL,
+     "helixendout", &sethelixendout,	INTARG, NULL,
+     "strandendin", &setstrandendin,	INTARG, NULL,
+     "strandendout",&setstrandendout,	INTARG, NULL,
+
+     "",		NULL,			-1};	/* end marker */
+
+

Added: trunk/packages/clustalw/branches/upstream/current/prfalign.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/prfalign.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/prfalign.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,1132 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "clustalw.h"
+#define ENDALN 127	/* code prfalign() stores after the last column of each alignment[] row */
+
+#define MAX(a,b) ((a)>(b)?(a):(b))	/* larger of a and b; each argument is evaluated twice */
+#define MIN(a,b) ((a)<(b)?(a):(b))	/* smaller of a and b; each argument is evaluated twice */
+
+/*
+ *   Prototypes
+ */
+static lint 	pdiff(sint A,sint B,sint i,sint j,sint go1,sint go2);
+static lint 	prfscore(sint n, sint m);
+static sint 	gap_penalty1(sint i, sint j,sint k);
+static sint 	open_penalty1(sint i, sint j);
+static sint 	ext_penalty1(sint i, sint j);
+static sint 	gap_penalty2(sint i, sint j,sint k);
+static sint 	open_penalty2(sint i, sint j);
+static sint 	ext_penalty2(sint i, sint j);
+static void 	padd(sint k);
+static void 	pdel(sint k);
+static void 	palign(void);
+static void 	ptracepath(sint *alen);
+static void 	add_ggaps(void);
+static char *     add_ggaps_mask(char *mask, int len, char *path1, char *path2);
+
+/*
+ *   Global variables
+ */
+extern double 	**tmat;
+extern float 	gap_open, gap_extend;
+extern float    transition_weight;
+extern sint 	gap_pos1, gap_pos2;
+extern sint 	max_aa;
+extern sint 	nseqs;
+extern sint 	*seqlen_array;
+extern sint 	*seq_weight;
+extern sint    	debug;
+extern Boolean 	neg_matrix;
+extern sint 	mat_avscore;
+extern short  	blosum30mt[], blosum40mt[], blosum45mt[];
+extern short  	blosum62mt2[], blosum80mt[];
+extern short  	pam20mt[], pam60mt[];
+extern short  	pam120mt[], pam160mt[], pam350mt[];
+extern short  	gon40mt[], gon80mt[];
+extern short    gon120mt[], gon160mt[], gon250mt[], gon350mt[];
+extern short  	clustalvdnamt[],swgapdnamt[];
+extern short  	idmat[];
+extern short    usermat[];
+extern short    userdnamat[];
+extern Boolean user_series;
+extern UserMatSeries matseries;
+
+extern short	def_dna_xref[],def_aa_xref[],dna_xref[],aa_xref[];
+extern sint		max_aln_length;
+extern Boolean	distance_tree;
+extern Boolean	dnaflag;
+extern char 	mtrxname[];
+extern char 	dnamtrxname[];
+extern char 	**seq_array;
+extern char 	*amino_acid_codes;
+extern char     *gap_penalty_mask1,*gap_penalty_mask2;
+extern char     *sec_struct_mask1,*sec_struct_mask2;
+extern sint     struct_penalties1, struct_penalties2;
+extern Boolean  use_ss1, use_ss2;
+extern Boolean endgappenalties;
+
+static sint 	print_ptr,last_print;	/* presumably the same edit-script cursor pair as in pairalign.c - the code using them is outside this view */
+static sint 		*displ;
+
+static char   	**alignment;	/* one row per sequence: profile 1 rows first, then profile 2 rows; allocated in prfalign() */
+static sint    	*aln_len;	/* per-row length: prf_length1 or prf_length2 */
+static sint    *aln_weight;	/* per-row weight copied from seq_weight[] */
+static char   	*aln_path1, *aln_path2;
+static sint    	alignment_len;
+static sint    	**profile1, **profile2;
+static lint 	    *HH, *DD, *RR, *SS;
+static lint 	    *gS;
+static sint    matrix[NUMRES][NUMRES];	/* scoring matrix filled in by get_matrix() */
+static sint    nseqs1, nseqs2;	/* number of sequences in group 1 and in group 2 */
+static sint    	prf_length1, prf_length2;	/* longest seqlen_array[] entry within each group */
+static sint    *gaps;
+static sint    gapcoef1,gapcoef2;	/* both set to 100 * gap_open * scale in the DNA branch of prfalign() */
+static sint    lencoef1,lencoef2;	/* both set to 100 * gap_extend * scale in the DNA branch of prfalign() */
+static Boolean switch_profiles;	/* TRUE when prfalign() swapped group labels 1 and 2 because group 2 had more sequences */
+
+lint prfalign(sint *group, sint *aligned)
+{
+
+  static Boolean found;
+  static Boolean negative;
+  static Boolean error_given=FALSE;
+  static sint    i, j, count = 0;
+  static sint  NumSeq;
+  static sint    len, len1, len2, is, minlen;
+  static sint   se1, se2, sb1, sb2;
+  static sint  maxres;
+  static sint int_scale;
+  static short  *matptr;
+  static short	*mat_xref;
+  static char   c;
+  static lint    score;
+  static float  scale;
+  static double logmin,logdiff;
+  static double pcid;
+
+
+  alignment = (char **) ckalloc( nseqs * sizeof (char *) );
+  aln_len = (sint *) ckalloc( nseqs * sizeof (sint) );
+  aln_weight = (sint *) ckalloc( nseqs * sizeof (sint) );
+
+  for (i=0;i<nseqs;i++)
+     if (aligned[i+1] == 0) group[i+1] = 0;
+
+  nseqs1 = nseqs2 = 0;
+  for (i=0;i<nseqs;i++)
+    {
+        if (group[i+1] == 1) nseqs1++;
+        else if (group[i+1] == 2) nseqs2++;
+    }
+
+  if ((nseqs1 == 0) || (nseqs2 == 0)) return(0.0);
+
+  if (nseqs2 > nseqs1)
+    {
+     switch_profiles = TRUE;
+     for (i=0;i<nseqs;i++)
+       {
+          if (group[i+1] == 1) group[i+1] = 2;
+          else if (group[i+1] == 2) group[i+1] = 1;
+       }
+    }
+  else
+  	switch_profiles = FALSE;
+
+  int_scale = 100;
+
+/*
+   calculate the mean of the sequence pc identities between the two groups
+*/
+        count = 0;
+        pcid = 0.0;
+	negative=neg_matrix;
+        for (i=0;i<nseqs;i++)
+          {
+             if (group[i+1] == 1)
+             for (j=0;j<nseqs;j++)
+               if (group[j+1] == 2)
+                    {
+                       count++;
+                       pcid += tmat[i+1][j+1];
+                    }
+          }
+
+  pcid = pcid/(float)count;
+
+if (debug > 0) fprintf(stdout,"mean tmat %3.1f\n", pcid);
+
+
+/*
+  Make the first profile.
+*/
+  prf_length1 = 0;
+  for (i=0;i<nseqs;i++)
+       if (group[i+1] == 1)
+		if(seqlen_array[i+1]>prf_length1) prf_length1=seqlen_array[i+1];
+
+  nseqs1 = 0;
+if (debug>0) fprintf(stdout,"sequences profile 1:\n");
+  for (i=0;i<nseqs;i++)
+    {
+       if (group[i+1] == 1)
+          {
+if (debug>0) {
+extern char **names;
+fprintf(stdout,"%s\n",names[i+1]);
+}
+             len = seqlen_array[i+1];
+             alignment[nseqs1] = (char *) ckalloc( (prf_length1+2) * sizeof (char) );
+             for (j=0;j<len;j++)
+               alignment[nseqs1][j] = seq_array[i+1][j+1];
+		for(j=len;j<prf_length1;j++)
+			alignment[nseqs1][j]=gap_pos1;
+             alignment[nseqs1][j] = ENDALN;
+             aln_len[nseqs1] = prf_length1;
+             aln_weight[nseqs1] = seq_weight[i];
+             nseqs1++;
+          }
+    }
+
+/*
+  Make the second profile.
+*/
+  prf_length2 = 0;
+  for (i=0;i<nseqs;i++)
+       if (group[i+1] == 2)
+		if(seqlen_array[i+1]>prf_length2) prf_length2=seqlen_array[i+1];
+
+  nseqs2 = 0;
+if (debug>0) fprintf(stdout,"sequences profile 2:\n");
+  for (i=0;i<nseqs;i++)
+    {
+       if (group[i+1] == 2)
+          {
+if (debug>0) {
+extern char **names;
+fprintf(stdout,"%s\n",names[i+1]);
+}
+             len = seqlen_array[i+1];
+             alignment[nseqs1+nseqs2] =
+                   (char *) ckalloc( (prf_length2+2) * sizeof (char) );
+             for (j=0;j<len;j++)
+               alignment[nseqs1+nseqs2][j] = seq_array[i+1][j+1];
+		for(j=len;j<prf_length2;j++)
+			alignment[nseqs1+nseqs2][j]=gap_pos1;
+             alignment[nseqs1+nseqs2][j] = ENDALN;
+             aln_len[nseqs1+nseqs2] = prf_length2;
+             aln_weight[nseqs1+nseqs2] = seq_weight[i];
+             nseqs2++;
+          }
+    }
+
+  max_aln_length = prf_length1 + prf_length2+2;
+  
+/*
+   calculate real length of profiles - removing gaps!
+*/
+  len1=0;
+  for (i=0;i<nseqs1;i++)
+    {
+       is=0;
+       for (j=0; j<MIN(aln_len[i],prf_length1); j++)
+	  {
+            c = alignment[i][j];
+      	    if ((c !=gap_pos1) && (c != gap_pos2)) is++;
+          }
+       len1+=is;
+    }
+  len1/=(float)nseqs1;
+   
+  len2=0;
+  for (i=nseqs1;i<nseqs2+nseqs1;i++)
+    {
+       is=0;
+       for (j=0; j<MIN(aln_len[i],prf_length2); j++)
+	  {
+            c = alignment[i][j];
+      	    if ((c !=gap_pos1) && (c != gap_pos2)) is++;
+          }
+       len2+=is;
+    }
+  len2/=(float)nseqs2;
+
+  if (dnaflag)
+     {
+       scale=1.0;
+       if (strcmp(dnamtrxname, "iub") == 0)
+	{
+            matptr = swgapdnamt;
+            mat_xref = def_dna_xref;
+	}
+       else if (strcmp(dnamtrxname, "clustalw") == 0)
+	{
+            matptr = clustalvdnamt;
+            mat_xref = def_dna_xref;
+            scale=0.66;
+	}
+       else 
+        {
+           matptr = userdnamat;
+           mat_xref = dna_xref;
+        }
+            maxres = get_matrix(matptr, mat_xref, matrix, neg_matrix, int_scale);
+            if (maxres == 0) return((sint)-1);
+/*
+            matrix[0][4]=transition_weight*matrix[0][0];
+            matrix[4][0]=transition_weight*matrix[0][0];
+            matrix[2][11]=transition_weight*matrix[0][0];
+            matrix[11][2]=transition_weight*matrix[0][0];
+            matrix[2][12]=transition_weight*matrix[0][0];
+            matrix[12][2]=transition_weight*matrix[0][0];
+*/
+/* fix suggested by Chanan Rubin at Compugen */
+           matrix[mat_xref[0]][mat_xref[4]]=transition_weight*matrix[0][0]; 
+           matrix[mat_xref[4]][mat_xref[0]]=transition_weight*matrix[0][0]; 
+           matrix[mat_xref[2]][mat_xref[11]]=transition_weight*matrix[0][0]; 
+           matrix[mat_xref[11]][mat_xref[2]]=transition_weight*matrix[0][0]; 
+           matrix[mat_xref[2]][mat_xref[12]]=transition_weight*matrix[0][0]; 
+           matrix[mat_xref[12]][mat_xref[2]]=transition_weight*matrix[0][0]; 
+
+          gapcoef1 = gapcoef2 = 100.0 * gap_open *scale;
+          lencoef1 = lencoef2 = 100.0 * gap_extend *scale;
+    }
+  else
+    {
+  	if(len1==0 || len2==0) {
+  		logmin=1.0;
+  		logdiff=1.0;
+  	}  
+  	else {
+  		minlen = MIN(len1,len2);
+ 		logmin = 1.0/log10((double)minlen);
+ 		if (len2<len1)
+    	 		logdiff = 1.0+0.5*log10((double)((float)len2/(float)len1));
+  		else if (len1<len2)
+  	   		logdiff = 1.0+0.5*log10((double)((float)len1/(float)len2));
+  		else logdiff=1.0;
+		if(logdiff<0.9) logdiff=0.9;
+  	}
+if(debug>0) fprintf(stdout,"%d %d logmin %f   logdiff %f\n",
+(pint)len1,(pint)len2, logmin,logdiff);
+       scale=0.75;
+       if (strcmp(mtrxname, "blosum") == 0)
+        {
+           scale=0.75;
+           if (negative || distance_tree == FALSE) matptr = blosum40mt;
+           else if (pcid > 80.0)
+             {
+                matptr = blosum80mt;
+             }
+           else if (pcid > 60.0)
+             {
+                matptr = blosum62mt2;
+             }
+           else if (pcid > 40.0)
+             {
+                matptr = blosum45mt;
+             }
+           else if (pcid > 30.0)
+             {
+                scale=0.5;
+                matptr = blosum45mt;
+             }
+           else if (pcid > 20.0)
+             {
+                scale=0.6;
+                matptr = blosum45mt;
+             }
+           else 
+             {
+                scale=0.6;
+                matptr = blosum30mt;
+             }
+           mat_xref = def_aa_xref;
+
+        }
+       else if (strcmp(mtrxname, "pam") == 0)
+        {
+           scale=0.75;
+           if (negative || distance_tree == FALSE) matptr = pam120mt;
+           else if (pcid > 80.0) matptr = pam20mt;
+           else if (pcid > 60.0) matptr = pam60mt;
+           else if (pcid > 40.0) matptr = pam120mt;
+           else matptr = pam350mt;
+           mat_xref = def_aa_xref;
+        }
+       else if (strcmp(mtrxname, "gonnet") == 0)
+        {
+	   scale/=2.0;
+           if (negative || distance_tree == FALSE) matptr = gon250mt;
+           else if (pcid > 35.0)
+             {
+                matptr = gon80mt;
+		scale/=2.0;
+             }
+           else if (pcid > 25.0)
+             {
+                if(minlen<100) matptr = gon250mt;
+                else matptr = gon120mt;
+             }
+           else
+             {
+                if(minlen<100) matptr = gon350mt;
+		else matptr = gon160mt;
+             }
+           mat_xref = def_aa_xref;
+           int_scale /= 10;
+        }
+       else if (strcmp(mtrxname, "id") == 0)
+        {
+           matptr = idmat;
+           mat_xref = def_aa_xref;
+        }
+       else if(user_series)
+        {
+           matptr=NULL;
+	   found=FALSE;
+	   for(i=0;i<matseries.nmat;i++)
+		if(pcid>=matseries.mat[i].llimit && pcid<=matseries.mat[i].ulimit)
+		{
+			j=i;
+			found=TRUE;
+			break;
+		}
+	   if(found==FALSE)
+	   {
+		if(!error_given)
+		warning(
+"\nSeries matrix not found for sequence percent identity = %d.\n"
+"(Using first matrix in series as a default.)\n"
+"This alignment may not be optimal!\n"
+"SUGGESTION: Check your matrix series input file and try again.",(int)pcid);
+		error_given=TRUE;
+		j=0;
+	   }
+if (debug>0) fprintf(stdout,"pcid %d  matrix %d\n",(pint)pcid,(pint)j+1);
+
+           matptr = matseries.mat[j].matptr;
+           mat_xref = matseries.mat[j].aa_xref;
+/* this gives a scale of 0.5 for pcid=llimit and 1.0 for pcid=ulimit */
+           scale=0.5+(pcid-matseries.mat[j].llimit)/((matseries.mat[j].ulimit-matseries.mat[j].llimit)*2.0);
+        }
+       else 
+        {
+           matptr = usermat;
+           mat_xref = aa_xref;
+        }
+if(debug>0) fprintf(stdout,"pcid %3.1f scale %3.1f\n",pcid,scale);
+      	maxres = get_matrix(matptr, mat_xref, matrix, negative, int_scale);
+      if (maxres == 0)
+        {
+           fprintf(stdout,"Error: matrix %s not found\n", mtrxname);
+           return(-1);
+        }
+
+          if (negative) {
+              gapcoef1 = gapcoef2 = 100.0 * (float)(gap_open);
+              lencoef1 = lencoef2 = 100.0 * gap_extend;
+	  }
+          else {
+          if (mat_avscore <= 0)
+              gapcoef1 = gapcoef2 = 100.0 * (float)(gap_open + logmin);
+	  else
+              gapcoef1 = gapcoef2 = scale * mat_avscore * (float)(gap_open/(logdiff*logmin));
+              lencoef1 = lencoef2 = 100.0 * gap_extend;
+	 }
+    }
+if (debug>0)
+{
+fprintf(stdout,"matavscore %d\n",mat_avscore);
+fprintf(stdout,"Gap Open1 %d  Gap Open2 %d  Gap Extend1 %d   Gap Extend2 %d\n",
+   (pint)gapcoef1,(pint)gapcoef2, (pint)lencoef1,(pint)lencoef2);
+fprintf(stdout,"Matrix  %s\n", mtrxname);
+}
+
+  profile1 = (sint **) ckalloc( (prf_length1+2) * sizeof (sint *) );
+  for(i=0; i<prf_length1+2; i++)
+       profile1[i] = (sint *) ckalloc( (LENCOL+2) * sizeof(sint) );
+
+  profile2 = (sint **) ckalloc( (prf_length2+2) * sizeof (sint *) );
+  for(i=0; i<prf_length2+2; i++)
+       profile2[i] = (sint *) ckalloc( (LENCOL+2) * sizeof(sint) );
+
+/*
+  calculate the Gap Coefficients.
+*/
+     gaps = (sint *) ckalloc( (max_aln_length+1) * sizeof (sint) );
+
+     if (switch_profiles == FALSE)
+        calc_gap_coeff(alignment, gaps, profile1, (struct_penalties1 && use_ss1), gap_penalty_mask1,
+           (sint)0, nseqs1, prf_length1, gapcoef1, lencoef1);
+     else
+        calc_gap_coeff(alignment, gaps, profile1, (struct_penalties2 && use_ss2), gap_penalty_mask2,
+           (sint)0, nseqs1, prf_length1, gapcoef1, lencoef1);
+/*
+  calculate the profile matrix.
+*/
+     calc_prf1(profile1, alignment, gaps, matrix,
+          aln_weight, prf_length1, (sint)0, nseqs1);
+
+if (debug>4)
+{
+extern char *amino_acid_codes;
+  for (j=0;j<=max_aa;j++)
+    fprintf(stdout,"%c    ", amino_acid_codes[j]);
+ fprintf(stdout,"\n");
+  for (i=0;i<prf_length1;i++)
+   {
+    for (j=0;j<=max_aa;j++)
+      fprintf(stdout,"%d ", (pint)profile1[i+1][j]);
+    fprintf(stdout,"%d ", (pint)profile1[i+1][gap_pos1]);
+    fprintf(stdout,"%d ", (pint)profile1[i+1][gap_pos2]);
+    fprintf(stdout,"%d %d\n",(pint)profile1[i+1][GAPCOL],(pint)profile1[i+1][LENCOL]);
+   }
+}
+
+/*
+  calculate the Gap Coefficients.
+*/
+
+     if (switch_profiles == FALSE)
+        calc_gap_coeff(alignment, gaps, profile2, (struct_penalties2 && use_ss2), gap_penalty_mask2,
+           nseqs1, nseqs1+nseqs2, prf_length2, gapcoef2, lencoef2);
+     else
+        calc_gap_coeff(alignment, gaps, profile2, (struct_penalties1 && use_ss1), gap_penalty_mask1,
+           nseqs1, nseqs1+nseqs2, prf_length2, gapcoef2, lencoef2);
+/*
+  calculate the profile matrix.
+*/
+     calc_prf2(profile2, alignment, aln_weight,
+           prf_length2, nseqs1, nseqs1+nseqs2);
+
+     aln_weight=ckfree((void *)aln_weight);
+
+if (debug>4)
+{
+extern char *amino_acid_codes;
+  for (j=0;j<=max_aa;j++)
+    fprintf(stdout,"%c    ", amino_acid_codes[j]);
+ fprintf(stdout,"\n");
+  for (i=0;i<prf_length2;i++)
+   {
+    for (j=0;j<=max_aa;j++)
+      fprintf(stdout,"%d ", (pint)profile2[i+1][j]);
+    fprintf(stdout,"%d ", (pint)profile2[i+1][gap_pos1]);
+    fprintf(stdout,"%d ", (pint)profile2[i+1][gap_pos2]);
+    fprintf(stdout,"%d %d\n",(pint)profile2[i+1][GAPCOL],(pint)profile2[i+1][LENCOL]);
+   }
+}
+
+  aln_path1 = (char *) ckalloc( (max_aln_length+1) * sizeof(char) );
+  aln_path2 = (char *) ckalloc( (max_aln_length+1) * sizeof(char) );
+
+
+/*
+   align the profiles
+*/
+/* use Myers and Miller to align two sequences */
+
+  last_print = 0;
+  print_ptr = 1;
+
+  sb1 = sb2 = 0;
+  se1 = prf_length1;
+  se2 = prf_length2;
+
+  HH = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+  DD = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+  RR = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+  SS = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+  gS = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
+  displ = (sint *) ckalloc( (max_aln_length+1) * sizeof (sint) );
+
+  score = pdiff(sb1, sb2, se1-sb1, se2-sb2, profile1[0][GAPCOL], profile1[prf_length1][GAPCOL]);
+
+  HH=ckfree((void *)HH);
+  DD=ckfree((void *)DD);
+  RR=ckfree((void *)RR);
+  SS=ckfree((void *)SS);
+  gS=ckfree((void *)gS);
+
+  ptracepath( &alignment_len);
+  
+  displ=ckfree((void *)displ);
+
+  add_ggaps();
+
+  for (i=0;i<prf_length1+2;i++)
+     profile1[i]=ckfree((void *)profile1[i]);
+  profile1=ckfree((void *)profile1);
+
+  for (i=0;i<prf_length2+2;i++)
+     profile2[i]=ckfree((void *)profile2[i]);
+  profile2=ckfree((void *)profile2);
+
+  prf_length1 = alignment_len;
+
+  aln_path1=ckfree((void *)aln_path1);
+  aln_path2=ckfree((void *)aln_path2);
+
+  NumSeq = 0;
+  for (j=0;j<nseqs;j++)
+    {
+       if (group[j+1]  == 1)
+         {
+            seqlen_array[j+1] = prf_length1;
+	    realloc_seq(j+1,prf_length1);
+            for (i=0;i<prf_length1;i++)
+              seq_array[j+1][i+1] = alignment[NumSeq][i];
+            NumSeq++;
+         }
+    }
+  for (j=0;j<nseqs;j++)
+    {
+       if (group[j+1]  == 2)
+         {
+            seqlen_array[j+1] = prf_length1;
+            seq_array[j+1] = (char *)realloc(seq_array[j+1], (prf_length1+2) * sizeof (char));
+	    realloc_seq(j+1,prf_length1);
+            for (i=0;i<prf_length1;i++)
+              seq_array[j+1][i+1] = alignment[NumSeq][i];
+            NumSeq++;
+         }
+    }
+
+  for (i=0;i<nseqs1+nseqs2;i++)
+     alignment[i]=ckfree((void *)alignment[i]);
+  alignment=ckfree((void *)alignment);
+
+  aln_len=ckfree((void *)aln_len);
+  gaps=ckfree((void *)gaps);
+
+  return(score/100);
+}
+
+/*
+ *  add_ggaps()
+ *
+ *  Rebuild every row of alignment[] at the new length alignment_len by
+ *  walking the path arrays filled in by ptracepath(): rows 0..nseqs1-1
+ *  follow aln_path1, rows nseqs1..nseqs1+nseqs2-1 follow aln_path2.
+ *  A path code of 2 copies the next character of the old row (ENDALN once
+ *  the old row is exhausted), a code of 1 writes gap_pos1.  Any other code
+ *  is reported and the scratch column is left as it was.
+ *  Afterwards the gap penalty / secondary structure masks are expanded
+ *  with add_ggaps_mask().
+ */
+static void add_ggaps(void)
+{
+   sint seq, col, src;
+   sint newlen;
+   char *buf;
+
+   buf = (char *) ckalloc( (alignment_len+1) * sizeof (char) );
+
+/*
+   rows belonging to the first profile
+*/
+   for (seq=0; seq<nseqs1; seq++)
+     {
+       src = 0;
+       for (col=0; col<alignment_len; col++)
+         {
+           switch (aln_path1[col])
+             {
+               case 2:
+                 if (src >= aln_len[seq])
+                    buf[col] = ENDALN;
+                 else
+                    buf[col] = alignment[seq][src];
+                 src++;
+                 break;
+               case 1:
+                 /* insertion in first alignment */
+                 buf[col] = gap_pos1;
+                 break;
+               default:
+                 fprintf(stdout,"Error in aln_path\n");
+                 break;
+             }
+         }
+       buf[col] = ENDALN;
+
+       /* grow the row and copy the scratch buffer back into it */
+       newlen = alignment_len;
+       alignment[seq] = (char *)realloc(alignment[seq], (newlen+2) * sizeof (char));
+       for (col=0; col<newlen; col++)
+          alignment[seq][col] = buf[col];
+       alignment[seq][newlen] = ENDALN;
+       aln_len[seq] = newlen;
+     }
+
+/*
+   rows belonging to the second profile
+*/
+   for (seq=nseqs1; seq<nseqs1+nseqs2; seq++)
+     {
+       src = 0;
+       for (col=0; col<alignment_len; col++)
+         {
+           switch (aln_path2[col])
+             {
+               case 2:
+                 if (src >= aln_len[seq])
+                    buf[col] = ENDALN;
+                 else
+                    buf[col] = alignment[seq][src];
+                 src++;
+                 break;
+               case 1:
+                 /* insertion in second alignment */
+                 buf[col] = gap_pos1;
+                 break;
+               default:
+                 fprintf(stdout,"Error in aln_path\n");
+                 break;
+             }
+         }
+       buf[col] = ENDALN;
+
+       newlen = alignment_len;
+       alignment[seq] = (char *) realloc(alignment[seq], (newlen+2) * sizeof (char) );
+       for (col=0; col<newlen; col++)
+          alignment[seq][col] = buf[col];
+       alignment[seq][newlen] = ENDALN;
+       aln_len[seq] = newlen;
+     }
+
+   buf=ckfree((void *)buf);
+
+/*
+   expand the masks of profile 1 (paths in order 1,2) and of profile 2
+   (paths in order 2,1)
+*/
+   if (struct_penalties1 != NONE)
+       gap_penalty_mask1 = add_ggaps_mask(gap_penalty_mask1,alignment_len,aln_path1,aln_path2);
+   if (struct_penalties1 == SECST)
+       sec_struct_mask1 = add_ggaps_mask(sec_struct_mask1,alignment_len,aln_path1,aln_path2);
+
+   if (struct_penalties2 != NONE)
+       gap_penalty_mask2 = add_ggaps_mask(gap_penalty_mask2,alignment_len,aln_path2,aln_path1);
+   if (struct_penalties2 == SECST)
+       sec_struct_mask2 = add_ggaps_mask(sec_struct_mask2,alignment_len,aln_path2,aln_path1);
+
+/*
+   debug dump: one line per row, '-' for either gap code, stopping at ENDALN
+*/
+if (debug>0)
+{
+  char ch;
+  extern char *amino_acid_codes;
+
+   for (seq=0; seq<nseqs1+nseqs2; seq++)
+     {
+      for (col=0; col<alignment_len; col++)
+       {
+        if (alignment[seq][col] == ENDALN) break;
+        if ((alignment[seq][col] == gap_pos1) || (alignment[seq][col] == gap_pos2))
+           ch = '-';
+        else
+           ch = amino_acid_codes[alignment[seq][col]];
+        fprintf(stdout,"%c", ch);
+       }
+      fprintf(stdout,"\n\n");
+     }
+}
+
+}
+
+/*
+ *  add_ggaps_mask()
+ *
+ *  Expand a mask to `len' columns along an alignment path and return the
+ *  (reallocated) mask.  switch_profiles selects the path that is followed:
+ *  path1 when it is FALSE, path2 otherwise.  A path code of 2 copies the
+ *  next mask character, a code of 1 writes gap_pos1; other codes leave the
+ *  scratch column untouched.  The result is terminated with '\0'.
+ */
+static char * add_ggaps_mask(char *mask, int len, char *path1, char *path2)
+{
+   int col, src;
+   char *buf;
+   char *path;
+
+   buf = (char *) ckalloc( (len+1) * sizeof (char) );
+
+   path = (switch_profiles == FALSE) ? path1 : path2;
+
+   src = 0;
+   for (col=0; col<len; col++)
+     {
+       switch (path[col])
+         {
+           case 2:
+             buf[col] = mask[src];
+             src++;
+             break;
+           case 1:
+             buf[col] = gap_pos1;
+             break;
+           default:
+             break;
+         }
+     }
+
+   /* grow the mask and copy the expanded version back into it */
+   mask = (char *)realloc(mask,(len+2) * sizeof (char));
+   for (col=0; col<len; col++)
+      mask[col] = buf[col];
+   mask[col] ='\0';
+
+   buf=ckfree((void *)buf);
+
+   return(mask);
+}
+
+/*
+ *  prfscore()
+ *
+ *  Score of row n of profile1 against row m of profile2: the sum of the
+ *  products of matching columns 0..max_aa plus the gap_pos1 and gap_pos2
+ *  columns, divided by 10.
+ */
+static lint prfscore(sint n, sint m)
+{
+   sint  col;
+   sint  *row1 = profile1[n];
+   sint  *row2 = profile2[m];
+   lint  total = 0;
+
+   for (col=0; col<=max_aa; col++)
+      total += (row1[col] * row2[col]);
+
+   total += (row1[gap_pos1] * row2[gap_pos1]);
+   total += (row1[gap_pos2] * row2[gap_pos2]);
+
+   return(total/10);
+}
+
+/*
+ *  ptracepath()
+ *
+ *  Convert the operation list displ[1..print_ptr-1] into the two path
+ *  arrays.  An entry of 0 marks one column with code 2 in both paths; a
+ *  positive entry k marks k columns with 2 in aln_path2 and 1 in
+ *  aln_path1; a negative entry -k marks k columns with 2 in aln_path1 and
+ *  1 in aln_path2.  The total number of columns is stored in *alen.
+ */
+static void ptracepath(sint *alen)
+{
+    sint op, step, run, col, nops;
+
+    col = 0;
+    nops = print_ptr-1;
+
+    for (op=1; op<=nops; ++op)
+      {
+if (debug>1) fprintf(stdout,"%d ",(pint)displ[op]);
+        run = displ[op];
+
+        if (run == 0)
+          {
+            aln_path1[col] = 2;
+            aln_path2[col] = 2;
+            ++col;
+            continue;
+          }
+
+        if (run > 0)
+          {
+            for (step=0; step<run; ++step)
+              {
+                aln_path2[col+step] = 2;
+                aln_path1[col+step] = 1;
+              }
+          }
+        else
+          {
+            run = -run;
+            for (step=0; step<run; ++step)
+              {
+                aln_path1[col+step] = 2;
+                aln_path2[col+step] = 1;
+              }
+          }
+        col += run;
+      }
+if (debug>1) fprintf(stdout,"\n");
+
+    (*alen) = col;
+}
+
+/*
+ *  pdel(): record a deletion of k.  If the last recorded entry was
+ *  negative, k is subtracted from that entry; otherwise a new entry -k is
+ *  appended to displ[].  last_print tracks the entry just written.
+ */
+static void pdel(sint k)
+{
+        if (last_print >= 0) {
+                displ[print_ptr] = -(k);
+                last_print = displ[print_ptr];
+                print_ptr++;
+        }
+        else {
+                displ[print_ptr-1] -= k;
+                last_print = displ[print_ptr-1];
+        }
+}
+
+/*
+ *  padd(): record an insertion of k.  If the last recorded entry was
+ *  negative, k overwrites that slot and the negative entry is re-appended
+ *  after it (last_print is left unchanged); otherwise k is appended and
+ *  becomes last_print.
+ */
+static void padd(sint k)
+{
+        if (last_print >= 0) {
+                displ[print_ptr] = k;
+                last_print = displ[print_ptr];
+                print_ptr++;
+                return;
+        }
+
+        displ[print_ptr-1] = k;
+        displ[print_ptr] = last_print;
+        print_ptr++;
+}
+
+/*
+ *  palign(): append a 0 entry to displ[] and reset last_print to 0.
+ */
+static void palign(void)
+{
+        last_print = 0;
+        displ[print_ptr] = 0;
+        print_ptr++;
+}
+
+
+/*
+ *  pdiff()
+ *
+ *  Recursive divide-and-conquer alignment of M rows of profile1 (starting
+ *  at offset A) against N rows of profile2 (starting at offset B).  The
+ *  operations found are recorded through pdel(), padd() and palign().
+ *
+ *  go1, go2 : flags for the two ends of the sub-problem.  When go1 is 0
+ *             the opening term at the start is taken as 0 instead of
+ *             -open_penalty2(); when go2 is 0 the same is done at the end.
+ *             The "type 2" recursive calls below pass 0 for the side that
+ *             adjoins the rows removed by pdel(2).
+ *
+ *  Returns midh, the best score found for this sub-problem.  The return
+ *  values of the recursive calls are not used.
+ *
+ *  Work arrays HH, DD (forward pass) and RR, SS, gS (reverse pass) are
+ *  file-level and are indexed 0..N.
+ */
+static lint pdiff(sint A,sint B,sint M,sint N,sint go1, sint go2)
+{
+        sint midi,midj,type;
+        lint midh;
+
+/* NOTE: the scratch variables below are static, so every recursion level
+   shares one copy.  They are only used before the recursive calls at the
+   end of the function; what is needed afterwards (midi, midj, type, midh)
+   is automatic. */
+        static lint t, tl, g, h;
+
+{		static sint i,j;
+        static lint hh, f, e, s;
+
+/* Boundary cases: M <= 1 or N == 0 */
+if (debug>2) fprintf(stdout,"A %d B %d M %d N %d midi %d go1 %d go2 %d\n", 
+(pint)A,(pint)B,(pint)M,(pint)N,(pint)M/2,(pint)go1,(pint)go2);
+
+/* if sequence B is empty....                                            */
+
+        if(N<=0)  {
+
+/* if sequence A is not empty....                                        */
+
+                if(M>0) {
+
+/* delete residues A[1] to A[M]                                          */
+
+                        pdel(M);
+                }
+                return(-gap_penalty1(A,B,M));
+        }
+
+/* if sequence A is empty....                                            */
+
+        if(M<=1) {
+                if(M<=0) {
+
+/* insert residues B[1] to B[N]                                          */
+
+                        padd(N);
+                        return(-gap_penalty2(A,B,N));
+                }
+
+/* if sequence A has just one residue....                                */
+
+/* midj==0 stands for "the single row is not matched at all"; its score is
+   the starting value of midh.  The loop then tries matching it to each of
+   the N rows of B in turn and keeps the best. */
+                if (go1 == 0)
+                	midh =  -gap_penalty1(A+1,B+1,N);
+                else
+                	midh =  -gap_penalty2(A+1,B,1)-gap_penalty1(A+1,B+1,N);
+                midj = 0;
+                for(j=1;j<=N;j++) {
+                        hh = -gap_penalty1(A,B+1,j-1) + prfscore(A+1,B+j)
+                            -gap_penalty1(A+1,B+j+1,N-j);
+                        if(hh>midh) {
+                                midh = hh;
+                                midj = j;
+                        }
+                }
+
+                if(midj==0) {
+                        padd(N);
+                        pdel(1);
+                }
+                else {
+                        if(midj>1) padd(midj-1);
+                        palign();
+                        if(midj<N) padd(N-midj);
+                }
+                return midh;
+        }
+
+
+/* Divide sequence A in half: midi */
+
+        midi = M / 2;
+
+/* In a forward phase, calculate all HH[j] and DD[j] */
+
+        HH[0] = 0.0;
+        t = -open_penalty1(A,B+1);
+        tl = -ext_penalty1(A,B+1);
+        for(j=1;j<=N;j++) {
+                HH[j] = t = t+tl;
+                DD[j] = t-open_penalty2(A+1,B+j);
+        }
+
+		if (go1 == 0) t = 0;
+		else t = -open_penalty2(A+1,B);
+        tl = -ext_penalty2(A+1,B);
+        for(i=1;i<=midi;i++) {
+                s = HH[0];
+                HH[0] = hh = t = t+tl;
+                f = t-open_penalty1(A+i,B+1);
+
+/* per cell: f and e are the best scores ending in a gap (penalised with
+   the *_penalty1 and *_penalty2 functions respectively), s is the previous
+   row's HH[j-1], used for the match term */
+                for(j=1;j<=N;j++) {
+                	g = open_penalty1(A+i,B+j);
+                	h = ext_penalty1(A+i,B+j);
+                        if ((hh=hh-g-h) > (f=f-h)) f=hh;
+                	g = open_penalty2(A+i,B+j);
+                	h = ext_penalty2(A+i,B+j);
+                        if ((hh=HH[j]-g-h) > (e=DD[j]-h)) e=hh;
+                        hh = s + prfscore(A+i, B+j);
+                        if (f>hh) hh = f;
+                        if (e>hh) hh = e;
+
+                        s = HH[j];
+                        HH[j] = hh;
+                        DD[j] = e;
+
+                }
+        }
+
+        DD[0]=HH[0];
+
+/* In a reverse phase, calculate all RR[j] and SS[j] */
+
+        RR[N]=0.0;
+        tl = 0.0;
+        for(j=N-1;j>=0;j--) {
+                g = -open_penalty1(A+M,B+j+1);
+                tl -= ext_penalty1(A+M,B+j+1);
+                RR[j] = g+tl;
+                SS[j] = RR[j]-open_penalty2(A+M,B+j);
+                gS[j] = open_penalty2(A+M,B+j);
+        }
+
+        tl = 0.0;
+        for(i=M-1;i>=midi;i--) {
+                s = RR[N];
+                if (go2 == 0) g = 0;
+                else g = -open_penalty2(A+i+1,B+N);
+                tl -= ext_penalty2(A+i+1,B+N);
+                RR[N] = hh = g+tl;
+                t = open_penalty1(A+i,B+N);
+                f = RR[N]-t;
+
+                for(j=N-1;j>=0;j--) {
+                	g = open_penalty1(A+i,B+j+1);
+                	h = ext_penalty1(A+i,B+j+1);
+                        if ((hh=hh-g-h) > (f=f-h-g+t)) f=hh;
+                        t = g;
+                	g = open_penalty2(A+i+1,B+j);
+                	h = ext_penalty2(A+i+1,B+j);
+                        hh=RR[j]-g-h;
+                        if (i==(M-1)) {
+				 e=SS[j]-h;
+			}
+                        else {
+				e=SS[j]-h-g+open_penalty2(A+i+2,B+j);
+				gS[j] = g;
+			}
+                        if (hh > e) e=hh;
+                        hh = s + prfscore(A+i+1, B+j+1);
+                        if (f>hh) hh = f;
+                        if (e>hh) hh = e;
+
+                        s = RR[j];
+                        RR[j] = hh;
+                        SS[j] = e;
+
+                }
+        }
+        SS[N]=RR[N];
+        gS[N] = open_penalty2(A+midi+1,B+N);
+
+/* find midj, such that HH[j]+RR[j] or DD[j]+SS[j]+gap is the maximum */
+
+/* type 1: the split is scored by HH[j]+RR[j];
+   type 2: the split is scored by DD[j]+SS[j]+gS[j] and is only chosen when
+   it is strictly better than the best type 1 split */
+        midh=HH[0]+RR[0];
+        midj=0;
+        type=1;
+        for(j=0;j<=N;j++) {
+                hh = HH[j] + RR[j];
+                if(hh>=midh)
+                        if(hh>midh || (HH[j]!=DD[j] && RR[j]==SS[j])) {
+                                midh=hh;
+                                midj=j;
+                        }
+        }
+
+        for(j=N;j>=0;j--) {
+                hh = DD[j] + SS[j] + gS[j];
+                if(hh>midh) {
+                        midh=hh;
+                        midj=j;
+                        type=2;
+                }
+        }
+}
+
+/* Conquer recursively around midpoint                                   */
+
+
+        if(type==1) {             /* Type 1 gaps  */
+if (debug>2) fprintf(stdout,"Type 1,1: midj %d\n",(pint)midj);
+                pdiff(A,B,midi,midj,go1,1);
+if (debug>2) fprintf(stdout,"Type 1,2: midj %d\n",(pint)midj);
+                pdiff(A+midi,B+midj,M-midi,N-midj,1,go2);
+        }
+        else {
+/* type 2: rows midi and midi+1 of A are recorded as a deletion of 2, and
+   the two halves on either side are solved with the adjoining flag at 0 */
+if (debug>2) fprintf(stdout,"Type 2,1: midj %d\n",(pint)midj);
+                pdiff(A,B,midi-1,midj,go1, 0);
+                pdel(2);
+if (debug>2) fprintf(stdout,"Type 2,2: midj %d\n",(pint)midj);
+                pdiff(A+midi+1,B+midj,M-midi-1,N-midj,0,go2);
+        }
+
+        return midh;       /* Return the score of the best alignment */
+}
+
+/*
+ *  open_penalty1(): gap opening score at row i of profile1 and row j of
+ *  profile2 - the sum of the GAPCOL entries of the two rows.  Returns 0
+ *  when end gap penalties are off and i is 0 or prf_length1.
+ */
+static sint open_penalty1(sint i, sint j)
+{
+   if (!endgappenalties && (i==0 || i==prf_length1))
+      return(0);
+
+   return(profile2[j][GAPCOL] + profile1[i][GAPCOL]);
+}
+
+/*
+ *  ext_penalty1(): gap extension score at row i of profile1 and row j of
+ *  profile2 - the LENCOL entry of profile2 row j.  Returns 0 when end gap
+ *  penalties are off and i is 0 or prf_length1.
+ */
+static sint ext_penalty1(sint i, sint j)
+{
+   if (!endgappenalties && (i==0 || i==prf_length1))
+      return(0);
+
+   return(profile2[j][LENCOL]);
+}
+
+/*
+ *  gap_penalty1(): score for a gap of length k at row i of profile1 and
+ *  row j of profile2.  The opening part is the sum of the GAPCOL entries
+ *  of both rows; the extension part adds the LENCOL entries of profile2
+ *  rows j, j+1, ... for at most k rows, stopping before prf_length2.
+ *  Returns 0 when k <= 0, or when end gap penalties are off and i is 0 or
+ *  prf_length1.
+ */
+static sint gap_penalty1(sint i, sint j, sint k)
+{
+   sint off;
+   sint open_part;
+   sint ext_part;
+
+   if (k <= 0)
+      return(0);
+   if (!endgappenalties && (i==0 || i==prf_length1))
+      return(0);
+
+   open_part = profile2[j][GAPCOL] + profile1[i][GAPCOL];
+
+   ext_part = 0;
+   off = 0;
+   while (off<k && off+j<prf_length2)
+     {
+       ext_part += profile2[off+j][LENCOL];
+       off++;
+     }
+
+   return(open_part + ext_part);
+}
+/*
+ *  open_penalty2(): gap opening score at row i of profile1 and row j of
+ *  profile2 - the sum of the GAPCOL entries of the two rows.  Returns 0
+ *  when end gap penalties are off and j is 0 or prf_length2.
+ */
+static sint open_penalty2(sint i, sint j)
+{
+   if (!endgappenalties && (j==0 || j==prf_length2))
+      return(0);
+
+   return(profile1[i][GAPCOL] + profile2[j][GAPCOL]);
+}
+
+/*
+ *  ext_penalty2(): gap extension score at row i of profile1 and row j of
+ *  profile2 - the LENCOL entry of profile1 row i.  Returns 0 when end gap
+ *  penalties are off and j is 0 or prf_length2.
+ */
+static sint ext_penalty2(sint i, sint j)
+{
+   if (!endgappenalties && (j==0 || j==prf_length2))
+      return(0);
+
+   return(profile1[i][LENCOL]);
+}
+
+/*
+ *  gap_penalty2(): score for a gap of length k at row i of profile1 and
+ *  row j of profile2.  The opening part is the sum of the GAPCOL entries
+ *  of both rows; the extension part adds the LENCOL entries of profile1
+ *  rows i, i+1, ... for at most k rows, stopping before prf_length1.
+ *  Returns 0 when k <= 0, or when end gap penalties are off and j is 0 or
+ *  prf_length2.
+ */
+static sint gap_penalty2(sint i, sint j, sint k)
+{
+   sint off;
+   sint open_part;
+   sint ext_part;
+
+   if (k <= 0)
+      return(0);
+   if (!endgappenalties && (j==0 || j==prf_length2))
+      return(0);
+
+   open_part = profile1[i][GAPCOL] + profile2[j][GAPCOL];
+
+   ext_part = 0;
+   off = 0;
+   while (off<k && off+i<prf_length1)
+     {
+       ext_part += profile1[off+i][LENCOL];
+       off++;
+     }
+
+   return(open_part + ext_part);
+}

Added: trunk/packages/clustalw/branches/upstream/current/random.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/random.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/random.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,81 @@
+/*
+*	
+*	Rand.c
+*	
+*	-	linear and additive congruential random number generators
+*		(see R. Sedgewick, Algorithms, Chapter 35)
+*
+*	Implementation: R. Fuchs, EMBL Data Library, 1991
+*	
+*/
+#include <stdio.h>
+
+unsigned long linrand(unsigned long r);
+unsigned long addrand(unsigned long r);
+void addrandinit(unsigned long s);
+
+static unsigned long mult(unsigned long p,unsigned long q);
+
+
+#define m1	10000
+#define m	100000000
+
+static unsigned long mult(unsigned long p, unsigned long q);
+
+/* linear congruential method
+*
+*	linrand() returns an unsigned long random number in the range 0 to r-1;
+*	each call advances a private static state (initially 1234567)
+*/
+unsigned long linrand(unsigned long r)
+{
+	static unsigned long state=1234567;
+	unsigned long top;
+	state = (mult(state,31415821)+1) % m;
+	top = state / m1;
+	return( (top * r) / m1 );
+}
+
+/* product of p and q reduced mod m, built from their base-m1 halves */
+static unsigned long mult(unsigned long p, unsigned long q)
+{
+	unsigned long p_hi = p/m1, p_lo = p % m1;
+	unsigned long q_hi = q/m1, q_lo = q % m1;
+	unsigned long cross = (p_lo*q_hi + p_hi*q_lo) % m1;
+	return((cross * m1 + p_lo*q_lo) % m);
+}
+
+
+/* additive congruential method
+*	
+*	addrand() returns an unsigned long random number in the range 0 to r-1
+*	The random number generator is initialized by addrandinit()
+*/
+
+static unsigned long j;
+static unsigned long a[55];
+
+/* step the 55-entry table a[] once and scale the new entry by r */
+unsigned long addrand(unsigned long r)
+{
+	unsigned long lag23, lag54;
+
+	j = (j + 1) % 55;
+	lag23 = a[(j+23) % 55];
+	lag54 = a[(j+54) % 55];
+	a[j] = (lag23 + lag54) % m;
+
+	/* same scaling as linrand(): (a[j]/m1) * r / m1 */
+	return( ((a[j] / m1) * r) / m1 );
+}
+
+/* seed a[0] with s and fill a[1..54] by a[k] = (31*a[k-1] + 1) mod m */
+void addrandinit(unsigned long s)
+{
+	a[0] = s;
+	for (j = 0; j < 54; ) {	/* leaves j == 54 on exit */
+		++j;
+		a[j] = (mult(31,a[j-1]) + 1) % m;
+	}
+}
+

Added: trunk/packages/clustalw/branches/upstream/current/readmat.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/readmat.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/readmat.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,476 @@
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "clustalw.h"
+#include "matrices.h"
+
+
+/*
+ *   Prototypes
+ */
+static Boolean commentline(char *line);
+
+
+/*
+ *   Global variables
+ */
+
+extern char 	*amino_acid_codes;
+extern sint 	gap_pos1, gap_pos2;
+extern sint 	max_aa;
+extern short 	def_dna_xref[],def_aa_xref[];
+extern sint 	mat_avscore;
+extern sint 	debug;
+extern Boolean  dnaflag;
+
+extern Boolean user_series;
+extern UserMatSeries matseries;
+extern short usermatseries[MAXMAT][NUMRES][NUMRES];
+extern short aa_xrefseries[MAXMAT][NUMRES+1];
+
+
+/*
+ *   init_matrix() sets max_aa and the two gap codes, then fills the
+ *   cross-reference tables def_aa_xref / def_dna_xref: entry i holds the
+ *   index in amino_acid_codes of the i-th character of the residue order
+ *   taken from matrices.h, or -1 when that character is not found.
+ */
+void init_matrix(void)
+{
+   short pos;
+   char residue;
+   char *hit;
+
+   max_aa = strlen(amino_acid_codes)-2;
+   gap_pos1 = NUMRES-2;          /* code for gaps inserted by clustalw */
+   gap_pos2 = NUMRES-1;          /* code for gaps already in alignment */
+
+/*
+   start with every entry of both tables marked as unmatched
+*/
+   for (pos=0;pos<NUMRES;pos++)
+     {
+        def_aa_xref[pos] = -1;
+        def_dna_xref[pos] = -1;
+     }
+
+/*
+   protein order: a '*' with no match is tolerated silently
+*/
+   for (pos=0;(residue=amino_acid_order[pos]) != '\0';pos++)
+     {
+        hit = strchr(amino_acid_codes, residue);
+        if (hit != NULL)
+            def_aa_xref[pos] = (short)(hit - amino_acid_codes);
+        else if (residue != '*')
+            error("residue %c in matrices.h is not recognised",
+                                       residue);
+     }
+
+/*
+   nucleic acid order, looked up in the same code table
+*/
+   for (pos=0;(residue=nucleic_acid_order[pos]) != '\0';pos++)
+     {
+        hit = strchr(amino_acid_codes, residue);
+        if (hit != NULL)
+            def_dna_xref[pos] = (short)(hit - amino_acid_codes);
+        else if (residue != '*')
+            error("nucleic acid %c in matrices.h is not recognised",
+                                       residue);
+     }
+}
+
+/*
+ *   get_matrix() unpacks the triangular score list matptr into the square
+ *   table matrix[][].  xref translates list positions into residue codes
+ *   (-1 = position not used) and every score is multiplied by scale.
+ *
+ *   The negated average off-diagonal score is stored in mat_avscore.
+ *   When neg_flag is FALSE and the lowest score found is negative, that
+ *   lowest score is subtracted from every cross-referenced entry.
+ *   Rows and columns for the two gap codes are filled with gr_score /
+ *   gg_score (both 0 here).
+ *
+ *   Returns the number of diagonal entries filled, plus one.
+ */
+sint get_matrix(short *matptr, short *xref, sint matrix[NUMRES][NUMRES], Boolean neg_flag, sint scale)
+{
+   sint gg_score = 0;
+   sint gr_score = 0;
+   sint i, j, k, ix = 0;
+   sint ti, tj;
+   sint  maxres;
+   sint av1,av2,av3,min, max;
+   sint npairs, noffdiag;
+/*
+   default - set all scores to 0
+*/
+   for (i=0;i<=max_aa;i++)
+      for (j=0;j<=max_aa;j++)
+          matrix[i][j] = 0;
+
+/*
+   walk the lower triangle; ix only advances for positions that are
+   cross-referenced on both axes
+*/
+   ix = 0;
+   maxres = 0;
+   for (i=0;i<=max_aa;i++)
+    {
+      ti = xref[i];
+      for (j=0;j<=i;j++)
+       {
+          tj = xref[j];
+          if ((ti != -1) && (tj != -1))
+            {
+               k = matptr[ix];
+               if (ti==tj)
+                  {
+                     matrix[ti][ti] = k * scale;
+                     maxres++;
+                  }
+               else
+                  {
+                     matrix[ti][tj] = k * scale;
+                     matrix[tj][ti] = k * scale;
+                  }
+               ix++;
+            }
+       }
+    }
+
+   --maxres;
+
+   av1 = av2 = av3 = 0;
+   for (i=0;i<=max_aa;i++)
+    {
+      for (j=0;j<=i;j++)
+       {
+           av1 += matrix[i][j];
+           if (i==j)
+              {
+                 av2 += matrix[i][j];
+              }
+           else
+              {
+                 av3 += matrix[i][j];
+              }
+       }
+    }
+
+/*
+   the divisors below are zero for very small matrices (two or fewer
+   diagonal entries); in that case the plain sums are kept rather than
+   dividing by zero
+*/
+   npairs = (maxres*maxres)/2;
+   noffdiag = maxres*maxres-maxres;
+   if (npairs != 0) av1 /= npairs;
+   if (maxres != 0) av2 /= maxres;
+   if (noffdiag != 0) av3 /= ((float)noffdiag)/2;
+  mat_avscore = -av3;
+
+/*
+   NOTE(review): the inner loop starts at j=1, so column 0 of rows 1..max_aa
+   is not examined when looking for min/max - confirm this is intended
+*/
+  min = max = matrix[0][0];
+  for (i=0;i<=max_aa;i++)
+    for (j=1;j<=i;j++)
+      {
+        if (matrix[i][j] < min) min = matrix[i][j];
+        if (matrix[i][j] > max) max = matrix[i][j];
+      }
+if (debug>1) fprintf(stdout,"maxres %d\n",(pint)max_aa);
+if (debug>1) fprintf(stdout,"average mismatch score %d\n",(pint)av3);
+if (debug>1) fprintf(stdout,"average match score %d\n",(pint)av2);
+if (debug>1) fprintf(stdout,"average score %d\n",(pint)av1);
+
+/*
+   if requested, make a positive matrix - add -(lowest score) to every entry
+*/
+  if (neg_flag == FALSE)
+   {
+
+if (debug>1) fprintf(stdout,"min %d max %d\n",(pint)min,(pint)max);
+      if (min < 0)
+        {
+           for (i=0;i<=max_aa;i++)
+            {
+              ti = xref[i];
+              if (ti != -1)
+                {
+                 for (j=0;j<=max_aa;j++)
+                   {
+                    tj = xref[j];
+                    if (tj != -1) matrix[ti][tj] -= min;
+                   }
+                }
+            }
+        }
+   }
+
+/*
+   gap rows and columns
+*/
+  for (i=0;i<gap_pos1;i++)
+   {
+      matrix[i][gap_pos1] = gr_score;
+      matrix[gap_pos1][i] = gr_score;
+      matrix[i][gap_pos2] = gr_score;
+      matrix[gap_pos2][i] = gr_score;
+   }
+  matrix[gap_pos1][gap_pos1] = gg_score;
+  matrix[gap_pos2][gap_pos2] = gg_score;
+  matrix[gap_pos2][gap_pos1] = gg_score;
+  matrix[gap_pos1][gap_pos2] = gg_score;
+
+  maxres += 2;
+
+  return(maxres);
+}
+
+
+/*
+ *   read_matrix_series() opens filename and inspects its first non-comment
+ *   line.  If that line is not a CLUSTAL_SERIES line the file is handed to
+ *   read_user_matrix() as a single matrix (results in usermat / xref).
+ *   Otherwise every following "MATRIX llimit ulimit file" line names one
+ *   matrix file, which is read into the next slot of usermatseries /
+ *   aa_xrefseries and recorded in matseries.
+ *
+ *   Returns the value returned by read_user_matrix() for the last matrix
+ *   read, or 0 on any error (including a series with no MATRIX line).
+ */
+sint read_matrix_series(char *filename, short *usermat, short *xref)
+{
+   FILE *fd = NULL;
+   char mat_filename[FILENAMELEN];
+   char inline1[1024];
+   char token[1024];          /* a whole input line always fits */
+   sint  maxres = 0;
+   sint nmat;
+   sint n = 0;                /* stays 0 when no MATRIX line is found */
+   int llimit,ulimit;         /* int, to match the %d conversions below */
+
+   if (filename[0] == '\0')
+     {
+        error("comparison matrix not specified");
+        return((sint)0);
+     }
+   if ((fd=fopen(filename,"r"))==NULL)
+     {
+        error("cannot open %s", filename);
+        return((sint)0);
+     }
+
+/* check the first line to see if it's a series or a single matrix;
+   a file with no data line at all is treated as a single matrix */
+   user_series=FALSE;
+   while (fgets(inline1,1024,fd) != NULL)
+     {
+        if (commentline(inline1)) continue;
+        if(linetype(inline1,"CLUSTAL_SERIES"))
+           user_series=TRUE;
+        break;
+     }
+
+/* it's a single matrix */
+  if(user_series == FALSE)
+    {
+        fclose(fd);
+        maxres=read_user_matrix(filename,usermat,xref);
+        return(maxres);
+    }
+
+/* it's a series of matrices, find the next MATRIX line */
+   nmat=0;
+   matseries.nmat=0;
+   while (fgets(inline1,1024,fd) != NULL)
+     {
+        if (commentline(inline1)) continue;
+        if(!linetype(inline1,"MATRIX")) continue;
+
+        if(sscanf(inline1+6,"%d %d %s",&llimit,&ulimit,token)!=3)
+          {
+             error("Bad format in file %s\n",filename);
+             fclose(fd);
+             return((sint)0);
+          }
+/* the file name must fit the local buffer */
+        if(strlen(token) >= (size_t)FILENAMELEN)
+          {
+             error("Bad format in file %s\n",filename);
+             fclose(fd);
+             return((sint)0);
+          }
+        strcpy(mat_filename,token);
+
+        if(llimit<0 || llimit > 100 || ulimit <0 || ulimit>100)
+          {
+             error("Bad format in file %s\n",filename);
+             fclose(fd);
+             return((sint)0);
+          }
+        if(ulimit<=llimit)
+          {
+             error("in file %s: lower limit is greater than upper (%d-%d)\n",filename,llimit,ulimit);
+             fclose(fd);
+             return((sint)0);
+          }
+/* usermatseries has MAXMAT slots */
+        if(nmat >= MAXMAT)
+          {
+             error("too many matrices in file %s\n",filename);
+             fclose(fd);
+             return((sint)0);
+          }
+        n=read_user_matrix(mat_filename,&usermatseries[nmat][0][0],&aa_xrefseries[nmat][0]);
+        if(n<=0)
+          {
+             error("Bad format in matrix file %s\n",mat_filename);
+             fclose(fd);
+             return((sint)0);
+          }
+        matseries.mat[nmat].llimit=llimit;
+        matseries.mat[nmat].ulimit=ulimit;
+        matseries.mat[nmat].matptr=&usermatseries[nmat][0][0];
+        matseries.mat[nmat].aa_xref=&aa_xrefseries[nmat][0];
+        nmat++;
+     }
+   fclose(fd);
+   matseries.nmat=nmat;
+
+   maxres=n;
+   return(maxres);
+
+}
+
+/*
+ *   read_user_matrix() reads one comparison matrix from filename.
+ *
+ *   The first non-comment line supplies the residue letters (every
+ *   alphabetic character on it).  xref[i] is set to the index of the i-th
+ *   letter in amino_acid_codes, or -1 if the letter is not known.
+ *   The following lines are the matrix rows; from row number ix the first
+ *   ix+1 values are taken, and those whose column is cross-referenced are
+ *   appended to usermat.  If the first value of a row contains a '.', the
+ *   values of that row are multiplied by 10 before being stored as short.
+ *   Reading stops at a line starting with '#' or '!'.  Exactly one more
+ *   row than there are residue letters is expected.
+ *
+ *   Returns the number of recognised residue letters plus 2, or 0 on error.
+ */
+sint read_user_matrix(char *filename, short *usermat, short *xref)
+{
+   double f;
+   FILE *fd;
+   sint  numargs,farg;
+   sint i, j, k = 0;
+   char codes[NUMRES+1];      /* up to NUMRES letters plus the terminator */
+   char inline1[1024];
+   char *args[NUMRES+4];
+   char c1,c2;
+   sint ix1, ix = 0;
+   sint  maxres = 0;
+   float scale;
+
+   if (filename[0] == '\0')
+     {
+        error("comparison matrix not specified");
+        return((sint)0);
+     }
+
+   if ((fd=fopen(filename,"r"))==NULL)
+   {
+        error("cannot open %s", filename);
+        return((sint)0);
+   }
+   maxres = 0;
+   while (fgets(inline1,1024,fd) != NULL)
+     {
+        if (commentline(inline1)) continue;
+        if(linetype(inline1,"CLUSTAL_SERIES"))
+          {
+             error("in %s - single matrix expected.", filename);
+             fclose(fd);
+             return((sint)0);
+          }
+/*
+   read residue characters.
+*/
+        k = 0;
+        for (j=0;inline1[j] != '\0';j++)
+          {
+             if (!isalpha((unsigned char)inline1[j])) continue;
+             if (k>=NUMRES)
+                {
+                   error("too many entries in matrix %s",filename);
+                   fclose(fd);
+                   return((sint)0);
+                }
+             codes[k++] = inline1[j];
+          }
+        codes[k] = '\0';
+        break;
+    }
+
+   if (k == 0)
+     {
+        error("wrong format in matrix %s",filename);
+        fclose(fd);
+        return((sint)0);
+     }
+
+/*
+   cross-reference the residues
+*/
+   for (i=0;i<NUMRES;i++) xref[i] = -1;
+
+   maxres = 0;
+   for (i=0;(c1=codes[i]);i++)
+     {
+         for (j=0;(c2=amino_acid_codes[j]);j++)
+           if (c1 == c2)
+               {
+                  xref[i] = j;
+                  maxres++;
+                  break;
+               }
+         if ((xref[i] == -1) && (codes[i] != '*'))
+            {
+                warning("residue %c in matrix %s not recognised",
+                                       codes[i],filename);
+            }
+     }
+
+
+/*
+   get the weights
+*/
+
+   ix = ix1 = 0;
+   while (fgets(inline1,1024,fd) != NULL)
+     {
+        if (inline1[0] == '\n') continue;
+        if (inline1[0] == '#' ||
+            inline1[0] == '!') break;
+        numargs = getargs(inline1, args, (int)(k+1));
+/* an empty row, a short row, or more rows than expected */
+        if (numargs < 1 || numargs < maxres || ix > k)
+          {
+             error("wrong format in matrix %s",filename);
+             fclose(fd);
+             return((sint)0);
+          }
+/* a leading letter is the row label; the values start after it */
+        if (isalpha((unsigned char)args[0][0])) farg=1;
+        else farg=0;
+
+/* the row must hold every value read below (args[farg] .. args[ix+farg]) */
+        if (ix+farg >= numargs)
+          {
+             error("wrong format in matrix %s",filename);
+             fclose(fd);
+             return((sint)0);
+          }
+
+/* decide whether the matrix values are float or decimal */
+        scale=1.0;
+        if (strchr(args[farg],'.') != NULL)
+           scale=10.0;       /* we've found a float value */
+
+/* positions at or beyond k have no residue letter, so they are never stored */
+        for (i=0;i<=ix;i++)
+          {
+             if (i < k && xref[i] != -1)
+               {
+                  f = atof(args[i+farg]);
+                  usermat[ix1++] = (short)(f*scale);
+               }
+          }
+        ix++;
+     }
+   if (ix != k+1)
+     {
+        error("wrong format in matrix %s",filename);
+        fclose(fd);
+        return((sint)0);
+     }
+
+
+  maxres += 2;
+  fclose(fd);
+
+  return(maxres);
+}
+
+/*
+ *   getargs() splits inline1 in place (via strtok) at blanks, tabs and
+ *   newlines and stores pointers to the pieces in args[].  At most max+1
+ *   pieces are stored.  When the line runs out earlier, the slot after
+ *   the last piece is set to NULL.  Returns the number of pieces stored.
+ */
+int getargs(char *inline1,char *args[],int max)
+{
+	int	count = 0;
+	char	*cursor = inline1;	/* NULL after the first strtok call */
+
+	while (count <= max)
+	{
+		args[count] = strtok(cursor," \t\n");
+		if (args[count] == NULL)
+			break;
+		cursor = NULL;
+		count++;
+	}
+
+	return(count);
+}
+
+
+/*
+ *   commentline() returns TRUE when line starts with '#' or holds nothing
+ *   but white space up to its newline / terminator, FALSE otherwise.
+ */
+static Boolean commentline(char *line)
+{
+        int i;
+
+        if(line[0] == '#') return TRUE;
+        for(i=0;line[i]!='\n' && line[i]!=EOS;i++) {
+                /* ctype functions need an unsigned char value */
+                if(!isspace((unsigned char)line[i]))
+                        return FALSE;
+        }
+        return TRUE;
+}
+

Added: trunk/packages/clustalw/branches/upstream/current/sequence.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/sequence.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/sequence.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,1379 @@
+/********* Sequence input routines for CLUSTAL W *******************/
+/* DES was here.  FEB. 1994 */
+/* Now reads PILEUP/MSF and CLUSTAL alignment files */
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include "clustalw.h"	
+
+#define MIN(a,b) ((a)<(b)?(a):(b))
+
+
+
+/*
+*	Prototypes
+*/
+
+static char * get_seq(char *,sint *,char *);
+static char * get_clustal_seq(char *,sint *,char *,sint);
+static char * get_msf_seq(char *,sint *,char *,sint);
+static void check_infile(sint *);
+static void p_encode(char *, char *, sint);
+static void n_encode(char *, char *, sint);
+static sint res_index(char *,char);
+static Boolean check_dnaflag(char *, sint);
+static sint count_clustal_seqs(void);
+static sint count_pir_seqs(void);
+static sint count_msf_seqs(void);
+static sint count_rsf_seqs(void);
+static void get_swiss_feature(char *line,sint len);
+static void get_rsf_feature(char *line,sint len);
+static void get_swiss_mask(char *line,sint len);
+static void get_clustal_ss(sint length);
+static void get_embl_ss(sint length);
+static void get_rsf_ss(sint length);
+static void get_gde_ss(sint length);
+static Boolean cl_blankline(char *line);
+
+/*
+ *	Global variables
+ */
+extern sint max_names;
+FILE *fin;
+extern Boolean usemenu, dnaflag, explicit_dnaflag;
+extern Boolean interactive;
+extern char seqname[];
+extern sint nseqs;
+extern sint *seqlen_array;
+extern sint *output_index;
+extern char **names,**titles;
+extern char **seq_array;
+extern Boolean profile1_empty, profile2_empty;
+extern sint gap_pos2;
+extern sint max_aln_length;
+extern char *gap_penalty_mask, *sec_struct_mask;
+extern sint struct_penalties;
+extern char *ss_name;
+extern sint profile_no;
+extern sint debug;
+
+char *amino_acid_codes   =    "ABCDEFGHIKLMNPQRSTUVWXYZ-";  /* DES */
+static sint seqFormat;
+static char chartab[128];
+static char *formatNames[] = {"unknown","EMBL/Swiss-Prot","PIR",
+			      "Pearson","GDE","Clustal","Pileup/MSF","RSF","USER","PHYLIP","NEXUS"};
+
+/*
+*	Create translation and check table: every residue code maps, in
+*	upper and lower case, onto the code itself; all other entries are 0.
+*/
+void fill_chartab(void)
+{
+	const char *code;
+
+	memset(chartab, 0, sizeof(chartab));
+	for (code = amino_acid_codes; *code; code++) {
+		chartab[tolower(*code)] = *code;
+		chartab[(int)*code] = *code;
+	}
+}
+
+static char * get_msf_seq(char *sname,sint *len,char *tit,sint seqno)
+/*
+*	read the seqno_th. sequence from a PILEUP multiple alignment file
+*
+*	The file is rewound and everything up to the "//" line is skipped.
+*	In every following block of non-blank lines the seqno-th line is
+*	used: its first word goes to sname (at most MAXNAMES characters) and
+*	the rest is appended to the result.  '.' and '~' are read as '-',
+*	'*' as 'X'; characters that chartab does not know are dropped.
+*	Residues are stored from index 1 and *len receives their count.
+*	Returns NULL if no "//" line is found; tit is not used.
+*/
+{
+	static char line[MAXLINE+1];
+	char *seq = NULL;
+	sint i,j,k,linelen,namelen;
+	unsigned char c;
+
+	fseek(fin,0,0); 		/* start at the beginning */
+
+	*len=0;				/* initialise length to zero */
+	for(;;) {
+		if(fgets(line,MAXLINE+1,fin)==NULL) return NULL; /* read the title*/
+		if(linetype(line,"//") ) break;		    /* lines...ignore*/
+	}
+
+	while (fgets(line,MAXLINE+1,fin) != NULL) {
+		if(blankline(line)) continue;
+
+		/* move down to the wanted row; stop if the file ends first */
+		for(i=1;i<seqno;i++)
+			if(fgets(line,MAXLINE+1,fin)==NULL) return seq;
+
+		/* name = first run of non-blanks; never look past the terminator */
+		linelen=strlen(line);
+		for(j=0;j<linelen;j++) if(line[j] != ' ') break;
+		for(k=j;k<linelen;k++) if(line[k] == ' ') break;
+		namelen=MIN(MAXNAMES,k-j);
+		strncpy(sname,line+j,namelen);
+		sname[namelen]=EOS;
+		rtrim(sname);
+		blank_to_(sname);
+
+		if(seq==NULL)
+			seq=(char *)ckalloc((MAXLINE+2)*sizeof(char));
+		else
+			seq=(char *)ckrealloc(seq,((*len)+MAXLINE+2)*sizeof(char));
+		for(i=k;i<=MAXLINE;i++) {
+			c=line[i];
+			if(c == '.' || c == '~' ) c = '-';
+			if(c == '*') c = 'X';
+			if(c == '\n' || c == EOS) break; /* EOL */
+			if(c >= sizeof(chartab)) continue; /* no table entry */
+			c=chartab[c];
+			if(c) seq[++(*len)]=c;
+		}
+
+		/* skip the remaining rows of this block */
+		for(;;) {
+			if(fgets(line,MAXLINE+1,fin)==NULL) return seq;
+			if(blankline(line)) break;
+		}
+	}
+	return seq;
+}
+
+/*
+*	cl_blankline() returns TRUE for a line that starts with '!' or that
+*	holds only digits, white space, '*', ':' and '.' up to its newline /
+*	terminator; any other character makes it FALSE.
+*/
+static Boolean cl_blankline(char *line)
+{
+	int i;
+	unsigned char c;	/* ctype functions need an unsigned char value */
+
+	if (line[0] == '!') return TRUE;
+
+	for(i=0;line[i]!='\n' && line[i]!=EOS;i++) {
+		c = (unsigned char)line[i];
+		if (isdigit(c) || isspace(c)) continue;
+		if (c == '*' || c == ':' || c == '.') continue;
+		return FALSE;
+	}
+	return TRUE;
+}
+
+static char * get_clustal_seq(char *sname,sint *len,char *tit,sint seqno)
+/*
+*	read the seqno_th. sequence from a clustal multiple alignment file
+*
+*	The file is rewound and its first line skipped.  In every following
+*	block the seqno-th line is split into a name and a residue column;
+*	the name goes to sname (at most MAXNAMES characters), the residues
+*	known to chartab are appended to the result.  Residues are stored
+*	from index 1 and *len receives their count.  tit is not used.
+*/
+{
+	static char line[MAXLINE+1];
+	static char tseq[MAXLINE+1];
+	static char tname[MAXLINE+1];	/* a whole line fits, unlike sname */
+	char *seq = NULL;
+	sint i;
+	unsigned char c;
+
+	fseek(fin,0,0); 		/* start at the beginning */
+
+	*len=0;				/* initialise length to zero */
+	if(fgets(line,MAXLINE+1,fin)==NULL) return NULL;	/* title line...ignore it */
+
+	while (fgets(line,MAXLINE+1,fin) != NULL) {
+		if(cl_blankline(line)) continue;
+
+		/* move down to the wanted row; stop if the file ends first */
+		for(i=1;i<seqno;i++)
+			if(fgets(line,MAXLINE+1,fin)==NULL) return seq;
+
+		/* clear both fields so a short line cannot reuse old data */
+		tname[0]=EOS;
+		tseq[0]=EOS;
+		sscanf(line,"%s%s",tname,tseq);
+		strncpy(sname,tname,MAXNAMES);
+		sname[MAXNAMES]=EOS;
+		rtrim(sname);
+		blank_to_(sname);
+
+		if(seq==NULL)
+			seq=(char *)ckalloc((MAXLINE+2)*sizeof(char));
+		else
+			seq=(char *)ckrealloc(seq,((*len)+MAXLINE+2)*sizeof(char));
+		for(i=0;i<=MAXLINE;i++) {
+			c=tseq[i];
+			if(isspace(c) || c == EOS) break; /* EOL */
+			if(c >= sizeof(chartab)) continue; /* no table entry */
+			c=chartab[c];
+			if(c) seq[++(*len)]=c;
+		}
+
+		/* skip the remaining rows of this block */
+		for(;;) {
+			if(fgets(line,MAXLINE+1,fin)==NULL) return seq;
+			if(cl_blankline(line)) break;
+		}
+	}
+
+	return seq;
+}
+
+/*
+*	Split the part of a "!SS" / "!GM" line after its 4-character tag into
+*	a name (cut to MAXNAMES characters) and a data column.  Both come back
+*	empty when the line holds nothing to scan.
+*/
+static void clustal_ss_scan(char *line, char *name, char *data)
+{
+	static char token[MAXLINE+1];	/* a whole line fits */
+
+	token[0] = EOS;
+	data[0] = EOS;
+	if (strlen(line) > 4)
+		sscanf(line+4,"%s%s",token,data);
+	strncpy(name,token,MAXNAMES);
+	name[MAXNAMES] = EOS;
+}
+
+/*
+*	Append the non-blank characters of data to mask, starting at position
+*	len and never going past length.  Returns the new fill position.
+*/
+static sint clustal_ss_append(char *data, char *mask, sint len, sint length)
+{
+	sint i;
+	unsigned char c;
+
+	for(i=0;len < length;i++) {
+		c = data[i];
+		if(c == '\n' || c == EOS) break; /* EOL */
+		if (!isspace(c)) mask[len++] = c;
+	}
+	return len;
+}
+
+static void get_clustal_ss(sint length)
+/*
+*	read the structure data from a clustal multiple alignment file
+*
+*	The '!' lines at the top of the first alignment block are searched for
+*	a "!SS" (secondary structure) or "!GM" (gap penalty mask) line.  The
+*	first one accepted (always in non-interactive mode, otherwise after a
+*	prompt) sets struct_penalties and ss_name and starts filling
+*	sec_struct_mask / gap_penalty_mask.  The line at the same position
+*	among the '!' lines of each later block supplies the continuation.
+*	At most length characters are stored.
+*/
+{
+	static char title[MAXLINE+1];
+	static char line[MAXLINE+1];
+	static char tseq[MAXLINE+1];
+	static char sname[MAXNAMES+1];
+	sint i,len,ix,struct_index=0;
+	char answer;
+
+	fseek(fin,0,0); 		/* start at the beginning */
+
+	len=0;				/* initialise length to zero */
+	if (fgets(line,MAXLINE+1,fin) == NULL) return;	/* read the title line...ignore it */
+
+	if (fgets(line,MAXLINE+1,fin) == NULL) return;  /* read the next line... */
+/* skip any blank lines */
+	for (;;) {
+		if(fgets(line,MAXLINE+1,fin)==NULL) return;
+		if(!blankline(line)) break;
+	}
+
+/* look for structure table lines */
+	ix = -1;
+	for(;;) {
+		if(line[0] != '!') break;
+		if(strncmp(line,"!SS",3) == 0) {
+			ix++;
+			clustal_ss_scan(line,sname,tseq);
+			rtrim(sname);
+			blank_to_(sname);
+			if (interactive) {
+				strcpy(title,"Found secondary structure in alignment file: ");
+				strcat(title,sname);
+				answer=prompt_for_yes_no(title,"Use it to set local gap penalties ");
+			}
+			else answer = 'y';
+			if ((answer != 'n') && (answer != 'N'))  {
+				struct_penalties = SECST;
+				struct_index = ix;
+				for (i=0;i<length;i++)
+				{
+					sec_struct_mask[i] = '.';
+					gap_penalty_mask[i] = '.';
+				}
+				strcpy(ss_name,sname);
+				len = clustal_ss_append(tseq,sec_struct_mask,len,length);
+			}
+		}
+		else if(strncmp(line,"!GM",3) == 0) {
+			ix++;
+			clustal_ss_scan(line,sname,tseq);
+			rtrim(sname);
+			blank_to_(sname);
+			if (interactive) {
+				strcpy(title,"Found gap penalty mask in alignment file: ");
+				strcat(title,sname);
+				answer=prompt_for_yes_no(title,"Use it to set local gap penalties ");
+			}
+			else answer = 'y';
+			if ((answer != 'n') && (answer != 'N'))  {
+				struct_penalties = GMASK;
+				struct_index = ix;
+				for (i=0;i<length;i++)
+					gap_penalty_mask[i] = '1';
+				strcpy(ss_name,sname);
+				len = clustal_ss_append(tseq,gap_penalty_mask,len,length);
+			}
+		}
+		if (struct_penalties != NONE) break;
+		if(fgets(line,MAXLINE+1,fin)==NULL) return;
+	}
+
+	if (struct_penalties == NONE) return;
+
+/* skip any more comment lines */
+	while (line[0] == '!') {
+		if(fgets(line,MAXLINE+1,fin)==NULL) return;
+	}
+
+/* skip the sequence lines and any comments after the alignment */
+	for (;;) {
+		if(isspace((unsigned char)line[0])) break;
+		if(fgets(line,MAXLINE+1,fin)==NULL) return;
+	}
+
+
+/* read the rest of the alignment */
+
+	for (;;) {
+/* skip any blank lines */
+		for (;;) {
+			if(!blankline(line)) break;
+			if(fgets(line,MAXLINE+1,fin)==NULL) return;
+		}
+/* step over the '!' lines that come before the chosen one */
+		for(ix=0;ix<struct_index;ix++) {
+			if (line[0] != '!') {
+				if(struct_penalties == SECST)
+					error("bad secondary structure format");
+				else
+					error("bad gap penalty mask format");
+				struct_penalties = NONE;
+				return;
+			}
+			if(fgets(line,MAXLINE+1,fin)==NULL) return;
+		}
+/* get structure table line */
+		if(struct_penalties == SECST) {
+			if (strncmp(line,"!SS",3) != 0) {
+				error("bad secondary structure format");
+				struct_penalties = NONE;
+				return;
+			}
+			clustal_ss_scan(line,sname,tseq);
+			len = clustal_ss_append(tseq,sec_struct_mask,len,length);
+		}
+		else if (struct_penalties == GMASK) {
+			if (strncmp(line,"!GM",3) != 0) {
+				error("bad gap penalty mask format");
+				struct_penalties = NONE;
+				return;
+			}
+			clustal_ss_scan(line,sname,tseq);
+			len = clustal_ss_append(tseq,gap_penalty_mask,len,length);
+		}
+
+/* skip any more comment lines */
+		while (line[0] == '!') {
+			if(fgets(line,MAXLINE+1,fin)==NULL) return;
+		}
+
+/* skip the sequence lines */
+		for (;;) {
+			if(isspace((unsigned char)line[0])) break;
+			if(fgets(line,MAXLINE+1,fin)==NULL) return;
+		}
+	}
+}
+
+/*
+*	get_embl_ss() reads on from the current position of fin, entry by
+*	entry ("ID" line onwards), looking for secondary structure ("FT" lines
+*	with HELIX / STRAND) or a gap penalty mask ("GM" lines).  When one is
+*	found and accepted (always in non-interactive mode, otherwise after a
+*	prompt) struct_penalties is set and the lines are passed to
+*	get_swiss_feature() / get_swiss_mask().  ss_name receives the entry
+*	name.  The function returns when the file runs out.
+*/
+static void get_embl_ss(sint length)
+{
+	static char title[MAXLINE+1];
+	static char line[MAXLINE+1];
+	static char sname[MAXNAMES+1];
+	char feature[MAXLINE+1];
+	char answer;
+	sint i, linelen;
+
+	line[0] = EOS;		/* do not act on a line left from an earlier call */
+
+/* find the start of the sequence entry */
+	for (;;) {
+		while( !linetype(line,"ID") )
+			if (fgets(line,MAXLINE+1,fin) == NULL) return;
+
+		/* entry name: first word after column 5, never past the terminator */
+		linelen = strlen(line);
+		for(i=5;i<linelen;i++)  /* DES */
+			if(line[i] != ' ') break;
+		if (i > linelen) i = linelen;
+		strncpy(sname,line+i,MAXNAMES); /* remember entryname */
+		sname[MAXNAMES]=EOS;
+		for(i=0;sname[i]!=EOS;i++)
+			if(sname[i] == ' ') {
+				sname[i]=EOS;
+				break;
+			}
+		rtrim(sname);
+		blank_to_(sname);
+
+/* look for secondary structure feature table / gap penalty mask */
+		while(fgets(line,MAXLINE+1,fin) != NULL) {
+			if (linetype(line,"FT")) {
+				feature[0] = EOS;
+				sscanf(line+2,"%s",feature);
+				if (strcmp(feature,"HELIX") == 0 ||
+				    strcmp(feature,"STRAND") == 0)
+				{
+					if (interactive) {
+						strcpy(title,"Found secondary structure in alignment file: ");
+						strcat(title,sname);
+						answer=prompt_for_yes_no(title,"Use it to set local gap penalties ");
+					}
+					else answer = 'y';
+					if ((answer != 'n') && (answer != 'N'))  {
+						struct_penalties = SECST;
+						for (i=0;i<length;i++)
+							sec_struct_mask[i] = '.';
+						do {
+							get_swiss_feature(&line[2],length);
+							/* at end of file the buffer keeps its old
+							   contents, so clear it and stop */
+							if (fgets(line,MAXLINE+1,fin) == NULL) {
+								line[0] = EOS;
+								break;
+							}
+						} while( linetype(line,"FT") );
+					}
+					else {
+						do {
+							if (fgets(line,MAXLINE+1,fin) == NULL) {
+								line[0] = EOS;
+								break;
+							}
+						} while( linetype(line,"FT") );
+					}
+					strcpy(ss_name,sname);
+				}
+			}
+			else if (linetype(line,"GM")) {
+				if (interactive) {
+					strcpy(title,"Found gap penalty mask in alignment file: ");
+					strcat(title,sname);
+					answer=prompt_for_yes_no(title,"Use it to set local gap penalties ");
+				}
+				else answer = 'y';
+				if ((answer != 'n') && (answer != 'N'))  {
+					struct_penalties = GMASK;
+					for (i=0;i<length;i++)
+						gap_penalty_mask[i] = '1';
+					do {
+						get_swiss_mask(&line[2],length);
+						if (fgets(line,MAXLINE+1,fin) == NULL) {
+							line[0] = EOS;
+							break;
+						}
+					} while( linetype(line,"GM") );
+				}
+				else {
+					do {
+						if (fgets(line,MAXLINE+1,fin) == NULL) {
+							line[0] = EOS;
+							break;
+						}
+					} while( linetype(line,"GM") );
+				}
+				strcpy(ss_name,sname);
+			}
+			if (linetype(line,"SQ"))
+				break;
+
+			if (struct_penalties != NONE) break;
+		}
+
+	}
+
+}
+
+/*
+*	get_rsf_ss() reads on from the current position of fin.  It skips to
+*	the line that ends in "..", then goes entry by entry ('{' line, then
+*	"name" line) looking for "feature" lines.  When they are accepted
+*	(always in non-interactive mode, otherwise after a prompt)
+*	struct_penalties becomes SECST and every "feature" line up to the
+*	"sequence" line is passed to get_rsf_feature().  ss_name receives the
+*	entry name.  The function returns when the file runs out.
+*/
+static void get_rsf_ss(sint length)
+{
+	static char title[MAXLINE+1];
+	static char line[MAXLINE+1];
+	static char sname[MAXNAMES+1];
+	char answer;
+	sint i, linelen;
+
+/* skip the comments */
+	while (fgets(line,MAXLINE+1,fin) != NULL) {
+		linelen = strlen(line);
+		/* need at least three characters to look at the last two before the newline */
+		if(linelen > 2 &&
+		   line[linelen-2]=='.' &&
+		   line[linelen-3]=='.')
+			break;
+	}
+
+/* find the start of the sequence entry */
+	for (;;) {
+		while (fgets(line,MAXLINE+1,fin) != NULL)
+			if( *line == '{' ) break;
+
+		while( !keyword(line,"name") )
+			if (fgets(line,MAXLINE+1,fin) == NULL) return;
+
+		/* entry name: first word after column 5, never past the terminator */
+		linelen = strlen(line);
+		for(i=5;i<linelen;i++)  /* DES */
+			if(line[i] != ' ') break;
+		if (i > linelen) i = linelen;
+		strncpy(sname,line+i,MAXNAMES); /* remember entryname */
+		sname[MAXNAMES]=EOS;
+		for(i=0;sname[i]!=EOS;i++)
+			if(sname[i] == ' ') {
+				sname[i]=EOS;
+				break;
+			}
+		rtrim(sname);
+		blank_to_(sname);
+
+/* look for secondary structure feature table / gap penalty mask */
+		while(fgets(line,MAXLINE+1,fin) != NULL) {
+			if (keyword(line,"feature")) {
+				if (interactive) {
+					strcpy(title,"Found secondary structure in alignment file: ");
+					strcat(title,sname);
+					answer=prompt_for_yes_no(title,"Use it to set local gap penalties ");
+				}
+				else answer = 'y';
+				if ((answer != 'n') && (answer != 'N'))  {
+					struct_penalties = SECST;
+					for (i=0;i<length;i++)
+						sec_struct_mask[i] = '.';
+					do {
+						if(keyword(line,"feature"))
+							get_rsf_feature(&line[7],length);
+						/* at end of file the buffer keeps its old
+						   contents, so clear it and stop */
+						if (fgets(line,MAXLINE+1,fin) == NULL) {
+							line[0] = EOS;
+							break;
+						}
+					} while( !keyword(line,"sequence") );
+				}
+				else {
+					do {
+						if (fgets(line,MAXLINE+1,fin) == NULL) {
+							line[0] = EOS;
+							break;
+						}
+					} while( !keyword(line,"sequence") );
+				}
+				strcpy(ss_name,sname);
+			}
+			else if (keyword(line,"sequence"))
+				break;
+
+			if (struct_penalties != NONE) break;
+		}
+
+	}
+
+}
+
+/*
+*	Copy the entry name that follows the 4-character tag ("SS_ or "GM_)
+*	into sname, stopping at '(', newline or end of string, then strip
+*	trailing white space.
+*	NOTE(review): the '(' test below looks at the last copied character,
+*	which is never '(' because copying stops before it, so *offset always
+*	ends up 0 - confirm whether an offset was meant to be parsed here.
+*/
+static void gde_ss_entry_name(char *line, char *sname, sint *offset)
+{
+	sint i;
+
+	for (i=1;i<=MAXNAMES-3;i++) {
+		if (line[i+3] == '(' || line[i+3] == '\n' || line[i+3] == EOS)
+			break;
+		sname[i-1] = line[i+3];
+	}
+	i--;
+	sname[i]=EOS;
+	/* i > 0: an empty name has no last character to look at */
+	if (i > 0 && sname[i-1] == '(') sscanf(&line[i+3],"%d",offset);
+	else *offset = 0;
+	for(i--;i > 0;i--) {
+		if(!isspace((unsigned char)sname[i])) break;
+		sname[i]=EOS;
+	}
+	blank_to_(sname);
+}
+
+/*
+*	Read lines from fin into line until one starts with '%', '#' or '"'
+*	(or the file ends), copying their characters from column offset on
+*	into mask.  No more than length characters are ever stored.
+*/
+static void gde_ss_read_mask(char *line, char *mask, sint length, sint offset)
+{
+	sint i, len = 0;
+	unsigned char c;
+
+	while(fgets(line,MAXLINE+1,fin)) {
+		if(*line == '%' || *line == '#' || *line == '"') break;
+		for(i=offset;i < length && len < length;i++) {
+			c=line[i];
+			if(c == '\n' || c == EOS)
+				break;			/* EOL */
+			mask[len++]=c;
+		}
+	}
+}
+
+/*
+*	get_gde_ss() reads on from the current position of fin, looking at
+*	every line that starts with '"'.  A "SS_ entry is secondary structure,
+*	a "GM_ entry a gap penalty mask.  The first entry accepted (always in
+*	non-interactive mode, otherwise after a prompt) sets struct_penalties
+*	and ss_name and fills sec_struct_mask / gap_penalty_mask.  Returns
+*	when an entry has been accepted or the file runs out.
+*/
+static void get_gde_ss(sint length)
+{
+	static char title[MAXLINE+1];
+	static char line[MAXLINE+1];
+	static char sname[MAXNAMES+1];
+	char answer;
+	sint i, offset = 0;
+
+	for (;;) {
+		line[0] = '\0';
+/* search for the next comment line */
+		while(*line != '"')
+			if (fgets(line,MAXLINE+1,fin) == NULL) return;
+
+/* is it a secondary structure entry? */
+		if (strncmp(&line[1],"SS_",3) == 0) {
+			gde_ss_entry_name(line,sname,&offset);
+
+			if (interactive) {
+				strcpy(title,"Found secondary structure in alignment file: ");
+				strcat(title,sname);
+				answer=prompt_for_yes_no(title,"Use it to set local gap penalties ");
+			}
+			else answer = 'y';
+			if ((answer != 'n') && (answer != 'N'))  {
+				struct_penalties = SECST;
+				for (i=0;i<length;i++)
+					sec_struct_mask[i] = '.';
+				gde_ss_read_mask(line,sec_struct_mask,length,offset);
+				strcpy(ss_name,sname);
+			}
+		}
+/* or is it a gap penalty mask entry? */
+		else if (strncmp(&line[1],"GM_",3) == 0) {
+			gde_ss_entry_name(line,sname,&offset);
+
+			if (interactive) {
+				strcpy(title,"Found gap penalty mask in alignment file: ");
+				strcat(title,sname);
+				answer=prompt_for_yes_no(title,"Use it to set local gap penalties ");
+			}
+			else answer = 'y';
+			if ((answer != 'n') && (answer != 'N'))  {
+				struct_penalties = GMASK;
+				for (i=0;i<length;i++)
+					gap_penalty_mask[i] = '1';
+				gde_ss_read_mask(line,gap_penalty_mask,length,offset);
+				strcpy(ss_name,sname);
+			}
+		}
+		if (struct_penalties != NONE) break;
+	}
+
+}
+
+/*
+*	get_swiss_feature() parses "feature start end" from line.  For HELIX
+*	the positions start..end of sec_struct_mask are marked with 'A' and
+*	both ends with '$'; for STRAND with 'B' and '%'.  Positions count
+*	from 1.  Anything else, or a position below 1 or not below len, is
+*	ignored.
+*/
+static void get_swiss_feature(char *line, sint len)
+{
+	char c, s, feature[MAXLINE+1];
+	int  i, start_pos, end_pos;
+
+	if (sscanf(line,"%s%d%d",feature,&start_pos,&end_pos) != 3) {
+		return;
+	}
+
+	if (strcmp(feature,"HELIX") == 0) {
+		c = 'A';
+		s = '$';
+	}
+	else if (strcmp(feature,"STRAND") == 0) {
+		c = 'B';
+		s = '%';
+	}
+	else
+		return;
+
+	/* positions below 1 would index in front of the mask */
+	if(start_pos < 1 || end_pos < 1) return;
+	if(start_pos >=len || end_pos>=len) return;
+
+	sec_struct_mask[start_pos-1] = s;
+	for (i=start_pos;i<end_pos-1;i++)
+		sec_struct_mask[i] = c;
+	sec_struct_mask[end_pos-1] = s;
+
+}
+
+/*
+*	get_rsf_feature() parses "start end number word word feature" from
+*	line.  For HELIX the positions start..end of sec_struct_mask are
+*	marked with 'A' and both ends with '$'; for STRAND with 'B' and '%'.
+*	Positions count from 1.  Anything else, or a position below 1 or not
+*	below len, is ignored.
+*/
+static void get_rsf_feature(char *line, sint len)
+{
+	char c, s;
+	char str1[MAXLINE+1],str2[MAXLINE+1],feature[MAXLINE+1];
+	int  i, tmp,start_pos, end_pos;
+
+	if (sscanf(line,"%d%d%d%s%s%s",&start_pos,&end_pos,&tmp,str1,str2,feature) != 6) {
+		return;
+	}
+
+	if (strcmp(feature,"HELIX") == 0) {
+		c = 'A';
+		s = '$';
+	}
+	else if (strcmp(feature,"STRAND") == 0) {
+		c = 'B';
+		s = '%';
+	}
+	else
+		return;
+
+	/* positions below 1 would index in front of the mask */
+	if(start_pos < 1 || end_pos < 1) return;
+	if(start_pos>=len || end_pos >= len) return;
+
+	sec_struct_mask[start_pos-1] = s;
+	for (i=start_pos;i<end_pos-1;i++)
+		sec_struct_mask[i] = c;
+	sec_struct_mask[end_pos-1] = s;
+
+}
+
+/*
+*	get_swiss_mask() parses "value start end" from line and writes the
+*	digit for value (1 to 9) into positions start..end of
+*	gap_penalty_mask.  Positions count from 1.  A value outside 1..9, or
+*	a position below 1 or not below len, is ignored.
+*/
+static void get_swiss_mask(char *line, sint len)
+{
+	int  i, value, start_pos, end_pos;
+
+	if (sscanf(line,"%d%d%d",&value,&start_pos,&end_pos) != 3) {
+		return;
+	}
+
+	if (value < 1 || value > 9) return;
+
+	/* positions below 1 would index in front of the mask */
+	if(start_pos < 1 || end_pos < 1) return;
+	if(start_pos>=len || end_pos >= len) return;
+
+	for (i=start_pos-1;i<end_pos;i++)
+		gap_penalty_mask[i] = value+'0';
+
+}
+
+static char * get_seq(char *sname,sint *len,char *tit)
+{
+	static char line[MAXLINE+1];
+	char *seq = NULL;
+	sint i, offset = 0;
+        unsigned char c=EOS;
+	Boolean got_seq=FALSE;
+
+	switch(seqFormat) {
+
+/************************************/
+		case EMBLSWISS:
+			while( !linetype(line,"ID") )
+				if (fgets(line,MAXLINE+1,fin) == NULL) return NULL;
+			
+                        for(i=5;i<=strlen(line);i++)  /* DES */
+				if(line[i] != ' ') break;
+			strncpy(sname,line+i,MAXNAMES); /* remember entryname */
+                	for(i=0;i<=strlen(sname);i++)
+                        	if(sname[i] == ' ') {
+                                	sname[i]=EOS;
+                                	break;
+                        	}
+
+			sname[MAXNAMES]=EOS;
+			rtrim(sname);
+                        blank_to_(sname);
+
+						
+			while( !linetype(line,"SQ") )
+				fgets(line,MAXLINE+1,fin);
+				
+			*len=0;
+			while(fgets(line,MAXLINE+1,fin)) {
+				if(got_seq && blankline(line)) break;
+ 				if( strlen(line) > 2 && line[strlen(line)-2]=='.' && line[strlen(line)-3]=='.' ) 
+					continue;
+				if(seq==NULL)
+					seq=(char *)ckalloc((MAXLINE+2)*sizeof(char));
+				else
+					seq=(char *)ckrealloc(seq,((*len)+MAXLINE+2)*sizeof(char));
+				for(i=0;i<=MAXLINE;i++) {
+					c=line[i];
+				if(c == '\n' || c == EOS || c == '/')
+					break;			/* EOL */
+				c=chartab[c];
+				if(c) {
+					got_seq=TRUE;
+					seq[++(*len)]=c;
+				}
+				}
+			if(c == '/') break;
+			}
+		break;
+		
+/************************************/
+		case PIR:
+			while(*line != '>')
+				fgets(line,MAXLINE+1,fin);			
+                        for(i=4;i<=strlen(line);i++)  /* DES */
+				if(line[i] != ' ') break;
+			strncpy(sname,line+i,MAXNAMES); /* remember entryname */
+			sname[MAXNAMES]=EOS;
+			rtrim(sname);
+                        blank_to_(sname);
+
+			fgets(line,MAXLINE+1,fin);
+			strncpy(tit,line,MAXTITLES);
+			tit[MAXTITLES]=EOS;
+			i=strlen(tit);
+			if(tit[i-1]=='\n') tit[i-1]=EOS;
+			
+			*len=0;
+			while(fgets(line,MAXLINE+1,fin)) {
+				if(seq==NULL)
+					seq=(char *)ckalloc((MAXLINE+2)*sizeof(char));
+				else
+					seq=(char *)ckrealloc(seq,((*len)+MAXLINE+2)*sizeof(char));
+				for(i=0;i<=MAXLINE;i++) {
+					c=line[i];
+				if(c == '\n' || c == EOS || c == '*')
+					break;			/* EOL */
+			
+				c=chartab[c];
+				if(c) seq[++(*len)]=c;
+				}
+			if(c == '*') break;
+			}
+		break;
+/***********************************************/
+		case PEARSON:
+			while(*line != '>')
+				fgets(line,MAXLINE+1,fin);
+			
+                        for(i=1;i<=strlen(line);i++)  /* DES */
+				if(line[i] != ' ') break;
+			strncpy(sname,line+i,MAXNAMES); /* remember entryname */
+                        for(i=1;i<=strlen(sname);i++)  /* DES */
+				if(sname[i] == ' ') break;
+			sname[i]=EOS;
+			rtrim(sname);
+                        blank_to_(sname);
+
+			*tit=EOS;
+			
+			*len=0;
+			while(fgets(line,MAXLINE+1,fin)) {
+				if(seq==NULL)
+					seq=(char *)ckalloc((MAXLINE+2)*sizeof(char));
+				else
+					seq=(char *)ckrealloc(seq,((*len)+MAXLINE+2)*sizeof(char));
+				for(i=0;i<=MAXLINE;i++) {
+					c=line[i];
+				if(c == '\n' || c == EOS || c == '>')
+					break;			/* EOL */
+			
+				c=chartab[c];
+				if(c) seq[++(*len)]=c;
+			}
+			if(c == '>') break;
+			}
+		break;
+/**********************************************/
+		case GDE:
+			if (dnaflag) {
+				while(*line != '#')
+					fgets(line,MAXLINE+1,fin);
+			}
+			else {
+				while(*line != '%')
+					fgets(line,MAXLINE+1,fin);
+			}
+			
+			for (i=1;i<=MAXNAMES;i++) {
+				if (line[i] == '(' || line[i] == '\n')
+                                    break;
+				sname[i-1] = line[i];
+			}
+			i--;
+			sname[i]=EOS;
+			if (sname[i-1] == '(') sscanf(&line[i],"%d",&offset);
+			else offset = 0;
+			for(i--;i > 0;i--) 
+				if(isspace(sname[i])) {
+					sname[i]=EOS;	
+				}
+				else break;		
+                        blank_to_(sname);
+
+			*tit=EOS;
+			
+			*len=0;
+			for (i=0;i<offset;i++) seq[++(*len)] = '-';
+			while(fgets(line,MAXLINE+1,fin)) {
+			if(*line == '%' || *line == '#' || *line == '"') break;
+				if(seq==NULL)
+					seq=(char *)ckalloc((MAXLINE+2)*sizeof(char));
+				else
+					seq=(char *)ckrealloc(seq,((*len)+MAXLINE+2)*sizeof(char));
+				for(i=0;i<=MAXLINE;i++) {
+					c=line[i];
+				if(c == '\n' || c == EOS) 
+					break;			/* EOL */
+			
+				c=chartab[c];
+				if(c) seq[++(*len)]=c;
+				}
+			}
+		break;
+/***********************************************/
+		case RSF:
+			while(*line != '{')
+				if (fgets(line,MAXLINE+1,fin) == NULL) return NULL;
+			
+			while( !keyword(line,"name") )
+				if (fgets(line,MAXLINE+1,fin) == NULL) return NULL;
+			
+                        for(i=5;i<=strlen(line);i++)  /* DES */
+				if(line[i] != ' ') break;
+			strncpy(sname,line+i,MAXNAMES); /* remember entryname */
+                	for(i=0;i<=strlen(sname);i++)
+                        	if(sname[i] == ' ') {
+                                	sname[i]=EOS;
+                                	break;
+                        	}
+
+			sname[MAXNAMES]=EOS;
+			rtrim(sname);
+                        blank_to_(sname);
+
+						
+			while( !keyword(line,"sequence") )
+				if (fgets(line,MAXLINE+1,fin) == NULL) return NULL;
+				
+			*len=0;
+			while(fgets(line,MAXLINE+1,fin)) {
+				if(seq==NULL)
+					seq=(char *)ckalloc((MAXLINE+2)*sizeof(char));
+				else
+					seq=(char *)ckrealloc(seq,((*len)+MAXLINE+2)*sizeof(char));
+				for(i=0;i<=MAXLINE;i++) {
+					c=line[i];
+					if(c == EOS || c == '}')
+						break;			/* EOL */
+					if( c=='.')
+						seq[++(*len)]='-';
+					c=chartab[c];
+					if(c) seq[++(*len)]=c;
+				}
+				if(c == '}') break;
+			}
+		break;
+/***********************************************/
+	}
+	
+	seq[*len+1]=EOS;
+
+	return seq;
+}
+
+
+/*
+ * readseqs - read every sequence of the current input file into the global
+ * alignment arrays, starting at slot first_seq.
+ *
+ * The file name is prompted for when usemenu is set and taken from the
+ * global seqname otherwise; a leading "file://" is stripped.  The file is
+ * opened on the global stream fin, check_infile() determines the format
+ * and the number of sequences, and each sequence is then read, its
+ * length/name/title stored and its residues encoded into seq_array[].
+ * When profile_no > 0 the file is rescanned for a secondary structure /
+ * gap penalty mask.
+ *
+ * first_seq == 1 frees any previous alignment and allocates a new one;
+ * any other value reallocates the arrays, leaving the data already held
+ * for profile 1 intact.
+ *
+ * Returns the number of sequences read by this call, 0 when the file holds
+ * no sequences or two sequences share a name, and -1 when no file name was
+ * given or the file could not be opened.
+ *
+ * fin is closed on every path that opened it, including the "return 0"
+ * exits, so a rejected file does not leak its stream.
+ */
+sint readseqs(sint first_seq) /*first_seq is the #no. of the first seq. to read */
+{
+	char line[FILENAMELEN+1];
+	char fileName[FILENAMELEN+1];
+
+	static char *seq1,sname1[MAXNAMES+1],title[MAXTITLES+1];
+	sint i,j;
+	sint no_seqs;
+	static sint l1;
+	static Boolean dnaflag1;
+	
+	if(usemenu)
+		getstr("Enter the name of the sequence file",line);
+	else
+		strcpy(line,seqname);
+	if(*line == EOS) return -1;
+
+	/* accept URL style names by dropping the scheme prefix */
+	if ((sscanf(line,"file://%s",fileName) == 1 )) {
+	  strcpy(line,fileName);
+	}
+
+	if((fin=fopen(line,"r"))==NULL) {
+		error("Could not open sequence file (%s) ",line);
+		return -1;      /* DES -1 => file not found */
+	}
+	strcpy(seqname,line);
+	no_seqs=0;
+	check_infile(&no_seqs);
+	info("Sequence format is %s",formatNames[seqFormat]);
+	if(seqFormat==NEXUS)
+		error("Cannot read nexus format");
+
+	if(no_seqs == 0) {
+		fclose(fin);    /* nothing to read: release the stream */
+		return 0;       /* return the number of seqs. (zero here)*/
+	}
+
+	max_aln_length = 0;
+
+/* if this is a multiple alignment, or profile 1 - free any memory used
+by previous alignments, then allocate memory for the new alignment */
+	if(first_seq == 1) {
+		max_names = 0;
+		free_aln(nseqs);
+		alloc_aln(no_seqs);
+	}
+/* otherwise, this is a profile 2, and we need to reallocate the arrays,
+leaving the data for profile 1 intact */
+	else realloc_aln(first_seq,no_seqs);
+
+/* start the maxima from the sequences that are already in memory */
+	for(i=1;i<first_seq;i++)
+	{
+		if(seqlen_array[i]>max_aln_length)
+			max_aln_length=seqlen_array[i];
+		if(strlen(names[i])>max_names)
+			max_names=strlen(names[i]);
+	}
+
+	for(i=first_seq;i<=first_seq+no_seqs-1;i++) {    /* get the seqs now*/
+		output_index[i] = i;	/* default output order */
+		if(seqFormat == CLUSTAL) 
+			seq1=get_clustal_seq(sname1,&l1,title,i-first_seq+1);
+		else if(seqFormat == MSF)
+			seq1=get_msf_seq(sname1,&l1,title,i-first_seq+1);
+		else
+			seq1=get_seq(sname1,&l1,title);
+
+		if(seq1==NULL) break;
+/* JULIE */
+/*  Set max length of dynamically allocated arrays in prfalign.c */
+		if (l1 > max_aln_length) max_aln_length = l1;
+		seqlen_array[i]=l1;                   /* store the length */
+		strcpy(names[i],sname1);              /*    "   "  name   */
+		strcpy(titles[i],title);              /*    "   "  title  */
+
+		if(!explicit_dnaflag) {
+			dnaflag1 = check_dnaflag(seq1,l1); /* check DNA/Prot */
+			if(i == 1) dnaflag = dnaflag1;
+		}			/* type decided by first seq*/
+		else
+			dnaflag1 = dnaflag;
+
+		alloc_seq(i,l1);
+
+		if(dnaflag)
+			n_encode(seq1,seq_array[i],l1); /* encode the sequence*/
+		else					/* as ints  */
+			p_encode(seq1,seq_array[i],l1);
+		if(seq1!=NULL) seq1=ckfree(seq1);
+	}
+
+
+	max_aln_length *= 2;
+/*
+   JULIE
+   check sequence names are all different - otherwise phylip tree is 
+   confused.
+*/
+	for(i=1;i<=first_seq+no_seqs-1;i++) {
+		for(j=i+1;j<=first_seq+no_seqs-1;j++) {
+			if (strncmp(names[i],names[j],MAXNAMES) == 0) {
+				error("Multiple sequences found with same name, %s (first %d chars are significant)", names[i],MAXNAMES);
+				fclose(fin);    /* release the stream before giving up */
+				return 0;
+			}
+		}
+	}
+	for(i=first_seq;i<=first_seq+no_seqs-1;i++)
+	{
+		if(seqlen_array[i]>max_aln_length)
+			max_aln_length=seqlen_array[i];
+	}
+	
+/* look for a feature table / gap penalty mask (only if this is a profile) */
+	if (profile_no > 0) {
+		rewind(fin);
+		struct_penalties = NONE;
+		gap_penalty_mask = (char *)ckalloc((max_aln_length+1) * sizeof (char));
+		sec_struct_mask = (char *)ckalloc((max_aln_length+1) * sizeof (char));
+		ss_name = (char *)ckalloc((MAXNAMES+1) * sizeof (char));
+
+		if (seqFormat == CLUSTAL) {
+			get_clustal_ss(max_aln_length);
+		}
+		else if (seqFormat == GDE) {
+			get_gde_ss(max_aln_length);
+		}
+		else if (seqFormat == EMBLSWISS) {
+			get_embl_ss(max_aln_length);
+		}
+		else if (seqFormat == RSF) {
+			get_rsf_ss(max_aln_length);
+		}
+	}
+
+	for(i=first_seq;i<=first_seq+no_seqs-1;i++)
+	{
+		if(strlen(names[i])>max_names)
+			max_names=strlen(names[i]);
+	}
+
+	if(max_names<10) max_names=10;
+
+	fclose(fin);
+			
+	return no_seqs;    /* return the number of seqs. read in this call */
+}
+
+
+static Boolean check_dnaflag(char *seq, sint slen)
+/* Decide whether a sequence is DNA or protein.
+   Gap characters ('-') are ignored; every other character in positions
+   1..slen is a residue, and it counts as a base when it is one of
+   A,C,G,T,U or N.  The sequence is DNA when bases make up >= 85% of the
+   residues; a sequence with no bases (or no residues) is protein. */
+{
+	char *nucleotides="ACGTUN";
+	sint pos, total, hits;
+	float fraction;
+	char ch;
+
+	total = 0;
+	hits = 0;
+	for(pos=1; pos <= slen; pos++) {
+		ch = seq[pos];
+		if(ch == '-')
+			continue;		/* gaps are not residues */
+		total++;
+		if(ch == 'N' || res_index(nucleotides, ch) >= 0)
+			hits++;
+	}
+
+	if(hits == 0 || total == 0)
+		return FALSE;
+
+	fraction = (float)hits/(float)total;
+	return (fraction >= 0.85) ? TRUE : FALSE;
+}
+
+
+
+/*
+ * check_infile - identify the format of the file open on fin and count
+ * the sequences it holds.
+ *
+ * The first non-blank line is trimmed of trailing non-printing characters,
+ * its first (up to) seven characters are upper-cased, and it is matched
+ * against the known format signatures.  The result is stored in the global
+ * seqFormat; for GDE and the !!AA/!!NA MSF headers dnaflag is set too.
+ *
+ * nseqs receives the number of sequences found (0 if none or unknown).
+ *
+ * The stream is rewound before returning, except for the NEXUS and
+ * UNKNOWN cases which return straight after the first line is examined.
+ *
+ * The line buffer starts out zeroed, so an empty file is treated as an
+ * unknown format instead of examining uninitialised memory.
+ */
+static void check_infile(sint *nseqs)
+{
+	char line[MAXLINE+1] = "";	/* zero-filled: safe if nothing is read */
+	sint i;	
+
+	*nseqs=0;
+	while (fgets(line,MAXLINE+1,fin) != NULL) {
+		if(!blankline(line)) 
+			break;
+	}
+
+	/* cut the line after its last printing character */
+	for(i=(sint)strlen(line)-1;i>=0;i--)
+		if(isgraph((unsigned char)line[i])) break;
+	line[i+1]=EOS;
+        
+	/* upper-case the signature prefix, never going past the terminator */
+	for(i=0;i<=6 && line[i]!=EOS;i++)
+		line[i] = (char)toupper((unsigned char)line[i]);
+
+	if( linetype(line,"ID") ) {					/* EMBL/Swiss-Prot format ? */
+		seqFormat=EMBLSWISS;
+		(*nseqs)++;
+	}
+        else if( linetype(line,"CLUSTAL") ) {
+		seqFormat=CLUSTAL;
+	}
+ 	else if( linetype(line,"PILEUP") ) {
+		seqFormat = MSF;
+	}
+ 	else if( linetype(line,"!!AA_MULTIPLE_ALIGNMENT") ) {
+		seqFormat = MSF;
+		dnaflag = FALSE;
+	}
+ 	else if( linetype(line,"!!NA_MULTIPLE_ALIGNMENT") ) {
+		seqFormat = MSF;
+		dnaflag = TRUE;
+	}
+ 	else if( strstr(line,"MSF") && line[strlen(line)-1]=='.' &&
+                                 line[strlen(line)-2]=='.' ) {
+		seqFormat = MSF;
+	}
+ 	else if( linetype(line,"!!RICH_SEQUENCE") ) {
+		seqFormat = RSF;
+	}
+ 	else if( linetype(line,"#NEXUS") ) {
+		seqFormat=NEXUS;
+		return;
+	}
+	else if(*line == '>') {						/* no */
+		seqFormat=(line[3] == ';')?PIR:PEARSON; /* distinguish PIR and Pearson */
+		(*nseqs)++;
+	}
+	else if((*line == '"') || (*line == '%') || (*line == '#')) {
+		seqFormat=GDE; /* GDE format */
+		if (*line == '%') {
+                        (*nseqs)++;
+			dnaflag = FALSE;
+		}
+		else if (*line == '#') {
+			(*nseqs)++;
+			dnaflag = TRUE;
+		}
+	}
+	else {
+		seqFormat=UNKNOWN;
+		return;
+	}
+
+	/* count the remaining entries; alignment formats delegate to a
+	   dedicated counter and return as soon as it is done */
+	while(fgets(line,MAXLINE+1,fin) != NULL) {
+		switch(seqFormat) {
+			case EMBLSWISS:
+				if( linetype(line,"ID") )
+					(*nseqs)++;
+				break;
+			case PIR:
+				*nseqs = count_pir_seqs();
+				fseek(fin,0,SEEK_SET);
+				return;
+			case PEARSON:
+                                if( *line == '>' )
+                                        (*nseqs)++;
+                                break;
+			case GDE:
+				if(( *line == '%' ) && ( dnaflag == FALSE))
+					(*nseqs)++;
+				else if (( *line == '#') && ( dnaflag == TRUE))
+					(*nseqs)++;
+				break;
+			case CLUSTAL:
+				*nseqs = count_clustal_seqs();
+				fseek(fin,0,SEEK_SET);
+				return;
+			case MSF:
+				*nseqs = count_msf_seqs();
+				fseek(fin,0,SEEK_SET);
+				return;
+			case RSF:
+				fseek(fin,0,SEEK_SET);	/* RSF is counted from the top */
+				*nseqs = count_rsf_seqs();
+				fseek(fin,0,SEEK_SET);
+				return;
+			case USER:
+			default:
+				break;
+		}
+	}
+	fseek(fin,0,SEEK_SET);
+}
+
+
+static sint count_pir_seqs(void)
+/* Count the sequences in a PIR file, reading from the current position of
+   fin.  Every sequence must be closed by a '*' before the next '>' header
+   line; if one is not, an error is reported and 0 is returned. */
+{
+	char buf[MAXLINE+1];
+	sint total;
+	Boolean terminated;
+
+	/* the first sequence: read until a line holds '*' or a new header starts */
+	terminated = FALSE;
+	while (!terminated && fgets(buf,MAXLINE+1,fin) != NULL) {
+		if(buf[0] == '>')
+			break;
+		/* first of '*', newline or end of string decides the line */
+		if(buf[strcspn(buf,"*\n")] == '*')
+			terminated = TRUE;
+	}
+	if (!terminated) {
+		error("PIR format sequence end marker '*'\nmissing for one or more sequences.");
+		return (sint)0;	/* funny format*/
+	}
+
+	total = 1;
+
+	/* every further header must be followed by a terminated sequence */
+	while (fgets(buf,MAXLINE+1,fin) != NULL) {
+		if(buf[0] != '>')
+			continue;
+		terminated = FALSE;
+		while (!terminated && fgets(buf,MAXLINE+1,fin) != NULL) {
+			if(buf[0] == '>') {
+				error("PIR format sequence end marker '*' missing for one or more sequences.");
+				return (sint)0;	/* funny format*/
+			}
+			if(buf[strcspn(buf,"*\n")] == '*') {
+				terminated = TRUE;
+				total++;
+			}
+		}
+	}
+	return (sint)total;
+}
+
+
+static sint count_clustal_seqs(void)
+/* Count the sequences in a clustal alignment file: the number of lines in
+   the first block, i.e. from the next non-blank line on fin up to the
+   following blank line.  Returns 0 if the file ends before that blank
+   line is seen. */
+{
+	char row[MAXLINE+1];
+	sint  count;
+
+	/* skip ahead to the first line of the block */
+	while (fgets(row,MAXLINE+1,fin) != NULL && cl_blankline(row))
+		;
+
+	/* that line is sequence 1; every further non-blank line adds one */
+	for (count = 1; fgets(row,MAXLINE+1,fin) != NULL; count++) {
+		if(cl_blankline(row))
+			return count;
+	}
+
+	return (sint)0;	/* if you got to here-funny format/no seqs.*/
+}
+
+static sint count_msf_seqs(void)
+{
+/* Count the sequences in a PILEUP (MSF) alignment file: the number of
+   lines in the first block after the "//" separator, up to the next blank
+   line.  Returns 0 if the file ends before that blank line is seen. */
+
+	char row[MAXLINE+1];
+	sint  count;
+
+	/* the header ends at the "//" line */
+	while (fgets(row,MAXLINE+1,fin) != NULL && !linetype(row,"//"))
+		;
+
+	/* skip ahead to the first line of the block */
+	while (fgets(row,MAXLINE+1,fin) != NULL && blankline(row))
+		;
+
+	/* that line is sequence 1; every further non-blank line adds one */
+	for (count = 1; fgets(row,MAXLINE+1,fin) != NULL; count++) {
+		if(blankline(row))
+			return count;
+	}
+
+	return (sint)0;	/* if you got to here-funny format/no seqs.*/
+}
+
+static sint count_rsf_seqs(void)
+{
+/* Count the sequences in a GCG RSF alignment file.
+   The comment header is skipped up to and including the first line whose
+   text ends in ".." (immediately before the line terminator); after that
+   every line starting with '{' is counted as one sequence.
+   Returns the count (0 if there are no such lines). */
+
+	char line[MAXLINE+1];
+	sint  nseqs;
+	size_t n;
+
+	nseqs = 0;
+/* skip the comments */
+	while (fgets(line,MAXLINE+1,fin) != NULL) {
+		n = strlen(line);
+		/* need at least "..\n"; shorter lines would index before line[] */
+		if(n > 2 && line[n-2]=='.' && line[n-3]=='.')
+			break;
+	}
+
+	while (fgets(line,MAXLINE+1,fin) != NULL) {
+                if( *line == '{' )
+                      nseqs++;
+	}
+	return (sint)nseqs;
+}
+
+static void p_encode(char *seq, char *naseq, sint l)
+{	/* Encode a protein sequence: for positions 1..l a '-' becomes gap_pos2
+	   and any other character becomes its index in amino_acid_codes (as
+	   returned by res_index).  A -3 is stored after the last position. */
+	register sint pos;
+	char residue;
+
+	pos = 1;
+	while(pos <= l) {
+		residue = seq[pos];
+		if(residue != '-')
+			naseq[pos] = res_index(amino_acid_codes,residue);
+		else
+			naseq[pos] = gap_pos2;
+		pos++;
+	}
+	naseq[pos] = -3;	/* end marker */
+}
+
+static void n_encode(char *seq,char *naseq,sint l)
+{	/* Encode a nucleotide sequence: for positions 1..l a gap character in
+	   the input ('-') becomes gap_pos2 and any other character becomes its
+	   index in amino_acid_codes (the same table p_encode uses).  A -3 is
+	   stored after the last position. */
+	register sint pos;
+
+	for(pos=1;pos<=l;pos++) {
+		if(seq[pos] != '-') {
+			naseq[pos] = res_index(amino_acid_codes,seq[pos]);
+			continue;
+		}
+		naseq[pos] = gap_pos2;	/* code for a gap in the input files */
+	}
+	naseq[pos] = -3;	/* end marker */
+}
+
+static sint res_index(char *t,char c)
+{	/* Return the position (from 0) of the first occurrence of c in the
+	   string t, or -1 when c does not occur before the terminator. */
+	register sint pos;
+
+	pos = 0;
+	while(t[pos]) {
+		if(t[pos] == c)
+			return(pos);
+		pos++;
+	}
+	return -1;
+}

Added: trunk/packages/clustalw/branches/upstream/current/showpair.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/showpair.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/showpair.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,486 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+#include "clustalw.h"	
+
+static void make_p_ptrs(sint *tptr, sint *pl, sint naseq, sint l);
+static void make_n_ptrs(sint *tptr, sint *pl, sint naseq, sint len);
+static void put_frag(sint fs, sint v1, sint v2, sint flen);
+static sint frag_rel_pos(sint a1, sint b1, sint a2, sint b2);
+static void des_quick_sort(sint *array1, sint *array2, sint array_size);
+static void pair_align(sint seq_no, sint l1, sint l2);
+
+
+/*
+*	Prototypes
+*/
+
+/*
+*	 Global variables
+*/
+extern sint *seqlen_array;
+extern char **seq_array;
+extern sint  dna_ktup, dna_window, dna_wind_gap, dna_signif; /* params for DNA */
+extern sint prot_ktup,prot_window,prot_wind_gap,prot_signif; /* params for prots */
+extern sint 	nseqs;
+extern Boolean 	dnaflag;
+extern double 	**tmat;
+extern sint 	max_aa;
+extern sint  max_aln_length;
+
+static sint 	next;
+static sint 	curr_frag,maxsf,vatend;
+static sint 	**accum;
+static sint 	*diag_index;
+static char 	*slopes;
+
+sint ktup,window,wind_gap,signif;    		      /* Pairwise aln. params */
+sint *displ;
+sint *zza, *zzb, *zzc, *zzd;
+
+extern Boolean percent;
+
+
+static void make_p_ptrs(sint *tptr,sint *pl,sint naseq,sint l)
+{	/* Index the k-tuples of protein sequence naseq (length l).
+	   Each window of ktup residues is turned into a code in base
+	   (max_aa+1), plus one.  pl[code] ends up holding the last position
+	   at which that code starts, and tptr[pos] the previous position with
+	   the same code (0 when there is none).  Windows containing a residue
+	   outside 0..max_aa are skipped. */
+	static sint weight[10];
+	sint start,k,ncodes,code;
+	char res;
+
+	/* weight[k] = (max_aa+1)^(k-1), the place value of the k-th residue */
+	k = 1;
+	while(k <= ktup) {
+		weight[k] = (sint) pow((double)(max_aa+1),(double)(k-1));
+		k++;
+	}
+
+	ncodes = (sint) pow((double)(max_aa+1),(double)ktup);
+	for(k=1;k<=ncodes;k++)
+		pl[k]=0;
+	for(k=1;k<=l;k++)
+		tptr[k]=0;
+
+	for(start=1;start<=(l-ktup+1);start++) {
+		code=0;
+		for(k=1;k<=ktup;k++) {
+			res = seq_array[naseq][start+k-1];
+			if((res<0) || (res > max_aa))
+				break;
+			code += ((res) * weight[k]);
+		}
+		if(k<=ktup)		/* left the window early: unusable residue */
+			continue;
+		code++;
+		if(pl[code]!=0)
+			tptr[start]=pl[code];
+		pl[code]=start;
+	}
+}
+
+
+static void make_n_ptrs(sint *tptr,sint *pl,sint naseq,sint len)
+{	/* Index the k-tuples of nucleotide sequence naseq (length len).
+	   Each window of ktup residues is turned into a base-4 code, plus
+	   one.  pl[code] ends up holding the last position at which that code
+	   starts, and tptr[pos] the previous position with the same code (0
+	   when there is none).  Windows containing a residue outside 0..4 are
+	   skipped. */
+	static sint pot[]={ 0, 1, 4, 16, 64, 256, 1024, 4096 };
+	sint start,k,ncodes,code;
+	char base;
+
+	ncodes = (sint) pow((double)4,(double)ktup);
+
+	k = 1;
+	while(k <= ncodes)
+		pl[k++]=0;
+	k = 1;
+	while(k <= len)
+		tptr[k++]=0;
+
+	for(start=1;start<=len-ktup+1;start++) {
+		code=0;
+		for(k=1;k<=ktup;k++) {
+			base = seq_array[naseq][start+k-1];
+			if((base<0) || (base>4))
+				break;
+			code += ((base) * pot[k]);  /* DES */
+		}
+		if(k<=ktup)		/* left the window early: unusable residue */
+			continue;
+		code++;
+		if(pl[code]!=0)
+			tptr[start]=pl[code];
+		pl[code]=start;
+	}
+}
+
+
+static void put_frag(sint fs,sint v1,sint v2,sint flen)
+{	/* Record fragment number curr_frag in accum[] and link it into the
+	   list headed by maxsf, which is kept in descending order of the
+	   value in accum[0][].  accum[4][] holds the link to the next list
+	   entry.  The file-level variable next is used as the list cursor. */
+	sint prev;
+
+	accum[3][curr_frag]=flen;
+	accum[2][curr_frag]=v2;
+	accum[1][curr_frag]=v1;
+	accum[0][curr_frag]=fs;
+
+	/* empty list: this fragment becomes its only member */
+	if(maxsf == 0) {
+		maxsf=1;
+		accum[4][curr_frag]=0;
+		return;
+	}
+
+	/* at least as good as the current head: becomes the new head */
+	if(fs >= accum[0][maxsf]) {
+		accum[4][curr_frag]=maxsf;
+		maxsf=curr_frag;
+		return;
+	}
+
+	/* otherwise walk down to the first entry that does not beat fs */
+	next=maxsf;
+	do {
+		prev=next;
+		next=accum[4][next];
+	} while(fs < accum[0][next]);
+
+	accum[4][curr_frag]=next;
+	accum[4][prev]=curr_frag;
+}
+
+
+static sint frag_rel_pos(sint a1,sint b1,sint a2,sint b2)
+{	/* Return TRUE when the fragment at (a2,b2) lies before the one at
+	   (a1,b1): on the same diagonal a2 only has to be smaller than a1,
+	   on different diagonals the ktup residues of (a2,b2) must end before
+	   (a1,b1) starts in both sequences.  Otherwise return FALSE. */
+	if(a1-b1 == a2-b2)
+		return (a2<a1) ? TRUE : FALSE;
+
+	return (a2+ktup-1<a1 && b2+ktup-1<b1) ? TRUE : FALSE;
+}
+
+
+static void des_quick_sort(sint *array1, sint *array2, sint array_size)
+/* Non-recursive quicksort using an explicit stack of pending ranges. */
+/* Quicksort routine, adapted from chapter 4, page 115 of software tools */
+/* by Kernighan and Plauger, (1986) */
+/* Sort the elements of array1 into ascending order and apply the same */
+/* exchanges to the elements of array2 */
+/* The range sorted is 1 .. array_size-1.
+ * NOTE(review): element [array_size] is never touched, although the call in
+ * pair_align passes tl1 with data held in 1..tl1 - confirm this is intended.
+ */
+{
+	sint temp1, temp2;
+	sint p, pivlin;
+	sint i, j;
+	sint lst[50], ust[50];       /* lower/upper bounds of the pending ranges; */
+								/* log(base2) of the no. of elements must be < 50 */
+
+	lst[1] = 1;
+	ust[1] = array_size-1;
+	p = 1;
+
+	while(p > 0) {		/* until the stack of ranges is empty */
+		if(lst[p] >= ust[p])
+			p--;		/* fewer than two elements: nothing to do */
+		else {
+			i = lst[p] - 1;
+			j = ust[p];
+			pivlin = array1[j];	/* pivot = last element of the range */
+			while(i < j) {
+				for(i=i+1; array1[i] < pivlin; i++)	/* stops at the pivot at the latest */
+					;
+				for(j=j-1; j > i; j--)
+					if(array1[j] <= pivlin) break;
+				if(i < j) {
+					temp1     = array1[i];
+					array1[i] = array1[j];
+					array1[j] = temp1;
+					
+					temp2     = array2[i];
+					array2[i] = array2[j];
+					array2[j] = temp2;
+				}
+			}
+			
+			j = ust[p];	/* move the pivot to its final place, i */
+
+			temp1     = array1[i];
+			array1[i] = array1[j];
+			array1[j] = temp1;
+
+			temp2     = array2[i];
+			array2[i] = array2[j];
+			array2[j] = temp2;
+
+			if(i-lst[p] < ust[p] - i) {	/* the smaller part goes on top, so it is sorted first */
+				lst[p+1] = lst[p];
+				ust[p+1] = i - 1;
+				lst[p]   = i + 1;
+			}
+			else {
+				lst[p+1] = i + 1;
+				ust[p+1] = ust[p];
+				ust[p]   = i - 1;
+			}
+			p = p + 1;
+		}
+	}
+	return;
+
+}
+
+
+
+
+
+static void pair_align(sint seq_no,sint l1,sint l2)
+{	/* Score one pair of sequences by chaining k-tuple matches.
+	 * seq_no is the index in seq_array of the first sequence, l1 its
+	 * length and l2 the length of the second sequence.  The k-tuple tables
+	 * of both sequences must already be in zza/zzc (first) and zzb/zzd
+	 * (second); they are read here, not built.
+	 * Fragments are recorded through put_frag(); on return maxsf is the
+	 * fragment with the highest accumulated score (0 if there is none) and
+	 * that score is accum[0][maxsf].  vatend is set to 1 when the fragment
+	 * store filled up and the scan was abandoned, and to 0 otherwise.
+	 */
+	sint pot[8],i,j,l,m,flag,limit,pos,tl1,vn1,vn2,flen,osptr,fs;
+	sint tv1,tv2,encrypt,subt1,subt2,rmndr;
+	char residue;
+	
+	if(dnaflag) {		/* place values and no. of codes: base 4 */
+		for(i=1;i<=ktup;++i)
+			pot[i] = (sint) pow((double)4,(double)(i-1));
+		limit = (sint) pow((double)4,(double)ktup);
+	}
+	else {			/* base max_aa+1 for proteins */
+		for (i=1;i<=ktup;i++)
+           		pot[i] = (sint) pow((double)(max_aa+1),(double)(i-1));
+		limit = (sint) pow((double)(max_aa+1),(double)ktup);
+	}
+	
+	tl1 = (l1+l2)-1;	/* number of diagonals */
+	
+	for(i=1;i<=tl1;++i) {
+		slopes[i]=displ[i]=0;
+		diag_index[i] = i;
+	}
+	
+
+/* increment diagonal score for each k_tuple match:
+ * for every code, each position in the first sequence (chain zzc/zza) is
+ * paired with each position in the second (chain zzd/zzb)
+ */
+
+	for(i=1;i<=limit;++i) {
+		vn1=zzc[i];
+		while(TRUE) {
+			if(!vn1) break;
+			vn2=zzd[i];
+			while(vn2 != 0) {
+				osptr=vn1-vn2+l2;	/* diagonal number of this match */
+				++displ[osptr];
+				vn2=zzb[vn2];
+			}
+			vn1=zza[vn1];
+		}
+	}
+
+/* choose the top SIGNIF diagonals */
+
+	des_quick_sort(displ, diag_index, tl1);
+
+	j = tl1 - signif + 1;
+	if(j < 1) j = 1;
+ 
+/* flag all diagonals within WINDOW of a top diagonal */
+
+	for(i=tl1; i>=j; i--) 
+		if(displ[i] > 0) {
+			pos = diag_index[i];
+			l = (1  >pos-window) ? 1   : pos-window;
+			m = (tl1<pos+window) ? tl1 : pos+window;
+			for(; l <= m; l++) 
+				slopes[l] = 1;
+		}
+
+	for(i=1; i<=tl1; i++)  displ[i] = 0;	/* from here displ[d] = latest fragment on diagonal d */
+
+	
+	curr_frag=maxsf=0;
+	
+	for(i=1;i<=(l1-ktup+1);++i) {	/* every k-tuple start in the first sequence */
+		encrypt=flag=0;
+		for(j=1;j<=ktup;++j) {
+			residue = seq_array[seq_no][i+j-1];
+			if((residue<0) || (residue>max_aa)) {	/* NOTE(review): make_n_ptrs tests > 4 for DNA - confirm these agree */
+				flag=TRUE;
+				break;
+			}
+			encrypt += ((residue)*pot[j]);
+		}
+		if(flag) continue;
+		++encrypt;
+	
+		vn2=zzd[encrypt];	/* first matching position in the second sequence */
+	
+		flag=FALSE;
+		while(TRUE) {
+			if(!vn2) {	/* end of the chain of matches */
+				flag=TRUE;
+				break;
+			}
+			osptr=i-vn2+l2;
+			if(slopes[osptr]!=1) {	/* match is not on a flagged diagonal */
+				vn2=zzb[vn2];
+				continue;
+			}
+			flen=0;
+			fs=ktup;	/* score of a fragment with no predecessor */
+			next=maxsf;		
+		
+		/*
+		* A-loop: walk the fragment list from the best score down until
+		* a fragment that can precede this match is found (or the list
+		* ends), then store the match as a new fragment
+		*/
+		
+			while(TRUE) {
+				if(!next) {
+					++curr_frag;
+					if(curr_frag>=2*max_aln_length) {	/* fragment store is full */
+						info("(Partial alignment)");
+						vatend=1;
+						return;
+					}
+					displ[osptr]=curr_frag;
+					put_frag(fs,i,vn2,flen);
+				}
+				else {
+					tv1=accum[1][next];
+					tv2=accum[2][next];
+					if(frag_rel_pos(i,vn2,tv1,tv2)) {
+						if(i-vn2==accum[1][next]-accum[2][next]) {	/* same diagonal */
+							if(i>accum[1][next]+(ktup-1))
+								fs=accum[0][next]+ktup;
+							else {	/* overlapping: add only the new part */
+								rmndr=i-accum[1][next];
+								fs=accum[0][next]+rmndr;
+							}
+							flen=next;
+							next=0;
+							continue;
+						}
+						else {	/* different diagonal */
+							if(displ[osptr]==0)
+								subt1=ktup;
+							else {	/* subt1: extend the latest fragment on this diagonal */
+								if(i>accum[1][displ[osptr]]+(ktup-1))
+									subt1=accum[0][displ[osptr]]+ktup;
+								else {
+									rmndr=i-accum[1][displ[osptr]];
+									subt1=accum[0][displ[osptr]]+rmndr;
+								}
+							}
+							subt2=accum[0][next]-wind_gap+ktup;	/* subt2: follow next, less wind_gap */
+							if(subt2>subt1) {
+								flen=next;
+								fs=subt2;
+							}
+							else {
+								flen=displ[osptr];
+								fs=subt1;
+							}
+							next=0;
+							continue;
+						}
+					}
+					else {
+						next=accum[4][next];
+						continue;
+					}
+				}
+				break;
+			}
+		/*
+		* End of Aloop
+		*/
+		
+			vn2=zzb[vn2];
+		}
+	}
+	vatend=0;
+}		 
+
+void show_pair(sint istart, sint iend, sint jstart, sint jend)
+{	/* Fill tmat with quick pairwise distances.
+	   Every sequence istart+1..iend is compared with every sequence
+	   jstart+2..jend by pair_align(); the resulting score (expressed as a
+	   percentage of the shorter sequence length when percent is set) is
+	   stored in tmat, symmetrically, as (100 - score)/100, and one info
+	   line is printed per pair.  All work arrays are allocated on entry
+	   and released before returning. */
+	sint row,col,k,shorter;
+	double score,dist;
+
+	accum = (sint **)ckalloc( 5*sizeof (sint *) );
+	for (k=0;k<5;k++)
+		accum[k] = (sint *) ckalloc((2*max_aln_length+1) * sizeof (sint) );
+
+	displ      = (sint *) ckalloc( (2*max_aln_length +1) * sizeof (sint) );
+	slopes     = (char *)ckalloc( (2*max_aln_length +1) * sizeof (char));
+	diag_index = (sint *) ckalloc( (2*max_aln_length +1) * sizeof (sint) );
+
+	zza = (sint *)ckalloc( (max_aln_length+1) * sizeof (sint) );
+	zzb = (sint *)ckalloc( (max_aln_length+1) * sizeof (sint) );
+
+	zzc = (sint *)ckalloc( (max_aln_length+1) * sizeof (sint) );
+	zzd = (sint *)ckalloc( (max_aln_length+1) * sizeof (sint) );
+
+	/* pick the parameter set that matches the sequence type */
+	ktup     = dnaflag ? dna_ktup     : prot_ktup;
+	window   = dnaflag ? dna_window   : prot_window;
+	signif   = dnaflag ? dna_signif   : prot_signif;
+	wind_gap = dnaflag ? dna_wind_gap : prot_wind_gap;
+
+	fprintf(stdout,"\n\n");
+
+	for(row=istart+1;row<=iend;row++) {
+		/* k-tuple table of the first sequence of the pair */
+		if(dnaflag)
+			make_n_ptrs(zza,zzc,row,seqlen_array[row]);
+		else
+			make_p_ptrs(zza,zzc,row,seqlen_array[row]);
+
+		for(col=jstart+2;col<=jend;col++) {
+			/* k-tuple table of the second sequence of the pair */
+			if(dnaflag)
+				make_n_ptrs(zzb,zzd,col,seqlen_array[col]);
+			else
+				make_p_ptrs(zzb,zzd,col,seqlen_array[col]);
+
+			pair_align(row,seqlen_array[row],seqlen_array[col]);
+
+			score = 0.0;
+			if(maxsf) {
+				score = (double)accum[0][maxsf];
+				if(percent) {
+					shorter = seqlen_array[col];
+					if(seqlen_array[row] < seqlen_array[col])
+						shorter = seqlen_array[row];
+					score = (score/(double)shorter) * 100.0;
+				}
+			}
+
+			dist = (100.0 - score)/100.0;
+			tmat[row][col] = dist;
+			tmat[col][row] = dist;
+
+			if(score>0.1) 
+				info("Sequences (%d:%d) Aligned. Score: %lg",
+					(pint)row,(pint)col,score);
+			else
+				info("Sequences (%d:%d) Not Aligned",
+					(pint)row,(pint)col);
+		}
+	}
+
+	for (k=0;k<5;k++)
+		accum[k]=ckfree((void *)accum[k]);
+	accum=ckfree((void *)accum);
+
+	displ=ckfree((void *)displ);
+	slopes=ckfree((void *)slopes);
+	diag_index=ckfree((void *)diag_index);
+
+	zza=ckfree((void *)zza);
+	zzb=ckfree((void *)zzb);
+	zzc=ckfree((void *)zzc);
+	zzd=ckfree((void *)zzd);
+}
+
+

Added: trunk/packages/clustalw/branches/upstream/current/trees.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/trees.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/trees.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,2166 @@
+/* Phyle of filogenetic tree calculating functions for CLUSTAL W */
+/* DES was here  FEB. 1994 */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+#include "clustalw.h"
+#include "dayhoff.h"    /* set correction for amino acid distances >= 75% */
+
+
+/*
+ *   Prototypes
+ */
+Boolean transition(sint base1, sint base2);
+void tree_gap_delete(void);
+void distance_matrix_output(FILE *ofile);
+void nj_tree(char **tree_description, FILE *tree);
+void compare_tree(char **tree1, char **tree2, sint *hits, sint n);
+void print_phylip_tree(char **tree_description, FILE *tree, sint bootstrap);
+void print_nexus_tree(char **tree_description, FILE *tree, sint bootstrap);
+sint two_way_split(char **tree_description, FILE *tree, sint start_row, sint flag, sint bootstrap);
+sint two_way_split_nexus(char **tree_description, FILE *tree, sint start_row, sint flag, sint bootstrap);
+void print_tree(char **tree_description, FILE *tree, sint *totals);
+static Boolean is_ambiguity(char c);
+static void overspill_message(sint overspill,sint total_dists);
+
+
+/*
+ *   Global variables
+ */
+
+extern sint max_names;
+
+extern double **tmat;     /* general nxn array of reals; allocated from main */
+                          /* this is used as a distance matrix */
+extern Boolean dnaflag;   /* TRUE for DNA seqs; FALSE for proteins */
+extern Boolean tossgaps;  /* Ignore places in align. where ANY seq. has a gap*/
+extern Boolean kimura;    /* Use correction for multiple substitutions */
+extern Boolean output_tree_clustal;   /* clustal text output for trees */
+extern Boolean output_tree_phylip;    /* phylip nested parentheses format */
+extern Boolean output_tree_distances; /* phylip distance matrix */
+extern Boolean output_tree_nexus;     /* nexus format tree */
+extern Boolean output_pim;     /* perc identity matrix output Ramu */
+
+extern sint    bootstrap_format;      /* bootstrap file format */
+extern Boolean empty;                 /* any sequences in memory? */
+extern Boolean usemenu;   /* interactive (TRUE) or command line (FALSE) */
+extern sint nseqs;
+extern sint max_aln_length;
+extern sint *seqlen_array; /* the lengths of the sequences */
+extern char **seq_array;   /* the sequences */
+extern char **names;       /* the seq. names */
+extern char seqname[];		/* name of input file */
+extern sint gap_pos1,gap_pos2;
+extern Boolean use_ambiguities;
+extern char *amino_acid_codes;
+
+static double 	*av;
+static double 	*left_branch, *right_branch;
+static double 	*save_left_branch, *save_right_branch;
+static sint	*boot_totals;
+static sint 	*tkill;
+/*  
+  The next line is a fossil from the days of using the cc ran()
+static int 	ran_factor;
+*/
+static sint 	*boot_positions;
+static FILE 	*phylip_phy_tree_file;
+static FILE 	*clustal_phy_tree_file;
+static FILE 	*distances_phy_tree_file;
+static FILE 	*nexus_phy_tree_file;
+static FILE     *pim_file; /* Ramu */
+static Boolean 	verbose;
+static char 	*tree_gaps;
+static sint first_seq, last_seq;
+                     /* array of weights; 1 for use this posn.; 0 don't */
+
+extern sint boot_ntrials;		/* number of bootstrap trials */
+extern unsigned sint boot_ran_seed;	/* random number generator seed */
+
+/*
+   Open one output file for phylogenetic_tree().
+   A non-empty name is handed to open_explicit_file(); an empty name is
+   handed, with the prompt, default path and extension, to
+   open_output_file().  The result of whichever opener ran is returned;
+   the caller treats NULL as failure.
+*/
+static FILE *open_tree_output(char *name, char *prompt, char *path, char *extension)
+{
+	if (name[0] != EOS)
+		return open_explicit_file(name);
+	return open_output_file(prompt, path, name, extension);
+}
+
+/*
+   Close every output file that phylogenetic_tree() has opened so far and
+   reset all five file pointers to NULL.  Used when a later open fails.
+*/
+static void abandon_tree_outputs(void)
+{
+	if (clustal_phy_tree_file != NULL) fclose(clustal_phy_tree_file);
+	if (phylip_phy_tree_file != NULL) fclose(phylip_phy_tree_file);
+	if (distances_phy_tree_file != NULL) fclose(distances_phy_tree_file);
+	if (nexus_phy_tree_file != NULL) fclose(nexus_phy_tree_file);
+	if (pim_file != NULL) fclose(pim_file);
+
+	clustal_phy_tree_file = phylip_phy_tree_file = NULL;
+	distances_phy_tree_file = nexus_phy_tree_file = pim_file = NULL;
+}
+
+void phylogenetic_tree(char *phylip_name,char *clustal_name,char *dist_name, char *nexus_name, char *pim_name)
+/* 
+   Calculate a tree using the distances in the nseqs*nseqs array tmat.
+   This is the routine for getting the REAL trees after alignment.
+
+   Each *_name argument is the name of one output file; it is only used
+   when the matching output_tree_* / output_pim flag is set.  An empty
+   name makes open_output_file() choose or ask for one (presumably it
+   writes the chosen name back into the buffer - confirm in its
+   definition).
+
+   Nothing is done if no alignment is loaded or it has fewer than two
+   sequences.  If any requested output file cannot be opened, the files
+   already opened by this call are closed again and the routine returns
+   without calculating anything.
+*/
+{	char path[FILENAMELEN+1];
+	sint i, j;
+	sint overspill = 0;
+	sint total_dists;
+	static char **standard_tree;
+	static char **save_tree;
+
+	if(empty) {
+		error("You must load an alignment first");
+		return;
+	}
+
+	if(nseqs<2) {
+		error("Alignment has only %d sequences",(pint)nseqs);
+		return;
+	}
+	first_seq=1;
+	last_seq=nseqs;
+
+	get_path(seqname,path);
+
+	/* start from a known state so abandon_tree_outputs() only closes
+	   files opened by this call */
+	clustal_phy_tree_file = phylip_phy_tree_file = NULL;
+	distances_phy_tree_file = nexus_phy_tree_file = pim_file = NULL;
+
+	if(output_tree_clustal) {
+		clustal_phy_tree_file = open_tree_output(clustal_name,
+			"\nEnter name for CLUSTAL    tree output file  ",path,"nj");
+		if(clustal_phy_tree_file == NULL) {
+			abandon_tree_outputs();
+			return;
+		}
+	}
+
+	if(output_tree_phylip) {
+		phylip_phy_tree_file = open_tree_output(phylip_name,
+			"\nEnter name for PHYLIP     tree output file  ",path,"ph");
+		if(phylip_phy_tree_file == NULL) {
+			abandon_tree_outputs();
+			return;
+		}
+	}
+
+	if(output_tree_distances) {
+		distances_phy_tree_file = open_tree_output(dist_name,
+			"\nEnter name for distance matrix output file  ",path,"dst");
+		if(distances_phy_tree_file == NULL) {
+			abandon_tree_outputs();
+			return;
+		}
+	}
+
+	if(output_tree_nexus) {
+		nexus_phy_tree_file = open_tree_output(nexus_name,
+			"\nEnter name for NEXUS tree output file  ",path,"tre");
+		if(nexus_phy_tree_file == NULL) {
+			abandon_tree_outputs();
+			return;
+		}
+	}
+
+	if(output_pim) {
+		/* NOTE(review): this prompt contains a bare '%'; confirm that
+		   open_output_file() never uses the prompt as a printf format. */
+		pim_file = open_tree_output(pim_name,
+			"\nEnter name for % Identity matrix output file  ",path,"pim");
+		if(pim_file == NULL) {
+			abandon_tree_outputs();
+			return;
+		}
+	}
+
+	/* identity mapping: position j of the alignment is used as itself */
+	boot_positions = (sint *)ckalloc( (seqlen_array[first_seq]+2) * sizeof (sint) );
+
+	for(j=1; j<=seqlen_array[first_seq]; ++j) 
+		boot_positions[j] = j;		
+
+	/* the distance matrix is recalculated once per requested output;
+	   overspill keeps the count returned by the last calculation */
+	if(output_tree_clustal) {
+		verbose = TRUE;     /* Turn on file output */
+		if(dnaflag)
+			overspill = dna_distance_matrix(clustal_phy_tree_file);
+		else 
+			overspill = prot_distance_matrix(clustal_phy_tree_file);
+	}
+
+	if(output_tree_phylip) {
+		verbose = FALSE;     /* Turn off file output */
+		if(dnaflag)
+			overspill = dna_distance_matrix(phylip_phy_tree_file);
+		else 
+			overspill = prot_distance_matrix(phylip_phy_tree_file);
+	}
+
+	if(output_tree_nexus) {
+		verbose = FALSE;     /* Turn off file output */
+		if(dnaflag)
+			overspill = dna_distance_matrix(nexus_phy_tree_file);
+		else 
+			overspill = prot_distance_matrix(nexus_phy_tree_file);
+	}
+
+	if(output_pim) { /* Ramu  */
+		verbose = FALSE;     /* Turn off file output */
+		/* the same routine serves DNA and protein alignments */
+		calc_percidentity(pim_file);
+	}
+
+
+	if(output_tree_distances) {
+		verbose = FALSE;     /* Turn off file output */
+		if(dnaflag)
+			overspill = dna_distance_matrix(distances_phy_tree_file);
+		else 
+			overspill = prot_distance_matrix(distances_phy_tree_file);
+		distance_matrix_output(distances_phy_tree_file);
+	}
+
+/* check if any distances overflowed the distance corrections */
+	if ( overspill > 0 ) {
+		total_dists = (nseqs*(nseqs-1))/2;
+		overspill_message(overspill,total_dists);
+	}
+
+	if(output_tree_clustal) verbose = TRUE;     /* Turn on file output */
+
+	standard_tree   = (char **) ckalloc( (nseqs+1) * sizeof (char *) );
+	for(i=0; i<nseqs+1; i++) 
+		standard_tree[i]  = (char *) ckalloc( (nseqs+1) * sizeof(char) );
+	save_tree   = (char **) ckalloc( (nseqs+1) * sizeof (char *) );
+	for(i=0; i<nseqs+1; i++) 
+		save_tree[i]  = (char *) ckalloc( (nseqs+1) * sizeof(char) );
+
+	/* the clustal file is passed even when clustal output is off; in that
+	   case verbose is FALSE here (set by the phylip/nexus steps above) */
+	if(output_tree_clustal || output_tree_phylip || output_tree_nexus) 
+		nj_tree(standard_tree,clustal_phy_tree_file);
+
+	/* keep a copy of the tree description and restore it between the
+	   phylip and nexus printers */
+	for(i=1; i<nseqs+1; i++) 
+		for(j=1; j<nseqs+1; j++) 
+			save_tree[i][j]  = standard_tree[i][j];
+
+	if(output_tree_phylip) 
+		print_phylip_tree(standard_tree,phylip_phy_tree_file,0);
+
+	for(i=1; i<nseqs+1; i++) 
+		for(j=1; j<nseqs+1; j++) 
+			standard_tree[i][j]  = save_tree[i][j];
+
+	if(output_tree_nexus) 
+		print_nexus_tree(standard_tree,nexus_phy_tree_file,0);
+
+/*
+	print_tree(standard_tree,phy_tree_file);
+*/
+	tree_gaps=ckfree((void *)tree_gaps);
+	boot_positions=ckfree((void *)boot_positions);
+	if (left_branch != NULL) left_branch=ckfree((void *)left_branch);
+	if (right_branch != NULL) right_branch=ckfree((void *)right_branch);
+	if (tkill != NULL) tkill=ckfree((void *)tkill);
+	if (av != NULL) av=ckfree((void *)av);
+	for (i=0;i<nseqs+1;i++)
+		standard_tree[i]=ckfree((void *)standard_tree[i]);
+	standard_tree=ckfree((void *)standard_tree);
+
+	for (i=0;i<nseqs+1;i++)
+		save_tree[i]=ckfree((void *)save_tree[i]);
+	save_tree=ckfree((void *)save_tree);
+
+	if(output_tree_clustal) {
+		fclose(clustal_phy_tree_file);	
+		info("Phylogenetic tree file created:   [%s]",clustal_name);
+	}
+
+	if(output_tree_phylip) {
+		fclose(phylip_phy_tree_file);	
+		info("Phylogenetic tree file created:   [%s]",phylip_name);
+	}
+
+	if(output_tree_distances) {
+		fclose(distances_phy_tree_file);	
+		info("Distance matrix  file  created:   [%s]",dist_name);
+	}
+
+	if(output_tree_nexus) {
+		fclose(nexus_phy_tree_file);	
+		info("Nexus tree file  created:   [%s]",nexus_name);
+	}
+
+	if(output_pim) {
+		fclose(pim_file);
+		info(" perc identity matrix file  created:   [%s]",pim_name);
+	}
+
+}
+
+static void overspill_message(sint overspill,sint total_dists)
+/*
+   Warn that 'overspill' of the 'total_dists' pairwise distances were out
+   of range for the distance correction, and suggest what to do about it.
+   The complete text is formatted into a local buffer and passed to
+   warning().
+*/
+{
+	char text[1024]="";
+
+	/* one format call; adjacent literals are joined by the compiler */
+	sprintf(text,
+		"%d of the distances out of a total of %d"
+		"\n were out of range for the distance correction."
+		"\n"
+		"\n SUGGESTIONS: 1) remove the most distant sequences"
+		"\n           or 2) use the PHYLIP package"
+		"\n           or 3) turn off the correction."
+		"\n Note: Use option 3 with caution! With this degree"
+		"\n of divergence you will have great difficulty"
+		"\n getting robust and reliable trees."
+		"\n\n",
+		(pint)overspill,(pint)total_dists);
+	warning(text);
+}
+
+
+
+Boolean transition(sint base1, sint base2) /* TRUE if transition; else FALSE */
+/* 
+
+   Decide whether a change between two base codes is a transition.
+
+   The codes tested here are
+   a,A = 0;   c,C = 2;   g,G = 6;   t,T,u,U =17;  
+
+   A <--> G  and  T <--> C  are transitions;  every other pair of codes,
+   including two equal codes, gives FALSE.
+
+*/
+{
+	sint low, high;
+
+	/* order the pair so each transition needs a single comparison */
+	if(base1 <= base2) {
+		low  = base1;
+		high = base2;
+	}
+	else {
+		low  = base2;
+		high = base1;
+	}
+
+	if((low == 0) && (high == 6))
+		return TRUE;                                     /* A <--> G */
+	if((low == 2) && (high == 17))
+		return TRUE;                                     /* T <--> C */
+	return FALSE;
+}
+
+
+void tree_gap_delete(void)
+/*
+   Build the tree_gaps flag array: tree_gaps[p] is 1 when any sequence
+   from first_seq to last_seq holds gap_pos1 or gap_pos2 at alignment
+   position p, and 0 otherwise.  Positions 1..seqlen_array[first_seq] are
+   filled in.
+
+   A new array of max_aln_length+1 chars is allocated on every call and
+   stored in tree_gaps; it is not freed here.
+*/
+{
+	sint col;
+	sint row;
+	char *flags;
+
+	flags = (char *)ckalloc( (max_aln_length+1) * sizeof (char) );
+	tree_gaps = flags;
+
+	for(col=1; col<=seqlen_array[first_seq]; ++col) {
+		flags[col] = 0;
+		/* stop at the first sequence with a gap in this column */
+		for(row=first_seq; row<=last_seq; ++row) {
+			if((seq_array[row][col] == gap_pos1) ||
+			   (seq_array[row][col] == gap_pos2)) {
+				flags[col] = 1;
+				break;
+			}
+		}
+	}
+
+}
+
+void distance_matrix_output(FILE *ofile)
+/*
+   Write the distance matrix tmat to ofile (the output selected by
+   output_tree_distances).
+
+   The first line holds the number of sequences, last_seq-first_seq+1.
+   Each following row starts with the sequence name, left-justified in a
+   field of max_names characters, followed by the distances as %6.3f.
+   After every 8th value a row is continued on a new line indented by ten
+   spaces, unless that value was the last one of the row.
+
+   NOTE(review): names[] and tmat[][] are indexed from 1 regardless of
+   first_seq; confirm this is intended if first_seq can differ from 1.
+*/
+{
+	sint i,j;
+	sint count = last_seq-first_seq+1;
+
+	/* cast the whole expression, not just last_seq, to match %d */
+	fprintf(ofile,"%6d",(pint)count);
+	for(i=1;i<=count;i++) {
+		/* a '*' field width must be passed as an int */
+		fprintf(ofile,"\n%-*s ",(int)max_names,names[i]);
+		for(j=1;j<=count;j++) {
+			fprintf(ofile,"%6.3f ",tmat[i][j]);
+			if((j % 8 == 0) && (j != count))
+				fprintf(ofile,"\n          ");
+		}
+	}
+}
+
+
+
+#ifdef ORIGINAL_NJ_TREE
+void nj_tree(char **tree_description, FILE *tree)
+/*
+   Original neighbor-joining routine; compiled only when ORIGINAL_NJ_TREE
+   is defined.
+
+   Works on the global distance matrix tmat for entries
+   1..(last_seq-first_seq+1) and modifies tmat in place.
+
+   tree_description : filled in by this routine.  Row nc (one row per
+                      cycle) gets a 1 in column j for every sequence j
+                      that belongs to either of the two groups joined in
+                      cycle nc.  The last row written, at index
+                      last_seq-first_seq+1-2, labels each sequence 1, 2
+                      or 3 according to its branch of the final
+                      trichotomy.
+   tree             : stream for the cycle-by-cycle report; it is only
+                      written when the file-level flag 'verbose' is set.
+
+   Branch lengths go to left_branch[nc]/right_branch[nc] for the main
+   cycles and to the last three elements of left_branch for the final
+   trichotomy.
+
+   Side effects: allocates left_branch, right_branch, tkill and av with
+   ckalloc() and does not free them here.  With exactly two sequences the
+   routine returns before allocating anything.
+*/
+{
+	register int i;
+	sint l[4],nude,k;
+	sint nc,mini,minj,j,ii,jj;
+	double fnseqs,fnseqs2=0,sumd;
+	double diq,djq,dij,d2r,dr,dio,djo,da;
+	double tmin,total,dmin;
+	double bi,bj,b1,b2,b3,branch[4];
+	sint typei,typej;             /* 0 = node; 1 = OTU */
+	
+	fnseqs = (double)last_seq-first_seq+1;
+
+/*********************** First initialisation ***************************/
+	
+	if(verbose)  {
+		fprintf(tree,"\n\n\t\t\tNeighbor-joining Method\n");
+		fprintf(tree,"\n Saitou, N. and Nei, M. (1987)");
+		fprintf(tree," The Neighbor-joining Method:");
+		fprintf(tree,"\n A New Method for Reconstructing Phylogenetic Trees.");
+		fprintf(tree,"\n Mol. Biol. Evol., 4(4), 406-425\n");
+		fprintf(tree,"\n\n This is an UNROOTED tree\n");
+		fprintf(tree,"\n Numbers in parentheses are branch lengths\n\n");
+	}	
+
+	/* two sequences: nothing to join; report the single distance and stop */
+	if (fnseqs == 2) {
+		if (verbose) fprintf(tree,"Cycle   1     =  SEQ:   1 (%9.5f) joins  SEQ:   2 (%9.5f)",tmat[first_seq][first_seq+1],tmat[first_seq][first_seq+1]);
+		return;
+	}
+
+	mini = minj = 0;
+
+	left_branch 	= (double *) ckalloc( (nseqs+2) * sizeof (double)   );
+	right_branch    = (double *) ckalloc( (nseqs+2) * sizeof (double)   );
+	tkill 		= (sint *) ckalloc( (nseqs+1) * sizeof (sint) );
+	av   		= (double *) ckalloc( (nseqs+1) * sizeof (double)   );
+
+	/* av[i] > 0.0 later marks entry i as a node made by an earlier join;
+	   tkill[i] == 1 marks entry i as removed */
+	for(i=1;i<=last_seq-first_seq+1;++i) 
+		{
+		tmat[i][i] = av[i] = 0.0;
+		tkill[i] = 0;
+		}
+
+/*********************** Enter The Main Cycle ***************************/
+
+ /*	for(nc=1; nc<=(last_seq-first_seq+1-3); ++nc) {  */            	/**start main cycle**/
+	/* each cycle joins one pair; three entries are left for the last cycle */
+	for(nc=1; nc<=(last_seq-first_seq+1-3); ++nc) {
+		/* copy the upper triangle into the lower one and total it */
+		sumd = 0.0;
+		for(j=2; j<=last_seq-first_seq+1; ++j)
+			for(i=1; i<j; ++i) {
+				tmat[j][i] = tmat[i][j];
+				sumd = sumd + tmat[i][j];
+			}
+
+		tmin = 99999.0;
+
+/*.................compute SMATij values and find the smallest one ........*/
+
+		for(jj=2; jj<=last_seq-first_seq+1; ++jj) 
+			if(tkill[jj] != 1) 
+				for(ii=1; ii<jj; ++ii)
+					if(tkill[ii] != 1) {
+						diq = djq = 0.0;
+
+						for(i=1; i<=last_seq-first_seq+1; ++i) {
+							diq = diq + tmat[i][ii];
+							djq = djq + tmat[i][jj];
+						}
+
+						dij = tmat[ii][jj];
+						d2r = diq + djq - (2.0*dij);
+						dr  = sumd - dij -d2r;
+						fnseqs2 = fnseqs - 2.0;
+					        total= d2r+ fnseqs2*dij +dr*2.0;
+						total= total / (2.0*fnseqs2);
+
+						if(total < tmin) {
+							tmin = total;
+							mini = ii;
+							minj = jj;
+						}
+					}
+		
+
+/*.................compute branch lengths and print the results ........*/
+
+
+		dio = djo = 0.0;
+		for(i=1; i<=last_seq-first_seq+1; ++i) {
+			dio = dio + tmat[i][mini];
+			djo = djo + tmat[i][minj];
+		}
+
+		dmin = tmat[mini][minj];
+		dio = (dio - dmin) / fnseqs2;
+		djo = (djo - dmin) / fnseqs2;
+		bi = (dmin + dio - djo) * 0.5;
+		bj = dmin - bi;
+		bi = bi - av[mini];
+		bj = bj - av[minj];
+
+		if( av[mini] > 0.0 )
+			typei = 0;
+		else
+			typei = 1;
+		if( av[minj] > 0.0 )
+			typej = 0;
+		else
+			typej = 1;
+
+		if(verbose) 
+	 	    fprintf(tree,"\n Cycle%4d     = ",(pint)nc);
+
+/* 
+   set negative branch lengths to zero.  Also set any tiny positive
+   branch lengths to zero.
+   NOTE(review): the tests below only zero lengths whose magnitude is
+   under 0.0001; larger negative lengths are left as they are.
+*/		if( fabs(bi) < 0.0001) bi = 0.0;
+		if( fabs(bj) < 0.0001) bj = 0.0;
+
+	    	if(verbose) {
+		    if(typei == 0) 
+			fprintf(tree,"Node:%4d (%9.5f) joins ",(pint)mini,bi);
+		    else 
+			fprintf(tree," SEQ:%4d (%9.5f) joins ",(pint)mini,bi);
+
+		    if(typej == 0) 
+			fprintf(tree,"Node:%4d (%9.5f)",(pint)minj,bj);
+		    else 
+			fprintf(tree," SEQ:%4d (%9.5f)",(pint)minj,bj);
+
+		    fprintf(tree,"\n");
+	    	}	
+
+
+	    	left_branch[nc] = bi;
+	    	right_branch[nc] = bj;
+
+		for(i=1; i<=last_seq-first_seq+1; i++)
+			tree_description[nc][i] = 0;
+
+		/* a node brings along the members of the most recent earlier row
+		   that contains it; a single sequence just marks itself */
+	     	if(typei == 0) { 
+			for(i=nc-1; i>=1; i--)
+				if(tree_description[i][mini] == 1) {
+					for(j=1; j<=last_seq-first_seq+1; j++)  
+					     if(tree_description[i][j] == 1)
+						    tree_description[nc][j] = 1;
+					break;
+				}
+		}
+		else
+			tree_description[nc][mini] = 1;
+
+		if(typej == 0) {
+			for(i=nc-1; i>=1; i--) 
+				if(tree_description[i][minj] == 1) {
+					for(j=1; j<=last_seq-first_seq+1; j++)  
+					     if(tree_description[i][j] == 1)
+						    tree_description[nc][j] = 1;
+					break;
+				}
+		}
+		else
+			tree_description[nc][minj] = 1;
+			
+
+/* 
+   Here is where the -0.00005 branch lengths come from for 3 or more
+   identical seqs.
+*/
+/*		if(dmin <= 0.0) dmin = 0.0001; */
+		/* keeps av[mini] above zero so mini is recognised as a node later */
+                if(dmin <= 0.0) dmin = 0.000001;
+		av[mini] = dmin * 0.5;
+
+/*........................Re-initialisation................................*/
+
+		/* mini now stands for the joined pair: its distance to every
+		   remaining entry j becomes the mean of the two old distances
+		   (stored in the upper triangle); minj is removed */
+		fnseqs = fnseqs - 1.0;
+		tkill[minj] = 1;
+
+		for(j=1; j<=last_seq-first_seq+1; ++j) 
+			if( tkill[j] != 1 ) {
+				da = ( tmat[mini][j] + tmat[minj][j] ) * 0.5;
+				if( (mini - j) < 0 ) 
+					tmat[mini][j] = da;
+				if( (mini - j) > 0)
+					tmat[j][mini] = da;
+			}
+
+		for(j=1; j<=last_seq-first_seq+1; ++j)
+			tmat[minj][j] = tmat[j][minj] = 0.0;
+
+
+/****/	}						/**end main cycle**/
+
+/******************************Last Cycle (3 Seqs. left)********************/
+
+	/* collect the entries that were never removed into l[1], l[2], ... */
+	nude = 1;
+
+	for(i=1; i<=last_seq-first_seq+1; ++i)
+		if( tkill[i] != 1 ) {
+			l[nude] = i;
+			nude = nude + 1;
+		}
+
+	b1 = (tmat[l[1]][l[2]] + tmat[l[1]][l[3]] - tmat[l[2]][l[3]]) * 0.5;
+	b2 =  tmat[l[1]][l[2]] - b1;
+	b3 =  tmat[l[1]][l[3]] - b1;
+ 
+	branch[1] = b1 - av[l[1]];
+	branch[2] = b2 - av[l[2]];
+	branch[3] = b3 - av[l[3]];
+
+/* Reset tiny negative and positive branch lengths to zero */
+	if( fabs(branch[1]) < 0.0001) branch[1] = 0.0;
+	if( fabs(branch[2]) < 0.0001) branch[2] = 0.0;
+	if( fabs(branch[3]) < 0.0001) branch[3] = 0.0;
+
+	/* the three final branch lengths all go into left_branch */
+	left_branch[last_seq-first_seq+1-2] = branch[1];
+	left_branch[last_seq-first_seq+1-1] = branch[2];
+	left_branch[last_seq-first_seq+1]   = branch[3];
+
+	for(i=1; i<=last_seq-first_seq+1; i++)
+		tree_description[last_seq-first_seq+1-2][i] = 0;
+
+	if(verbose)
+		fprintf(tree,"\n Cycle%4d (Last cycle, trichotomy):\n",(pint)nc);
+
+	/* label every sequence with the number (1, 2 or 3) of its branch */
+	for(i=1; i<=3; ++i) {
+	   if( av[l[i]] > 0.0) {
+	      	if(verbose)
+	      	    fprintf(tree,"\n\t\t Node:%4d (%9.5f) ",(pint)l[i],branch[i]);
+		for(k=last_seq-first_seq+1-3; k>=1; k--)
+			if(tree_description[k][l[i]] == 1) {
+				for(j=1; j<=last_seq-first_seq+1; j++)
+				 	if(tree_description[k][j] == 1)
+					    tree_description[last_seq-first_seq+1-2][j] = i;
+				break;
+			}
+	   }
+	   else  {
+	      	if(verbose)
+	   	    fprintf(tree,"\n\t\t  SEQ:%4d (%9.5f) ",(pint)l[i],branch[i]);
+		tree_description[last_seq-first_seq+1-2][l[i]] = i;
+	   }
+	   if(i < 3) {
+	      	if(verbose)
+	            fprintf(tree,"joins");
+	   }
+	}
+
+	if(verbose)
+		fprintf(tree,"\n");
+
+}
+
+#else /* ORIGINAL_NJ_TREE */
+
+/*
+   Entry point used when ORIGINAL_NJ_TREE is not defined: hands both
+   arguments unchanged to fast_nj_tree(), which is defined further down
+   in this file.
+*/
+void nj_tree(char **tree_description, FILE *tree) {
+	/* full prototype: lets the compiler check the arguments, and stays
+	   valid where an empty parameter list means "no parameters" */
+	void fast_nj_tree(char **tree_description, FILE *tree);
+
+	fast_nj_tree(tree_description, tree);
+}
+
+
+/****************************************************************************
+ * [ Improvement ideas in fast_nj_tree() ] by DDBJ & FUJITSU Limited.
+ *						written by Tadashi Koike
+ *						(takoike at genes.nig.ac.jp)
+ *******************
+ * <IMPROVEMENT 1> : Store the value of sum of the score to temporary array,
+ *                   and use again and again.
+ *
+ *	In the main cycle, these are calculated again and again :
+ *	    diq = sum of tmat[n][ii]   (n:1 to last_seq-first_seq+1),
+ *	    djq = sum of tmat[n][jj]   (n:1 to last_seq-first_seq+1),
+ *	    dio = sum of tmat[n][mini] (n:1 to last_seq-first_seq+1),
+ *	    djo = sum of tmat[n][minj] (n:1 to last_seq-first_seq+1)
+ *		// 'last_seq' and 'first_seq' are both constant values //
+ *	and the result of above calculations is always same until 
+ *	a best pair of neighbour nodes is joined.
+ *
+ *	So, we change the logic to calculate the sum[i] (=sum of tmat[n][i]
+ *	(n:1 to last_seq-first_seq+1)) and store it to array, before
+ *	beginning to find a best pair of neighbour nodes, and after that 
+ *	we use them again and again.
+ *
+ *	    tmat[i][j]
+ *	              1   2   3   4   5
+ *	            +---+---+---+---+---+
+ *	          1 |   |   |   |   |   |
+ *	            +---+---+---+---+---+
+ *	          2 |   |   |   |   |   |  1) calculate sum of tmat[n][i]
+ *	            +---+---+---+---+---+        (n: 1 to last_seq-first_seq+1)
+ *	          3 |   |   |   |   |   |  2) store that sum value to sum[i]
+ *	            +---+---+---+---+---+
+ *	          4 |   |   |   |   |   |  3) use sum[i] during finding a best
+ *	            +---+---+---+---+---+     pair of neighbour nodes.
+ *	          5 |   |   |   |   |   |
+ *	            +---+---+---+---+---+
+ *	              |   |   |   |   |
+ *	              V   V   V   V   V  Calculate sum , and store it to sum[i]
+ *	            +---+---+---+---+---+
+ *	     sum[i] |   |   |   |   |   |
+ *	            +---+---+---+---+---+
+ *
+ *	At this time, we thought that we use upper triangle of the matrix
+ *	because tmat[i][j] is equal to tmat[j][i] and tmat[i][i] is equal 
+ *	to zero. Therefore, we prepared sum_rows[i] and sum_cols[i] instead 
+ *	of sum[i] for storing the sum value.
+ *
+ *	    tmat[i][j]
+ *	              1   2   3   4   5     sum_cols[i]
+ *	            +---+---+---+---+---+     +---+
+ *	          1     | # | # | # | # | --> |   | ... sum of tmat[1][2..5]
+ *	            + - +---+---+---+---+     +---+
+ *	          2         | # | # | # | --> |   | ... sum of tmat[2][3..5]
+ *	            + - + - +---+---+---+     +---+
+ *	          3             | # | # | --> |   | ... sum of tmat[3][4..5]
+ *	            + - + - + - +---+---+     +---+
+ *	          4                 | # | --> |   | ... sum of tmat[4][5]
+ *	            + - + - + - + - +---+     +---+
+ *	          5                     | --> |   | ... zero
+ *	            + - + - + - + - + - +     +---+
+ *	              |   |   |   |   |
+ *	              V   V   V   V   V  Calculate sum, store it to sum_rows[i]
+ *	            +---+---+---+---+---+
+ *	sum_rows[i] |   |   |   |   |   |
+ *	            +---+---+---+---+---+
+ *	              |   |   |   |   |
+ *	              |   |   |   |   +----- sum of tmat[1..4][5]
+ *	              |   |   |   +--------- sum of tmat[1..3][4]
+ *	              |   |   +------------- sum of tmat[1..2][3]
+ *	              |   +----------------- sum of tmat[1][2]
+ *	              +--------------------- zero
+ *
+ *	And we use (sum_rows[i] + sum_cols[i]) instead of sum[i].
+ *
+ *******************
+ * <IMPROVEMENT 2> : We manage valid nodes with chain list, instead of
+ *                   tkill[i] flag array.
+ *
+ *	In original logic, invalid(killed?) nodes after nodes-joining
+ *	are managed with tkill[i] flag array (set to 1 when killed).
+ *	With this method, the later cycles of the search for a best pair
+ *	of neighbor nodes waste time visiting killed nodes only to skip them.
+ *
+ *	So, we thought that we managed valid nodes by using a chain list 
+ *	as below:
+ *
+ *	1) declare the list structure.
+ *		struct {
+ *		    sint n;		// entry number of node.
+ *		    void *prev;		// pointer to previous entry.
+ *		    void *next;		// pointer to next entry.
+ *		}
+ *	2) construct a valid node list.
+ *
+ *       +-----+    +-----+    +-----+    +-----+        +-----+
+ * NULL<-|prev |<---|prev |<---|prev |<---|prev |<- - - -|prev |
+ *       |  0  |    |  1  |    |  2  |    |  3  |        |  n  |
+ *       | next|--->| next|--->| next|--->| next|- - - ->| next|->NULL
+ *       +-----+    +-----+    +-----+    +-----+        +-----+
+ *
+ *	3) when finding a best pair of neighbor nodes, we use
+ *	   this chain list as loop counter.
+ *
+ *	4) If an entry was killed by node-joining, this chain list is
+ *	   modified to remove that entry.
+ *
+ *	   EX) remove the entry No 2.
+ *       +-----+    +-----+               +-----+        +-----+
+ * NULL<-|prev |<---|prev |<--------------|prev |<- - - -|prev |
+ *       |  0  |    |  1  |               |  3  |        |  n  |
+ *       | next|--->| next|-------------->| next|- - - ->| next|->NULL
+ *       +-----+    +-----+               +-----+        +-----+
+ *                             +-----+
+ *                       NULL<-|prev |
+ *                             |  2  |
+ *                             | next|->NULL
+ *                             +-----+
+ *
+ *	This speeds up the later cycles of the search for a best pair of
+ *	neighbor nodes.
+ *
+ *******************
+ * <IMPROVEMENT 3> : Cut the frequency of division.
+ *
+ * In the main cycle, 'total' is divided by (2.0*fnseqs2) before it is
+ * compared with 'tmin'.  If N nodes are available, that division happens
+ * on the order of (N*(N-1))/2 times.
+ *
+ * Comparing total/(2.0*fnseqs2) with tmin gives the same result as comparing
+ * total with (tmin*2.0*fnseqs2), provided fnseqs2 is positive.
+ * (tmin*2.0*fnseqs2) needs to be calculated only once per cycle, so we stop
+ * dividing total and scale the initial tmin by (2.0*fnseqs2) instead.
+ *
+ *******************
+ * <IMPROVEMENT 4> : some transformation of the equation (to cut operations).
+ *
+ * We transform an equation of calculating 'total' in the main cycle.
+ *
+ */
+
+
+void fast_nj_tree(char **tree_description, FILE *tree)
+{
+	register int i;
+	sint l[4],nude,k;
+	sint nc,mini,minj,j,ii,jj;
+	double fnseqs,fnseqs2=0,sumd;
+	double diq,djq,dij,d2r,dr,dio,djo,da;
+	double tmin,total,dmin;
+	double bi,bj,b1,b2,b3,branch[4];
+	sint typei,typej;             /* 0 = node; 1 = OTU */
+
+	/* IMPROVEMENT 1, STEP 0 : declare  variables */
+	double *sum_cols, *sum_rows, *join;
+
+	/* IMPROVEMENT 2, STEP 0 : declare  variables */
+	sint loop_limit;
+	typedef struct _ValidNodeID {
+	    sint n;
+	    struct _ValidNodeID *prev;
+	    struct _ValidNodeID *next;
+	} ValidNodeID;
+	ValidNodeID *tvalid, *lpi, *lpj, *lpii, *lpjj, *lp_prev, *lp_next;
+
+	/*
+	 * correspondence of the loop counter variables.
+	 *   i .. lpi->n,	ii .. lpii->n
+	 *   j .. lpj->n,	jj .. lpjj->n
+	 */
+
+	fnseqs = (double)last_seq-first_seq+1;
+
+/*********************** First initialisation ***************************/
+	
+	if(verbose)  {
+		fprintf(tree,"\n\n\t\t\tNeighbor-joining Method\n");
+		fprintf(tree,"\n Saitou, N. and Nei, M. (1987)");
+		fprintf(tree," The Neighbor-joining Method:");
+		fprintf(tree,"\n A New Method for Reconstructing Phylogenetic Trees.");
+		fprintf(tree,"\n Mol. Biol. Evol., 4(4), 406-425\n");
+		fprintf(tree,"\n\n This is an UNROOTED tree\n");
+		fprintf(tree,"\n Numbers in parentheses are branch lengths\n\n");
+	}	
+
+	if (fnseqs == 2) {
+		if (verbose) fprintf(tree,"Cycle   1     =  SEQ:   1 (%9.5f) joins  SEQ:   2 (%9.5f)",tmat[first_seq][first_seq+1],tmat[first_seq][first_seq+1]);
+		return;
+	}
+
+	mini = minj = 0;
+
+	left_branch 	= (double *) ckalloc( (nseqs+2) * sizeof (double)   );
+	right_branch    = (double *) ckalloc( (nseqs+2) * sizeof (double)   );
+	tkill 		= (sint *) ckalloc( (nseqs+1) * sizeof (sint) );
+	av   		= (double *) ckalloc( (nseqs+1) * sizeof (double)   );
+
+	/* IMPROVEMENT 1, STEP 1 : Allocate memory */
+	sum_cols	= (double *) ckalloc( (nseqs+1) * sizeof (double)   );
+	sum_rows	= (double *) ckalloc( (nseqs+1) * sizeof (double)   );
+	join		= (double *) ckalloc( (nseqs+1) * sizeof (double)   );
+
+	/* IMPROVEMENT 2, STEP 1 : Allocate memory */
+	tvalid	= (ValidNodeID *) ckalloc( (nseqs+1) * sizeof (ValidNodeID) );
+	/* tvalid[0] is special entry in array. it points a header of valid entry list */
+	tvalid[0].n = 0;
+	tvalid[0].prev = NULL;
+	tvalid[0].next = &tvalid[1];
+
+	/* IMPROVEMENT 2, STEP 2 : Construct and initialize the entry chain list */
+	for(i=1, loop_limit = last_seq-first_seq+1,
+		lpi=&tvalid[1], lp_prev=&tvalid[0], lp_next=&tvalid[2] ;
+		i<=loop_limit ;
+		++i, ++lpi, ++lp_prev, ++lp_next)
+		{
+		tmat[i][i] = av[i] = 0.0;
+		tkill[i] = 0;
+		lpi->n = i;
+		lpi->prev = lp_prev;
+		lpi->next = lp_next;
+
+		/* IMPROVEMENT 1, STEP 2 : Initialize arrays */
+		sum_cols[i] = sum_rows[i] = join[i] = 0.0;
+		}
+	tvalid[loop_limit].next = NULL;
+
+	/*
+	 * IMPROVEMENT 1, STEP 3 : Calculate the sum of score value that 
+	 * is sequence[i] to others.
+	 */
+	sumd = 0.0;
+	for (lpj=tvalid[0].next ; lpj!=NULL ; lpj = lpj->next) {
+		double tmp_sum = 0.0;
+		j = lpj->n;
+		/* calculate sum_rows[j] */
+		for (lpi=tvalid[0].next ; lpi->n < j ; lpi = lpi->next) {
+			i = lpi->n;
+			tmp_sum += tmat[i][j];
+			/* tmat[j][i] = tmat[i][j]; */
+		}
+		sum_rows[j] = tmp_sum;
+
+		tmp_sum = 0.0;
+		/* Set lpi to that lpi->n is greater than j */
+		if ((lpi != NULL) && (lpi->n == j)) {
+			lpi = lpi->next;
+		}
+		/* calculate sum_cols[j] */
+		for( ; lpi!=NULL ; lpi = lpi->next) {
+			i = lpi->n;
+			tmp_sum += tmat[j][i];
+			/* tmat[i][j] = tmat[j][i]; */
+		}
+		sum_cols[j] = tmp_sum;
+	}
+
+/*********************** Enter The Main Cycle ***************************/
+
+	for(nc=1, loop_limit = (last_seq-first_seq+1-3); nc<=loop_limit; ++nc) {
+
+		sumd = 0.0;
+		/* IMPROVEMENT 1, STEP 4 : use sum value */
+		for(lpj=tvalid[0].next ; lpj!=NULL ; lpj = lpj->next) {
+			sumd += sum_cols[lpj->n];
+		}
+
+		/* IMPROVEMENT 3, STEP 0 : multiply tmin and 2*fnseqs2 */
+		fnseqs2 = fnseqs - 2.0;		/* Set fnseqs2 at this point. */
+		tmin = 99999.0 * 2.0 * fnseqs2;
+
+
+/*.................compute SMATij values and find the smallest one ........*/
+
+		mini = minj = 0;
+
+		/* jj must starts at least 2 */
+		if ((tvalid[0].next != NULL) && (tvalid[0].next->n == 1)) {
+			lpjj = tvalid[0].next->next;
+		} else {
+			lpjj = tvalid[0].next;
+		}
+
+		for( ; lpjj != NULL; lpjj = lpjj->next) {
+			jj = lpjj->n;
+			for(lpii=tvalid[0].next ; lpii->n < jj ; lpii = lpii->next) {
+				ii = lpii->n;
+				diq = djq = 0.0;
+
+				/* IMPROVEMENT 1, STEP 4 : use sum value */
+				diq = sum_cols[ii] + sum_rows[ii];
+				djq = sum_cols[jj] + sum_rows[jj];
+				/*
+				 * always ii < jj in this point. Use upper
+				 * triangle of score matrix.
+				 */
+				dij = tmat[ii][jj];
+
+				/*
+				 * IMPROVEMENT 3, STEP 1 : fnseqs2 is
+				 * already calculated.
+				 */
+				/* fnseqs2 = fnseqs - 2.0 */
+
+				/* IMPROVEMENT 4 : transform the equation */
+  /*-------------------------------------------------------------------*
+   * OPTIMIZE of expression 'total = d2r + fnseqs2*dij + dr*2.0'       *
+   * total = d2r + fnseq2*dij + 2.0*dr                                 *
+   *       = d2r + fnseq2*dij + 2(sumd - dij - d2r)                    *
+   *       = d2r + fnseq2*dij + 2*sumd - 2*dij - 2*d2r                 *
+   *       =       fnseq2*dij + 2*sumd - 2*dij - 2*d2r + d2r           *
+   *       = fnseq2*dij + 2*sumd - 2*dij - d2r                         *
+   *       = fnseq2*dij + 2*sumd - 2*dij - (diq + djq - 2*dij)         *
+   *       = fnseq2*dij + 2*sumd - 2*dij - diq - djq + 2*dij           *
+   *       = fnseq2*dij + 2*sumd - 2*dij + 2*dij - diq - djq           *
+   *       = fnseq2*dij + 2*sumd  - diq - djq                          *
+   *-------------------------------------------------------------------*/
+				total = fnseqs2*dij + 2.0*sumd  - diq - djq;
+
+				/* 
+				 * IMPROVEMENT 3, STEP 2 : abbrevlate
+				 * the division on comparison between 
+				 * total and tmin.
+				 */
+				/* total = total / (2.0*fnseqs2); */
+
+				if(total < tmin) {
+					tmin = total;
+					mini = ii;
+					minj = jj;
+				}
+			}
+		}
+
+		/* MEMO: always ii < jj in avobe loop, so mini < minj */
+
+/*.................compute branch lengths and print the results ........*/
+
+
+		dio = djo = 0.0;
+
+		/* IMPROVEMENT 1, STEP 4 : use sum value */
+		dio = sum_cols[mini] + sum_rows[mini];
+		djo = sum_cols[minj] + sum_rows[minj];
+
+		dmin = tmat[mini][minj];
+		dio = (dio - dmin) / fnseqs2;
+		djo = (djo - dmin) / fnseqs2;
+		bi = (dmin + dio - djo) * 0.5;
+		bj = dmin - bi;
+		bi = bi - av[mini];
+		bj = bj - av[minj];
+
+		if( av[mini] > 0.0 )
+			typei = 0;
+		else
+			typei = 1;
+		if( av[minj] > 0.0 )
+			typej = 0;
+		else
+			typej = 1;
+
+		if(verbose) 
+	 	    fprintf(tree,"\n Cycle%4d     = ",(pint)nc);
+
+/* 
+   set negative branch lengths to zero.  Also set any tiny positive
+   branch lengths to zero.
+*/		if( fabs(bi) < 0.0001) bi = 0.0;
+		if( fabs(bj) < 0.0001) bj = 0.0;
+
+	    	if(verbose) {
+		    if(typei == 0) 
+			fprintf(tree,"Node:%4d (%9.5f) joins ",(pint)mini,bi);
+		    else 
+			fprintf(tree," SEQ:%4d (%9.5f) joins ",(pint)mini,bi);
+
+		    if(typej == 0) 
+			fprintf(tree,"Node:%4d (%9.5f)",(pint)minj,bj);
+		    else 
+			fprintf(tree," SEQ:%4d (%9.5f)",(pint)minj,bj);
+
+		    fprintf(tree,"\n");
+	    	}	
+
+
+	    	left_branch[nc] = bi;
+	    	right_branch[nc] = bj;
+
+		for(i=1; i<=last_seq-first_seq+1; i++)
+			tree_description[nc][i] = 0;
+
+	     	if(typei == 0) { 
+			for(i=nc-1; i>=1; i--)
+				if(tree_description[i][mini] == 1) {
+					for(j=1; j<=last_seq-first_seq+1; j++)  
+					     if(tree_description[i][j] == 1)
+						    tree_description[nc][j] = 1;
+					break;
+				}
+		}
+		else
+			tree_description[nc][mini] = 1;
+
+		if(typej == 0) {
+			for(i=nc-1; i>=1; i--) 
+				if(tree_description[i][minj] == 1) {
+					for(j=1; j<=last_seq-first_seq+1; j++)  
+					     if(tree_description[i][j] == 1)
+						    tree_description[nc][j] = 1;
+					break;
+				}
+		}
+		else
+			tree_description[nc][minj] = 1;
+			
+
+/* 
+   Here is where the -0.00005 branch lengths come from for 3 or more
+   identical seqs.
+*/
+/*		if(dmin <= 0.0) dmin = 0.0001; */
+                if(dmin <= 0.0) dmin = 0.000001;
+		av[mini] = dmin * 0.5;
+
+/*........................Re-initialisation................................*/
+
+		fnseqs = fnseqs - 1.0;
+		tkill[minj] = 1;
+
+		/* IMPROVEMENT 2, STEP 3 : Remove tvalid[minj] from chain list. */
+		/* [ Before ]
+		 *  +---------+        +---------+        +---------+       
+		 *  |prev     |<-------|prev     |<-------|prev     |<---
+		 *  |    n    |        | n(=minj)|        |    n    |
+		 *  |     next|------->|     next|------->|     next|----
+		 *  +---------+        +---------+        +---------+ 
+		 *
+		 * [ After ]
+		 *  +---------+                           +---------+       
+		 *  |prev     |<--------------------------|prev     |<---
+		 *  |    n    |                           |    n    |
+		 *  |     next|-------------------------->|     next|----
+		 *  +---------+                           +---------+ 
+		 *                     +---------+
+		 *              NULL---|prev     |
+		 *                     | n(=minj)|
+		 *                     |     next|---NULL
+		 *                     +---------+ 
+		 */
+		(tvalid[minj].prev)->next = tvalid[minj].next;
+		if (tvalid[minj].next != NULL) {
+			(tvalid[minj].next)->prev = tvalid[minj].prev;
+		}
+		tvalid[minj].prev = tvalid[minj].next = NULL;
+
+		/* IMPROVEMENT 1, STEP 5 : re-calculate sum values. */
+		for(lpj=tvalid[0].next ; lpj != NULL ; lpj = lpj->next) {
+			double tmp_di = 0.0;
+			double tmp_dj = 0.0;
+			j = lpj->n;
+
+			/* 
+			 * subtract the score value related to 'minj' from the
+			 * sum arrays .
+			 */
+			if (j < minj) {
+				tmp_dj = tmat[j][minj];
+				sum_cols[j] -= tmp_dj;
+			} else if (j > minj) {
+				tmp_dj = tmat[minj][j];
+				sum_rows[j] -= tmp_dj;
+			} /* nothing to do when j is equal to minj. */
+			
+
+			/* 
+			 * subtract the score value related to 'mini' from the
+			 * sum arrays .
+			 */
+			if (j < mini) {
+				tmp_di = tmat[j][mini];
+				sum_cols[j] -= tmp_di;
+			} else if (j > mini) {
+				tmp_di = tmat[mini][j];
+				sum_rows[j] -= tmp_di;
+			} /* nothing to do when j is equal to mini. */
+
+			/* 
+			 * calculate a score value of the new inner node.
+			 * then, store it temporary to join[] array.
+			 */
+			join[j] = (tmp_dj + tmp_di) * 0.5;
+		}
+
+		/* 
+		 * 1)
+		 * Set the score values (stored in join[]) into the matrix,
+		 * row/column position is 'mini'.
+		 * 2)
+		 * Add a score value of the new inner node to sum arrays.
+		 */
+		for(lpj=tvalid[0].next ; lpj != NULL; lpj = lpj->next) {
+			j = lpj->n;
+			if (j < mini) {
+				tmat[j][mini] = join[j];
+				sum_cols[j] += join[j];
+			} else if (j > mini) {
+				tmat[mini][j] = join[j];
+				sum_rows[j] += join[j];
+			} /* nothing to do when j is equal to mini. */
+		}
+
+		/* Re-calculate sum_rows[mini],sum_cols[mini]. */
+		sum_cols[mini] = sum_rows[mini] = 0.0;
+
+		/* calculate sum_rows[mini] */
+		da = 0.0;
+		for(lpj=tvalid[0].next ; lpj->n < mini ; lpj = lpj->next) {
+                      da += join[lpj->n];
+		}
+		sum_rows[mini] = da;
+
+		/* skip if 'lpj->n' is equal to 'mini' */
+		if ((lpj != NULL) && (lpj->n == mini)) {
+			lpj = lpj->next;
+		}
+
+		/* calculate sum_cols[mini] */
+		da = 0.0;
+		for( ; lpj != NULL; lpj = lpj->next) {
+                      da += join[lpj->n];
+		}
+		sum_cols[mini] = da;
+
+		/*
+		 * Clean up sum_rows[minj], sum_cols[minj] and score matrix
+		 * related with 'minj'.
+		 */
+		sum_cols[minj] = sum_rows[minj] = 0.0;
+		for(j=1; j<=last_seq-first_seq+1; ++j)
+			tmat[minj][j] = tmat[j][minj] = join[j] = 0.0;
+
+
+/****/	}						/**end main cycle**/
+
+/******************************Last Cycle (3 Seqs. left)********************/
+
+	nude = 1;
+
+	for(lpi=tvalid[0].next; lpi != NULL; lpi = lpi->next) {
+		l[nude] = lpi->n;
+		++nude;
+	}
+
+	b1 = (tmat[l[1]][l[2]] + tmat[l[1]][l[3]] - tmat[l[2]][l[3]]) * 0.5;
+	b2 =  tmat[l[1]][l[2]] - b1;
+	b3 =  tmat[l[1]][l[3]] - b1;
+ 
+	branch[1] = b1 - av[l[1]];
+	branch[2] = b2 - av[l[2]];
+	branch[3] = b3 - av[l[3]];
+
+/* Reset tiny negative and positive branch lengths to zero */
+	if( fabs(branch[1]) < 0.0001) branch[1] = 0.0;
+	if( fabs(branch[2]) < 0.0001) branch[2] = 0.0;
+	if( fabs(branch[3]) < 0.0001) branch[3] = 0.0;
+
+	left_branch[last_seq-first_seq+1-2] = branch[1];
+	left_branch[last_seq-first_seq+1-1] = branch[2];
+	left_branch[last_seq-first_seq+1]   = branch[3];
+
+	for(i=1; i<=last_seq-first_seq+1; i++)
+		tree_description[last_seq-first_seq+1-2][i] = 0;
+
+	if(verbose)
+		fprintf(tree,"\n Cycle%4d (Last cycle, trichotomy):\n",(pint)nc);
+
+	for(i=1; i<=3; ++i) {
+	   if( av[l[i]] > 0.0) {
+	      	if(verbose)
+	      	    fprintf(tree,"\n\t\t Node:%4d (%9.5f) ",(pint)l[i],branch[i]);
+		for(k=last_seq-first_seq+1-3; k>=1; k--)
+			if(tree_description[k][l[i]] == 1) {
+				for(j=1; j<=last_seq-first_seq+1; j++)
+				 	if(tree_description[k][j] == 1)
+					    tree_description[last_seq-first_seq+1-2][j] = i;
+				break;
+			}
+	   }
+	   else  {
+	      	if(verbose)
+	   	    fprintf(tree,"\n\t\t  SEQ:%4d (%9.5f) ",(pint)l[i],branch[i]);
+		tree_description[last_seq-first_seq+1-2][l[i]] = i;
+	   }
+	   if(i < 3) {
+	      	if(verbose)
+	            fprintf(tree,"joins");
+	   }
+	}
+
+	if(verbose)
+		fprintf(tree,"\n");
+
+	
+	/* IMPROVEMENT 1, STEP 6 : release memory area */
+	ckfree(sum_cols);
+	ckfree(sum_rows);
+	ckfree(join);
+
+	/* IMPROVEMENT 2, STEP 4 : release memory area */
+	ckfree(tvalid);
+
+}
+#endif /* ORIGINAL_NJ_TREE */
+
+
+
+/*
+ *	bootstrap_tree()
+ *
+ *	Builds the tree of the loaded alignment with nj_tree() (the
+ *	"standard" tree), then rebuilds it boot_ntrials times from randomly
+ *	resampled alignment positions and counts, in boot_totals[], how
+ *	often each grouping of the standard tree is found again
+ *	(see compare_tree()).  The result is written in every selected
+ *	output format (clustal, phylip, nexus).
+ *
+ *	phylip_name, clustal_name, nexus_name:
+ *		output file names.  A non-empty name is opened with
+ *		open_explicit_file(); otherwise open_output_file() is called
+ *		and is handed the name buffer.
+ *
+ *	Returns without doing anything (after an error message) when no
+ *	alignment is loaded, when there are fewer than 4 sequences or when
+ *	no output format is selected; also returns when an output file
+ *	cannot be opened.
+ */
+void bootstrap_tree(char *phylip_name,char *clustal_name, char *nexus_name)
+{
+	sint i,j;
+	int ranno;
+	char path[MAXLINE+1];
+	char dummy[10];
+	static char **sample_tree;
+	static char **standard_tree;
+	static char **save_tree;
+	sint total_dists, overspill = 0, total_overspill = 0;
+	sint nfails = 0;
+
+	if(empty) {
+		error("You must load an alignment first");
+		return;
+	}
+
+	if(nseqs<4) {
+		error("Alignment has only %d sequences",nseqs);
+		return;
+	}
+
+	if(!output_tree_clustal && !output_tree_phylip && !output_tree_nexus) {
+		error("You must select either clustal or phylip or nexus tree output format");
+		return;
+	}
+	get_path(seqname, path);
+
+/* open one output file per selected format */
+	if (output_tree_clustal) {
+		if (clustal_name[0]!=EOS) {
+			if((clustal_phy_tree_file = open_explicit_file(
+			clustal_name))==NULL) return;
+		}
+		else {
+			if((clustal_phy_tree_file = open_output_file(
+			"\nEnter name for bootstrap output file  ",path,
+			clustal_name,"njb")) == NULL) return;
+		}
+	}
+
+	first_seq=1;
+	last_seq=nseqs;
+
+	if (output_tree_phylip) {
+		if (phylip_name[0]!=EOS) {
+			if((phylip_phy_tree_file = open_explicit_file(
+			phylip_name))==NULL) return;
+		}
+		else {
+			if((phylip_phy_tree_file = open_output_file(
+			"\nEnter name for bootstrap output file  ",path,
+			phylip_name,"phb")) == NULL) return;
+		}
+	}
+
+	if (output_tree_nexus) {
+		if (nexus_name[0]!=EOS) {
+			if((nexus_phy_tree_file = open_explicit_file(
+			nexus_name))==NULL) return;
+		}
+		else {
+			if((nexus_phy_tree_file = open_output_file(
+			"\nEnter name for bootstrap output file  ",path,
+			nexus_name,"treb")) == NULL) return;
+		}
+	}
+
+	boot_totals    = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );
+	for(i=0;i<nseqs+1;i++)
+		boot_totals[i]=0;
+
+	boot_positions = (sint *)ckalloc( (seqlen_array[first_seq]+2) * sizeof (sint) );
+
+	for(j=1; j<=seqlen_array[first_seq]; ++j)  /* First select all positions for */
+		boot_positions[j] = j;	   /* the "standard" tree */
+
+	if(output_tree_clustal) {
+		verbose = TRUE;     /* Turn on file output */
+		if(dnaflag)
+			overspill = dna_distance_matrix(clustal_phy_tree_file);
+		else
+			overspill = prot_distance_matrix(clustal_phy_tree_file);
+	}
+
+	if(output_tree_phylip) {
+		verbose = FALSE;     /* Turn off file output */
+		if(dnaflag)
+			overspill = dna_distance_matrix(phylip_phy_tree_file);
+		else
+			overspill = prot_distance_matrix(phylip_phy_tree_file);
+	}
+
+	if(output_tree_nexus) {
+		verbose = FALSE;     /* Turn off file output */
+		if(dnaflag)
+			overspill = dna_distance_matrix(nexus_phy_tree_file);
+		else
+			overspill = prot_distance_matrix(nexus_phy_tree_file);
+	}
+
+/* check if any distances overflowed the distance corrections */
+	if ( overspill > 0 ) {
+		total_dists = (nseqs*(nseqs-1))/2;
+		overspill_message(overspill,total_dists);
+	}
+
+	tree_gaps=ckfree((void *)tree_gaps);
+
+	if (output_tree_clustal) verbose = TRUE;   /* Turn on screen output */
+
+	standard_tree   = (char **) ckalloc( (nseqs+1) * sizeof (char *) );
+	for(i=0; i<nseqs+1; i++)
+		standard_tree[i]   = (char *) ckalloc( (nseqs+1) * sizeof(char) );
+
+/* compute the standard tree */
+
+	if(output_tree_clustal || output_tree_phylip || output_tree_nexus)
+		nj_tree(standard_tree,clustal_phy_tree_file);
+
+	if (output_tree_clustal)
+		fprintf(clustal_phy_tree_file,"\n\n\t\t\tBootstrap Confidence Limits\n\n");
+
+/* save the left_branch and right_branch for phylip output */
+	save_left_branch = (double *) ckalloc( (nseqs+2) * sizeof (double)   );
+	save_right_branch = (double *) ckalloc( (nseqs+2) * sizeof (double)   );
+	for (i=1;i<=nseqs;i++) {
+		save_left_branch[i] = left_branch[i];
+		save_right_branch[i] = right_branch[i];
+	}
+/*
+  The next line is a fossil from the days of using the cc ran()
+	ran_factor = RAND_MAX / seqlen_array[first_seq];
+*/
+
+	if(usemenu)
+		boot_ran_seed =
+			getint("\n\nEnter seed no. for random number generator ",1,1000,boot_ran_seed);
+
+/* do not use the native cc ran()
+	srand(boot_ran_seed);
+*/
+	addrandinit((unsigned long) boot_ran_seed);
+
+	if (output_tree_clustal)
+		fprintf(clustal_phy_tree_file,"\n Random number generator seed = %7u\n",
+		boot_ran_seed);
+
+	if(usemenu)
+		boot_ntrials =
+			getint("\n\nEnter number of bootstrap trials ",1,10000,boot_ntrials);
+
+	if (output_tree_clustal) {
+		fprintf(clustal_phy_tree_file,"\n Number of bootstrap trials   = %7d\n",
+		(pint)boot_ntrials);
+
+		fprintf(clustal_phy_tree_file,
+		"\n\n Diagrammatic representation of the above tree: \n");
+		fprintf(clustal_phy_tree_file,"\n Each row represents 1 tree cycle;");
+		fprintf(clustal_phy_tree_file," defining 2 groups.\n");
+		fprintf(clustal_phy_tree_file,"\n Each column is 1 sequence; ");
+		fprintf(clustal_phy_tree_file,"the stars in each line show 1 group; ");
+		fprintf(clustal_phy_tree_file,"\n the dots show the other\n");
+		fprintf(clustal_phy_tree_file,"\n Numbers show occurences in bootstrap samples.");
+	}
+/*
+	print_tree(standard_tree, clustal_phy_tree_file, boot_totals);
+*/
+	verbose = FALSE;                   /* Turn OFF screen output */
+
+/* these globals are released here and again after every nj_tree() call below */
+	left_branch=ckfree((void *)left_branch);
+	right_branch=ckfree((void *)right_branch);
+	tkill=ckfree((void *)tkill);
+	av=ckfree((void *)av);
+
+	sample_tree   = (char **) ckalloc( (nseqs+1) * sizeof (char *) );
+	for(i=0; i<nseqs+1; i++)
+		sample_tree[i]   = (char *) ckalloc( (nseqs+1) * sizeof(char) );
+
+	if (usemenu)
+		fprintf(stdout,"\n\nEach dot represents 10 trials\n\n");
+	total_overspill = 0;
+	nfails = 0;
+	for(i=1; i<=boot_ntrials; ++i) {
+		for(j=1; j<=seqlen_array[first_seq]; ++j) { /* select alignment */
+							    /* positions for */
+			ranno = addrand( (unsigned long) seqlen_array[1]) + 1;
+			boot_positions[j] = ranno; 	    /* bootstrap sample */
+		}
+		if(output_tree_clustal) {
+			if(dnaflag)
+				overspill = dna_distance_matrix(clustal_phy_tree_file);
+			else
+				overspill = prot_distance_matrix(clustal_phy_tree_file);
+		}
+
+		if(output_tree_phylip) {
+			if(dnaflag)
+				overspill = dna_distance_matrix(phylip_phy_tree_file);
+			else
+				overspill = prot_distance_matrix(phylip_phy_tree_file);
+		}
+
+		if(output_tree_nexus) {
+			if(dnaflag)
+				overspill = dna_distance_matrix(nexus_phy_tree_file);
+			else
+				overspill = prot_distance_matrix(nexus_phy_tree_file);
+		}
+
+		if( overspill > 0) {
+			total_overspill = total_overspill + overspill;
+			nfails++;
+		}
+
+		tree_gaps=ckfree((void *)tree_gaps);
+
+		if(output_tree_clustal || output_tree_phylip || output_tree_nexus)
+			nj_tree(sample_tree,clustal_phy_tree_file);
+
+		left_branch=ckfree((void *)left_branch);
+		right_branch=ckfree((void *)right_branch);
+		tkill=ckfree((void *)tkill);
+		av=ckfree((void *)av);
+
+		compare_tree(standard_tree, sample_tree, boot_totals, last_seq-first_seq+1);
+		if (usemenu) {
+			if(i % 10  == 0) fprintf(stdout,".");
+			if(i % 100 == 0) fprintf(stdout,"\n");
+		}
+	}
+
+/* check if any distances overflowed the distance corrections */
+	if ( nfails > 0 ) {
+		total_dists = (nseqs*(nseqs-1))/2;
+		fprintf(stdout,"\n");
+		fprintf(stdout,"\n WARNING: %ld of the distances out of a total of %ld times %ld",
+		(long)total_overspill,(long)total_dists,(long)boot_ntrials);
+		fprintf(stdout,"\n were out of range for the distance correction.");
+		fprintf(stdout,"\n This affected %d out of %d bootstrap trials.",
+		(pint)nfails,(pint)boot_ntrials);
+		fprintf(stdout,"\n This may not be fatal but you have been warned!");
+		fprintf(stdout,"\n");
+		fprintf(stdout,"\n SUGGESTIONS: 1) turn off the correction");
+		fprintf(stdout,"\n           or 2) remove the most distant sequences");
+		fprintf(stdout,"\n           or 3) use the PHYLIP package.");
+		fprintf(stdout,"\n\n");
+		if (usemenu)
+			getstr("Press [RETURN] to continue",dummy);
+	}
+
+
+	boot_positions=ckfree((void *)boot_positions);
+
+/* rows were allocated from index 0, so release them from index 0 too */
+	for (i=0;i<nseqs+1;i++)
+		sample_tree[i]=ckfree((void *)sample_tree[i]);
+	sample_tree=ckfree((void *)sample_tree);
+/*
+	fprintf(clustal_phy_tree_file,"\n\n Bootstrap totals for each group\n");
+*/
+	if (output_tree_clustal)
+		print_tree(standard_tree, clustal_phy_tree_file, boot_totals);
+
+/*
+   The tree printing routines clear entries of the description they are
+   given, so keep a copy to restore it between the phylip and the nexus
+   output.
+*/
+	save_tree   = (char **) ckalloc( (nseqs+1) * sizeof (char *) );
+	for(i=0; i<nseqs+1; i++)
+		save_tree[i]   = (char *) ckalloc( (nseqs+1) * sizeof(char) );
+
+	for(i=1; i<nseqs+1; i++)
+		for(j=1; j<nseqs+1; j++)
+			save_tree[i][j]  = standard_tree[i][j];
+
+	if(output_tree_phylip) {
+		left_branch 	= (double *) ckalloc( (nseqs+2) * sizeof (double)   );
+		right_branch    = (double *) ckalloc( (nseqs+2) * sizeof (double)   );
+		for (i=1;i<=nseqs;i++) {
+			left_branch[i] = save_left_branch[i];
+			right_branch[i] = save_right_branch[i];
+		}
+		print_phylip_tree(standard_tree,phylip_phy_tree_file,
+						 bootstrap_format);
+		left_branch=ckfree((void *)left_branch);
+		right_branch=ckfree((void *)right_branch);
+	}
+
+	for(i=1; i<nseqs+1; i++)
+		for(j=1; j<nseqs+1; j++)
+			standard_tree[i][j]  = save_tree[i][j];
+
+	if(output_tree_nexus) {
+		left_branch 	= (double *) ckalloc( (nseqs+2) * sizeof (double)   );
+		right_branch    = (double *) ckalloc( (nseqs+2) * sizeof (double)   );
+		for (i=1;i<=nseqs;i++) {
+			left_branch[i] = save_left_branch[i];
+			right_branch[i] = save_right_branch[i];
+		}
+		print_nexus_tree(standard_tree,nexus_phy_tree_file,
+						 bootstrap_format);
+		left_branch=ckfree((void *)left_branch);
+		right_branch=ckfree((void *)right_branch);
+	}
+
+	boot_totals=ckfree((void *)boot_totals);
+	save_left_branch=ckfree((void *)save_left_branch);
+	save_right_branch=ckfree((void *)save_right_branch);
+
+/* as for sample_tree: row 0 was allocated as well and must be freed */
+	for (i=0;i<nseqs+1;i++)
+		standard_tree[i]=ckfree((void *)standard_tree[i]);
+	standard_tree=ckfree((void *)standard_tree);
+
+	for (i=0;i<nseqs+1;i++)
+		save_tree[i]=ckfree((void *)save_tree[i]);
+	save_tree=ckfree((void *)save_tree);
+
+	if (output_tree_clustal)
+		fclose(clustal_phy_tree_file);
+
+	if (output_tree_phylip)
+		fclose(phylip_phy_tree_file);
+
+	if (output_tree_nexus)
+		fclose(nexus_phy_tree_file);
+
+	if (output_tree_clustal)
+		info("Bootstrap output file completed       [%s]"
+		,clustal_name);
+	if (output_tree_phylip)
+		info("Bootstrap output file completed       [%s]"
+		,phylip_name);
+	if (output_tree_nexus)
+		info("Bootstrap output file completed       [%s]"
+		,nexus_name);
+}
+
+
+/*
+ *	compare_tree()
+ *
+ *	Compares rows 1..n-3 of tree1 with rows 1..n-3 of tree2, looking at
+ *	columns 1..n.  For every row j of tree2 that agrees with row i of
+ *	tree1 in every column, or differs from it in every column,
+ *	hits[i] is incremented.
+ *
+ *	The width of a row is taken from the parameter n only, so the
+ *	result no longer depends on the globals first_seq/last_seq.
+ */
+void compare_tree(char **tree1, char **tree2, sint *hits, sint n)
+{
+	sint i,j,k;
+	sint nsame;
+
+	for(i=1; i<=n-3; i++)  {
+		for(j=1; j<=n-3; j++)  {
+			nsame = 0;
+			for(k=1; k<=n; k++) {
+				if(tree1[i][k] == tree2[j][k]) nsame++;
+			}
+			/* nsame == 0 means the rows differ in all n columns */
+			if((nsame == n) || (nsame == 0)) hits[i]++;
+		}
+	}
+}
+
+
+/*
+ *	print_nexus_tree()
+ *
+ *	Writes the tree held in tree_description to the file 'tree' as a
+ *	NEXUS TREES block: a TRANSLATE table numbering names[1..nseqs],
+ *	followed by one UTREE statement.  With exactly two sequences the
+ *	pair is written directly from tmat; otherwise the three groups of
+ *	the last (trichotomy) row are written with two_way_split_nexus().
+ *
+ *	bootstrap selects the labelling: BS_BRANCH_LABELS appends
+ *	"[count]" from boot_totals to branches, BS_NODE_LABELS appends
+ *	"TRICHOTOMY" after the closing bracket.
+ *
+ *	Values printed with %d are cast to pint, as elsewhere in this file.
+ */
+void print_nexus_tree(char **tree_description, FILE *tree, sint bootstrap)
+{
+	sint i;
+	sint old_row;
+	sint top_row;
+	sint flag;
+
+	fprintf(tree,"#NEXUS\n\n");
+
+	fprintf(tree,"BEGIN TREES;\n\n");
+	fprintf(tree,"\tTRANSLATE\n");
+	for(i=1;i<nseqs;i++) {
+		fprintf(tree,"\t\t%d	%s,\n",(pint)i,names[i]);
+	}
+	fprintf(tree,"\t\t%d	%s\n",(pint)nseqs,names[nseqs]);
+	fprintf(tree,"\t\t;\n");
+
+	fprintf(tree,"\tUTREE PAUP_1= ");
+
+	if(last_seq-first_seq+1==2) {
+		fprintf(tree,"(%d:%7.5f,%d:%7.5f);",
+			(pint)first_seq,tmat[first_seq][first_seq+1],
+			(pint)(first_seq+1),tmat[first_seq][first_seq+1]);
+	}
+	else {
+		/* row holding the final three-way split */
+		top_row = last_seq-first_seq+1-2;
+
+		fprintf(tree,"(");
+
+		/* groups 1, 2 and 3 use left_branch[top_row], [top_row+1], [top_row+2] */
+		for(flag=1; flag<=3; flag++) {
+			old_row=two_way_split_nexus(tree_description, tree, top_row,flag,bootstrap);
+			fprintf(tree,":%7.5f",left_branch[top_row+flag-1]);
+			if ((bootstrap==BS_BRANCH_LABELS) && (old_row>0) && (boot_totals[old_row]>0))
+				fprintf(tree,"[%d]",(pint)boot_totals[old_row]);
+			if(flag < 3)
+				fprintf(tree,",");
+			else
+				fprintf(tree,")");
+		}
+		if (bootstrap==BS_NODE_LABELS) fprintf(tree,"TRICHOTOMY");
+		fprintf(tree,";");
+	}
+	fprintf(tree,"\nENDBLOCK;\n");
+}
+
+
+/*
+ *	two_way_split_nexus()
+ *
+ *	Recursive helper of print_nexus_tree().  Writes the group marked
+ *	with 'flag' in row start_row of tree_description, using sequence
+ *	numbers as labels.
+ *
+ *	For each of the two members of the group, the first column of
+ *	start_row equal to 'flag' is located; if an earlier row
+ *	(start_row-1 down to 1) has a 1 in that column the member is a
+ *	subgroup and is written by a recursive call on that row, otherwise
+ *	it is a single sequence.  Entries that have been written are
+ *	cleared in tree_description, so the caller's description is
+ *	modified.
+ *
+ *	When start_row is the last (trichotomy) row only one member is
+ *	written and no brackets or branch length are added; the return
+ *	value is then 0 for a single sequence or the row of the subgroup.
+ *	In all other cases start_row is returned.
+ */
+sint two_way_split_nexus
+(char **tree_description, FILE *tree, sint start_row, sint flag, sint bootstrap)
+{
+	sint row, new_row = 0, old_row, col, test_col = 0;
+	Boolean single_seq;
+
+	if(start_row != last_seq-first_seq+1-2) fprintf(tree,"("); 
+
+/* first member: first column still carrying 'flag' */
+	for(col=1; col<=last_seq-first_seq+1; col++) {
+		if(tree_description[start_row][col] == flag) {
+			test_col = col;
+			break;
+		}
+	}
+
+/* most recent earlier row that contains this column */
+	single_seq = TRUE;
+	for(row=start_row-1; row>=1; row--) 
+		if(tree_description[row][test_col] == 1) {
+			single_seq = FALSE;
+			new_row = row;
+			break;
+		}
+
+	if(single_seq) {
+		tree_description[start_row][test_col] = 0;
+		/* cast for %d, following the (pint) convention of this file */
+		fprintf(tree,"%d",(pint)(test_col+first_seq-1));
+		if(start_row == last_seq-first_seq+1-2) {
+			return(0);
+		}
+
+		fprintf(tree,":%7.5f,",left_branch[start_row]);
+	}
+	else {
+		/* remove the members of the subgroup from this row */
+		for(col=1; col<=last_seq-first_seq+1; col++) {
+		    if((tree_description[start_row][col]==1)&&
+		       (tree_description[new_row][col]==1))
+				tree_description[start_row][col] = 0;
+		}
+		old_row=two_way_split_nexus(tree_description, tree, new_row, (sint)1, bootstrap);
+		if(start_row == last_seq-first_seq+1-2) {
+			return(new_row);
+		}
+
+		fprintf(tree,":%7.5f",left_branch[start_row]);
+		if ((bootstrap==BS_BRANCH_LABELS) && (boot_totals[old_row]>0))
+			fprintf(tree,"[%d]",(pint)boot_totals[old_row]);
+
+		fprintf(tree,",");
+	}
+
+
+/* second member: same search on what is left of the row */
+	for(col=1; col<=last_seq-first_seq+1; col++) 
+		if(tree_description[start_row][col] == flag) {
+			test_col = col;
+			break;
+		}
+	
+	single_seq = TRUE;
+	new_row = 0;
+	for(row=start_row-1; row>=1; row--) 
+		if(tree_description[row][test_col] == 1) {
+			single_seq = FALSE;
+			new_row = row;
+			break;
+		}
+
+	if(single_seq) {
+		tree_description[start_row][test_col] = 0;
+		fprintf(tree,"%d",(pint)(test_col+first_seq-1));
+		fprintf(tree,":%7.5f)",right_branch[start_row]);
+	}
+	else {
+		for(col=1; col<=last_seq-first_seq+1; col++) {
+		    if((tree_description[start_row][col]==1)&&
+		       (tree_description[new_row][col]==1))
+				tree_description[start_row][col] = 0;
+		}
+		old_row=two_way_split_nexus(tree_description, tree, new_row, (sint)1, bootstrap);
+		fprintf(tree,":%7.5f",right_branch[start_row]);
+		if ((bootstrap==BS_BRANCH_LABELS) && (boot_totals[old_row]>0))
+			fprintf(tree,"[%d]",(pint)boot_totals[old_row]);
+
+		fprintf(tree,")");
+	}
+	if ((bootstrap==BS_NODE_LABELS) && (boot_totals[start_row]>0))
+			fprintf(tree,"%d",(pint)boot_totals[start_row]);
+	
+	return(start_row);
+}
+
+
+/*
+ *	print_phylip_tree()
+ *
+ *	Writes the tree held in tree_description to 'tree' in PHYLIP
+ *	(bracketed) format.  Two sequences are written directly from tmat.
+ *	Otherwise the three groups of the last (trichotomy) row are written
+ *	with two_way_split(), each followed by its branch length and, for
+ *	BS_BRANCH_LABELS, by "[count]" taken from boot_totals.
+ *	BS_NODE_LABELS adds "TRICHOTOMY" after the closing bracket.
+ */
+void print_phylip_tree(char **tree_description, FILE *tree, sint bootstrap)
+{
+	sint group;
+	sint top_row;
+	sint sub_row;
+
+	if(last_seq-first_seq+1==2) {
+		fprintf(tree,"(%s:%7.5f,%s:%7.5f);",names[first_seq],tmat[first_seq][first_seq+1],names[first_seq+1],tmat[first_seq][first_seq+1]);
+		return;
+	}
+
+	/* row describing the final three-way split */
+	top_row = last_seq-first_seq+1-2;
+
+	fprintf(tree,"(\n");
+
+	for(group=1; group<=3; group++) {
+		sub_row=two_way_split(tree_description, tree, top_row,group,bootstrap);
+		/* groups 1..3 use left_branch[top_row], [top_row+1], [top_row+2] */
+		fprintf(tree,":%7.5f",left_branch[top_row+group-1]);
+		if ((bootstrap==BS_BRANCH_LABELS) && (sub_row>0) && (boot_totals[sub_row]>0))
+			fprintf(tree,"[%d]",(pint)boot_totals[sub_row]);
+		if(group == 3)
+			fprintf(tree,")");
+		else
+			fprintf(tree,",\n");
+	}
+
+	if (bootstrap==BS_NODE_LABELS) fprintf(tree,"TRICHOTOMY");
+	fprintf(tree,";\n");
+}
+
+
+/*
+ *	two_way_split()
+ *
+ *	Recursive helper of print_phylip_tree().  Writes the group marked
+ *	with 'flag' in row start_row of tree_description, labelling
+ *	sequences with (at most max_names characters of) their names.
+ *
+ *	Each of the two members of the group is either a single sequence
+ *	or a subgroup defined by an earlier row, which is written by a
+ *	recursive call.  Entries that have been written are cleared in
+ *	tree_description.
+ *
+ *	For the last (trichotomy) row only one member is written and the
+ *	function returns 0 (single sequence) or the row of the subgroup;
+ *	otherwise it returns start_row.
+ */
+sint two_way_split
+(char **tree_description, FILE *tree, sint start_row, sint flag, sint bootstrap)
+{
+	sint ncols = last_seq-first_seq+1;
+	sint top_row = ncols-2;
+	sint c, r;
+	sint pick = 0;		/* column of the member being written */
+	sint child_row = 0;	/* earlier row defining that member, if any */
+	sint sub_row;
+	Boolean is_leaf;
+
+	if(start_row != top_row) fprintf(tree,"(\n");
+
+	/* ---- first member ---- */
+	c = 1;
+	while((c <= ncols) && (tree_description[start_row][c] != flag))
+		c++;
+	if(c <= ncols) pick = c;
+
+	is_leaf = TRUE;
+	r = start_row-1;
+	while((r >= 1) && (tree_description[r][pick] != 1))
+		r--;
+	if(r >= 1) {
+		is_leaf = FALSE;
+		child_row = r;
+	}
+
+	if(!is_leaf) {
+		/* take the subgroup's sequences out of this row */
+		for(c=1; c<=ncols; c++) {
+			if((tree_description[start_row][c]==1)&&
+			   (tree_description[child_row][c]==1))
+				tree_description[start_row][c] = 0;
+		}
+		sub_row=two_way_split(tree_description, tree, child_row, (sint)1, bootstrap);
+		if(start_row == top_row)
+			return(child_row);
+
+		fprintf(tree,":%7.5f",left_branch[start_row]);
+		if ((bootstrap==BS_BRANCH_LABELS) && (boot_totals[sub_row]>0))
+			fprintf(tree,"[%d]",(pint)boot_totals[sub_row]);
+
+		fprintf(tree,",\n");
+	}
+	else {
+		tree_description[start_row][pick] = 0;
+		fprintf(tree,"%.*s",max_names,names[pick+first_seq-1]);
+		if(start_row == top_row)
+			return(0);
+
+		fprintf(tree,":%7.5f,\n",left_branch[start_row]);
+	}
+
+	/* ---- second member ---- */
+	c = 1;
+	while((c <= ncols) && (tree_description[start_row][c] != flag))
+		c++;
+	if(c <= ncols) pick = c;
+
+	is_leaf = TRUE;
+	child_row = 0;
+	r = start_row-1;
+	while((r >= 1) && (tree_description[r][pick] != 1))
+		r--;
+	if(r >= 1) {
+		is_leaf = FALSE;
+		child_row = r;
+	}
+
+	if(!is_leaf) {
+		for(c=1; c<=ncols; c++) {
+			if((tree_description[start_row][c]==1)&&
+			   (tree_description[child_row][c]==1))
+				tree_description[start_row][c] = 0;
+		}
+		sub_row=two_way_split(tree_description, tree, child_row, (sint)1, bootstrap);
+		fprintf(tree,":%7.5f",right_branch[start_row]);
+		if ((bootstrap==BS_BRANCH_LABELS) && (boot_totals[sub_row]>0))
+			fprintf(tree,"[%d]",(pint)boot_totals[sub_row]);
+
+		fprintf(tree,")\n");
+	}
+	else {
+		tree_description[start_row][pick] = 0;
+		fprintf(tree,"%.*s",max_names,names[pick+first_seq-1]);
+		fprintf(tree,":%7.5f)\n",right_branch[start_row]);
+	}
+
+	if ((bootstrap==BS_NODE_LABELS) && (boot_totals[start_row]>0))
+		fprintf(tree,"%d",(pint)boot_totals[start_row]);
+
+	return(start_row);
+}
+
+
+
+/*
+ *	print_tree()
+ *
+ *	Writes a diagram of tree_description to 'tree': one line per row
+ *	1..N-3 (N = last_seq-first_seq+1) with '*' for a 0 entry and '.'
+ *	for any other entry, followed by totals[row] when it is positive.
+ *	The entries of row N-2 are then written as digits on a last line.
+ */
+void print_tree(char **tree_description, FILE *tree, sint *totals)
+{
+	sint ncols = last_seq-first_seq+1;
+	sint r, c;
+
+	fprintf(tree,"\n");
+
+	for(r=1; r<=ncols-3; r++) {
+		fprintf(tree," \n");
+		c = 1;
+		while(c <= ncols) {
+			if(tree_description[r][c] != 0)
+				fprintf(tree,".");
+			else
+				fprintf(tree,"*");
+			c++;
+		}
+		if(totals[r] > 0)
+			fprintf(tree,"%7d",(pint)totals[r]);
+	}
+
+	fprintf(tree," \n");
+	for(c=1; c<=ncols; c++)
+		fprintf(tree,"%1d",(pint)tree_description[ncols-2][c]);
+	fprintf(tree,"\n");
+}
+
+
+
+/*
+ *	dna_distance_matrix()
+ *
+ *	Fills tmat[m][n] (and tmat[n][m]) for every pair of sequences in
+ *	first_seq..last_seq, using the alignment positions listed in
+ *	boot_positions[1..seqlen_array[first_seq]].
+ *
+ *	For each pair it counts the compared sites (e), the transitions (p)
+ *	and the other mismatches (q).  Positions are skipped when tossgaps
+ *	is set and the position is flagged in tree_gaps, when either residue
+ *	is a gap, or (unless use_ambiguities is set) when either residue is
+ *	an ambiguity code.
+ *
+ *	Without kimura the distance is (p+q)/e; with kimura it is
+ *	0.5*log(a) + 0.25*log(b), or 3.5 when the correction is off scale.
+ *
+ *	When verbose is set, a header and one line per pair are written
+ *	to 'tree'.
+ *
+ *	Return value:
+ *		number of pairs for which the kimura correction was off scale.
+ */
+sint dna_distance_matrix(FILE *tree)
+{   
+	sint m,n;
+	sint j,i;
+	sint res1, res2;
+	sint overspill = 0;
+	double p,q,e,a,b,k;	
+
+	tree_gap_delete();  /* flag positions with gaps (tree_gaps[i] = 1 ) */
+	
+	if(verbose) {
+		fprintf(tree,"\n");
+		fprintf(tree,"\n DIST   = percentage divergence (/100)");
+		fprintf(tree,"\n p      = rate of transition (A <-> G; C <-> T)");
+		fprintf(tree,"\n q      = rate of transversion");
+		fprintf(tree,"\n Length = number of sites used in comparison");
+		fprintf(tree,"\n");
+	    if(tossgaps) {
+		fprintf(tree,"\n All sites with gaps (in any sequence) deleted!");
+		fprintf(tree,"\n");
+	    }
+	    if(kimura) {
+		fprintf(tree,"\n Distances corrected by Kimura's 2 parameter model:");
+		fprintf(tree,"\n\n Kimura, M. (1980)");
+		fprintf(tree," A simple method for estimating evolutionary ");
+		fprintf(tree,"rates of base");
+		fprintf(tree,"\n substitutions through comparative studies of ");
+		fprintf(tree,"nucleotide sequences.");
+		fprintf(tree,"\n J. Mol. Evol., 16, 111-120.");
+		fprintf(tree,"\n\n");
+	    }
+	}
+
+	for(m=1;   m<last_seq-first_seq+1;  ++m)     /* for every pair of sequence */
+	for(n=m+1; n<=last_seq-first_seq+1; ++n) {
+		p = q = e = 0.0;
+		tmat[m][n] = tmat[n][m] = 0.0;
+		for(i=1; i<=seqlen_array[first_seq]; ++i) {
+			j = boot_positions[i];
+			if(tossgaps && (tree_gaps[j] > 0) ) 
+				continue;           /* gap position */
+			res1 = seq_array[m+first_seq-1][j];
+			res2 = seq_array[n+first_seq-1][j];
+			if( (res1 == gap_pos1)     || (res1 == gap_pos2) ||
+			    (res2 == gap_pos1) || (res2 == gap_pos2)) 
+				continue;           /* gap in a seq*/
+			if(!use_ambiguities &&
+			   (is_ambiguity(res1) || is_ambiguity(res2)))
+				continue;           /* ambiguity code in a seq*/
+			e = e + 1.0;
+			if(res1 != res2) {
+				if(transition(res1,res2))
+					p = p + 1.0;
+				else
+					q = q + 1.0;
+			}
+		}
+
+
+	/* Kimura's 2 parameter correction for multiple substitutions */
+
+		if(!kimura) {
+			if (e == 0) {
+				/* %d arguments are cast to pint, as elsewhere in this file */
+				fprintf(stdout,"\n WARNING: sequences %d and %d are non-overlapping\n",(pint)m,(pint)n);
+				k = 0.0;
+				p = 0.0;
+				q = 0.0;
+			}
+			else {
+				/* k uses the raw counts, so compute it before p and q are scaled */
+				k = (p+q)/e;
+				if(p > 0.0)
+					p = p/e;
+				else
+					p = 0.0;
+				if(q > 0.0)
+					q = q/e;
+				else
+					q = 0.0;
+			}
+			tmat[m][n] = tmat[n][m] = k;
+			if(verbose)                    /* if screen output */
+				fprintf(tree,        
+ 	     "%4d vs.%4d:  DIST = %7.4f; p = %6.4f; q = %6.4f; length = %6.0f\n"
+        	                 ,(pint)m,(pint)n,k,p,q,e);
+		}
+		else {
+			if (e == 0) {
+				fprintf(stdout,"\n WARNING: sequences %d and %d are non-overlapping\n",(pint)m,(pint)n);
+				p = 0.0;
+				q = 0.0;
+			}
+			else {
+				if(p > 0.0)
+					p = p/e;
+				else
+					p = 0.0;
+				if(q > 0.0)
+					q = q/e;
+				else
+					q = 0.0;
+			}
+
+			/* a or b is set to 0 where the reciprocal would divide by zero */
+			if( ((2.0*p)+q) == 1.0 )
+				a = 0.0;
+			else
+				a = 1.0/(1.0-(2.0*p)-q);
+
+			if( q == 0.5 )
+				b = 0.0;
+			else
+				b = 1.0/(1.0-(2.0*q));
+
+/* watch for values going off the scale for the correction. */
+			if( (a<=0.0) || (b<=0.0) ) {
+				overspill++;
+				k = 3.5;  /* arbitrary high score */ 
+			}
+			else 
+				k = 0.5*log(a) + 0.25*log(b);
+			tmat[m][n] = tmat[n][m] = k;
+			if(verbose)                      /* if screen output */
+	   			fprintf(tree,
+             "%4d vs.%4d:  DIST = %7.4f; p = %6.4f; q = %6.4f; length = %6.0f\n"
+        	                ,(pint)m,(pint)n,k,p,q,e);
+
+		}
+	}
+	return overspill;	/* return the number of off-scale values */
+}
+
+
+/*
+ *	prot_distance_matrix()
+ *
+ *	Fills tmat[m][n] (and tmat[n][m]) for every pair of sequences
+ *	1..nseqs, using the alignment positions listed in
+ *	boot_positions[1..seqlen_array[1]].
+ *
+ *	The distance is the fraction of compared sites that differ.
+ *	Positions are skipped when tossgaps is set and the position is
+ *	flagged in tree_gaps, or when either residue is a gap.  With kimura
+ *	set, fractions below 0.75 are corrected by formula, fractions up to
+ *	0.930 are looked up in dayhoff_pams[] and larger ones are set to
+ *	10.0 and counted as off scale.
+ *
+ *	When verbose is set, a header and one line per pair are written
+ *	to 'tree'.
+ *
+ *	Return value:
+ *		number of pairs that were off scale.
+ */
+sint prot_distance_matrix(FILE *tree)
+{
+	sint m,n;
+	sint site,pos;
+	sint ra, rb;
+	sint overspill = 0;
+	double diffs,sites,k, table_entry;
+
+	tree_gap_delete();  /* flag positions with gaps (tree_gaps[i] = 1 ) */
+
+	if(verbose) {
+		fprintf(tree,"\n");
+		fprintf(tree,"\n DIST   = percentage divergence (/100)");
+		fprintf(tree,"\n Length = number of sites used in comparison");
+		fprintf(tree,"\n\n");
+		if(tossgaps) {
+			fprintf(tree,"\n All sites with gaps (in any sequence) deleted");
+			fprintf(tree,"\n");
+		}
+		if(kimura) {
+			fprintf(tree,"\n Distances up tp 0.75 corrected by Kimura's empirical method:");
+			fprintf(tree,"\n\n Kimura, M. (1983)");
+			fprintf(tree," The Neutral Theory of Molecular Evolution.");
+			fprintf(tree,"\n Page 75. Cambridge University Press, Cambridge, England.");
+			fprintf(tree,"\n\n");
+		}
+	}
+
+	for(m=1; m<nseqs; ++m) {            /* for every pair of sequence */
+		for(n=m+1; n<=nseqs; ++n) {
+			diffs = sites = 0.0;
+			tmat[m][n] = tmat[n][m] = 0.0;
+
+			for(site=1; site<=seqlen_array[1]; ++site) {
+				pos = boot_positions[site];
+				if(tossgaps && (tree_gaps[pos] > 0) )
+					continue;       /* gap position */
+				ra = seq_array[m][pos];
+				rb = seq_array[n][pos];
+				if( (ra == gap_pos1) || (ra == gap_pos2) ||
+				    (rb == gap_pos1) || (rb == gap_pos2))
+					continue;       /* gap in a seq*/
+				sites = sites + 1.0;
+				if(ra != rb) diffs = diffs + 1.0;
+			}
+
+			/* no mismatch means distance 0, which also avoids 0/0 */
+			k = (diffs > 0.0) ? diffs/sites : 0.0;
+
+			if(kimura) {
+				if(k < 0.75) { /* use Kimura's formula */
+					if(k > 0.0) k = - log(1.0 - k - (k * k/5.0) );
+				}
+				else if(k > 0.930) {
+					overspill++;
+					k = 10.0; /* arbitrarily set to 1000% */
+				}
+				else {
+					/* 0.75 .. 0.930 : table lookup */
+					table_entry = (k*1000.0) - 750.0;
+					k = (double)dayhoff_pams[(int)table_entry];
+					k = k/100.0;
+				}
+			}
+
+			tmat[m][n] = tmat[n][m] = k;
+			if(verbose)                    /* if screen output */
+				fprintf(tree,
+					"%4d vs.%4d  DIST = %6.4f;  length = %6.0f\n",
+					(pint)m,(pint)n,k,sites);
+		}
+	}
+	return overspill;
+}
+
+
+void guide_tree(FILE *tree,sint firstseq,sint numseqs)
+/* 
+   Routine for producing unrooted NJ trees from separately aligned
+   pairwise distances.  This produces the GUIDE DENDROGRAMS in
+   PHYLIP format.
+
+   tree      output file; it is closed before returning.
+   firstseq  number of the first sequence of the range.
+   numseqs   number of sequences in the range.
+
+   Sets the globals first_seq, last_seq, verbose and
+   phylip_phy_tree_file.  With two sequences the pair is written
+   directly, each branch being half of tmat[firstseq][firstseq+1];
+   otherwise the tree is built with nj_tree() and written with
+   print_phylip_tree().
+*/
+{
+	static char **standard_tree;
+	sint i;
+	float dist;
+
+	phylip_phy_tree_file=tree;
+	verbose = FALSE;
+	first_seq=firstseq;
+	last_seq=first_seq+numseqs-1;
+  
+	if(numseqs==2) {
+		dist=tmat[firstseq][firstseq+1]/2.0;
+		fprintf(tree,"(%s:%0.5f,%s:%0.5f);\n",
+			names[firstseq],dist,names[firstseq+1],dist);
+	}
+	else {
+		standard_tree   = (char **) ckalloc( (last_seq-first_seq+2) * sizeof (char *) );
+		for(i=0; i<last_seq-first_seq+2; i++)
+			standard_tree[i]  = (char *) ckalloc( (last_seq-first_seq+2) * sizeof(char));
+
+		nj_tree(standard_tree,clustal_phy_tree_file);
+
+		print_phylip_tree(standard_tree,phylip_phy_tree_file,0);
+
+		if(left_branch != NULL) left_branch=ckfree((void *)left_branch);
+		if(right_branch != NULL) right_branch=ckfree((void *)right_branch);
+		if(tkill != NULL) tkill=ckfree((void *)tkill);
+		if(av != NULL) av=ckfree((void *)av);
+		/* rows were allocated from index 0, so free from index 0 as well */
+		for (i=0;i<last_seq-first_seq+2;i++)
+			standard_tree[i]=ckfree((void *)standard_tree[i]);
+		standard_tree=ckfree((void *)standard_tree);
+	}
+	fclose(phylip_phy_tree_file);
+
+}
+
+/*
+ *	is_ambiguity()
+ *
+ *	Returns FALSE when use_ambiguities is TRUE, or when the character
+ *	stored in amino_acid_codes[] for residue code c is one of
+ *	A, C, G, T or U.  Returns TRUE for every other residue code.
+ */
+static Boolean is_ambiguity(char c)
+{
+	if(use_ambiguities==TRUE)
+		return FALSE;
+
+	switch(amino_acid_codes[c]) {
+	case 'A':
+	case 'C':
+	case 'G':
+	case 'T':
+	case 'U':
+		return FALSE;
+	default:
+		return TRUE;
+	}
+}
+

Added: trunk/packages/clustalw/branches/upstream/current/util.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/util.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/util.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,405 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include "clustalw.h"
+
+extern char **seq_array;
+extern sint  *seqlen_array;
+extern char **names,**titles;
+extern sint *output_index;
+extern sint *seq_weight;
+extern double **tmat;
+
+
+/*
+*	ckalloc()
+*
+*	Allocates "bytes" bytes of zero-filled memory (calloc is used, so
+*	callers receive cleared storage).  fatal() is called with
+*	"Out of memory" when the allocation fails.
+*	Return value:
+*		Generic pointer to the newly allocated memory.
+*/
+
+void *ckalloc(size_t bytes)
+{
+	void *block = calloc(bytes, sizeof(char));
+
+	if (block == NULL)
+		fatal("Out of memory\n");
+
+	return block;
+}
+
+/*
+*	ckrealloc()
+*
+*	Resizes the block at "ptr" to "bytes" bytes.  A NULL "ptr" is
+*	reported through fatal() as a bad call (unlike plain realloc, it
+*	is not treated as a fresh allocation); a failed reallocation is
+*	reported through fatal() as "Out of memory".
+*	Return value:
+*		Generic pointer to the re-allocated memory.
+*/
+
+void *ckrealloc(void *ptr, size_t bytes)
+{
+	void *resized = NULL;
+
+	if (ptr == NULL) {
+		fatal("Bad call to ckrealloc\n");
+		return resized;
+	}
+
+	resized = realloc(ptr, bytes);
+	if (resized == NULL)
+		fatal("Out of memory\n");
+
+	return resized;
+}
+
+/*
+*	ckfree()
+*
+*	Releases memory obtained from ckalloc/ckrealloc.  Passing NULL
+*	is reported with warning() and otherwise ignored.
+*	Return value:
+*		Always NULL, so callers can write  p = ckfree(p);
+*/
+
+void *ckfree(void *ptr)
+{
+	if (ptr != NULL) {
+		free(ptr);
+		return NULL;
+	}
+
+	warning("Bad call to ckfree\n");
+	return NULL;
+}
+
+
+/*
+*	rtrim()
+*
+*	Removes trailing white space (anything isspace() accepts) from a
+*	string, in place.  Empty and all-blank strings are handled: the
+*	result is then the empty string and nothing before the start of
+*	the buffer is read.
+*
+*	Return values:
+*		Pointer to the processed string
+*/
+
+char * rtrim(char *str)
+{
+	size_t len = strlen(str);
+
+	/* The cast keeps isspace() defined for bytes with the high bit set. */
+	while ( len > 0 && isspace((unsigned char)str[len - 1]) )
+		len--;
+
+	str[len] = '\0';
+
+	return str;
+}
+
+
+/*
+*	blank_to_()
+*
+*	Rewrites a string in place so that every space, semicolon, comma,
+*	parenthesis and colon becomes an underscore.
+*
+*	Return value:
+*		Pointer to the processed string
+*/
+
+char * blank_to_(char *str)
+{
+	char *cur;
+
+	for (cur = str; *cur != '\0'; cur++) {
+		switch (*cur) {
+		case ' ':
+		case ';':
+		case ',':
+		case '(':
+		case ')':
+		case ':':
+			*cur = '_';
+			break;
+		default:
+			break;
+		}
+	}
+
+	return str;
+}
+
+
+/*
+*	upstr()
+*
+*	Converts string str to uppercase, in place.
+*	Return values:
+*		Pointer to the converted string.
+*/
+
+char * upstr(char *str)
+{
+	char *s;
+
+	/* toupper() is only defined for unsigned char values (and EOF),
+	   so convert before the call rather than passing a plain char. */
+	for (s = str; *s != '\0'; s++)
+		*s = (char)toupper((unsigned char)*s);
+
+	return str;
+}
+
+/*
+*	lowstr()
+*
+*	Converts string str to lower case, in place.
+*	Return values:
+*		Pointer to the converted string.
+*/
+
+char * lowstr(char *str)
+{
+	char *s;
+
+	/* tolower() is only defined for unsigned char values (and EOF),
+	   so convert before the call rather than passing a plain char. */
+	for (s = str; *s != '\0'; s++)
+		*s = (char)tolower((unsigned char)*s);
+
+	return str;
+}
+
+/*
+*	getstr()
+*
+*	Prints the prompt "instr: " on stdout and reads one line from
+*	stdin into outstr, without the trailing newline.
+*
+*	At most MAXLINE-1 characters are stored; the rest of an over-long
+*	line is discarded.  On end-of-file or read error outstr is set to
+*	the empty string.
+*	NOTE(review): assumes every caller supplies at least MAXLINE bytes
+*	(do_system does) - confirm for the other callers.
+*/
+void getstr(char *instr,char *outstr)
+{	
+	size_t n;
+	int ch;
+
+	fprintf(stdout,"%s: ",instr);
+	if (fgets(outstr, MAXLINE, stdin) == NULL) {
+		outstr[0] = EOS;
+		return;
+	}
+
+	n = strcspn(outstr, "\n");
+	if (outstr[n] == '\n') {
+		outstr[n] = EOS;
+	}
+	else {
+		/* Line was truncated: drop the remainder so it is not
+		   mistaken for the answer to the next prompt. */
+		while ((ch = getchar()) != EOF && ch != '\n')
+			;
+	}
+}
+
+/*
+*	getreal()
+*
+*	Prompts on stdout for a real number in the range minx..maxx and
+*	reads the answer from stdin, re-prompting until it is acceptable.
+*
+*	Return value:
+*		The number entered, or def when the line is empty or stdin
+*		has reached end-of-file.
+*/
+double getreal(char *instr,double minx,double maxx,double def)
+{
+	int status;
+	int ch;
+	float ret;
+	char line[MAXLINE];	
+	
+	while(TRUE) {
+		fprintf(stdout,"%s (%.1f-%.1f)   [%.1f]: ",instr,minx,maxx,def);
+		/* Bounded read; give up with the default at end of input
+		   instead of looping forever. */
+		if (fgets(line, sizeof line, stdin) == NULL)
+			return def;
+		if (strchr(line, '\n') == NULL) {
+			/* over-long line: discard the unread remainder */
+			while ((ch = getchar()) != EOF && ch != '\n')
+				;
+		}
+		status=sscanf(line,"%f",&ret);
+		if(status == EOF) return def;
+		if(status != 1) {
+			/* Not a number: ret was never assigned, so ask again. */
+			fprintf(stdout,"ERROR: Bad number\n\n");
+			continue;
+		}
+		if(ret>maxx) {
+			fprintf(stdout,"ERROR: Max. value=%.1f\n\n",maxx);
+			continue;
+		}
+		if(ret<minx) {
+			fprintf(stdout,"ERROR: Min. value=%.1f\n\n",minx);
+			continue;
+		}
+		break;
+	}
+	return (double)ret;
+}
+
+
+/*
+*	getint()
+*
+*	Prompts on stdout for an integer in the range minx..maxx and
+*	reads the answer from stdin, re-prompting until it is acceptable.
+*
+*	Return value:
+*		The number entered, or def when the line is empty or stdin
+*		has reached end-of-file.
+*/
+int getint(char *instr,int minx,int maxx, int def)
+{
+	int ret,status;
+	int ch;
+	char line[MAXLINE];	
+
+	while(TRUE) {
+		fprintf(stdout,"%s (%d..%d)    [%d]: ",
+		instr,(pint)minx,(pint)maxx,(pint)def);
+		/* Bounded read; give up with the default at end of input
+		   instead of looping forever. */
+		if (fgets(line, sizeof line, stdin) == NULL)
+			return def;
+		if (strchr(line, '\n') == NULL) {
+			/* over-long line: discard the unread remainder */
+			while ((ch = getchar()) != EOF && ch != '\n')
+				;
+		}
+		status=sscanf(line,"%d",&ret);
+		if(status == EOF) return def;
+		if(status != 1) {
+			/* Not a number: ret was never assigned, so ask again. */
+			fprintf(stdout,"ERROR: Bad number\n\n");
+			continue;
+		}
+		if(ret>maxx) {
+			fprintf(stdout,"ERROR: Max. value=%d\n\n",(pint)maxx);
+			continue;
+		}
+		if(ret<minx) {
+			fprintf(stdout,"ERROR: Min. value=%d\n\n",(pint)minx);
+			continue;
+		}
+		break;
+	}
+	return ret;
+}
+
+/*
+*	do_system()
+*
+*	Asks the user for a command line and, unless the answer is
+*	empty, hands it to system().  Two newlines are printed afterwards.
+*/
+void do_system(void)
+{
+	char command[MAXLINE];
+
+	getstr("\n\nEnter system command",command);
+	if (command[0] != EOS) {
+		system(command);
+	}
+	fprintf(stdout,"\n\n");
+}
+
+
+/* Reports whether "line" begins with the string "code". */
+Boolean linetype(char *line,char *code)
+{
+	size_t prefix_len = strlen(code);
+
+	return( strncmp(line,code,prefix_len) == 0 );
+}
+
+/*
+*	keyword()
+*
+*	Reports whether the first token of "line" (the characters up to
+*	the first white space or the end of the string) is exactly "code".
+*
+*	The token is compared in place, so lines of any length are safe;
+*	no fixed-size scratch buffer is filled.
+*/
+Boolean keyword(char *line,char *code)
+{
+	size_t len = 0;
+
+	while (line[len] != EOS && !isspace((unsigned char)line[len]))
+		len++;
+
+	/* Equal strings have equal length and equal leading characters. */
+	return( len == strlen(code) && strncmp(line,code,len) == 0 );
+}
+
+/*
+*	blankline()
+*
+*	Returns TRUE when the line, up to its newline or terminator,
+*	contains nothing but digits, white space, '*', ':' and '.';
+*	returns FALSE as soon as any other character is met.
+*/
+Boolean blankline(char *line)
+{
+	int i;
+	unsigned char ch;
+
+	for(i=0;line[i]!='\n' && line[i]!=EOS;i++) {
+		/* unsigned char keeps the <ctype.h> calls well defined */
+		ch = (unsigned char)line[i];
+		if( isdigit(ch) || isspace(ch) )
+			continue;
+		if( (line[i] == '*') || (line[i] == ':') || (line[i] == '.') )
+			continue;
+		return FALSE;
+	}
+	return TRUE;
+}
+
+
+/*
+*	get_path()
+*
+*	Copies str into path and cuts it just after the last '.' of the
+*	final path component.  When that component holds no '.' (a
+*	DIRDELIM is met first, or the start of the string is reached),
+*	a '.' is appended to the full copy instead.
+*	path must be large enough for str plus one extra character.
+*/
+void get_path(char *str,char *path)
+{
+	int dot = -1;
+	int k;
+
+	strcpy(path,str);
+
+	/* Scan backwards; stop at the directory delimiter or the first dot. */
+	k = strlen(path)-1;
+	while (k > -1 && str[k] != DIRDELIM) {
+		if (str[k] == '.') {
+			dot = k;
+			break;
+		}
+		--k;
+	}
+
+	if (dot >= 0)
+		path[dot+1]=EOS;
+	else
+		strcat(path,".");
+}
+
+/*
+*	alloc_aln()
+*
+*	Allocates the global per-alignment tables for nseqs sequences.
+*	Each table gets nseqs+1 slots.  Name, title and distance-matrix
+*	rows are created for indices 1..nseqs; sequence pointers start
+*	out NULL, distances 0.0 and weights 100.
+*/
+void alloc_aln(sint nseqs)
+{
+	sint row,col;
+
+	seqlen_array = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
+
+	seq_array = (char **)ckalloc( (nseqs + 1) * sizeof (char *) );
+	row = 0;
+	while (row < nseqs+1) {
+		seq_array[row] = NULL;
+		row++;
+	}
+
+	names = (char **)ckalloc( (nseqs+1) * sizeof (char *) );
+	for (row = 1; row <= nseqs; row++) {
+		names[row] = (char *)ckalloc((MAXNAMES+1) * sizeof (char));
+	}
+
+	titles = (char **)ckalloc( (nseqs+1) * sizeof (char *) );
+	for (row = 1; row <= nseqs; row++) {
+		titles[row] = (char *)ckalloc((MAXTITLES+1) * sizeof (char));
+	}
+
+	output_index = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
+
+	/* square distance matrix, rows and columns 1..nseqs in use */
+	tmat = (double **) ckalloc( (nseqs+1) * sizeof (double *) );
+	for (row = 1; row <= nseqs; row++) {
+		tmat[row] = (double *)ckalloc( (nseqs+1) * sizeof (double) );
+	}
+	for (row = 1; row <= nseqs; row++) {
+		for (col = 1; col <= nseqs; col++) {
+			tmat[row][col] = 0.0;
+		}
+	}
+
+	seq_weight = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
+	for (row = 1; row <= nseqs; row++) {
+		seq_weight[row] = 100;
+	}
+}
+
+/*
+*	realloc_aln()
+*
+*	Grows the global per-alignment tables so that nseqs further
+*	sequences can be stored at indices first_seq..first_seq+nseqs-1.
+*	The new entries get fresh name/title buffers, NULL sequence
+*	pointers and weight 100.  Existing distance-matrix rows are
+*	widened and the new rows created; distances between old and new
+*	sequences are set to 0.0.
+*/
+void realloc_aln(sint first_seq,sint nseqs)
+{
+	sint row,col;
+	sint end_seq = first_seq+nseqs;	/* one past the last new index */
+
+	seqlen_array = (sint *)ckrealloc(seqlen_array, (first_seq+nseqs+1) * sizeof (sint));
+
+	seq_array = (char **)ckrealloc(seq_array, (first_seq+nseqs+1) * sizeof (char *) );
+	for (row = first_seq; row < end_seq+1; row++) {
+		seq_array[row] = NULL;
+	}
+
+	names = (char **)ckrealloc(names, (first_seq+nseqs+1) * sizeof (char *) );
+	for (row = first_seq; row < end_seq; row++) {
+		names[row] = (char *)ckalloc((MAXNAMES+1) * sizeof (char));
+	}
+
+	titles = (char **)ckrealloc(titles, (first_seq+nseqs+1) * sizeof (char *) );
+	for (row = first_seq; row < end_seq; row++) {
+		titles[row] = (char *)ckalloc((MAXTITLES+1) * sizeof (char));
+	}
+
+	output_index = (sint *)ckrealloc(output_index, (first_seq+nseqs+1) * sizeof (sint));
+
+	seq_weight = (sint *)ckrealloc(seq_weight, (first_seq+nseqs+1) * sizeof (sint));
+	for (row = first_seq; row < end_seq; row++) {
+		seq_weight[row] = 100;
+	}
+
+	tmat = (double **) ckrealloc(tmat, (first_seq+nseqs+1) * sizeof (double *) );
+	/* widen the rows that already exist ... */
+	for (row = 1; row < first_seq; row++) {
+		tmat[row] = (double *)ckrealloc(tmat[row], (first_seq+nseqs+1) * sizeof (double) );
+	}
+	/* ... and create the rows for the new sequences */
+	for (row = first_seq; row < end_seq; row++) {
+		tmat[row] = (double *)ckalloc( (first_seq+nseqs+1) * sizeof (double) );
+	}
+	/* clear the old-versus-new cells on both sides of the diagonal */
+	for (row = 1; row < first_seq; row++) {
+		for (col = first_seq; col < end_seq; col++) {
+			tmat[row][col] = 0.0;
+			tmat[col][row] = 0.0;
+		}
+	}
+}
+
+/*
+*	free_aln()
+*
+*	Releases the global per-alignment tables for an alignment of
+*	nseqs sequences (entries 1..nseqs of the per-sequence tables,
+*	then the tables themselves).  Does nothing when nseqs <= 0.
+*	Every pointer is reset to the value ckfree() returns.
+*/
+void free_aln(sint nseqs)
+{
+	sint k;
+
+	if (nseqs <= 0)
+		return;
+
+	seqlen_array = ckfree(seqlen_array);
+
+	for (k = 1; k <= nseqs; k++) {
+		seq_array[k] = ckfree(seq_array[k]);
+	}
+	seq_array = ckfree(seq_array);
+
+	for (k = 1; k <= nseqs; k++) {
+		names[k] = ckfree(names[k]);
+	}
+	names = ckfree(names);
+
+	for (k = 1; k <= nseqs; k++) {
+		titles[k] = ckfree(titles[k]);
+	}
+	titles = ckfree(titles);
+
+	output_index = ckfree(output_index);
+
+	seq_weight = ckfree(seq_weight);
+
+	for (k = 1; k <= nseqs; k++) {
+		tmat[k] = ckfree(tmat[k]);
+	}
+	tmat = ckfree(tmat);
+}
+
+/* Allocates zero-filled storage of length+2 characters for sequence seq_no. */
+void alloc_seq(sint seq_no,sint length)
+{
+	char *buffer = (char *)ckalloc((length+2) * sizeof (char));
+
+	seq_array[seq_no] = buffer;
+}
+
+/*
+*	realloc_seq()
+*
+*	Resizes the storage of sequence seq_no to length+2 characters.
+*
+*	The checked allocators are used so that running out of memory is
+*	reported through fatal() instead of silently replacing the
+*	sequence pointer with NULL.  A sequence that has no storage yet
+*	(NULL slot) is allocated afresh, because ckrealloc() rejects NULL.
+*/
+void realloc_seq(sint seq_no,sint length)
+{
+	size_t bytes = (length+2) * sizeof (char);
+
+	if (seq_array[seq_no] == NULL)
+		seq_array[seq_no] = (char *)ckalloc(bytes);
+	else
+		seq_array[seq_no] = (char *)ckrealloc(seq_array[seq_no], bytes);
+}
+
+/* Releases the storage of sequence seq_no and resets its slot. */
+void free_seq(sint seq_no)
+{
+	char *buffer = seq_array[seq_no];
+
+	seq_array[seq_no] = ckfree(buffer);
+}
+

Added: trunk/packages/clustalw/branches/upstream/current/xcolor.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/xcolor.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/xcolor.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,1191 @@
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <time.h>
+#include <ctype.h>
+
+#include <vibrant.h>
+
+#include "clustalw.h"
+#include "xmenu.h"
+
+/* Values of color_para.type: a SIMPLE rule colors a residue always,
+   a COMPOUND rule only when the column consensus is in its list. */
+#define SIMPLE 1
+#define COMPOUND 2
+
+/* Layout constants used by write_ps_file() and its helpers. */
+#define LEFTMARGIN 20	/* horizontal page margin */
+#define SEPARATION 2	/* extra lines counted with the header/footer lines of a block */
+#define CHARHEIGHT 10	/* height of one character cell */
+#define CHARWIDTH 6	/* width of one character cell */
+/* Page dimensions (x, y in portrait; swapped for landscape).
+   NOTE(review): presumably PostScript points - confirm. */
+#define A4X 564
+#define A4Y 800
+#define A3X 832
+#define A3Y 1159
+#define USLETTERX 564
+#define USLETTERY 750
+#define SCOREY 3	/* lines reserved for the quality curve when it is shown */
+#define HEADER 7	/* lines reserved for the page header when it is printed */
+#define NOHEADER 0	/* lines reserved when no page header is printed */
+#define MAXRESNO 6	/* extra columns counted when residue numbers are printed */
+
+/* Dimensions of the word table filled by get_line(). */
+#define MAXPARLEN 10	/* characters per word, including the terminator */
+#define MAXPAR 100	/* words per line */
+ 
+static void print_ps_info(FILE *fd,int pagesize);
+static void print_page_header(FILE *fd,int ps_rotation,int maxx,int maxy,
+int page,int numpages,Boolean header,char *str_time,
+char *ps_file,int ps_xtrans,int ps_ytrans,float ps_scale);
+static void print_header_line(FILE *fd,panel_data name_data, panel_data seq_data,
+int ix,int fr,int lr);
+static void print_footer_line(FILE *fd,panel_data name_data, panel_data seq_data,
+int ix,int fr,int lr);
+static void print_quality_curve(FILE *fd,panel_data seq_data
+,int fr,int lr,int score_height);
+static void print_seq_line(FILE *fd,panel_data name_data, panel_data seq_data,
+int row,int seq,int fr,int lr,int res_number);
+
+
+/* One rule from the @consensus section of a color parameter file
+   (filled in by SaveConPara, evaluated by init_consensus). */
+typedef struct consensus_parameters
+{
+char consensus;		/* symbol written into the consensus line when the rule fires */
+int cutoff;		/* threshold compared against a percentage of matching residues */
+int length;		/* number of entries used in cutoff_list */
+char cutoff_list[20];	/* residues that count towards the cutoff */
+} consensus_para;
+ 
+/* One rule from the @color section of a color parameter file
+   (filled in by SaveColPara, evaluated by residue_color). */
+typedef struct color_parameters
+{
+int type;		/* SIMPLE or COMPOUND */
+char residue;		/* residue the rule applies to */
+int color;		/* index into color_lut[] */
+int length;		/* number of entries used in cons_list (COMPOUND only) */
+char cons_list[20];	/* consensus symbols that enable a COMPOUND rule */
+} color_para;
+
+static void init_color_lut(FILE *fd);
+static int init_printer_lut(char *filename);
+static char *init_consensus(panel_data data);
+static int SaveColPara(char word[MAXPAR][MAXPARLEN],int num_words,int count);
+static int SaveConPara(char word[MAXPAR][MAXPARLEN],int num_words,int count);
+static int get_line(char *sinline,char word[MAXPAR][MAXPARLEN]);
+static int residue_color(char res,char consensus);
+static Boolean commentline(char *line);
+
+#define DEF_NCOLORS 4	/* number of default residue groups below that are used */
+#define MAX_NCOLORS 8	/* number of entries in def_color_lut */
+#define DEFAULT_COLOR 0	/* color index used when no rule matches */
+
+/* A named color with red/green/blue components. */
+typedef struct rgb_color {
+	char name[20];
+	float r,g,b;
+} rgb_color;
+
+/* Built-in colors, copied into color_lut[1..] by init_color_lut()
+   when the parameter file supplies no rgb index. */
+rgb_color def_color_lut[MAX_NCOLORS]={
+	"RED"          ,0.9, 0.1, 0.1,
+	"BLUE"         ,0.1, 0.1, 0.7,
+	"GREEN"        ,0.1, 0.9, 0.1,
+	"ORANGE"       ,0.9, 0.6, 0.3,
+	"CYAN"         ,0.1, 0.9, 0.9,
+	"PINK"         ,0.9, 0.5, 0.5,
+	"MAGENTA"      ,0.9, 0.1, 0.9,
+	"YELLOW"       ,0.9, 0.9, 0.0,
+};
+
+/* Default residue groups for protein alignments; group i is given
+   color index i+1 by init_color_parameters().  Only the first
+   DEF_NCOLORS rows are initialised. */
+char def_aacolor[MAX_NCOLORS][26]={"krh",
+				"fwy",
+				"ilmv",
+				"gpst"};
+
+/* Default residue groups for nucleotide alignments (used when dnaflag is set). */
+char def_dnacolor[MAX_NCOLORS][26]={"a",
+				"c",
+				"tu",
+				"g"};
+
+extern char revision_level[];
+
+extern int max_names;
+ 
+extern int ncolors;
+extern int ncolor_pars;
+extern color color_lut[];
+extern int inverted;
+extern Boolean residue_exceptions;
+extern Boolean segment_exceptions;
+extern Boolean dnaflag;
+
+/* Number of entries in use in Col_Par[] and Con_Par[]. */
+int NumColParas;
+int NumConParas;
+
+/* Coloring rules and consensus rules currently in force
+   (loaded by init_color_parameters). */
+color_para Col_Par[100];
+consensus_para Con_Par[100];
+
+
+/*
+ * make_colormask()
+ *
+ * Fills data.colormask[seq][col] with a color index for every
+ * residue of the panel.  Every cell is first set to DEFAULT_COLOR;
+ * when more than one color is defined a consensus line is computed
+ * and each cell is set from residue_color().
+ *
+ * "data" is received by value, so the consensus line computed here can
+ * never reach the caller; it is released before returning instead of
+ * being left in the local copy.
+ */
+void make_colormask(panel_data data)
+{
+	int i,j;
+	char *consensus;
+
+	for(i=0;i<data.nseqs;i++)
+		for(j=0;j<data.ncols;j++)
+			data.colormask[i][j] = DEFAULT_COLOR;
+
+	if (ncolors > 1)
+	{
+		consensus=init_consensus(data);
+
+		for(i=0;i<data.nseqs;i++)
+			for(j=0;j<data.ncols;j++)
+				data.colormask[i][j] = residue_color(data.lines[i][j],consensus[j]);
+
+		ckfree((void *)consensus);
+	}
+}
+
+/*
+ * init_color_lut()
+ *
+ * Builds the global color table color_lut[] and sets ncolors.
+ *
+ * Entry 0 is the default color: "BLACK" (0.4 grey) normally, "WHITE"
+ * when the display is inverted.  Further entries are read from the
+ * "@rgbindex" section of fd (lines of the form: name r g b) up to the
+ * next line starting with '@'.  If fd is NULL or supplies no entries
+ * the built-in def_color_lut[] is used.
+ *
+ * fd - open color parameter file, or NULL.  The read position is
+ *      left wherever scanning stopped.
+ */
+static void init_color_lut(FILE *fd)
+{ 
+	char sinline[1025];
+	char *args[10];
+	int i,numargs;
+	Boolean found=FALSE;
+
+	if (inverted==FALSE)
+	{
+        	strcpy(color_lut[0].name,"BLACK");
+		color_lut[0].r=0.4;
+		color_lut[0].g=0.4;
+		color_lut[0].b=0.4;
+		SelectColor(color_lut[0].r*255, color_lut[0].g*255, color_lut[0].b*255);
+		color_lut[0].val=GetColor();
+	}
+	else
+	{
+        	strcpy(color_lut[0].name,"WHITE");
+		color_lut[0].r=1.0;
+		color_lut[0].g=1.0;
+		color_lut[0].b=1.0;
+		SelectColor(color_lut[0].r*255, color_lut[0].g*255, color_lut[0].b*255);
+		color_lut[0].val=GetColor();
+	}
+
+	ncolors=1;
+	if (fd != NULL)
+	{
+		for (;fgets(sinline,1024,fd)!=NULL;)
+		{
+			/* Cut at the line terminator only.  Chopping the last
+			   character unconditionally damaged a final line that
+			   has no newline and left the '\r' of CRLF files. */
+			sinline[strcspn(sinline,"\r\n")] = '\0';
+			if (strcmp(sinline,"@rgbindex")==0) 
+			{
+				found = TRUE;
+				break;
+			}
+		}
+	}
+	if (found == TRUE)
+	{
+		for (;fgets(sinline,1024,fd)!=NULL;)
+		{
+			if (commentline(sinline)) continue;
+			if (sinline[0]=='@') break;	/* start of the next section */
+			numargs = getargs(sinline, args, 4);
+			if (numargs != 4)
+			{
+				error("Problem in color rgb index - line %d\n",ncolors+1);
+				break;
+			}
+			else
+			{
+				/* NOTE(review): the name is copied unbounded; the
+				   size of color_lut[].name is declared elsewhere. */
+				strcpy(color_lut[ncolors].name, args[0]);
+				color_lut[ncolors].r=atof(args[1]);
+				color_lut[ncolors].g=atof(args[2]);
+				color_lut[ncolors].b=atof(args[3]);
+				SelectColor(color_lut[ncolors].r*255, color_lut[ncolors].g*255, color_lut[ncolors].b*255);
+				color_lut[ncolors].val=GetColor();
+				ncolors++;
+				if (ncolors>=MAXCOLORS) 
+				{
+					warning("Only using first %d colors in rgb index.",MAXCOLORS);
+					break;
+				}
+			}
+		}
+
+	}
+
+/* if we can't find a table, use the hard-coded colors */
+        if (ncolors==1)
+        {
+		ncolors=MAX_NCOLORS+1;
+		for(i=1;i<ncolors;i++)
+		{
+			strcpy(color_lut[i].name,def_color_lut[i-1].name);
+			color_lut[i].r=def_color_lut[i-1].r;
+			color_lut[i].g=def_color_lut[i-1].g;
+			color_lut[i].b=def_color_lut[i-1].b;
+			SelectColor(color_lut[i].r*255, color_lut[i].g*255, color_lut[i].b*255);
+			color_lut[i].val=GetColor();
+		}
+	}
+
+}
+
+/*
+ * init_color_parameters()
+ *
+ * Loads the coloring scheme from the parameter file par_file:
+ * the rgb index (via init_color_lut), then the rules of the
+ * "@consensus" section into Con_Par[] and of the "@color" section
+ * into Col_Par[].  When par_file is NULL, cannot be opened, or yields
+ * no color rules, the built-in residue groups (def_dnacolor when
+ * dnaflag is set, def_aacolor otherwise) are installed instead.
+ *
+ * Sets ncolor_pars, NumColParas and NumConParas.  Rules beyond the
+ * capacity of the rule tables are reported and ignored rather than
+ * written past the end of the tables.
+ */
+void init_color_parameters(char *par_file)
+{
+
+	const int col_capacity = (int)(sizeof Col_Par / sizeof Col_Par[0]);
+	const int con_capacity = (int)(sizeof Con_Par / sizeof Con_Par[0]);
+	int i,j,err;
+	int group_len;
+	char sinline[1025];
+	int maxparas = 50;
+	char inword[MAXPAR][MAXPARLEN];
+	int num_words;
+	int in_consensus=FALSE,in_color=FALSE;
+	int color_found=FALSE;
+	FILE *par_fd=NULL;
+
+	if(par_file!=NULL)
+		par_fd=fopen(par_file,"r");
+	if(par_fd==NULL)
+	{
+                info("No color file found - using defaults");
+		ncolor_pars=0;
+	}
+
+	init_color_lut(par_fd);
+	if (par_fd != NULL) rewind(par_fd);
+	if (ncolors==0)
+	{
+		/* do not leave the parameter file open on this exit */
+		if (par_fd != NULL) fclose(par_fd);
+		return;
+	}
+
+	NumColParas=0;
+	NumConParas=0;
+	if (par_fd != NULL)
+	{
+		for(;fgets(sinline,1024,par_fd) != NULL;)
+		{
+			/* Cut at the line terminator only, so that a final
+			   line without newline keeps its last character. */
+			sinline[strcspn(sinline,"\r\n")] = '\0';
+			if (commentline(sinline)) continue;
+			switch(sinline[0])
+			{	
+				case '\0':
+					break;
+				case '@':
+					/* section marker: remember which list follows */
+					if (strcmp((char*)(sinline+1),"consensus")==0) 
+					{
+						in_consensus = TRUE;
+						in_color = FALSE;
+					}
+					else if (strcmp((char*)(sinline+1),"color")==0)
+					{
+						in_consensus = FALSE;
+						in_color = TRUE;
+						color_found = TRUE;
+					}
+					break;
+				default:
+					num_words = get_line(sinline,inword);
+					if (in_consensus == TRUE) 
+					{
+						if (NumConParas >= con_capacity)
+							error("Too many parameters in color file");
+						else
+						{
+							err = SaveConPara(inword,num_words,NumConParas);
+							if (err == 0) NumConParas++;
+						}
+					}
+					else if (in_color == TRUE)
+					{
+						if (NumColParas >= col_capacity)
+							error("Too many parameters in color file");
+						else
+						{
+							err = SaveColPara(inword,num_words,NumColParas);
+							if (err == 0) NumColParas++;
+						}
+					}
+	
+					if((NumColParas>maxparas) || (NumConParas>maxparas))
+				     	error("Too many parameters in color file");
+	
+			}
+		}
+		if (color_found == FALSE)
+		{
+			error("@color not found in parameter file - using defaults\n");
+			ncolor_pars=0;
+		}
+		fclose(par_fd);
+	}
+	ncolor_pars=NumColParas;
+
+/* if no color parameters found, use the default aa groupings */
+	if(ncolor_pars==0)
+	{
+		if (dnaflag)
+		{
+			for(i=0;i<DEF_NCOLORS;i++)
+			{
+				group_len=(int)strlen(def_dnacolor[i]);
+				for(j=0;j<group_len;j++)
+				{
+					Col_Par[ncolor_pars].type=SIMPLE;
+					Col_Par[ncolor_pars].residue=def_dnacolor[i][j];
+					Col_Par[ncolor_pars].color=i+1;
+					ncolor_pars++;
+				}
+			}
+		}
+		else
+		{
+			for(i=0;i<DEF_NCOLORS;i++)
+			{
+				group_len=(int)strlen(def_aacolor[i]);
+				for(j=0;j<group_len;j++)
+				{
+					Col_Par[ncolor_pars].type=SIMPLE;
+					Col_Par[ncolor_pars].residue=def_aacolor[i][j];
+					Col_Par[ncolor_pars].color=i+1;
+					ncolor_pars++;
+				}
+			}
+		}
+	}
+	NumColParas=ncolor_pars;
+}
+
+/*
+ * find_file()
+ *
+ * Looks for a readable file called def_file.  The name is tried as
+ * given first; on UNIX builds it is then tried in $HOME, in every
+ * directory of $PATH, and finally in /usr/share/clustalx and
+ * /usr/local/share/clustalx.
+ *
+ * Candidate names that do not fit in FILENAMELEN characters are
+ * skipped instead of being written past the end of the buffer.
+ *
+ * Return value:
+ *	A newly allocated copy of the first name that could be opened
+ *	(the caller releases it with ckfree), or NULL if none was found.
+ */
+char *find_file(char *def_file)
+{
+	char filename[FILENAMELEN];
+	char *retname;
+	FILE *fd;
+	Boolean found=FALSE;
+#ifdef UNIX
+	static const char share_dirs[] = "/usr/share/clustalx:/usr/local/share/clustalx";
+	char *path, *path1, *deb, *fin;
+	size_t lf, ltot;
+	char *home;
+#endif
+
+	/* a name that cannot even be stored cannot be found */
+	if (strlen(def_file) >= sizeof filename)
+		return(NULL);
+
+        strcpy(filename,def_file);
+        fd = fopen(filename,"r");
+	if (fd != NULL)
+		found=TRUE;
+#ifdef UNIX
+	lf=strlen(def_file);
+        if (found == FALSE)
+        {
+                home = getenv("HOME");
+		if (home != NULL && strlen(home) + 1 + lf < sizeof filename)
+		{
+                	sprintf(filename,"%s/%s",home,def_file);
+                	fd = fopen(filename,"r");
+			if (fd != NULL)
+				found=TRUE;
+		}
+                if (found == FALSE)
+                {
+			path=getenv("PATH");/* get the list of path directories,
+                        			separated by : */
+			if (path == NULL)
+				path="";
+			/* added for File System Standards  - Francois */
+			path1=(char *)ckalloc((strlen(path)+1+sizeof share_dirs)*sizeof(char));
+			strcpy(path1,path);
+			/* the share directories are further list elements, so
+			   they need their own ':' after the last PATH entry */
+			if (path1[0] != '\0')
+				strcat(path1,":");
+			strcat(path1,share_dirs);
+
+        		deb=path1;
+        		do
+                	{
+                		fin=strchr(deb,':');
+                		if(fin!=NULL)
+					ltot=(size_t)(fin-deb);
+                		else
+					ltot=strlen(deb);
+                		/* directory + '/' + name + terminator must fit;
+                		   empty list elements are skipped */
+                		if( ltot > 0 && ltot + 1 + lf + 1 <= sizeof filename)
+                        	{
+					memcpy(filename,deb,ltot);
+                        		filename[ltot]='/';
+                        		strcpy(filename+ltot+1,def_file); /* now dir is appended with filename */
+                        		if( (fd = fopen(filename,"r") ) != NULL)
+					{
+						found=TRUE;
+						break;
+                        		}
+                        	}
+                		if (fin != NULL)
+					deb=fin+1;
+                	}
+        		while (fin != NULL);
+			ckfree((void *)path1);
+                }
+        }
+#endif
+	if (found == TRUE)
+	{
+		fclose(fd);
+		retname=(char *)ckalloc((strlen(filename)+1)*sizeof(char));
+		strcpy(retname,filename);
+	}
+	else
+		retname=NULL;
+	return(retname);
+}
+ 
+/*
+ * init_consensus()
+ *
+ * Builds a consensus line for the panel: one character per column,
+ * '.' unless a rule in Con_Par[] applies.  A rule applies to a column
+ * when the residues listed in its cutoff_list make up at least
+ * "cutoff" percent of the alphabetic characters in that column
+ * (case-insensitive).  When several rules apply, the last one wins.
+ *
+ * Return value:
+ *	Newly allocated buffer of data.ncols+1 characters.
+ */
+static char *init_consensus(panel_data data)
+{
+	char *line;
+	int col,rule,row,k;
+	int letters,matches;
+	char residue;
+
+	line=(char *)ckalloc((data.ncols+1)*sizeof(char));
+
+	for (col=0; col<data.ncols; col++)
+	{
+		line[col] = '.';
+		for (rule=0; rule<NumConParas; rule++)
+		{
+			matches = 0;
+			letters = 0;
+			for (row=0; row<data.nseqs; row++)
+			{
+				residue=tolower(data.lines[row][col]);
+				if (isalpha(residue))
+					letters++;
+				for (k=0; k<Con_Par[rule].length; k++)
+				{
+					if (residue==tolower(Con_Par[rule].cutoff_list[k]))
+						matches++;
+				}
+			}
+			/* columns without any letter never trigger a rule */
+			if (letters != 0 && ((matches*100)/letters) >= Con_Par[rule].cutoff)
+				line[col] = Con_Par[rule].consensus;
+		}
+	}
+
+	return(line);
+}
+
+/*
+ * SaveColPara()
+ *
+ * Stores one rule of the @color section in Col_Par[count].
+ * Accepted forms (already split into words by get_line):
+ *	residue = COLOR				simple rule
+ *	residue = COLOR if c1 c2 ...		compound rule
+ * A color name that is not in color_lut[] is reported and replaced by
+ * color 0.  Consensus symbols beyond the capacity of cons_list are
+ * reported and dropped.
+ *
+ * Return value:
+ *	0 on success; 1-3 for a malformed line; 4 when count is outside
+ *	the Col_Par table.  Nothing is stored on a non-zero return.
+ */
+static int SaveColPara(char word[MAXPAR][MAXPARLEN],int num_words,int count)
+{
+
+	const int table_size = (int)(sizeof Col_Par / sizeof Col_Par[0]);
+	const int max_cons = (int)sizeof Col_Par[0].cons_list;
+	int i,n_cons;
+
+	if (num_words < 3)
+	{
+		error("Wrong format in color list");
+		return(1);
+	}
+
+	if (word[1][0] != '=')
+	{
+		error("Wrong format in color list");
+		return(2);
+	}
+
+	if (num_words > 3 && strcmp(word[3],"if")!=0)
+	{
+		error("Wrong format in color list");
+		return(3);
+	}
+
+	if (count < 0 || count >= table_size)
+	{
+		error("Too many parameters in color file");
+		return(4);
+	}
+
+	Col_Par[count].type = (num_words == 3) ? SIMPLE : COMPOUND;
+	Col_Par[count].residue = word[0][0];
+
+	/* look the color name up; as before, the last match wins */
+	Col_Par[count].color = -1;
+	for (i=0;i<ncolors;i++)
+		if (strcmp(word[2],color_lut[i].name)==0) Col_Par[count].color = i;
+	if (Col_Par[count].color == -1)
+	{
+		error("%s not found in rgb index - using %s",word[2],color_lut[0].name);
+		Col_Par[count].color = 0;
+	}
+
+	if (num_words > 3)
+	{
+		/* words 4.. are the consensus symbols that enable the rule */
+		n_cons = num_words - 4;
+		if (n_cons > max_cons)
+		{
+			warning("Only using first %d consensus symbols in color list",max_cons);
+			n_cons = max_cons;
+		}
+		Col_Par[count].length = n_cons;
+		for (i=0;i<n_cons;i++)
+			Col_Par[count].cons_list[i] = word[i+4][0];
+	}
+
+	return(0);
+		
+}
+
+
+/*
+ * SaveConPara()
+ *
+ * Stores one rule of the @consensus section in Con_Par[count].
+ * Expected form (already split into words by get_line):
+ *	symbol = NN% r1 r2 ...
+ * where NN is the cutoff and r1.. are the residues counted.  Residues
+ * beyond the capacity of cutoff_list are reported and dropped.
+ *
+ * Return value:
+ *	0 on success; 1-2 for a malformed line; 4 when count is outside
+ *	the Con_Par table.  Nothing is stored on a non-zero return.
+ */
+static int SaveConPara(char word[MAXPAR][MAXPARLEN],int num_words,int count)
+{
+
+	const int table_size = (int)(sizeof Con_Par / sizeof Con_Par[0]);
+	const int max_list = (int)sizeof Con_Par[0].cutoff_list;
+	int i,n_list;
+
+	if (num_words < 3)
+	{
+		error("Wrong format in consensus list");
+		return(1);
+	}
+
+	if (word[1][0] != '=')
+	{
+		error("Wrong format in consensus list");
+		return(2);
+	}
+
+	if (count < 0 || count >= table_size)
+	{
+		error("Too many parameters in color file");
+		return(4);
+	}
+
+	Con_Par[count].consensus = word[0][0];
+	/* strip the percent sign so that atoi sees only the number */
+	for (i=0;i<MAXPARLEN-1;i++)
+	{
+		if(word[2][i]=='%') word[2][i] = '\0';
+	}
+	Con_Par[count].cutoff = atoi(word[2]);
+
+	n_list = num_words - 3;
+	if (n_list > max_list)
+	{
+		warning("Only using first %d residues in consensus list",max_list);
+		n_list = max_list;
+	}
+	Con_Par[count].length = n_list;
+	for (i=0;i<n_list;i++)
+	{
+		Con_Par[count].cutoff_list[i] = word[i+3][0];
+	}
+
+	return(0);
+		
+}
+
+/*
+ * get_line()
+ *
+ * Splits sinline into words separated by spaces, tabs or colons and
+ * stores them, NUL-terminated, in word[0], word[1], ...
+ *
+ * The table is never overrun: at most MAXPAR words are stored, and
+ * characters beyond MAXPARLEN-1 in a single word are dropped (the
+ * word is truncated).
+ *
+ * Return value:
+ *	Number of words stored.
+ */
+static int get_line(char *sinline,char word[MAXPAR][MAXPARLEN])
+{
+	size_t i,len;
+	int word_count=0, char_count=0;
+	int in_word=FALSE;
+
+	memset(word, 0, MAXPAR * sizeof word[0]);
+
+	len = strlen(sinline);
+	/* i runs up to and including the terminator so that the last
+	   word is closed like any other */
+	for (i=0;i<=len && word_count<MAXPAR;i++)
+	{
+		switch (sinline[i])
+		{
+			case ' ':
+			case '\t':
+			case '\0':
+			case ':':
+				if (in_word)
+				{
+					word[word_count][char_count] = '\0';
+					word_count++;
+					char_count = 0;
+					in_word = FALSE;
+				}
+				break;
+			default:
+				in_word = TRUE;
+				/* keep one cell free for the terminator */
+				if (char_count < MAXPARLEN-1)
+				{
+					word[word_count][char_count] = sinline[i];
+					char_count++;
+				}
+				break;
+		}		
+
+	}
+	return(word_count);
+}
+
+/*
+ * residue_color()
+ *
+ * Returns the color index for residue "res" in a column whose
+ * consensus symbol is "consensus".  The rules in Col_Par[] are tried
+ * in order; residues are compared case-insensitively.  A SIMPLE rule
+ * for the residue decides at once; a COMPOUND rule decides only when
+ * the consensus symbol is in its list, otherwise later rules are
+ * tried.  DEFAULT_COLOR is returned when no rule decides.
+ */
+static int residue_color(char res,char consensus)
+{
+	/* unsigned char keeps tolower() defined for high-bit characters */
+	const int wanted = tolower((unsigned char)res);
+	int i,j;
+
+        for (i=0;i<NumColParas;i++)
+        {
+                if (wanted != tolower((unsigned char)Col_Par[i].residue))
+                        continue;
+
+                switch (Col_Par[i].type)
+                {
+                case SIMPLE:
+                        return(Col_Par[i].color);
+                case COMPOUND:
+                        for (j=0;j<Col_Par[i].length;j++)
+                        {
+                                if (consensus == Col_Par[i].cons_list[j])
+                                        return(Col_Par[i].color);
+                        }
+                        break;
+                default:
+                        return(DEFAULT_COLOR);
+                }
+        }
+        return(DEFAULT_COLOR);
+}
+
+/*
+ * commentline()
+ *
+ * Returns TRUE for lines that carry no parameters: lines whose first
+ * character is '#', and lines holding only white space up to the
+ * newline or end of string.
+ */
+static Boolean commentline(char *line)
+{
+        int i;
+ 
+	if (line[0] == '#') return TRUE;
+        for(i=0;line[i]!='\n' && line[i]!=EOS;i++) {
+                /* unsigned char keeps isspace() well defined */
+                if( !isspace((unsigned char)line[i]) )
+                        return FALSE;
+        }
+        return TRUE;
+}
+
+/* Geometry of the block currently being printed, set by
+   write_ps_file() before it calls the print_* helpers.
+   NOTE(review): presumably read by those helpers - they are not
+   visible here, confirm. */
+int block_height,block_left,block_top;
+int header_top,seq_top,footer_top,curve_top;
+
+/*
+ * write_ps_file()
+ *
+ * Writes the alignment shown in panel p to the PostScript file
+ * ps_file.
+ *
+ *	par_file	printer color file, passed to init_printer_lut()
+ *	pagesize	A4, A3 or USLETTER
+ *	orientation	PORTRAIT, otherwise landscape
+ *	header		print a page header
+ *	ruler		print all footer lines (one fewer otherwise)
+ *	resno		print residue numbers
+ *	resize		scale the output to fit the page
+ *	first_printres,
+ *	last_printres	1-based residue range; values <= 0 (or a last
+ *			residue beyond the alignment) select the full range
+ *	blength		residues per block; 0 means the whole range
+ *	show_curve	print the quality curve
+ *
+ * Two layouts exist.  Without resizing, or when one block covers the
+ * whole range, the sequences are split over pages.  Otherwise the
+ * residues are split into blocks and as many blocks as fit are put
+ * on each page.
+ *
+ * Errors are reported through error(); the output file is closed and
+ * the working storage released on every exit.
+ */
+void write_ps_file(spanel p,char *ps_file,char *par_file,int pagesize,
+int orientation,Boolean header, Boolean ruler, Boolean resno, Boolean resize,
+int first_printres,int last_printres,
+int blength,Boolean show_curve)
+{
+	int i,j,bn,seq,numseqs;
+	int err;
+	int blocklen,numpages;
+	int fr,lr;
+	int page,row;
+	int ps_rotation=0,ps_xtrans=0,ps_ytrans=0;
+	float ps_scale,hscale,wscale;
+	int maxseq;
+	int maxx=0,maxy=0;
+	int score_height=0;
+	int main_header=0;
+	int numelines,numecols;
+	int nhead,nfoot;
+	int ppix_width;    /* width of the page in pixels */
+	int pchar_height;    /* height of the page in chars for sequences */
+	int ppix_height;    /* height of the page in pixels for sequences */
+        int blocksperpage,numblocks;
+	int *res_number;
+	panel_data name_data,seq_data;
+	FILE *fd;
+
+	time_t *tptr=NULL,ttime;
+	char *str_time;
+
+/* open the output file */
+	if ((fd=fopen(ps_file,"w"))==NULL)
+	{
+		error("Cannot open file %s",ps_file);
+		return;
+	}
+
+/* check for printer-specific rgb values */
+	err=init_printer_lut(par_file);
+	if(err>0) warning("No PS Colors file: using default colors\n");
+
+/* get the page size parameters */
+
+	if (pagesize==A4)
+	{
+		if (orientation==PORTRAIT)
+		{
+			maxx=A4X;
+			maxy=A4Y;
+			ps_rotation=0;
+		}
+		else
+		{
+			maxx=A4Y;
+			maxy=A4X;
+			ps_rotation=-90;
+		}
+	}
+	else if (pagesize==A3)
+	{
+		if (orientation==PORTRAIT)
+		{
+			maxx=A3X;
+			maxy=A3Y;
+			ps_rotation=0;
+		}
+		else
+		{
+			maxx=A3Y;
+			maxy=A3X;
+			ps_rotation=-90;
+		}
+	}
+	else if (pagesize==USLETTER)
+	{
+		if (orientation==PORTRAIT)
+		{
+			maxx=USLETTERX;
+			maxy=USLETTERY;
+			ps_rotation=0;
+		}
+		else
+		{
+			maxx=USLETTERY;
+			maxy=USLETTERX;
+			ps_rotation=-90;
+		}
+	}
+	if(show_curve) score_height=SCOREY;
+	if(header) main_header=HEADER;
+	else main_header=NOHEADER;
+        ppix_width=maxx-LEFTMARGIN*2;
+        ppix_height=maxy-main_header*CHARHEIGHT;
+
+/* get the name data */
+	GetPanelExtra(p.names,&name_data);
+
+/* get the sequence data */
+	GetPanelExtra(p.seqs,&seq_data);
+	numseqs=seq_data.nseqs;
+	nhead=seq_data.nhead;
+	if(ruler)
+		nfoot=seq_data.nfoot;
+	else
+		nfoot=seq_data.nfoot-1;
+	numelines=nhead+nfoot+score_height+SEPARATION;
+
+/* check the block length, residue range parameters */
+	if(first_printres<=0)
+		first_printres=1;
+	if((last_printres<=0) || (last_printres>seq_data.ncols))
+		last_printres=seq_data.ncols;
+	if(first_printres>last_printres)
+	{
+		error("Bad residue range - cannot write postscript");
+		fclose(fd);
+		return;
+	}
+	if (blength==0 || last_printres-first_printres+1<blength) 
+		blocklen=last_printres-first_printres+1;
+	else
+		blocklen=blength;
+
+/* count the residues of each sequence that precede the printed range */
+	res_number=(int *)ckalloc((name_data.nseqs+1)*sizeof(int));
+	for(i=0;i<name_data.nseqs;i++)
+	{
+		res_number[i]=0;
+		for(j=0;j<first_printres-1;j++)
+			if(isalpha(seq_data.lines[i][j])) res_number[i]++;
+	}
+	if(resno)
+		numecols=MAXRESNO+1+max_names;
+	else
+		numecols=1+max_names;
+
+/* print out the PS revision level etc. */
+	ttime = time(tptr);
+	str_time = ctime(&ttime);
+	print_ps_info(fd,pagesize);
+
+/* calculate scaling factors, block sizes to fit the page etc. */
+
+        if (resize==FALSE || blocklen==last_printres-first_printres+1)
+        {
+/* split the alignment into blocks of sequences. If the blocks are too long
+for the page - tough! */
+		if(resize==FALSE)
+                	ps_scale=1.0;
+		else
+			ps_scale=(float)ppix_width/(float)((blocklen+numecols)*CHARWIDTH);
+		ps_xtrans= LEFTMARGIN * (1-ps_scale);
+		ps_ytrans= ppix_height * (1-ps_scale);
+		if (pagesize!=A3 && orientation==LANDSCAPE)
+			ps_xtrans-=LEFTMARGIN;
+
+        	pchar_height=((maxy/CHARHEIGHT)-main_header)/ps_scale;
+        	maxseq=pchar_height-numelines;
+		if (maxseq<=0)
+		{
+/* no room for even one sequence line per page */
+			error("illegal combination of print parameters");
+			res_number=ckfree((void *)res_number);
+			fclose(fd);
+			return;
+		}
+		block_height = (maxseq+numelines) * CHARHEIGHT;
+/* round up: an exact multiple of maxseq must not produce an extra page,
+which would be printed from beyond the last sequence */
+		numpages = (numseqs + maxseq - 1) / maxseq;
+		seq=0;
+		for (page=0;page<numpages;page++)
+		{
+/* print the top of page header */
+			print_page_header(fd,ps_rotation,maxx,maxy,
+			   page,numpages,header,str_time,
+			   ps_file,ps_xtrans,ps_ytrans,ps_scale);
+
+			block_top = maxy - main_header*CHARHEIGHT;
+			block_left = LEFTMARGIN + (1+max_names)*CHARWIDTH; 
+			header_top = block_top;
+
+			fr=first_printres-1;
+			lr=last_printres-1;
+/*  show the header lines */
+			for (i=0;i<nhead;i++)
+				print_header_line(fd,name_data,seq_data,i,fr,lr);
+
+			seq_top = block_top-nhead*CHARHEIGHT;
+/*  show the sequence lines */
+			for (row=0;row<maxseq ;row++)
+			{
+				if(resno)
+				{
+					for(i=fr;i<=lr;i++)
+						if(isalpha(seq_data.lines[seq][i]))
+							res_number[seq]++;
+				}
+				print_seq_line(fd,name_data,seq_data,row,seq,fr,lr,res_number[seq]);
+				seq++;
+				if(seq>=numseqs)
+				{
+					row++;
+					break;
+				}
+			}
+
+			footer_top = seq_top-row*CHARHEIGHT;
+/*  show the footer lines */
+			for (i=0;i<nfoot;i++)
+				print_footer_line(fd,name_data,seq_data,i,fr,lr);
+
+			curve_top = footer_top-nfoot*CHARHEIGHT;
+/* show the quality curve */
+			if(show_curve)
+				print_quality_curve(fd,seq_data,fr,lr,score_height);
+
+			fprintf(fd,"\nshowpage\n");
+			fprintf(fd,"restore\n");
+		}
+        }
+        else
+        {
+/* split the alignment into blocks of residues, and scale the blocks to fit the page */
+        	maxseq=ppix_height/CHARHEIGHT-numelines-main_header;
+		hscale=(float)maxseq/(float)numseqs;
+		wscale=(float)ppix_width/(float)((blocklen+numecols)*CHARWIDTH);
+                ps_scale=MIN(hscale,wscale);
+		ps_xtrans= LEFTMARGIN * (1-ps_scale);
+		ps_ytrans= ppix_height * (1-ps_scale);
+		if (pagesize!=A3 && orientation==LANDSCAPE)
+			ps_xtrans-=LEFTMARGIN;
+
+        	pchar_height=((maxy/CHARHEIGHT)-main_header)/ps_scale;
+        	maxseq=pchar_height-numelines;
+		block_height = (numseqs+numelines) * CHARHEIGHT;
+		blocksperpage = pchar_height/(numseqs+numelines);
+		if (blocksperpage==0)
+		{
+			error("illegal combination of print parameters");
+			res_number=ckfree((void *)res_number);
+			fclose(fd);
+			return;
+		}
+		numblocks = (last_printres-first_printres) / blocklen + 1;
+        	if (numblocks % blocksperpage == 0)
+			numpages = numblocks / blocksperpage;
+        	else
+			numpages = numblocks / blocksperpage + 1;
+
+		for (bn=0;bn<numblocks;bn++)
+		{
+			page = bn / blocksperpage;
+/* print the top of page header */
+			if (bn % blocksperpage == 0)
+				print_page_header(fd,ps_rotation,maxx,maxy,
+			   	page,numpages,header,str_time,
+			   	ps_file,ps_xtrans,ps_ytrans,ps_scale);
+
+			block_top = maxy - main_header*CHARHEIGHT-block_height*(bn%blocksperpage);
+			block_left = LEFTMARGIN + (1+max_names)*CHARWIDTH; 
+			header_top = block_top;
+			seq_top = block_top-nhead*CHARHEIGHT;
+			footer_top = block_top-(nhead+numseqs)*CHARHEIGHT;
+			curve_top = block_top-(nhead+numseqs+nfoot)*CHARHEIGHT;
+
+			fr=first_printres-1 + blocklen*bn;
+			lr=fr+blocklen-1;
+			if(lr>=last_printres) lr=last_printres-1;
+/*  show the header lines */
+			for (i=0;i<nhead;i++)
+				print_header_line(fd,name_data,seq_data,i,fr,lr);
+
+/*  show the sequence lines */
+			for (i=0;i<numseqs;i++)
+			{
+				row = i % maxseq;
+				if(resno)
+				{
+					for(j=fr;j<=lr;j++)
+						if(isalpha(seq_data.lines[i][j]))
+							res_number[i]++;
+				}
+				print_seq_line(fd,name_data,seq_data,row,i,fr,lr,res_number[i]);
+			}
+/*  show the footer lines */
+			for (i=0;i<nfoot;i++)
+				print_footer_line(fd,name_data,seq_data,i,fr,lr);
+
+/* show the quality curve */
+			if(show_curve)
+				print_quality_curve(fd,seq_data,fr,lr,score_height);
+
+/* close the page after its last block, and after the very last block */
+			if ((bn == (numblocks-1)) || ((bn % blocksperpage == blocksperpage-1)))
+			{
+				fprintf(fd,"\nshowpage\n");
+				fprintf(fd,"restore\n");
+			}
+		}
+	}
+	res_number=ckfree((void *)res_number);
+	fclose(fd);
+	return;
+}
+
+/*
+ * init_printer_lut()
+ *
+ * Sets the printer rgb values (pr, pg, pb) of every entry of
+ * color_lut[].  They are first reset to the screen values (r, g, b);
+ * then, if the printer color file "filename" can be located with
+ * find_file() and opened, each of its lines (name r g b) overrides the
+ * printer values of the color_lut entries with that name.
+ *
+ * Return value:
+ *	0 when the printer file was read, 1 when no file name was given
+ *	or the file could not be found or opened.
+ */
+static int init_printer_lut(char *filename)
+{ 
+	FILE *fd;
+	char sinline[1025];
+	char *args[10];
+	int i,numargs;
+	int lineno=0;
+	char *par_file=NULL;
+
+/* reset the printer rgb colors to the color file rgb values */
+	for(i=0;i<ncolors;i++)
+	{
+		color_lut[i].pr=color_lut[i].r;
+		color_lut[i].pg=color_lut[i].g;
+		color_lut[i].pb=color_lut[i].b;
+	}
+
+/* search for the printer color file */
+	if(filename==NULL || filename[0]==EOS) return 1;
+	par_file=find_file(filename);
+	if(par_file==NULL)
+	{
+		error("Cannot find printer file %s",filename);
+		return 1;
+	}
+	if ((fd=fopen(par_file,"r"))==NULL)
+	{
+		error("Cannot open printer file %s",par_file);
+		ckfree(par_file);
+		return 1;
+	}
+
+	for (;fgets(sinline,1024,fd)!=NULL;)
+	{
+		lineno++;
+		if (commentline(sinline)) continue;
+		numargs = getargs(sinline, args, 4);
+		if (numargs != 4)
+		{
+			/* report the line of the file that is at fault */
+			error("Problem in parameter file - line %d\n",lineno);
+			break;
+		}
+		else
+		{
+/* we've found a color - find the index the color lut */
+			/* the name is compared in place; copying it into a
+			   fixed-size buffer could overflow on long names */
+			for(i=0;i<ncolors;i++)
+			{
+				if(strcmp(args[0],color_lut[i].name)==0)
+				{
+					color_lut[i].pr=atof(args[1]);
+					color_lut[i].pg=atof(args[2]);
+					color_lut[i].pb=atof(args[3]);
+				}
+			}
+		}
+	}
+	fclose(fd);
+	ckfree(par_file);
+	return 0;
+}
+
+/*
+ * Write the PostScript prologue to fd: the leading comment lines and the
+ * definitions of the procedures box, color_char, cbox, color_inv and
+ * white_inv that the line-printing routines invoke.  When pagesize is A3
+ * an extra statusdict command selecting a3 is appended.
+ */
+static void print_ps_info(FILE *fd,int pagesize)
+{
+/* leading %-comment lines */
+	fprintf(fd,"%%!PS-Adobe-1.0\n"
+		"%%%%Creator: Julie Thompson\n"
+		"%%%%Title:ClustalX Alignment\n"
+		"%%%%EndComments\n");
+
+/* box: closed path CHARWIDTH wide and CHARHEIGHT high, starting 3 below the origin */
+	fprintf(fd,"/box { newpath\n"
+		"\t-0 -3 moveto\n"
+		"\t-0 %d lineto\n"
+		"\t%d %d lineto\n"
+		"\t%d -3 lineto\n"
+		"\tclosepath\n"
+		"      } def\n\n",
+		CHARHEIGHT-3,CHARWIDTH,CHARHEIGHT-3,CHARWIDTH);
+
+/* color_char: set a color, move, show a string */
+	fprintf(fd,"/color_char { gsave\n"
+		"\tsetrgbcolor\n"
+		"\tmoveto\n"
+		"\tshow\n"
+		"\tgrestore\n"
+		"      } def\n\n");
+
+/* cbox: translate, then fill the path through the origin and three points */
+	fprintf(fd,"/cbox { gsave\n"
+		"\ttranslate\n"
+		"\tnewpath\n"
+		"\t0 0 moveto\n"
+		"\tlineto\n"
+		"\tlineto\n"
+		"\tlineto\n"
+		"\tclosepath\n"
+		"\tfill\n"
+		"\tgrestore\n"
+		"      } def\n\n");
+
+/* color_inv: fill a box in the given color, then show the string */
+	fprintf(fd,"/color_inv { gsave\n"
+		"\tsetrgbcolor\n"
+		"\ttranslate\n"
+		"\tbox fill\n"
+		"\tgrestore\n"
+		"\tmoveto\n"
+		"\tshow\n"
+		"      } def\n\n");
+
+/* white_inv: fill a box in one color, then show the string in a second color */
+	fprintf(fd,"/white_inv { gsave\n"
+		"\tsetrgbcolor\n"
+		"\ttranslate\n"
+		"\tbox fill\n"
+		"\tgrestore\n"
+		"\tgsave\n"
+		"\tsetrgbcolor\n"
+		"\tmoveto\n"
+		"\tshow\n"
+		"\tgrestore\n"
+		"      } def\n\n");
+
+/* For canon color printer, use a3tray instead of a3!! */
+	if (pagesize==A3)
+	{
+		fprintf(fd,"statusdict begin a3 end\n\n");
+	}
+}
+
+/*
+ * Start a new PostScript page on fd.
+ *
+ * Emits the %%Page comment and "save", applies the -90 degree rotation
+ * (translate by maxx, then rotate) when ps_rotation is -90, optionally
+ * prints a page header (title, file name, date and "Page n of m"), and
+ * finally sets the translation, scale and Courier-Bold font used for the
+ * alignment itself.
+ *
+ * page is printed as given in the %%Page comment and as page+1 in the
+ * visible "Page n of m" text.
+ *
+ * NOTE(review): ps_file and str_time are written inside PostScript string
+ * parentheses without escaping; a name with an unbalanced parenthesis or a
+ * backslash would presumably produce an invalid file - confirm.
+ */
+static void print_page_header(FILE *fd,int ps_rotation,int maxx,int maxy,
+int page,int numpages,Boolean header,char *str_time,
+char *ps_file,int ps_xtrans,int ps_ytrans,float ps_scale)
+{
+	int ps_x,ps_y;
+	int len;	/* text length as a signed int, see below */
+	char tstr[50];
+
+	fprintf(fd,"%%%%Page: P%d\n",page);
+	fprintf(fd,"save\n\n");
+
+	if (ps_rotation==-90)
+	{
+		fprintf(fd,"0 %d translate\n",maxx);
+		fprintf(fd,"%d rotate\n",ps_rotation);
+	}
+
+	if (header)
+	{
+/* Title.  The text is written straight to the file rather than through the
+   50-byte buffer, so a long revision string cannot overflow it.  The length
+   is converted to int before the arithmetic: mixing int with the unsigned
+   result of strlen() would turn a negative offset into a huge positive
+   value before the division. */
+		len = (int)(strlen("CLUSTAL ")+strlen(revision_level)
+			+strlen(" MULTIPLE SEQUENCE ALIGNMENT"));
+		ps_x = (maxx-len*10)/2;
+		ps_y = maxy - 2*CHARHEIGHT;
+		fprintf(fd,"%d %d moveto\n",ps_x,ps_y);
+		fprintf(fd,"/Times-Bold findfont 14 scalefont setfont\n");
+		fprintf(fd,"(CLUSTAL %s MULTIPLE SEQUENCE ALIGNMENT) show\n\n",revision_level);
+
+/* file name */
+		ps_x = 20;
+		ps_y = maxy - 4*CHARHEIGHT;
+		fprintf(fd,"%d %d moveto\n",ps_x,ps_y);
+		fprintf(fd,"(File: %s) show\n\n",ps_file);
+
+/* date, placed from the right-hand edge; again no fixed buffer */
+		len = (int)(strlen("Date: ")+strlen(str_time));
+		ps_x = maxx-len*8-20;
+		ps_y = maxy - 4*CHARHEIGHT;
+		fprintf(fd,"%d %d moveto\n",ps_x,ps_y);
+		fprintf(fd,"(Date: %s) show\n\n",str_time);
+
+/* page counter: two ints cannot exceed the 50-byte buffer */
+		sprintf(tstr,"Page %d of %d",page+1,numpages);
+		ps_x = 20;
+		ps_y = maxy - 5*CHARHEIGHT-4;
+		fprintf(fd,"%d %d moveto\n",ps_x,ps_y);
+		fprintf(fd,"(%s) show\n\n",tstr);
+	}	
+	fprintf(fd,"%d %d translate\n",ps_xtrans,ps_ytrans);
+	fprintf(fd,"%#3.2f %#3.2f scale\n",ps_scale,ps_scale);
+	fprintf(fd,"/Courier-Bold findfont 10 scalefont setfont\n");
+}
+
+/*
+ * Print header line ix of the current block: the header name, right-justified
+ * in max_names columns at LEFTMARGIN, followed by the header characters of
+ * columns fr..lr (inclusive), each drawn with color_inv on white (1.0 1.0 1.0).
+ */
+static void print_header_line(FILE *fd,panel_data name_data, panel_data seq_data,
+int ix,int fr,int lr)
+{
+	int col;
+	int x,y;
+
+	x = LEFTMARGIN;
+	y = header_top - (ix * CHARHEIGHT);
+	fprintf(fd,"%d %d moveto\n",x,y);
+	fprintf(fd,"(%*s ) show\n",max_names,name_data.header[ix]);
+
+	col = fr;
+	while(col<=lr)
+	{
+/* one character cell per alignment column, counted from block_left */
+		x = block_left + (col-fr) * CHARWIDTH; 
+		fprintf(fd,"(");
+		fprintf(fd,"%c",seq_data.header[ix][col]);
+		fprintf(fd,") ");
+		fprintf(fd,"%d %d %d %d 1.0 1.0 1.0 color_inv\n",x,y,x,y);
+		col++;
+	}
+	fprintf(fd,"\n");
+}
+
+/*
+ * Print footer line ix of the current block: the footer name, right-justified
+ * in max_names columns at LEFTMARGIN, followed by the footer characters of
+ * columns fr..lr (inclusive), each drawn with color_inv on white (1.0 1.0 1.0).
+ */
+static void print_footer_line(FILE *fd,panel_data name_data, panel_data seq_data,
+int ix,int fr,int lr)
+{
+	int col;
+	int x,y;
+
+	x = LEFTMARGIN;
+	y = footer_top - (ix * CHARHEIGHT);
+	fprintf(fd,"%d %d moveto\n",x,y);
+	fprintf(fd,"(%*s ) show\n",max_names,name_data.footer[ix]);
+
+	col = fr;
+	while(col<=lr)
+	{
+/* one character cell per alignment column, counted from block_left */
+		x = block_left + (col-fr) * CHARWIDTH; 
+		fprintf(fd,"(");
+		fprintf(fd,"%c",seq_data.footer[ix][col]);
+		fprintf(fd,") ");
+		fprintf(fd,"%d %d %d %d 1.0 1.0 1.0 color_inv\n",x,y,x,y);
+		col++;
+	}
+	fprintf(fd,"\n");
+}
+
+/*
+ * Print the column-score bars below the current block.
+ *
+ * For columns fr+1..lr one filled box (cbox) per column is emitted in dark
+ * grey (0.3 0.3 0.3); its height is colscore[column]/100 of
+ * score_height*CHARHEIGHT, with a minimum of 1.  The color is set back to
+ * black afterwards.
+ *
+ * NOTE(review): the loop starts at fr+1 and the first bar is placed one
+ * character to the right of block_left, so column fr gets no bar - confirm
+ * that this is intended.
+ */
+static void print_quality_curve(FILE *fd,panel_data seq_data,
+int fr,int lr,int score_height)
+{
+	int col;
+	int barwidth,barheight;
+	int x,base;
+
+	barwidth = CHARWIDTH;
+	x = block_left+CHARWIDTH;
+	base = curve_top-score_height*CHARHEIGHT;
+
+	fprintf(fd,"0.3 0.3 0.3 setrgbcolor\n");
+	col = fr+1;
+	while(col<=lr)
+	{
+		fprintf(fd,"%d %d moveto\n",x,base);
+		barheight = score_height*CHARHEIGHT*((float)seq_data.colscore[col]/100.0);
+		if(barheight<1)
+			barheight = 1;
+		fprintf(fd,"%d 0 %d %d 0 %d %d %d cbox\n",
+			barwidth,barwidth,barheight,barheight,x,base);
+		x += CHARWIDTH;
+		col++;
+	}
+	fprintf(fd,"0.0 0.0 0.0 setrgbcolor\n");
+}
+
+/*
+ * Print one sequence line of the current block.
+ *
+ * row is the line's position below seq_top, seq the index into the panel
+ * data.  The name is printed right-justified in max_names columns, then
+ * every residue of columns fr..lr (inclusive):
+ *   - segment exception  : white_inv, 1.0 1.0 1.0 with 0.1 0.1 0.1
+ *   - residue exception  : white_inv, 1.0 1.0 1.0 with 0.4 0.4 0.4
+ *   - otherwise          : color_inv (when inverted is set) or color_char,
+ *                          using the printer rgb of the residue's color.
+ * If res_number is positive it is printed after the last column in a field
+ * of MAXRESNO characters.
+ */
+static void print_seq_line(FILE *fd,panel_data name_data, panel_data seq_data,
+int row,int seq,int fr,int lr,int res_number)
+{
+	int col,lut_ix;
+	int x,y;
+	float r,g,b;
+
+	x = LEFTMARGIN;
+	y = seq_top - (row * CHARHEIGHT);
+	fprintf(fd,"%d %d moveto\n",x,y);
+	fprintf(fd,"(%*s ) show\n",max_names,name_data.lines[seq]);
+
+	col = fr;
+	while(col<=lr)
+	{
+		lut_ix = seq_data.colormask[seq][col];
+		r = color_lut[lut_ix].pr;
+		g = color_lut[lut_ix].pg;
+		b = color_lut[lut_ix].pb;
+		x = block_left + (col-fr) * CHARWIDTH; 
+
+/* the residue itself, as a PostScript string operand */
+		fprintf(fd,"(");
+		fprintf(fd,"%c",seq_data.lines[seq][col]);
+		fprintf(fd,") ");
+
+		if(segment_exceptions && seq_data.segment_exception[seq][col] > 0)
+		{
+			fprintf(fd,"%d %d %1.1f %1.1f %1.1f %d %d %1.1f %1.1f %1.1f white_inv\n",
+				x,y,1.0,1.0,1.0,x,y,0.1,0.1,0.1);
+		}
+		else if(residue_exceptions && seq_data.residue_exception[seq][col] == TRUE)
+		{
+			fprintf(fd,"%d %d %1.1f %1.1f %1.1f %d %d %1.1f %1.1f %1.1f white_inv\n",
+				x,y,1.0,1.0,1.0,x,y,0.4,0.4,0.4);
+		}
+		else if(inverted)
+		{
+			fprintf(fd,"%d %d %d %d %1.1f %1.1f %1.1f color_inv\n",
+				x,y,x,y,r,g,b);
+		}
+		else
+		{
+			fprintf(fd,"%d %d %1.1f %1.1f %1.1f color_char\n",
+				x,y,r,g,b);
+		}
+		col++;
+	}
+
+/* optional residue number to the right of the block, on the same baseline */
+	if(res_number>0)
+	{
+		x = block_left + (lr-fr+1) * CHARWIDTH; 
+		fprintf(fd,"%d %d moveto\n",x,y);
+		fprintf(fd,"(%*d) show\n",MAXRESNO,res_number);
+	}
+	fprintf(fd,"\n");
+}

Added: trunk/packages/clustalw/branches/upstream/current/xdisplay.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/xdisplay.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/xdisplay.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,2191 @@
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+
+#include <vibrant.h>
+#include <document.h>
+
+#include "clustalw.h"
+#include "xmenu.h"
+
+static void VscrollMulti(BaR bar, GraphiC p, Nlm_Int2 newval, Nlm_Int2 oldval);
+static void HscrollMultiN(BaR bar, GraphiC p, Nlm_Int2 newval, Nlm_Int2 oldval);
+static void HscrollMultiS(BaR bar, GraphiC p, Nlm_Int2 newval, Nlm_Int2 oldval);
+static void VscrollPrf1(BaR bar, GraphiC p, Nlm_Int2 newval, Nlm_Int2 oldval);
+static void HscrollPrf1N(BaR bar, GraphiC p, Nlm_Int2 newval, Nlm_Int2 oldval);
+static void HscrollPrf1S(BaR bar, GraphiC p, Nlm_Int2 newval, Nlm_Int2 oldval);
+static void VscrollPrf2(BaR bar, GraphiC p, Nlm_Int2 newval, Nlm_Int2 oldval);
+static void HscrollPrf2N(BaR bar, GraphiC p, Nlm_Int2 newval, Nlm_Int2 oldval);
+static void HscrollPrf2S(BaR bar, GraphiC p, Nlm_Int2 newval, Nlm_Int2 oldval);
+
+static void NameClick(PaneL panel, PoinT pt);
+static void NameDrag(PaneL panel, PoinT pt);
+static void NameRelease(PaneL panel, PoinT pt);
+static void SeqClick(PaneL panel, PoinT pt);
+static void SeqDrag(PaneL panel, PoinT pt);
+static void SeqRelease(PaneL panel, PoinT pt);
+
+static void fit_seq_display(RecT wr,Boolean mv_message);
+static void fit_prf_displays(RecT wr,int numseqs1,int numseqs2,Boolean mv_message);
+
+static void vscrollnames(BaR bar, int newval, int oldval);
+static void hscrollnames(BaR bar, int newval, int oldval);
+static void vscrollseqs(BaR bar, int newval, int oldval);
+static void hscrollseqs(BaR bar, int newval, int oldval);
+
+static void correct_scrollbar(BaR b,int visible,int total,int value,Boolean reset);
+
+static PaneL make_panel(int type,GrouP g,int width,int height,int firstline,int tlines);
+static panel_data free_panel_data(panel_data data);
+static panel_data alloc_name_data(panel_data data);
+static panel_data alloc_seq_data(panel_data data);
+
+extern sint max_names;
+
+extern int    mheader; /* maximum header lines */
+extern int    mfooter; /* maximum footer lines */
+extern int max_plines;     /*   profile align display length */
+extern int min_plines1;     /*   profile align display length */
+extern int min_plines2;     /*   profile align display length */
+extern int loffset,boffset,toffset;
+extern int roffset;
+extern int poffset;
+
+extern Boolean aln_mode;
+extern Boolean fixed_prf_scroll;
+extern Boolean window_displayed;
+
+extern PrompT   message;           /* used in temporary message window */
+
+extern spanel  seq_panel;        /* data for multiple alignment area */
+extern spanel  prf_panel[];       /* data for profile alignment areas */
+extern spanel  active_panel;       /* 'in-use' panel -scrolling,clicking etc. */
+extern FonT datafont;
+extern WindoW mainw;
+extern GrouP  seq_display,prf1_display,prf2_display;
+
+extern int ncolors;
+extern int    inverted;
+
+extern Boolean  dnaflag;
+extern sint     nseqs;
+extern sint    profile1_nseqs;
+extern sint     output_order;
+extern sint     *output_index;
+extern sint     *seqlen_array;
+extern char     **seq_array;
+extern char     **names, **titles;
+extern char     *amino_acid_codes;
+extern sint     gap_pos1, gap_pos2;
+extern char *gap_penalty_mask1,*gap_penalty_mask2;
+extern char *sec_struct_mask1,*sec_struct_mask2;
+extern sint struct_penalties1,struct_penalties2;
+extern sint output_struct_penalties;
+extern Boolean use_ss1, use_ss2;
+
+extern char *explicit_par_file;
+extern char *par_file;
+extern char def_protpar_file[];
+extern char def_dnapar_file[];
+extern sint     ncutseqs;
+extern Boolean residue_exceptions;
+extern Boolean segment_exceptions;
+extern color color_lut[];
+extern char *res_cat1[];
+extern char *res_cat2[];
+
+static range selected_seqs;           /* sequences selected by clicking on names */
+static range selected_res;           /* residues selected by clicking on seqs */
+
+ 
+static int fromvscroll,fromhscroll; /* set by scrolling functions,
+                            used by DrawPanel, draw_names, draw_seqs */
+
+
+/*
+ * Refit the multiple-alignment display to the current rectangle of the
+ * main window.  The message prompt is not moved.
+ */
+void resize_multi_window(void)
+{
+	RecT winrect;
+
+	ObjectRect(mainw,&winrect);
+
+	fit_seq_display(winrect,FALSE);
+}
+
+/*
+ * Refit the two profile displays to the current rectangle of the main
+ * window.  numseqs1 and numseqs2 weight the share of the height given to
+ * each profile; numseqs1 is limited to min_plines1..max_plines and numseqs2
+ * to min_plines2..max_plines (the upper limit is tested first).  The
+ * standard character width and line height are refreshed from datafont
+ * beforehand.  The message prompt is not moved.
+ */
+void resize_prf_window(int numseqs1,int numseqs2)
+{
+	RecT winrect;
+
+	SelectFont(datafont);
+	stdCharWidth=CharWidth('A');
+	stdLineHeight=LineHeight();
+
+	numseqs1 = (numseqs1>max_plines) ? max_plines :
+		((numseqs1<min_plines1) ? min_plines1 : numseqs1);
+	numseqs2 = (numseqs2>max_plines) ? max_plines :
+		((numseqs2<min_plines2) ? min_plines2 : numseqs2);
+
+	ObjectRect(mainw,&winrect);
+
+	fit_prf_displays(winrect,numseqs1,numseqs2,FALSE);
+}
+
+/*
+ * Lay out the multiple-alignment name and sequence panels inside the window
+ * rectangle wr and recompute how many lines and columns each one shows.
+ *
+ * The names panel keeps its width and is placed at (loffset,toffset); the
+ * sequence panel starts at its right edge and extends to roffset from the
+ * right of the window.  Both get the window height minus toffset and
+ * boffset.  When mv_message is set the message prompt is moved so that it
+ * keeps its previous distance below the names panel.  Finally the
+ * scrollbars are repositioned and reset.
+ */
+static void fit_seq_display(RecT wr,Boolean mv_message)
+{
+	int panel_w,panel_h,msg_gap;
+	RecT names_r,seqs_r,msg_r;
+	panel_data data;
+
+/* measure the current layout before anything is moved */
+	ObjectRect(seq_panel.names,&names_r);
+	ObjectRect(message,&msg_r);
+	msg_gap = msg_r.top-names_r.bottom;
+	panel_w = names_r.right-names_r.left;
+	panel_h = wr.bottom-wr.top-boffset-toffset;
+
+/* names panel */
+	names_r.top = toffset;
+	names_r.left = loffset;
+	names_r.bottom = names_r.top+panel_h;
+	names_r.right = names_r.left+panel_w;
+	SetPosition(seq_panel.names,&names_r);
+
+	GetPanelExtra(seq_panel.names,&data);
+	data.vlines = (panel_h-SCOREHEIGHT)/data.lineheight - MARGIN;
+	data.vseqs = data.vlines-data.nhead-data.nfoot;
+	SetPanelExtra(seq_panel.names,&data);
+
+/* sequence panel, to the right of the names */
+	seqs_r.top = names_r.top;
+	seqs_r.left = names_r.right;
+	seqs_r.bottom = seqs_r.top+panel_h;
+	seqs_r.right = wr.right-wr.left-roffset;
+	panel_w = seqs_r.right-seqs_r.left;
+	SetPosition(seq_panel.seqs,&seqs_r);
+
+	GetPanelExtra(seq_panel.seqs,&data);
+	data.vcols = panel_w/data.charwidth - MARGIN*2;
+	data.vlines = (panel_h-SCOREHEIGHT)/data.lineheight - MARGIN;
+	data.vseqs = data.vlines-data.nhead-data.nfoot;
+	SetPanelExtra(seq_panel.seqs,&data);
+
+/* message prompt keeps its height and its gap below the names panel */
+	if(mv_message)
+	{
+		panel_h = msg_r.bottom-msg_r.top;
+		msg_r.top = names_r.bottom+msg_gap;
+		msg_r.bottom = msg_r.top+panel_h;
+		SetPosition(message,&msg_r);
+	}
+
+	position_scrollbars(seq_panel);
+	correct_name_bars(TRUE);
+	correct_seq_bars(TRUE);
+}
+
+/*
+ * Lay out the two profile displays (names and sequence panel each) inside
+ * the window rectangle wr.
+ *
+ * The height left after toffset, boffset and poffset is shared between the
+ * profiles in the ratio numseqs1 : numseqs2.  Profile 1 is placed at
+ * (loffset,toffset), profile 2 starts poffset below it.  For every panel
+ * the visible line, sequence and column counts are recomputed and stored
+ * back.  When mv_message is set the message prompt is moved so that it
+ * keeps the distance it had below the profile 2 names panel.  Finally the
+ * scrollbars are reset.
+ *
+ * NOTE(review): numseqs1+numseqs2 is used as a divisor without a check;
+ * the visible callers clamp both to min_plines1/min_plines2, so this
+ * presumably relies on those minimums being positive - confirm.
+ */
+static void fit_prf_displays(RecT wr,int numseqs1,int numseqs2,Boolean mv_message)
+{
+	int width,height,moffset;
+	RecT mr,nr,sr;
+	panel_data data;
+
+/* current gap between the profile 2 names panel and the message prompt */
+	ObjectRect(prf_panel[1].names,&nr);
+	ObjectRect(message,&mr);
+	moffset=mr.top-nr.bottom;
+
+/* the names panels keep the width profile 1 currently has */
+	ObjectRect(prf_panel[0].names,&nr);
+	width=nr.right-nr.left;
+
+/* profile 1: names panel, then sequence panel to its right */
+	nr.top=toffset;
+	nr.left=loffset;
+	height=(wr.bottom-wr.top-boffset-toffset-poffset)*numseqs1/(numseqs1+numseqs2);
+	nr.bottom=nr.top+height;
+	nr.right=nr.left+width;
+	SetPosition(prf_panel[0].names,&nr);
+	GetPanelExtra(prf_panel[0].names,&data);
+	data.vlines=(height-SCOREHEIGHT)/data.lineheight - MARGIN;
+	data.vseqs=data.vlines-data.nhead-data.nfoot;
+	SetPanelExtra(prf_panel[0].names,&data);
+	sr.top=nr.top;
+	sr.left=nr.right;
+	sr.bottom=sr.top+height;
+	sr.right=wr.right-wr.left-roffset;
+/* from here on width is the sequence panel width, reused for profile 2 */
+	width=sr.right-sr.left;
+	SetPosition(prf_panel[0].seqs,&sr);
+	GetPanelExtra(prf_panel[0].seqs,&data);
+	data.vcols=width/data.charwidth - MARGIN*2;
+	data.vlines=(height-SCOREHEIGHT)/data.lineheight - MARGIN;
+	data.vseqs=data.vlines-data.nhead-data.nfoot;
+	SetPanelExtra(prf_panel[0].seqs,&data);
+	position_scrollbars(prf_panel[0]);
+
+
+
+/* profile 2: same left/right edges, placed poffset below profile 1 */
+	nr.top=nr.bottom+poffset;
+	height=(wr.bottom-wr.top-boffset-toffset-poffset)*numseqs2/(numseqs1+numseqs2);
+	nr.bottom=nr.top+height;
+	SetPosition(prf_panel[1].names,&nr);
+	GetPanelExtra(prf_panel[1].names,&data);
+	data.vlines=(height-SCOREHEIGHT)/data.lineheight - MARGIN;
+	data.vseqs=data.vlines-data.nhead-data.nfoot;
+	SetPanelExtra(prf_panel[1].names,&data);
+	sr.top=nr.top;
+	sr.bottom=sr.top+height;
+	SetPosition(prf_panel[1].seqs,&sr);
+	GetPanelExtra(prf_panel[1].seqs,&data);
+	data.vcols=width/data.charwidth - MARGIN*2;
+	data.vlines=(height-SCOREHEIGHT)/data.lineheight - MARGIN;
+	data.vseqs=data.vlines-data.nhead-data.nfoot;
+	SetPanelExtra(prf_panel[1].seqs,&data);
+	position_scrollbars(prf_panel[1]);
+
+/* message prompt keeps its height and its gap below profile 2 */
+	if(mv_message) {
+		height=mr.bottom-mr.top;
+		mr.top=nr.bottom+moffset;
+		mr.bottom=mr.top+height;
+		SetPosition(message,&mr);
+	}
+
+	correct_name_bars(TRUE);
+	correct_seq_bars(TRUE);
+}
+
+/*
+ * Refit the displays after window w has changed size.
+ *
+ * Does nothing until window_displayed is set.  If the window is too low
+ * for the fixed offsets, the displays and the message prompt are hidden;
+ * otherwise the displays of the current mode are fitted to the window
+ * (moving the message prompt as well) and shown.  In profile mode the
+ * height is shared according to the sequence counts of the two profiles,
+ * limited to min_plines1/min_plines2..max_plines (the lower limit is
+ * tested first).
+ */
+void ResizeWindowProc(WindoW w)
+{
+	int numseqs1,numseqs2;
+	int winheight;
+	RecT wr;
+	panel_data data;
+
+	if(window_displayed==FALSE) return;
+
+	ObjectRect(w,&wr);
+	winheight = wr.bottom-wr.top;
+
+	if (aln_mode==MULTIPLEM)
+	{
+/* if the window is too small, hide everything */
+		if(winheight < toffset+boffset)
+		{
+			Hide(seq_display);
+			Hide(message);
+			return;
+		}
+		fit_seq_display(wr,TRUE);
+		Show(seq_display);
+		Show(message);
+		return;
+	}
+
+/* profile mode */
+/* if the window is too small, hide everything */
+	if(winheight < toffset+boffset+2*poffset)
+	{
+		Hide(prf1_display);
+		Hide(prf2_display);
+		Hide(message);
+		return;
+	}
+
+	GetPanelExtra(prf_panel[0].names,&data);
+	numseqs1 = data.nseqs;
+	if(numseqs1<min_plines1)
+		numseqs1 = min_plines1;
+	else if(numseqs1>max_plines)
+		numseqs1 = max_plines;
+
+	GetPanelExtra(prf_panel[1].names,&data);
+	numseqs2 = data.nseqs;
+	if(numseqs2<min_plines2)
+		numseqs2 = min_plines2;
+	else if(numseqs2>max_plines)
+		numseqs2 = max_plines;
+
+	fit_prf_displays(wr,numseqs1,numseqs2,TRUE);
+	Show(prf1_display);
+	Show(prf2_display);
+	Show(message);
+}
+
+/*
+ * Re-attach the scrollbars of panel pair p to the panels' current
+ * rectangles: each horizontal bar is placed directly below its panel,
+ * keeping its own height, and the vertical bar of the sequence panel is
+ * stretched to the sequence panel's top and bottom, keeping its own left
+ * and right edges.  On WIN_MAC builds AdjustPrnt is called after each move.
+ */
+void position_scrollbars(spanel p)
+{
+	int barheight;
+	RecT hbar,vbar,names_r,seqs_r;
+	panel_data data;
+
+/* horizontal bar under the names panel */
+	ObjectRect(p.names,&names_r);
+	GetPanelExtra(p.names,&data);
+	ObjectRect(data.hscrollbar,&hbar);
+	barheight = hbar.bottom-hbar.top;
+	LoadRect(&hbar,names_r.left,names_r.bottom,names_r.right,names_r.bottom+barheight);
+	SetPosition(data.hscrollbar,&hbar);
+#ifdef WIN_MAC
+	AdjustPrnt(data.hscrollbar,&hbar,FALSE);
+#endif
+
+/* horizontal bar under the sequence panel */
+	ObjectRect(p.seqs,&seqs_r);
+	GetPanelExtra(p.seqs,&data);
+	ObjectRect(data.hscrollbar,&hbar);
+	barheight = hbar.bottom-hbar.top;
+	LoadRect(&hbar,seqs_r.left,seqs_r.bottom,seqs_r.right,seqs_r.bottom+barheight);
+	SetPosition(data.hscrollbar,&hbar);
+#ifdef WIN_MAC
+	AdjustPrnt(data.hscrollbar,&hbar,FALSE);
+#endif
+
+/* vertical bar of the sequence panel */
+	ObjectRect(data.vscrollbar,&vbar);
+	LoadRect(&vbar,vbar.left,seqs_r.top,vbar.right,seqs_r.bottom);
+	SetPosition(data.vscrollbar,&vbar);
+#ifdef WIN_MAC
+	AdjustPrnt(data.vscrollbar,&vbar,FALSE);
+#endif
+}
+
+
+
+
+
+/*
+ * Rebuild the display data of panel pair p from the global sequence arrays.
+ *
+ * fs and ls are the first and last sequence to show, counted from 0; the
+ * global arrays (names, seq_array, seqlen_array) are read at index i+1.
+ * The previous panel data is released, fresh arrays are allocated when at
+ * least one sequence is shown, and names, residues, header lines
+ * (consensus plus structure or gap-penalty data), the ruler footer, column
+ * scores and the color mask are filled in.  When reset is TRUE the first
+ * visible line and column of both panels are set to 0.  The result is
+ * stored back with SetPanelExtra; nothing is drawn here.
+ */
+void load_aln_data(spanel p,int fs,int ls,Boolean reset)
+{
+	int i,j,slength=0;
+	int nhead;
+	sint val;
+	panel_data name_data,seq_data;
+
+	WatchCursor();
+
+/* release the old data and store the result straight back */
+	GetPanelExtra(p.names,&name_data);
+	GetPanelExtra(p.seqs,&seq_data);
+	name_data=free_panel_data(name_data);
+	seq_data=free_panel_data(seq_data);
+	SetPanelExtra(p.names,&name_data);
+	SetPanelExtra(p.seqs,&seq_data);
+
+	name_data.nseqs=ls-fs+1;
+	seq_data.nseqs=name_data.nseqs;
+	name_data.firstseq=fs;
+	seq_data.firstseq=fs;
+
+/* find the maximum length of sequence */
+	for(i=fs;i<=ls;i++)
+           	if (slength < seqlen_array[i+1]) slength = seqlen_array[i+1];
+	name_data.ncols=max_names;
+	seq_data.ncols=slength;
+
+	if (name_data.nseqs>0)
+	{
+		name_data=alloc_name_data(name_data);
+		seq_data=alloc_seq_data(seq_data);
+
+	
+		for(i=fs;i<=ls;i++)
+        	{
+/* name: at most MAXNAMES characters, always terminated */
+                	strncpy(name_data.lines[i-fs],names[i+1],MAXNAMES);
+                	name_data.lines[i-fs][MAXNAMES]='\0';
+/* residues: codes are translated through amino_acid_codes, gaps become '-' */
+                	for(j=0;j<seqlen_array[i+1];j++)
+                	{
+                        	val = seq_array[i+1][j+1];
+/* NOTE(review): -3 / 253 presumably mark the end of the sequence - confirm */
+                        	if((val == -3) || (val == 253))
+                                	break;
+                        	else if((val == gap_pos1) || (val == gap_pos2))
+                                	seq_data.lines[i-fs][j] = '-';
+                        	else {
+                                	seq_data.lines[i-fs][j] = amino_acid_codes[val];
+                        	}
+                	}
+/* pad shorter sequences with blanks up to the longest one, then terminate */
+                	for(j=seqlen_array[i+1];j<slength;j++)
+                        	seq_data.lines[i-fs][j] = ' ';
+			seq_data.lines[i-fs][j]='\0';
+
+			name_data.selected[i-fs]=FALSE;
+
+        	}
+
+
+/* header line 0 is the consensus; line 1 holds structure data or, if
+   make_struct_data returns 0, whatever make_gp_data provides */
+		make_consensus(seq_data,name_data.header[0],seq_data.header[0]);
+		nhead=make_struct_data(seq_data.prf_no,slength,name_data.header[1],seq_data.header[1]);
+		if (nhead==0)
+			nhead=make_gp_data(seq_data.prf_no,slength,name_data.header[1],seq_data.header[1]);
+		seq_data.nhead=name_data.nhead=nhead+1;
+
+/* a single footer line: the ruler */
+		seq_data.nfoot=name_data.nfoot=1;
+		seq_data.consensus=NULL;
+		make_ruler(slength,name_data.footer[0],seq_data.footer[0]);
+		make_colscores(seq_data);
+	}
+	else
+	{
+		seq_data.ncols=name_data.ncols=0;
+	}
+
+	if(reset==TRUE)
+	{
+		name_data.firstvline=0;
+		name_data.firstvcol=0;
+		seq_data.firstvline=0;
+		seq_data.firstvcol=0;
+	}
+/* visible sequence lines = visible lines minus header and footer lines */
+        name_data.vseqs=name_data.vlines-name_data.nhead-name_data.nfoot;
+        seq_data.vseqs=seq_data.vlines-seq_data.nhead-seq_data.nfoot;
+
+	if(seq_data.nseqs>0)
+	{
+/* try to find the user's color parameter file */
+/* unless one was given explicitly, the default DNA or protein file is
+   looked up again and replaces the previous par_file */
+		if (explicit_par_file == NULL)
+		{
+			if (par_file != NULL)
+				ckfree(par_file);
+			if(dnaflag)
+				par_file=find_file(def_dnapar_file);
+			else
+				par_file=find_file(def_protpar_file);
+		}
+        	init_color_parameters(par_file);
+        	make_colormask(seq_data);
+	}
+
+	SetPanelExtra(p.names,&name_data);
+	SetPanelExtra(p.seqs,&seq_data);
+
+	ArrowCursor();
+}
+
+/*
+ * Load sequences fs..ls into panel pair p (see load_aln_data), redraw both
+ * panels and bring the scrollbars up to date.  reset is passed on to the
+ * data loader and to the scrollbar corrections.
+ */
+void load_aln(spanel p,int fs,int ls,Boolean reset)
+{
+/* rebuild the panel data */
+	load_aln_data(p,fs,ls,reset);
+
+/* repaint names first, then sequences */
+	DrawPanel(p.names);
+	DrawPanel(p.seqs);
+
+/* scrollbar ranges depend on the new data */
+	correct_name_bars(reset);
+	correct_seq_bars(reset);
+}
+
+/*
+ * Allocate the arrays of a names panel for data.nseqs sequences and return
+ * the updated structure.
+ *
+ * Allocated: one name line of MAXNAMES+1 characters per sequence (filled
+ * with names[1..nseqs], truncated to MAXNAMES characters), the selection
+ * flags, and mheader header / mfooter footer lines of MAXNAMES+1
+ * characters.  colormask is set to NULL.
+ */
+static panel_data alloc_name_data(panel_data data)
+{
+	int n;
+	const int linebytes = (MAXNAMES+1)*sizeof(char);
+
+	data.lines = (char **)ckalloc((data.nseqs+1)*sizeof(char *));
+	data.colormask = NULL;
+	data.selected = (int *)ckalloc((data.nseqs+1)*sizeof(int));
+
+/* one line per sequence, preloaded with the global name */
+	n = 0;
+	while(n<data.nseqs)
+	{
+		data.lines[n] = (char *)ckalloc(linebytes);
+		strncpy(data.lines[n],names[n+1],MAXNAMES);
+		data.lines[n][MAXNAMES] = '\0';
+		n++;
+	}
+
+/* header lines */
+	data.header = (char **)ckalloc((mheader+1)*sizeof(char *));
+	n = 0;
+	while(n<mheader)
+	{
+		data.header[n] = (char *)ckalloc(linebytes);
+		n++;
+	}
+
+/* footer lines */
+	data.footer = (char **)ckalloc((mfooter+1)*sizeof(char *));
+	n = 0;
+	while(n<mfooter)
+	{
+		data.footer[n] = (char *)ckalloc(linebytes);
+		n++;
+	}
+
+	return data;
+}
+
+/*
+ * Allocate the arrays of a sequence panel for data.nseqs sequences of
+ * data.ncols columns and return the updated structure.
+ *
+ * Allocated: residue lines and color mask (one row per sequence), the
+ * per-column selection flags (all set to FALSE), mheader header lines,
+ * the column scores, the residue and segment exception tables (one row
+ * per sequence) and mfooter footer lines.  Every row has ncols+1 elements.
+ * firstsel and lastsel are set to -1.
+ */
+static panel_data alloc_seq_data(panel_data data)
+{
+	int n;
+	const int rows = data.nseqs+1;	/* pointer slots per table */
+	const int cols = data.ncols+1;	/* elements per row */
+
+	data.lines = (char **)ckalloc(rows*sizeof(char *));
+	data.colormask = (char **)ckalloc(rows*sizeof(char *));
+	data.firstsel = data.lastsel = -1;
+
+/* residues and their colors */
+	n = 0;
+	while(n<data.nseqs)
+	{
+		data.lines[n] = (char *)ckalloc(cols*sizeof(char));
+		data.colormask[n] = (char *)ckalloc(cols*sizeof(char));
+		n++;
+	}
+
+/* no column is selected initially */
+	data.selected = (int *)ckalloc(cols*sizeof(int));
+	for(n=0;n<data.ncols;n++)
+	{
+		data.selected[n] = FALSE;
+	}
+
+/* header lines */
+	data.header = (char **)ckalloc((mheader+1)*sizeof(char *));
+	for(n=0;n<mheader;n++)
+	{
+		data.header[n] = (char *)ckalloc(cols*sizeof(char));
+	}
+
+/* column scores and the two exception tables */
+	data.colscore = (sint *)ckalloc(cols*sizeof(sint));
+	data.residue_exception = (Boolean **)ckalloc(rows*sizeof(Boolean *));
+	for(n=0;n<data.nseqs;n++)
+	{
+		data.residue_exception[n] = (Boolean *)ckalloc(cols*sizeof(Boolean));
+	}
+	data.segment_exception = (short **)ckalloc(rows*sizeof(short *));
+	for(n=0;n<data.nseqs;n++)
+	{
+		data.segment_exception[n] = (short *)ckalloc(cols*sizeof(short));
+	}
+
+/* footer lines */
+	data.footer = (char **)ckalloc((mfooter+1)*sizeof(char *));
+	for(n=0;n<mfooter;n++)
+	{
+		data.footer[n] = (char *)ckalloc(cols*sizeof(char));
+	}
+
+	return data;
+}
+
+/*
+ * Bring the scrollbars of the names panel(s) in line with the panel data.
+ *
+ * In profile mode both profile names panels are handled, otherwise the
+ * multiple-alignment names panel.  When reset is TRUE the first visible
+ * column and line are set to 0 before the bars are corrected, and the new
+ * positions are stored back in the panel data.
+ */
+void correct_name_bars(Boolean reset)
+{
+	panel_data data,data1;
+
+	if(aln_mode==PROFILEM)
+	{
+		GetPanelExtra(prf_panel[0].names,&data);
+		GetPanelExtra(prf_panel[1].names,&data1);
+		if(reset==TRUE)
+		{
+			data.firstvcol=0;
+			data1.firstvcol=0;
+		}
+		correct_scrollbar(data.hscrollbar,data.vcols,data.ncols,data.firstvcol,reset);
+/* profile 2 uses its own position (this used to pass profile 1's column,
+   unlike the vertical bar below) */
+		correct_scrollbar(data1.hscrollbar,data1.vcols,data1.ncols,data1.firstvcol,reset);
+		if(reset==TRUE)
+		{
+			data.firstvline=0;
+			data1.firstvline=0;
+		}
+		correct_scrollbar(data.vscrollbar,data.vseqs,data.nseqs,data.firstvline,reset);
+		correct_scrollbar(data1.vscrollbar,data1.vseqs,data1.nseqs,data1.firstvline,reset);
+		SetPanelExtra(prf_panel[0].names,&data);
+		SetPanelExtra(prf_panel[1].names,&data1);
+	}
+	else
+	{
+		GetPanelExtra(seq_panel.names,&data);
+		if(reset==TRUE)
+		{
+			data.firstvcol=0;
+			data.firstvline=0;
+		}
+		correct_scrollbar(data.vscrollbar,data.vseqs,data.nseqs,data.firstvline,reset);
+		correct_scrollbar(data.hscrollbar,data.vcols,data.ncols,data.firstvcol,reset);
+
+		SetPanelExtra(seq_panel.names,&data);
+	}
+	
+}
+
+/*
+ * Bring the scrollbars of the sequence panel(s) in line with the panel data.
+ *
+ * Multiple-alignment mode: the vertical and horizontal bar of the sequence
+ * panel are corrected from its own data.
+ *
+ * Profile mode: with fixed_prf_scroll set, the horizontal bar of profile 1
+ * is hidden and the bar of profile 2 scrolls both profiles over a common
+ * range; lockoffset records how far each profile lags behind the other.
+ * Otherwise both horizontal bars are shown and corrected independently.
+ * The vertical bars are always independent.
+ *
+ * When reset is TRUE the first visible column and line are set to 0.  The
+ * updated panel data is stored back.
+ */
+void correct_seq_bars(Boolean reset)
+{
+	int maxcols,m1,m2;
+	panel_data data,data1;
+
+	if(aln_mode==PROFILEM)
+	{
+		GetPanelExtra(prf_panel[0].seqs,&data);
+		GetPanelExtra(prf_panel[1].seqs,&data1);
+		if(fixed_prf_scroll==TRUE)
+		{
+			Hide(data.hscrollbar);
+/* common range: largest part left of, plus largest part right of, the
+   current positions */
+/* NOTE(review): m1/m2 are taken before a reset clears firstvcol, so the
+   bar value passed below is the pre-reset one - confirm this is intended */
+			m1=MAX(data.firstvcol,data1.firstvcol);
+			m2=MAX(data.ncols-data.firstvcol,data1.ncols-data1.firstvcol);
+			maxcols=m1+m2;
+			if(reset==TRUE)
+			{
+				data.firstvcol=0;
+				data1.firstvcol=0;
+			}
+			data.lockoffset= -MAX(data1.firstvcol-data.firstvcol,0);
+			data1.lockoffset= -MAX(data.firstvcol-data1.firstvcol,0);
+			correct_scrollbar(data1.hscrollbar,data1.vcols,maxcols,m1,TRUE);
+		}
+		else
+		{
+			Show(data.hscrollbar);
+			if(reset==TRUE)
+			{
+				data.firstvcol=0;
+				data1.firstvcol=0;
+			}
+			data.lockoffset=0;
+			data1.lockoffset=0;
+			correct_scrollbar(data.hscrollbar,data.vcols,data.ncols,data.firstvcol,reset);
+/* independent scrolling: profile 2's bar follows profile 2's own column
+   (this used to pass profile 1's) */
+			correct_scrollbar(data1.hscrollbar,data1.vcols,data1.ncols,data1.firstvcol,reset);
+		}
+		if(reset==TRUE)
+		{
+			data.firstvline=0;
+			data1.firstvline=0;
+		}
+		correct_scrollbar(data.vscrollbar,data.vseqs,data.nseqs,data.firstvline,reset);
+/* likewise the vertical bar of profile 2 follows profile 2's own first line */
+		correct_scrollbar(data1.vscrollbar,data1.vseqs,data1.nseqs,data1.firstvline,reset);
+		SetPanelExtra(prf_panel[0].seqs,&data);
+		SetPanelExtra(prf_panel[1].seqs,&data1);
+	}
+	else
+	{
+		GetPanelExtra(seq_panel.seqs,&data);
+		if(reset==TRUE)
+		{
+			data.firstvcol=0;
+			data.firstvline=0;
+		}
+		correct_scrollbar(data.vscrollbar,data.vseqs,data.nseqs,data.firstvline,reset);
+		correct_scrollbar(data.hscrollbar,data.vcols,data.ncols,data.firstvcol,reset);
+
+		SetPanelExtra(seq_panel.seqs,&data);
+	}
+	
+}
+
+/*
+ * Set page size, value and maximum of scrollbar b.
+ *
+ * visible is the number of items shown at once, total the number of items
+ * and value the current position.  The maximum is total-visible when
+ * visible is positive and smaller than total, otherwise 0.  With reset set
+ * the bar value is first put to 0.  A NULL bar is ignored.
+ */
+static void correct_scrollbar(BaR b,int visible,int total,int value,Boolean reset)
+{
+	int top;
+
+	if (b==NULL)
+		return;
+
+	top = (visible > 0 && total > visible) ? total-visible : 0;
+
+	if(reset==TRUE)
+	{
+		CorrectBarValue(b,0);
+	}
+	CorrectBarPage(b,visible,visible);
+	CorrectBarValue(b,value);
+	CorrectBarMax(b,top);
+}
+
+
+/*
+ * Recompute the color mask of the multiple-alignment sequence panel and
+ * redraw it.  Nothing happens when the panel holds no sequences.
+ */
+void color_seqs(void)
+{
+	panel_data seqs;
+
+	GetPanelExtra(seq_panel.seqs,&seqs);
+	if (seqs.nseqs != 0)
+	{
+		info("Coloring sequences...");
+		make_colormask(seqs);
+		DrawPanel(seq_panel.seqs);
+		info("Done.");
+	}
+}
+
+/*
+ * Recompute the color mask of the profile 1 sequence panel and redraw it.
+ * Nothing happens when the panel holds no sequences.
+ */
+void color_prf1(void)
+{
+	panel_data seqs;
+
+	GetPanelExtra(prf_panel[0].seqs,&seqs);
+	if (seqs.nseqs != 0)
+	{
+		make_colormask(seqs);
+		info("Coloring profile 1...");
+		DrawPanel(prf_panel[0].seqs);
+		info("Done.");
+	}
+}
+
+/*
+ * Recompute the color mask of the profile 2 sequence panel and redraw it.
+ * Nothing happens when the panel holds no sequences.
+ */
+void color_prf2(void)
+{
+	panel_data seqs;
+
+	GetPanelExtra(prf_panel[1].seqs,&seqs);
+	if (seqs.nseqs != 0)
+	{
+		make_colormask(seqs);
+		info("Coloring profile 2...");
+		DrawPanel(prf_panel[1].seqs);
+		info("Done.");
+	}
+}
+
+/*
+ * Delete every alignment column that holds a gap in all of the sequences
+ * fseq..lseq (indices into seq_array / seqlen_array, positions counted
+ * from 1).
+ *
+ * For each such column the residues to its right are moved one position
+ * left in every sequence and the sequence lengths are decremented.  The
+ * secondary-structure and gap-penalty masks of profile prf_no (1 or 2),
+ * where present, are shifted the same way; they are indexed from 0.
+ * Nothing is done when fseq>=lseq, i.e. for fewer than two sequences.
+ *
+ * NOTE(review): the column loop and the mask shifts are bounded by
+ * seqlen_array[fseq] only, which presumably assumes all sequences in the
+ * range have the same (aligned) length - confirm.
+ */
+void remove_gap_pos(int fseq, int lseq,int prf_no)
+{
+	int i,j,k,ngaps;
+
+
+	if (fseq>=lseq) return;
+
+/* i is only advanced when column i is kept: after a deletion the next
+   column has moved into position i */
+	for (i=1;i<=seqlen_array[fseq];)
+	{
+		ngaps=0;
+		for (j=fseq;j<=lseq;j++)
+			if(seq_array[j][i]==gap_pos1 || seq_array[j][i]==gap_pos2) ngaps++;
+		if (ngaps==lseq-fseq+1)
+		{
+			for (j=fseq;j<=lseq;j++)
+			{
+/* the copy runs to seqlen+1, so the element after the last residue moves too */
+				for(k=i+1;k<=seqlen_array[j]+1;k++)
+					seq_array[j][k-1]=seq_array[j][k];
+				seqlen_array[j]--;
+			}
+/* seqlen_array[fseq] is already the new length here; mask element i-1
+   belongs to the deleted column */
+			if(prf_no==1 && sec_struct_mask1 != NULL)
+				for(k=i;k<=seqlen_array[fseq];k++)
+					sec_struct_mask1[k-1]=sec_struct_mask1[k];
+			if(prf_no==1 && gap_penalty_mask1 != NULL)
+				for(k=i;k<=seqlen_array[fseq];k++)
+					gap_penalty_mask1[k-1]=gap_penalty_mask1[k];
+			if(prf_no==2 && sec_struct_mask2 != NULL)
+				for(k=i;k<=seqlen_array[fseq];k++)
+					sec_struct_mask2[k-1]=sec_struct_mask2[k];
+			if(prf_no==2 && gap_penalty_mask2 != NULL)
+				for(k=i;k<=seqlen_array[fseq];k++)
+					gap_penalty_mask2[k-1]=gap_penalty_mask2[k];
+			if(seqlen_array[fseq]<=0) break;
+		}
+		else i++;
+	}
+}
+
+/* width and height passed here are in pixels */
+
+/*
+ * Create a names or sequence panel in group g and attach its initial
+ * panel_data.
+ *
+ * type selects NAMES or a sequence panel; width and height are in pixels.
+ * firstseq and nseqs give the range of global sequences (counted from 1)
+ * the panel stands for; the stored firstseq is firstseq-1.  ncols is the
+ * longest name (NAMES) or the longest sequence in that range.  All data
+ * arrays and both scrollbars start out as NULL, with no header or footer
+ * lines and nothing selected.  Returns the new panel.
+ */
+static PaneL make_panel(int type,GrouP g,int width,int height,int firstseq,int nseqs)
+{
+	int seqno,len,longest=0;
+	const int lastseq = firstseq+nseqs-1;
+	PaneL panel;
+	panel_data data;
+
+	data.type = type;
+	SelectFont(datafont);
+	data.lineheight = LineHeight();
+	data.charwidth = CharWidth('A');
+
+	if(type==NAMES)
+	{
+/* find the maximum length of sequence name */
+		for (seqno=firstseq;seqno<=lastseq;seqno++)
+		{
+			len = strlen(names[seqno]);
+			if (longest < len)
+				longest = len;
+		}
+		data.vcols = width/data.charwidth - MARGIN*2 - DNUMBER;
+	}
+	else
+	{
+/* find the maximum length of sequence */
+		for (seqno=firstseq;seqno<=lastseq;seqno++)
+		{
+			if (longest < seqlen_array[seqno])
+				longest = seqlen_array[seqno];
+		}
+		data.vcols = width/data.charwidth - MARGIN*2;
+	}
+
+/* no data loaded yet */
+	data.lines = NULL;
+	data.header = NULL;
+	data.footer = NULL;
+	data.consensus = NULL;
+	data.colormask = NULL;
+	data.nhead = 0;
+	data.nfoot = 0;
+
+/* geometry and extent */
+	data.vlines = (height-SCOREHEIGHT)/data.lineheight - MARGIN;
+	data.vseqs = data.vlines-data.nhead-data.nfoot;
+	data.nseqs = nseqs;
+	data.ncols = longest;
+	data.firstseq = firstseq-1;
+	data.firstvline = 0;
+	data.firstvcol = 0;
+	data.lockoffset = 0;
+	data.ascent = Ascent();
+	data.descent = Descent();
+
+/* nothing selected, no scores, no scrollbars attached */
+	data.selected = NULL;
+	data.firstsel = -1;
+	data.lastsel = -1;
+	data.colscore = NULL;
+	data.seqweight = NULL;
+	data.subgroup = NULL;
+	data.residue_exception = NULL;
+	data.segment_exception = NULL;
+	data.vscrollbar = NULL;
+	data.hscrollbar = NULL;
+
+	panel = AutonomousPanel(g, width, height, DrawPanel, NULL,NULL,sizeof(panel_data), NULL, NULL);
+	SetPanelExtra(panel, &data);
+
+	return panel;
+}
+ 
+/*
+ * Draw callback for the name and sequence panels.
+ *
+ * On a full redraw (neither fromvscroll nor fromhscroll set) the visible
+ * line and column counts are recomputed from the panel's current
+ * rectangle and datafont, stored back, and the frame is drawn and the
+ * interior erased; an empty panel is finished at that point.  Header,
+ * footer and column scores are drawn unless a vertical scroll is in
+ * progress, then the name or sequence lines.
+ */
+void DrawPanel(PaneL p)
+{
+	RecT r;
+        panel_data data;
+	int pixelwidth,pixelheight;
+
+	UseWindow(mainw);
+	Select(p);
+
+/* Always fetch the panel data: data.type is needed below even when a
+   scroll is in progress and the block that follows is skipped.  It used
+   to be read uninitialised in that case. */
+	GetPanelExtra(p, &data);
+
+	if (fromvscroll==0 && fromhscroll==0)
+	{
+		ObjectRect(p,&r);
+        	pixelwidth=r.right-r.left;
+        	pixelheight=r.bottom-r.top;
+
+		SelectFont(datafont);
+		data.lineheight=LineHeight();
+		data.charwidth=CharWidth('A');
+		if (data.type==NAMES)
+			data.vcols=pixelwidth/data.charwidth-MARGIN*2-DNUMBER;
+		else
+			data.vcols=pixelwidth/data.charwidth-MARGIN*2;
+		data.vlines=(pixelheight-SCOREHEIGHT)/data.lineheight - MARGIN;
+		data.vseqs=data.vlines-data.nhead-data.nfoot;
+		if(data.vseqs<0)data.vseqs=0;
+		if(data.vcols<0)data.vcols=0;
+		SetPanelExtra(p, &data);
+/* draw the outside frame */
+		ObjectRect (p, &r);
+		Black();
+		FrameRect(&r);
+		InsetRect(&r,1,1);
+		black_on_white();
+		EraseRect(&r);
+		if(data.nseqs == 0) return;
+	}
+
+/* draw the structure and gap penalty data */
+/* draw the footer */
+	if (fromvscroll==0)
+	{
+		draw_header(p);
+		draw_footer(p);
+		draw_colscores(p);
+	}
+
+/* draw the data lines */
+	if (data.type==NAMES)
+		draw_names(p);
+	else
+		draw_seqs(p);
+
+
+}
+
+/*
+ * Horizontal scroll of the active names panel: store newval as the first
+ * visible column and redraw the names when at least one sequence line is
+ * visible.  bar and oldval are not used.
+ */
+void hscrollnames(BaR bar, int newval, int oldval)
+{
+	PaneL names = active_panel.names;
+	panel_data data;
+
+	GetPanelExtra(names, &data);
+	data.firstvcol = newval;
+	SetPanelExtra(names, &data);
+	Select(names);
+
+	if (data.vseqs>0)
+	{
+		draw_names(names);
+	}
+}
+
+/*
+ * Vertical scroll of the active names panel: store newval as the first
+ * visible line and redraw the names when at least one sequence line is
+ * visible.  bar and oldval are not used.
+ */
+void vscrollnames(BaR bar, int newval, int oldval)
+{
+	PaneL names = active_panel.names;
+	panel_data data;
+
+	GetPanelExtra(names, &data);
+	data.firstvline = newval;
+	SetPanelExtra(names, &data);
+	Select(names);
+
+	if (data.vseqs>0)
+	{
+		draw_names(names);
+	}
+}
+
+/*
+ * Vertical scroll of the active sequence panel.
+ *
+ * newval becomes the first visible line.  For a step of exactly one line,
+ * the area holding the sequence lines is shifted with ScrollRect and
+ * fromvscroll is set to +1 (down) or -1 (up) before the sequences are
+ * drawn.  In every other case, or when fewer than 3 sequence lines are
+ * visible or the panel was not filled at the previous position,
+ * fromvscroll is 0 and no area is shifted.  Nothing is drawn when no
+ * sequence line is visible.
+ */
+void vscrollseqs(BaR bar, int newval, int oldval)
+{
+	PaneL seqs = active_panel.seqs;
+	panel_data data;
+	RecT block,rect;
+	int prevline;
+
+	GetPanelExtra(seqs, &data);
+	prevline = data.firstvline;
+	data.firstvline = newval;
+	SetPanelExtra(seqs, &data);
+	Select(seqs);
+
+	if (data.vseqs<=0) return;
+
+/* decide the kind of redraw first ... */
+	if (data.vseqs<3 || data.nseqs-prevline < data.vseqs)
+		fromvscroll = 0;
+	else if (newval == oldval + 1)
+		fromvscroll = 1;
+	else if (newval == oldval - 1)
+		fromvscroll = -1;
+	else
+		fromvscroll = 0;
+
+/* ... then, for a single-line step, shift the sequence area by one line */
+	if (fromvscroll != 0)
+	{
+		ObjectRect(seqs, &rect);
+		InsetRect(&rect,1,1);
+		block.top = rect.top+(data.nhead)*data.lineheight+data.descent+1;
+		block.bottom = block.top+(data.vseqs)*data.lineheight;
+		block.left = rect.left;
+		block.right = rect.right;
+		if (fromvscroll == 1)
+			ScrollRect(&block, 0, -data.lineheight);
+		else
+			ScrollRect(&block, 0, data.lineheight);
+	}
+
+	draw_seqs(seqs);
+}
+
+/*
+ * Horizontal scroll handler for the sequence panel of the active panel pair.
+ *
+ * Stores newval+lockoffset as the first visible column.  For a move of
+ * exactly one column (with at least three visible columns) the panel is
+ * shifted with ScrollRect() and fromhscroll is set to +1/-1 so draw_seqs()
+ * paints a single column; otherwise fromhscroll is 0.  Header, sequences,
+ * footer and column scores are then redrawn in that order.
+ */
+void hscrollseqs(BaR bar, int newval, int oldval)
+{
+	PaneL seq_panel = active_panel.seqs;
+	panel_data info;
+	RecT area;
+	int delta;
+
+	GetPanelExtra(seq_panel, &info);
+	info.firstvcol = newval + info.lockoffset;
+	SetPanelExtra(seq_panel, &info);
+	Select(seq_panel);
+
+	if (info.vcols <= 0)
+		return;
+
+	/* with fewer than three visible columns everything is repainted */
+	delta = (info.vcols < 3) ? 0 : newval - oldval;
+
+	switch (delta)
+	{
+	case 1:
+		fromhscroll = 1;
+		ObjectRect(seq_panel, &area);
+		InsetRect(&area, 1, 1);
+		area.left += info.charwidth;
+		ScrollRect(&area, -info.charwidth, 0);
+		break;
+	case -1:
+		fromhscroll = -1;
+		ObjectRect(seq_panel, &area);
+		InsetRect(&area, 1, 1);
+		area.right = area.left + (info.vcols + 1) * info.charwidth;
+		ScrollRect(&area, info.charwidth, 0);
+		break;
+	default:
+		fromhscroll = 0;
+		break;
+	}
+
+	draw_header(seq_panel);
+	draw_seqs(seq_panel);
+	draw_footer(seq_panel);
+	draw_colscores(seq_panel);
+}
+
+/*
+ * Repaint the name lines of panel p.
+ *
+ * fromvscroll selects the rows: 0 = every visible row, -1 = only the
+ * first visible row, anything else = only the last visible row.  Each row
+ * is drawn through draw_nameline() in HIGHLIGHT or NORMAL format according
+ * to its entry in data.selected.  Nothing is drawn when data.lines is NULL.
+ */
+void draw_names(PaneL p)
+{
+	int row, first_row, last_row;
+	panel_data info;
+
+	UseWindow(mainw);
+	Select(p);
+	GetPanelExtra(p, &info);
+	if (info.lines == NULL)
+		return;
+	SelectFont(datafont);
+
+	switch (fromvscroll)
+	{
+	case 0:
+		first_row = info.firstvline;
+		last_row = info.firstvline + info.vseqs - 1;
+		break;
+	case -1:
+		first_row = last_row = info.firstvline;
+		break;
+	default:
+		first_row = last_row = info.firstvline + info.vseqs - 1;
+		break;
+	}
+
+	/* never run past the last sequence */
+	if (last_row >= info.nseqs)
+		last_row = info.nseqs - 1;
+
+	for (row = first_row; row <= last_row; row++)
+		draw_nameline(p, row, row,
+			(info.selected[row] == TRUE) ? HIGHLIGHT : NORMAL);
+}
+
+/*
+ * Repaint the sequence rows of panel p.
+ *
+ * The global one-shot hints fromhscroll / fromvscroll (set by the scroll
+ * handlers) restrict the work:
+ *   fromhscroll == -1  only the first visible column is drawn
+ *   fromhscroll ==  1  only the last visible column is drawn
+ *   fromvscroll == -1  only the first visible row is drawn
+ *   fromvscroll ==  1  only the last visible row is drawn
+ *   otherwise          all visible rows are erased and redrawn
+ * Both hints are cleared on normal exit.  Returns early (hints untouched)
+ * when data.lines is NULL, and after erasing when data.nseqs is 0.
+ *
+ * Fix: the selected-column background is now also painted when the
+ * selection starts left of the window and ends right of it; the previous
+ * test required one of the two end columns to be visible.
+ */
+void draw_seqs(PaneL p)
+{
+	int i,f,l,s,x,y,format;
+	int fs,ls;
+	int endcol;	/* one past the last visible column */
+	panel_data data;
+	PoinT pt;
+	RecT r,block;
+
+	UseWindow(mainw);
+	Select(p);
+	GetPanelExtra(p,&data);
+	if(data.lines==NULL) return;
+	SelectFont(datafont);
+	black_on_white();
+	if (fromhscroll==-1)
+	{
+		f=data.firstvcol;
+		if ((f>=data.firstsel) && (f<=data.lastsel))
+			format=HIGHLIGHT;
+		else format=NORMAL;
+		draw_seqcol(p,f,format);
+	}
+	else if (fromhscroll==1)
+	{
+		f=data.firstvcol+data.vcols-1;
+		if ((f>=data.firstsel) && (f<=data.lastsel))
+			format=HIGHLIGHT;
+		else format=NORMAL;
+		draw_seqcol(p,f,format);
+	}
+	else
+	{
+		if (fromvscroll==-1)
+		{
+			f=l=data.firstvline;
+		}
+		else if (fromvscroll==1)
+		{
+			f=l=data.firstvline+data.vseqs-1;
+		}
+		else
+		{
+			f=data.firstvline;
+			l=data.firstvline+data.vseqs-1;
+		}
+
+		if(l>=data.nseqs) l=data.nseqs-1;
+		s=f-data.firstvline;
+		ObjectRect (p, &r);
+		InsetRect(&r,1,1);
+		data_colors();
+		/* clear the rows that are about to be redrawn */
+		block.top=r.top+((s+data.nhead)*data.lineheight)+data.descent+1;
+		block.bottom=block.top+(l-f+1)*data.lineheight;
+		block.left=r.left;
+		block.right=r.right;
+		EraseRect(&block);
+		if(data.nseqs == 0) return;
+
+		if(data.firstsel != -1)
+		{
+			endcol=data.firstvcol+data.vcols;
+			/* selection touches the window if either end is inside it
+			   or if it spans the whole window */
+			if ((data.firstsel>=data.firstvcol && data.firstsel<endcol)||
+			    (data.lastsel>=data.firstvcol && data.lastsel<endcol)||
+			    (data.firstsel<data.firstvcol && data.lastsel>=endcol))
+			{
+				/* clamp the selection to the visible columns */
+				fs=data.firstsel-data.firstvcol;
+				if (fs<0) fs=0;
+				if (fs>=data.vcols) fs=data.vcols-1;
+				ls=data.lastsel-data.firstvcol;
+				if (ls<0) ls=0;
+				if (ls>=data.vcols) ls=data.vcols-1;
+				block.left=r.left+(fs+1)*data.charwidth;
+				block.right=r.left+(ls+2)*data.charwidth;
+				text_colors();
+				EraseRect(&block);
+			}
+		}
+		x=r.left+data.charwidth;
+
+		for(i=f;i<=l;i++)
+		{
+			y=block.top+(i-f+1)*data.lineheight-data.descent-1;
+			LoadPt(&pt,x,y);
+			draw_seqline(data,i,pt,data.firstvcol,data.firstvcol+data.vcols-1,NORMAL);
+		}
+	}
+
+	black_on_white();
+	/* the scroll hints are consumed by this repaint */
+	fromvscroll=fromhscroll=0;
+}
+
+/*
+ * Mouse-click handler for a names panel.
+ *
+ * If the panel belongs to profile 1 or 2, every selected name in the other
+ * profile's names panel is first redrawn in NORMAL format.  Unless shftKey
+ * is set, the current selection in this panel is cleared as well.  The
+ * clicked row (clamped to 0..nseqs-1) then becomes both ends of
+ * selected_seqs and is highlighted.
+ */
+static void NameClick(PaneL panel, PoinT pt)
+{
+	int row;
+	int other = -1;		/* index into prf_panel of the opposite profile */
+	PaneL other_names;
+	panel_data info;
+	RecT bounds;
+
+	GetPanelExtra(panel, &info);
+	if (info.prf_no == 1)
+		other = 1;
+	else if (info.prf_no == 2)
+		other = 0;
+
+	if (other >= 0)
+	{
+/* revert selected area in the other profile to normal */
+		other_names = prf_panel[other].names;
+		GetPanelExtra(other_names, &info);
+		if (info.nseqs == 0)
+			draw_seq_pointer(other_names, 0, NORMAL);
+		for (row = 0; row < info.nseqs; row++)
+		{
+			if (info.selected[row] == TRUE)
+				draw_nameline(other_names, row, row, NORMAL);
+		}
+		SetPanelExtra(other_names, &info);
+	}
+
+	GetPanelExtra(panel, &info);
+	Select(panel);
+	ObjectRect(panel, &bounds);
+
+	if (!shftKey)
+	{
+/* revert existing selected area to normal */
+		for (row = 0; row < info.nseqs; row++)
+		{
+			if (info.selected[row] == TRUE)
+				draw_nameline(panel, row, row, NORMAL);
+		}
+	}
+
+	/* convert the click position into a sequence index */
+	selected_seqs.first = (pt.y - bounds.top-info.lineheight/2)/info.lineheight + info.firstvline-info.nhead;
+	if (selected_seqs.first < 0)
+		selected_seqs.first = 0;
+	if (selected_seqs.first >= info.nseqs)
+		selected_seqs.first = info.nseqs - 1;
+
+	if (selected_seqs.first == -1 && ncutseqs > 0)
+	{
+		/* first is -1 only when nseqs is 0: mark row 0 via the pointer */
+		selected_seqs.last = selected_seqs.first = 0;
+		draw_seq_pointer(panel, 0, HIGHLIGHT);
+	}
+	else
+	{
+		selected_seqs.last = selected_seqs.first;
+		draw_nameline(panel, selected_seqs.first, selected_seqs.last, HIGHLIGHT);
+	}
+	black_on_white();
+}
+
+/*
+ * Mouse-drag handler for a names panel.
+ *
+ * Converts the pointer position into a sequence index (clamped to
+ * 0..nseqs-1) and redraws only the rows whose state changes relative to
+ * the previous drag end, so the highlighted run always extends from
+ * selected_seqs.first to the row under the pointer.  The row then becomes
+ * selected_seqs.last.
+ */
+static void NameDrag(PaneL panel, PoinT pt)
+{
+	panel_data info;
+	RecT bounds;
+	int row;
+	int anchor, prev;
+
+	GetPanelExtra(panel, &info);
+	Select(panel);
+	ObjectRect(panel, &bounds);
+
+	row = (pt.y - bounds.top-info.lineheight/2)/info.lineheight + info.firstvline-info.nhead;
+	if (row < 0)
+		row = 0;
+	if (row >= info.nseqs)
+		row = info.nseqs - 1;
+
+	anchor = selected_seqs.first;
+	prev = selected_seqs.last;
+
+	if (row != prev)
+	{
+		if (row == anchor)
+		{
+			/* back on the anchor: collapse the run to one row */
+			draw_nameline(panel, anchor, prev, NORMAL);
+			draw_nameline(panel, anchor, row, HIGHLIGHT);
+		}
+		else if (row > anchor)
+		{
+			if (row > prev)
+				draw_nameline(panel, prev + 1, row, HIGHLIGHT);
+			else
+				draw_nameline(panel, row + 1, prev, NORMAL);
+		}
+		else
+		{
+			if (row < prev)
+				draw_nameline(panel, row, prev - 1, HIGHLIGHT);
+			else
+				draw_nameline(panel, prev, row - 1, NORMAL);
+		}
+	}
+	selected_seqs.last = row;
+
+	black_on_white();
+}
+
+/*
+ * Mouse-release handler for a names panel: puts selected_seqs into
+ * ascending order and makes this panel (and the panel stored in its
+ * data.index) the active panel pair.
+ */
+static void NameRelease(PaneL panel, PoinT pt)
+{
+	panel_data info;
+	int lo = selected_seqs.first;
+	int hi = selected_seqs.last;
+
+	if (lo > hi)
+	{
+		selected_seqs.first = hi;
+		selected_seqs.last = lo;
+	}
+
+	active_panel.names = panel;
+	GetPanelExtra(panel, &info);
+	active_panel.seqs = info.index;
+}
+
+/*
+ * Paint the full-width bar occupying the row of sequence index seq in
+ * panel: black when format is HIGHLIGHT, white otherwise.
+ */
+void draw_seq_pointer(PaneL panel,int seq,int format)
+{
+	RecT bounds, bar;
+	panel_data info;
+
+	Select(panel);
+	GetPanelExtra(panel, &info);
+
+	ObjectRect(panel, &bounds);
+	InsetRect(&bounds, 1, 1);
+
+	bar.left = bounds.left;
+	bar.right = bounds.right;
+	bar.top = bounds.top + ((seq + info.nhead) * info.lineheight) + info.descent + 1;
+	bar.bottom = bar.top + info.lineheight;
+
+	if (format != HIGHLIGHT)
+		White();
+	else
+		Black();
+	PaintRect(&bar);
+}
+
+/*
+ * Mouse-click handler for a sequence panel.
+ *
+ * For a profile panel (prf_no 1 or 2) the column selection of the other
+ * profile's sequence panel is cleared first.  The clicked column is
+ * clamped to the visible, existing columns.  With shftKey and an existing
+ * selection the selection is extended or shrunk to the clicked column;
+ * otherwise the old selection is removed and the clicked column becomes
+ * the new one-column selection.  The result is stored in the panel data
+ * and mirrored in selected_res.
+ */
+static void SeqClick(PaneL panel, PoinT pt)
+{
+	int col;
+	int old_first, old_last;
+	int other = -1;		/* index into prf_panel of the opposite profile */
+	PaneL other_seqs;
+	panel_data info;
+	RecT bounds;
+
+	GetPanelExtra(panel, &info);
+	if (info.prf_no == 1)
+		other = 1;
+	else if (info.prf_no == 2)
+		other = 0;
+
+	if (other >= 0)
+	{
+/* revert selected area in the other profile to normal */
+		other_seqs = prf_panel[other].seqs;
+		GetPanelExtra(other_seqs, &info);
+		old_first = info.firstsel;
+		old_last = info.lastsel;
+		info.firstsel = -1;
+		info.lastsel = -1;
+		SetPanelExtra(other_seqs, &info);
+		if (old_first != -1)
+			highlight_seqrange(other_seqs, old_first, old_last, NORMAL);
+	}
+
+	GetPanelExtra(panel, &info);
+	Select(panel);
+	ObjectRect(panel, &bounds);
+
+	/* pointer position -> column index, kept inside the visible window */
+	col = (pt.x - bounds.left-info.charwidth)/info.charwidth + info.firstvcol;
+	if (col < 0)
+		col = 0;
+	if (col < info.firstvcol)
+		col = info.firstvcol;
+	if (col >= info.ncols)
+		col = info.ncols - 1;
+	if (col >= info.firstvcol + info.vcols)
+		col = info.firstvcol + info.vcols - 1;
+
+	if (!shftKey || info.firstsel == -1)
+	{
+/* revert existing selected area to normal */
+		old_first = info.firstsel;
+		old_last = info.lastsel;
+		info.firstsel = -1;
+		info.lastsel = -1;
+		SetPanelExtra(panel, &info);
+		if (old_first != -1)
+			highlight_seqrange(panel, old_first, old_last, NORMAL);
+
+		selected_res.first = selected_res.last = col;
+		highlight_seqrange(panel, selected_res.first, selected_res.last, HIGHLIGHT);
+		info.firstsel = selected_res.first;
+		info.lastsel = selected_res.last;
+	}
+	else
+	{
+		if (col > info.lastsel)
+		{
+			/* extend to the right */
+			highlight_seqrange(panel, info.firstsel, col, HIGHLIGHT);
+			info.lastsel = col;
+		}
+		else if (col < info.firstsel)
+		{
+			/* extend to the left */
+			highlight_seqrange(panel, col, info.lastsel, HIGHLIGHT);
+			info.firstsel = col;
+		}
+		else
+		{
+			/* click inside the selection: cut it back to col */
+			highlight_seqrange(panel, col + 1, info.lastsel, NORMAL);
+			highlight_seqrange(panel, info.firstsel, col, HIGHLIGHT);
+			info.lastsel = col;
+		}
+		selected_res.first = info.firstsel;
+		selected_res.last = info.lastsel;
+	}
+
+	SetPanelExtra(panel, &info);
+	black_on_white();
+}
+
+/*
+ * Mouse-drag handler for a sequence panel.
+ *
+ * Converts the pointer position into a column index (clamped to the
+ * visible, existing columns) and re-highlights only the columns whose
+ * state changes relative to the previous drag end, so the highlighted run
+ * always extends from selected_res.first to the column under the pointer.
+ * The column then becomes selected_res.last.
+ */
+static void SeqDrag(PaneL panel, PoinT pt)
+{
+	panel_data info;
+	RecT bounds;
+	int col;
+	int anchor, prev;
+
+	GetPanelExtra(panel, &info);
+	Select(panel);
+	ObjectRect(panel, &bounds);
+
+	col = (pt.x - bounds.left-info.charwidth)/info.charwidth + info.firstvcol;
+	if (col < 0)
+		col = 0;
+	if (col < info.firstvcol)
+		col = info.firstvcol;
+	if (col >= info.ncols)
+		col = info.ncols - 1;
+	if (col >= info.firstvcol + info.vcols)
+		col = info.firstvcol + info.vcols - 1;
+
+	anchor = selected_res.first;
+	prev = selected_res.last;
+
+	if (col != prev)
+	{
+		if (col == anchor)
+		{
+			/* back on the anchor: collapse the run to one column */
+			highlight_seqrange(panel, anchor, prev, NORMAL);
+			highlight_seqrange(panel, anchor, col, HIGHLIGHT);
+		}
+		else if (col > anchor)
+		{
+			if (col > prev)
+				highlight_seqrange(panel, prev + 1, col, HIGHLIGHT);
+			else
+				highlight_seqrange(panel, col + 1, prev, NORMAL);
+		}
+		else
+		{
+			if (col < prev)
+				highlight_seqrange(panel, col, prev - 1, HIGHLIGHT);
+			else
+				highlight_seqrange(panel, prev, col - 1, NORMAL);
+		}
+	}
+	selected_res.last = col;
+
+	black_on_white();
+}
+
+/*
+ * Mouse-release handler for a sequence panel: puts selected_res into
+ * ascending order, makes this panel (and the panel stored in its
+ * data.index) the active panel pair, and stores the selection in the
+ * panel data as firstsel/lastsel.
+ */
+static void SeqRelease(PaneL panel, PoinT pt)
+{
+	panel_data info;
+	int lo = selected_res.first;
+	int hi = selected_res.last;
+
+	if (lo > hi)
+	{
+		selected_res.first = hi;
+		selected_res.last = lo;
+	}
+
+	active_panel.seqs = panel;
+	GetPanelExtra(panel, &info);
+	active_panel.names = info.index;
+
+	info.firstsel = selected_res.first;
+	info.lastsel = selected_res.last;
+	SetPanelExtra(panel, &info);
+}
+
+/*
+ * Repaint the header lines (data.header, data.nhead rows) at the top of
+ * panel p.  The header area is erased with text_colors() and the visible
+ * slice of each header row is painted; columns with a negative index are
+ * shown as blanks.  Does nothing when the panel has no sequences, no
+ * header, fewer visible lines than header rows, or no visible columns.
+ */
+void draw_header(PaneL p)
+{
+	RecT area, bounds;
+	PoinT pen;
+	int row, col, len, x, y;
+	panel_data info;
+	char *text;
+
+	UseWindow(mainw);
+	Select(p);
+	SelectFont(datafont);
+	GetPanelExtra(p, &info);
+	if (info.nseqs == 0 || info.header == NULL)
+		return;
+	if (info.vlines < info.nhead || info.vcols <= 0)
+		return;
+
+	text = (char *)ckalloc((info.vcols + 1) * sizeof(char));
+
+	ObjectRect(p, &bounds);
+	InsetRect(&bounds, 1, 1);
+	area.left = bounds.left;
+	area.right = bounds.right;
+	area.top = bounds.top + info.descent / 2;
+	area.bottom = area.top + (info.nhead * info.lineheight);
+	text_colors();
+	EraseRect(&area);
+
+	/* names panels leave room for DNUMBER characters on the left */
+	x = bounds.left + ((info.type == NAMES) ? DNUMBER : 1) * info.charwidth;
+	y = bounds.top + info.lineheight - info.descent / 2;
+
+	for (row = 0; row < info.nhead; row++)
+	{
+		len = 0;
+		for (col = info.firstvcol;
+		     col < info.firstvcol + info.vcols && col < info.ncols; col++)
+		{
+			if (col >= 0)
+				text[len] = info.header[row][col];
+			else
+				text[len] = ' ';
+			len++;
+		}
+		text[len] = '\0';
+
+		LoadPt(&pen, x, y);
+		SetPen(pen);
+		PaintString(text);
+		y += info.lineheight;
+	}
+	black_on_white();
+	ckfree(text);
+}
+
+/*
+ * Repaint the footer lines (data.footer, data.nfoot rows) at the bottom
+ * of panel p.  The footer area is erased with text_colors() and the
+ * visible slice of each footer row is painted; columns with a negative
+ * index are shown as blanks.  Does nothing when the panel has no
+ * sequences, no footer, fewer visible lines than footer rows, or no
+ * visible columns.
+ */
+void draw_footer(PaneL p)
+{
+	RecT area, bounds;
+	PoinT pen;
+	int row, col, len, x, y;
+	panel_data info;
+	char *text;
+
+	UseWindow(mainw);
+	Select(p);
+	SelectFont(datafont);
+	GetPanelExtra(p, &info);
+	if (info.nseqs == 0 || info.footer == NULL)
+		return;
+	if (info.vlines < info.nfoot || info.vcols <= 0)
+		return;
+
+	text = (char *)ckalloc((info.vcols + 1) * sizeof(char));
+
+	ObjectRect(p, &bounds);
+	InsetRect(&bounds, 1, 1);
+	area.left = bounds.left;
+	area.right = bounds.right;
+	area.top = bounds.top + ((info.vlines - info.nfoot) * info.lineheight) + info.descent + info.ascent / 2;
+	area.bottom = area.top + info.nfoot * info.lineheight;
+	text_colors();
+	EraseRect(&area);
+
+	/* names panels leave room for DNUMBER characters on the left */
+	x = area.left + ((info.type == NAMES) ? DNUMBER : 1) * info.charwidth;
+	y = area.top + info.lineheight - 1;
+
+	for (row = 0; row < info.nfoot; row++)
+	{
+		len = 0;
+		for (col = info.firstvcol;
+		     col < info.firstvcol + info.vcols && col < info.ncols; col++)
+		{
+			if (col >= 0)
+				text[len] = info.footer[row][col];
+			else
+				text[len] = ' ';
+			len++;
+		}
+		text[len] = '\0';
+
+		LoadPt(&pen, x, y);
+		SetPen(pen);
+		PaintString(text);
+		y += info.lineheight;
+	}
+	black_on_white();
+	ckfree(text);
+}
+
+
+/*
+ * Mark sequences fseq..lseq (either order) of names panel p as selected
+ * (format == HIGHLIGHT) or unselected, and repaint those of them that are
+ * currently visible: first the right-aligned sequence numbers in gray,
+ * then the names (looked up through output_index) in white-on-black or
+ * black-on-white.  Does nothing when the panel holds no sequences.
+ *
+ * Fix: the sequence number used to be formatted with sprintf() into the
+ * name buffer, which is sized from data.vcols; with few (or no) visible
+ * columns that buffer is shorter than the number.  The number now has its
+ * own buffer and the name buffer is never allocated with a size below 1.
+ */
+void draw_nameline(PaneL p,int fseq,int lseq,int format)
+{
+	RecT  block,r;
+	PoinT pt;
+	int n,i, j, t, f,l,x, y,ix;
+	int linelen;
+	panel_data data;
+	char *line;
+	char number[32];	/* ample for "%*d" of any int */
+
+	Select(p);
+	SelectFont(datafont);
+	GetPanelExtra(p, &data);
+	if(data.nseqs == 0) return;
+
+	/* n = number of decimal digits in nseqs = width of the number column */
+	n=1;
+	i=data.nseqs;
+	for(;;)
+	{
+		i/=10;
+		if(i==0) break;
+		n++;
+	}
+
+	linelen=data.vcols;
+	if (linelen<0) linelen=0;
+	line=(char *)ckalloc((linelen+1) * sizeof(char));
+	if (fseq > lseq)
+	{
+		t=fseq;
+		fseq=lseq;
+		lseq=t;
+	}
+	/* record the new selection state for the whole range, visible or not */
+	if (format==HIGHLIGHT)
+		for(i=fseq;i<=lseq;i++) data.selected[i]=TRUE;
+	else
+		for(i=fseq;i<=lseq;i++) data.selected[i]=FALSE;
+	SetPanelExtra(p,&data);
+
+	/* restrict drawing to the visible rows; a range entirely below the
+	   window ends up with fseq > lseq and draws nothing */
+	if (fseq<data.firstvline)
+		fseq=data.firstvline;
+	if (fseq>=data.firstvline+data.vseqs)
+		fseq=data.firstvline+data.vseqs;
+	if (lseq<data.firstvline)
+		lseq=data.firstvline;
+	if (lseq>=data.firstvline+data.vseqs)
+		lseq=data.firstvline+data.vseqs-1;
+	f=fseq-data.firstvline;
+	l=lseq-data.firstvline;
+	ObjectRect (p, &r);
+	InsetRect(&r,1,1);
+	block.top=r.top+((f+data.nhead)*data.lineheight)+data.descent+1;
+	block.bottom=block.top+((l-f+1)*data.lineheight);
+	block.left=r.left;
+	block.right=r.right;
+	if (format==HIGHLIGHT)
+		white_on_black();
+	else
+		data_colors();
+	EraseRect(&block);
+
+	/* pass 1: sequence numbers */
+	y=block.top+data.lineheight-data.descent-1;
+	for(i=fseq;i<=lseq;i++)
+	{
+		x=r.left+data.charwidth;
+		sprintf(number,"%*d",n,i+1);
+		LoadPt(&pt, x, y);
+		SetPen(pt);
+		Gray();
+		PaintString(number);
+		y+=data.lineheight;
+	}
+
+	/* pass 2: the visible slice of each name */
+	y=block.top+data.lineheight-data.descent-1;
+	for(i=fseq;i<=lseq;i++)
+	{
+		ix=output_index[i+1]-1;
+		x=r.left+DNUMBER*data.charwidth;
+		for(j=0;j<data.vcols && j<data.ncols-data.firstvcol;j++)
+			line[j]=data.lines[ix][j+data.firstvcol];
+		line[j]='\0';
+		LoadPt(&pt, x, y);
+		SetPen(pt);
+		if(format==HIGHLIGHT) White();
+		else Black();
+		PaintString(line);
+		y+=data.lineheight;
+	}
+	black_on_white();
+	ckfree(line);
+}
+
+/*
+ * Paint columns fcol..lcol of sequence seq, one character at a time,
+ * starting with the pen at pt and advancing by data.charwidth per column.
+ * Columns with a negative index or beyond data.ncols are skipped (the pen
+ * still advances for negative ones).
+ *
+ * Per character:
+ *   - segment exception  : dark gray cell, white character
+ *   - residue exception  : mid gray (150,150,150) cell, white character
+ *   - inverted mode      : black character; outside the selection (and
+ *                          when format is not HIGHLIGHT) the cell is first
+ *                          filled with the residue's colour
+ *   - otherwise          : character drawn in the residue's colour
+ * The drawing colour is left at Black() on return.
+ *
+ * Change: the per-colour scratch strings that were allocated, blanked and
+ * freed on every call were never read and have been removed.
+ */
+void draw_seqline(panel_data data,int seq,PoinT pt,int fcol,int lcol,int format)
+{
+	RecT r;
+	int j, ix;
+
+	if(data.nseqs == 0) return;
+
+	ix=output_index[seq+1]-1;
+
+	/* vertical extent of one character cell on this line */
+	r.top=pt.y-data.lineheight+data.descent+1;
+	r.bottom=r.top+data.lineheight;
+	for(j=fcol;j<=lcol && j<data.ncols;j++)
+	{
+		if(j>=0)
+		{
+		if(segment_exceptions && data.segment_exception[ix][j] > 0)
+		{
+			r.left=pt.x;
+			r.right=r.left+data.charwidth;
+			DkGray();
+			PaintRect(&r);
+			White();
+		}
+		else if(residue_exceptions && data.residue_exception[ix][j] == TRUE)
+		{
+			r.left=pt.x;
+			r.right=r.left+data.charwidth;
+		/*	LtGray(); */
+			SelectColor(150,150,150);
+			PaintRect(&r);
+			White();
+		}
+		else
+		{
+			if(inverted)
+			{
+				if(format==HIGHLIGHT || (j>=data.firstsel && j<=data.lastsel))
+					Black();
+				else
+				{
+					r.left=pt.x;
+#ifdef UNIX
+					r.right=r.left+data.charwidth-1;
+#else
+					r.right=r.left+data.charwidth;
+#endif
+					SetColor(color_lut[(int)data.colormask[ix][j]].val);
+					PaintRect(&r);
+					Black();
+				}
+			}
+			else
+				SetColor(color_lut[(int)data.colormask[ix][j]].val);
+
+		}
+		SetPen(pt);
+		PaintChar(data.lines[ix][j]);
+		}
+		pt.x+=data.charwidth;
+	}
+	Black();
+}
+
+/*
+ * Repaint a single column of the sequence panel p for every visible
+ * sequence row.  col is clamped to the visible columns; the column strip
+ * is erased with text_colors() (format == HIGHLIGHT) or data_colors() and
+ * each residue is then painted using the same exception / inverted /
+ * coloured rules as draw_seqline().  Does nothing when the panel has no
+ * sequences or no columns.
+ *
+ * Fixes:
+ *  - the background cell of segment / residue exceptions used
+ *    r.top+lineheight (the panel top) as its bottom edge instead of
+ *    r2.top+lineheight, so the cell was wrong for every row but the first;
+ *  - after clamping, col can still lie outside 0..ncols-1 (for instance a
+ *    negative first visible column); such a column is now left blank
+ *    instead of indexing the data arrays out of range, matching the
+ *    bounds checks in draw_seqline().
+ */
+void draw_seqcol(PaneL p,int col,int format)
+{
+	RecT  block,r, r2;
+	PoinT pt;
+	int totseqs,i, c,x,y,ix;
+	panel_data data;
+
+	Select(p);
+	SelectFont(datafont);
+	GetPanelExtra(p, &data);
+	if(data.nseqs == 0) return;
+	if(data.ncols == 0) return;
+
+	SetPanelExtra(p, &data);
+
+	if (col<data.firstvcol)
+		col=data.firstvcol;
+	if (col>=data.firstvcol+data.vcols)
+		col=data.firstvcol+data.vcols-1;
+	c=col-data.firstvcol;
+	totseqs=data.vseqs;
+	if (totseqs>data.nseqs) totseqs=data.nseqs;
+	ObjectRect (p, &r);
+	InsetRect(&r,1,1);
+	/* erase the strip occupied by this column */
+	block.top=r.top+(data.nhead*data.lineheight)+data.descent+1;
+	block.bottom=block.top+(totseqs)*data.lineheight;
+	block.left=r.left+(c+1)*data.charwidth;
+	block.right=block.left+data.charwidth;
+	if (format==HIGHLIGHT)
+		text_colors();
+	else
+		data_colors();
+	EraseRect(&block);
+
+	x=r.left+(c+1)*data.charwidth;
+	y=block.top+data.lineheight-data.descent-1;
+	r2.left=x;
+	r2.right=r2.left+data.charwidth;
+	if (col>=0 && col<data.ncols)
+	{
+		for(i=data.firstvline;i<data.firstvline+data.vseqs && i<data.nseqs;i++)
+		{
+			ix=output_index[i+1]-1;
+			/* character cell of this row */
+			r2.top=y-data.lineheight+data.descent+1;
+			r2.bottom=r2.top+data.lineheight;
+			if(segment_exceptions && data.segment_exception[ix][col] > 0)
+			{
+				DkGray();
+				PaintRect(&r2);
+				White();
+			}
+			else if(residue_exceptions && data.residue_exception[ix][col] == TRUE)
+			{
+			/*	LtGray(); */
+				SelectColor(150,150,150);
+				PaintRect(&r2);
+				White();
+			}
+			else
+			{
+				if(inverted)
+				{
+					if(format==HIGHLIGHT)
+					{
+						LtGray();
+					}
+					else
+						SetColor(color_lut[(int)data.colormask[ix][col]].val);
+					PaintRect(&r2);
+					Black();
+				}
+				else
+					SetColor(color_lut[(int)data.colormask[ix][col]].val);
+
+			}
+			LoadPt(&pt,x,y);
+			SetPen(pt);
+			PaintChar(data.lines[ix][col]);
+			y+=data.lineheight;
+		}
+	}
+	Black();
+}
+
+/*
+ * Repaint columns fcol..lcol (either order) of every visible sequence row
+ * in panel p, on the text_colors() background when format is HIGHLIGHT
+ * and on the data_colors() background otherwise.  Does nothing when the
+ * panel has no sequences or no columns.
+ *
+ * Fix: the range is now intersected with the visible columns before any
+ * drawing.  Previously clamping only happened when one end of the range
+ * was itself visible, so a range spanning the whole window, or lying
+ * completely outside it, was erased and drawn at coordinates outside the
+ * panel.  A range with no visible column is now skipped.
+ */
+void highlight_seqrange(PaneL p,int fcol,int lcol, int format)
+{
+	RecT  block,r;
+	int i,t,x,y;
+	int fseq,lseq,s;
+	int firstvis,lastvis;
+	panel_data data;
+	PoinT pt;
+
+	Select(p);
+	SelectFont(datafont);
+	GetPanelExtra(p, &data);
+	if(data.nseqs == 0) return;
+	if(data.ncols == 0) return;
+
+	if (fcol > lcol)
+	{
+		t=fcol;
+		fcol=lcol;
+		lcol=t;
+	}
+
+	/* keep only the part of the range that is on screen */
+	firstvis=data.firstvcol;
+	lastvis=data.firstvcol+data.vcols-1;
+	if (lcol<firstvis || fcol>lastvis) return;
+	if (fcol<firstvis) fcol=firstvis;
+	if (lcol>lastvis) lcol=lastvis;
+
+	fseq=data.firstvline;
+	lseq=data.firstvline+data.vseqs-1;
+	if(lseq>=data.nseqs) lseq=data.nseqs-1;
+	s=fseq-data.firstvline;
+	ObjectRect (p, &r);
+	InsetRect(&r,1,1);
+	if(format==HIGHLIGHT)
+		text_colors();
+	else
+		data_colors();
+	block.top=r.top+((s+data.nhead)*data.lineheight)+data.descent+1;
+	block.bottom=block.top+(lseq-fseq+1)*data.lineheight;
+	block.left=r.left+(fcol-data.firstvcol+1)*data.charwidth;
+	block.right=r.left+(lcol-data.firstvcol+2)*data.charwidth;
+	EraseRect(&block);
+
+	x=r.left+(fcol-data.firstvcol+1)*data.charwidth;
+
+	for(i=fseq;i<=lseq;i++)
+	{
+		y=block.top+(i-fseq+1)*data.lineheight-data.descent-1;
+		LoadPt(&pt,x,y);
+		draw_seqline(data,i,pt,fcol,lcol,format);
+	}
+	black_on_white();
+}
+
+/*
+ * Build one scrollable alignment display inside group w and return it.
+ *
+ * The display is a group holding, in creation order: a vertical scroll
+ * bar, a names panel, a sequence panel, and one horizontal scroll bar
+ * under each panel.  prf_no selects the scroll callbacks: 0 uses the
+ * *Multi* handlers, 1 the *Prf1* handlers and any other value the *Prf2*
+ * handlers.  The two panels are cross-linked through their data.index
+ * fields and are returned to the caller through p->names / p->seqs.
+ *
+ * nwidth, swidth and height are passed on to make_panel() (the names
+ * panel is widened by 5*max_names), as are firstseq and nseqs.
+ */
+GrouP make_scroll_area(GrouP w,int prf_no,int nwidth,int swidth,int height,int firstseq,int nseqs,spanel *p)
+{
+	panel_data ndata,sdata;
+        GrouP display;
+	RecT rect;
+        PoinT pt;
+	PaneL names,seqs;
+	BaR vscrollbar,hnscrollbar,hsscrollbar;
+	BarScrlProc hscrollnameproc, hscrollseqproc, vscrollproc;
+
+/* pick the callback set that matches this display */
+	if(prf_no==0)
+	{
+		hscrollnameproc=HscrollMultiN;
+		hscrollseqproc=HscrollMultiS;
+		vscrollproc=VscrollMulti;
+	}
+	else if (prf_no==1)
+	{
+		hscrollnameproc=HscrollPrf1N;
+		hscrollseqproc=HscrollPrf1S;
+		vscrollproc=VscrollPrf1;
+	}
+	else
+	{
+		hscrollnameproc=HscrollPrf2N;
+		hscrollseqproc=HscrollPrf2S;
+		vscrollproc=VscrollPrf2;
+	}
+
+/* the group is kept hidden until everything has been created */
+        display=HiddenGroup(w, 0, 0, NULL);
+        SetGroupSpacing(display, 0, 0);
+	Hide(display);
+
+        vscrollbar=ScrollBar(display, -1, 1, vscrollproc);
+
+/* names panel directly to the right of the vertical scroll bar */
+        ObjectRect(vscrollbar, &rect);/* vscrollbar for names */
+        pt.x=rect.right; /*how near they should be with name panel Ramu */
+        pt.y=rect.top;
+        SetNextPosition(display, pt); 
+        names=make_panel(NAMES,display, nwidth+(5*max_names), height, firstseq,nseqs); /* 5*max_names  Ramu */
+
+/* sequence panel directly to the right of the names panel */
+        ObjectRect(names, &rect);
+        pt.x=rect.right;
+        pt.y=rect.top;
+        SetNextPosition(display, pt);
+        seqs=make_panel(SEQS,display, swidth, height, firstseq,nseqs);
+
+/* horizontal scroll bars */
+        ObjectRect(names, &rect);
+        pt.x=rect.left;
+        pt.y=rect.bottom;
+        SetNextPosition(display, pt);
+        hnscrollbar=ScrollBar(display, 1, -1, hscrollnameproc);
+        ObjectRect(seqs, &rect);
+        pt.x=rect.left;
+        pt.y=rect.bottom;
+        SetNextPosition(display, pt);
+        hsscrollbar=ScrollBar(display, 1, -1, hscrollseqproc);
+        
+/* all three bars start with the same (1,1,0) range */
+	SetRange(hsscrollbar,1,1,0);
+	SetRange(hnscrollbar,1,1,0);
+	SetRange(vscrollbar,1,1,0);
+
+/* names panel: its own horizontal bar, partner = sequence panel
+   (note: the vertical bar is stored in the sequence panel data only) */
+	GetPanelExtra(names,&ndata);
+	ndata.hscrollbar=hnscrollbar;
+	ndata.index=seqs;
+        ndata.prf_no=prf_no;
+
+/* sequence panel: both bars, partner = names panel */
+	GetPanelExtra(seqs,&sdata);
+	sdata.vscrollbar=vscrollbar;
+	sdata.hscrollbar=hsscrollbar;
+	sdata.index=names;
+        sdata.prf_no=prf_no;
+
+/* click, drag and release handlers; the fourth (NULL) slot is unused here */
+	SetPanelClick(names,NameClick, NameDrag, NULL, NameRelease);
+	SetPanelClick(seqs,SeqClick, SeqDrag, NULL, SeqRelease);
+
+	p->names = names;
+	p->seqs = seqs;
+
+/* let the alloc_* helpers fill in the data, then store it on the panels */
+	ndata=alloc_name_data(ndata);
+	sdata=alloc_seq_data(sdata);
+	SetPanelExtra(names,&ndata);
+	SetPanelExtra(seqs,&sdata);
+
+	Show(display);
+	return(display);
+}
+
+
+/*
+ * Colour setup used for highlighted names: selects Black, calls
+ * InvertColors(), then selects White for subsequent drawing.
+ */
+void white_on_black(void)
+{
+	Black();
+	InvertColors();
+	White();
+}
+/*
+ * Default colour setup restored at the end of the drawing routines:
+ * selects White, calls InvertColors(), then selects Black.
+ */
+void black_on_white(void)
+{
+	White();
+	InvertColors();
+	Black();
+}
+/*
+ * Colour setup for header/footer text and selected columns: selects the
+ * light gray (220,220,220), calls InvertColors(), then selects Black.
+ */
+void text_colors(void)
+{
+	SelectColor(220, 220, 220);
+	InvertColors();
+	Black();
+}
+/*
+ * Colour setup for ordinary sequence data: selects White, calls
+ * InvertColors(), then selects Black (the same call sequence as
+ * black_on_white()).
+ */
+void data_colors(void)
+{
+	White();
+	InvertColors();
+	Black();
+}
+
+
+
+
+/*
+ * Build a column ruler of the given length in seq and store the label
+ * "ruler" in name.
+ *
+ * The ruler starts with '1', shows '.' for ordinary columns and, at every
+ * tenth column, the column number written right-aligned so that its last
+ * digit sits on that column, e.g. for length 25:
+ *     1.......10........20.....
+ * seq must have room for length+1 characters; name for 6.
+ *
+ * Fix: the number used to be formatted into a 5-byte buffer, which
+ * overflowed for rulers of 10000 columns or more.  A non-positive length
+ * now yields an empty ruler instead of writing outside seq.
+ */
+void make_ruler(int length, char *name,char *seq)
+{
+
+	int i,j;
+	char marker[32];	/* ample for any int printed with %d */
+	int marker_len;
+
+	strcpy(name,"ruler");
+	if (length <= 0)
+	{
+		seq[0] = '\0';
+		return;
+	}
+
+	seq[0] = '1';
+	for (i=1;i<length;i++)
+	{
+		if ((i+1)%10 > 0)
+			seq[i] = '.';
+		else
+		{
+			/* column i+1 is a multiple of ten: overwrite the dots
+			   just written with the number, ending at column i */
+			sprintf(marker,"%d",i+1);
+			marker_len = (int)strlen(marker);
+			for (j=0;j<marker_len;j++)
+				seq[i+1+j-marker_len] = marker[j];
+		}
+	}
+	seq[length]='\0';
+}
+
+/*
+ * Release every dynamically allocated member of data and return the
+ * structure with all of those pointers set to NULL.
+ *
+ * header and footer are freed as arrays of mheader / mfooter rows;
+ * lines, colormask, residue_exception and segment_exception as arrays of
+ * data.nseqs rows; consensus, selected, seqweight, subgroup and colscore
+ * as single allocations.  NULL members are skipped.
+ */
+panel_data free_panel_data(panel_data data)
+{
+	int row;
+
+	if (data.header != NULL)
+	{
+		row = 0;
+		while (row < mheader)
+		{
+			if (data.header[row] != NULL)
+				ckfree(data.header[row]);
+			data.header[row] = NULL;
+			row++;
+		}
+		ckfree(data.header);
+		data.header = NULL;
+	}
+
+	if (data.footer != NULL)
+	{
+		row = 0;
+		while (row < mfooter)
+		{
+			if (data.footer[row] != NULL)
+				ckfree(data.footer[row]);
+			data.footer[row] = NULL;
+			row++;
+		}
+		ckfree(data.footer);
+		data.footer = NULL;
+	}
+
+	if (data.consensus != NULL)
+	{
+		ckfree(data.consensus);
+		data.consensus = NULL;
+	}
+
+	if (data.lines != NULL)
+	{
+		row = 0;
+		while (row < data.nseqs)
+		{
+			if (data.lines[row] != NULL)
+				ckfree(data.lines[row]);
+			data.lines[row] = NULL;
+			row++;
+		}
+		ckfree(data.lines);
+		data.lines = NULL;
+	}
+
+	if (data.colormask != NULL)
+	{
+		row = 0;
+		while (row < data.nseqs)
+		{
+			if (data.colormask[row] != NULL)
+				ckfree(data.colormask[row]);
+			data.colormask[row] = NULL;
+			row++;
+		}
+		ckfree(data.colormask);
+		data.colormask = NULL;
+	}
+
+	/* flat per-sequence / per-column arrays */
+	if (data.selected != NULL)
+		ckfree(data.selected);
+	data.selected = NULL;
+
+	if (data.seqweight != NULL)
+		ckfree(data.seqweight);
+	data.seqweight = NULL;
+
+	if (data.subgroup != NULL)
+		ckfree(data.subgroup);
+	data.subgroup = NULL;
+
+	if (data.colscore != NULL)
+		ckfree(data.colscore);
+	data.colscore = NULL;
+
+	if (data.residue_exception != NULL)
+	{
+		row = 0;
+		while (row < data.nseqs)
+		{
+			if (data.residue_exception[row] != NULL)
+				ckfree(data.residue_exception[row]);
+			data.residue_exception[row] = NULL;
+			row++;
+		}
+		ckfree(data.residue_exception);
+		data.residue_exception = NULL;
+	}
+
+	if (data.segment_exception != NULL)
+	{
+		row = 0;
+		while (row < data.nseqs)
+		{
+			if (data.segment_exception[row] != NULL)
+				ckfree(data.segment_exception[row]);
+			data.segment_exception[row] = NULL;
+			row++;
+		}
+		ckfree(data.segment_exception);
+		data.segment_exception = NULL;
+	}
+
+	return(data);
+}
+
+
+/*
+ * Fill seq1[0..ncols-1] with a conservation symbol for each column of
+ * data.lines and set name to the empty string:
+ *   '*'  every sequence has the same letter as the first sequence
+ *   ':'  (only when dnaflag is off) every sequence's residue falls in one
+ *        group of res_cat1
+ *   '.'  (only when dnaflag is off) every sequence's residue falls in one
+ *        group of res_cat2
+ *   ' '  otherwise, and for columns whose first-sequence character is not
+ *        a letter
+ * seq1 is not NUL-terminated by this function.
+ *
+ * Fix: the character handed to isalpha() is converted to unsigned char
+ * first; passing a negative plain char is undefined.  The redundant
+ * end-of-string test inside the group scans has been dropped (the loop
+ * condition already stops at '\0').
+ */
+void make_consensus(panel_data data,char *name,char *seq1)
+{
+	char c;
+	sint catident1[NUMRES],catident2[NUMRES],ident;
+	sint i,j,k,l;
+
+
+	strcpy(name,"");
+	for(i=0; i<data.ncols; i++) {
+		seq1[i]=' ';
+		ident=0;
+		for(j=0;res_cat1[j]!=NULL;j++) catident1[j] = 0;
+		for(j=0;res_cat2[j]!=NULL;j++) catident2[j] = 0;
+		for(j=0;j<data.nseqs;++j) {
+			/* columns are only scored when sequence 0 has a letter */
+			if(!isalpha((unsigned char)data.lines[0][i]))
+				continue;
+			if(data.lines[0][i] == data.lines[j][i])
+				++ident;
+			/* count, per group, the sequences whose residue is in it */
+			for(k=0;res_cat1[k]!=NULL;k++) {
+				for(l=0;(c=res_cat1[k][l])!='\0';l++) {
+					if (data.lines[j][i]==c)
+					{
+						catident1[k]++;
+						break;
+					}
+				}
+			}
+			for(k=0;res_cat2[k]!=NULL;k++) {
+				for(l=0;(c=res_cat2[k][l])!='\0';l++) {
+					if (data.lines[j][i]==c)
+					{
+						catident2[k]++;
+						break;
+					}
+				}
+			}
+		}
+		if(ident==data.nseqs)
+			seq1[i]='*';
+		else if (!dnaflag) {
+			for(k=0;res_cat1[k]!=NULL;k++) {
+				if (catident1[k]==data.nseqs) {
+					seq1[i]=':';
+					break;
+				}
+			}
+			if(seq1[i]==' ')
+			for(k=0;res_cat2[k]!=NULL;k++) {
+				if (catident2[k]==data.nseqs) {
+					seq1[i]='.';
+					break;
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Produce the "Structures" display line for profile prf_no (1 or 2).
+ *
+ * When the profile's struct_penalties value is SECST and its use_ss flag
+ * is TRUE, the profile's secondary structure mask is copied, passed
+ * through print_sec_struct_mask(), and the first len characters are
+ * written to seq with gap_pos1 shown as '-'; name becomes "Structures"
+ * and 1 is returned.  In every other case name and seq are left as empty
+ * strings and 0 is returned.
+ */
+int make_struct_data(int prf_no,int len, char *name,char *seq)
+{
+	int pos;
+	char code;
+	char *mask_copy;
+
+	seq[0] = '\0';
+	name[0] = '\0';
+
+	if (prf_no == 1)
+	{
+		if (!(struct_penalties1 == SECST && use_ss1 == TRUE))
+			return(0);
+
+		strcpy(name, "Structures");
+		mask_copy = (char *)ckalloc((seqlen_array[1]+10) * sizeof(char));
+		for (pos = 0; pos < seqlen_array[1]; pos++)
+			mask_copy[pos] = sec_struct_mask1[pos];
+		print_sec_struct_mask(seqlen_array[1], sec_struct_mask1, mask_copy);
+
+		for (pos = 0; pos < len; pos++)
+		{
+			code = mask_copy[pos];
+			seq[pos] = (code == gap_pos1) ? '-' : code;
+		}
+		seq[pos] = EOS;
+		ckfree(mask_copy);
+		return(1);
+	}
+
+	if (prf_no == 2)
+	{
+		if (!(struct_penalties2 == SECST && use_ss2 == TRUE))
+			return(0);
+
+		strcpy(name, "Structures");
+		/* the first sequence of profile 2 follows the profile 1 sequences */
+		mask_copy = (char *)ckalloc((seqlen_array[profile1_nseqs+1]+10) * sizeof(char));
+		for (pos = 0; pos < seqlen_array[profile1_nseqs+1]; pos++)
+			mask_copy[pos] = sec_struct_mask2[pos];
+		print_sec_struct_mask(seqlen_array[profile1_nseqs+1], sec_struct_mask2, mask_copy);
+
+		for (pos = 0; pos < len; pos++)
+		{
+			code = mask_copy[pos];
+			seq[pos] = (code == gap_pos1) ? '-' : code;
+		}
+		seq[pos] = EOS;
+		ckfree(mask_copy);
+		return(1);
+	}
+
+	return(0);
+}
+
+/*
+ * Produce the "Gap Penalties" display line for profile prf_no (1 or 2).
+ *
+ * When the profile's struct_penalties value is GMASK and its use_ss flag
+ * is TRUE, the first len entries of the profile's gap penalty mask are
+ * written to seq with gap_pos1 shown as '-'; name becomes "Gap Penalties"
+ * and 1 is returned.  In every other case name and seq are left as empty
+ * strings and 0 is returned.
+ */
+int make_gp_data(int prf_no,int len, char *name,char *seq)
+{
+	int pos;
+	char code;
+
+	seq[0] = '\0';
+	name[0] = '\0';
+
+	if (prf_no == 1)
+	{
+		if (!(struct_penalties1 == GMASK && use_ss1 == TRUE))
+			return(0);
+
+		strcpy(name, "Gap Penalties");
+		for (pos = 0; pos < len; pos++)
+		{
+			code = gap_penalty_mask1[pos];
+			seq[pos] = (code == gap_pos1) ? '-' : code;
+		}
+		seq[pos] = EOS;
+		return(1);
+	}
+
+	if (prf_no == 2)
+	{
+		if (!(struct_penalties2 == GMASK && use_ss2 == TRUE))
+			return(0);
+
+		strcpy(name, "Gap Penalties");
+		for (pos = 0; pos < len; pos++)
+		{
+			code = gap_penalty_mask2[pos];
+			seq[pos] = (code == gap_pos1) ? '-' : code;
+		}
+		seq[pos] = EOS;
+		return(1);
+	}
+
+	return(0);
+}
+
+/*
+ * Vertical scroll-bar callback installed by make_scroll_area() for
+ * prf_no 0: makes seq_panel the active pair and scrolls its names and
+ * sequence panels together.
+ */
+static void VscrollMulti(BaR bar, GraphiC p, Nlm_Int2 newval, Nlm_Int2 oldval)
+{
+	active_panel = seq_panel;
+
+	vscrollnames(bar, newval, oldval);
+	vscrollseqs(bar, newval, oldval);
+}
+
+/*
+ * Horizontal scroll-bar callback (names panel) installed for prf_no 0:
+ * makes seq_panel the active pair and scrolls its names panel.
+ */
+static void HscrollMultiN(BaR bar, GraphiC p, Nlm_Int2 newval, Nlm_Int2 oldval)
+{
+	active_panel = seq_panel;
+
+	hscrollnames(bar, newval, oldval);
+}
+
+/*
+ * Horizontal scroll-bar callback (sequence panel) installed for prf_no 0:
+ * makes seq_panel the active pair and scrolls its sequence panel.
+ */
+static void HscrollMultiS(BaR bar, GraphiC p, Nlm_Int2 newval, Nlm_Int2 oldval)
+{
+	active_panel = seq_panel;
+
+	hscrollseqs(bar, newval, oldval);
+}
+
+/*
+ * Vertical scroll-bar callback installed for prf_no 1: makes prf_panel[0]
+ * the active pair and scrolls its names and sequence panels together.
+ */
+static void VscrollPrf1(BaR bar, GraphiC p, Nlm_Int2 newval, Nlm_Int2 oldval)
+{
+	active_panel = prf_panel[0];
+
+	vscrollnames(bar, newval, oldval);
+	vscrollseqs(bar, newval, oldval);
+}
+
+/*
+ * Horizontal scroll-bar callback (names panel) installed for prf_no 1:
+ * makes prf_panel[0] the active pair and scrolls its names panel.
+ */
+static void HscrollPrf1N(BaR bar, GraphiC p, Nlm_Int2 newval, Nlm_Int2 oldval)
+{
+	active_panel = prf_panel[0];
+
+	hscrollnames(bar, newval, oldval);
+}
+
+/*
+ * Horizontal scroll-bar callback (sequence panel) installed for prf_no 1.
+ * Scrolls the sequence panel of prf_panel[0]; when fixed_prf_scroll is
+ * TRUE the sequence panel of prf_panel[1] is scrolled by the same values
+ * afterwards, which also leaves prf_panel[1] as the active pair.
+ */
+static void HscrollPrf1S(BaR bar, GraphiC p, Nlm_Int2 newval, Nlm_Int2 oldval)
+{
+	active_panel = prf_panel[0];
+	hscrollseqs(bar, newval, oldval);
+
+	if (fixed_prf_scroll != TRUE)
+		return;
+
+	active_panel = prf_panel[1];
+	hscrollseqs(bar, newval, oldval);
+}
+
+/*
+ * Vertical scroll-bar callback installed for the remaining prf_no values
+ * (profile 2): makes prf_panel[1] the active pair and scrolls its names
+ * and sequence panels together.
+ */
+static void VscrollPrf2(BaR bar, GraphiC p, Nlm_Int2 newval, Nlm_Int2 oldval)
+{
+	active_panel = prf_panel[1];
+
+	vscrollnames(bar, newval, oldval);
+	vscrollseqs(bar, newval, oldval);
+}
+
+/*
+ * Horizontal scroll-bar callback (names panel) for profile 2: makes
+ * prf_panel[1] the active pair and scrolls its names panel.
+ */
+static void HscrollPrf2N(BaR bar, GraphiC p, Nlm_Int2 newval, Nlm_Int2 oldval)
+{
+	active_panel = prf_panel[1];
+
+	hscrollnames(bar, newval, oldval);
+}
+
+/*
+ * Horizontal scroll-bar callback (sequence panel) for profile 2.  When
+ * fixed_prf_scroll is TRUE the sequence panel of prf_panel[0] is scrolled
+ * first by the same values; the sequence panel of prf_panel[1] is then
+ * scrolled and prf_panel[1] is left as the active pair.
+ */
+static void HscrollPrf2S(BaR bar, GraphiC p, Nlm_Int2 newval, Nlm_Int2 oldval)
+{
+	int linked = (fixed_prf_scroll == TRUE);
+
+	if (linked)
+	{
+		active_panel = prf_panel[0];
+		hscrollseqs(bar, newval, oldval);
+	}
+
+	active_panel = prf_panel[1];
+	hscrollseqs(bar, newval, oldval);
+}
+
+

Added: trunk/packages/clustalw/branches/upstream/current/xmenu.c
===================================================================
--- trunk/packages/clustalw/branches/upstream/current/xmenu.c	2006-11-29 14:30:13 UTC (rev 162)
+++ trunk/packages/clustalw/branches/upstream/current/xmenu.c	2006-12-04 00:55:49 UTC (rev 163)
@@ -0,0 +1,4636 @@
+/***********************************************************************************************
+ *
+ *
+ *
+ *   History 
+ *
+ *   27.3.2002   - color parameter chooser can browse filenames like load sequences - Jose
+ *   16.1.2002   - remove the 'cut sequences' dialog box, not needed - Jise
+ *   17.1.2002   - 'Remove positions that contain gaps in all sequences ?'  removed, no need for confirmation - Toby
+ *
+ *
+ *
+*/
+
+#include <stdarg.h>
+#include <string.h>
+
+#include <vibrant.h>
+#include <document.h>
+
+/* #include <ncbi.h>   ramu   for time funs */
+
+#include "clustalw.h"
+#include "xmenu.h"
+
+
+static void RemoveWin(WindoW w);
+static void QuitWinW(WindoW w);
+static void QuitWinI(IteM i);
+static void QuitHelpW(WindoW w);
+static void QuitHelpB(ButtoN b);
+static void SearchStrWin (IteM item);
+static void SavePSSeqWin (IteM item);
+static void SavePSPrf1Win (IteM item);
+static void SavePSPrf2Win (IteM item);
+static void SaveSeqFileWin (IteM item);
+static void SavePrf1FileWin (IteM item);
+static void SavePrf2FileWin (IteM item);
+static void OpenColorParWin (IteM item);
+static void SearchStr(ButtoN but);
+static void SavePSSeqFile(ButtoN but);
+static void SavePSPrf1File(ButtoN but);
+static void SavePSPrf2File(ButtoN but);
+static void SaveSeqFile(ButtoN but);
+static void SavePrf1File(ButtoN but);
+static void SavePrf2File(ButtoN but);
+static void SaveScoresWin (IteM item);
+static void SaveScores(ButtoN but);
+static void OpenColorPar(ButtoN but);
+static void CancelWin(ButtoN but);
+static void SaveTreeWin (IteM item);
+static void CAlignWin (IteM item);
+static void RealignSeqsWin (IteM item);
+static void RealignSeqRangeWin (IteM item);
+static void DrawTreeWin (IteM item);
+static void AlignFromTreeWin(IteM item);
+static void PrfPrfAlignWin(IteM item);
+static void PrfPrfTreeAlignWin(IteM item);
+static void SeqPrfAlignWin(IteM item);
+static void SeqPrfTreeAlignWin(IteM item);
+static void BootstrapTreeWin (IteM item);
+static void CreateAlignTree(ButtoN but);
+static void CompleteAlign(ButtoN but);
+static void RealignSeqs(ButtoN but);
+static void RealignSeqRange(ButtoN but);
+static void DrawTree(ButtoN but);
+static void AlignFromTree(ButtoN but);
+static void PrfPrfAlign(ButtoN but);
+static void PrfPrfTreeAlign(ButtoN but);
+static void SeqPrfAlign(ButtoN but);
+static void SeqPrfTreeAlign(ButtoN but);
+static void BootstrapTree(ButtoN but);
+static void OpenSeqFile (IteM item);
+static void AppendSeqFile (IteM item);
+static void OpenPrf1File (IteM item);
+static void OpenPrf2File (IteM item);
+static void ScoreWin(IteM item);
+static void SegmentWin(IteM item);
+static void ScoreSegments(ButtoN but);
+static void PWParameters(IteM item);
+static void MultiParameters(IteM item);
+static void GapParameters(IteM item);
+static void SSParameters(IteM item);
+static void OutputParameters(IteM item);
+static void OutputTreeParameters(IteM item);
+static void HelpProc(IteM item);
+static void DefColorPar(IteM item);
+static void BlackandWhite(IteM item);
+static void set_reset_new_gaps(IteM i);
+static void set_reset_all_gaps(IteM i);
+static void SearchStringAgain(ButtoN but);
+
+static PopuP make_toggle(GrouP g,CharPtr title,CharPtr true_text, CharPtr false_text,
+				 Boolean *value,PupActnProc SetProc);
+static PrompT make_scale(GrouP g,CharPtr title,int length,int value,int max,BarScrlProc SetProc);
+static PrompT make_prompt(GrouP g,CharPtr title);
+
+static void CutSequences(IteM item);
+static void PasteSequences(IteM item);
+static void RemoveGaps(IteM item);
+static void RemoveGapPos(IteM item);
+
+static void SelectSeqs(IteM item);
+static void SelectPrf1(IteM item);
+static void SelectPrf2(IteM item);
+static void MergeProfiles(IteM item);
+static void ClearSeqs(IteM item);
+ 
+static void cut_multiplem(void);
+static void cut_profile1(void);
+static void cut_profile2(void);
+static void ssave(int j);
+static void sscpy(int i,int j);
+static void sload(int i);
+static void clear_seqrange(spanel p);
+static void select_seqs(spanel p,Boolean flag);
+static void clear_seg_exceptions(spanel p);
+
+static void make_menu_headers(WindoW w);
+static void make_help_menu(void);
+static void make_score_menu(void);
+static void make_file_menu(void);
+static void make_edit_menu(void);
+static void make_align_menu(void);
+static void make_tree_menu(void);
+static void make_color_menu(void);
+
+static void save_aln_window(int prf_no,char *title,char *prompt,void save_proc(ButtoN but));
+static void save_ps_window(int prf_no,char *prompt,void save_proc(ButtoN but));
+static void read_file_window(char *title,char *prompt,char *filename,void read_proc(ButtoN but));
+static void do_align_window(WindoW *alignw,TexT *treetext,Boolean treestatus,char *title,void align_proc(ButtoN but));
+static void do_palign_window(WindoW *alignw,TexT *tree1text,TexT *tree2test,Boolean treestatus,char *title,void align_proc(ButtoN but));
+static Boolean open_aln_files(void);
+static void write_file(int fseq,int lseq,int fres,int lres);
+
+
+Boolean x_menus=FALSE;
+
+int    mheader = 2; /* maximum header lines */
+int    mfooter = 1; /* maximum footer lines */
+int max_mlines = 20;      /*   multiple align display length */
+int min_mlines = 10;      /*   multiple align display length */
+int max_plines = 8;     /*   profile align display length */
+int min_plines1 = 5;     /*   profile align display length */
+int min_plines2 = 3;     /*   profile align display length */
+
+Boolean aln_mode = MULTIPLEM;
+Boolean window_displayed = FALSE;
+
+int    save_format = CLUSTAL;
+Boolean fixed_prf_scroll = FALSE;
+int loffset,boffset,toffset;
+int roffset;
+int poffset;
+
+int score_cutoff=5;    /* cutoff for residue exceptions */
+int score_hwin=5;    /* half window for summing alignment column scores */
+int score_scale=5;
+int segment_dnascale=5;
+int length_cutoff=1;    /* length cutoff for segment exceptions */
+Boolean residue_exceptions=FALSE;
+Boolean segment_exceptions=FALSE;
+int score_matnum=4;
+char score_mtrxname[FILENAMELEN];
+int segment_matnum=3;
+char segment_mtrxname[FILENAMELEN];
+int score_dnamatnum=1;
+char score_dnamtrxname[FILENAMELEN];
+int segment_dnamatnum=1;
+char segment_dnamtrxname[FILENAMELEN];
+
+Boolean output_ss;
+Boolean output_gp;
+
+extern char     revision_level[];
+extern Boolean interactive;
+
+extern char seqname[];
+extern char     outfile_name[];
+extern char    profile1_name[];
+extern char    profile2_name[];
+extern char     usermtrxname[], pw_usermtrxname[];
+extern char     dnausermtrxname[], pw_dnausermtrxname[];
+
+extern Boolean usemenu;
+extern Boolean use_tree_file;
+extern Boolean use_tree1_file,use_tree2_file;
+extern Boolean  dnaflag;
+extern sint     nseqs;
+extern sint    profile1_nseqs;
+extern sint profile_no;
+extern sint     max_aa;
+extern sint     *seqlen_array;
+extern char     **seq_array;
+extern char     **names, **titles;
+extern Boolean empty;
+extern Boolean profile1_empty, profile2_empty;
+extern sint     gap_pos1, gap_pos2;
+extern Boolean use_ambiguities;
+
+
+extern float    gap_open,      gap_extend;
+extern float    dna_gap_open,  dna_gap_extend;
+extern float    prot_gap_open, prot_gap_extend;
+extern float    pw_go_penalty,      pw_ge_penalty;
+extern float    dna_pw_go_penalty,  dna_pw_ge_penalty;
+extern float    prot_pw_go_penalty, prot_pw_ge_penalty;
+extern sint    wind_gap,ktup,window,signif;
+extern sint    dna_wind_gap, dna_ktup, dna_window, dna_signif;
+extern sint    prot_wind_gap,prot_ktup,prot_window,prot_signif;
+extern sint        helix_penalty;
+extern sint        strand_penalty;
+extern sint        loop_penalty;
+extern sint        helix_end_minus;
+extern sint        helix_end_plus;
+extern sint        strand_end_minus;
+extern sint        strand_end_plus;
+extern sint        helix_end_penalty;
+extern sint        strand_end_penalty;
+extern sint     divergence_cutoff;
+extern sint     gap_dist;
+extern sint boot_ntrials;               /* number of bootstrap trials */
+extern unsigned sint boot_ran_seed;     /* random number generator seed */
+
+extern sint        matnum,pw_matnum;
+extern char     mtrxname[], pw_mtrxname[];
+extern sint        dnamatnum,pw_dnamatnum;
+extern char     dnamtrxname[], pw_dnamtrxname[];
+
+extern MatMenu matrix_menu;
+extern MatMenu pw_matrix_menu;
+extern MatMenu dnamatrix_menu;
+
+extern Boolean  quick_pairalign;
+extern sint        matnum,pw_matnum;
+extern Boolean  neg_matrix;
+extern float    transition_weight;
+extern char     hyd_residues[];
+extern Boolean  no_var_penalties, no_hyd_penalties, no_pref_penalties;
+extern Boolean         use_endgaps;
+extern Boolean         endgappenalties;
+extern Boolean  output_clustal, output_nbrf, output_phylip, output_gcg, output_gde, output_nexus;
+extern Boolean output_fasta; /* Ramu */
+
+extern Boolean  save_parameters;
+extern Boolean  output_tree_clustal, output_tree_phylip, output_tree_distances, output_tree_nexus, output_pim;
+extern Boolean  lowercase; /* Flag for GDE output - set on comm. line*/
+extern Boolean  cl_seq_numbers;
+
+extern Boolean  seqRange;
+
+extern sint     output_order;
+extern sint     *output_index;
+extern Boolean  reset_alignments_new;               /* DES */
+extern Boolean  reset_alignments_all;               /* DES */
+
+extern FILE     *clustal_outfile, *gcg_outfile, *nbrf_outfile, *phylip_outfile;
+extern FILE     *gde_outfile, *nexus_outfile;
+extern FILE     *fasta_outfile;
+
+extern sint     max_aln_length;
+
+extern Boolean tossgaps;  /* Ignore places in align. where ANY seq. has a gap*/
+extern Boolean kimura;    /* Use correction for multiple substitutions */
+extern sint    bootstrap_format;      /* bootstrap file format */
+
+extern sint output_struct_penalties;
+extern Boolean use_ss1, use_ss2;
+extern char *res_cat1[];
+extern char *res_cat2[];
+
+extern char     *amino_acid_codes;
+
+PrompT   message;           /* used in temporary message window */
+
+static Char filename[FILENAMELEN]; /* used in temporary file selection window */
+
+Boolean mess_output=TRUE;
+Boolean save_log=FALSE;
+FILE *save_log_fd=NULL;
+static char save_log_filename[FILENAMELEN];
+static IteM save_item1,save_item2,exc_item;
+
+spanel  seq_panel;        /* data for multiple alignment area */
+spanel  prf_panel[2];       /* data for profile alignment areas */
+spanel  active_panel;       /* 'in-use' panel -scrolling,clicking etc. */
+static range selected_seqs;           /* sequences selected by clicking on names */
+static range selected_res;           /* residues selected by clicking on seqs */
+int firstres, lastres;	/* range of alignment for saving as ... */
+ 
+/* data for Search function */
+
+char find_string[MAXFINDSTR]="";
+aln_pos find_pos;
+
+/* arrays for storing clustalw data for cut-and-paste sequences */
+static sint     *saveseqlen_array=NULL;
+static char     **saveseq_array=NULL;
+static char     **savenames=NULL, **savetitles=NULL;
+sint     ncutseqs=0;
+
+FonT datafont,helpfont;
+WindoW mainw=NULL;
+WindoW messagew=NULL;
+WindoW readfilew=NULL;
+WindoW savealnw=NULL;
+WindoW savescoresw=NULL;
+WindoW savepsw=NULL;
+WindoW findw=NULL;
+WindoW calignw=NULL;
+WindoW ralignw=NULL;
+WindoW rralignw=NULL;
+WindoW talignw=NULL;
+WindoW palignw=NULL;
+WindoW salignw=NULL;
+WindoW scorew=NULL;
+WindoW exceptionw=NULL;
+TexT savealntext;
+TexT savescorestext;
+TexT savepstext;
+TexT findtext;
+TexT pspartext;
+TexT ctreetext;
+TexT rtreetext;
+TexT rrtreetext;
+TexT ttreetext;
+TexT ptree1text,ptree2text;
+TexT streetext;
+TexT readfiletext;
+WindoW savetreew=NULL;
+TexT savetreetext;
+WindoW drawtreew=NULL;
+TexT drawnjtreetext;
+TexT drawphtreetext;
+TexT drawdsttreetext;
+TexT drawnxstreetext;
+
+TexT drawpimtext;
+
+WindoW boottreew=NULL;
+TexT bootnjtreetext;
+TexT bootphtreetext;
+TexT bootnxstreetext;
+TexT blocklentext;
+PrompT mattext,pwmattext,dnamattext,pwdnamattext,scoremattext,segmentmattext;
+PrompT scorednamattext,segmentdnamattext;
+GrouP seg_matrix_list,score_matrix_list;
+GrouP seg_dnamatrix_list,score_dnamatrix_list;
+GrouP matrix_list,pw_matrix_list,dnamatrix_list,pw_dnamatrix_list;
+
+TexT cl_outtext,pir_outtext,msf_outtext,phylip_outtext,gde_outtext,nexus_outtext;
+TexT fasta_outtext; /* Ramu */
+
+GrouP slow_para,fast_para;
+GrouP  seq_display,prf1_display,prf2_display;
+
+MenU   filem,alignm,editm,treem,colorm;
+menu_item file_item,align_item,edit_item,tree_item,color_item;
+MenU   scorem,helpmenu;
+menu_item score_item,help_item;
+IteM segment_item;
+IteM bw_item,defcol_item,usercol_item;
+IteM new_gaps_item,all_gaps_item;
+WindoW helpw[MAXHELPW];
+int numhelp=0;
+
+PopuP modetext,flisttext;
+ButtoN pscrolltext;
+
+ButtoN selFonts;
+
+PopuP show_seg_toggle;
+PrompT residue_cutofftext;
+PrompT length_cutofftext;
+PrompT scorescaletext;
+PrompT segmentdnascaletext;
+
+/* Font sizes offered in the "Font Size" popup of the main window. */
+#define MAXFONTS 6
+int nfonts=MAXFONTS;   /* number of entries of av_font[] offered - ramu */
+int av_font[MAXFONTS]={8,10,12,14,18,24};
+int font_size=1;   /* index into av_font[] of the size used for the data font */
+
+int ncolors=0;
+int ncolor_pars=0;
+color color_lut[MAXCOLORS+1];
+char def_protpar_file[]="colprot.par";
+char def_dnapar_file[]="coldna.par";
+char *explicit_par_file = NULL;
+char *par_file = NULL;
+int    inverted = TRUE;
+int usebw=FALSE,usedefcolors=TRUE,useusercolors=FALSE;
+
+char ps_par_file[FILENAMELEN]="colprint.par";
+int pagesize=A4;
+int orientation=LANDSCAPE;
+Boolean ps_header=TRUE;
+Boolean ps_ruler=TRUE;
+Boolean resize=TRUE;
+int first_printres=0,last_printres=0,blocklen;
+Boolean ps_curve=TRUE;
+Boolean ps_resno=TRUE;
+PoinT display_pos;
+int namewidth,seqwidth; /* fixed widths of sequence display areas */
+
+Boolean         realign_endgappenalties=TRUE;
+Boolean         align_endgappenalties=FALSE;
+
+char helptext[MAXHELPLENGTH];
+
+
+
+
+/* ramu  */
+
+#include <time.h>
+#include <math.h>
+#include <unistd.h>
+#include <pwd.h>
+#include <sys/times.h>
+
+float cputime(float *seconds);  /*  Ramu ,   need's reset function */
+
+/*
+ * Report the CPU time (user + system) used by this process.
+ *
+ * The first call establishes time zero: it returns 0 and, if seconds is
+ * not NULL, stores 0 in *seconds.  Every later call returns the CPU time,
+ * in seconds, used since that first call and stores in *seconds the CPU
+ * time used since the previous call.
+ *
+ * seconds - optional output for the time since the previous call; may be
+ *           NULL.
+ */
+float cputime(float *seconds)
+{
+  struct tms buf;
+  static clock_t last=0, first=0;
+  static int calls=0;
+  long hertz=sysconf(_SC_CLK_TCK);   /* clock ticks per second */
+  clock_t now;
+
+  /* get the current number of user and system cpu ticks */
+
+  times(&buf);
+  now = buf.tms_utime + buf.tms_stime;
+
+  /* if this is the first call then this is time zero */
+
+  if ( !calls ) {
+    first = now;
+    calls = -1;
+  }
+
+  /* Always measure relative to the first call.  The first call used to
+     keep the absolute tick count, so it did not return zero and the
+     interval reported by the second call was wrong (usually negative). */
+  now = now - first;
+  if(seconds)
+      *seconds = ((float)(now - last))/(float)hertz;
+  last = now;
+  return ((float)now)/(float)hertz;
+}
+
+/*  Ramu   */
+
+
+/* main subroutine called from clustalx.c, initialises windows and enters a
+   forever loop monitoring user input */
+
+/*
+ * Build the main window and run the event loop.
+ *
+ * Creates the document window and its pulldown menus, the alignment mode
+ * popup, the font size popup and the "Lock Scroll" checkbox, then the three
+ * scrolling display areas (one for multiple alignment mode, two for profile
+ * mode) and the message line.  The window starts in profile mode when
+ * profile1_empty is false, otherwise in multiple alignment mode; the
+ * display areas of that mode are shown and loaded, and the offsets between
+ * window and panels (loffset, roffset, toffset, boffset, poffset) are
+ * saved.  Finally blocklen is chosen from the page size and orientation
+ * and ProcessEvents() is called.
+ */
+void x_menu(void)
+{
+	int i,n;
+	char font[30];
+	char tstr[30];
+	int height;
+	PrompT   fsize;
+	RecT wr,r,r1;
+ 
+
+/*  make the pulldown menu bar  */
+
+#ifdef WIN_MAC
+	MenU   m;
+
+        m=AppleMenu (NULL);
+        DeskAccGroup (m);
+	make_menu_headers(NULL);
+#endif
+#ifndef UNIX
+	ProcessUpdatesFirst(FALSE);
+#endif
+
+/* the window title is "Clustal" followed by the revision string */
+	sprintf(tstr,"Clustal%s",revision_level);
+/*#ifdef WIN_MSWIN
+	mainw = FixedWindow (-50,-33,-10,-10,tstr,QuitWinW);
+#else*/
+	mainw = DocumentWindow (-50,-33,-10,-10,tstr,QuitWinW,ResizeWindowProc);
+/*#endif*/	SetGroupSpacing(mainw,0,10);
+/* NOTE(review): this repeats the call that follows the commented-out
+   #endif on the previous line */
+	SetGroupSpacing(mainw,0,10);
+ 
+	x_menus=TRUE;
+
+#ifndef WIN_MAC
+	make_menu_headers(mainw);
+#endif
+/* decide if we're starting in profile or sequence mode */
+	if (!profile1_empty) aln_mode=PROFILEM;
+	else aln_mode=MULTIPLEM;
+
+	make_file_menu();
+	make_edit_menu();
+	make_align_menu();
+	make_tree_menu();
+	make_color_menu();
+	make_score_menu();
+	make_help_menu();
+
+/*  add a button to switch between multiple and profile alignment modes */
+
+	modetext=PopupList(mainw,TRUE,set_aln_mode);
+	PopupItem(modetext,"Multiple Alignment Mode");
+	PopupItem(modetext,"Profile Alignment Mode");
+	if(aln_mode==MULTIPLEM)
+		SetValue(modetext,1);
+	else
+		SetValue(modetext,2);
+
+/* font for the alignment data: courier at the currently selected size */
+	sprintf(font, "%s,%d,%c", "courier", av_font[font_size], 'm');
+        datafont=ParseFont(font);
+
+/* font for the help text: courier at size 10 */
+	sprintf(font, "%s,%d,%c", "courier", 10, 'm');
+        helpfont=ParseFont(font);
+
+	Advance(mainw);
+	shift(mainw,20,0);
+
+/*  add a button to select font size */
+	fsize=StaticPrompt(mainw,"Font Size:",0,dialogTextHeight,systemFont,'r');
+	Advance(mainw);
+	flisttext=PopupList(mainw,TRUE,set_font_size);
+	for(i=0;i<nfonts;i++)
+	{
+		sprintf(tstr,"%d",av_font[i]);
+		PopupItem(flisttext,tstr);
+	}
+/* popup values are set 1-based here, font_size is a 0-based index */
+	SetValue(flisttext,font_size+1);
+
+	Advance(mainw);
+	shift(mainw,20,0);
+
+	/* ramu .........
+	selFonts = PushButton(mainw,"Select Fonts",VSeqMgrFontProc);
+	Advance(mainw);
+	shift(mainw,20,0);
+
+	end ramu ........... */
+
+/*  add a button to switch profile scrolling modes */
+        pscrolltext=CheckBox(mainw,"Lock Scroll",set_pscroll_mode);
+	if(fixed_prf_scroll) SetStatus(pscrolltext,TRUE);
+	Break(mainw);
+
+
+/* nothing is selected yet */
+	selected_seqs.first=selected_seqs.last=-1;
+        selected_res.first=selected_res.last=-1;
+
+
+/*  initialise the multiple alignment display area */
+
+	SelectFont(datafont);
+	stdCharWidth=CharWidth('A');
+	stdLineHeight=LineHeight();
+ 
+	GetNextPosition(mainw,&display_pos);
+
+/* calculate initial pixel width and height of displays */
+	namewidth=(DNAMES+DNUMBER+1)*stdCharWidth;
+	seqwidth=(DCOLS+2*MARGIN)*stdCharWidth+2;
+/* limit the sequence area so names plus sequences fit the screen width */
+	n=screenRect.right-screenRect.left;
+	if(seqwidth+namewidth>n) seqwidth=n-namewidth;
+
+	height=(max_mlines+mfooter+MARGIN)*stdLineHeight+2+SCOREHEIGHT;
+/* limit the display height to the screen height */
+	n=screenRect.bottom-screenRect.top;
+	if(height>n) height=n;
+
+	seq_display=make_scroll_area(mainw,0,namewidth+20,seqwidth,height,1,nseqs,&seq_panel);
+	position_scrollbars(seq_panel);
+
+/*  initialise the profile alignment display area */
+ 
+/* the profile displays start at the same position as the multiple
+   alignment display */
+	SetNextPosition(mainw,display_pos);
+	height=(max_plines+MARGIN)*stdLineHeight+2+SCOREHEIGHT;
+	if(height>n) height=n;
+	prf1_display=make_scroll_area(mainw,1,namewidth,seqwidth,height,1,profile1_nseqs,&prf_panel[0]);
+	position_scrollbars(prf_panel[0]);
+
+	prf2_display=make_scroll_area(mainw,2,namewidth,seqwidth,height,profile1_nseqs+1,nseqs-profile1_nseqs,&prf_panel[1]);
+	position_scrollbars(prf_panel[1]);
+
+/*  add the message line */
+	Break(mainw);
+	Advance(mainw);
+	SelectFont(systemFont);
+	stdCharWidth=CharWidth('A');
+	stdLineHeight=LineHeight();
+	message = StaticPrompt(mainw, "",500, 0,systemFont,'l');
+
+/* save some pixel sizes for future resizing events */
+	if(aln_mode==PROFILEM)
+	{
+		Hide(seq_display);
+		profile_no=1;
+		Show(prf1_display);
+		Show(prf2_display);
+		Show(pscrolltext);
+		active_panel=prf_panel[0];
+		Select(prf1_display);
+		load_aln(prf_panel[0],0,profile1_nseqs-1,TRUE);
+		load_aln(prf_panel[1],profile1_nseqs,nseqs-1,TRUE);
+
+		Show(mainw);
+		ObjectRect(mainw,&wr);
+		ObjectRect(prf_panel[0].names,&r);
+		ObjectRect(prf_panel[1].names,&r1);
+		boffset=wr.bottom-wr.top-r1.bottom;
+		loffset=r.left;
+		toffset=r.top;
+		ObjectRect(prf_panel[0].seqs,&r);
+		roffset=wr.right-wr.left-r.right;
+	}
+	else
+	{
+		Hide(prf1_display);
+		Hide(prf2_display);
+		Hide(pscrolltext);
+		profile_no=0;
+		Show(seq_display);
+		active_panel=seq_panel;
+
+		Select(seq_display);
+		load_aln(seq_panel,0,nseqs-1,TRUE);
+
+		Show(mainw);
+		ObjectRect(mainw,&wr);
+		ObjectRect(seq_panel.names,&r);
+		boffset=wr.bottom-wr.top-r.bottom;
+		loffset=r.left;
+		toffset=r.top;
+		ObjectRect(seq_panel.seqs,&r);
+		roffset=wr.right-wr.left-r.right;
+	}
+/* vertical distance between the bottom of the profile 1 names and the top
+   of the profile 2 names, taken in both modes */
+	ObjectRect(prf_panel[0].names,&r);
+	ObjectRect(prf_panel[1].names,&r1);
+	poffset=r1.top-r.bottom;
+
+/* initialise some variables before we display the window */
+        if(orientation==LANDSCAPE)
+        {
+                if(pagesize==A4) blocklen=150;
+                else if (pagesize==A3) blocklen=250;
+		else blocklen=150;
+        }
+        else
+        {
+                if(pagesize==A4) blocklen=80;
+                else if (pagesize==A3) blocklen=150;
+		else blocklen=150;
+        }
+
+/* ok - Go! */
+	window_displayed=TRUE;
+	ProcessEvents();
+
+}
+
+
+/* Callback handed to FixedWindow() for the dialog windows in this file:
+   removes the window it is given. */
+static void RemoveWin(WindoW w)
+{
+	Remove(w);
+}
+
+
+/*
+ * Callback handed to DocumentWindow() for the main window.
+ *
+ * Before quitting, asks for confirmation for every modified panel of the
+ * current alignment mode (the alignment in multiple alignment mode,
+ * profile 1 and then profile 2 in profile mode).  Any "No" answer aborts
+ * the quit; otherwise QuitProgram() is called.
+ */
+static void QuitWinW(WindoW w)
+{
+	if (aln_mode == MULTIPLEM)
+	{
+		if (seq_panel.modified &&
+		    Message(MSG_YN,"Alignment has not been saved.\n"
+		    "Quit program anyway?") == ANS_NO)
+			return;
+	}
+	else if (aln_mode == PROFILEM)
+	{
+		if (prf_panel[0].modified &&
+		    Message(MSG_YN,"Profile 1 has not been saved.\n"
+		    "Quit program anyway?") == ANS_NO)
+			return;
+
+		if (prf_panel[1].modified &&
+		    Message(MSG_YN,"Profile 2 has not been saved.\n"
+		    "Quit program anyway?") == ANS_NO)
+			return;
+	}
+
+	QuitProgram();
+}
+
+/*
+ * Open the "SEARCH IN SELECTED SEQUENCES" window.
+ *
+ * Shows a message and does nothing else when the active panel holds no
+ * sequences or none of them is selected.  Otherwise builds the window
+ * (stored in findw) with a text field (findtext) and three buttons:
+ * "SEARCH FROM START", "SEARCH AGAIN" and "CLOSE".
+ */
+static void SearchStrWin (IteM item)
+{
+	panel_data data;
+	Boolean any_selected=FALSE;
+	int n;
+
+	GetPanelExtra(active_panel.names,&data);
+	if (data.nseqs==0)
+	{
+		Message(MSG_OK,"No file loaded.");
+		return;
+	}
+
+	/* stop at the first selected sequence name */
+	n=0;
+	while (n<data.nseqs && any_selected==FALSE)
+	{
+		if (data.selected[n]==TRUE)
+			any_selected=TRUE;
+		n++;
+	}
+	if (any_selected==FALSE)
+	{
+		Message(MSG_OK,"Select sequences by clicking on the names.");
+		return;
+	}
+
+	SelectFont(systemFont);
+	stdCharWidth=CharWidth('A');
+	stdLineHeight=LineHeight();
+	findw=FixedWindow(-50, -33, -10, -10, "SEARCH IN SELECTED SEQUENCES",RemoveWin);
+	stdLineHeight=18;
+	SelectFont(programFont);
+	findtext=DialogText(findw, "", 35, NULL);
+	Break(findw);
+
+	/* one button per row; the button handles are not needed afterwards */
+	PushButton(findw, "SEARCH FROM START", SearchStr);
+	Break(findw);
+	PushButton(findw, "SEARCH AGAIN", SearchStringAgain);
+	Break(findw);
+	PushButton(findw, "CLOSE", CancelWin);
+
+	Show(findw);
+}
+
+/* Open the PostScript output window for the sequences (panel number 0),
+   or report an error when no sequences are loaded. */
+static void SavePSSeqWin (IteM item)
+{
+	if (!empty)
+	{
+		save_ps_window(0,"WRITE SEQUENCES TO:",SavePSSeqFile);
+		return;
+	}
+	error("No file loaded");
+}
+
+/* Open the PostScript output window for profile 1 (panel number 1), or
+   report an error when profile 1 is empty. */
+static void SavePSPrf1Win (IteM item)
+{
+	if (!profile1_empty)
+	{
+		save_ps_window(1,"WRITE PROFILE 1 TO:",SavePSPrf1File);
+		return;
+	}
+	error("No file loaded");
+}
+
+/* Open the PostScript output window for profile 2 (panel number 2), or
+   report an error when profile 2 is empty. */
+static void SavePSPrf2Win (IteM item)
+{
+	if (!profile2_empty)
+	{
+		save_ps_window(2,"WRITE PROFILE 2 TO:",SavePSPrf2File);
+		return;
+	}
+	error("No file loaded");
+}
+
+/*
+ * Build and show the "WRITE POSTSCRIPT FILE" window (stored in savepsw).
+ *
+ * prf_no    - which alignment is written: 0 uses seq_panel and seqname,
+ *             1 uses prf_panel[0] and profile1_name, 2 uses prf_panel[1]
+ *             and profile2_name.
+ * prompt    - label shown in front of the output file name field.
+ * save_proc - callback attached to the OK button.
+ *
+ * The window offers the output file name, the colour parameter file, page
+ * size, orientation, five YES/NO toggles, the range of positions to print
+ * and the block length.  first_printres is reset to 1 and last_printres to
+ * the number of columns of the chosen panel.
+ */
+static void save_ps_window(int prf_no,char *prompt,void save_proc(ButtoN but))
+{
+	GrouP savegr;
+	ButtoN save_can,save_ok;
+	PopuP ps,or;
+	char path[FILENAMELEN];
+	char str[FILENAMELEN];
+	panel_data data;
+
+	SelectFont(systemFont);
+	stdCharWidth=CharWidth('A');
+	stdLineHeight=LineHeight();
+	savepsw=FixedWindow(-50, -33, -10, -10, "WRITE POSTSCRIPT FILE",RemoveWin);
+	make_prompt(savepsw, prompt);
+        stdLineHeight=18;
+        SelectFont(programFont);
+/* output file name; the default name is filled in at the end */
+	savepstext=DialogText(savepsw, "", 35, NULL);
+	Break(savepsw);
+	make_prompt(savepsw, "PS Colors File :");
+	pspartext=DialogText(savepsw, ps_par_file, 35, NULL);
+	Break(savepsw);
+/* page size popup, preset from pagesize */
+       	make_prompt(savepsw, "Page Size");
+	Advance(savepsw);
+	ps=PopupList(savepsw,TRUE,set_pagesize);
+	PopupItem(ps,"A4");
+	PopupItem(ps,"A3");
+	PopupItem(ps,"US Letter");
+	if (pagesize == A4)
+		SetValue(ps,1);
+	else if (pagesize == A3)
+		SetValue(ps,2);
+	else if (pagesize == USLETTER)
+		SetValue(ps,3);
+	Break(savepsw);
+/* orientation popup, preset from orientation */
+       	make_prompt(savepsw, "Orientation");
+	Advance(savepsw);
+	or=PopupList(savepsw,TRUE,set_orientation);
+	PopupItem(or,"LANDSCAPE");
+	PopupItem(or,"PORTRAIT");
+	if (orientation == LANDSCAPE)
+		SetValue(or,1);
+	else if (orientation == PORTRAIT)
+		SetValue(or,2);
+	Break(savepsw);
+/* YES/NO toggles, two per row */
+	make_toggle(savepsw,"Print Header :","YES","NO",&ps_header,set_header);
+	Advance(savepsw);
+	make_toggle(savepsw,"Print Quality Curve :","YES","NO",&ps_curve,set_curve);
+	Break(savepsw);
+	make_toggle(savepsw,"Print Ruler :","YES","NO",&ps_ruler,set_ruler);
+	Advance(savepsw);
+	make_toggle(savepsw,"Print Residue Numbers :","YES","NO",&ps_resno,set_resno);
+	Break(savepsw);
+	make_toggle(savepsw,"Resize to fit page:","YES","NO",&resize,set_resize);
+	Break(savepsw);
+/* default print range: position 1 to the last column of the chosen panel */
+	first_printres=1;
+	if (prf_no==0)
+		GetPanelExtra(seq_panel.seqs,&data);
+	else if (prf_no==1)
+		GetPanelExtra(prf_panel[0].seqs,&data);
+	else
+		GetPanelExtra(prf_panel[1].seqs,&data);
+	last_printres=data.ncols;
+        make_prompt(savepsw, "Print from position :");
+	Advance(savepsw);
+	sprintf(str,"%5d",first_printres);
+        DialogText(savepsw, str, 5,set_fpres);
+	Advance(savepsw);
+        make_prompt(savepsw, "to :");
+	Advance(savepsw);
+	sprintf(str,"%5d",last_printres);
+        DialogText(savepsw, str, 5,set_lpres);
+	Break(savepsw);
+        make_prompt(savepsw, "Use block length :");
+	Advance(savepsw);
+	sprintf(str,"%5d",blocklen);
+        blocklentext=DialogText(savepsw, str, 5,set_blocklen);
+	Break(savepsw);
+	savegr=HiddenGroup(savepsw, 2, 0, NULL);
+	shift(savegr, 60, 20);
+	save_ok=PushButton(savegr, "  OK  ", save_proc);
+	shift(savegr, 20,0);
+	save_can=PushButton(savegr, "CLOSE", CancelWin);
+
+/* default output name: the result of get_path() on the input file name
+   with "ps" appended (presumably get_path() leaves the name up to and
+   including the last '.' in path - TODO confirm).
+   NOTE(review): path stays uninitialised if prf_no is not 0, 1 or 2. */
+	if(prf_no==0)
+		get_path(seqname,path);
+	else if(prf_no==1)
+		get_path(profile1_name,path);
+	else if(prf_no==2)
+		get_path(profile2_name,path);
+	strcat(path,"ps");
+	SetTitle(savepstext, path);
+	Show(savepsw);
+}
+
+/*
+ * Open the "SAVE QUALITY SCORES" window (stored in savescoresw).
+ *
+ * Reports an error when no sequences are loaded, and shows a message when
+ * no sequence is selected in the active panel.  Otherwise the window is
+ * built with a file name field (savescorestext) preset to the result of
+ * get_path() on seqname with "qscores" appended, an OK button bound to
+ * SaveScores and a CANCEL button.
+ */
+static void SaveScoresWin (IteM item)
+{
+	panel_data data;
+	char path[FILENAMELEN];
+	GrouP buttons;
+	Boolean any_selected=FALSE;
+	int n;
+
+	if (empty)
+	{
+		error("No file loaded");
+		return;
+	}
+
+	/* stop at the first selected sequence name */
+	GetPanelExtra(active_panel.names,&data);
+	n=0;
+	while (n<data.nseqs && any_selected==FALSE)
+	{
+		if (data.selected[n]==TRUE)
+			any_selected=TRUE;
+		n++;
+	}
+	if (any_selected==FALSE)
+	{
+		Message(MSG_OK,"Select sequences to be written by clicking on the names.");
+		return;
+	}
+
+	/* default output file name */
+	get_path(seqname,path);
+	strcat(path,"qscores");
+
+	SelectFont(systemFont);
+	stdCharWidth=CharWidth('A');
+	stdLineHeight=LineHeight();
+	savescoresw=FixedWindow(-50, -33, -10, -10, "SAVE QUALITY SCORES",RemoveWin);
+	stdLineHeight=18;
+	SelectFont(programFont);
+	make_prompt(savescoresw, "SAVE QUALITY SCORES TO:");
+	stdLineHeight=18;
+	SelectFont(programFont);
+	Break(savescoresw);
+	savescorestext=DialogText(savescoresw, "", 35, NULL);
+	Break(savescoresw);
+
+	/* OK / CANCEL row; the button handles are not needed afterwards */
+	buttons=HiddenGroup(savescoresw, 2, 0, NULL);
+	shift(buttons, 60, 20);
+	PushButton(buttons, "  OK  ", SaveScores);
+	shift(buttons, 20,0);
+	PushButton(buttons, "CANCEL", CancelWin);
+
+	SetTitle(savescorestext, path);
+	Show(savescoresw);
+
+	Advance(savescoresw);
+	Show(savescoresw);
+}
+
+/*
+ * Callback of the OK button in the "SAVE QUALITY SCORES" window.
+ *
+ * Takes the output file name from savescorestext and writes one line for
+ * every alignment column that is not a gap in all selected sequences: the
+ * residue (or '-') of each selected sequence, each followed by a space,
+ * then a tab and the column score.  Columns run from 1 to the length of
+ * the longest selected sequence.  Afterwards the window is removed if it
+ * is visible and a message naming the file is shown.
+ *
+ * If the file cannot be opened nothing is written and the window is left
+ * open.
+ *
+ * NOTE(review): selected[] is indexed with the global sequence number
+ * minus one, for sequences 1..nseqs.  Presumably that is only right when
+ * the active panel starts at sequence 1 - TODO confirm for profile 2.
+ */
+static void SaveScores(ButtoN but)
+{
+	char c;
+	int i,j,val;
+	int length=0;
+	FILE *outfile;
+	panel_data name_data,seq_data;
+	Boolean gap;
+
+	GetPanelExtra(active_panel.names,&name_data);
+	GetPanelExtra(active_panel.seqs,&seq_data);
+
+	GetTitle(savescorestext, filename, FILENAMELEN);
+	stripspace(filename);
+
+	outfile=open_explicit_file(filename); 
+/* do not write through a NULL stream; open_explicit_file() is expected to
+   have reported the failure itself - TODO confirm */
+	if (outfile==NULL) return;
+
+/* get the maximum length of the selected sequences */
+        for (i=1;i<=nseqs;i++)
+           if (name_data.selected[i-1]==TRUE && length < seqlen_array[i]) length = seqlen_array[i];
+
+	for(j=1;j<=length;j++)
+	{
+/* first check for a column of gaps */
+		gap=TRUE;
+        	for (i=1;i<=nseqs;i++)
+           		if (name_data.selected[i-1]==TRUE)
+			{
+/* positions past the end of a sequence count as gaps and must not be read */
+				if(j>seqlen_array[i]) continue;
+                                val = seq_array[i][j];
+                                if((val != gap_pos1) && (val != gap_pos2))
+				{
+					gap=FALSE;
+					break;
+				}
+			}
+		if(gap==FALSE)
+		{
+        		for (i=1;i<=nseqs;i++)
+			{
+           			if (name_data.selected[i-1]==TRUE)
+				{
+/* only read the residue when the column lies inside this sequence */
+					if(j>seqlen_array[i])
+						c = '-';
+					else
+					{
+                                		val = seq_array[i][j];
+                                		if((val == gap_pos1) || (val == gap_pos2))
+                                        		c = '-';
+                                		else
+                                        		c = amino_acid_codes[val];
+					}
+	 
+					fprintf(outfile,"%c ",c);
+				}
+			}
+			fprintf(outfile,"\t%3d\n",seq_data.colscore[j-1]);
+		}
+
+	}
+	fclose(outfile);
+
+        if (Visible(savescoresw))
+        {
+                Remove(savescoresw);
+                savescoresw=NULL;
+        }
+
+
+
+	info("File %s saved",filename);
+}
+
+/* Open the save window for the sequences (panel number 0), or report an
+   error when no sequences are loaded. */
+static void SaveSeqFileWin (IteM item)
+{
+	if (!empty)
+	{
+		save_aln_window(0,"SAVE SEQUENCES","SAVE SEQUENCES AS:",SaveSeqFile);
+		return;
+	}
+	error("No file loaded");
+}
+
+static void SavePrf1FileWin (IteM item)
+{
+	if (profile1_empty)
+	{
+		error("No file loaded");
+		return;
+	}
+	save_aln_window(1,"SAVE PROFILE","SAVE PROFILE 1 AS:",SavePrf1File);
+}
+static void SavePrf2FileWin (IteM item)
+{
+	if (profile2_empty)
+	{
+		error("No file loaded");
+		return;
+	}
+	save_aln_window(2,"SAVE PROFILE","SAVE PROFILE 2 AS:",SavePrf2File);
+}
+
+/*
+ * Build and show the window for saving an alignment (stored in savealnw).
+ *
+ * prf_no    - which alignment is saved: 0 takes the default file name from
+ *             seqname, 1 from profile1_name, 2 from profile2_name.
+ * title     - window title.
+ * prompt    - label shown above the output file name field.
+ * save_proc - callback attached to the OK button.
+ *
+ * The window offers seven output formats as radio buttons, the GDE case
+ * and CLUSTALW sequence number toggles, a residue range with a range
+ * number toggle, and the output file name.  firstres and lastres are
+ * reset to 0 every time the window is built.
+ */
+static void save_aln_window(int prf_no,char *title,char *prompt,void save_proc(ButtoN but))
+{
+	GrouP savegr;
+	ButtoN save_ok, save_can;
+	GrouP maing;
+        GrouP format_list;
+	ButtoN formatb[6+1]; /* + 1 for fasta */
+	PopuP case_toggle,snos_toggle;
+	PopuP seqRange_toggle; /* Ramu */
+	char path[FILENAMELEN+1];
+	char str[FILENAMELEN+1];
+
+	SelectFont(systemFont);
+	stdCharWidth=CharWidth('A');
+	stdLineHeight=LineHeight();
+
+	savealnw=FixedWindow(-50, -33, -10, -10, title,RemoveWin);
+
+/* one radio button per output format; set_format is the group's callback */
+        format_list=NormalGroup(savealnw,3,0,"Format",systemFont,set_format);
+        formatb[0]=RadioButton(format_list,"CLUSTAL");
+        formatb[1]=RadioButton(format_list,"NBRF/PIR");
+        formatb[2]=RadioButton(format_list,"GCG/MSF");
+        formatb[3]=RadioButton(format_list,"PHYLIP");
+        formatb[4]=RadioButton(format_list,"GDE");
+        formatb[5]=RadioButton(format_list,"NEXUS");
+        formatb[6]=RadioButton(format_list,"FASTA");
+
+/* start of the default output name (presumably get_path() leaves the input
+   name up to and including the last '.' in path - TODO confirm).
+   NOTE(review): path stays uninitialised if prf_no is not 0, 1 or 2. */
+	if(prf_no==0)
+        	get_path(seqname,path);
+	else if(prf_no==1)
+        	get_path(profile1_name,path);
+	else if(prf_no==2)
+        	get_path(profile2_name,path);
+
+/* preselect the radio button of the current save_format and append the
+   matching file extension to the default name */
+	if (save_format==CLUSTAL)
+	{
+        	SetValue(format_list,1);
+		strcat(path,"aln");
+	}
+	else if (save_format==PIR)
+	{
+        	SetValue(format_list,2);
+		strcat(path,"pir");
+	}
+	else if (save_format==MSF)
+	{
+        	SetValue(format_list,3);
+		strcat(path,"msf");
+	}
+	else if (save_format==PHYLIP)
+	{
+        	SetValue(format_list,4);
+		strcat(path,"phy");
+	}
+	else if (save_format==GDE)
+	{
+        	SetValue(format_list,5);
+		strcat(path,"gde");
+	}
+ 	else if (save_format==NEXUS)
+	{
+        	SetValue(format_list,6);
+		strcat(path,"nxs");
+	}
+ 	else if (save_format==FASTA)
+	{
+        	SetValue(format_list,7);
+		strcat(path,"fasta");
+	}
+
+	maing=HiddenGroup(savealnw,0,0,NULL);
+	SetGroupSpacing(maing,0,10);
+
+	case_toggle=make_toggle(maing,"GDE output case :","Lower","Upper",&lowercase,set_case);
+	Break(maing);
+	snos_toggle=make_toggle(maing,"CLUSTALW sequence numbers :","ON","OFF",&cl_seq_numbers,set_snos);
+
+	Break(maing);
+        make_prompt(maing, "Save range from  :");
+	Advance(maing);
+	firstres = 0; /* init always ramu */
+	lastres = 0; /* init always ramu */
+	sprintf(str,"%5d",firstres);
+        DialogText(maing, str, 5,set_fres);
+	Advance(maing);
+        make_prompt(maing, "to :");
+	Advance(maing);
+	sprintf(str,"%5d",lastres);
+        DialogText(maing, str, 5,set_lres);
+	/* <Ramu> */
+	Advance(maing);
+	seqRange_toggle=make_toggle(maing,"  and include range numbers :","ON","OFF",&seqRange,setRange);
+	/*</Ramu>*/
+
+	Break(maing);
+	shift(savealnw, 0, 20);
+	make_prompt(savealnw, prompt);
+        stdLineHeight=18;
+        SelectFont(programFont);
+	Break(savealnw);
+/* output file name; the default name is filled in below */
+	savealntext=DialogText(savealnw, "", 35, NULL);
+	Break(savealnw);
+	savegr=HiddenGroup(savealnw, 2, 0, NULL);
+	shift(savegr, 60, 20);
+	save_ok=PushButton(savegr, "  OK  ", save_proc);
+	shift(savegr, 20,0);
+	save_can=PushButton(savegr, "CANCEL", CancelWin);
+
+	SetTitle(savealntext, path);
+	Show(savealnw);
+
+}
+
+/*
+ * Build and show a window asking for the name of a file to read (stored
+ * in readfilew).
+ *
+ * title     - window title.
+ * prompt    - label shown above the file name field.
+ * filename  - initial content of the file name field; NULL leaves it empty.
+ * read_proc - callback attached to the OK button.
+ *
+ * The file name field is stored in readfiletext.
+ */
+static void read_file_window(char *title,char *prompt,char *filename,void read_proc(ButtoN but))
+{
+	GrouP layout;
+	GrouP buttons;
+
+	SelectFont(systemFont);
+	stdCharWidth=CharWidth('A');
+	stdLineHeight=LineHeight();
+	readfilew=FixedWindow(-50, -33, -10, -10, title,RemoveWin);
+
+	layout=HiddenGroup(readfilew,2,0,NULL);
+	SetGroupSpacing(layout,0,10);
+
+	shift(readfilew, 0, 20);
+	make_prompt(readfilew, prompt);
+	stdLineHeight=18;
+	SelectFont(programFont);
+	Break(readfilew);
+	readfiletext=DialogText(readfilew, "", 35, NULL);
+	if (filename)
+		SetTitle(readfiletext, filename);
+	Break(readfilew);
+
+	/* OK / CANCEL row; the button handles are not needed afterwards */
+	buttons=HiddenGroup(readfilew, 2, 0, NULL);
+	shift(buttons, 60, 20);
+	PushButton(buttons, "  OK  ", read_proc);
+	shift(buttons, 20,0);
+	PushButton(buttons, "CANCEL", CancelWin);
+
+	Show(readfilew);
+}
+
+/* Button callback: remove the window that contains the pressed button. */
+static void CancelWin (ButtoN but)
+{
+	Remove(ParentWindow(but));
+}
+
+/*
+ * Button callback: start a new search. Resets find_pos so that
+ * SearchStringAgain() begins at sequence 0, column 0 (it resumes at
+ * find_pos.res+1), then runs the search.
+ */
+static void SearchStr(ButtoN but)
+{
+
+/* reset the current position */
+
+	find_pos.seq=0;
+	find_pos.res=-1;
+
+/* find the next occurrence of the string */
+	SearchStringAgain(but);
+
+
+}
+
+/*
+ * Button callback: find the next occurrence of the search string.
+ *
+ * The string is read from the findtext field, stripped, upper-cased and
+ * stored in find_string. The search resumes at (find_pos.seq,
+ * find_pos.res+1) and only looks at sequences that are selected in the
+ * active panel. Gap characters ('-') following a residue are skipped, so
+ * a match may extend across gaps. On success find_pos is updated to the
+ * start of the match; the outcome is reported through info() either way.
+ */
+static void SearchStringAgain(ButtoN but)
+{
+	int i,length;
+	int ix;			/* number of pattern characters matched so far */
+	int seq,res,start_res;
+	Boolean in_string,found;
+	panel_data ndata,sdata;
+
+	GetTitle(findtext, filename, FILENAMELEN);
+	stripspace(filename);
+
+	/* strncpy() leaves the copy unterminated when the source fills the
+	   buffer, so terminate explicitly.
+	   NOTE(review): assumes find_string holds at least MAXFINDSTR chars. */
+	strncpy(find_string,filename,MAXFINDSTR);
+	find_string[MAXFINDSTR-1]='\0';
+	length=strlen(find_string);
+	if(length==0) return;
+	/* the cast keeps toupper() defined for characters with the high bit set */
+	for(i=0;i<length;i++)
+		find_string[i]=toupper((unsigned char)find_string[i]);
+
+	GetPanelExtra(active_panel.names,&ndata);
+	GetPanelExtra(active_panel.seqs,&sdata);
+
+	in_string=FALSE;
+	found=FALSE;
+	start_res=0;
+	ix=0;
+	seq=find_pos.seq;
+	res=find_pos.res+1;
+	while (seq<ndata.nseqs)
+	{
+		if(ndata.selected[seq]==TRUE)
+		{
+			while (res<sdata.ncols)
+			{
+				if(sdata.lines[seq][res]==find_string[ix])
+				{
+					if(in_string==FALSE)
+						start_res=res;
+					ix++;
+					in_string=TRUE;
+				}
+				else if(in_string==TRUE)
+				{
+					/* mismatch inside a candidate: restart just
+					   after the candidate's first column */
+					res=start_res;
+					ix=0;
+					in_string=FALSE;
+				}
+				if(ix==length)
+				{
+					find_pos.seq=seq;
+					find_pos.res=start_res;
+					found=TRUE;
+					break;
+				}
+				res++;
+				while(res<sdata.ncols && sdata.lines[seq][res]=='-')
+					res++;
+			}
+		}
+		if(found) break;
+		/* a partial match at the end of one sequence must not be
+		   continued in the next one */
+		ix=0;
+		in_string=FALSE;
+		seq++;
+		res=0;
+	}
+
+
+	if(found==FALSE)
+		info("String %s not found",find_string);
+	else
+	{
+		info("String %s in sequence %s, column %d",find_string,names[find_pos.seq+1],find_pos.res+1);
+	}
+}
+
+/*
+ * Button callback: write the sequence panel (seq_panel) as PostScript.
+ *
+ * The output name is read from savepstext and the parameter file name
+ * from pspartext; the latter is stored in the file-level ps_par_file.
+ * The remaining layout options are taken from file-level settings.
+ */
+static void SavePSSeqFile(ButtoN but)
+{
+	char *out_name;
+
+	/* output file name as typed by the user */
+	GetTitle(savepstext, filename, FILENAMELEN);
+	stripspace(filename);
+	out_name=(char *)ckalloc(sizeof(char)*FILENAMELEN);
+	strcpy(out_name,filename);
+
+	/* parameter file name; `filename` is reused as scratch space */
+	GetTitle(pspartext, filename, FILENAMELEN);
+	stripspace(filename);
+	strcpy(ps_par_file,filename);
+
+	write_ps_file(seq_panel,out_name,ps_par_file,
+		pagesize,orientation,
+		ps_header,ps_ruler,ps_resno,
+		resize,first_printres,last_printres,
+		blocklen,ps_curve);
+
+	info("Postscript file %s written",out_name);
+	ckfree(out_name);
+}
+
+/*
+ * Button callback: write the first profile panel (prf_panel[0]) as
+ * PostScript.
+ *
+ * The output name is read from savepstext and the parameter file name
+ * from pspartext. Both names are copied into buffers allocated here and
+ * released before returning.
+ */
+static void SavePSPrf1File(ButtoN but)
+{
+	char *ps_file;
+	char *ps_par_file;
+
+	GetTitle(savepstext, filename, FILENAMELEN);
+	stripspace(filename);
+
+        ps_file=(char *)ckalloc(FILENAMELEN*sizeof(char));
+	strcpy(ps_file,filename); 
+
+	GetTitle(pspartext, filename, FILENAMELEN);
+	stripspace(filename);
+
+        ps_par_file=(char *)ckalloc(FILENAMELEN*sizeof(char));
+	strcpy(ps_par_file,filename); 
+
+	write_ps_file(prf_panel[0],ps_file,ps_par_file,pagesize,orientation,
+		ps_header,ps_ruler,ps_resno,
+		resize,first_printres,last_printres,blocklen,ps_curve);
+
+	info("Postscript file %s written",ps_file);
+	ckfree(ps_file);
+	/* the parameter-file buffer was previously leaked on every call */
+	ckfree(ps_par_file);
+
+}
+
+/*
+ * Button callback: write the second profile panel (prf_panel[1]) as
+ * PostScript.
+ *
+ * The output name is read from savepstext and the parameter file name
+ * from pspartext. Both names are copied into buffers allocated here and
+ * released before returning.
+ */
+static void SavePSPrf2File(ButtoN but)
+{
+	char *ps_file;
+	char *ps_par_file;
+
+	GetTitle(savepstext, filename, FILENAMELEN);
+	stripspace(filename);
+
+        ps_file=(char *)ckalloc(FILENAMELEN*sizeof(char));
+	strcpy(ps_file,filename); 
+
+	GetTitle(pspartext, filename, FILENAMELEN);
+	stripspace(filename);
+
+        ps_par_file=(char *)ckalloc(FILENAMELEN*sizeof(char));
+	strcpy(ps_par_file,filename); 
+
+	write_ps_file(prf_panel[1],ps_file,ps_par_file,pagesize,orientation,
+		ps_header,ps_ruler,ps_resno,
+		resize,first_printres,last_printres,blocklen,ps_curve);
+
+	info("Postscript file %s written",ps_file);
+	ckfree(ps_file);
+	/* the parameter-file buffer was previously leaked on every call */
+	ckfree(ps_par_file);
+
+}
+
+/*
+ * Button callback: save sequences 1..nseqs over the residue range
+ * firstres..lastres via write_file(), then clear the sequence panel's
+ * modified flag.
+ * NOTE(review): write_file() returns nothing, so the flag is cleared and
+ * "saved" is reported without knowing whether the write succeeded.
+ */
+static void SaveSeqFile(ButtoN but)
+{
+	write_file(1,nseqs,firstres,lastres);
+	seq_panel.modified=FALSE;
+	info("File %s saved",filename);
+}
+
+/*
+ * Button callback: save sequences 1..profile1_nseqs over the residue
+ * range firstres..lastres via write_file(), then clear the modified flag
+ * of prf_panel[0].
+ * NOTE(review): write_file() returns nothing, so the flag is cleared and
+ * "saved" is reported without knowing whether the write succeeded.
+ */
+static void SavePrf1File(ButtoN but)
+{
+	write_file(1,profile1_nseqs,firstres,lastres);
+	prf_panel[0].modified=FALSE;
+	info("File %s saved",filename);
+}
+
+/*
+ * Button callback: save sequences profile1_nseqs+1..nseqs over the
+ * residue range firstres..lastres via write_file(), then clear the
+ * modified flag of prf_panel[1].
+ * NOTE(review): write_file() returns nothing, so the flag is cleared and
+ * "saved" is reported without knowing whether the write succeeded.
+ */
+static void SavePrf2File(ButtoN but)
+{
+	write_file(profile1_nseqs+1,nseqs,firstres,lastres);
+	prf_panel[1].modified=FALSE;
+	info("File %s saved",filename);
+}
+
+/*
+ * This is equivalent to open_alignment_output(), but takes the output
+ * file names from the dialog text fields instead of prompting.
+ *
+ * For every enabled output format the name is read from that format's
+ * text field, stripped, and opened with open_explicit_file(); the stream
+ * is stored in the matching *_outfile variable. If save_log is set, the
+ * log file (path of seqname + "log") is opened for appending.
+ *
+ * Returns TRUE when every requested alignment file was opened, FALSE when
+ * no format is selected or an open fails. A log file that cannot be
+ * opened is reported through error() but does not change the result.
+ * NOTE(review): files opened before a failing one are left open here.
+ */
+
+static Boolean open_aln_files(void)
+{
+	char path[FILENAMELEN];
+
+	/* FASTA counts as a format too: it is opened below, so selecting it
+	   alone must not be rejected */
+	if(!output_clustal && !output_nbrf && !output_gcg &&
+		 !output_phylip && !output_gde && !output_nexus &&
+		 !output_fasta) {
+                error("You must select an alignment output format");
+                return FALSE;
+        }
+
+	if(output_clustal) {
+		GetTitle(cl_outtext,filename,FILENAMELEN);
+		stripspace(filename);
+		if((clustal_outfile = open_explicit_file(
+			filename))==NULL) return FALSE;
+	}
+	if(output_nbrf) {
+		GetTitle(pir_outtext,filename,FILENAMELEN);
+		stripspace(filename);
+		if((nbrf_outfile = open_explicit_file(
+			filename))==NULL) return FALSE;
+	}
+	if(output_gcg) {
+		GetTitle(msf_outtext,filename,FILENAMELEN);
+		stripspace(filename);
+		if((gcg_outfile = open_explicit_file(
+			filename))==NULL) return FALSE;
+	}
+	if(output_phylip) {
+		GetTitle(phylip_outtext,filename,FILENAMELEN);
+		stripspace(filename);
+		if((phylip_outfile = open_explicit_file(
+			filename))==NULL) return FALSE;
+	}
+	if(output_gde) {
+		GetTitle(gde_outtext,filename,FILENAMELEN);
+		stripspace(filename);
+		if((gde_outfile = open_explicit_file(
+			filename))==NULL) return FALSE;
+	}
+	if(output_nexus) {
+		GetTitle(nexus_outtext,filename,FILENAMELEN);
+		stripspace(filename);
+		if((nexus_outfile = open_explicit_file(
+			filename))==NULL) return FALSE;
+	}
+
+/* <Ramu> */
+	if(output_fasta) {
+		GetTitle(fasta_outtext,filename,FILENAMELEN);
+		stripspace(filename);
+		if((fasta_outfile = open_explicit_file(
+			filename))==NULL) return FALSE;
+	}
+/* </Ramu> */
+	if(save_log)
+	{
+        	get_path(seqname,path);
+        	strcpy(save_log_filename,path);
+        	strcat(save_log_filename,"log");
+		if ((save_log_fd=fopen(save_log_filename,"a"))==NULL)
+			error("Cannot open log file %s",save_log_filename);
+	}
+
+	return TRUE;
+}
+
+/*
+ * Write sequences fseq..lseq to the file named in the savealntext field,
+ * in the format selected by save_format.
+ *
+ *   fseq, lseq - first and last sequence to write (inclusive)
+ *   fres       - first residue to write; values below 1 mean "from 1"
+ *   lres       - last residue to write; values below 1 mean "up to the
+ *                longest of the selected sequences"
+ *
+ * If the output file cannot be opened the function returns without
+ * writing anything and leaves the save window in place. Otherwise the
+ * file is always closed, and the save window is removed if it is visible.
+ */
+static void write_file(int fseq, int lseq, int fres, int lres)
+{
+	int i,length=0;
+	FILE *outfile;
+	const char *label=NULL;	/* format name for the status message */
+
+	GetTitle(savealntext, filename, FILENAMELEN);
+	stripspace(filename);
+
+	/* open_explicit_file() yields NULL on failure; a NULL stream must
+	   not reach the writers or fclose() */
+	outfile=open_explicit_file(filename);
+	if(outfile==NULL) return;
+
+	/* length of the longest sequence in the requested set */
+	for (i=fseq;i<=lseq;i++)
+		if (length < seqlen_array[i]) length = seqlen_array[i];
+
+	if(fres<1) fres=1;
+	if(lres<1) lres=length;
+	length=lres-fres+1;
+
+	if(save_format==CLUSTAL) {
+		clustal_out(outfile, fres, length, fseq, lseq);
+		label="CLUSTAL";
+	}
+	else if(save_format==PIR)  {
+		nbrf_out(outfile, fres, length, fseq, lseq);
+		label="NBRF/PIR";
+	}
+	else if(save_format==MSF)  {
+		gcg_out(outfile, fres, length, fseq, lseq);
+		label="GCG/MSF";
+	}
+	else if(save_format==PHYLIP)  {
+		phylip_out(outfile, fres, length, fseq, lseq);
+		label="PHYLIP";
+	}
+	else if(save_format==GDE)  {
+		gde_out(outfile, fres, length, fseq, lseq);
+		label="GDE";
+	}
+	else if(save_format==NEXUS)  {
+		nexus_out(outfile, fres, length, fseq, lseq);
+		label="NEXUS";
+	}
+/* <Ramu> */
+	else if(save_format==FASTA)  {
+		fasta_out(outfile, fres, length, fseq, lseq);
+		label="FASTA";
+	}
+/* </Ramu> */
+
+	/* close on every path, including an unrecognised save_format */
+	fclose(outfile);
+	if(label!=NULL)
+		info("%s format file created  [%s]",label,filename);
+
+	if (Visible(savealnw))
+	{
+		Remove(savealnw);
+		savealnw=NULL;
+	}
+
+
+}
+
+/*
+ * Menu callback: show the "CREATE TREE" dialog.
+ *
+ * Refuses to open (with an error message) when no file is loaded or when
+ * fewer than two sequences are present. Otherwise builds a window
+ * (savetreew) with one text field (savetreetext) pre-filled with the
+ * result of get_path(seqname) followed by "dnd". OK runs CreateAlignTree,
+ * CANCEL runs CancelWin.
+ */
+static void SaveTreeWin (IteM item)
+{
+	GrouP savegr;
+	ButtoN save_ok, save_can;
+	char path[FILENAMELEN];
+
+	if (empty)
+	{
+		error("No file loaded");
+		return;
+	}
+        if (nseqs < 2)
+	{
+                error("Alignment has only %d sequences",nseqs);
+                return;
+        }
+
+	SelectFont(systemFont);
+	stdCharWidth=CharWidth('A');
+	stdLineHeight=LineHeight();
+	savetreew=FixedWindow(-50, -33, -10, -10, "CREATE TREE",RemoveWin);
+	shift(savetreew, 0, 20);
+	make_prompt(savetreew, "SAVE TREE AS :");
+	Advance(savetreew);
+	shift(savetreew, 0, -10);
+	/* the text field is created with programFont; systemFont is restored after */
+	stdLineHeight=18;
+	SelectFont(programFont);
+	savetreetext=DialogText(savetreew, "", 35, NULL);
+	SelectFont(systemFont);
+	stdLineHeight=15;
+	Break(savetreew);
+	savegr=HiddenGroup(savetreew, 2, 0, NULL);
+	shift(savegr, 140, 20);
+	save_ok=PushButton(savegr, "  OK  ", CreateAlignTree);
+	shift(savegr, 20, 0);
+	save_can=PushButton(savegr, "CANCEL", CancelWin);
+
+	/* default name: path of the loaded sequence file plus "dnd"
+	   (presumably get_path() keeps the trailing '.' - TODO confirm) */
+	get_path(seqname,path);
+	strcat(path,"dnd");
+  
+	SetTitle(savetreetext, path);
+	Show(savetreew);
+}
+
+/*
+ * Menu callback: show the "DRAW TREE" dialog.
+ *
+ * Refuses to open (with an error message) when no file is loaded or when
+ * fewer than two sequences are present. For every enabled output option
+ * a prompt and a text field are added, pre-filled with the result of
+ * get_path(seqname) followed by an extension:
+ *
+ *   output_tree_clustal   -> drawnjtreetext,  "nj"
+ *   output_tree_phylip    -> drawphtreetext,  "ph"
+ *   output_tree_distances -> drawdsttreetext, "dst"
+ *   output_tree_nexus     -> drawnxstreetext, "tre"
+ *   output_pim            -> drawpimtext,     "pim"
+ *
+ * OK runs DrawTree, CANCEL runs CancelWin.
+ */
+static void DrawTreeWin (IteM item)
+{
+	GrouP drawgr;
+	GrouP output_list;
+	ButtoN draw_ok, draw_can;
+	char path[FILENAMELEN];
+	char name[FILENAMELEN];
+
+	if (empty)
+	{
+		error("No file loaded");
+		return;
+	}
+        if (nseqs < 2)
+	{
+                error("Alignment has only %d sequences",nseqs);
+                return;
+        }
+
+	/* common prefix for all default output names */
+	get_path(seqname,path);
+
+	SelectFont(systemFont);
+	stdCharWidth=CharWidth('A');
+	stdLineHeight=LineHeight();
+	drawtreew=FixedWindow(-50, -33, -10, -10, "DRAW TREE",RemoveWin);
+	/* two-column group: prompt on the left, file name field on the right */
+	output_list=HiddenGroup(drawtreew, 2, 0, NULL);
+	if (output_tree_clustal)
+	{
+		make_prompt(output_list, "SAVE CLUSTAL TREE AS :");
+		drawnjtreetext=DialogText(output_list, "", 35, NULL);
+		strcpy(name,path);
+        	strcat(name,"nj");
+		SetTitle(drawnjtreetext, name);
+		Break(output_list);
+	}
+	if (output_tree_phylip)
+	{
+		make_prompt(output_list, "SAVE PHYLIP TREE AS :");
+		drawphtreetext=DialogText(output_list, "", 35, NULL);
+		strcpy(name,path);
+        	strcat(name,"ph");
+		SetTitle(drawphtreetext, name);
+		Break(output_list);
+	}
+	if (output_tree_distances)
+	{
+		make_prompt(output_list, "SAVE DISTANCE MATRIX AS :");
+		drawdsttreetext=DialogText(output_list, "", 35, NULL);
+		strcpy(name,path);
+        	strcat(name,"dst");
+		SetTitle(drawdsttreetext, name);
+		Break(output_list);
+	}
+	if (output_tree_nexus)
+	{
+		make_prompt(output_list, "SAVE NEXUS TREE AS :");
+		drawnxstreetext=DialogText(output_list, "", 35, NULL);
+		strcpy(name,path);
+        	strcat(name,"tre");
+		SetTitle(drawnxstreetext, name);
+		Break(output_list);
+	}
+
+	if (output_pim)
+	{
+		make_prompt(output_list, "SAVE % IDENTITY MATRIX AS :");
+		drawpimtext=DialogText(output_list, "", 35, NULL);
+		strcpy(name,path);
+        	strcat(name,"pim");
+		SetTitle(drawpimtext, name);
+		Break(output_list);
+	}
+
+	SelectFont(systemFont);
+	stdLineHeight=15;
+	Break(drawtreew);
+	drawgr=HiddenGroup(drawtreew, 2, 0, NULL);
+	shift(drawgr, 140, 20);
+	draw_ok=PushButton(drawgr, "  OK  ", DrawTree);
+	shift(drawgr, 20, 0);
+	draw_can=PushButton(drawgr, "CANCEL", CancelWin);
+
+	Show(drawtreew);
+}
+
+/*
+ * Menu callback: show the "BOOTSTRAP TREE" dialog.
+ *
+ * Refuses to open (with an error message) when no file is loaded or when
+ * fewer than two sequences are present. The window contains:
+ *
+ *   - a field for the random number seed, initialised from boot_ran_seed
+ *     and handled by set_ran_seed;
+ *   - a field for the number of trials, initialised from boot_ntrials
+ *     and handled by set_ntrials;
+ *   - for every enabled tree output option, a prompt and a text field
+ *     pre-filled with the result of get_path(seqname) followed by an
+ *     extension:
+ *       output_tree_clustal -> bootnjtreetext,  "njb"
+ *       output_tree_phylip  -> bootphtreetext,  "phb"
+ *       output_tree_nexus   -> bootnxstreetext, "treb"
+ *
+ * OK runs BootstrapTree, CANCEL runs CancelWin.
+ */
+static void BootstrapTreeWin (IteM item)
+{
+	GrouP bootgr;
+	ButtoN boot_ok, boot_can;
+	TexT seed,ntrials;
+	char name[FILENAMELEN];
+	char path[FILENAMELEN];
+	char str[FILENAMELEN];
+	GrouP output_list;
+
+	if (empty)
+	{
+		error("No file loaded");
+		return;
+	}
+        if (nseqs < 2)
+	{
+                error("Alignment has only %d sequences",nseqs);
+                return;
+        }
+
+	/* common prefix for all default output names */
+	get_path(seqname,path);
+
+	SelectFont(systemFont);
+	stdCharWidth=CharWidth('A');
+	stdLineHeight=LineHeight();
+	boottreew=FixedWindow(-50, -33, -10, -10, "BOOTSTRAP TREE",RemoveWin);
+        make_prompt(boottreew, "Random number generator seed [1-1000] :");
+	Advance(boottreew);
+	sprintf(str,"%4d",boot_ran_seed);
+        seed=DialogText(boottreew, str, 4,set_ran_seed);
+	Break(boottreew);
+        make_prompt(boottreew, "Number of bootstrap trials [1-10000] :");
+	Advance(boottreew);
+	sprintf(str,"%5d",boot_ntrials);
+        ntrials=DialogText(boottreew, str, 5,set_ntrials);
+	Break(boottreew);
+
+	/* two-column group: prompt on the left, file name field on the right */
+	output_list=HiddenGroup(boottreew, 2, 0, NULL);
+	if (output_tree_clustal)
+	{
+		make_prompt(output_list, "SAVE CLUSTAL TREE AS :");
+		bootnjtreetext=DialogText(output_list, "", 35, NULL);
+		strcpy(name,path);
+        	strcat(name,"njb");
+		SetTitle(bootnjtreetext, name);
+		Break(output_list);
+	}
+	if (output_tree_phylip)
+	{
+		make_prompt(output_list, "SAVE PHYLIP TREE AS :");
+		bootphtreetext=DialogText(output_list, "", 35, NULL);
+		strcpy(name,path);
+        	strcat(name,"phb");
+		SetTitle(bootphtreetext, name);
+		Break(output_list);
+	}
+	if (output_tree_nexus)
+	{
+		make_prompt(output_list, "SAVE NEXUS TREE AS :");
+		bootnxstreetext=DialogText(output_list, "", 35, NULL);
+		strcpy(name,path);
+        	strcat(name,"treb");
+		SetTitle(bootnxstreetext, name);
+		Break(output_list);
+	}
+	SelectFont(systemFont);
+	stdLineHeight=15;
+	Break(boottreew);
+	bootgr=HiddenGroup(boottreew, 2, 0, NULL);
+	shift(bootgr, 140, 20);
+	boot_ok=PushButton(bootgr, "  OK  ", BootstrapTree);
+	shift(bootgr, 20, 0);
+	boot_can=PushButton(bootgr, "CANCEL", CancelWin);
+
+
+	Show(boottreew);
+}
+
+/*
+ * Button callback for the "CREATE TREE" dialog: build the tree and save
+ * it under the name typed into savetreetext.
+ *
+ * The name is stripped before it is handed to make_tree(), so the file
+ * that is created has the same name as the one reported in the status
+ * message. If save_log is set, the log file (path of seqname + "log") is
+ * opened for appending around the call and closed afterwards; failure to
+ * open it is reported but does not stop the tree from being built. The
+ * dialog window is removed before the computation starts.
+ */
+static void CreateAlignTree(ButtoN but)
+{
+	char path[FILENAMELEN];
+	char phylip_name[FILENAMELEN];
+
+	GetTitle(savetreetext, filename, FILENAMELEN);
+	/* strip first: copying the raw field text would keep stray blanks
+	   in the name passed to make_tree() */
+	stripspace(filename);
+	strcpy(phylip_name,filename);
+
+	info("Doing pairwise alignments...");
+	if(save_log)
+	{
+        	get_path(seqname,path);
+        	strcpy(save_log_filename,path);
+        	strcat(save_log_filename,"log");
+		if ((save_log_fd=fopen(save_log_filename,"a"))==NULL)
+			error("Cannot open log file %s",save_log_filename);
+	}
+
+	WatchCursor();
+	if (Visible(savetreew))
+	{
+		Remove(savetreew);
+		savetreew=NULL;
+	}
+	make_tree(phylip_name);
+	if(save_log && save_log_fd!=NULL)
+	{
+		fclose(save_log_fd);
+		save_log_fd=NULL;
+	}
+	ArrowCursor();
+	info("Tree %s created",filename);
+}
+
+static void DrawTree(ButtoN but)
+{
+	char path[FILENAMELEN];
+	char phylip_name[FILENAMELEN];
+	char clustal_name[FILENAMELEN];
+	char dist_name[FILENAMELEN];
+	char nexus_name[FILENAMELEN];
+	char pim_name[FILENAMELEN];
+
+	if(output_tree_clustal)
+	{
+		GetTitle(drawnjtreetext, filename, FILENAMELEN);
+		stripspace(filename);
+		strcpy(clustal_name,filename);
+	}
+	if(output_tree_phylip)
+	{
+		GetTitle(drawphtreetext, filename, FILENAMELEN);
+		stripspace(filename);
+		strcpy(phylip_name,filename);
+	}
+	if(output_tree_distances)
+	{
+		GetTitle(drawdsttreetext, filename, FILENAMELEN);
+		stripspace(filename);
+		strcpy(dist_name,filename)