[med-svn] [codonw] 01/03: Imported Upstream version 1.4.4

Sascha Steinbiss sascha-guest at moszumanska.debian.org
Fri Nov 20 23:17:19 UTC 2015


This is an automated email from the git hooks/post-receive script.

sascha-guest pushed a commit to branch master
in repository codonw.

commit cd6e300044cf9dea91487a89fa282ec0d23f4f1e
Author: Sascha Steinbiss <sascha at steinbiss.name>
Date:   Fri Nov 20 21:18:43 2015 +0000

    Imported Upstream version 1.4.4
---
 Makefile           |   65 ++
 Makefile.orig      |   65 ++
 README_coa.txt     |  156 +++
 README_indices.txt |  141 +++
 READ_coa.txt       |  167 ++++
 Readme.txt         |  115 +++
 Recoding.txt       |   80 ++
 Tutorial.txt       |  350 +++++++
 codonW.h           |  683 +++++++++++++
 codonW.hlp         |  502 ++++++++++
 codonWinstall      |  271 +++++
 codon_us.c         | 2159 +++++++++++++++++++++++++++++++++++++++
 codons.c           | 1149 +++++++++++++++++++++
 commline.c         |  755 ++++++++++++++
 coresp.c           | 1673 +++++++++++++++++++++++++++++++
 indices.txt        |  139 +++
 input.dat          | 2835 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 menu.c             | 1302 ++++++++++++++++++++++++
 open_fil.c         |  236 +++++
 tester.c           |  239 +++++
 20 files changed, 13082 insertions(+)

diff --git a/Makefile b/Makefile
new file mode 100755
index 0000000..936befc
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,65 @@
+override cflags  = $(CFLAGS) -g
+
+objects  = codon_us.o codons.o open_fil.o commline.o menu.o tester.o coresp.o
+linked   = rscu cu aau raau tidy reader cutab cutot transl bases base3s dinuc cai fop gc3s gc cbi enc
+
+CC=cc
+CFLAGS= -O -DBSD
+LN=ln -f
+
+
+all: codonw links   
+
+codonw: $(objects)
+	$(CC) $(CFLAGS)  $(objects) -o codonw -lm
+
+clean:
+	\rm -f $(objects)
+
+cleanall:
+	\rm -f $(objects) codonw Makefile $(linked)
+
+realclean:
+	\rm -f $(objects) codonw Makefile $(linked)
+
+codon_us.o: codon_us.c codonW.h 
+	$(CC) -c $(CFLAGS) codon_us.c  
+
+menu.o: menu.c codonW.h 
+	$(CC) -c $(CFLAGS) menu.c
+
+codons.o: codons.c codonW.h 
+	$(CC) -c $(CFLAGS) codons.c
+
+coresp.o: coresp.c codonW.h 
+	$(CC) -c $(CFLAGS) coresp.c
+
+open_fil.o:    open_fil.c codonW.h
+	$(CC) -c $(CFLAGS) open_fil.c
+
+commline.o:    commline.c codonW.h 
+	$(CC) -c $(CFLAGS) commline.c
+
+tester.o:      tester.c codonW.h
+	$(CC) -c $(CFLAGS) tester.c
+
+links: codonw
+		$(LN) codonw rscu
+		$(LN) codonw cu
+		$(LN) codonw aau
+		$(LN) codonw raau
+		$(LN) codonw tidy
+		$(LN) codonw reader
+		$(LN) codonw cutab
+		$(LN) codonw cutot
+		$(LN) codonw transl
+		$(LN) codonw bases
+		$(LN) codonw base3s
+		$(LN) codonw dinuc
+		$(LN) codonw cai
+		$(LN) codonw fop
+		$(LN) codonw gc3s
+		$(LN) codonw gc
+		$(LN) codonw cbi
+		$(LN) codonw enc
+
diff --git a/Makefile.orig b/Makefile.orig
new file mode 100755
index 0000000..936befc
--- /dev/null
+++ b/Makefile.orig
@@ -0,0 +1,65 @@
+override cflags  = $(CFLAGS) -g
+
+objects  = codon_us.o codons.o open_fil.o commline.o menu.o tester.o coresp.o
+linked   = rscu cu aau raau tidy reader cutab cutot transl bases base3s dinuc cai fop gc3s gc cbi enc
+
+CC=cc
+CFLAGS= -O -DBSD
+LN=ln -f
+
+
+all: codonw links   
+
+codonw: $(objects)
+	$(CC) $(CFLAGS)  $(objects) -o codonw -lm
+
+clean:
+	\rm -f $(objects)
+
+cleanall:
+	\rm -f $(objects) codonw Makefile $(linked)
+
+realclean:
+	\rm -f $(objects) codonw Makefile $(linked)
+
+codon_us.o: codon_us.c codonW.h 
+	$(CC) -c $(CFLAGS) codon_us.c  
+
+menu.o: menu.c codonW.h 
+	$(CC) -c $(CFLAGS) menu.c
+
+codons.o: codons.c codonW.h 
+	$(CC) -c $(CFLAGS) codons.c
+
+coresp.o: coresp.c codonW.h 
+	$(CC) -c $(CFLAGS) coresp.c
+
+open_fil.o:    open_fil.c codonW.h
+	$(CC) -c $(CFLAGS) open_fil.c
+
+commline.o:    commline.c codonW.h 
+	$(CC) -c $(CFLAGS) commline.c
+
+tester.o:      tester.c codonW.h
+	$(CC) -c $(CFLAGS) tester.c
+
+links: codonw
+		$(LN) codonw rscu
+		$(LN) codonw cu
+		$(LN) codonw aau
+		$(LN) codonw raau
+		$(LN) codonw tidy
+		$(LN) codonw reader
+		$(LN) codonw cutab
+		$(LN) codonw cutot
+		$(LN) codonw transl
+		$(LN) codonw bases
+		$(LN) codonw base3s
+		$(LN) codonw dinuc
+		$(LN) codonw cai
+		$(LN) codonw fop
+		$(LN) codonw gc3s
+		$(LN) codonw gc
+		$(LN) codonw cbi
+		$(LN) codonw enc
+
diff --git a/README_coa.txt b/README_coa.txt
new file mode 100755
index 0000000..eb6410e
--- /dev/null
+++ b/README_coa.txt
@@ -0,0 +1,156 @@
+
+
+README.coa
+
+The permanent result files from a COA created by CodonW have the extension 
+".coa"; for a description of their contents see Table 1.
+
+Short description of output files created by correspondence analysis in
+CodonW.
+
+summary.coa
+This file contains a summary of all the information generated by 
+correspondence analysis, including all the data written to files listed 
+below, except for the output written to cusort.coa. 
+
+eigen.coa
+Each axis generated in the correspondence analysis is represented by a row 
+of information. Each row consists of four columns, (1) the number of the 
+axis, (2) the axis eigenvalue, (3) the relative inertia of the axis, (4) the 
+sum of the relative inertia. 
+
+amino.coa or codon.coa
+Each codon or amino acid included in the correspondence analysis is 
+represented by a row. The first column is description of the variable, the 
+subsequent columns contain the coordinate of the codon or amino acid on the 
+axes, the number of axes is user definable.
+
+genes.coa
+Each row represents one gene, the first column contains a unique description 
+for each gene, and subsequent columns contain the coordinates for each of 
+the recorded axis. If additional genes are added to the correspondence 
+analysis (advanced correspondence analysis option), the coordinates of these 
+genes are appended to this file.
+
+cusort.coa
+Contains the codon usage of each gene, sorted by the gene's coordinate on 
+the principal axis, this information is used to generate the table in 
+
+hilo.coa
+This file records a 2 way Chi squared contingency test between two subsets 
+(as defined by the "advanced correspondence analysis options") of genes 
+positioned at the extremes of  axis 1 (cusort.coa). 
+
+cai.coa
+Contains the relative usage of each codon within each synonym family, the 
+most frequent codon assigned the value one and all other codons are 
+expressed relative to this. This file can be used to calculate species 
+specific CAI values. 
+
+fop.coa and cbi.coa
+Contains a list of the optimal codons and non-optimal codons as identified 
+in the file "hilo.coa". The format of this file can be utilised by CodonW to 
+calculate Fop and CBI using a specific choice of optimal codons.
+
+inertia.coa
+This file is only generated if the exhaustive output option is selected 
+under the advanced correspondence analysis menu. It contains four tables of 
+information, the first two report the absolute contribution of each gene and 
+codon (or amino acid) to the inertia explained by each axis. The second two 
+tables report the fraction of variation in each gene and codon (or amino 
+acid) explained by each axis. 
+
+codon.coa and hilo.coa are not generated during the correspondence analysis
+of amino acids
+
+
+Detailed explanation of file contents
+
+
+summary.coa
+========================================
+Correspondence analysis generates a large volume of data, CodonW writes the 
+essential data necessary to interpret the correspondence analysis to the 
+file "summary.coa".
+
+genes.coa codons.coa amino.coa
+========================================
+The most complex analysis that CodonW performs is correspondence analysis 
+(COA). COA creates a series of orthogonal axis to identify trends that 
+explain the data variation, with each subsequent axis explaining a 
+decreasing amount of the variation. COA positions each gene and codon (or 
+amino acid) on these axes. An important property is that the ordination of 
+the rows (genes) and columns (codons or amino acids) are superimposable. 
+
+
+eigen.coa
+========================================
+The eigenvalues of the principal trends, as well as the more accessible 
+fraction (with the cumulative total) of the total data inertia, that each 
+axis is explaining, are recorded to summary.coa and eigen.coa. 
+
+
+cusort.coa 
+======================================== 
+To simplify analysis of codon usage CodonW assumes that the principal trend 
+is correlated with gene expression. It uses this assumption to identify 
+putative optimal codons. Though the adage GIGO "garbage in, garbage out" 
+must be stressed, it is the researcher's responsibility to establish that the 
+principal trend is correlated with gene expression (see tutorial for some 
+examples of how to do this).
+
+To identify the putative optimal codons, the genes are sorted according to 
+their position on the principal axis, the sorted codon usage of these genes is 
+written to the file "cusort.coa". Then a number of genes, decided by the 
+advanced correspondence analysis menu option "number of genes used to 
+identify optimal codons", are read from the start and end of this file (i.e. 
+equivalent to the extremes of the principal axis), the codon usage of each set 
+of genes is totalled. The set of genes with the lower Nc (more highly 
+biased) is putatively 
+identified as the more highly expressed.  
+
+hilo.coa
+======================================== 
+Optimal codons are defined as those codons that occur significantly more 
+often in highly expressed genes relative to their frequency in lowly 
+expressed genes. Significance is assessed by a two-way chi square 
+contingency test with the criterion of p < 0.01. The advantage of using a 
+test of significance to identify optimal codons is that variation in codon 
+usage between highly and lowly expressed genes, that is due to random noise 
+is suppressed, but a disadvantage is that the test is dependent on sample 
+size.  
+
+After CodonW does a two way chi squared test on the genes taken from the 
+extremes of axis 1,  their codon usage and RSCU is output as a table to 
+"summary.coa" and "hilo.coa". Those codons which have been putatively 
+identified as optimal (p < 0.01) are indicated with an asterisk (*). Though 
+not considered optimal by CodonW, codons that occur more frequently in the 
+highly expressed dataset at 0.01 < p < 0.05 are indicated with an at sign 
+(@). 
+
+
+fop.coa cbi.coa cai.coa
+======================================== 
+CodonW measures the degree to which the codon usage of a gene has adapted 
+towards the usage of optimal codons. It does this by calculating these 
+indices, the frequency of optimal codons (Fop), codon bias index, and codon 
+adaptation index (CAI). To calculate these indexes, information about codon 
+usage in the species being analysed is needed. The indices Fop and CBI used 
+the optimal codons for the species. The index CAI uses codon adaptation 
+values.
+For some species this information is known, and for these the optimal codons 
+and codon adaptiveness values are in-built into codonW (see the "Change 
+Defaults" menu). For other species these indexes cannot be calculated unless 
+the additional information is known. During calculation of these indices the 
+user is prompted for input files.
+During a COA CodonW generates the output files "cai.coa", "fop.coa" and 
+"cbi.coa". These files can be used as input files for their respective 
+indices (they are already in the correct format). 
+Again it must be stressed that CodonW must make a number of assumptions to 
+generate these files. These are: that the major trend in the codon usage is 
+correlated with expression level; that the dataset contains highly expressed 
+genes; that the genes used to identify optimal codons were highly 
+expressed. If these assumptions are valid then the files "cbi.coa", 
+"cai.coa" and "fop.coa" can be used to calculate the indexes CBI, CAI and 
+Fop respectively. 
+
diff --git a/README_indices.txt b/README_indices.txt
new file mode 100755
index 0000000..ab79a2a
--- /dev/null
+++ b/README_indices.txt
@@ -0,0 +1,141 @@
+Codon usage indices 
+
+This document describes the indices calculated by CodonW, by default only 
+the G+C content of the sequence is reported. The others being dependent on 
+the genetic code selected. More than one index may be calculated at the same 
+time.  
+
+Codon Adaptation Index (CAI) (Sharp and Li 1987). 
+CAI is a measurement of the relative adaptiveness of the codon usage of a 
+gene towards the codon usage of highly expressed genes. The relative 
+adaptiveness (w) of each codon is the ratio of the usage of each codon, to 
+that of the most abundant codon for the same amino acid. The relative 
+adaptiveness of codons for albeit a limited choice of species, can be 
+selected from Menu 3. The user can also input a personal choice of values. 
+The CAI index is defined as the geometric mean of these relative 
+adaptiveness values. Non-synonymous codons and termination codons (dependent 
+on genetic code) are excluded. 
+ 
+To prevent a codon absent from the reference set but present in other genes 
+from having a relative adaptiveness value of zero, which would cause CAI to 
+evaluate to zero for any genes which used that codon; it was suggested that 
+absent codons should be assigned a frequency of 0.5 when estimating w (Sharp 
+and Li 1987). An alternative suggestion was that w should be adjusted to 
+0.01 where otherwise it would be less than this value (Bulmer 1988). CodonW 
+does not adjust the w value if a non-zero-input value is found; zero values 
+are assigned a value of 0.01. 
+
+Frequency of Optimal codons (Fop) (Ikemura 1981). 
+This index, is the ratio of optimal codons to synonymous codons (genetic 
+code dependent). Optimal codons for several species are in-built and can be 
+selected using Menu 3. By default, the optimal codons of E. coli are 
+assumed. The user may also enter a personal choice of optimal codons. If 
+rare synonymous codons have been identified, there is a choice of 
+calculating the original Fop index or a modified Fop index. Fop values for 
+the original index are always between 0 (where no optimal codons are used) 
+and 1 (where only optimal codons are used). When calculating the modified 
+Fop index, negative values are adjusted to zero. 
+
+Codon Bias Index (CBI) (Bennetzen and Hall 1982). 
+Codon bias index is another measure of directional codon bias, it measures 
+the extent to which a gene uses a subset of optimal codons. CBI is similar 
+to Fop as used by Ikemura, with expected usage used as a scaling factor. In a 
+gene with extreme codon bias, CBI will equal 1.0, in a gene with random 
+codon usage CBI will equal 0.0. Note that it is possible for the number of 
+optimal codons to be less than expected by random chance. This results in a 
+negative value for CBI.
+
+The effective number of codons (NC) (Wright 1990).
+This index is a simple measure of overall codon bias and is analogous to the 
+effective number of alleles measure used in population genetics. Knowledge 
+of the optimal codons or a reference set of highly expressed genes is 
+unnecessary. Initially the homozygosity for each amino acid is estimated 
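+from the squared codon frequencies (see Equation 5).
+
+	Equation 5:  F = (n * SUM(p_i^2) - 1) / (n - 1),  where p_i is the frequency of codon i and n is the number of codons observed for that amino acid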
+If amino acids are rare or missing, adjustments must be made. When 
+there are no amino acids in a synonymous family, Nc is not calculated 
+as the gene is either too short or has extremely skewed amino acid 
+usage (Wright 1990). An exception to this is made for genetic codes 
+where isoleucine is the only 3-fold synonymous amino acid, and is not 
+used in the protein gene. The reported value of Nc is always between 20 
+(when only one codon is effectively used for each amino acid) and 61 
+(when codons are used randomly). If the calculated Nc is greater than 
+61 (because codon usage is more evenly distributed than expected), it 
+is adjusted to 61.
+
+G+C content of the gene. 
+The frequency of nucleotides that are guanine or cytosine.
+
+G+C content 3rd position of synonymous codons (GC3s).
+This is the fraction of codons, that are synonymous at the third codon 
+position, which have either a guanine or cytosine at that third codon 
+position. 
+
+Silent base compositions. 
+Selection of this option calculates four separate indices, i.e. G3s, C3s, 
+A3s & T3s. Although correlated with GC3s, this index is not directly 
+comparable. It quantifies the usage of each base at synonymous third codon 
+positions. When calculating GC3s each synonymous amino acid has at least one 
+synonym with G or C in the third position. Two or three fold synonymous 
+amino acids do not have an equal choice between bases in the synonymous 
+third position. The index A3s is the frequency that codons have an A at their 
+synonymous third position, relative to the amino acids that could have a 
+synonym with A in the synonymous third codon position. The codon usage 
+analysis of Caenorhabditis elegans identified a trend correlated with the 
+frequency of G3s. Though it was not clear whether it reflected variation in 
+base composition (or mutational biases) among regions of the C. elegans 
+genome, or another factor (Stenico et al. 1994).
+
+Length silent sites (Lsil). 
+Frequency of synonymous codons.
+
+Length  amino acids (Laa). 
+Equivalent to the number of translatable codons.
+
+Hydropathicity of protein. 
+The general average hydropathicity or (GRAVY) score, for the hypothetical 
+translated gene product. It is calculated as the arithmetic mean of the sum 
+of the hydropathic indices of each amino acid (Kyte and Doolittle 1982). 
+This index has been used to quantify the major COA trends in the amino acid 
+usage of E. coli genes (Lobry and Gautier 1994). 
+
+Aromaticity score
+The frequency of aromatic amino acids (Phe, Tyr, Trp) in the hypothetical 
+translated gene product. The hydropathicity and aromaticity protein scores 
+are indices of amino acid usage. The strongest trend in the variation in the 
+amino acid composition of E. coli genes is correlated with protein 
+hydropathicity, the second trend is correlated with gene expression, while 
+the third is correlated with aromaticity (Lobry and Gautier 1994). The 
+variation in amino acid composition can have applications for the analysis 
+of codon usage. If total codon usage is analysed, a component of the 
+variation will be due to differences in the amino acid composition of genes. 
+
+
+
+Bennetzen, J. L., and B. D. Hall, (1982). Codon selection in yeast. Journal 
+of Biological Chemistry 257: 3026-3031.
+Bulmer, M., (1988). Are codon usage patterns in unicellular organisms 
+determined by selection-mutation balance. Journal of Evolutionary 
+Biology 1: 15-26.
+Ikemura, T., (1981). Correlation between the abundance of Escherichia coli 
+transfer RNAs and the occurrence of the respective codons in its 
+protein genes: a proposal for a synonymous codon choice that is 
+optimal for the E. coli system. Journal of Molecular Biology 151: 389-
+409.
+Kyte, J., and R. Doolittle, (1982). A simple method for displaying the 
+hydropathic character of a protein. Journal of Molecular Biology 157: 
+105-132.
+Lobry, J. R., and C. Gautier, (1994). Hydrophobicity, expressivity and 
+aromaticity are the major trends of amino acid usage in 999 
+Escherichia coli chromosome encoded genes. Nucleic Acids Research 22: 
+3174-3180.
+Sharp, P. M., and W. H. Li, (1987). The codon adaptation index a measure of 
+directional synonymous codon usage bias, and its potential 
+applications. Nucleic Acids Research 15: 1281-1295.
+Stenico, M., A. T. Lloyd and P. M. Sharp, (1994). Codon usage in 
+Caenorhabditis elegans delineation of translational selection and 
+mutational biases. Nucleic Acids Research 22: 2437-2446.
+Wright, F., (1990). The effective number of codons used in a gene. Gene  87 
+: 23-29.
+
diff --git a/READ_coa.txt b/READ_coa.txt
new file mode 100755
index 0000000..8552d9d
--- /dev/null
+++ b/READ_coa.txt
@@ -0,0 +1,167 @@
+
+========================================
+
+CodonW was written by John Peden in the laboratory
+of Paul Sharp at the University of Nottingham. It is distributed under the
+terms of the GNU public license, see the file License included with the
+distribution. 
+
+========================================
+
+README.coa
+
+The permanent result files from a COA created by CodonW have the extension 
+".coa"; for a description of their contents see Table 1.
+
+Short description of output files created by correspondence analysis in
+CodonW.
+
+summary.coa
+This file contains a summary of all the information generated by 
+correspondence analysis, including all the data written to files listed 
+below, except for the output written to cusort.coa. 
+
+eigen.coa
+Each axis generated in the correspondence analysis is represented by a row 
+of information. Each row consists of four columns, (1) the number of the 
+axis, (2) the axis eigenvalue, (3) the relative inertia of the axis, (4) the 
+sum of the relative inertia. 
+
+amino.coa or codon.coa
+Each codon or amino acid included in the correspondence analysis is 
+represented by a row. The first column is description of the variable, the 
+subsequent columns contain the coordinate of the codon or amino acid on the 
+axes, the number of axes is user definable.
+
+genes.coa
+Each row represents one gene, the first column contains a unique description 
+for each gene, and subsequent columns contain the coordinates for each of 
+the recorded axis. If additional genes are added to the correspondence 
+analysis (advanced correspondence analysis option), the coordinates of these 
+genes are appended to this file.
+
+cusort.coa
+Contains the codon usage of each gene, sorted by the gene's coordinate on 
+the principal axis, this information is used to generate the table in 
+
+hilo.coa
+This file records a 2 way Chi squared contingency test between two subsets 
+(as defined by the "advanced correspondence analysis options") of genes 
+positioned at the extremes of  axis 1 (cusort.coa). 
+
+cai.coa
+Contains the relative usage of each codon within each synonym family, the 
+most frequent codon assigned the value one and all other codons are 
+expressed relative to this. This file can be used to calculate species 
+specific CAI values. 
+
+fop.coa and cbi.coa
+Contains a list of the optimal codons and non-optimal codons as identified 
+in the file "hilo.coa". The format of this file can be utilised by CodonW to 
+calculate Fop and CBI using a specific choice of optimal codons.
+
+inertia.coa
+This file is only generated if the exhaustive output option is selected 
+under the advanced correspondence analysis menu. It contains four tables of 
+information, the first two report the absolute contribution of each gene and 
+codon (or amino acid) to the inertia explained by each axis. The second two 
+tables report the fraction of variation in each gene and codon (or amino 
+acid) explained by each axis. 
+
+codon.coa and hilo.coa are not generated during the correspondence analysis
+of amino acids
+
+
+Detailed explanation of file contents
+
+
+summary.coa
+========================================
+Correspondence analysis generates a large volume of data, CodonW writes the 
+essential data necessary to interpret the correspondence analysis to the 
+file "summary.coa".
+
+genes.coa codons.coa amino.coa
+========================================
+The most complex analysis that CodonW performs is correspondence analysis 
+(COA). COA creates a series of orthogonal axis to identify trends that 
+explain the data variation, with each subsequent axis explaining a 
+decreasing amount of the variation. COA positions each gene and codon (or 
+amino acid) on these axes. An important property is that the ordination of 
+the rows (genes) and columns (codons or amino acids) are superimposable. 
+
+
+eigen.coa
+========================================
+The eigenvalues of the principal trends, as well as the more accessible 
+fraction (with the cumulative total) of the total data inertia, that each 
+axis is explaining, are recorded to summary.coa and eigen.coa. 
+
+
+cusort.coa 
+======================================== 
+To simplify analysis of codon usage CodonW assumes that the principal trend 
+is correlated with gene expression. It uses this assumption to identify 
+putative optimal codons. Though the adage GIGO "garbage in, garbage out" 
+must be stressed, it is the researcher's responsibility to establish that the 
+principal trend is correlated with gene expression (see tutorial for some 
+examples of how to do this).
+
+To identify the putative optimal codons, the genes are sorted according to 
+their position on the principal axis, the sorted codon usage of these genes is 
+written to the file "cusort.coa". Then a number of genes, decided by the 
+advanced correspondence analysis menu option "number of genes used to 
+identify optimal codons", are read from the start and end of this file (i.e. 
+equivalent to the extremes of the principal axis), the codon usage of each set 
+of genes is totalled. The set of genes with the lower Nc (more highly 
+biased) is putatively 
+identified as the more highly expressed.  
+
+hilo.coa
+======================================== 
+Optimal codons are defined as those codons that occur significantly more 
+often in highly expressed genes relative to their frequency in lowly 
+expressed genes. Significance is assessed by a two-way chi square 
+contingency test with the criterion of p < 0.01. The advantage of using a 
+test of significance to identify optimal codons is that variation in codon 
+usage between highly and lowly expressed genes, that is due to random noise 
+is suppressed, but a disadvantage is that the test is dependent on sample 
+size.  
+
+After CodonW does a two way chi squared test on the genes taken from the 
+extremes of axis 1,  their codon usage and RSCU is output as a table to 
+"summary.coa" and "hilo.coa". Those codons which have been putatively 
+identified as optimal (p < 0.01) are indicated with an asterisk (*). Though 
+not considered optimal by CodonW, codons that occur more frequently in the 
+highly expressed dataset at 0.01 < p < 0.05 are indicated with an at sign 
+(@). 
+
+
+fop.coa cbi.coa cai.coa
+======================================== 
+CodonW measures the degree to which the codon usage of a gene has adapted 
+towards the usage of optimal codons. It does this by calculating these 
+indices, the frequency of optimal codons (Fop), codon bias index, and codon 
+adaptation index (CAI). To calculate these indexes, information about codon 
+usage in the species being analysed is needed. The indices Fop and CBI used 
+the optimal codons for the species. The index CAI uses codon adaptation 
+values.
+For some species this information is known, and for these the optimal codons 
+and codon adaptiveness values are in-built into codonW (see the "Change 
+Defaults" menu). For other species these indexes cannot be calculated unless 
+the additional information is known. During calculation of these indices the 
+user is prompted for input files.
+During a COA CodonW generates the output files "cai.coa", "fop.coa" and 
+"cbi.coa". These files can be used as input files for their respective 
+indices (they are already in the correct format). 
+Again it must be stressed that CodonW must make a number of assumptions to 
+generate these files. These are: that the major trend in the codon usage is 
+correlated with expression level; that the dataset contains highly expressed 
+genes; that the genes used to identify optimal codons were highly 
+expressed. If these assumptions are valid then the files "cbi.coa", 
+"cai.coa" and "fop.coa" can be used to calculate the indexes CBI, CAI and 
+Fop respectively. 
+
+
+For the most up to date version see http://codonw.sourceforge.net
+
diff --git a/Readme.txt b/Readme.txt
new file mode 100755
index 0000000..9dfba9b
--- /dev/null
+++ b/Readme.txt
@@ -0,0 +1,115 @@
+
+
+CodonW is a package for codon usage analysis. It was designed to simplify
+Multivariate Analysis (MVA) of codon usage. The MVA method employed in
+CodonW is correspondence analysis (COA) (the most popular MVA method for
+codon usage analysis). CodonW can generate a COA for codon usage,
+relative synonymous codon usage or amino acid usage. Additional analyses
+of codon usage include investigation of optimal codons, codon and
+dinucleotide bias, and/or base composition. 
+
+CodonW also has the capacity to analyse sequences encoded by genetic
+codes other than the universal code.
+
+Why call it codonW? 
+
+Well first you must realise that "clustal" (a very popular multiple
+alignment program by Des Higgins) was originally written in Paul's lab in
+Trinity College Dublin. Clustal has since been rewritten from FORTRAN into
+C and undergone several name changes clustal-> clustalv-> clustalw ->
+clustalx. There was also a program called "codons" written in FORTRAN by
+Andrew Lloyd (a post-doc in Paul's lab), this was the original inspiration
+for codonW. An early version of codonW, written in C, was called codonv.
+When the code was enhanced to include multivariate analysis, what better
+name than codonW. 
+
+
+CodonW version 1.3 June 1997 
+================= 
+
+The source code for CodonW can be obtained from
+ftp://molbiol.ox.ac.uk/cu/codonW.tar.Z. Binaries for a number of platforms
+are also available at this site see ftp://molbiol.ox.ac.uk/cu.
+
+
+To Install and Build on UNIX Platforms 
+================= 
+
+Get the source code from ftp://molbiol.ox.ac.uk/cu/codonW.tar.Z Change
+directory to the directory where you intend to install CodonW. 
+
+uncompress codonW.tar.Z 
+tar -xvf codonW.tar 
+cd codonw 
+./codonWinstall all     (this writes a makefile and then builds codonw) 
+
+This will ask a few questions regarding 'make' and 'cc' and then configure
+the installation and compile the programs. If you don't understand the
+questions, just accept the default by pressing the return key and the
+installation should be OK using the defaults. The install script also
+creates a number of links to the compiled executable codonW.  These links
+allow codonW to emulate other useful codon usage analysis and sequence
+manipulation software by bypassing the menu interface (for more information
+see README.links). Alternatively you can just elect to only build the main
+program, and not install the linked programs. 
+
+./codonWinstall codonw (compile only the executable codonw) 
+
+Once you have successfully built codonw, try these commands to get you
+started.  
+
+./codonw -help (for commandline summary)  
+./codonw        (menu interface) 
+
+There is also a short tutorial. 
+
+
+For the most recent documentation on codonW see
+http://www.molbiol.ox.ac.uk/cu/
+
+
+To Set the Codonw Help Environment:  
+================= 
+
+CodonW has an in-built help system, the help file is called codonW.hlp and
+should be located in the same directory as the executable codonw.
+Alternatively the help file can be pointed to by the environment variable
+CODONW_H, if you are using a C shell you
+can add something similar to this to your .login script. 
+
+setenv CODONW_H file_path 
+
+Where file_path is the fully defined path name for codonW.hlp. 
+
+Additional Files:
+=================
+
+README.indices - explanation about the various codon usage indices that
+codonW calculates.  
+
+README.coa- explanation about the output files from the correspondence
+analysis. 
+
+README.links- explanation about the auxiliary programmes created during
+the making of codonw. 
+
+Tutorial- A quick tutorial on the analysis of codon usage of the open
+reading frames from Saccharomyces cerevisiae chromosome III. 
+
+input.dat- An input file containing 167 open reading frames from
+Saccharomyces cerevisiae chromosome III. (see Tutorial). 
+
+Recoding - A quick explanation about how amino acids and codons are
+represented internally within codonW. 
+
+
+Bugs 
+
+This is a beta version of codonW, therefore there may be bugs within the
+code. If you do find or notice anything strange please e-mail bug
+reports/complaints/suggestions to johnp at molbiol.ox.ac.uk. Remember to
+include an example of the input file (and output files) and the options
+selected that generated the error, don't forget to tell me the make of
+computer and operating system it was running under.
+
+
diff --git a/Recoding.txt b/Recoding.txt
new file mode 100755
index 0000000..f141bd8
--- /dev/null
+++ b/Recoding.txt
@@ -0,0 +1,80 @@
+Data Recoding
+To aid computation codonW converts sequence information 
+automatically from its original text format into a numerical format. 
+This is normally transparent to the user. To add additional genetic 
+codes or a personal choice of codon values for calculating the Fop, 
+CAI or CBI indices, some understanding of the schema used to convert 
+the sequences to numerical strings is advisable. 
+
+When calculating the indices Fop, CBI, or CAI which are measures of 
+codon bias in relation to the codon usage of a set of optimal genes, 
+there is an option of using a personal choice of these values. These 
+are read from file, there must be one value for each codon (64 in 
+total) and they must be found in the file in a set sequence (i.e. 
+the numerical order of the codons, TTT, TCT ... GAG, GGG). This is 
+also the order in which codon and amino acid results are recorded to 
+file.
+
+Internally CodonW recodes all nucleotides, codons and amino acids. 
+Nucleotides are recoded as T/U=1, C=2, A=3, G=4. The 20 standard 
+amino acids and the termination codons are recoded as integer values 
+in the range 1 to 21, note that stop codons are assigned the amino 
+acid value 11 (see Table 2). The decision about whether a codon is 
+synonymous, or how many members are in a particular amino acid 
+synonymous family are taken at run time and are dependent on the 
+genetic code chosen.  
+
+Each codon is recoded into an integer value in the range 1 to 64, 
+see Table 1. The formula used to recode the codons is:
+
+Equation 1
+        	
+code=((P1-1)*16)+P2+((P3-1)*4)    1<= code <= 64
+
+Where each of the three codon positions is represented by P1, P2 and 
+P3. Using this recoding convention, the codon ATG has the value 45. 
+ 		
+code=((3-1)*16)+1+((4-1)*4)=45
+
+Unrecognised or non-translatable bases, codons or amino acids are 
+all assigned the value zero.
+
+
+
+
+Table 1 Numerical values used for recoding codons 
+
+Code	Codon	AA	Code	Codon	AA	Code	Codon	AA	Code	Codon	AA
+1	UUU 	Phe	2	UCU 	Ser	3	UAU 	Tyr	4	UGU 	Cys
+5	UUC		6	UCC 		7	UAC		8	UGC 	
+9	UUA 	Leu	10	UCA		11	UAA 	STOP	12	UGA 	STOP
+13	UUG		14	UCG 		15	UAG 		16	UGG 	Trp
+17	CUU		18	CCU 	Pro	19	CAU 	His	20	CGU	Arg
+21	CUC		22	CCC 		23	CAC		24	CGC	
+25	CUA 		26	CCA		27	CAA	Gln	28	CGA 	
+29	CUG 		30	CCG 		31	CAG 		32	CGG 	
+33	AUU 	Ile	34	ACU 	Thr	35	AAU 	Asn	36	AGU 	Ser
+37	AUC		38	ACC 		39	AAC		40	AGC 	
+41	AUA 		42	ACA 		43	AAA	Lys	44	AGA 	Arg
+45	AUG 	Met	46	ACG 		47	AAG 		48	AGG 	
+49	GUU	Val	50	GCU	Ala	51	GAU 	Asp	52	GGU	Gly
+53	GUC 		54	GCC 		55	GAC		56	GGC 	
+57	GUA 		58	GCA 		59	GAA	Glu	60	GGA 	
+61	GUG 		62	GCG 		63	GAG 		64	GGG 	
+
+
+
+Table 2 Numerical values used to recode amino acids.
+Code	AA	One letter code	Code	AA	One letter code
+1	Phe	F	2	Leu	L
+3	Ile	I	4	Met	M
+5	Val	V	6	Ser	S
+7	Pro	P	8	Thr	T
+9	Ala	A	10	Tyr	Y
+11	Stop	*	12	His	H
+13	Gln	Q	14	Asn	N
+15	Lys	K	16	Asp	D
+17	Glu	E	18	Cys	C
+19	Trp	W	20	Arg	R
+21	Gly	G			
+
diff --git a/Tutorial.txt b/Tutorial.txt
new file mode 100755
index 0000000..6f199d7
--- /dev/null
+++ b/Tutorial.txt
@@ -0,0 +1,350 @@
+Tutorial 
+
+Codon usage analysis
+
+Included with this distribution of codonW should be a test dataset of 
+sequences (input.dat). We will use this set of sequences as a typical example 
+of a codon usage analysis. This test dataset is derived from the open 
+reading frames (ORFs) of Saccharomyces cerevisiae chromosome III as 
+annotated in the EMBL feature table for the sequence entry SCCHRIII 
+(accession number X59720). In the current EMBL (Release 51 June 1997) the 
+number of annotated ORFs was 172. The file input.dat contains 111 of these 
+ORFs. The rationale for why some ORFs were removed is explained below.
+
+The commandline syntax of codonW will be used in this tutorial, all options 
+selected from the commandline are also selectable using the menu system. For 
+more information please read the command line help (codonw -help) or just 
+type "codonw" and use the menu-specific online help. 
+
+Build your dataset of genes carefully.
+Always remember that as in any analysis, but particularly with codon usage, 
+GIGO (garbage in, garbage out). Examine as many sources of information about 
+the data as possible, particularly the original publication and sequence 
+annotations. It is important that the sequences are a representative sample. 
+Five ORFs were removed from the dataset because they were annotated (and 
+had sequence identity) with genes within the previously identified 
+transposable elements Ty2 and Ty5.  These ORFs were annotated at positions 
+1537-2127, 2118-2558, 2816-3742, 84714-86030, 84714-90384. The codon usage 
+of transposable element genes differs from that of chromosomal genes. 
+
+Further checks of sequence annotation were carried out; those sequences which 
+had not been assigned gene names or SwissProt accession numbers were 
+removed. The SwissProt annotation was also checked; genes described as 
+hypothetical but which did not have any sequence identity with other 
+proteins were removed. 
+Check basic sequence integrity  
+Sequences should be checked to confirm that they match some basic gene 
+characteristics. Each sequence might reasonably be expected to have an 
+initiation codon and a translation termination codon, and no internal stop 
+codons. Those sequences that do not match these characteristics, or 
+sequences that have partial codons or untranslatable codons are flagged by 
+codonw with warning messages.
+
+To make a first pass of the input data to check for simple sequence 
+problems: 
+codonw input.dat   -nomenu 		
+
+By default codonw will report the codon usage of each gene to the file 
+input.blk. As there are no problems with this dataset there should be no 
+warning messages. However analysis of a previous version of this dataset 
+based on EMBL Release 50 where SCCHRIII had 230 annotated ORFs, generated 
+these typical warning messages.  
+
+Warning: Sequence 178 "SCCHRIII.PE178______" does not begin with a 
+recognised start codon
+Warning: Sequence 178 "SCCHRIII.PE178______" is not terminated by a stop 
+codon
+Warning: Sequence 202 "SCCHRIII.PE202______" does not begin with a 
+recognised start codon
+Warning: Sequence 202 "SCCHRIII.PE202______" has 1 internal stop codon(s)
+Warning: Sequence 202 "SCCHRIII.PE202______" is not terminated by a stop 
+codon
+
+Each sequence is labelled by its numerical occurrence in the input file 
+(i.e. these are the 178th and 202nd sequences in the input file) and its 
+sequence header line.
+
+Sequences that generate warning messages should be examined closely to 
+ascertain why. Some sequences may be annotated as partial sequences and 
+therefore the absence of a start or stop codon or the presence of a 3' 
+partial codon is to be expected. Note the presence of a 5' partial codon 
+would cause a frame shift; it is ESSENTIAL that 5' partial codons are 
+removed. Unless the frame shift that they produce results in an (incorrect) 
+reading frame that contains internal stop codons, codonw cannot detect this 
+problem. The codon usage of a frame shifted gene sequence could adversely 
+affect the correspondence analysis  (COA) (though such genes are often 
+recognisable as being outliers on the COA plots).
+
+If a sequence warning is due to incorrect annotation this should be 
+corrected manually. Sequences that produce warnings that cannot be explained 
+or justified (e.g. a gene with internal stop codon) should be excluded. 
+These warnings are informational only and do not exclude sequences from the 
+analysis.
+
+Codon usage indices 
+Once the initial quality checks have been made for the data we can then 
+proceed with the codon usage analysis (strictly speaking we can generate COA 
+and codon usage indices tasks at the same time). Some of the indices of 
+codon usage bias that CodonW calculates (i.e. Fop, CAI and CBI) use 
+information about a preferred set of codons for highly expressed genes. This 
+information is species specific and does not apply to all species (most 
+eukaryotes and many prokaryotes appear to display no codon preference in 
+highly expressed genes). Therefore care must be taken that the appropriate 
+set of optimal codons are used. For most species the optimal codons are not 
+known and therefore the indices should not be calculated at this stage. 
+However this information is known for Saccharomyces cerevisiae, so we can 
+immediately calculate these indices of codon usage. Later we will see how 
+codonW identifies optimal codons and can generate this information for your 
+species.  
+ 
+The default optimal codons and codon adaptation values are those of E. coli. 
+To select an alternative choice we use the c_type (for CAI values ) and 
+f_type (for FOP/CBI) commandline arguments. These switches require an 
+integer value; this value is the same as the option number if we were 
+using the menu system to change the codon information. 
+
+Example       "-c_type 2" is equivalent to 
+Choose "Main Menu"
+Choose "Changes Defaults Menu"
+Choose "Change the CAI values"
+Choose "(2) Saccharomyces cerevisiae"
+
+Example       "-f_type 4" is equivalent to 
+Choose "Main Menu"
+Choose "Changes Defaults Menu"
+Choose "Change the Fop/CBI values"
+Choose "(4) Saccharomyces cerevisiae"
+
+
+Therefore to select all the codon usage indices calculated by codonw and to 
+use the optimal codons of Saccharomyces cerevisiae type:
+
+codonw input.dat  -all_indices  -c_type 2 -f_type 4 -nomenu
+
+See below for the output of this command  
+The commandline flag -nomenu bypasses the menu system; the -all_indices flag 
+indicates to codonw that you wish to calculate all the codon and amino acid 
+usage indices. These indices are T3s, C3s, A3s, G3s, CAI, CBI, Fop, Nc, GC3s, 
+GC, L_sym, L_aa, Gravy and Aromaticity. For a fuller explanation of what 
+these indices are see Readme.indices. These indices can also be used to 
+check whether there are any identical or almost identical sequences in the 
+input file. If we sort the result file "input.out" it is much easier to 
+identify the sequences which are similar. 
+
+sort -k 2n  input.out                 (unix for "sort using the second 
+numerical field") 
+
+The sorted output reveals the presence of two pairs of identical sequences 
+(Mating type proteins)
+ALPHA2____________63   0.3636  0.2273  0.4939  0.2177  0.109
+MATALPHA2_________63   0.3636  0.2273  0.4939  0.2177  0.109
+and 
+ALPHA1____________52   0.4361  0.2180  0.4228  0.2589  0.112   
+MATALPHA1_________52   0.4361  0.2180  0.4228  0.2589  0.112   
+
+Sequences which appear to be multiple copies of the same gene are normally 
+removed from our codon usage datasets, even if the sequences are not 
+identical but where the differences c codon usage bias as observed, lower values indicate stronger bias. A 
+useful feature of ENc is that the effect that GC biases have on the index can 
+be estimated. This allows the comparison of GC3s and ENc against the 
+theoretical values if codon bias was simply caused due to GC mutational 
+bias. A plot of ENc vs. GC3s can be seen at 
+http://www.molbiol.ox.ac.uk/cu/EncVsGC3s.gif. Although the majority of genes 
+in this plot have a degree of codon bias that can be explained in terms of 
+GC mutation, there is a cluster of genes (six genes with ENc <40) which have much 
+stronger codon bias than can be simply explained in terms of mutational biases. 
+These genes are good candidates as genes whose codon usage has been 
+determined by natural selection, probably selection for translational 
+efficiency. 
+
+Correspondence Analysis (COA)
+We are now ready to generate a correspondence analysis of the codon usage of 
+SCCHRIII genes. We have a choice about how much information is generated. In 
+this example we will use the default values.
+
+codonw input.dat -coa_cu  -nomenu -silent		(-silent stops all 
+prompting)
+
+This generates a COA of codon usage. The summary file is "summary.coa" and 
+contains most of the data generated by the COA. One of the first sections is 
+the "Explanation of the variation by axis"  also stored in eigen.coa. 
+
+The total inertia of the data was 0.263176
+Num. Eigenval.   R.Iner.  R.Sum    |Num. Eigenval.   R.Iner.  R.Sum  |
+01   +4.5755E-02 +0.1739 +0.1739   |02   +3.2372E-02 +0.1230 +0.2969 |
+03   +1.8405E-02 +0.0699 +0.3668   |04   +1.2499E-02 +0.0475 +0.4143 |
+
+The relative inertia explained by the first axis  is 17.4%, the 2nd axis 
+explains 12.3%, the 3rd 7.0%, etc.  (17.4% is not remarkably high for 
+relative inertia explained by the first axis, but as there are ORFs included 
+which are described as hypothetical there may be random noise present in the 
+data if they are not real). 
+
+The next two sections report position of each gene and codon on the trends. 
+
+label                    Axis1      Axis2      Axis3      Axis4
+1_YCG9_Probable_____    0.00904    0.13153    0.34028   -0.05372
+2_YCG8________573_re    0.07429   -0.24652   -0.05502   -0.39837
+3_ALPHA2________633_    0.30675    0.04259   -0.22864   -0.03878
+4_ALPHA1________528_    0.16444    0.00399   -0.02000    0.00937
+5_CHA1_________1083_   -0.00322    0.10387    0.07137    0.11896
+
+This information is best viewed graphically; an example of the location of 
+the genes on the two principal axes can be seen here 
+http://www.molbiol.ox.ac.uk/cu/axes.gif.
+
+Automatic Identification of Putative Optimal Codons 
+Codonw automatically tries to identify the optimal codons in your data, or 
+more precisely identify the codons which contribute to the major trend (if 
+the main trend is selection for translational optimality these should be the 
+optimal codons). It does this by comparing the codon usage of groups of 
+genes taken from each extreme of the principal trend (axis 1). It identifies 
+the set of genes with the highest bias (using the effective number of codons 
+index) and tests for significant differences in codon usage between 
+the higher and lower bias sets with a two way Chi-squared contingency test. The 
+putative optimal codons are listed in summary.coa and hilo.coa.  It is the 
+responsibility of the user to confirm that the major codon usage trend is 
+selection for translational optimality, and not due to some other mutational 
+pressure (see GC variation).  The number of genes included in the two groups 
+can be selected using the command line switch ( -coa_num ) as an absolute 
+number of genes, or a percentage of the total genes in the dataset (by 
+default 5%).  
+
+The analysis of this dataset identified 19 codons that appeared to be 
+optimal. 18 of these agree with optimal codons identified previously using a 
+larger dataset of 575 genes [Sharp, 1991 #46]. The codon identified in 
+this analysis as being optimal but not in the previous analysis, was GCC; 
+this codon has been previously suggested as being an optimal codon in S. 
+cerevisiae [Bennetzen, 1982 #92]. The U ending codons, AUU, GUU and UGU, 
+which have been previously identified as optimal [Sharp, 1991 #46], were 
+not identified here at p<0.01; although UGU was identified as potentially 
+optimal with a p<0.02. The main reason that the U ending codons were not 
+identified from this dataset was their much higher usage in the lower biased 
+dataset. 
+
+
+Caveats
+1) The codons identified by codonw, as being optimal will be dependent on 
+the strength of the trend and the size of the datasets. 
+2) The composition of the genes from chromosome III is quite different from 
+the 575-gene dataset used by Sharp and Cowe. Only one of the 30 genes they 
+considered to be highly expressed, and none of the genes they considered 
+lowly expressed are present in this dataset. The reader is reminded that 
+there are approximately 15,000 yeast genes, so just a little over 1% are 
+located on chromosome III.
+
+Codonw generated personal choice of codons 
+On the assumption that the principal trend identified by codonw is selection 
+for translational optimality, and that the genes assigned to the highly biased 
+codon usage group are highly expressed, codonw outputs files with the 
+"optimal codons" and "CAI adaptation fitness values". These files are 
+fop.coa, cbi.coa and cai.coa, their filenames are related to the index they 
+have been formatted for.  These files can be used to calculate the indices 
+in species where the preferred codon usage has not been hardwired into 
+codonW.
+
+codonw input.dat  -fop_file fop.coa 
+codonw input.dat -cai_file cai.coa -cbi_file cbi.coa
+
+Caveats 
+1) The original CAI paper calculated fitness values from experimentally 
+determined highly expressed genes.  The fitness values that are internal 
+to codonW were derived from these criteria. CAI indices calculated using 
+fitness values derived from genes identified solely by COA, as being 
+highly expressed should not be regarded as true CAI values.
+2) The optimal codons stored in the files cbi.coa and fop.coa were 
+identified by codonw using a statistical test of significance, this test 
+is dependent on sample size. 
+3) The size of the sample taken from the extremes of the axis will affect 
+the identified optimal codons. 
+4) The principal trend in the variation of codon usage may not be 
+translation optimality. 
+
+When we calculate the indices CAI, CBI and Fop using the "codonw" generated 
+optimal codons and fitness values based on this small dataset, the results, as 
+we would expect, differ from when these indices are calculated using the codonw 
+internal codon usage information for S. cerevisiae. The internal values are 
+more accurate because the datasets used to generate them were larger, and 
+contained experimentally verified gene sequences.
+
+Although the two sets of indices differ, they remain highly correlated, all 
+three indices have correlation coefficients greater than 0.96. Therefore if 
+comparisons between the index values are internally consistent (i.e. they 
+were both calculated using the same optimal codon information) relative 
+comparisons of codon usage and bias can be made. Based on a dataset of 111 
+genes we have been able to identify optimal codons, which give us some 
+insight into the codon usage of S. cerevisiae. 
+
+
+Axis2 is highly correlated with GC3s content
+Alternative datasets could have been chosen that would present a much 
+simpler analysis of codon usage  (i.e. where the optimal codons identified 
+better matched those previously published). This dataset was specifically 
+chosen as the codon usage variation for genes from this chromosome is known 
+to have a second trend,  GC3s varies with chromosomal location in a 
+systematic fashion [Sharp, 1993 #39]. When we examine correlation 
+coefficients between the first 4 axes the correlation coefficient between 
+axis2 and GC3s is highly significant (r=0.89). Interestingly the bias is 
+strongest among the U ending codons; it is possible that the presence of this 
+trend contributed to why the three U ending codons were not identified here 
+as optimal codons. This trend is quite strong, accounting for 12.3% of the 
+relative inertia of the data; the principal trend (apparently selection for 
+translation optimality) accounted for 17.4%. We therefore see how it is 
+possible that the strongest influence on the choice of codon usage might not 
+be translation optimality but mutation biases.
+
+
+
+
+Typical output from codonw -all_indices -nomenu 
+======================= Output ======================================
+
+Genetic code is currently set to Universal Genetic code TGA=* TAA=* TAG=*
+
+ Welcome to CodonW  1.3  for Help type h
+
+Using Saccharomyces cerevisiae (Sharp and Cowe (1991) Yeast 7:657-678) 
+w values to calculate CAI 
+Using Saccharomyces cerevisiae  (Sharp and Cowe (1991) Yeast 7:657-678) 
+optimal codons to calculate CBI
+Using Saccharomyces cerevisiae  (Sharp and Cowe (1991) Yeast 7:657-678)
+optimal codons to calculate Fop
+..................................................................
+
+                Number of sequences: 111
+
+Files used:
+ Input  file was         input.dat 
+ Output file was         input.out  (codon usage indices, e.g. gc3s)
+ Output file was         input.blk  (bulk output e.g. raw codon usage)
+
+ CodonW has finished
+         ======================================================
+Tabulation of total codon usage
+
+Phe UUU 1483 1.14 Ser UCU 1094 1.47 Tyr UAU 1000 1.12 Cys UGU  434 1.18 
+    UUC 1117 0.86     UCC  773 1.04     UAC  789 0.88     UGC  303 0.82 
+Leu UUA 1349 1.55     UCA  882 1.19 TER UAA   47 1.27 TER UGA   36 0.97 
+    UUG 1549 1.78     UCG  487 0.66     UAG   28 0.76 Trp UGG  665 1.00 
+
+    CUU  698 0.80 Pro CCU  747 1.27 His CAU  677 1.15 Arg CGU  328 0.86 
+    CUC  364 0.42     CCC  415 0.71     CAC  499 0.85     CGC  171 0.45 
+    CUA  671 0.77     CCA  911 1.55 Gln CAA 1388 1.35     CGA  151 0.39 
+    CUG  604 0.69     CCG  281 0.48     CAG  668 0.65     CGG  103 0.27 
+
+Ile AUU 1612 1.35 Thr ACU 1052 1.38 Asn AAU 1778 1.17 Ser AGU  717 0.97 
+    AUC 1018 0.85     ACC  660 0.87     AAC 1262 0.83     AGC  500 0.67 
+    AUA  943 0.79     ACA  883 1.16 Lys AAA 2118 1.13 Arg AGA 1038 2.71 
+Met AUG 1156 1.00     ACG  444 0.58     AAG 1645 0.87     AGG  504 1.32 
+
+Val GUU 1184 1.49 Ala GCU 1055 1.40 Asp GAU 1905 1.25 Gly GGU 1284 1.87 
+    GUC  674 0.85     GCC  765 1.01     GAC 1145 0.75     GGC  552 0.80 
+    GUA  622 0.78     GCA  836 1.11 Glu GAA 2371 1.41     GGA  557 0.81 
+    GUG  690 0.87     GCG  368 0.49     GAG  995 0.59     GGG  355 0.52 
+
+53400 codons (used Universal Genetic code)
+ 
+======================================================
+
+
+                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             [...]
\ No newline at end of file
diff --git a/codonW.h b/codonW.h
new file mode 100755
index 0000000..4faf563
--- /dev/null
+++ b/codonW.h
@@ -0,0 +1,683 @@
+/**************************************************************************/
+/* CodonW codon usage analysis package                                    */
+/* Copyright (C) 2005            John F. Peden                            */
+/* This program is free software; you can redistribute                    */
+/* it and/or modify it under the terms of the GNU General Public License  */
+/* as published by the Free Software Foundation; version 2 of the         */
+/* License,                                                               */
+/*                                                                        */
+/* This program is distributed in the hope that it will be useful, but    */
+/* WITHOUT ANY WARRANTY; without even the implied warranty of             */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           */
+/* GNU General Public License for more details.                           */
+/* You should have received a copy of the GNU General Public License along*/
+/* with this program; if not, write to the Free Software Foundation, Inc.,*/
+/* 675 Mass Ave, Cambridge, MA 02139, USA.                                */
+/*                                                                        */
+/*                                                                        */
+/* The author can be contacted by email (jfp#hanson-codonw at yahoo.com Anti-*/
+/* Spam please change the # in my email to an _)                          */
+/*                                                                        */
+/* For the latest version and information see                             */
+/* http://codonw.sourceforge.net 					  */
+/**************************************************************************/
+
+
+#define ARB_UNIT          100                  /* used to define the array*/
+#define MAX_GENE          (ARB_UNIT*3)         /* seq, which holds readin */
+#define LINE_LENGTH       (ARB_UNIT+100)       /* sequence data           */
+#define GARG_EXACT     0x800                   /* used in function gargs  */
+#define GARG_NEXT      0x1000                  /* used in function gargs  */
+#define GARG_THERE     0x2000                  /* used in function gargs  */
+#define GARG_SUBSQ     0x4000                  /* used in function gargs  */
+#define MAX_ARGS       100                     /* used in function gargs  */
+/* debugging code: debug_ needs an int 'debugger' in scope; debug(x) prints an int by name */
+#define debug_         printf("Got to %i\n",debugger++); 
+#define debug(x)       printf( #x " = %d", x);
+/* pause: prompt on stderr, then read one line into pm->junk. Uses a bounded fgets() instead of gets(); unlike gets() the newline is kept in junk */
+#define pause {fprintf(stderr,"\nPress return or enter to continue -> ");fgets(pm->junk,sizeof(pm->junk),stdin);}
+#define MAX_FILENAME_LEN 90                   /* max filename             */
+
+/* define the structures used within codonW                               */
+typedef struct {
+  char   *des;                                /* name of the genetic code */
+  char   *typ;                                /* short summary string, e.g. the stop codon assignments */
+  int    ca[65];                              /* amino acid code per codon code; see the cu[] initialisers */
+} GENETIC_CODE_STRUCT;                        /* genetic code information */  
+
+typedef  struct { 
+  char *aa1[22];                              /* 1 letter AA code         */ 
+  char *aa3[22];                              /* 3 letter AA code         */  
+  char *cod[65];                              /* 3 letter name of codons  */
+} AMINO_STRUCT;                               /* name tables; presumably indexed by AA code / codon code - TODO confirm */
+
+typedef struct {
+  float hydro[22];                            /* hydropathicity values    */ 
+  int   aromo[22];                            /* aromaticity values       */
+} AMINO_PROP_STRUCT;                          /* per-amino-acid properties; presumably indexed by AA code - TODO confirm */
+  
+typedef struct  {
+  char   *des;                                /* store a description      */
+  char   *ref;                                /*       a reference        */
+  char   fop_cod[65];                         /* the optimal codons; one slot per codon, value encoding not visible here */
+} FOP_STRUCT;                                 /* a set of optimal codons; also the type of the pcbi pointer */
+
+typedef struct {
+  char   *des;                                /* store a description      */
+  char   *ref;                                /*       a reference        */
+  float  cai_val[65];                         /* the CAI w values, one slot per codon */
+} CAI_STRUCT;                                 /* a set of CAI w values */
+
+typedef struct {  
+char   level;                                 /* either expert or standard*/
+int   axis;                                   /* how many axis to generate*/
+int   rows;                                   /* how many genes in dataset*/
+int   colm;                                   /* how many columns in data */
+int   fop_gene;                   /* No of genes to use to ident opt codon; a negative value means a percentage */
+char  add_row[MAX_FILENAME_LEN];              /* file with supp sequences */
+float inertia;                                /* total data inertia       */
+char  codons[65];                             /* codon to be analysed     */
+char  amino [22];                             /* amino acids to be COA'ed */
+} COA_STRUCT;                                 /* correspondence analysis (COA) settings */
+
+typedef struct {
+  char prog;                                  /* used to ident which prog */
+  char bulk;                                  /* used to ident blk output */  
+  char verbose;                          /* original note: "don't overwrite files"; name suggests verbosity - TODO confirm */
+  char totals;                                /* concatenate genes ?      */
+  char menu;                                  /* show a menu       ?      */
+  char warn;                                  /* show sequence warning    */
+
+  char codonW;                                /* am I codonW              */ 
+  char fop;                                   /* calc index fop           */
+  char cai;                                   /* calc index CAI           */
+  char cbi;                                   /* calc index CBI           */
+  char bases;                                 /* calc base composition    */
+  char gc3s;                                  /* calc gc at sil.3rd base  */
+  char gc;                                    /* calc gc                  */
+  char enc;                                   /* calc enc                 */
+  char sil_base;                              /* calc silent base compo   */
+  char L_sym;                                 /* No of synonymous codons  */
+  char L_aa;                                  /* No of amino acids        */
+  char hyd;                                   /* calc hydropathicity      */
+  char aro;                                   /* calc aromaticity         */
+  
+  char seperator;                             /* column separator         */
+  char coa;                                   /* calculate a COA or not ? */
+  
+  char code;                                  /* which genetic code       */
+  char f_type;                                /* which predefined fop val */
+  char c_type;                                /* which predefined CAI val */
+  
+  char seq_type;                              /* DNA or Protein or CU     */
+  char seq_format;                            /* Human or machine readable*/
+  char curr_infilename [MAX_FILENAME_LEN];    /* input filename           */  
+  char curr_outfilename[MAX_FILENAME_LEN];    /* .out filename            */   
+  char curr_tidyoutname[MAX_FILENAME_LEN];    /* .blk filename            */ 
+  char fop_filen[MAX_FILENAME_LEN];           /* user fop filename        */
+  char cai_filen[MAX_FILENAME_LEN];           /* user CAI filename        */
+  char cbi_filen[MAX_FILENAME_LEN];           /* user CBI filename        */
+  char curr_logfilename[MAX_FILENAME_LEN];    /* used for logging errors  */
+
+  char junk      [BUFSIZ+1];                  /* scratch buffer; the pause macro reads into it */
+  char messages  [300];                       /* used to construct messages */
+  char analysis_run;                          /* has CodonW actually run  */
+
+  int  term_length;                           /* how many lines are there */
+                                              /* file pointers            */
+  FILE *inputfile;                            /* input file               */ 
+  FILE *outputfile;                           /* .out file                */
+  FILE *tidyoutfile;                          /* .blk file                */
+  FILE *cuout;                                /* codon usage output       */
+  FILE *fopfile;                              /* fop input values         */
+  FILE *caifile;                              /* cai input values         */  
+  FILE *cbifile;                              /* cbi input values         */
+  FILE *logfile;                              /* log file stream          */
+  FILE *my_err;                               /* pointer for err stream   */
+  
+  FILE *fcoa_in;                              /* presumably COA input stream - TODO confirm */
+  FILE *fcoa_out;                             /* presumably COA output stream - TODO confirm */
+} MENU_STRUCT ;                               /* run-time options, filenames and open streams */
+
+
+#ifndef DECOSF                                /* DEBUG is on unless DECOSF is defined */
+#define DEBUG                                 /* include debug  code      */ 
+#endif
+
+#ifndef TRUE
+#define TRUE 1                                /* for dumb compilers       */
+#endif
+
+#ifndef FALSE
+#define FALSE 0                               /* for dumb compilers       */
+#endif
+
+
+/* Portability macros: deletefile(x) removes file x; clearscr(x) blanks the screen */
+#if defined _WINDOWS || defined _WIN32
+# define deletefile(x) _unlink(x)
+# define clearscr(x) {int n; for(n=0; n<x ;n++) printf("\n");} /* print x newlines */
+#elif defined  _DOS
+# define deletefile(x) _unlink(x)
+# define clearscr(x) system("cls");            /* x is ignored */
+#else
+# define deletefile(x)  remove(x)
+#if defined DEBUG
+# define clearscr(x) {int n; for(n=0; n<x ;n++) printf("\n");} /* print x newlines */
+#else
+# define clearscr(x) system("clear");          /* x is ignored */
+#endif                                         /* DEBUG */
+#endif                                         /* platform selection */
+
+#ifdef ORIG_DEFS                                 /* define only where ORIG_DEFS is set */ 
+char Revision[] = "1.4.4";                       /* version               */
+char Update[]   = "$Date: 2005/05/11 21:43:49 $";/* date                  */
+char Author[]   = "$Author: johnfpeden $";       /* author                */
+char  title[100];                                /* sequence description  */
+char  long_seq;                                  /* length of seq title   */ 
+char  last_base;
+long int ncod[65];                               /* presumably a tally per codon code - TODO confirm */
+long int naa[23];                                /* presumably a tally per amino acid code - TODO confirm */
+long int din[3][16];                             /* presumably dinucleotide tallies - TODO confirm */
+long int codon_tot;
+long int master_ic;
+long int fl_pos_start;        
+long int fl_pos_curr;
+long int GC_TOT;
+long int AT_TOT;
+long int AA_TOT;
+long int IUBC_TOT;
+long int GAP_TOT; 
+long int num_sequence;
+long int num_seq_int_stop;
+long int non_std_char;
+long int tot;
+int last_aa = 0;
+int reg = 1;
+int valid_stops;
+int valid_start;
+int fram;              
+int *da;
+int *ds;
+
+AMINO_STRUCT         *paa;                       /* pointer to structs   */
+GENETIC_CODE_STRUCT  *pcu;
+FOP_STRUCT           *pfop;
+FOP_STRUCT           *pcbi;                      /* CBI shares the FOP_STRUCT layout */
+CAI_STRUCT           *pcai;
+MENU_STRUCT          *pm;                        /* the pause macro reads into pm->junk */
+COA_STRUCT           *pcoa;
+AMINO_PROP_STRUCT    *pap;
+
+
+                                                /* declare default values */           
+COA_STRUCT coa={
+'n',                                            /* level                  */
+4,                                              /* axis                   */
+0,                                              /* rows  or genes         */
+64,                                             /* colms                  */
+-5,               /* fop_gene (if number is negative implies a percentage)*/ 
+"",                                             /* add_row                */
+(float) 0.00                                    /* inertia                */
+};       
+
+int NumGeneticCodes=8;                          /* used in menu.c         */
+                                                /* No. of predefined codes*/
+
+                                                /* define genetic codes   */    
+GENETIC_CODE_STRUCT  cu[] = {        /* NumGeneticCodes entries, each:    */
+  "Universal Genetic code",          /*  name of the code                 */
+  "TGA=* TAA=* TAG=*",               /*  codon=residue summary            */
+  0,                                 /*  slot 0, then 64 residue numbers  */
+  1,  6, 10, 18,  1,  6, 10, 18,  2,  6, 11, 11,  2,  6, 11, 19, /* UNN */
+  2,  7, 12, 20,  2,  7, 12, 20,  2,  7, 13, 20,  2,  7, 13, 20, /* CNN */
+  3,  8, 14,  6,  3,  8, 14,  6,  3,  8, 15, 20,  4,  8, 15, 20, /* ANN */
+  5,  9, 16, 21,  5,  9, 16, 21,  5,  9, 17, 21,  5,  9, 17, 21, /* GNN */
+  "Vertebrate Mitochondrial code",   /* residue numbers follow the order  */
+  "AGR=* ATA=M TGA=W",               /* of amino_acids (1=F, 11=*, 21=G); */
+  0,                                 /* codons follow its codon list      */
+  1,  6, 10, 18,  1,  6, 10, 18,  2,  6, 11, 19,  2,  6, 11, 19,
+  2,  7, 12, 20,  2,  7, 12, 20,  2,  7, 13, 20,  2,  7, 13, 20,
+  3,  8, 14,  6,  3,  8, 14,  6,  4,  8, 15, 11,  4,  8, 15, 11,
+  5,  9, 16, 21,  5,  9, 16, 21,  5,  9, 17, 21,  5,  9, 17, 21,
+  "Yeast Mitochondrial code",
+  "CTN=T ATA=M TGA=W",               /* CTN slots below hold 8 = T (Thr)  */
+  0,
+  1,  6, 10, 18,  1,  6, 10, 18,  2,  6, 11, 19,  2,  6, 11, 19,
+  8,  7, 12, 20,  8,  7, 12, 20,  8,  7, 13, 20,  8,  7, 13, 20,
+  3,  8, 14,  6,  3,  8, 14,  6,  4,  8, 15, 20,  4,  8, 15, 20,
+  5,  9, 16, 21,  5,  9, 16, 21,  5,  9, 17, 21,  5,  9, 17, 21,
+  "Filamentous fungi Mitochondrial code",
+  "TGA=W",
+  0,
+  1,  6, 10, 18,  1,  6, 10, 18,  2,  6, 11, 19,  2,  6, 11, 19,
+  2,  7, 12, 20,  2,  7, 12, 20,  2,  7, 13, 20,  2,  7, 13, 20,
+  3,  8, 14,  6,  3,  8, 14,  6,  3,  8, 15, 20,  4,  8, 15, 20,
+  5,  9, 16, 21,  5,  9, 16, 21,  5,  9, 17, 21,  5,  9, 17, 21,
+  "Insects and Plathyhelminthes Mitochondrial code",
+  "ATA=M TGA=W AGR=S",
+  0,
+  1,  6, 10, 18,  1,  6, 10, 18,  2,  6, 11, 19,  2,  6, 11, 19,
+  2,  7, 12, 20,  2,  7, 12, 20,  2,  7, 13, 20,  2,  7, 13, 20,
+  3,  8, 14,  6,  3,  8, 14,  6,  4,  8, 15,  6,  4,  8, 15,  6,
+  5,  9, 16, 21,  5,  9, 16, 21,  5,  9, 17, 21,  5,  9, 17, 21,
+ "Nuclear code of Cilitia",
+ "UAA=Q=Gln  UAG=Q",
+  0,
+  1,  6, 10, 18,  1,  6, 10, 18,  2,  6, 13, 11,  2,  6, 13, 19,
+  2,  7, 12, 20,  2,  7, 12, 20,  2,  7, 13, 20,  2,  7, 13, 20,
+  3,  8, 14,  6,  3,  8, 14,  6,  3,  8, 15, 20,  4,  8, 15, 20,
+  5,  9, 16, 21,  5,  9, 16, 21,  5,  9, 17, 21,  5,  9, 17, 21,
+ "Nuclear code of Euplotes",
+ "UGA=C",
+  0,
+  1,  6, 10, 18,  1,  6, 10, 18,  2,  6, 11, 18,  2,  6, 11, 19,
+  2,  7, 12, 20,  2,  7, 12, 20,  2,  7, 13, 20,  2,  7, 13, 20,
+  3,  8, 14,  6,  3,  8, 14,  6,  3,  8, 15, 20,  4,  8, 15, 20,
+  5,  9, 16, 21,  5,  9, 16, 21,  5,  9, 17, 21,  5,  9, 17, 21,
+  "Mitochondrial code of Echinoderms",
+  "UGA=W AGR=S AAA=N",
+  0,
+  1,  6, 10, 18,  1,  6, 10, 18,  2,  6, 11, 19,  2,  6, 11, 19,
+  2,  7, 12, 20,  2,  7, 12, 20,  2,  7, 13, 20,  2,  7, 13, 20,
+  3,  8, 14,  6,  3,  8, 14,  6,  3,  8, 14,  6,  4,  8, 15,  6,
+  5,  9, 16, 21,  5,  9, 16, 21,  5,  9, 17, 21,  5,  9, 17, 21
+};
+                                            /* define amino acid info     */
+AMINO_STRUCT amino_acids ={
+ "X",
+ "F","L","I","M","V",
+ "S","P","T","A","Y",
+ "*","H","Q","N","K",
+ "D","E","C","W","R","G",
+ "UNK",
+ "Phe","Leu","Ile","Met","Val",
+ "Ser","Pro","Thr","Ala","Tyr",
+ "TER","His","Gln","Asn","Lys",
+ "Asp","Glu","Cys","Trp","Arg","Gly",
+ "BAD",
+ "UUU","UCU","UAU","UGU",
+ "UUC","UCC","UAC","UGC",
+ "UUA","UCA","UAA","UGA",
+ "UUG","UCG","UAG","UGG",
+ "CUU","CCU","CAU","CGU",
+ "CUC","CCC","CAC","CGC",
+ "CUA","CCA","CAA","CGA",
+ "CUG","CCG","CAG","CGG",
+ "AUU","ACU","AAU","AGU",
+ "AUC","ACC","AAC","AGC",
+ "AUA","ACA","AAA","AGA",
+ "AUG","ACG","AAG","AGG",
+ "GUU","GCU","GAU","GGU",
+ "GUC","GCC","GAC","GGC",
+ "GUA","GCA","GAA","GGA",
+ "GUG","GCG","GAG","GGG"
+};
+
+int NumFopSpecies=8;                             /* again used in menu.c  */
+                                                 /* predefined fop info   */   
+FOP_STRUCT  fop[] = { 
+  "Escherichia coli",
+  "Ikemura (1985) Mol. Biol. Evol. 2:13-34 (updated by INCBI 1991)",
+0,2,3,2,2,3,3,3,3,2,2,2,2,2,2,2,2,
+  2,2,2,3,2,2,3,3,2,2,2,2,3,3,3,2, 
+  2,3,2,2,3,3,3,3,2,2,3,2,2,2,2,2,
+  3,3,2,3,2,2,3,3,2,2,3,2,2,3,2,2,
+  "Bacillus subtilis ",
+  "Sharp et al (1990) Genetics & Biotech of Bacilli vol3 pp89-98",
+0,2,3,2,2,3,1,3,2,2,2,2,2,2,1,2,2,
+  3,3,2,3,2,1,2,3,2,3,3,1,2,2,2,1, 
+  2,3,2,2,3,1,3,2,1,2,3,2,2,2,2,1,
+  3,3,2,3,2,1,3,2,3,2,3,2,2,2,2,1,
+  "Dictyostelium discoideum ",
+  "Sharp and Devine (1989) Nucl. Acids Res 17:5029-5039)",
+0,2,2,2,2,3,2,3,2,2,2,2,2,2,2,2,2,
+  2,2,2,3,3,2,3,2,2,3,3,2,2,2,2,2, 
+  2,2,2,2,3,3,3,2,2,2,2,2,2,2,3,2,
+  2,2,2,3,3,3,2,2,2,2,3,2,2,2,2,2,
+  "Aspergillus nidulans ",
+  "Lloyd and Sharp (1991) Mol. Gen. Genet 230: 288-294",
+0,2,2,2,2,3,3,3,2,2,2,2,2,2,2,2,2,
+  2,2,2,3,3,3,3,3,2,2,2,2,2,2,3,2, 
+  2,2,2,2,3,3,3,2,2,2,2,2,2,2,3,2,
+  2,3,2,3,3,3,3,2,2,2,2,2,2,2,3,2,
+  "Saccharomyces cerevisiae ",
+  "Sharp and Cowe (1991) Yeast 7:657-678",
+0,2,3,2,3,3,3,3,2,2,2,2,2,3,2,2,2,
+  2,2,2,2,2,2,3,2,2,3,3,2,2,2,2,2, 
+  3,3,2,2,3,3,3,2,2,2,2,3,2,2,3,2,
+  3,3,2,3,3,2,3,2,2,2,3,2,2,2,2,2,
+  "Drosophila melanogaster",
+  "Shields et al. (1988) Mol Biol Evol 5: 704-716",
+0,2,2,2,2,3,3,3,3,2,2,2,2,2,2,2,2,
+  2,2,2,3,2,3,3,3,2,2,2,2,3,2,3,2, 
+  2,2,2,2,3,3,3,2,2,2,2,2,2,2,3,2,
+  2,2,2,2,3,3,3,3,2,2,2,2,3,2,3,2,
+ "Caenorhabditis elegans",
+ "Stenico, Lloyd and Sharp Nuc. Acids Res. 22: 2437-2446(1994)",
+0,2,2,2,2,3,3,3,3,2,2,2,2,2,2,2,2,
+  3,2,2,3,3,2,3,3,2,3,2,2,2,2,2,2, 
+  2,2,2,2,3,3,3,2,2,2,2,2,2,2,3,2,
+  2,3,2,2,3,3,3,2,2,2,2,3,2,2,3,2,
+ "Neurospora crassa",
+ "Lloyd and Sharp (1993)",
+0,2,3,2,2,3,3,3,3,2,2,2,2,2,2,2,2,
+  2,2,2,3,3,3,3,3,2,2,2,2,2,2,3,2,
+  2,3,2,2,3,3,3,2,2,2,2,2,2,2,3,2,
+  2,2,2,3,3,3,3,3,2,2,2,2,2,2,3,2
+};
+
+int NumCaiSpecies=3;                              /* used in menu.c       */
+CAI_STRUCT cai[]= {                               /* array of cai structs */
+  "Escherichia coli",
+  "No reference",
+  0.000F,
+    0.296F,1.000F,0.239F,0.500F,1.000F,0.744F,1.000F,1.000F,
+    0.020F,0.077F,0.000F,0.000F,0.020F,0.017F,0.000F,1.000F,
+    0.042F,0.070F,0.291F,1.000F,0.037F,0.012F,1.000F,0.356F,
+    0.007F,0.135F,0.124F,0.004F,1.000F,1.000F,1.000F,0.004F,
+    0.185F,0.965F,0.051F,0.085F,1.000F,1.000F,1.000F,0.410F,
+    0.003F,0.076F,1.000F,0.004F,1.000F,0.099F,0.253F,0.002F,
+    1.000F,1.000F,0.434F,1.000F,0.066F,0.122F,1.000F,0.724F,
+    0.495F,0.586F,1.000F,0.010F,0.221F,0.424F,0.259F,0.019F,
+  "Bacillus subtilis",
+  "No reference",
+  0.00F,
+     0.571F,1.000F,0.500F,1.000F,1.000F,0.021F,1.000F,1.000F,
+     1.000F,0.458F,0.000F,0.000F,0.036F,0.021F,0.000F,1.000F,
+     0.857F,1.000F,1.000F,1.000F,0.143F,0.071F,0.083F,0.609F,
+     0.500F,0.714F,1.000F,0.022F,0.071F,0.143F,0.214F,0.043F,
+     0.500F,1.000F,0.417F,0.125F,1.000F,0.033F,1.000F,0.208F,
+     0.071F,0.867F,1.000F,0.435F,1.000F,0.200F,0.097F,0.022F,
+     1.000F,1.000F,0.417F,0.955F,0.188F,0.025F,1.000F,0.773F,
+     0.750F,0.275F,1.000F,1.000F,0.438F,0.125F,0.412F,0.045F,
+  "Saccharomyces cerevisiae",
+  "Sharp and Cowe (1991) Yeast 7:657-678",
+  0.00F,
+    0.113F,1.000F,0.071F,1.000F,1.000F,0.693F,1.000F,0.077F,
+    0.117F,0.036F,0.000F,0.000F,1.000F,0.005F,0.000F,1.000F,
+    0.006F,0.047F,0.245F,0.137F,0.003F,0.009F,1.000F,0.002F,
+    0.039F,1.000F,1.000F,0.002F,0.003F,0.002F,0.007F,0.002F,
+    0.823F,0.921F,0.053F,0.021F,1.000F,1.000F,1.000F,0.031F,
+    0.003F,0.012F,0.135F,1.000F,1.000F,0.006F,1.000F,0.003F,
+    1.000F,1.000F,0.554F,1.000F,0.831F,0.316F,1.000F,0.020F,
+    0.002F,0.015F,1.000F,0.002F,0.018F,0.001F,0.016F,0.004F
+};
+
+
+AMINO_PROP_STRUCT amino_prop={                   /* amino acid properties */
+  0.00F,                
+  2.80F,3.80F,4.50F,1.90F,4.20F,                 /* hydropathicity values */
+  -0.8F,-1.6F,-0.7F,1.80F,-1.3F,   
+  1.00F,-3.2F,-3.5F,-3.5F,-3.9F,
+  -3.5F,-3.5F,2.50F,-0.9F,-4.5F,
+  -0.4F,
+  0,
+  1,0,0,0,0,                                     /* am i aromatic ?       */ 
+  0,0,0,0,1,
+  0,0,0,0,0,
+  0,0,0,1,0,0 
+};
+
+
+MENU_STRUCT Z_menu={  /* default run-time settings, in MENU_STRUCT order   */
+  FALSE,              /* prog                                              */
+  'X',                /* this default is set in proc_comm_line to CU       */
+  TRUE ,              /* verbose                                           */
+  FALSE,              /* totals                                            */
+  TRUE,               /* menu interface                                    */
+  TRUE,               /* warnings about sequence data are to be displayed  */
+  FALSE,              /* codons                                            */  
+  FALSE,              /* fop                                               */  
+  FALSE,              /* cai                                               */ 
+  FALSE,              /* cbi                                               */ 
+  FALSE,              /* bases                                             */  
+  FALSE,              /* gc3s                                              */
+  FALSE,              /* gc                                                */  
+  FALSE,              /* enc                                               */
+  FALSE,              /* silent base                                       */ 
+  FALSE,              /* Length silent codons                              */
+  FALSE,              /* length in codons                                  */
+  FALSE,              /* hydrophobicity                                    */
+  FALSE,              /* aromaticity                                       */
+    
+  ' ',                /* default separator                                 */
+   
+  FALSE,              /* coa                                               */
+   
+  0,                  /* genetic code                                      */
+  0,                  /* type of fop_species                               */ 
+  0,                  /* type of cai_species                               */
+  
+  FALSE,              /* sequence type                                     */
+  'H',                /* Sequence format                                   */
+  "",                 /* current input file name                           */
+  "",                 /* current output file name                          */
+  "",                 /* current tidy outfile name                         */
+  "",                 /* current fop input file name                       */
+  "",                 /* current cai input file name                       */
+  "",                 /* current cbi input file name                       */
+  "",                 /* log all stderr output to a file                   */
+  "",                 /* Null the string junk                              */  
+  "",                 /* Null the string messages                          */
+   
+  FALSE,              /* was analysis run                                  */     
+  24,                 /* current number of lines (height) of screen        */ 
+  
+  NULL,               /* Null pointer input file                           */   
+  NULL,               /* Null pointer outputfile                           */   
+  NULL,               /* Null pointer tidyout file                         */   
+  NULL,               /* Null pointer codon usage file                     */
+  NULL,               /* Null pointer fopfile                              */
+  NULL,               /* Null pointer caifile                              */ 
+  NULL,               /* Null pointer cbifile                              */ 
+  NULL,               /* Null pointer logfile                              */
+  NULL,               /* Null pointer my_err                               */
+  NULL,               /* Null pointer fcoa_in                              */
+  NULL                /* Null pointer fcoa_out                             */
+};
+
+
+#else                 /* already been defined so declare as externals     */      
+
+extern AMINO_STRUCT         *paa;
+extern GENETIC_CODE_STRUCT  *pcu;
+extern FOP_STRUCT           *pfop;
+extern FOP_STRUCT           *pcbi;
+extern CAI_STRUCT           *pcai;
+extern MENU_STRUCT          *pm;
+extern COA_STRUCT           *pcoa;
+extern AMINO_PROP_STRUCT    *pap;
+
+#if defined (_WINDOWS) || defined (_DOS) 
+ extern   CAI_STRUCT          /*_near*/ cai[];       /* some MS compilers  */
+ extern   GENETIC_CODE_STRUCT /*_near*/ cu[];        /* want these to be   */ 
+ extern   FOP_STRUCT          /*_near*/ fop[];       /* declared as _near  */
+#else                
+ extern CAI_STRUCT                cai[];
+ extern GENETIC_CODE_STRUCT        cu[];
+ extern FOP_STRUCT                fop[];
+#endif
+ extern COA_STRUCT                coa;
+ extern AMINO_STRUCT              amino_acids;
+ extern AMINO_PROP_STRUCT         amino_prop;
+ extern MENU_STRUCT               Z_menu; 
+
+ extern char Revision[];                             /* version string    */
+ extern char Update[];
+ extern char Author[];
+ extern char  title[100];
+ extern char  long_seq;
+ extern char  last_base;
+
+ extern long int ncod[65];
+ extern long int naa[23];
+ extern long int din[3][16];
+ extern long int codon_tot;
+ extern long int master_ic;
+ extern long int fl_pos_start;
+ extern long int fl_pos_curr;
+ extern long int GC_TOT;
+ extern long int AT_TOT;
+ extern long int AA_TOT;
+ extern long int IUBC_TOT;
+ extern long int GAP_TOT; 
+ extern long int num_sequence;
+ extern long int num_seq_int_stop;
+ extern long int non_std_char;
+ extern long int tot;
+ extern int last_aa;
+ extern int reg;
+ extern int valid_stops;
+ extern int valid_start;
+ extern int fram;      
+ extern int *da;
+ extern int *ds;
+ extern int NumGeneticCodes;
+ extern int NumFopSpecies;
+ extern int NumCaiSpecies;
+#endif
+
+/****************** Function type declarations *****************************/
+
+FILE *open_file    ( char *info, char *default_name, char *mode, 
+                     int  verbose );
+
+/* each returns an int pointer -- presumably the lookup tables that the   */
+/* globals ds and da refer to; TODO confirm against the .c sources        */
+int*  how_synon    ( void );
+int*  how_synon_aa ( void );
+
+int codon_usage_tot( char *seq, long int how_many); 
+int ident_codon    ( char *codon );
+int codon_usage_out( FILE *fblkout, long int *ncod,int last_aa,
+                     int valid_stops, char *info);
+int rscu_usage_out ( FILE *fblkout, long int *ncod,long int *naa);
+int raau_usage_out ( FILE *fblkout, long int *naa );
+int aa_usage_out   ( FILE *fblkout, long int *naa );
+int cai_out        ( FILE *foutput, long int *ncod); 
+int cbi_out        ( FILE * foutput, long int *ncod, long int *naa );
+int fop_out        ( FILE *foutput, long int *ncod);
+int hydro_out      ( FILE *foutput, long int *naa );
+int aromo_out      ( FILE *foutput, long int *naa );
+int toutput        ( FILE *fblkout, char *seq );
+int output_long    ( FILE *fblkout, char *seq );
+int cutab_out      ( FILE *fblkout, long *ncod, long *naa);
+int dinuc_out      ( FILE *fblkout, char *title  );
+int fileclose      ( FILE **file_pointer );
+int clean_up       ( long int *ncod,long int *naa );
+int initilize_point( char code , char fop_type, char cai_type );
+int initilize_coa  ( char code );
+int proc_comm_line ( int *argc, char ***arg_list);
+int my_exit        ( int exit_value, char *message );          
+int printinfo      ( void); 
+
+int dinuc_count    ( char *seq , long int tot );
+int tidy           ( FILE *finput , FILE *foutput , FILE *fblkout, 
+                     FILE *fcoaout ) ;  
+int chelp ( char *help );
+
+long int codon_error( int last_aa, int valid_stops, char *title, 
+                      char error_level);
+
+float  enc_out      ( FILE *foutput, long int *ncod, long int *naa);
+double inertot      ( void);
+
+char* get_aa        ( int one_or_3_letter , char* the_dna_word);
+char* garg          ( int argc, char *argv[], const char *targ, int mode);
+char  coa_raw_out   ( FILE *fcoaout, long *ncod, long *naa, char *title);
+char  WasHelpCalled ( char * input); 
+
+void sorted_by_axis1( double *ax1, int *sortax1, int lig);
+void highlow        ( long int *low , long int *high ,FILE *summ );
+void menu_1         ( void);
+void menu_2         ( void);
+void menu_3         ( void);
+void menu_4         ( void);
+void menu_5         ( void);
+void menu_6         ( void); 
+void menu_7         ( void);
+void menu_8         ( void);
+void menu_coa       ( void);
+void welcome        ( void);
+void menu_initial   ( void);
+
+void asummary       ( void);
+void tester         ( void);
+/* vecalloc: presumably allocates *vec for n doubles -- TODO confirm      */
+void vecalloc       ( double **vec, int n);
+void writevec       ( double *v1, FILE *fic);
+void lecmat         ( double **tab, char *nfic);
+void freetab        ( double **tab);
+void freevec        ( double *vec);
+void taballoc       ( double ***tab, int l1, int c1);
+void lecvec         ( double *v1, char *nfic);
+void ecrmat         ( double **tab, char *nfic);
+void ecrvec         ( double *v1, char *nfic);
+void scalmat        ( double **tab, double r);
+void scalvec        ( double *v1, double r);
+void sqrvec         ( double *v1);
+void prodmatAAtB    ( double **a, double **b);
+void prodmatABC     ( double **a, double **b, double **c);
+void prodmatAtAB    ( double **a, double **b);
+void ecrmatred      ( double **tab, int c1, char *nfic);
+void readvec        ( double *v1, FILE *fic);
+void lecvalpro      ( double *v1, char *nfic);
+void writescal      ( double r,  FILE *fic);
+void editvalpro     ( FILE *ficlist, double *vp, int n, double s);
+void DiagoRC        ( FILE *summary);
+void gc_out         ( FILE *foutput, FILE *fblkout, int which);
+void base_sil_us_out( FILE *foutput, long int *ncod,long int *naa);
+void bintext        ( char *nfice , char *nfics);
+void select_coa     ( char choice); 
+void textbin        ( char *filein , char *fileout);
+void colmout        ( char *nfice, char *nfics,AMINO_STRUCT *paa,
+                                   FILE *summary);   
+void output         ( char *seq ,  FILE *foutput , FILE* fblkout ,
+                                   FILE *fcoaout);
+void rowout         ( char *nfice, char *nfics, char *ncout, FILE *summary);
+void PrepAFC        ( char *nfic);
+void inertialig     ( char *inertia_out, char *filen, FILE *summary);
+void inertiacol     ( char *inertia_out, FILE *summary);
+void selectcol      ( char *nfic , double *col, int numcol);
+void gen_cusort_fop ( int *sortax1, int lig , FILE *fnam ,FILE *summ ); 
+void dot            ( int y    ,  long int period ); 
+void DiagoComp      ( int n0, double **w, double *d, int *rang);
+void suprow         ( int num_seq,char *nficvp,char *nfictasup,
+                      char *nficlisup,char *option, FILE *summary);
+void main_menu      ( int c );
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/codonW.hlp b/codonW.hlp
new file mode 100755
index 0000000..3bb3fe2
--- /dev/null
+++ b/codonW.hlp
@@ -0,0 +1,502 @@
+#main_menu#
+
+CodonW is a package for codon usage analysis. It was designed to 
+simplify Multivariate Analysis (MVA) of codon usage. The MVA method 
+employed in CodonW is correspondence analysis (COA), the most widely 
+used codon usage MVA method. COA can be performed on codon usage, 
+relative synonymous codon usage or amino acid usage. Integrated into 
+CodonW is the ability to work with genetic codes other than the 
+universal code. Other indices of codon usage and codon bias, 
+dinucleotide bias and mutation bias are also analysed by CodonW.
+
+Modes of use:
+a) There are an extensive number of command line options available if 
+your platform supports command line parameters. For more information 
+type 
+
+codonw -help
+
+b) Maximum functionality is obtained by running CodonW using the 
+interactive menus. Each menu has its own online help.
+
+c) CodonW also emulates a large number of useful utility programs used 
+in our labs to aid the analysis of codon usage.  If the first  argument 
+to the CodonW program is one of a recognised list of programs (rscu, cu, 
+aau, raau, tidy, reader, cutab, cutot, transl, bases, base3s, dinuc, 
+cai, fop, gc3s or enc), CodonW assumes that you want to accomplish or 
+calculate one of these simpler tasks/indices and bypasses the menu 
+system. For a fuller description of what these pseudo programs 
+calculate, see the README file.
+
+To Run CodonW:
+
+a) You must load a file containing all your sequences in fasta/Pearson 
+format, either from the command line or using menu 1.
+
+b) You may change many of the default values using menu 3.
+
+c) Select which codon usage indices to measure (menu 4). Choose the type 
+of correspondence analysis, if any (menu 5). Other data analysis options 
+may also be selected using menu 8.
+
+d) Return to the first (main) menu and type R to run an analysis.
+
+Output files from the correspondence analysis have the extension  .coa. 
+See summary.coa for an overall explanation of what is being generated by 
+the analysis. 
+ 
+Other output will be stored in the files that you choose using menu 1 or 
+as specified on the command line. Depending on the options chosen there 
+will either be one or two result files; usually they will have the 
+extensions .out and .blk.
+
+//
+#open_file_query#
+
+Open file dialog. 
+
+You have been requested to choose a file for the analysis. If the 
+request is for an input filename, this file must contain all your 
+sequences that you wish to analyse in a sequential fasta formatted file. 
+That is, all sequences should be in one file and individual sequences 
+separated by a single header line that starts with an angle bracket 
+character ">".  
+
+If you use GCG, the output from the program tofasta is acceptable. 
+
+If prompted for either the "bulk" or "output" file names, these 
+filenames will be used to record the results of the analysis. These 
+files will be opened for writing which may destroy the content of the 
+files, should the files already exist. So if a file already exists with 
+the name you have chosen, you will be asked whether you wish to 
+overwrite the file, append the results to the file, or choose a new 
+filename (that is, unless you have chosen the option to overwrite files 
+silently).
+
+//
+#File_not_found#
+
+File not found
+
+The name of the input file that you have chosen does not exist in the 
+current working directory. Either choose a new filename or give the 
+fully qualified filename (e.g. e:\codon\cu\input.dat).
+
+Depending on the system that you are using, the names of all files in 
+the current working directory may or may not be displayed when a file 
+cannot be located. 
+
+//
+#file_exists#
+
+File exists
+
+If the filename that you have chosen as the output file exists, it will 
+be deleted if opened for writing. You now have the choice of whether or 
+not to overwrite this file (thus deleting the original). If you choose 
+not to overwrite you have the further choice of either appending the 
+results to the file you originally chose or selecting a new filename.
+
+(Note: If you select overwrite silently from the defaults menu you will 
+not be prompted if a file of the same name already exists; it will be 
+overwritten.)
+
+//
+#file_append#
+
+File Append 
+
+You decided not to overwrite the file. You can either append the results 
+to this file or choose a new filename. 
+
+//
+#menu_2#
+
+Menu 2 Purifying sequences menu
+
+This menu was originally used to eliminate sequences from data that had 
+high sequence identity to other sequences in the dataset and thus might 
+bias the output results. 
+
+This functionality is not currently portable and is not being made 
+available at present. Try using the NCBI program nrdb or the EGCG9 
+program clean_up to remove identical or almost identical sequences. 
+
+//
+#menu_3#
+
+Menu 3 Defaults menu 
+
+To improve flexibility, many of the default values used internally by 
+CodonW (defined in the header file codonW.h) can be altered at runtime 
+using this menu. Ten options can be customised. 
+
+Option (1) Change ASCII delimiter in output. The default ASCII delimiter 
+used to separate information in machine readable output files is a 
+comma. The delimiter can be changed via this option to either the tab or 
+space character. 
+
+Option (2) Run silently. This option can be used when running from a 
+script file or as a batch job. If TRUE, it suppresses warnings about 
+overwriting files, the prompting for a personal choice of Fop, CBI or 
+CAI values (although these can still be given via command line 
+arguments) and the pause after each page of error or warning messages 
+has been displayed. 
+
+Option (3) Log warnings/information to a file. The default value for 
+this option is set as FALSE, in which case all warning or error messages 
+generated by CodonW are written to the screen via the standard error 
+stream. When TRUE, the errors are redirected to a log file:- you will be 
+prompted for the filename for this log file. This option is useful if 
+there are a large number of sequences in the input file or there are 
+many warning messages.
+
+Option (4) Number of lines on screen. This is used to set the screen 
+length, which is used during screen refreshing and the pagination of 
+error messages. 
+
+Option (5) Change the genetic code. By default, CodonW assumes the 
+universal genetic code when translating and processing codons. This 
+option allows alternative genetic codes to be selected.
+
+Option (6) Change the Fop/CBI values. To calculate either the CBI or Fop 
+indices, a set of optimal codons is required; by default the optimal 
+codons of E. coli are assumed. This option displays a submenu which 
+lists eight species where optimal codons have been identified. When 
+calculating the Fop/CBI of genes from these species the appropriate set 
+of codons should be selected. Personal selections of optimal codons can 
+be input at runtime. 
+
+Option (7) Change the CAI values. To calculate the codon adaptation 
+index it is necessary to assign fitness values to each codon; by default 
+the fitness values of E. coli codons are assumed. However, these values 
+are very species-specific and so using E. coli fitness values to 
+calculate CAI values for other species is nonsensical. Before assigning 
+fitness values to a codon a set of genes which have been experimentally 
+verified to be highly expressed must be identified. Such sets have been 
+created for relatively few species. This menu lists the species where a 
+reference set of highly expressed genes is known, and fitness values 
+assigned. Personal selections of fitness values can be input at runtime 
+if calculating CAI.  
+
+Option (8) Toggle human or machine-readable output. The default format 
+for most CodonW output files is human readable. Machine-readable output 
+is fixed width numerical data separated by an ASCII delimiter. This 
+format is readily imported into a wide range of statistical and 
+graphical analysis programs but not easily read by eye. Human readable 
+output is more verbose but easier to read. The output formats for codon 
+usage, tabulation of codon usage, relative synonymous codon usage and 
+base compositions are the most radically affected by this option. 
+
+Option (9) Toggle output for each or all genes. By default, CodonW 
+processes each gene individually. When the option "all genes" is 
+selected, sequences are concatenated and processed as a single sequence. 
+This option can be used to calculate total codon or amino acid usage, 
+the average G+C content, Fop, etc.
+
+Option (10) Correspondence analysis defaults. This option allows access 
+to the "advanced correspondence analysis" menu. This menu is normally 
+accessed as a submenu of "Correspondence analysis" (Menu 5), but is 
+included here so that all runtime options are accessible via the "Change 
+default values" menu. 
+
+//
+#menu_4#
+
+Menu 4 Codon Usage Indices
+
+This menu is used to choose the indices calculated by CodonW; by default 
+only the G+C content of the sequence is selected. The calculation of 
+these indices (except G+C content) is dependent on the genetic code 
+selected under Menu 3. More than one index may be calculated at once.
+
+Option (1) Codon Adaptation Index (CAI). CAI measures the relative 
+adaptation of a gene to the codon usage of highly expressed genes. The 
+relative adaptiveness (w) of a codon is the ratio of the usage of that 
+codon to that of the most abundant codon for the same amino acid. The 
+relative adaptiveness of codons (for albeit a limited choice of species) 
+can be selected from Menu 3.
+
+Option (2) Frequency of Optimal codons (Fop). This index is the ratio of 
+optimal codons to synonymous codons (genetic code dependent). Optimal 
+codons for several species are in-built and can be selected using Menu 
+3. By default, the optimal codons of E. coli are assumed. The user may 
+also enter a personal choice of optimal codons. If rare synonymous 
+codons have been identified, there is a choice of calculating the 
+original Fop index or a modified Fop index. Fop values for the original 
+index are always between 0 (where no optimal codons are used) and 1 
+(where only optimal codons are used). When calculating the modified Fop 
+index, any negative values are adjusted to zero. 
+
+Option (3) Codon Bias Index (CBI). The codon bias index is a measure of 
+directional codon bias. It measures the extent to which a gene uses a 
+subset of optimal codons. 
+
+Option (4) The effective number of codons (NC). This index is a simple 
+measure of overall codon bias and is analogous to the effective number 
+of alleles measure used in population genetics. Knowledge of the optimal 
+codons or a reference set of highly expressed genes is unnecessary when 
+calculating this index. 
+
+Option (5) G+C content of the gene. This is calculated as the frequency 
+of nucleotides that are guanine or cytosine.
+
+Option (6) G+C content 3rd position of synonymous codons (GC3s). This is 
+the fraction of codons, synonymous at the third codon position, which 
+have either a guanine or cytosine at that third codon position. 
+
+Option (7) Silent base composition. Selection of this option calculates 
+four separate indices, i.e. G3s, C3s, A3s & T3s. Although correlated 
+with GC3s, this index is not directly comparable with it. It quantifies 
+the usage of each base at synonymous third codon positions. 
+
+Option (8) Length silent sites (Lsil). This is the frequency of 
+synonymous codons within each gene.
+
+Option (9) Length amino acids (Laa). This is the number of translatable 
+codons.
+
+Option (10) Hydropathicity of protein. This is the general average 
+hydropathicity (or GRAVY) score for the hypothetical translated gene 
+product. It is the arithmetic mean of the hydropathic indices of the 
+amino acids in that product.
+
+Option (11) Aromaticity score of protein. This is the frequency of 
+aromatic amino acids (Phe, Tyr, Trp) in the hypothetical translated gene 
+product. 
+
+The hydropathicity and aromaticity protein scores are indices of amino 
+acid usage. The strongest trend in the variation in the amino acid 
+composition of E. coli genes is correlated with protein hydropathicity, 
+the second strongest trend is correlated with gene expression, while the 
+third is correlated with aromaticity. 
+//
+#menu_5_coa#
+
+Menu 5 Correspondence analysis
+
+In many unicellular organisms, protein coding genes have non-random 
+usage of synonymous codons (see Andersson and Kurland (1990) and Sharp 
+et al. (1993) for reviews). Correspondence analysis uses contingency 
+tables (counts of the joint occurrences of rows and columns of a table). 
+Therefore, the sequence data must be transformed into a contingency 
+table. The frequency of each codon (or amino acid) is tabulated for each 
+gene. This is then converted into a Euclidean distance measurement of 
+distance between the rows or columns. CodonW calculates a scaled 
+distance measurement as recommended by Grantham and co-workers (Grantham 
+et al 1981).  
+
+Analysis of a large number of distances would ordinarily be very time 
+consuming. Correspondence analysis provides a simple visualisation of 
+these distances by projecting the points from their original 
+multidimensional space onto lower dimensions, with genes with similar 
+distances plotted as neighbours. In addition to calculating the 
+coordinates for the projection of these points, correspondence analysis 
+(as implemented in CodonW) also calculates the total inertia of the 
+data, together with the eigenvalue and relative variation explained by 
+each axis. CodonW can also quantify the absolute and relative 
+contribution of each gene, codon or amino acid on each identified trend. 
+To limit variation due to stochastic noise, it is recommended that short 
+genes (less than 50 codons) be excluded from a correspondence analysis.
+
+The correspondence analysis menu (Menu 5) has four options, the default 
+option being not to generate a correspondence analysis, i.e. Do not 
+perform a COA. 
+
+Option (1) Correspondence analysis of codon usage. This generates a 
+correspondence analysis on the total codon usage. By default, this is on 
+synonymous codons, although the advanced menu may be used to adjust 
+which codons are included/excluded. If analysing synonymous codon usage, 
+the analysis has 58 degrees of freedom. 
+
+Option (2) Correspondence analysis of RSCU. This generates a 
+correspondence analysis of relative synonymous codon usage (RSCU). RSCU 
+is calculated as the ratio of the observed frequency of a codon to the 
+frequency expected under unbiased codon usage within a synonymous codon 
+group. Correspondence analysis of RSCU is useful because variation 
+caused by unequal usage of amino acids is removed; however, the number 
+of degrees of freedom is reduced to 40. 
+
+Option (3) Correspondence analysis of Amino Acid usage. This generates a 
+correspondence analysis of amino acid composition, with 19 degrees of 
+freedom.
+
+Option (4) Do not perform a correspondence analysis. This is the default 
+option.
+//
+#menu_6#
+
+Menu 6 Basic Stats
+
+This menu was originally designed to calculate some basic statistics on 
+the output from the various codon usage indices. 
+
+This functionality is not currently portable and is not being made 
+available at present. 
+
+
+//
+#menu_7#
+
+Menu 7 Relaxation (almost) 
+
+This menu was designed to help teach the genetic code(s). It asks 
+various random questions about codon translation and codon usage. The 
+genetic code used as the basis for the correct answers can be changed 
+under the default menu (Menu 3).
+
+//
+
+#fun#
+
+Teach yourself the genetic codes and codon usage. 
+
+To exit type "quit" or "exit" (without the quotation marks). 
+
+If you don't know the answer to the question, you can type "?" (without 
+the quotation marks). 
+You will then be prompted with the correct answer. Beware:- you will be 
+penalised for incorrect answers :).
+
+The questions are:
+What is the three-letter name?    
+(You must convert the one-letter code given to the three-letter code.) 
+
+How synonymous is Amino Acid?    
+(How many synonyms are there for this amino acid?)
+
+Name the Amino Acid?              
+(Which amino acid is coded by this codon?) 
+
+//
+
+#menu_8_blk#
+
+Menu 8 Bulk output options in CodonW 
+
+Non-correspondence analysis output from CodonW which cannot easily be 
+summarised as a single index is bulk output. Under this menu there are 
+10 options. Multiple options cannot be selected simultaneously. Each 
+time this menu is selected you will be prompted for an alternative 
+output filename.
+
+Option (1) Fasta format output of DNA sequence. The input sequences are 
+reformatted and written to a file in a Fasta /Pearson-like format.
+
+Option (2) Reader format output of DNA sequence. This format is derived 
+from the fasta format, except that the sequence is written as codons 
+with three bases separated by a space, and the size of the sequence is 
+recorded at column 70. 
+
+Option (3) Translate input file to amino acids. This translates DNA to 
+amino acids using the selected genetic code. The amino acids are written 
+in a Fasta/Pearson compatible format.
+
+Option (4) Codon Usage. This is the default option. The frequency of 
+each codon is written to a file in four rows with 16 columns per row. 
+The codons are written in sequential numerical order, left to right.
+
+Option (5) Amino acid usage. The frequency of each amino acid, 
+untranslatable codons and stop codons are recorded, one row per gene and 
+23 columns per row. The first column contains a unique gene description, 
+the second column records number of untranslatable codons, the third and 
+subsequent columns summarize the amino acid and termination codon usage.
+
+Option (6) Relative Synonymous Codon Usage (RSCU). Relative synonymous 
+codon usage is calculated as the ratio of the observed frequency of a 
+codon to the frequency expected if codon usage were random.
+
+Option (7) Relative Amino acid usage (RAAU). Relative Amino acid usage 
+is the frequency of the amino acid relative to the total amino acid 
+usage.
+
+Option (8) Dinucleotide frequencies. The frequency of the 16 
+dinucleotides is calculated in each of the three possible codon 
+positions. The data are recorded with one row per position and 16 
+columns per row. 
+
+Option (9) Base composition analysis. This option records the frequency 
+of nucleotides in each codon position. It also reports GC, GC3s and GCns 
+(GC content excluding synonymous third position codons). 
+
+Option (10) No output written to file. This option is useful when 
+working with large datasets and disk storage or disk access is a 
+limiting factor. This option suppresses all the output to the bulk 
+output file.
+//
+
+#menu_coa#
+
+Advanced Correspondence Analysis menu.
+
+This menu allows much greater control over the correspondence analysis. 
+
+Option (1) Unselect or select. This menu changes slightly depending on 
+whether correspondence analysis is of amino acid or codon usage. It 
+simplifies the selection of the codons/amino acids that are to be 
+included in the COA. This allows the user to override the default 
+selections, which if the COA is of codon usage, is the exclusion of non-
+synonymous codons and termination codons. 
+
+Option (2) Change the number of axes. The number of axes generated by a 
+correspondence analysis is N-1, where N is either the number of genes or 
+columns (whichever is the lesser in value). However, the default is to 
+generate information about the first four axes (or trends). This option 
+allows the user to record coordinates on any number of axes, up to the 
+maximum generated by the analysis. 
+
+Each axis generated by correspondence analysis is represented by a 
+multidimensional vector. The position of a gene on any axis is the 
+product of that gene's codon usage and the axis vector. As the vector is 
+itself a product of the codon usage, the vectors can be affected by 
+unusual codon usage. An analysis of nuclear and plasmid genes would be 
+difficult, as the codon usage of each would perturb the other. Each 
+dataset could be analysed individually but as the vectors for the axes 
+would be different, it would be difficult to make direct comparisons 
+between the analyses. To overcome this problem it is necessary to 
+generate the COA vectors using one dataset and then to apply the same 
+vectors to another. Thus direct comparison between the ordination of 
+genes is possible. In CodonW, this is possible by using the following 
+option (Option 3).
+
+Option (3) Add additional genes after correspondence analysis. The user 
+is prompted for the file containing the additional sequences, to which 
+the vectors are to be applied. The vectors are calculated, as normal, 
+using the genes contained in the standard input file (Menu 1). The co-
+ordinates and any additional information about these original genes are 
+recorded as normal. Next the additional genes are read in and the 
+original vectors applied to them. The ordinations of these additional 
+genes are then appended to the COA output files (for an explanation 
+about the COA output files see below).
+ 
+Option (4) Toggle level of correspondence analysis output. By default 
+this option is set to "normal" but can be toggled to "exhaustive". If 
+the exhaustive output option is selected, then in addition to the 
+standard information about gene and codon/amino acid ordination, 
+additional information about inertia of the rows and columns is 
+generated. This additional information includes the absolute 
+contribution of the inertia of each row or column to each of the 
+recorded axes, and the fraction of the variation within each row or 
+column explained by each axis.
+
+Option (5) Change number of genes used to identify optimal codons. 
+Correspondence analysis of either RSCU or codon usage where the major 
+trend correlates with gene expression can be used to identify optimal 
+codons. This is achieved by comparing the codon usage of the genes that 
+lie at the extremes of the principal trend (axis 1). By default this is 
+the top and bottom 10% of genes (as defined by axis 1 ordination). Using 
+this option this can be set to a percentage between 1% and 50%, or to an 
+absolute number of genes.  
+//
+
+#select#
+
+Codon or Amino acid selection
+
+The codons or amino acids that will NOT be analysed in this 
+correspondence analysis are surrounded by curly brackets. The choice of 
+which codons/amino acids are to be excluded can be changed. Simply 
+give the number associated with each codon/amino acid for which you want 
+to change the status. 
+
+//
diff --git a/codonWinstall b/codonWinstall
new file mode 100755
index 0000000..dd4bc5b
--- /dev/null
+++ b/codonWinstall
@@ -0,0 +1,271 @@
+#!/bin/sh
+# Adapted from the SRS5 srsinstall script
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# analyse command line and print usage if wrong
+#
+
+ERR='codonWinstall: Stopping due to Error'
+
+# Choose the action from the first argument; with no argument do everything.
+if [ "$#" = 0 ]; then
+  option="all"
+else
+  case "$1" in
+    all|clean|codonw|links|cleanall)
+      option="$1"
+      ;;
+    realclean)
+      # realclean is handled exactly like cleanall
+      option="cleanall"
+      ;;
+    *)
+      option="usage"
+      echo "unknown option '$1'"
+      ;;
+  esac
+fi
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# print "usage" and stop, or announce a full installation
+#
+
+case "$option" in
+  usage)
+    cat << END
+
+  Usage: ./codonWinstall option
+
+  Options:
+
+  all        does a complete installation
+  codonw     compile codonw only .. no linked programmes
+  links      generate links to pseudo programmes
+  clean      removes all object files
+  cleanall   removes all the object files, codonW, linked files and Makefile
+  realclean  removes all the object files, codonW, linked files and Makefile
+  
+END
+    exit 1
+    ;;
+  all)
+    echo "... starting installation of codonW"
+    ;;
+esac
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# put correct "make" and "cc" commands 
+#
+
+OS=`uname`
+
+if [ "$OS" = "SunOS" ]; then
+    case "`uname -r`" in
+	[56]*)   OS='Solaris' ;;
+    esac
+fi
+
+# some echo commands do not support -n
+# generally /usr/bin/echo doesn't but /usr/ucb/echo does, so one can hardwire
+# to be independent of users path.
+if [ -r /usr/ucb/echo ]; then
+  ECHON="/usr/ucb/echo -n"
+else
+  ECHON="echo -n"
+fi
+
+# OSF/1 v4.0 /usr/ucb is symlink to /usr/bin, but /bin/sh has builtin -n
+if [ "$OS" = "OSF1" ]; then
+  case "`uname -r`" in
+    V[4]*)  ECHON="echo -n"
+	    # or can
+	    #CMD_ENV=bsd ; export CMD_ENV
+    ;;
+  esac
+fi
+
+# we did our best, but now let's test
+ECHONT="`$ECHON | wc -c`"
+if [ $ECHONT != 0 ]; then
+  ECHONEND="\c"
+  ECHON=echo
+else
+  ECHONEND=""
+fi
+
+
+if [ ! -f "Makefile" ]; then
+
+  $ECHON "enter the make command [make]: $ECHONEND"
+  read makeCom
+  if [ "$makeCom" = "" ]; then makeCom='make' ; fi
+
+  # for OSF1 need to know if it is osf1 make or gnu make
+  if [ "$OS" = "OSF1" ]; then
+	$ECHON "is this OSF1 make [y]: $ECHONEND"
+	read OSFmake
+	if [ "$OSFmake" = "" ]; then OSFmake='y' ; fi
+  fi
+
+  if [ "$OS" = "SunOS" -o "$OS" = "Solaris" ]; then
+    ccComDef='gcc'
+  else
+    ccComDef='cc'    
+  fi
+
+  $ECHON "enter the cc command [${ccComDef}]: $ECHONEND"
+  read ccCom
+
+  if [ "$ccCom" = "" ]; then ccCom="$ccComDef" ; fi
+
+  echo 'choose between optimised code, or code for debugging'
+  $ECHON "optimised code [y]: $ECHONEND"
+  read optimCom
+  if [ "$optimCom" = "" ]; then optimCom="y" ; fi
+
+
+  $ECHON "enter the link command [ln]: $ECHONEND"
+  read ccLn
+
+  if [ "$ccLn" = "" ]; then ccLn="ln -f" ; fi
+
+
+  $ECHON "Do you want hard or soft links [hard]: $ECHONEND"
+  read ccLnflag
+
+  if [ "$ccLnflag" = "" ]; then ccLnflag="hard" ; fi
+
+  echo 'choose between optimised code, or code for debugging'
+  $ECHON "optimised code [y]: $ECHONEND"
+  read optimCom
+  if [ "$optimCom" = "" ]; then optimCom="y" ; fi
+
+
+
+  echo "...creating makefile for '$OS'"
+
+#better rename the old makefiles
+  if [ -f  "Makefile" ]; then \mv  Makefile Makefile.pre ; fi
+  if [ -f  "makefile" ]; then \mv  makefile makefile.pre ; fi
+  touch Makefile
+
+# Add the logical parts of the make file
+
+  if [ "$OS" = "OSF1" ]; then
+    echo "override cflags  = $(CFLAGS) -g" >> Makefile;
+  fi
+
+  if [ "$optimCom" = "y" ]; then
+    cflags="-O";
+  else
+    cflags="-g -DDEBUG" ;
+  fi
+
+  if [ "$ccLnflag" = "hard" ]; then
+    lncmd=$ccLn;
+  else
+    lncmd="$ccLn -s";
+  fi
+
+ link_prog="rscu cu aau raau tidy reader cutab cutot transl bases base3s dinuc cai fop gc3s gc cbi enc" 
+
+cat <<EOF >> Makefile
+
+objects  = codon_us.o codons.o open_fil.o commline.o menu.o tester.o coresp.o
+linked   = $link_prog
+
+CC=$ccComDef
+CFLAGS= $cflags -DBSD
+LN=$lncmd
+
+
+all: codonw links   
+
+codonw: $(objects)
+	$(CC) $(CFLAGS)  $(objects) -o codonw -lm
+
+clean:
+	\rm -f $(objects)
+
+cleanall:
+	\rm -f $(objects) codonw Makefile $(linked)
+
+realclean:
+	\rm -f $(objects) codonw Makefile $(linked)
+
+codon_us.o: codon_us.c codonW.h 
+	$(CC) -c $(CFLAGS) codon_us.c  
+
+menu.o: menu.c codonW.h 
+	$(CC) -c $(CFLAGS) menu.c
+
+codons.o: codons.c codonW.h 
+	$(CC) -c $(CFLAGS) codons.c
+
+coresp.o: coresp.c codonW.h 
+	$(CC) -c $(CFLAGS) coresp.c
+
+open_fil.o:    open_fil.c codonW.h
+	$(CC) -c $(CFLAGS) open_fil.c
+
+commline.o:    commline.c codonW.h 
+	$(CC) -c $(CFLAGS) commline.c
+
+tester.o:      tester.c codonW.h
+	$(CC) -c $(CFLAGS) tester.c
+
+links: codonw
+EOF
+
+
+for file in $link_prog 
+do
+ echo "\t\t$(LN) codonw $file"  >> Makefile
+done
+
+ echo >> Makefile
+
+fi
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# cleans the object
+#
+
+# Run the make target that corresponds to the selected option.
+# makeCom holds the make command entered while generating the Makefile; it
+# is unset when the Makefile already existed, in which case plain "make" is
+# used. A failing make stops the script with the message held in ERR.
+makeCom="${makeCom:-make}"
+
+if [ "$option" = 'clean' ]; then
+  echo '...cleaning the old object files '
+  echo "...$makeCom clean"
+  $makeCom clean || { echo "$ERR" >&2 ; exit 1 ; }
+fi
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+if [ "$option" = 'cleanall' -o "$option" = 'realclean' ]; then
+  echo '...cleaning the old object files, linked files and executables'
+  echo "...$makeCom realclean "
+  $makeCom realclean || { echo "$ERR" >&2 ; exit 1 ; }
+fi
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+if [ "$option" = 'all' ]; then
+  echo '...Starting to make codonW, with auxillary programs '
+  echo "...$makeCom all"
+  $makeCom all || { echo "$ERR" >&2 ; exit 1 ; }
+fi
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+if [ "$option" = 'codonw' ]; then
+  echo '...checking codonW is up to date'
+  echo "...$makeCom codonw"
+  $makeCom codonw || { echo "$ERR" >&2 ; exit 1 ; }
+fi
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+if [ "$option" = 'links' ]; then
+  echo '... Linking auxilliary programs to  '
+  echo "...$makeCom links"
+  $makeCom links || { echo "$ERR" >&2 ; exit 1 ; }
+fi
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+
+
diff --git a/codon_us.c b/codon_us.c
new file mode 100755
index 0000000..2328031
--- /dev/null
+++ b/codon_us.c
@@ -0,0 +1,2159 @@
+/**************************************************************************/
+/* CodonW codon usage analysis package                                    */
+/* Copyright (C) 2005            John F. Peden                            */
+/* This program is free software; you can redistribute                    */
+/* it and/or modify it under the terms of the GNU General Public License  */
+/* as published by the Free Software Foundation; version 2 of the         */
+/* License,                                                               */
+/*                                                                        */
+/* This program is distributed in the hope that it will be useful, but    */
+/* WITHOUT ANY WARRANTY; without even the implied warranty of             */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           */
+/* GNU General Public License for more details.                           */
+/* You should have received a copy of the GNU General Public License along*/
+/* with this program; if not, write to the Free Software Foundation, Inc.,*/
+/* 675 Mass Ave, Cambridge, MA 02139, USA.                                */
+/*                                                                        */
+/*                                                                        */
+/* The author can be contacted by email (jfp#hanson-codonw at yahoo.com Anti-*/
+/* Spam please change the # in my email to an _)                          */
+/*                                                                        */
+/* For the latest version and information see                             */
+/* http://codonw.sourceforge.net 					  */
+/**************************************************************************/
+/*                                                                        */
+/* -----------------------        codon_us.C     ------------------------ */
+/* This file contains most of the codon usage analysis subroutines        */
+/* except for the COA analysis                                            */
+/* Internal subroutines and functions                                     */
+/* initilize_point    assigns genetic code dependent parameters to structs*/
+/* initilize_coa      decides which cod/AA to include in a COA by default */
+/* codon_usage_tot    Counts codon and amino acid usage                   */
+/* ident_codon        Converts codon into a numerical value in range 1-64 */
+/* codon_usage_out    Write out Codon Usage to file                       */
+/* codon_error        Called after all codons read, checks data was OK    */
+/* rscu_usage_out     Write out RSCU                                      */
+/* raau_usage_out     Write out normalised amino acid usage               */
+/* aa_usage_out       Write out amino acid usage                          */
+/* how_synon          Calculates how synonymous each codon is             */
+/* how_synon_aa       Calculates how synonymous each AA is                */
+/* clean_up           Re-zeros various internal counters and arrays       */
+/* base_sil_us_out    Write out base composition at silent sites          */
+/* cai_out            Write out CAI usage                                 */
+/* cbi_out            Write out codon bias index                          */
+/* fop_out            Write out Frequency of Optimal codons               */
+/* enc_out            Write out Effective Number of codons                */
+/* gc_out             Writes various analyses of base usage               */
+/* dot(,X)            prints a period every X times it is called          */
+/* get_aa             converts a three base codon into a 1 or 3 letter AA */
+/* cutab_out          Write a nice tabulation of the RSCU+CU+AA           */
+/* dinuc_count        Count the dinucleotide usage                        */
+/* dinuc_out          Write out dinucleotide usage                        */
+/* coa_raw_out        Write out raw codon usage for use by COA analysis   */
+/* sorted_by_axis1    Sorts genes according to their axis one position    */
+/* gen_cusort_fop     COA specific, write out cu of genes by axis1 posit. */
+/* highlow            Used sorted cu to calculate high_low chi sq. contin */
+/* hydro_out          Write out Protein hydropathicity                    */
+/* aromo_out          Write out Protein aromaticity                       */
+/*                                                                        */
+/*                                                                        */
+/* External subroutines to codon_us.c                                     */
+/* my_exit            Controls exit from CodonW closes any open files     */
+/* tidy               reads the input data                                */
+/* output             called from tidy to decide what to do with the data */
+/* toutput            handles the reformatting and translation of seqs    */
+/* output_long        if sequence is very long then process what we know  */
+/*                    and write sequence to disk in fragments             */
+/* open_file          Open files, checks for existing files               */
+/* fileclose          Closes files and returns a NULL pointer or exits    */
+/*                                                                        */
+/**************************************************************************/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <math.h>
+#include <limits.h>
+#include "codonW.h"
+/********************* Initilize Pointers**********************************/
+/* Various pointers to structures are assigned here dependent on the      */
+/* genetic code chosen.                                                   */
+/* paa                points to a struct containing Amino Acid names      */
+/* pap                points to amino acid properties                     */
+/* pcai               points to Adaptation values used to calc CAI        */
+/* pfop               points to a struct describing optimal codons        */
+/* pcbi               points to the same structure as pfop                */
+/* pcu                points to data which has the translation of codons  */
+/* ds                 is a struct describing how synonymous a codon is    */
+/* da                 is a struct describing the size of each AA family   */
+/* pcoa               points to a struct that describes columns to be     */
+/*                    included/excluded from any COA analysis             */
+/**************************************************************************/
+int initilize_point(char code, char fop_species, char cai_species)
+{
+   /* tables that do not depend on the arguments                          */
+   paa  = &amino_acids;
+   pap  = &amino_prop;
+   pcoa = &coa;
+
+   /* tables picked by species; pfop and pcbi share the same fop entry    */
+   pcai = cai + cai_species;
+   pfop = fop + fop_species;
+   pcbi = fop + fop_species;
+
+   /* the genetic code must be in place before the synonymy tables are    */
+   /* built, because both helpers read pcu                                */
+   pcu = cu + code;
+   ds  = how_synon();
+   da  = how_synon_aa();
+
+   /* always emit a blank line; name the genetic code only for codonW     */
+   if (pm->codonW)
+     printf ("\nGenetic code is currently set to %s %s\n\n",
+             pcu->des, pcu->typ);
+   else
+     printf ("\n");
+
+   return 1;                               /* always reports success      */
+}
+/*******************How Synonymous is this codon  *************************/
+/* This function discovers at run time how synonymous a codon is by check-*/
+/* ing all other codons to see if they encode the same AA                 */
+/* This saves a lot of time when new genetic codes are added              */ 
+/**************************************************************************/
+int *how_synon(void)
+{
+   static int      dds[65];               /* index 0 is left at zero      */
+   int             codon, other, same;
+
+   dds[0] = 0;
+   for (codon = 1; codon < 65; codon++) {
+      /* count every codon (this one included) with the same translation  */
+      same = 0;
+      for (other = 1; other < 65; other++) {
+         if (pcu->ca[other] == pcu->ca[codon])
+            same++;
+      }
+      dds[codon] = same;
+   }
+   return dds;                             /* pointer to the static array */
+}
+/*******************How Synonymous is this AA     *************************/
+/* This function discovers at run time how synonymous an amino acid is by */
+/* checking all codons to see if they encode this same AA                 */
+/* This saves a lot of time when new genetic codes are added              */ 
+/**************************************************************************/
+int *how_synon_aa(void)
+{
+   static int      dda[22];
+   int            *slot;
+   int             codon;
+
+   /* start every amino acid family at size zero                          */
+   for (slot = dda; slot < dda + 22; slot++)
+      *slot = 0;
+
+   /* each of the 64 codons adds one to the family it translates to       */
+   codon = 1;
+   while (codon < 65) {
+      dda[pcu->ca[codon]] += 1;
+      codon++;
+   }
+   return dda;                             /* pointer to the static array */
+}
+/********************* Initialise COA     *********************************/
+/* Decides which codons or amino acids are to be included in a COA if only*/
+/* the default choice is used. For an amino acid COA, only stops are excl */
+/* but for a codon usage COA stop codons and non-synonymous codons are    */
+/* excluded                                                               */
+/* pcoa               points to a struct that describes columns to be     */
+/*                    included/excluded from any COA analysis             */
+/*                    structure contains AA and Codon information         */
+/**************************************************************************/
+/* Sets the default include/exclude flags in *pcoa for genetic code       */
+/* `code`. Reads ds and pcu, so these must already describe that code.    */
+/* Does nothing when the defaults were already set for the same code, so  */
+/* any later changes to the selection are kept. Always returns 1.         */
+int initilize_coa(char code)
+{
+   static char     initilized;      /* TRUE once defaults have been set   */
+   static char     oldcode;         /* code those defaults were set for   */
+   int i;
+    
+   /* if called a second time return unless the genetic code has changed  */
+   if (initilized && (oldcode == code)) return 1;
+
+   for (i = 0; i < 22; i++)         /* for each amino acid                */
+      if (i == 11 || i == 0)        /* stop codons have the value 11      */
+     pcoa->amino[i] = FALSE;        /* see RECODING file for more details */
+      else
+     pcoa->amino[i] = TRUE;
+
+   for (i = 0; i < 65; i++)         /* for each codon                     */
+      if (*(ds + i) == 1 || pcu->ca[i] == 11 || i == 0) 
+     pcoa->codons[i] = FALSE;       /* non-synonymous, stop or slot zero  */
+      else
+     pcoa->codons[i] = TRUE;
+
+   initilized = TRUE;               /* we have been called  ...           */
+   oldcode    = code;               /* ... for this genetic code; without */
+                                    /* this the test above only ever      */
+                                    /* matched code 0                     */
+   return 1;
+}
+/****************** Codon Usage Counting      *****************************/
+/* Counts the frequency of usage of each codon and amino acid this data   */
+/* is used throughout CodonW                                              */
+/* pcu->ca contains codon to amino acid translations for the current code */
+/* and is assigned in initialise point                                    */
+/**************************************************************************/
+/* Tallies the codons of seq[0..how_many-1] into ncod, naa and codon_tot. */
+/* seq       bases to be read three at a time                             */
+/* how_many  number of bases in seq; a trailing partial codon is counted  */
+/*           as untranslatable in ncod[0]                                 */
+/* Returns the number of the last codon read, or 0 when the sequence      */
+/* ended in a partial codon or held no complete codon at all.             */
+int codon_usage_tot(char *seq, long int how_many)
+{
+   char            codon[4];
+   int             icode = 0;      /* defined even if no codon is read    */
+   long int        i;              /* same width as how_many              */
+   
+   codon[3] = '\0';                /* strncpy below fills bytes 0-2 only  */
+
+   for (i = 0; i < how_many - 2; i += 3) {
+      strncpy(codon, (seq + i), 3);
+      icode = ident_codon(codon);
+      ncod[icode]++;                          /*increment the codon count */  
+      naa[pcu->ca[icode]]++;                  /*increment the AA count    */ 
+      codon_tot++;                            /*increment the codon total */
+   }
+
+   if (how_many % 3) {                        /*if last codon was partial */
+      icode = 0;                              /*set icode to zero and     */
+      ncod[0]++;                              /*increment untranslated    */ 
+   }                                          /*codons                    */
+   return icode;                              /*return the last codon     */
+}
+
+/****************** Ident codon               *****************************/
+/* Converts each codon into a numerical array (codon) and converts this   */
+/* array into a numerical value in the range 0-64, zero is reserved for   */
+/* codons that contain at least one unrecognised base                     */
+/*                                                                        */
+/**************************************************************************/
+int ident_codon(char *codon)
+{
+   int pos;
+
+   /* Recode the three bases in place: T/U=1, C=2, A=3, G=4, other=0.     */
+   for (pos = 0; pos < 3; pos++) {
+      char base = codon[pos];
+
+      if (base == '\0')
+         return 0;                  /* short codon; bases already recoded */
+                                    /* stay recoded                       */
+      else if (base == 'T' || base == 't' || base == 'U' || base == 'u')
+         codon[pos] = (char) 1;
+      else if (base == 'C' || base == 'c')
+         codon[pos] = (char) 2;
+      else if (base == 'A' || base == 'a')
+         codon[pos] = (char) 3;
+      else if (base == 'G' || base == 'g')
+         codon[pos] = (char) 4;
+      else
+         codon[pos] = (char) 0;     /* unrecognised base                  */
+   }
+
+   /* any unrecognised base makes the whole codon untranslatable          */
+   if (codon[0] == 0 || codon[1] == 0 || codon[2] == 0)
+      return 0;
+
+   /* first base selects a group of 16, third base a group of 4 and the   */
+   /* second base the position inside it, giving a value from 1 to 64     */
+   return (codon[0] - 1) * 16 + (codon[2] - 1) * 4 + codon[1];
+}
+
+/****************** Codon error               *****************************/
+/* Does some basic error checking for the input data, it can be called    */
+/* using different error levels, thus generating different types of       */
+/* messages. Basically checks for start, stop codons and internal stop    */
+/* codons. As well as non-translatable and partial codons                 */
+/**************************************************************************/
+long int codon_error(int x, int y, char *ttitle, char error_level)
+{
+   static int      error_lines = 0;   /* warning lines since the last pause  */
+   long int        stop_count  = 0;   /* codons translated as stop (AA 11)   */
+   long int        counted     = 0;   /* sum of ncod[1..64]                  */
+   int             not_stop;          /* codon x does not encode a stop      */
+   int             idx;
+
+   /* Total every codon and, separately, those the current code reads as   */
+   /* a stop. The total is the return value whatever the error level.      */
+   for (idx = 1; idx <= 64; idx++) {
+      counted += ncod[idx];
+      if (pcu->ca[idx] == 11)
+         stop_count += ncod[idx];
+   }
+
+   switch (error_level) {
+   case 1:                            /* start codon + internal stop checks  */
+      /* a stop was a valid_stop if it was the last codon of a sequence    */
+      stop_count -= valid_stops;
+
+      if (!valid_start && pm->warn) {
+         dot(0, 10);
+         fprintf(pm->my_err,
+                 "\nWarning: Sequence %3li \"%-20.20s\" does not begin with "
+                 "a recognised start codon\n", num_sequence, ttitle);
+         error_lines++;
+      }
+
+      if (stop_count && pm->warn) {
+         dot(0, 10);
+         if (pm->totals)
+            fprintf(pm->my_err,
+                    "\nWarning: some sequences had internal stop codons "
+                    "(found %li such codons)\n", stop_count);
+         else
+            fprintf(pm->my_err,
+                    "\nWarning: Sequence %3li \"%-20.20s\" has %li internal "
+                    "stop codon(s)\n", num_sequence, ttitle, stop_count);
+         num_seq_int_stop++;
+         error_lines++;
+      }
+      break;
+
+   case 2:                            /* partial / untranslatable / no stop  */
+      dot(0, 10);
+      not_stop = (pcu->ca[x] != 11);
+
+      if (!pm->warn)                  /* every message below needs warn on   */
+         break;
+
+      if (ncod[0] == 1 && not_stop) { /* last codon was partial              */
+         fprintf(pm->my_err,
+                 "\nWarning: Sequence %3li \"%-20.20s\" last codon was partial\n",
+                 num_sequence, ttitle);
+         error_lines++;
+         break;
+      }
+
+      if (ncod[0]) {                  /* non translatable codons             */
+         if (pm->totals)
+            fprintf(pm->my_err,
+                    "\nWarning: some sequences had non translatable codons "
+                    "(found %li such codons)\n", ncod[0]);
+         else
+            fprintf(pm->my_err,
+                    "\nWarning: sequence %3li \"%-20.20s\" has %li non "
+                    "translatable codon(s)\n", num_sequence, ttitle, ncod[0]);
+         error_lines++;
+      }
+
+      if (not_stop && !pm->totals) {
+         fprintf(pm->my_err,
+                 "\nWarning: Sequence %3li \"%-20.20s\" is not terminated by "
+                 "a stop codon\n", num_sequence, ttitle);
+         error_lines++;
+      }
+      break;
+
+   case 3:                            /* Nc error routines see codon_us      */
+      dot(0, 10);                     /* dot resetting internal counter      */
+      if (x == 3)
+         x = 4;                       /* if x=3 there are no 3 or 4 fold AA  */
+      fprintf(pm->my_err,
+              "\nSequence %li \"%-20.20s\" contains ", num_sequence, ttitle);
+      if (y)
+         fprintf(pm->my_err, "only %i ", (int) y);
+      else
+         fprintf(pm->my_err, "no ");
+      fprintf(pm->my_err, "amino acids with %i synonymous codons\n", x);
+      fprintf(pm->my_err, "\t--Nc was not calculated \n");
+      error_lines += 2;
+      break;
+
+   case 4:                            /* run silent                          */
+      break;
+
+   default:
+      my_exit(99, "Programme error in codon_error\n");
+   }
+
+   /* Once enough warning lines have gone to an interactive stderr, reset  */
+   /* the line counter and invoke the pause macro.                         */
+   if (pm->my_err == stderr && pm->verbose
+       && ((error_lines + 2) * 2) > pm->term_length) {
+      error_lines = 0;
+      dot(0, 10);
+      pause;
+   }
+   return counted;                    /* Number of codons counted            */
+}
+
+/****************** Codon Usage Out           *****************************/
+/* Writes codon usage output to file. Note this subroutine is only called */
+/* when machine readable output is selected, otherwise cutab_out is used  */
+/**************************************************************************/
+int codon_usage_out(FILE * fblkout, long int *nncod, int last_aa, 
+                    int vvalid_stops, char *ttitle)
+{
+  long int ccodon_tot;
+  int x;
+  char sp = pm->seperator;
+
+  /* Error level 4 makes codon_error run silently; the call is made only  */
+  /* to obtain the total number of codons counted.                        */
+  ccodon_tot = codon_error(last_aa, vvalid_stops, "", (char) 4);
+
+  /*example of output                                                     */ 
+  /*0,0,0,0,3,2,2,0,0,0,0,0,0,3,0,0,                                      */
+  /*0,0,0,4,3,4,1,7,0,0,0,0,3,1,3,1,Codons=100                            */
+  /*0,0,0,0,10,6,3,0,0,0,0,0,1,1,12,0,Universal Genetic code              */
+  /*0,0,0,3,7,5,7,9,0,1,1,1,8,4,5,0,MLSPCOPER.PE1                         */
+
+  for (x = 1; x < 65; x++) {
+
+    /* nncod[] holds long int counts, so the conversion must be %li;      */
+    /* %i with a long argument is undefined behaviour.                    */
+    fprintf(fblkout, "%li%c", nncod[x], sp);
+
+    /* each row of 16 counts is closed by a row-specific trailer          */
+    switch (x) {
+    case 16:
+      fprintf(fblkout, "\n");
+      break;
+    case 32:
+      fprintf(fblkout, "Codons=%ld\n", ccodon_tot);
+      break;
+    case 48:
+      fprintf(fblkout, "%.30s\n", pcu->des);
+      break;
+    case 64:
+      fprintf(fblkout, "%.20s\n", ttitle);
+      break;
+    default:
+      break;
+    }
+  }
+  return 1;
+}
+/******************  RSCU  Usage out          *****************************/
+/* Writes Relative synonymous codon usage output to file. Note this subrou*/
+/* tine is only called if machine readable output is selected             */
+/* If human readable format was selected then what the user really wanted */
+/* was cutab so this is automatically selected in codons.c                */
+/* RSCU values are genetic code dependent                                 */
+/**************************************************************************/
+int rscu_usage_out(FILE * fblkout, long *nncod, long *nnaa)
+{
+ const char sp = pm->seperator;
+ long       aa_count;              /* occurrences of the codon's amino acid */
+ int        codon;
+
+ /* ds[codon] is the number of synonyms of the amino acid that codon      */
+ /* encodes, so RSCU = (codon count / amino acid count) * ds[codon]       */
+
+ for (codon = 1; codon <= 64; codon++) {
+   aa_count = nnaa[pcu->ca[codon]];
+
+   if (aa_count == 0) {
+     fprintf(fblkout, "0.000%c", sp);
+   } else {
+     fprintf(fblkout, "%5.3f%c",
+             ((float) nncod[codon] / (float) aa_count)
+             * ((float) ds[codon]), sp);
+   }
+
+   if (codon == 64)                /* the last row also carries the title  */
+     fprintf(fblkout, "%-20.20s", title);
+
+   if (codon % 16 == 0)            /* 16 values per output row             */
+     fprintf(fblkout, "\n");
+ }
+ return 1;
+}
+/******************   RAAU output             *****************************/
+/* Writes Relative amino acid usage output to file. Amino Acid usage is   */
+/* normalised for gene length                                             */
+/**************************************************************************/
+int raau_usage_out(FILE * fblkout, long *nnaa)
+{
+   static char     first_line = TRUE;  /* header not yet written            */
+   long int        aa_tot = 0;         /* amino acids, stops excluded       */
+   int             machine;            /* machine readable output selected  */
+   int             aa;
+   char            sp;
+
+   machine = (pm->seq_format == 'M');
+   if (machine)
+      sp = pm->seperator;
+   else
+      sp = '\t';
+
+   if (first_line) {                   /* write the column header once      */
+      if (machine)
+         fprintf(fblkout, "%s", "Gene_name");
+      else
+         fprintf(fblkout, "%-20.20s", "Gene name");
+
+      for (aa = 0; aa < 22; aa++) {    /* three letter AA names             */
+         if (machine)
+            fprintf(fblkout, "%c%s", sp, paa->aa3[aa]);
+         else
+            fprintf(fblkout, "%c %-6.6s", sp, paa->aa3[aa]);
+      }
+      fprintf(fblkout, "\n");
+      first_line = FALSE;
+   }
+
+   for (aa = 1; aa < 22; aa++) {       /* total No. of AAs, skipping stops  */
+      if (aa != 11)
+         aa_tot += nnaa[aa];
+   }
+
+   if (machine)
+      fprintf(fblkout, "%.30s", title);
+   else
+      fprintf(fblkout, "%-20.20s", title);
+
+   for (aa = 0; aa < 22; aa++) {
+      if (aa == 11) {                  /* report 0 for stops                */
+         fprintf(fblkout, "%c0.0000", sp);
+      } else if (aa_tot) {
+         if (machine)
+            fprintf(fblkout, "%c%.4f", sp,
+                    (double) nnaa[aa] / (double) aa_tot);
+         else
+            fprintf(fblkout, "%c%7.4f", sp,
+                    (double) nnaa[aa] / (double) aa_tot);
+      } else if (machine) {            /* no amino acids were counted       */
+         fprintf(fblkout, "%c%c", sp, sp);
+      } else {
+         fprintf(fblkout, "%c ***** ", sp);
+      }
+   }
+
+   fprintf(fblkout, "\n");
+   return 1;
+}
+/******************   AA usage output         *****************************/
+/* Writes amino acid usage output to file.                                */
+/**************************************************************************/
+int aa_usage_out(FILE * fblkout, long *nnaa)
+{
+  static char     first_line = TRUE;   /* header not yet written           */
+  const int       machine = (pm->seq_format == 'M');
+  const char      sp = pm->seperator;
+  int             col;
+
+  if (first_line) {                    /* write the column header once     */
+    if (machine)
+      fprintf(fblkout, "%s", "Gene_name");
+    else
+      fprintf(fblkout, "%-20.20s ", "Gene name");
+
+    for (col = 0; col < 22; col++) {   /* 3 letter AA code                 */
+      if (machine)
+        fprintf(fblkout, "%c%s", sp, paa->aa3[col]);
+      else
+        fprintf(fblkout, "%-5.5s", paa->aa3[col]);
+    }
+    fprintf(fblkout, "\n");
+    first_line = FALSE;
+  }
+
+  if (machine)
+    fprintf(fblkout, "%.20s", title);
+  else
+    fprintf(fblkout, "%-20.20s ", title);
+
+  for (col = 0; col < 22; col++) {     /* one raw count per amino acid     */
+    if (machine)
+      fprintf(fblkout, "%c%li", sp, nnaa[col]);
+    else
+      fprintf(fblkout, "%-5li", nnaa[col]);
+  }
+
+  fprintf(fblkout, "\n");
+  return 1;
+}
+/******************  Base Silent output     *******************************/
+/* Calculates and write the base composition at silent sites              */
+/* normalised as a function of the possible usage at that silent site     */
+/* without changing the amino acid composition of the protein. Inspired by*/
+/* GC3s but is much more complicated to calculate as not every AA has the */
+/* option to use any base at the third position                           */
+/* All synonymous AA can select between a G or C though                   */
+/**************************************************************************/
+void base_sil_us_out(FILE * foutput, long *nncod, long *nnaa)
+{
+   long            used[4];        /* synonymous codons ending in each base */
+   long            possible[4];    /* codons that could have ended in it    */
+   int             seen[4];        /* base already credited for this AA     */
+   int             aa, codon, third, b;
+   char            sp;
+
+   if (pm->seq_format == 'H')
+      sp = '\t';
+   else
+      sp = pm->seperator;
+
+   for (b = 0; b < 4; b++) {       /* blank the arrays                      */
+      used[b] = 0;
+      possible[b] = 0;
+   }
+
+   /* Codon ids run 1..64 as (first-1)*16 + second + (third-1)*4, so the   */
+   /* zero-based third-position base is ((id-1)/4) % 4.                    */
+
+   for (codon = 1; codon <= 64; codon++) {
+      if (pcu->ca[codon] == 11 || ds[codon] == 1)
+         continue;                 /* stop or non-synonymous codon: skip    */
+      used[((codon - 1) / 4) % 4] += nncod[codon];
+   }
+
+   for (aa = 1; aa < 22; aa++) {
+      if (aa == 11 || da[aa] == 1)
+         continue;                 /* if stop codon skip, or AA not synony  */
+
+      for (b = 0; b < 4; b++)      /* credit each ending once per AA, so    */
+         seen[b] = FALSE;          /* 6 fold sites are not counted twice    */
+
+      for (codon = 1; codon <= 64; codon++) {
+         third = ((codon - 1) / 4) % 4;
+         if (pcu->ca[codon] != aa || seen[third] != FALSE)
+            continue;
+         possible[third] += nnaa[aa];
+         seen[third] = TRUE;
+      }
+   }
+
+   /* Now the easy bit ... just output the results to file                */
+   for (b = 0; b < 4; b++) {
+      if (possible[b] > 0)
+         fprintf(foutput, "%6.4f%c",
+                 (double) used[b] / (double) possible[b], sp);
+      else
+         fprintf(foutput, "0.0000%c", sp);
+   }
+   return;
+}
+/******************  Clean up               *******************************/
+/* Called after each sequence has been completely read from disk          */
+/* It re-zeros all the main counters, but is not called when concatenating*/
+/* sequences together                                                     */
+/**************************************************************************/
+int clean_up(long int *nncod, long int *nnaa)
+{
+   int frame;
+   int pair;
+   int k;
+
+   for (k = 0; k <= 64; k++)        /* codon counters, slot 0 included     */
+      nncod[k] = 0;
+   for (k = 0; k <= 22; k++)        /* amino acid counters                 */
+      nnaa[k] = 0;
+
+   /* dinucleotide count remembers the last_base from the last fragment;  */
+   /* counting a blank makes that remembered base blank                   */
+   dinuc_count(" ", 1);
+
+   for (frame = 0; frame < 3; frame++)
+      for (pair = 0; pair < 16; pair++)
+         din[frame][pair] = 0;
+
+   dinuc_count(" ", 1);
+
+   /* per-sequence base and character totals                              */
+   IUBC_TOT     = 0;
+   GAP_TOT      = 0;
+   AA_TOT       = 0;
+   GC_TOT       = 0;
+   AT_TOT       = 0;
+   non_std_char = 0;
+   tot          = 0;
+   master_ic    = 0;
+
+   long_seq = FALSE;
+
+   /* per-sequence codon bookkeeping                                      */
+   fram        = 0;
+   codon_tot   = 0;
+   valid_start = 0;
+   valid_stops = 0;
+   return 1;
+}
+/*****************Codon Adaptation Index output   *************************/
+/* Codon Adaptation Index (CAI) (Sharp and Li 1987). CAI is a measurement */
+/* of the relative adaptiveness of the codon usage of a gene towards the  */
+/* codon usage of highly expressed genes. The relative adaptiveness (w) of*/
+/* each codon is the ratio of the usage of each codon, to that of the most*/
+/* abundant codon for the same amino acid. The relative adaptiveness of   */
+/* codons for albeit a limited choice of species, can be selected from the*/
+/* Menu. The user can also input a personal choice of values. The CAI     */
+/* index is defined as the geometric mean of these relative adaptiveness  */
+/* values. Non-synonymous codons and termination codons (genetic code     */
+/* dependent) are excluded. To aid computation, the CAI is calculated as  */
+/* using a natural log summation, To prevent a codon having a relative    */
+/* adaptiveness value of zero, which could result in a CAI of zero;       */
+/* these codons have fitness of zero (<.0001) are adjusted to 0.01        */
+/**************************************************************************/
+/* Read one line of user input into buf (at most size-1 characters, the   */
+/* newline stripped); anything left on an over-long line is discarded.    */
+/* Bounded replacement for gets(), which cannot limit what it writes.     */
+static void cai_read_answer(char *buf, size_t size)
+{
+   size_t len;
+   int    ch;
+
+   if (fgets(buf, (int) size, stdin) == NULL) {
+      buf[0] = '\0';                /* EOF or read error: empty answer     */
+      return;
+   }
+   len = strlen(buf);
+   if (len > 0 && buf[len - 1] == '\n')
+      buf[len - 1] = '\0';
+   else if (len == size - 1)
+      while ((ch = getchar()) != '\n' && ch != EOF)
+         ;                          /* drain the rest of the line          */
+}
+
+/* Writes the Codon Adaptation Index of the codon counts in nncod to      */
+/* foutput, followed by the field separator, and returns 1.               */
+/* On the first call only it may prompt for, and then load, a user file   */
+/* of 64 w values (pm->caifile); on success pcai points at that table.    */
+/* A malformed or out of range file ends the program via my_exit().       */
+int cai_out(FILE * foutput, long int *nncod)
+{
+   long int        totaa = 0;
+   double          sigma;
+   float           ftemp;
+   int             x;
+   char            reply[16];      /* bounded buffer for the y/n answer    */
+   char sp=  (char) (pm->seq_format=='H')? 
+       (char) '\t': 
+       (char) pm->seperator;
+   static char       cai_ttt = FALSE;
+   static char       description[61];
+   static char       reference[61];
+
+   static CAI_STRUCT user_cai;
+
+   if (!cai_ttt) {                        /* have we been called already   */
+      user_cai.des = description;         /* assign an array to a pointer  */
+      user_cai.ref = reference;           /* as above                      */
+
+      if (pm->caifile == NULL && pm->verbose == TRUE
+          && pm->menu == TRUE && (pcai == cai)) {
+         /* this is false                                                 */
+         /* if personal caifile is on commandline or                      */
+         /* in non-interactive mode or -silent option                     */
+         /* or cai values are not the default values                      */
+
+         printf("\nDo you wish to input a personal choice of CAI"
+                " values (y/n) [n] ");
+         cai_read_answer(reply, sizeof reply);
+         /* keep mirroring the answer into pm->junk as gets() did         */
+         /* NOTE(review): assumes pm->junk holds >= sizeof reply bytes    */
+         strcpy(pm->junk, reply);
+
+         /* This allows a user defined choice of CAI values to be selected */
+         if ('Y' == (char) toupper((unsigned char) reply[0])) {
+            /* tell the user a little about what we are looking for       */
+            printf("\nInput file must contain 64 CAI values\n"
+                   "ranging from 0.00 to 1.00\n"
+                   "values must be separated by spaces\n");
+            /* open the CAI adaptiveness values file                      */
+            if (!(pm->caifile = open_file("file with CAI values"
+                                          ,"cai.coa", "r", 0)))
+               my_exit(6, "cai_out");
+         }
+      }                                   /* matched if pm->caifile==NULL  */
+
+      if (pm->caifile) {
+         rewind(pm->caifile);       /* unlikely unless fopfile = caifile   */
+         x = 0;
+         strcpy(user_cai.des, "User supplied CAI adaptation values ");
+         strcpy(user_cai.ref, "No reference");
+         user_cai.cai_val[x++] = (float) 0.0;    /* slot 0 is not a codon  */
+
+         /* Stop on the first token that is not a number: comparing with  */
+         /* EOF alone would loop forever on such a token, because fscanf  */
+         /* then returns 0 without consuming it.                          */
+         while (fscanf(pm->caifile, "%f ", &ftemp) == 1) {
+                                    /* if any bad CAI values are read EXIT */
+            if (ftemp < 0 || ftemp > 1.0) {
+               printf("\nError CAI %f value out of range\nEXITING", ftemp);
+               my_exit(99, "cai_out");
+            }
+            if (x > 64) {           /* a 65th value: count it, never store */
+               x++;                 /* it (assumes cai_val has 65 slots,   */
+               break;               /* as the x != 65 check implies)       */
+            }
+            user_cai.cai_val[x++] = ftemp;        /* assign value          */
+         }
+         if (x != 65) {             /*             wrong number of codons  */
+            fprintf(pm->my_err, "\nError in CAI file, found %i values"
+                    " expected 64 values EXITING\n", x - 1);
+            my_exit(99, "cai_out");
+         }
+         pcai = &user_cai;          /* assigns pointer to user CAI values  */
+      }                             /*        matches if( pm->caifile...   */
+
+      printf("Using %s (%s) w values to calculate "
+             "CAI \n", pcai->des, pcai->ref);
+      cai_ttt = TRUE;               /* stops this "if" from being entered  */
+   }                                /* matches if (!cai_ttt )              */
+
+   /* CAI is the geometric mean of w over the synonymous, non-stop codons, */
+   /* computed as exp(mean of ln w).                                       */
+   for (x = 1, sigma = 0; x < 65; x++) {
+      if (pcu->ca[x] == 11 || *(ds + x) == 1) continue;
+      if (pcai->cai_val[x] < 0.0001)/* if value is effectively zero        */
+         pcai->cai_val[x] = (float) 0.01;         /* make it .01           */
+      sigma += (double) *(nncod + x) * log((double) pcai->cai_val[x]);
+      totaa += *(nncod + x);
+   }
+
+   if (totaa) {                     /* avoid dividing by zero              */
+      sigma = sigma / (double) totaa;
+      sigma = exp(sigma);
+   } else
+      sigma = 0;
+
+   fprintf(foutput, "%5.3f%c", sigma, sp);
+   return 1;
+}
+/*****************Codon Bias Index output        **************************/
+/* Codon bias index is a measure of directional codon bias, it measures   */
+/* the extent to which a gene uses a subset of optimal codons.            */
+/* CBI = ( Nopt-Nran)/(Ntot-Nran) Where Nopt = number of optimal codons;  */
+/* Ntot = number of synonymous codons; Nran = expected number of optimal  */
+/* codons if codons were assigned randomly. CBI is similar to Fop as used */
+/* by Ikemura, with Nran used as a scaling factor. In a gene with extreme */
+/* codon bias, CBI will equal 1.0, in a gene with random codon usage CBI  */
+/* will equal 0.0. Note that it is possible for Nopt to be less than Nran.*/
+/* This results in a negative value for CBI.                              */
+/* ( Bennetzen and Hall 1982 )                                            */
+/**************************************************************************/
+/* Read one line of user input into buf (at most size-1 characters, the   */
+/* newline stripped); anything left on an over-long line is discarded.    */
+/* Bounded replacement for gets(), which cannot limit what it writes.     */
+static void cbi_read_answer(char *buf, size_t size)
+{
+   size_t len;
+   int    ch;
+
+   if (fgets(buf, (int) size, stdin) == NULL) {
+      buf[0] = '\0';                /* EOF or read error: empty answer     */
+      return;
+   }
+   len = strlen(buf);
+   if (len > 0 && buf[len - 1] == '\n')
+      buf[len - 1] = '\0';
+   else if (len == size - 1)
+      while ((ch = getchar()) != '\n' && ch != EOF)
+         ;                          /* drain the rest of the line          */
+}
+
+/* Writes the Codon Bias Index for the counts in nncod/nnaa to foutput,   */
+/* followed by the field separator, and returns 1.                        */
+/* On the first call only it may prompt for, and then load, a user file   */
+/* of 64 codon classes (pm->cbifile) and records which amino acids have   */
+/* an optimal codon. Malformed data ends the program via my_exit().       */
+int cbi_out(FILE * foutput, long int *nncod, long int *nnaa )
+{
+   long int        tot_cod  = 0;
+   long int        opt      = 0; 
+   float           exp_cod  = (float) 0.0; 
+   float           fcbi;
+   int             c,x;
+   char            reply[16];      /* bounded buffer for the y/n answer    */
+   char sp=  (pm->seq_format=='H')? 
+       (char) '\t':
+       (char) pm->seperator;
+
+   static char       description[61];
+   static char       reference[61];
+   static char       first_call_cbi  = TRUE;
+   static char       has_opt_info[22];
+   static FOP_STRUCT user_cbi;
+
+   if (first_call_cbi) {                 /* have we been called already   */
+
+      user_cbi.des = description;        /* assign a pointer to array     */
+      user_cbi.ref = reference;
+
+      if (pm->cbifile == NULL && pm->verbose == TRUE
+          && pm->menu == TRUE && (pcbi == fop)) {
+         /* this is false                                                 */
+         /* if personal fopfile is on commandline or                      */
+         /* in non-interactive mode or -silent option                     */
+         /* or fop values are not the default values                      */
+
+         printf("\nDo you wish to input a personal choice of CBI"
+                " values (y/n) [n] ");
+         cbi_read_answer(reply, sizeof reply);
+         /* keep mirroring the answer into pm->junk as gets() did         */
+         /* NOTE(review): assumes pm->junk holds >= sizeof reply bytes    */
+         strcpy(pm->junk, reply);
+
+         if ('Y' == (char) toupper((unsigned char) reply[0])) {
+            printf("\nInput file must contain 64 CBI values\n"
+                   " 1= rare codon\n 2= common codon\n 3= optimal codon\n");
+
+            if (!(pm->cbifile = open_file("file with CBI values"
+                                          ,"cbi.coa", "r", 0)))
+               my_exit(6, "cbi_out");
+         }                          /* matches if Y==                     */
+      }                             /* matches if pm->cbifile==NULL       */
+
+      if (pm->cbifile) {
+         rewind(pm->cbifile);       /* fopfile can be the same as cbifile */
+         strcpy(user_cbi.des, "User supplied choice");
+         strcpy(user_cbi.ref, "No reference");
+         x = 0;
+         user_cbi.fop_cod[x++] = 0; /* slot 0 is not a codon              */
+
+         /* Collect the digits 0-3, ignoring every other character. One   */
+         /* surplus digit is counted so the check below can reject the    */
+         /* file, but nothing is stored beyond fop_cod[64] (assumes a 65  */
+         /* element table, as the x != 65 check implies).                 */
+         while (x <= 65 && (c = fgetc(pm->cbifile)) != EOF) {
+            if (c >= '0' && c <= '3') {
+               if (x < 65)
+                  user_cbi.fop_cod[x] = (char) (c - '0');
+               x++;
+            }
+         }                          /*                       end of while */
+
+         if (x != 65) {             /*             wrong number of codons */
+            sprintf(pm->messages, "\nError in CBI file %i digits found,  "
+                    "expected 64 EXITING\n", x - 1);
+            my_exit(99, pm->messages);
+         }
+         pcbi = (&user_cbi);
+      }                             /*            matches if(pm->cbifile) */
+
+      printf("Using %s (%s) \noptimal codons to calculate "
+             "CBI\n", pcbi->des, pcbi->ref);
+
+                                    /* initialise has_opt_info            */
+      for (x = 0; x < 22; x++)
+         has_opt_info[x] = 0;
+
+      /* note every amino acid that has at least one optimal codon        */
+      for (x = 1; x < 65; x++) {
+         if (pcu->ca[x] == 11 || *(ds + x) == 1)
+            continue;
+         if (pcbi->fop_cod[x] == 3)
+            has_opt_info[pcu->ca[x]]++;
+      }
+
+      first_call_cbi = FALSE;       /*      this won't be called again     */
+   }                                /*         matches if (first_call_cbi) */
+
+   for (x = 1; x < 65; x++) {
+      if (!has_opt_info[pcu->ca[x]])
+         continue;                  /* AA without an optimal codon: skip   */
+      switch ((int) pcbi->fop_cod[x]) {
+      case 3:
+         opt     += nncod[x];
+         tot_cod += nncod[x];
+         /* expected optimal count if the AA's codons were used equally   */
+         exp_cod += (float) nnaa[pcu->ca[x]] / (float) da[pcu->ca[x]];
+         break;
+      case 2:
+      case 1:
+         tot_cod += *(nncod + x);
+         break;
+      default:
+         /* fop_cod[x] is an integer class, so it is printed with %i      */
+         sprintf(pm->messages, " Serious error in CBI information found"
+                 " an illegal CBI value of %i for codon %i"
+                 " permissible values are \n 1 for non-optimal"
+                 " codons\n 2 for common codons\n"
+                 " 3 for optimal codons\n" " EXITING ",
+                 (int) pcbi->fop_cod[x], x);
+
+         my_exit(99, pm->messages);
+         break;
+      }                             /*                   end of switch     */
+   }                                /*                   for (    )        */
+
+   if (tot_cod - exp_cod)           /* avoid dividing by zero              */
+      fcbi = (opt - exp_cod) / (tot_cod - exp_cod);
+   else
+      fcbi = (float) 0.0;
+
+   fprintf(foutput, "%5.3f%c", fcbi, sp);               /* CBI     QED     */
+
+   return 1;
+}
+
+/****************** Frequency of OPtimal codons output  ********************/
+/* Frequency of Optimal codons (Fop) (Ikemura 1981). This index, is ratio  */
+/* of optimal codons to synonymous codons (genetic code dependent). Optimal*/
+/* codons for several species are in-built and can be selected using Menu 3*/
+/* By default, the optimal codons of E. coli are assumed. The user may also*/
+/* enter a personal choice of optimal codons. If rare synonymous codons    */
+/* have been identified, there is a choice of calculating the original Fop */
+/* index or a modified index. Fop values for the original index are always */
+/* between 0 (where no optimal codons are used) and 1 (where only optimal  */
+/* codons are used). When calculating the modified Fop index, any negative */
+/* values are adjusted to zero.                                            */
+/***************************************************************************/
+/* Read one line of user input into buf (at most size-1 characters, the   */
+/* newline stripped); anything left on an over-long line is discarded.    */
+/* Bounded replacement for gets(), which cannot limit what it writes.     */
+static void fop_read_answer(char *buf, size_t size)
+{
+   size_t len;
+   int    ch;
+
+   if (fgets(buf, (int) size, stdin) == NULL) {
+      buf[0] = '\0';                /* EOF or read error: empty answer     */
+      return;
+   }
+   len = strlen(buf);
+   if (len > 0 && buf[len - 1] == '\n')
+      buf[len - 1] = '\0';
+   else if (len == size - 1)
+      while ((ch = getchar()) != '\n' && ch != EOF)
+         ;                          /* drain the rest of the line          */
+}
+
+/* Writes the Frequency of Optimal codons for the counts in nncod to      */
+/* foutput, followed by the field separator, and returns 1.               */
+/* On the first call only it may prompt for, and then load, a user file   */
+/* of 64 codon classes (pm->fopfile), may ask whether rare codons should  */
+/* be factored in (modified Fop), and records which amino acids carry     */
+/* usable information. Malformed data ends the program via my_exit().     */
+int fop_out(FILE * foutput, long int *nncod)
+{
+   long int        nonopt = 0;
+   long int        std = 0;
+   long int        opt = 0;
+   float           ffop;
+   int             c,x;
+   char            reply[16];      /* bounded buffer for the y/n answers   */
+
+   char sp=  (pm->seq_format=='H')? (char) '\t': (char) pm->seperator;
+
+   static char     first_call = TRUE;
+   static char     description[61];
+   static char     reference[61];
+   static char     asked_about_fop = FALSE;
+   static char     factor_in_rare = FALSE;
+   static char     has_opt_info[22];
+   static FOP_STRUCT user_fop;
+
+   if (first_call) {                /* have I been called previously      */
+      user_fop.des = description;
+      user_fop.ref = reference;
+
+      if (pm->fopfile == NULL && pm->verbose == TRUE
+          && pm->menu == TRUE && (pfop == fop)) {
+         /* this is false                                                 */
+         /* if personal fopfile is on commandline or                      */
+         /* in non-interactive mode or -silent option                     */
+         /* or fop values are not the default values                      */
+
+         printf("\nDo you wish to input a personal choice of Fop"
+                " values (y/n) [n] ");
+         fop_read_answer(reply, sizeof reply);
+         /* keep mirroring the answer into pm->junk as gets() did         */
+         /* NOTE(review): assumes pm->junk holds >= sizeof reply bytes    */
+         strcpy(pm->junk, reply);
+
+         if ('Y' == (char) toupper((unsigned char) reply[0])) {
+            printf("\nInput file must contain 64 Fop values\n"
+                   " 1= rare codon\n 2= common codon\n 3= optimal codon\n");
+
+            if (!(pm->fopfile = open_file("file with Fop values"
+                                          ,"fop.coa", "r", 0)))
+               my_exit(6, "fop_out");
+         }                          /*                         if 'Y' ==  */
+      }                             /* if (pm->fopfile == NULL........ )  */
+
+      if (pm->fopfile) {
+         rewind(pm->fopfile);       /*    possible for fopfile = cbifile  */
+         strcpy(user_fop.des, "User supplied choice");
+         strcpy(user_fop.ref, "No reference");
+         x = 0;
+         user_fop.fop_cod[x++] = 0; /* slot 0 is not a codon              */
+
+         /* Collect the digits 0-3, ignoring every other character. One   */
+         /* surplus digit is counted so the check below can reject the    */
+         /* file, but nothing is stored beyond fop_cod[64] (assumes a 65  */
+         /* element table, as the x != 65 check implies).                 */
+         while (x <= 65 && (c = fgetc(pm->fopfile)) != EOF) {
+            if (c >= '0' && c <= '3') {
+               if (x < 65)
+                  user_fop.fop_cod[x] = (char) (c - '0');
+               x++;
+            }
+         }                          /*                       end of while */
+
+         if (x != 65) {             /*             wrong number of codons */
+            sprintf(pm->messages, "\nError in Fop file %i values found,  "
+                    "expected 64 EXITING\n", x - 1);
+            my_exit(99, pm->messages);
+         }
+         pfop = &user_fop;          /*  assigns pointer to user fop values*/
+      }
+
+      printf("Using %s (%s)\noptimal codons to calculate "
+             "Fop\n", pfop->des, pfop->ref);
+
+                                    /* initialise has_opt_info            */
+      for (x = 0; x < 22; x++)
+         has_opt_info[x] = 0;
+
+      for (x = 1; x < 65; x++) {
+         if (pcu->ca[x] == 11 || *(ds + x) == 1)
+            continue;
+         if (pfop->fop_cod[x] == 3)
+            has_opt_info[pcu->ca[x]]++;
+
+         if (pfop->fop_cod[x] == 1) {
+            /* first rare codon met: ask once how it is to be treated     */
+            if (!asked_about_fop && pm->verbose) {
+               printf("\nIn the set of optimal codons you have selected,\n"
+                      "non-optimal codons have been identified\nThey can be "
+                      "used in the calculation of a modified Fop, "
+                      "(Fop=(opt-rare)/total)\n else the original formulae "
+                      "will be used (Fop=opt/total)\n\n\t\tDo you wish "
+                      "calculate a modified fop (y/n) [n] ");
+               fop_read_answer(reply, sizeof reply);
+               strcpy(pm->junk, reply);
+               if ('Y' == (char) toupper((unsigned char) reply[0]))
+                  factor_in_rare = TRUE;
+               asked_about_fop = TRUE;
+            }
+
+            if (factor_in_rare == TRUE)
+               has_opt_info[pcu->ca[x]]++;
+         }
+      }                             /*    matches for (x=1                */
+      first_call = FALSE;
+   }                                /*    matches if (first_call)         */
+
+   for (x = 1; x < 65; x++) {
+      if (!has_opt_info[pcu->ca[x]])
+         continue;                  /* AA carries no optimal codon info    */
+
+      switch ((int) pfop->fop_cod[x]) {
+      case 3:
+         opt += *(nncod + x);
+         break;
+      case 2:
+         std += *(nncod + x);
+         break;
+      case 1:
+         nonopt += *(nncod + x);
+         break;
+      default:
+         /* fop_cod[x] and x are integers and the counters are long, so   */
+         /* the conversions are %i and %li.                               */
+         sprintf(pm->messages, " Serious error in fop information found"
+                 " an illegal fop value of %i for codon %i"
+                 " permissible values are \n 1 for non-optimal"
+                 " codons\n 2 for common codons\n"
+                 " 3 for optimal codons\n" " EXITING ",
+                 (int) pfop->fop_cod[x], x);
+         printf("opt %li, std %li, nonopt %li\n", opt, std, nonopt);
+         my_exit(99, pm->messages);
+         break;
+      }
+   }
+
+   if (factor_in_rare && (opt + nonopt + std))
+      ffop = (float) (opt - nonopt) / (float) (opt + nonopt + std);
+   else if ((opt + nonopt + std))
+      ffop = (float) opt / (float) (opt + nonopt + std);
+   else
+      ffop = (float) 0.0;           /* no informative codons at all        */
+
+   fprintf(foutput, "%5.3f%c", ffop, sp);
+
+   return 1;
+}
+
+/***************  Effective Number of Codons output   *********************/
+/* The effective number of codons (NC) (Wright 1990). This index is a     */
+/* simple measure of overall codon bias and is analogous to the effective */
+/* number of alleles measure used in population genetics. Knowledge of the*/
+/* optimal codons or a reference set of highly expressed genes is not     */
+/* needed when calculating this index. Initially the homozygosity for each*/
+/* amino acid is estimated from the squared codon frequencies.            */
+/**************************************************************************/
+float enc_out(FILE * foutput, long int *nncod, long int *nnaa) {
+   double          f_sum[9];       /* summed homozygosity per degeneracy   */
+   int             f_count[9];     /* AAs that contributed to f_sum[]      */
+   int             family[9];      /* AAs in each degeneracy class         */
+   int             failed = FALSE; /* Nc could not be calculated           */
+   int             aa, codon, cls, deg;
+   double          mean_f = 0, homoz = 0, freq_sq = 0, sq_sum = 0;
+   float           enc_tot = 0.0F;
+   char            sp;
+
+   if (pm->seq_format == 'H')
+      sp = '\t';
+   else
+      sp = pm->seperator;
+
+/* don't assume that 6 is the largest possible amino acid family assume 9*/
+   for (deg = 0; deg < 9; deg++) {
+      family[deg]  = 0;
+      f_sum[deg]   = 0.0;
+      f_count[deg] = 0;
+   }
+
+   for (aa = 1; aa < 22; aa++) {   /* for each amino acid                  */
+      if (aa == 11)
+         continue;                 /* but not for stop codons              */
+
+      if (nnaa[aa] > 1) {
+         /* sum the squared frequencies of every codon that encodes aa;   */
+         /* scanning all 64 avoids hard wiring the genetic code           */
+         sq_sum = 0;
+         for (codon = 1; codon < 65; codon++) {
+            if (pcu->ca[codon] != aa)
+               continue;
+
+            if (nncod[codon] == 0)
+               freq_sq = 0.0;
+            else
+               freq_sq = pow(((double) nncod[codon] / (double) nnaa[aa]),
+                             (double) 2);
+
+            sq_sum += freq_sq;
+         }
+         homoz = (((double) nnaa[aa] * sq_sum) - 1.0) /  /* homozygosity  */
+            (double) (nnaa[aa] - 1.0);
+      } else {
+         homoz = 0;                /* aa seen at most once: no estimate    */
+      }
+
+      cls = da[aa];                /* number of codons encoding this aa    */
+      if (homoz > 0.0000001) {
+         f_sum[cls] += homoz;
+         f_count[cls]++;
+      }
+      family[cls]++;               /* counted even when absent from gene   */
+   }
+
+   enc_tot = (float) family[1];
+
+   for (deg = 2; deg <= 8; deg++) {
+      if (!family[deg])
+         continue;                 /* no amino acids are deg fold          */
+
+      if (f_count[deg] && f_sum[deg] > 0) {
+         mean_f = f_sum[deg] / f_count[deg];
+      } else if (deg == 3 && f_count[2] && f_count[4] && family[deg] == 1) {
+         /* special case: a lone 3 fold AA borrows the 2 and 4 fold mean  */
+         mean_f = (f_sum[2] / f_count[2] + f_sum[4] / f_count[4]) * 0.5;
+      } else {                     /* write error to stderr                */
+         codon_error(deg, f_count[deg], title, 3);
+         failed = TRUE;            /* error catch for strange genes        */
+         break;
+      }
+      enc_tot += (float) family[deg] / (float) mean_f;
+   }
+
+   if (failed)
+      fprintf(foutput, "*****%c", sp);
+   else if (enc_tot <= 61)
+      fprintf(foutput, "%5.2f%c", enc_tot, sp);
+   else
+      fprintf(foutput, "61.00%c", sp);
+
+   return enc_tot;
+}
+
+/*******************   G+C output          *******************************/
+/* This function is a real work horse, initially it counts base composit */
+/* ion at each codon position, the length of the gene and the number of  */
+/* synonymous codons from the global array ncod (stop codons skipped).   */
+/* Then dependent on the value of which used in the switch statement     */
+/* various analyses of this data are written                             */
+/* if which ==1 then the output is very detailed, base by base etc.      */
+/* if which ==2 then the output is for GC content only                   */
+/* if which ==3 then the output is for GC3s (GC at synonymous 3rd posit) */
+/* if which ==4 then the output is for L_sym                             */
+/* if which ==5 then the output is for L_aa                              */
+/* which ==1 writes to fblkout, every other value writes to foutput      */
+/* Bases are numbered 1=T 2=C 3=A 4=G in the count arrays                */
+/* If no codons or no synonymous codons were counted a warning is sent   */
+/* to pm->my_err and nothing else is written                             */
+/* The output from this subroutine is in a tabular format if human read- */
+/* able output is selected, and in columns if machine readable. Also the */
+/* number of values reported changes as it is assumed the user has access*/
+/* to a spreadsheet type programme if they are requesting tabular output */
+/*************************************************************************/
+void gc_out(FILE * foutput, FILE * fblkout, int which){
+
+   long int        id;              /* codon number built from x, y and z*/
+   long int        bases[5];        /* 3rd base counts, synon codons only*/
+   long int        base_tot[5];     /* base counts over all 3 positions  */
+   long int        base_1[5];       /* base counts at codon position 1   */
+   long int        base_2[5];       /* base counts at codon position 2   */
+   long int        base_3[5];       /* base counts at codon position 3   */
+   long int        tot_s = 0;       /* number of synonymous codons       */
+   long int        totalaa = 0;     /* number of non-stop codons         */
+   static char     header = FALSE;  /* TRUE once column titles are output*/
+   int x,y,z;                       /* base at codon position 1, 2 and 3 */
+   /* tab separated for human readable output, else the user's separator */
+   char sp=  (pm->seq_format=='H')? 
+       (char) '\t': 
+       (char) pm->seperator;
+
+   typedef double lf;               /* short hand for the casts below    */
+
+   for (x = 0; x < 5; x++) {
+      bases[x] = 0;                 /* initialise array values to zero    */
+      base_tot[x] = 0;
+      base_1[x] = 0;
+      base_2[x] = 0;
+      base_3[x] = 0;
+   }
+
+   for (x = 1; x < 5; x++)
+      for (y = 1; y < 5; y++)
+     for (z = 1; z < 5; z++) {      /* look at all 64 codons              */
+        /* 2nd base varies fastest, then the 3rd, then the 1st           */
+        id = (x - 1) * 16 + y + (z - 1) * 4;
+
+        if (pcu->ca[id] == 11)
+           continue;                /* skip if a stop codon               */
+        base_tot[x] += ncod[id];    /* we have a codon xyz therefore the  */
+        base_1[x] += ncod[id];      /* frequency of each position for base*/
+        base_tot[y] += ncod[id];    /* x,y,z are equal to the number of   */
+        base_2[y] += ncod[id];      /* xyz codons .... easy               */
+        base_tot[z] += ncod[id];    /* will be fooled a little if there   */
+        base_3[z] += ncod[id];      /* non translatable codons, but these */
+                                    /* are ignored when the avg is calc   */
+        totalaa += ncod[id];
+
+        if (*(ds + id) == 1)
+           continue;                /* if not synon  skip codon           */
+
+        bases[z] += ncod[id];       /* count no of codons ending in Z     */
+                     
+        tot_s += ncod[id];          /* count tot no of silent codons      */
+                      
+     }
+
+
+   /* every ratio below divides by tot_s or totalaa, so refuse to go on  */
+   /* when either of them is zero                                        */
+   if (!tot_s || !totalaa) {
+      fprintf(pm->my_err, "Warning %.20s appear to be too short\n", title);
+      fprintf(pm->my_err, "No output was written to file   \n");
+      return;
+   }
+   switch ((int) which) {
+   case 1:                          /* exhaustive output for analysis     */
+      if (pm->seq_format == 'M') {  /* machine readable format            */
+     if (!header) {                 /* print a first line                 */
+        fprintf(fblkout,
+         "Gene_description%cLen_aa%cLen_sym%cGC%cGC3s%cGCn3s%cGC1%cGC2"
+         "%cGC3%cT1%cT2%cT3%cC1%cC2%cC3%cA1%cA2%cA3%cG1%cG2%cG3\n"
+		,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp);
+        header = TRUE;
+     }
+                                    /* now print the information          */
+     /* values follow the order of the column titles written above       */
+     fprintf(fblkout, "%-.20s%c", title,sp); 
+     fprintf(fblkout, 
+	     "%ld%c%ld%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c"
+         "%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c"
+         "%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f\n",
+	     totalaa,sp,
+	     tot_s,sp,
+	     (lf) (base_tot[2] + base_tot[4]) / (lf) (totalaa * 3),sp,
+	     (lf) (bases[2] + bases[4]) / (lf) tot_s,sp,
+	     (lf) (base_tot[2] + base_tot[4] - bases[2] - bases[4])
+	     / (lf) (totalaa * 3 - tot_s),sp,
+	     (lf) (base_1[2] + base_1[4]) / (lf) (totalaa),sp,
+	     (lf) (base_2[2] + base_2[4]) / (lf) (totalaa),sp,
+	     (lf) (base_3[2] + base_3[4]) / (lf) (totalaa),sp,
+	     (lf) base_1[1] / (lf) totalaa,sp, 
+	     (lf) base_2[1] / (lf) totalaa,sp, 
+	     (lf) base_3[1] / (lf) totalaa,sp,
+	     (lf) base_1[2] / (lf) totalaa,sp, 
+	     (lf) base_2[2] / (lf) totalaa,sp, 
+	     (lf) base_3[2] / (lf) totalaa,sp,
+	     (lf) base_1[3] / (lf) totalaa,sp, 
+	     (lf) base_2[3] / (lf) totalaa,sp, 
+	     (lf) base_3[3] / (lf) totalaa,sp,
+	     (lf) base_1[4] / (lf) totalaa,sp, 
+	     (lf) base_2[4] / (lf) totalaa,sp, 
+	     (lf) base_3[4] / (lf) totalaa);
+      } else {                      /* must be human formatted output then*/
+     /* one table per gene, the left half has a row for each base T C A G*/
+     /* the right half has the paired classes W=T+A S=C+G R=A+G Y=T+C    */
+     /* each row holds positions 1, 2, 3 and then the total              */
+     fprintf(fblkout,               /* tabulated output                   */ 
+         "Gene Name: %-69.69s\nLength   : %-ld aa"
+         " \tNon_synonymous/synonymous codons (%3ld/%5ld)\n"
+         " GC=%5.3f\tGC3s=%5.3f\tGC_not_GC3s=%5.3f\n"
+         "base\t1\t2\t3\ttotal\t\t1\t2\t3 \ttotal\n"
+         "  T\t%5.3f\t%5.3f\t%5.3f\t%5.3f\t"
+         "W\t%5.3f\t%5.3f\t%5.3f\t%5.3f\n"
+         "  C\t%5.3f\t%5.3f\t%5.3f\t%5.3f\t"
+         "S\t%5.3f\t%5.3f\t%5.3f\t%5.3f\n"
+         "  A\t%5.3f\t%5.3f\t%5.3f\t%5.3f\t"
+         "R\t%5.3f\t%5.3f\t%5.3f\t%5.3f\n"
+         "  G\t%5.3f\t%5.3f\t%5.3f\t%5.3f\t"
+         "Y\t%5.3f\t%5.3f\t%5.3f\t%5.3f\n\n",
+         title,
+         totalaa,
+         totalaa - tot_s,
+         tot_s,
+         (lf) (base_tot[2] + base_tot[4]) / (lf) (totalaa * 3),
+         (lf) (bases[2] + bases[4]) / (lf) tot_s,
+         (lf) (base_tot[2] + base_tot[4] - bases[2] - bases[4])
+         / (lf) (totalaa * 3 - tot_s),
+         (lf) base_1[1] / (lf) totalaa, (lf) base_2[1] / (lf) totalaa, 
+         (lf) base_3[1] / (lf) totalaa,
+         (lf) base_tot[1] / (lf) (totalaa * 3),
+         (lf) (base_1[1] + base_1[3]) / (lf) totalaa,
+         (lf) (base_2[1] + base_2[3]) / (lf) totalaa,
+         (lf) (base_3[1] + base_3[3]) / (lf) totalaa,
+         (lf) (base_tot[1] + base_tot[3]) / (lf) (totalaa * 3),
+         (lf) base_1[2] / (lf) totalaa, (lf) base_2[2] / (lf) totalaa, 
+         (lf) base_3[2] / (lf) totalaa,
+         (lf) base_tot[2] / (lf) (totalaa * 3),
+         (lf) (base_1[2] + base_1[4]) / (lf) totalaa,
+         (lf) (base_2[2] + base_2[4]) / (lf) totalaa,
+         (lf) (base_3[2] + base_3[4]) / (lf) totalaa,
+         (lf) (base_tot[2] + base_tot[4]) / (lf) (totalaa * 3),
+         (lf) base_1[3] / (lf) totalaa, (lf) base_2[3] / (lf) totalaa, 
+         (lf) base_3[3] / (lf) totalaa,
+         (lf) base_tot[3] / (lf) (totalaa * 3),
+         (lf) (base_1[3] + base_1[4]) / (lf) totalaa,
+         (lf) (base_2[3] + base_2[4]) / (lf) totalaa,
+         (lf) (base_3[3] + base_3[4]) / (lf) totalaa,
+         (lf) (base_tot[3] + base_tot[4]) / (lf) (totalaa * 3),
+         (lf) base_1[4] / (lf) totalaa, (lf) base_2[4] / (lf) totalaa, 
+         (lf) base_3[4] / (lf) totalaa,
+         (lf) base_tot[4] / (lf) (totalaa * 3),
+         (lf) (base_1[1] + base_1[2]) / (lf) totalaa,
+         (lf) (base_2[1] + base_2[2]) / (lf) totalaa,
+         (lf) (base_3[1] + base_3[2]) / (lf) totalaa,
+         (lf) (base_tot[1] + base_tot[2]) / (lf) (totalaa * 3));
+                                    /* What hit me, did anyone see a bus  */
+      }
+      break;
+   case 2:                          /* a bit more simple ... GC content   */
+      fprintf(foutput, "%5.3f%c", (lf) ((base_tot[2] + base_tot[4]) / (lf) 
+          (totalaa * 3)),sp);
+      break;
+   case 3:                          /* GC3s                               */
+      fprintf(foutput, "%5.3f%c", (lf) (bases[2] + bases[4]) / 
+          (lf) tot_s,sp);
+      break;
+   case 4:                          /* Number of synonymous codons        */
+      fprintf(foutput, "%3li%c", tot_s,sp);
+      break;
+   case 5:                          /* Total length in translatable AA    */
+      fprintf(foutput, "%3li%c", totalaa,sp);
+      break;
+
+   /* without DEBUG any other value of which silently writes nothing     */
+#ifdef DEBUG
+   default:
+      fprintf(stderr, " Programming error in GC_out which (%i) is out of "
+          "valid range\n"
+          ,(int) which);
+      my_exit(99, "gc out");
+      break;
+#endif
+   }
+   return;
+}
+
+/********************     DOT    ******************************************/
+/*   Progress indicator written to stderr                                 */
+/*   A '.' is printed on every period-th call, and the line is wrapped    */
+/*   once 50 dots have been printed on it                                 */
+/*   Calling with y == 0 restarts the count of dots on the current line   */
+/**************************************************************************/
+
+void dot(int y, long int period)
+{
+   static long int calls;           /* calls made so far, never reset     */
+   static char     on_line = 0;     /* dots printed on the current line   */
+
+   if (y == 0)
+      on_line = 0;
+
+   calls++;
+   if (calls % period == 0) {       /* time for another dot               */
+      fputc('.', stderr);
+      on_line++;
+   }
+
+   if (on_line == 50) {             /* line is full, start a new one      */
+      fputc('\n', stderr);
+      on_line = 0;
+   }
+   return;
+}
+/**********************  get_aa    *****************************************/
+/* get_aa translates a three letter codon string into the name of the      */
+/* amino acid it encodes and returns a pointer to that name                */
+/* which == 1 selects the name held in paa->aa1, any other value selects   */
+/* the name held in paa->aa3                                               */
+/* ident_codon converts the codon string into a codon number               */
+/* pcu->ca converts the codon number into amino acid number                */
+/* If codon is NULL or is not exactly three characters long then           */
+/* paa->aa1[0] is returned whatever the value of which                     */
+/***************************************************************************/
+
+char *get_aa(int which, char *codon)
+{
+   int             aa_num;          /* amino acid number of this codon    */
+
+   /* reject anything that is not a codon before it gets to ident_codon   */
+   if (codon == NULL || strlen(codon) != 3)
+      return paa->aa1[0];
+
+   aa_num = pcu->ca[ident_codon(codon)];
+
+   if (which == 1)
+      return paa->aa1[aa_num];
+
+   return paa->aa3[aa_num];
+}
+/**********************   cutab_out     ***********************************/
+/* Writes a table with, for each of the 64 codons, the amino acid, the    */
+/* codon, the number of times it was used and its RSCU                    */
+/* nncod holds the codon usage and nnaa the amino acid usage              */
+/* ds points to an array of synonymous values, for each codon it reveals  */
+/* how many synonyms there are for its amino acid                         */
+/* Four codons are written per line with a blank line after every 16      */
+/* The amino acid name is only written when it differs from the one last  */
+/* written in the same column. Always returns 1                           */
+/**************************************************************************/
+int cutab_out(FILE * fblkout, long *nncod, long *nnaa)
+{
+   int             shown_aa[4] = {0, 0, 0, 0}; /* aa last seen per column */
+   int             cod, col, machine;
+   float           rscu;
+   char            sep;
+
+   machine = (pm->seq_format == 'M');
+
+   if (machine)
+      sep = pm->seperator;
+   else
+      sep = '\t';
+
+   codon_tot = codon_error(1, 1, "", (char) 4); /*  dummy*/
+
+/* Sample of output *******************************************************/
+/*Phe UUU    0 0.00 Ser UCU    1 0.24 Tyr UAU    1 0.11 Cys UGU    1 0.67 */
+/*    UUC   22 2.00     UCC   10 2.40     UAC   17 1.89     UGC    2 1.33 */
+/*Leu UUA    0 0.00     UCA    1 0.24 TER UAA    0 0.00 TER UGA    1 3.00 */
+/*    UUG    1 0.12     UCG    6 1.44     UAG    0 0.00 Trp UGG    4 1.00 */
+/**************************************************************************/
+   for (cod = 1; cod < 65; cod++) {
+      col = cod % 4;
+
+      /* amino acid name (when it changes in this column) and the codon   */
+      if (shown_aa[col] != pcu->ca[cod]) {
+         if (machine)
+            fprintf(fblkout, "%s%c%s%c",
+                paa->aa3[pcu->ca[cod]], sep, paa->cod[cod], sep);
+         else
+            fprintf(fblkout, "%s %s",
+                paa->aa3[pcu->ca[cod]], paa->cod[cod]);
+      } else {
+         if (machine)
+            fprintf(fblkout, "%c%s%c", sep, paa->cod[cod], sep);
+         else
+            fprintf(fblkout, "    %s", paa->cod[cod]);
+      }
+
+      /* RSCU is left at zero for a codon that was never used             */
+      rscu = (nncod[cod]) ?
+          ((float) nncod[cod] / (float) nnaa[pcu->ca[cod]])
+          * (float) (*(ds + cod)) : 0;
+
+      if (machine)
+         fprintf(fblkout, "%i%c%.2f%c", (int) nncod[cod], sep, rscu, sep);
+      else
+         fprintf(fblkout, "%5i%5.2f ", (int) nncod[cod], rscu);
+
+      shown_aa[col] = pcu->ca[cod];
+
+      if (col == 0)                 /* four codons to a line              */
+         fprintf(fblkout, "\n");
+      if (cod % 16 == 0)            /* blank line after every 16 codons   */
+         fprintf(fblkout, "\n");
+   }
+
+   fprintf(fblkout, "%li codons in %16.16s (used %22.22s)\n\n",
+       (long int) codon_tot, title, pcu->des);
+   return 1;
+}
+/********************  Dinuc_count    *************************************/
+/* Counts the dinucleotides found in the first ttot characters of seq     */
+/* and adds them to the global array din[frame][dinucleotide]. This is    */
+/* one of the few functions that works on the raw sequence data rather    */
+/* than on the codon and amino acid usage arrays ncod and naa             */
+/* Bases are coded T/U=1 C=2 A=3 G=4 and anything else as 0               */
+/* The previous base is remembered between calls, and the global frame    */
+/* counter fram only moves on when a dinucleotide is counted              */
+/* Always returns 1                                                       */
+/**************************************************************************/
+int dinuc_count(char *seq, long int ttot)
+{
+   static char     cur = 0;         /* code of the base last looked at    */
+   int             pos;
+
+   for (pos = 0; pos < ttot; pos++) {
+      last_base = cur;              /* what was current is now previous   */
+
+      switch (seq[pos]) {
+      case 't': case 'T':
+      case 'u': case 'U':
+         cur = 1;
+         break;
+      case 'c': case 'C':
+         cur = 2;
+         break;
+      case 'a': case 'A':
+         cur = 3;
+         break;
+      case 'g': case 'G':
+         cur = 4;
+         break;
+      default:
+         cur = 0;
+         break;
+      }
+
+      /* only count when both bases are a standard UTCG, this also skips  */
+      /* the first base seen as it has no previous base                   */
+      if (cur && last_base) {
+         din[fram][((last_base - 1) * 4 + cur) - 1]++;
+
+         fram++;                    /* move to the next frame and wrap    */
+         if (fram == 3)
+            fram = 0;
+      }
+   }
+   return 1;
+}
+/***************** Dinuc_out           ************************************/
+/* Outputs the frequency of dinucleotides, either in four rows per seq    */
+/* if the output is meant to be in a human readable form, each row repre- */
+/* senting a reading frame. The fourth row is the total of the all the    */
+/* reading frames. Machine readable format writes all the data into a     */
+/* single row                                                             */
+/* The counts are read from the global array din, each frequency is the   */
+/* count divided by the total for that row, or 0 when that total is 0     */
+/* A header line is written the first time the function is called         */
+/* ttitle is the name of the sequence. Always returns 1                   */
+/**************************************************************************/
+int dinuc_out(FILE * fblkout, char *ttitle)
+{
+   static char        called = FALSE;
+   static const char  bases[4] = {'T', 'C', 'A', 'G'};
+   static const char *frame_name[4] = {"1:2", "2:3", "3:1", "all"};
+   char               sp;
+   long               dinuc_tot[4];
+   float              freq;
+   int                human, i, x, y;
+
+   human = (pm->seq_format == 'H');
+   sp = human ? ' ' : pm->seperator;
+
+   for (x = 0; x < 4; x++)
+      dinuc_tot[x] = 0;
+
+   for (x = 0; x < 3; x++)
+      for (i = 0; i < 16; i++) {
+         dinuc_tot[x] += din[x][i]; /* count dinuc usage in each frame    */
+         dinuc_tot[3] += din[x][i]; /* and total dinuc usage              */
+      }
+
+   if (!called) {                   /* write out the first row as a header*/
+      called = TRUE;
+
+      if (human) {
+         fprintf(fblkout, "%-13.13s%cframe%c", "title", sp, sp);
+         for (x = 0; x < 4; x++)
+            for (i = 0; i < 4; i++)
+               /* pad to the width of a "%5.3f%c" data column; a          */
+               /* precision is not defined for %c so only give a width    */
+               fprintf(fblkout, "%c%c%4c", bases[x], bases[i], sp);
+      } else {
+         fprintf(fblkout, "%s", "title");
+         for (y = 0; y < 4; y++) {  /* one set of titles per frame        */
+            fprintf(fblkout, "%c%s", sp, "frame");
+            for (x = 0; x < 4; x++)
+               for (i = 0; i < 4; i++)
+                  fprintf(fblkout, "%c%c%c", sp, bases[x], bases[i]);
+         }
+      }
+      fprintf(fblkout, "\n");
+   }                                /* matches if (!called)               */
+
+/*Sample output   truncated  **********************************************/
+/*title         frame TT    TC    TA    TG    CT    CC    CA    CG    AT  */
+/*MLSPCOPER.PE1__ 1:2 0.024 0.041 0.016 0.008 0.049 0.041 0.033 0.098 ... */
+/*MLSPCOPER.PE1__ 2:3 0.000 0.195 0.000 0.098 0.000 0.138 0.008 0.073 ... */
+/*MLSPCOPER.PE1__ 3:1 0.008 0.016 0.000 0.033 0.033 0.107 0.172 0.262 ... */
+/*MLSPCOPER.PE1__ all 0.011 0.084 0.005 0.046 0.027 0.095 0.071 0.144 ... */
+/*MLSPCOPER.PE2__ 1:2 0.026 0.026 0.009 0.009 0.053 0.035 0.053 0.061 ... */
+/**************************************************************************/
+   for (x = 0; x < 4; x++) {        /* three frames and then the total    */
+
+      /* human readable repeats the title on each row, machine readable   */
+      /* writes it once at the start of the single row                    */
+      if (human)
+         fprintf(fblkout, "%-15.15s%c", ttitle, sp);
+      else if (x == 0)
+         fprintf(fblkout, "%-.15s%c", ttitle, sp);
+
+      fprintf(fblkout, "%s%c", frame_name[x], sp);
+
+      for (i = 0; i < 16; i++) {
+         if (!dinuc_tot[x])         /* nothing counted, avoid dividing by 0*/
+            freq = (float) 0.0;
+         else if (x == 3)           /* the total over the three frames    */
+            freq = (float) (din[0][i] + din[1][i] + din[2][i]) /
+                (float) dinuc_tot[x];
+         else
+            freq = (float) din[x][i] / (float) dinuc_tot[x];
+
+         fprintf(fblkout, "%5.3f%c", freq, sp);
+      }
+
+      if (human || x == 3)
+         fprintf(fblkout, "\n");
+   }
+   return 1;
+}
+/************* Coa_raw_out            *************************************/
+/* Write out codon usage in a format compatible with the format required  */
+/* by text2bin, i.e. part of the COA analysis suite of subroutines        */
+/* rather than storing this data in memory, we first write raw codon usage*/
+/* to disk, and then read it in as necessary, the file handle for this    */
+/* data is passed via the fcoaout pointer. By default it writes to the    */
+/* files coa_raw and coa1_raw                                             */
+/* Each call writes one line, a running count joined by '_' to the first  */
+/* 20 characters of ttitle, followed by the 64 values of nncod if pm->coa */
+/* is 'c' or 'r', or by the 21 values of nnaa if pm->coa is 'a'           */
+/* NB white space in ttitle is replaced by '_' in the caller's string, and*/
+/* pm->junk is overwritten with the shortened name. Always returns 1      */
+/**************************************************************************/
+char coa_raw_out(FILE * fcoaout, long *nncod, long *nnaa, char *ttitle)
+{
+
+   static int      count = 0;       /* number of sequences written so far */
+   int             i;
+   char           *p;
+
+   /* don't take any chances, the name must be one white space free word  */
+   /* the cast keeps isspace defined for characters above 127             */
+   for (p = ttitle; *p != '\0'; p++)
+      if (isspace((unsigned char) *p))
+         *p = '_';
+
+   /* strncpy does not terminate the copy when ttitle has 20 or more      */
+   /* characters, so terminate it here before it is printed with %s       */
+   strncpy(pm->junk, ttitle, 20);              /* sequence name           */
+   pm->junk[20] = '\0';
+   fprintf(fcoaout, "%i_%s ", ++count, pm->junk);
+
+   switch (pm->coa) {
+   case 'c':
+   case 'r':                                  /* if rscu or codon usage   */
+      for (i = 1; i < 65; i++)
+         fprintf(fcoaout, "%i\t", (int) nncod[i]);
+      fprintf(fcoaout, "\n");
+      break;
+   case 'a':                                  /* if amino acid usage      */
+      for (i = 1; i < 22; i++)
+         fprintf(fcoaout, "%i\t", (int) nnaa[i]);
+      fprintf(fcoaout, "\n");
+      break;
+#ifdef DEBUG                                  /* Debugging code           */
+   default:
+      fprintf(pm->my_err, " Error in coa_out_raw\n");
+#endif
+   }
+   return 1;
+}
+/**********  sorted_by_axis1    *******************************************/
+/* COA specific routine, orders the genes by their position on the first  */
+/* axis so that the genes at either end of the first trend can be found   */
+/* The position of each gene on axis 1 is passed via the ax1 pointer      */
+/* The number of genes is passed by the integer value lig                 */
+/* On return sortax1[1] holds the number of the gene with the lowest      */
+/* position, sortax1[2] the next lowest and so on up to sortax1[lig]      */
+/* Both arrays are used from element 1, element 0 is not touched          */
+/* When two genes have the same position the lower numbered one is first  */
+/**************************************************************************/
+void sorted_by_axis1(double *ax1, int *sortax1, int lig)
+{
+   int            *placed;          /* TRUE for genes already in sortax1  */
+   int             rank, gene, lowest;
+
+   /* calloc hands back zeroed memory, so no gene starts off as placed    */
+   if ((placed = (int *) calloc(lig + 1, sizeof(int))) == NULL)
+      my_exit(3, "sorted by axis 1");
+
+   for (rank = 1; rank <= lig; rank++) {
+      lowest = 0;                   /* no candidate for this rank yet     */
+
+      for (gene = 1; gene <= lig; gene++) {
+         if (placed[gene])
+            continue;               /* gene is already in sortax1 .. next */
+
+         /* the first gene not yet placed starts as the candidate, after  */
+         /* that only a strictly lower position can replace it            */
+         if (lowest == 0 || ax1[gene] < ax1[lowest])
+            lowest = gene;
+      }
+
+      sortax1[rank] = lowest;
+      placed[lowest] = TRUE;
+   }
+   free(placed);
+}
+/***********  gen_cusort_fop                 ******************************/
+/* COA specific routine, takes the array of gene numbers ordered by their */
+/* axis 1 position as filled in by sorted_by_axis1 and passed via the     */
+/* sortax1 pointer, sortax1[i] is the line number in fnam of the gene     */
+/* that is i-th along the axis                                            */
+/* This allows us to identify genes position at either end of the main    */
+/* trend. Then the codon usage of these genes is used to write out a file */
+/* (cusort.coa) with the genes in a axis1 position order                  */
+/* the codon usage of the two groups at either end of the principal axis  */
+/* are also counted. This information is then passed to highlow()         */
+/* fnam is the raw codon usage file, it is rewound and re-read for each   */
+/* gene. ssummary receives a note saying which genes made up the groups   */
+/* The number of genes is passed by the integer value lig                 */
+/* The global arrays ncod and naa and pm->junk are overwritten            */
+/**************************************************************************/
+void gen_cusort_fop(int *sortax1, int lig, FILE * fnam, FILE *ssummary)
+{
+   int             stops;           /* count of stop codons in this gene  */
+   long int       *low, *high;      /* summed CU of the two end groups    */
+   int             min, max, i ;    /* ranks 1..min and max+1..lig are    */
+                                    /* the two groups                     */
+   float           v2;              /* a value as read from fnam          */
+   FILE           *fcusort = NULL;
+   int            j;
+
+   
+   /* first open the output file that will hold the sorted codon usage    */
+   if ((fcusort = open_file("", "cusort.coa", "w", FALSE)) == NULL)
+      my_exit(1, "gen_cusort_fop");                       
+
+   /* calloc enough memory for the codon usage of the low group of genes  */
+   if ((low = (long int *) calloc(65, sizeof(long int))) == NULL)
+      my_exit(3, "low gen_cusort_fop");
+   /* calloc enough memory for the codon usage of the high group of genes */
+   if ((high = (long int *) calloc(65, sizeof(long int))) == NULL)
+      my_exit(3, "high gen_cusort_fop");
+
+   /*pcoa->fop_gene is set in the advanced correspondence menu and is used*/
+   /*to set the No of genes at either end of the principal axis that are  */
+   /*to be used to create the low and high codon bias subsets of genes    */
+   if (pcoa->fop_gene < 0) {        /* a negative number is a percentage  */
+      min = (int) ((float) lig * ((float) pcoa->fop_gene * -0.01));
+      max = lig - (int) ((float) lig * ((float) pcoa->fop_gene * -0.01));
+   } else {                        /*  the value is an absolute number    */
+      min = pcoa->fop_gene;
+      max = lig - pcoa->fop_gene;
+   }
+
+   if (min <= 0) {                 /* error catch in case % is too low    */
+      min = 1;                     /* or fop_gene is set too high         */
+      fprintf(pm->my_err, "Problems with the number genes used for"
+          " fop adjusting to 1 gene\n");
+   }
+   if (max <= 0) {                 /* ditto                               */
+      max = 1;
+      fprintf(pm->my_err, "Problems with the number genes used for"
+          " fop adjusting to one gene\n");
+   }
+   for (j = 1; j < 65; j++) {      /* initialise the blank array, calloc  */
+      low[j] = 0;                  /* has already zeroed both of them     */
+      high[j] = 0;
+   }
+
+   /* write explanation about what we are doing to summary.coa            */ 
+   /* NOTE(review): the last gene number printed is pcoa->rows while the  */
+   /* loop below runs to lig, presumably these are equal - confirm        */
+   fprintf(ssummary, "\ncusort.coa (not shown here) contains CU of "
+       "genes sorted by their\n"
+       "ordination on the principle axis or factor\n"
+       "Genes used to calculate fop were 1 to %i and %i to %i\n"
+       "these gene numbers REFER ONLY to the file cusort.coa\n"
+       ,min, max + 1, pcoa->rows);
+
+   for (i = 1; i <= lig; i++) {     /* foreach rank along the axis        */
+      rewind(fnam);                 /* go to start of codon_raw           */
+      clean_up(ncod, naa);          /* blank the codon usage array        */    
+      j = 1;
+      while (j++ != sortax1[i])     /* skip the sortax1[i]-1 lines that   */ 
+       fgets(pm->junk, BUFSIZ,fnam);/* come before the gene ranked i      */ 
+      /* NOTE(review): %s has no field width and none of the fscanf       */
+      /* return values in this loop are checked                           */
+      fscanf(fnam, "%s", pm->junk); /* now we know the name of seq i      */
+
+      for (j = 1; j < 64; j++) {    /* now read in the cu of each codon   */
+       fscanf(fnam, "%f", &v2);     /* assign it initially to v2          */ 
+       ncod[j] = (long int) v2;     /* then place this value in ncod      */
+     if (min >= i)                  /* remember the codon usage of the    */ 
+        low[j] += (long int) v2;    /* two groups of genes at either end  */
+     if (max < i)                   /* of the axis, containing min and    */
+        high[j] += (long int) v2;   /* lig - max genes                    */
+      }
+
+      /* the 64th value is read on its own so that the format can also    */
+      /* swallow the white space, including the newline, that follows it  */
+      fscanf(fnam, "%f\n", &v2);    /* now read the last codon in         */
+      ncod[64] = (long int) v2;
+      if (min >= i)
+       low[64] += (long int) v2;
+      if (max < i)
+        high[64] += (long int) v2;  /* as above                           */
+
+      /* we want to use codon_us_out to write out the sorted list of CU   */
+      /* to cusort.coa. But if we have any internal stops etc, it will    */
+      /* generate error messages, but we have already seen this messages  */
+      /* on the first pass, so we fool it by saying all the stops are     */
+      /* valid stops and not to complain again                            */
+      for (j = 1, stops = 0; j < 65; j++)   
+                 if (pcu->ca[j] == 11)
+                        stops += (int) ncod[j];
+      dot( 1 , 10 );                /* progress dot every tenth gene      */
+      codon_usage_out(fcusort, ncod, 11, stops, pm->junk);
+   }
+   fileclose(&fcusort);              
+   highlow(low, high, ssummary);        /* now we call highlow           */
+                                        /* to use the sorted cu output   */
+   free(low);                           /* release the memory to the OS  */
+   free(high);
+}
+
+/************ highlow          ********************************************/
+/* The codon usage of the two groups on either end of the axis is assigned*/
+/* to low and high ... perhaps these would be better called left and right*/
+/* as when they are passed to this function it is not known which group is*/
+/* lowly or highly biased. This is decided within highlow, by calculating */
+/* the enc (a measure of bias) for each group and assigning the group with*/
+/* the lowest enc as the higher biased genes. This works if the trend     */
+/* represented by axis1 is truly selection for optimal translation        */
+/* IT'S THE USER'S RESPONSIBILITY TO ASCERTAIN IF THIS IS VALID           */
+/* This information is used to identify optimal codons, as well as        */
+/* calculate  putative CAI adaptive values and for the Chi squared con-   */
+/* tingency test, used to identify the optimal and non-optimal codons     */
+/**************************************************************************/
+
+void highlow(long int *low, long int *high, FILE * ssummary)
+{
+
+   int            *last_row, icode, outer,i,j,x ;
+
+   long int       *aa_low, *aa_high, *left, *right, *left_aa, *right_aa;
+   long int       *highest_x;
+   long int        right_tot = 0, left_tot = 0;
+
+   float           enc_low, enc_high;
+   float           a, b, c, d, e, f, g, h, total, hr, br, *x2;
+   float           w;
+   char           *flag, sp;
+
+   FILE           *fcai=NULL,*fhilo = NULL, *ffop = NULL;
+   FILE           *fcbi=NULL;
+
+   /*calloc to the pointers the required storage                          */
+   if ((fhilo = open_file("", "hilo.coa", "w", FALSE)) == NULL)
+      my_exit(1, "hilo.coa");
+   if ((ffop = open_file("", "fop.coa", "w", FALSE)) == NULL)
+      my_exit(1, "fop.coa");
+   if ((aa_low = (long int *) calloc(22, sizeof(long int))) == NULL)
+      my_exit(3, "aa_low");
+   if ((aa_high = (long int *) calloc(22, sizeof(long int))) == NULL)
+      my_exit(3, "aa_high");
+   if ((highest_x = (long int *) calloc(22, sizeof(long int))) == NULL)
+      my_exit(3, "last_row");      
+   if ((x2 = (float *) calloc(65, sizeof(float))) == NULL)
+      my_exit(3, "x2");
+   if ((flag = (char *) calloc(65, sizeof(char))) == NULL)
+      my_exit(3, "flag");
+   if ((last_row = (int *) calloc(65, sizeof(int))) == NULL)
+      my_exit(3, "last_row");
+   
+  
+   if (pm->seq_format=='M')
+      sp = pm->seperator;
+   else
+      sp = '\t';
+
+   /* initialize the various arrays                                       */
+   for (x = 0; x < 4; x++) last_row[x] = 0;
+
+   for (x = 0; x < 22; x++){
+      highest_x[x]=0;
+      aa_low   [x]=0;
+      aa_high  [x]=0;
+      }
+   for (x = 0; x <65 ; x++) {
+      x2      [x]= (float) 0.0;
+      flag    [x]=0;   
+      last_row[x]=0;
+      }
+      
+      
+   /*count the amino acid usage for the two datasets, initially we only   */
+   /*have the codon usage of the two groups                               */
+   for (i = 1; i < 65; i++) {
+      aa_low[pcu->ca[i]] += low[i];
+      aa_high[pcu->ca[i]] += high[i];
+      flag[i] = ' ';                /*flag is used to identify opt codons */
+   }
+
+   enc_low = enc_out(fhilo, low, aa_low);         /*calc enc for each  of */
+   enc_high = enc_out(fhilo, high, aa_high);      /*datasets              */
+   fprintf(fhilo, "\n");
+
+   fprintf(ssummary, "\nenc_left %f enc_right %f\n", enc_low, enc_high);
+
+   for (i = 1; i < 65; i++) {
+      if (*(ds + i) == 1 || pcu->ca[i] == 11)     /*skip stop and nonsynon*/
+     continue;
+
+      if (enc_low < enc_high) {                  /*decide which is more   */
+        left = low;                              /*biased                 */
+        right = high;                            /*left and right refer   */
+        left_aa = aa_low;                        /*the columns of outputed*/
+        right_aa = aa_high;                      /*hilow table            */
+        a = (float) low[i];
+        b = (float) high[i];
+        g = (float) aa_low[pcu->ca[i]];
+        h = (float) aa_high[pcu->ca[i]];
+      } else {
+        left = high;
+        right = low;
+        left_aa = aa_high;
+        right_aa = aa_low;
+        a = (float) high[i];
+        b = (float) low[i];
+        g = (float) aa_high[pcu->ca[i]];
+        h = (float) aa_low[pcu->ca[i]];
+     }
+      /* calculate the chi squared contingency value                      */
+      c = g - a;
+      d = h - b;
+      e = a + b;
+      f = c + d;
+      total = a + b + c + d;
+      if (e * f * h * g)
+     x2[i] = ((a * d - c * b) * (a * d - c * b)) * total / (e * f * g * h);
+      else
+     x2[i] = (float) -99.0;                   /*if 0 assign nonsense value*/
+
+      if (g * h) {
+     hr = a / g;
+     br = b / h;
+     if (hr > br && x2[i] > 6.635)            /* if significant at p<.99  */
+        flag[i] = '*';
+     else if (hr > br && x2[i] > 3.841)       /* if significant at p<0.05 */
+        flag[i] = '@';
+      }
+   }
+   fprintf(ssummary, "Chi squared contingency test of genes from both\n"
+                     "extremes of axis 1\n");
+/* this created the hi-low codon usage table                              */
+/* Sample output truncated (***********************************************/
+/*Asp   GAU   0.10 ( 10) 1.68 ( 53)   Gly   GGU   0.21 ( 12) 0.85 ( 11)   */   
+/*      GAC*  1.90 (184) 0.32 ( 10)         GGC*  3.13 (176) 2.00 ( 26)   */   
+/*Glu   GAA   0.00 (  0) 1.34 ( 55)         GGA   0.05 (  3) 0.69 (  9)   */  
+/*      GAG*  2.00 (255) 0.66 ( 27)         GGG   0.60 ( 34) 0.46 (  6)   */   
+/*                                                                        */
+/*                                                                        */
+/*        Number of codons in high bias dataset 2825                      */
+/*        Number of codons in low  bias dataset 1194                      */
+/*Note: high bias was assigned to the dataset with the lower average Nc   */
+/*NO Chi could be calculated for UGU                                      */
+/*Codon UUC (Phe) chi value was 70.175                                    */
+/*Codon UCC (Ser) chi value was 48.030                                    */
+/*Codon UAC (Tyr) chi value was 86.069                                    */
+/**************************************************************************/ 
+
+   for (outer = 1; outer <= 3; outer += 2) {
+      for (x = 1; x < 5; x++) {
+      for (j = 1; j < 5; j++) {
+        icode = ((x - 1) * 16) + ((j - 1) * 4) + outer; 
+
+
+        for (i = icode; i <= icode + 1; i++) {   /*loop twice             */
+            /* if the previous entry in this column codes for the same AA */
+            if (last_row[i % 2] != pcu->ca[i]) {
+	          fprintf(fhilo, "%s%c%s%c%c", paa->aa3[pcu->ca[i]],
+		              sp, paa->cod[i], flag[i], sp);
+	          fprintf(ssummary, "%s%c%s%c%c", paa->aa3[pcu->ca[i]],
+		              sp, paa->cod[i], flag[i], sp);
+	        } else {                       
+	           fprintf(fhilo, "%c%s%c%c", sp, paa->cod[i], flag[i], sp);
+	           fprintf(ssummary, "   %c%s%c%c",sp,paa->cod[i],flag[i],sp);
+	        }
+            /* write out Codon usage, RSCU and significance for both data */
+	       fprintf(fhilo, "%4.2f (%3i) %4.2f (%3i)%c",
+		       (left[i]) ?
+		       ((float) left[i] / (float) left_aa[pcu->ca[i]])
+		       * (float) (*(ds + i))
+		       : 0.0,
+		       (int) left[i],
+		       (right[i]) ?
+		       ((float) right[i] / (float) right_aa[pcu->ca[i]])
+		       * (float) (*(ds + i))
+		       : 0.0,
+		       (int) right[i],sp);               /*       end of fprintf  */
+	      fprintf(ssummary, "%4.2f (%3i) %4.2f (%3i)%c",
+		       (left[i]) ?
+		       ((float) left[i] / (float) left_aa[pcu->ca[i]])
+		       * (float) (*(ds + i))
+		       : 0.0,
+		       (int) left[i],
+		       (right[i]) ?
+		       ((float) right[i] / (float) right_aa[pcu->ca[i]])
+		       * (float) (*(ds + i))
+		       : 0.0,
+		       (int) right[i],sp);               /*        end of fprintf */
+          last_row[i % 2] = pcu->ca[i];          /* remember the last row */
+        }
+        fprintf(fhilo, "\n");
+        fprintf(ssummary, "\n");
+       }
+       fprintf(ssummary, "\n");
+       fprintf(fhilo, "\n");
+      }
+      fprintf(ssummary, "\n");
+      fprintf(fhilo, "\n");
+   }
+
+   for (i = 1; i < 65; i++) {                    /* count both datasets   */
+      right_tot += right[i];
+      left_tot += left[i];
+   }
+
+
+   fprintf(fhilo, 
+       "\tNumber of codons in high bias dataset %li\n", left_tot);
+   fprintf(fhilo, 
+       "\tNumber of codons in low  bias dataset %li\n", right_tot);
+   fprintf(fhilo, 
+       "Note: high bias was assigned to the dataset with the lower"
+       " average Nc\n");
+
+   fprintf(ssummary, 
+       "\tNumber of codons in high bias dataset %li\n", left_tot);
+   fprintf(ssummary, 
+       "\tNumber of codons in low  bias dataset %li\n", right_tot);
+   fprintf(ssummary, 
+       "Note high bias was assigned to the genes with the lower"
+       " overall Nc\n");
+
+   /* now printout the Chi Squared values for each significant comparison */
+   for (i = 1; i < 65; i++) {
+      if (flag[i] == '*' || flag[i] == '@') {
+     fprintf(fhilo, "Codon %s (%s) chi value was %.3f\n", paa->cod[i],
+         paa->aa3[pcu->ca[i]], x2[i]);
+     fprintf(ssummary, "Codon %s (%s) chi value was %.3f\n", paa->cod[i],
+         paa->aa3[pcu->ca[i]], x2[i]);
+      }
+      if (x2[i] == -99)       /* there were no codons in one of the groups*/
+     fprintf(fhilo, "NO Chi could be calculated for %s\n", paa->cod[i]);
+   }
+   fprintf(fhilo, "\n");
+   fprintf(ssummary, "\n");
+
+   /* now write out the optimal codons as PUTATIVELY identified by codonW */
+   fprintf(ssummary, "These are the PUTATIVE optimal codons\n"
+     "This is the format required for Menu 4 option 2 (Fop) "
+     "and option 3 (CBI)\n"
+     "This data is also duplicated in the files \"fop.coa\" "
+     "and \"cbi.coa\"\n"
+     "The format of these files is that required for input "
+     "as a personal choice\n"
+     "of optimal codons for these indexes\n");
+
+   for (i = 1; i < 65; i++) {
+      if( left[i] > highest_x[pcu->ca[i]])    /* used for calculating CAI */
+                           highest_x[pcu->ca[i]]=left[i]; 
+      
+      if (*(ds + i) == 1 || pcu->ca[i] == 11) {
+     fprintf(ffop, "2");
+     fprintf(ssummary, "2");
+      } else if (flag[i] == '*') {
+     fprintf(ffop, "3");
+     fprintf(ssummary, "3");
+      } else if (((left[i]) ?
+          ((float) left[i] / (float) left_aa[pcu->ca[i]])
+          * (float) (*(ds + i))
+          : 0.0) < 0.1) {                        /* if RSCU <0.1 its rare */
+     fprintf(ffop, "1");
+     fprintf(ssummary, "1");
+      } else {
+     fprintf(ffop, "2");
+     fprintf(ssummary, "2");
+      }
+
+      if (!(i % 16)) {                           /* handle line wrapping  */ 
+     fprintf(ffop, "\n");
+     fprintf(ssummary, "\n");
+      } else {
+     fprintf(ffop, ",");
+     fprintf(ssummary, ",");
+      }
+   }
+   fileclose(&ffop);                              /*   close the Fop file  */
+  
+   if ((fcbi = open_file("", "cbi.coa", "w", FALSE)) == NULL)
+      my_exit(1, "cbi.coa");                     /*    open cbi.coa       */
+      
+ for (i = 1; i < 65; i++) {                      /* write values 2 cbi.coa*/
+
+  if (flag[i] == '*')                       /* Only report optimal codons */
+     fprintf(fcbi, "3");
+  else
+     fprintf(fcbi, "2");                    /* ignore non optimal codons  */
+
+  if (!(i % 16)) 
+     fprintf(fcbi, "\n");
+  else
+     fprintf(fcbi, ",");
+    
+ }
+   
+  fileclose(&fcbi);   
+   
+   fprintf(ssummary, "\n\n");
+    
+   /* now calculate and write out CAI adaptiveness values                 */
+   fprintf(ssummary, "These are PUTATIVE CAI adaptiveness values "
+     "identified by this programme\n"
+     "This data is also duplicated in the file \"cai.coa\"\n"
+     "The format of this file is compatible with the format\n"
+     "of the file used to input a personal selection of CAI values\n"
+     "That is, the format required for Menu 4 option 1\n"
+     "cai.coa\tinput file to be used for CAI calculations\n"
+     "\n\nCod AA    Xi\tWi\t\tCod AA    Xi\tWi\n"); 
+  
+   
+   if ((fcai = open_file("", "cai.coa", "w", FALSE)) == NULL)
+      my_exit(1, "cai.coa"); 
+  
+   for (i = 1, x = TRUE ; i < 65 && x ; i++) {
+    
+    /* if a stop or a non-synonymous codon w = 1                          */
+    if (*(ds + i) == 1 || pcu->ca[i] == 11) {  
+                    fprintf(fcai, "1.0000000 \n");
+                    fprintf(ssummary,"%s %s %6.1f %9.7f\t", 
+                      paa->cod[i], 
+                      paa->aa3[pcu->ca[i]],
+                      (float) left[i], 1.0000000); 
+    } else  if ( highest_x[pcu->ca[i]] ) {
+      
+      /* if a codon is absent then adjust its frequecy to 0.5             */
+      if ( left[i] ) 
+       w= (float) left[i]/ (float) highest_x[pcu->ca[i]];
+      else
+       w= (float) 0.5   / (float) highest_x[pcu->ca[i]];
+      fprintf(fcai, "%9.7f \n", w);                    /* output CAI W    */
+      fprintf(ssummary,"%s %s %6.1f %9.7f\t", 
+             paa->cod[i], paa->aa3[pcu->ca[i]],
+             (left[i]) ? (float) left[i]:0.5 , w); 
+    /* either strange amino acid composition or data sets where too small */               
+    } else {                            
+      fprintf(pm->my_err, 
+          "WARNING An attempt to calculate CAI relative "
+          "adaptivnesss FAILED\n no %s amino acids found"
+          " in the high bias dataset \n",paa->aa3[pcu->ca[i]]);    
+      fprintf(ssummary, 
+          "\nWARNING An attempt to calculate CAI relative adaptiveness "
+          "FAILED\n no %s amino acids found in the high bias dataset \n",
+          paa->aa3[pcu->ca[i]]);
+      x=FALSE;
+   }  
+   if( !(i%2)) fprintf (ssummary  , "\n");
+   } /* matches for (i = 1, x = TRUE ; i < 65 && x ; i++)                 */
+     
+   fileclose(&fcai);                              /* close files           */
+   fileclose(&fhilo);
+   free(aa_low);                                 /* free memory           */
+   free(aa_high);
+   free(highest_x);
+   free(x2);
+   free(flag);
+   free(last_row);
+   return;
+}
+/*********************  hydro_out        **********************************/
+/* The general average hydropathicity or (GRAVY) score, for the hypothet- */
+/* ical translated gene product. It is calculated as the arithmetic mean  */
+/* of the sum of the hydropathic indices of each amino acid. This index   */
+/* was used to quantify the major COA trends in the amino acid usage of   */
+/* E. coli genes (Lobry, 1994).                                           */
+/* Calculates and outputs total protein hydropathicity based on the Kyte  */
+/* and Doolittle Index of hydropathicity (1982)                           */
+/* nnaa               Array with frequency of amino acids                 */
+/* pap                points to a struct of per-amino-acid properties     */
+/* pap->hydro         Pointer to hydropathicity values for each AA        */
+/**************************************************************************/
+int hydro_out(FILE * foutput, long int *nnaa)
+{
+/* Writes the mean of pap->hydro[] weighted by the amino acid frequencies */
+/* in nnaa[1..21] to foutput, then the column separator; always returns 1 */
+   long int        a2_tot = 0;          /* amino acids counted in nnaa[]  */
+   float           hydro = (float) 0.0;
+   int i;
+   char sp=  (pm->seq_format=='H')? (char) '\t': (char) pm->seperator;
+
+   for (i = 1; i < 22; i++)             /* slot 11 is left out, it is the */
+      if (i != 11) a2_tot += nnaa[i];   /* stop slot (cf pcu->ca[]==11)   */
+
+   if (!a2_tot) {           /* nothing to average, warn and write nothing */
+      fprintf(pm->my_err, "Warning %.20s appear to be too short\n", title);
+      fprintf(pm->my_err, "No output was written to file   \n");
+      return 1;
+   }
+   for (i = 1; i < 22; i++)
+      if (i != 11)
+         hydro += ((float) nnaa[i] / (float) a2_tot) * (float) pap->hydro[i];
+
+   fprintf(foutput, "%8.6f%c", hydro,sp );
+   return 1;
+}
+/**************** Aromo_out ***********************************************/
+/* Aromaticity score of protein. This is the frequency of aromatic amino  */
+/* acids (Phe, Tyr, Trp) in the hypothetical translated gene product      */
+/* nnaa               Array with frequency of amino acids                 */
+/* pap                points to a struct of per-amino-acid properties     */
+/* pap->aromo         Pointer to aromaticity values for each AA           */
+/**************************************************************************/
+int aromo_out(FILE * foutput, long int *nnaa)
+{
+/* Writes the sum of pap->aromo[] weighted by the amino acid frequencies  */
+/* in nnaa[1..21] to foutput, then the column separator; always returns 1 */
+   long int        a1_tot = 0;          /* amino acids counted in nnaa[]  */
+   float           aromo = (float) 0.0;
+   int i;
+   char sp=  (pm->seq_format=='H')? (char) '\t': (char) pm->seperator;
+
+   for (i = 1; i < 22; i++)             /* slot 11 is left out, it is the */
+      if (i != 11) a1_tot += nnaa[i];   /* stop slot (cf pcu->ca[]==11)   */
+
+   if (!a1_tot) {           /* nothing to average, warn and write nothing */
+      fprintf(pm->my_err, "Warning %.20s appear to be too short\n", title);
+      fprintf(pm->my_err, "No output was written to file   \n");
+      return 1;
+   }
+   for (i = 1; i < 22; i++)
+      if (i != 11)
+         aromo += ((float) nnaa[i] / (float) a1_tot) * (float) pap->aromo[i];
+
+   fprintf(foutput, "%8.6f%c", aromo,sp);
+   return 1;
+}
+
+
diff --git a/codons.c b/codons.c
new file mode 100755
index 0000000..309a4db
--- /dev/null
+++ b/codons.c
@@ -0,0 +1,1149 @@
+/**************************************************************************/
+/* CodonW codon usage analysis package                                    */
+/* Copyright (C) 2005            John F. Peden                            */
+/* This program is free software; you can redistribute                    */
+/* it and/or modify it under the terms of the GNU General Public License  */
+/* as published by the Free Software Foundation; version 2 of the         */
+/* License,                                                               */
+/*                                                                        */
+/* This program is distributed in the hope that it will be useful, but    */
+/* WITHOUT ANY WARRANTY; without even the implied warranty of             */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           */
+/* GNU General Public License for more details.                           */
+/* You should have received a copy of the GNU General Public License along*/
+/* with this program; if not, write to the Free Software Foundation, Inc.,*/
+/* 675 Mass Ave, Cambridge, MA 02139, USA.                                */
+/*                                                                        */
+/*                                                                        */
+/* The author can be contacted by email (jfp#hanson-codonw at yahoo.com Anti-*/
+/* Spam please change the # in my email to an _)                          */
+/*                                                                        */
+/* For the latest version and information see                             */
+/* http://codonw.sourceforge.net 					  */
+/**************************************************************************/
+/*                                                                        */
+/* -----------------------        Codons.C       ------------------------ */
+/* This file contains main() function and drives CodonW.                  */
+/*                                                                        */
+/* External subroutines and functions                                     */
+/* clearscr           screen clearing Macro defined in CodonW.h           */
+/* proc_comm_line     process command line arguments                      */
+/* initilize_point    assigns genetic code dependent parameters to structs*/
+/* initilize_coa      selects the default codons to exclude from the      */
+/*                    Correspondence Analysis                             */
+/* main_menu          The interactive menu system                         */
+/* clean_up           Re-zeros various internal counters and arrays       */
+/* open_file          Open files, checks for existing files               */
+/* fileclose          Closes files and returns a NULL pointer or exits    */
+/* textbin            Converts codon usage to binary data file            */
+/* dot(,X)            prints a period every X times it is called          */
+/* PrepAFC            Prepare for the COA                                 */
+/* DiagoRC            This routine generates the COA                      */
+/* colmout            write the output from COA to file                   */
+/* rowout             same as above except records the gene information   */
+/* inertialig         analyse row inertia and records the results to file */
+/* inertiacol         analyse column inertia and record the results       */
+/* suprow             add supplementary genes into COA                    */
+/* get_aa             converts a three base codon into a 1 or 3 letter AA */
+/* codon_error        Called after all codons read, checks data was OK    */
+/* rscu_usage_out     Write out RSCU                                      */
+/* codon_usage_out    Write out Codon Usage                               */
+/* raau_usage_out     Write out normalised amino acid usage               */
+/* dinuc_count        Count the dinucleotide usage                        */
+/* dinuc_out          Write out dinucleotide usage                        */
+/* aa_usage_out       Write out amino acid usage                          */
+/* gc_out             Writes various analyses of base usage               */
+/* cutab_out          Write a nice tabulation of the RSCU+CU+AA           */
+/* base_sil_us_out    Write out base composition at silent sites          */
+/* cai_out            Write out CAI usage                                 */
+/* cbi_out            Write out codon bias index                          */
+/* fop_out            Write out Frequency of Optimal codons               */
+/* enc_out            Write out Effective Number of codons                */
+/* hydro_out          Write out Protein hydropathicity                    */
+/* aromo_out          Write out Protein aromaticity                       */
+/* coa_raw_out        Write out raw codon usage for use by COA analysis   */
+/*                                                                        */
+/*                                                                        */
+/* Internal subroutines to Codon.c                                        */
+/* my_exit            Controls exit from CodonW closes any open files     */
+/* tidy               reads the input data                                */
+/* output             called from tidy to decide what to do with the data */
+/* toutput            handles the reformatting and translation of seqs    */
+/* output_long        if sequence is very long then process what we know  */
+/*                    and write sequence to disk in fragments             */
+/* file_close         Closes open files                                   */
+/* c_help             Generates help information                          */
+/* WasHelpCalled      Checks strings to see if help was requested         */
+/*                                                                        */
+/**************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include <errno.h>
+#include <ctype.h>
+
+#define  ORIG_DEFS    
+      /* used to decide whether declarations are external or not          */
+      /* Master Header file                                               */
+#include "codonW.h"     
+#undef   ORIG_DEFS
+
+
+#if defined(__MWERKS__)
+#include <console.h> 
+#endif     
+
+/**************************   MAIN   **************************************/
+/* The main function processes commandline arguments to decide whether    */
+/* CodonW is running in an interactive mode, if so then the menu is called*/
+/* CodonW also had the less documented feature of imitating other useful  */
+/* codon usage and sequence manipulation program.    If the program is    */
+/* called by a recognised name (see proc_comm_line for a list) such as    */
+/* rscu then pm->codonW is false and it only performs the required tasks  */
+/* bypassing the menu system.                                             */
+/* Main then calls tidy() to read in the data files, and count codon usage*/
+/* depending on the requested output options toutput calls various subrou */
+/* tines. If COA has been requested it also calls these subroutines and   */
+/* recording useful information to summary.coa.                           */
+/**************************************************************************/
+
+int main(int argc, char *argv[])
+{
+/* Each pass of the do-loop below runs one complete analysis; the loop    */
+/* is only repeated in interactive mode (pm->codonW && pm->menu). The     */
+/* program leaves through my_exit(), the final return is only a dummy.    */
+  FILE       *finput = NULL, *foutput = NULL, *fblkout = NULL;
+  FILE       *fcoaout = NULL;
+  FILE       *fsummary= NULL;
+  int        num_seq  = 0;
+
+  num_sequence     = 0;
+  num_seq_int_stop = 0;
+  valid_stops      = 0;
+  last_aa          = 0;
+
+#if defined(__MWERKS__) /* Macintosh code-warrior */
+  argc=ccommand(&argv);
+#endif
+
+  pm = &Z_menu;
+  pm->totals = FALSE;
+  pm->my_err = stderr;
+
+  initilize_point(pm->code, pm->f_type, pm->c_type);
+  initilize_coa(pm->code);
+
+  proc_comm_line(&argc, &argv);
+
+  /******************** main loop ****************************/
+
+  do {
+    if (pm->codonW) {
+                                  /* If the program   chosen is codons    */
+      printf(" Welcome to CodonW %.*s for Help type h\n\n",
+              (int) strlen(Revision) - 11, Revision +10 );
+                                  /* Now Run the main menu interface      */
+      if (pm->menu) main_menu(0);
+    }
+
+          /* if users select human readable output they want nice tables  */
+    if (pm->bulk == 'C' && pm->seq_format == 'H')  pm->bulk = 'O';
+    if (pm->bulk == 'S' && pm->seq_format == 'H')  pm->bulk = 'O';
+
+    pm->analysis_run = TRUE;        /* codons has started an analysis this*/
+                                    /* parameter is checked by my_exit    */
+
+    if (pm->inputfile != NULL)      /* rewind various input files in case */
+      rewind(pm->inputfile);        /* this is a second analysis run      */
+    if (pm->fopfile != NULL)
+      rewind(pm->fopfile);
+    if (pm->cbifile != NULL)
+      rewind(pm->cbifile);
+    if (pm->caifile != NULL)
+      rewind(pm->caifile);
+
+    /* num_sequence                 number of sequences read              */
+    /* num_seq_int_stop             number with internal stop codons      */
+    /* valid_stops                  No.terminated with a stop codon       */
+    /* tot                          total number of codons read           */
+
+    num_sequence = num_seq_int_stop = valid_stops = tot = 0;
+
+    clean_up(ncod, naa);            /*re-zero count of amino and codons   */
+    finput = pm->inputfile;
+    foutput = pm->outputfile;
+    fblkout = pm->tidyoutfile;
+
+    fileclose(&pm->fcoa_out);
+    if (pm->coa)
+      if ((pm->fcoa_out = open_file("", "coa_raw", "w", FALSE)) == NULL)
+        my_exit(1, "coa_raw");          /*controlled exit from CodonW     */
+    fcoaout = pm->fcoa_out;
+
+/*  Tidy                                                                  */
+/*  reads input data, returns the number of sequences read in             */
+/*  num_sequence is global so I don't really have to assign it here       */
+    num_sequence = tidy(finput, foutput, fblkout, fcoaout);
+
+    fprintf(pm->my_err,"\n\n\t\tNumber of sequences: %i\n",
+        num_sequence);
+
+/* num_seq_int_stop  value is calculated in codon_usage_out               */
+    if (num_seq_int_stop > 0 && pm->warn ) {
+      if (pm->totals && (num_seq_int_stop >= valid_stops ))
+           fprintf(pm->my_err, "\tWARNING\t At least one sequence in your"
+                    " input file has\ninternal stop codons (found %i"
+                    " internal stops) \tWARNING\n",num_seq_int_stop);
+      else
+           fprintf(pm->my_err, "\tWARNING\t %i sequences had internal "
+                           "stop codons \tWARNING\n",num_seq_int_stop);
+    }
+/* don't wait for a pause if no_menu has been set                          */
+    if ( pm->codonW && pm->menu ) pause;
+
+    if ( pm->coa && pm->totals)          /* idiots error catch             */
+      my_exit(99,"A COA analysis of concatenated sequences is nonsensical\n"
+      "I have completed any other requests but not the COA");
+
+/* if COA has been requested then open summary.coa and start the analysis  */
+    if (pm->coa) {
+     if (fsummary == NULL)
+      if ((fsummary = open_file("", "summary.coa", "w", FALSE)) == NULL)
+       my_exit(1, "summary.coa");
+/* set the number of genes in the analysis to the number read in by tidy   */
+     pcoa->rows = num_sequence;
+     fileclose(&pm->fcoa_out);      /* close coa_raw via its owner so that */
+     fcoaout = NULL;                /* no stale handle is re-closed later  */
+/* if COA has been selected then during the reading in phase raw codon usag*/
+/* will have been written to the file coa_raw                              */
+/* textbin converts this to binary data for the COA analysis program       */
+      textbin("coa_raw", "cbrawin");
+      printf("Generating correspondence analysis\n");
+      dot(0,10);
+
+      fprintf(fsummary, "\t\tSummary of Correspondence Analysis \n\n"
+                        "The input file was %s it contained %i genes\n"
+                        "The number of axes generated was %i\n"
+                        "A COA was requested of %s%s usage\n\n\n"
+                        "Most of the output presented in this file "
+                        "has also been written to separate files\n"
+                        "genes.coa\tThe position of the genes on the "
+                        "first %i axis\n"
+                        "%s.coa\tThe position of the %i %s on the %i "
+                        "principle axes\n\n\n",
+                        pm->curr_infilename,
+                        pcoa->rows,
+                        ((pcoa->rows<pcoa->colm)?pcoa->rows:pcoa->colm)-1,
+                        (pm->coa == 'r')        ?"relative synonymous ":"",
+                        (pm->coa == 'a')        ?"amino acid" : "codon",
+                        pcoa->axis,
+                        (pm->coa == 'a')        ?"amino" : "codon",
+                        pcoa->colm,
+                        (pm->coa == 'a')        ?"amino acids":"codons",
+                        pcoa->axis);
+/* allocate memory for the rows and columns, scale both, and write out the*/
+/* resulting matrix to the file cbrawin                                   */
+
+      PrepAFC("cbrawin");
+
+/* Now do the analysis, calculate the data inertia and all the vectors    */
+
+      DiagoRC(fsummary);
+
+/* colmout records the position of the columns on each of the factors/axes*/
+
+      if (pm->coa == 'a')
+          colmout("cbfcco", "amino.coa", paa, fsummary);
+      else
+          colmout("cbfcco", "codon.coa", paa, fsummary);
+
+/* rowout records the position of the genes on each of the axis           */
+
+      rowout("cbfcli", "genes.coa", "coa_raw", fsummary);
+
+/* pcoa->level == e for exhaustive analysis of inertia                    */
+      if (pcoa->level == 'e') {
+
+       fprintf(fsummary, "\n\n\nYou requested detailed output from the COA"
+           "\n\nThe absolute and relative inertia "
+           "of each gene and %s (see also inertia.coa)\n",
+           (pm->coa == 'a') ? "amino acids" : "codons");
+/* inertialig must precede inertiacol, records inertia of genes to file   */
+/* it opens the raw codon usage file and loads the raw data to memory     */
+       inertialig("inertia.coa", "coa_raw" ,fsummary);
+/* uses the preloaded raw codon usage, to calculate inertia and other data*/
+/* such as contribution of each column to each factor and to the extent   */
+/* each column is explained by each factor and what the residual variation*/
+/* is                                                                     */
+       inertiacol("inertia.coa", fsummary);
+      }
+
+/* if pcoa->add_row is real string, then it will be the name of the file  */
+/* containing additional sequence data, that will be excluded from the COA*/
+/* but factored in, using the original COA vectors and then all other     */
+/* calculation can proceed as with the original data                      */
+      if (strlen(pcoa->add_row)) {
+          if ((finput = open_file("", pcoa->add_row, "r", FALSE))
+              == NULL) my_exit(6, "add_row");
+          if ((foutput = tmpfile()) == NULL)
+              my_exit(1, "temp file foutput");
+          if ((fblkout = tmpfile()) == NULL)
+              my_exit(1, "temp file fblkout");
+
+          if ((fcoaout = open_file("", "coa1_raw", "w", FALSE)) == NULL)
+              my_exit(1, "coa1_raw");
+
+          clean_up(ncod, naa);
+          num_sequence =num_seq_int_stop=valid_stops=tot = 0;
+/* load the additional data file and process as normal                    */
+/* but don't calculate any indices or write the data to the normal output */
+/* files, rather write them to tmp files which will be deleted at end of  */
+/* program execution                                                      */
+          num_seq = tidy(finput, foutput, fblkout, fcoaout);
+
+/* close the files now we are finished                                    */
+          fileclose(&fcoaout);
+          fileclose(&foutput);
+          fileclose(&fblkout);
+          fileclose(&finput);
+
+/* convert to binary, use additional raw data file, note not coa_raw this */
+          textbin("coa1_raw", "cb1raw");
+/* now call the routine suprow and add these additional genes, we will    */
+/* process this data for inertia and append the gene and col. coordinates */
+/* to the original gene.coa and codon.coa (or amino.coa)                  */
+          suprow(num_seq, "cbfcvp", "cb1raw", "genes.coa", "coa1_raw", fsummary);
+
+/* close these files now that we have finished with them and the COA      */
+
+          fileclose(&foutput);
+          fileclose(&fblkout);
+          fileclose(&fcoaout);
+      }
+    }
+   printf("\n");
+  } while (pm->codonW && pm->menu ); /* OK now we loop back to main_menu  */
+/* though only if we are in interactive mode and running as CodonW        */
+  my_exit(0,"");                     /* last call to my_exit              */
+  return 0;                          /* dummy return to keep pedantic but */
+                                     /* brain dead compilers happy        */
+}
+
+/**********************  END of MAIN()   **********************************/
+
+
+/**********************  Subroutines     **********************************/
+/* Tidy                                                                   */
+/*  reads input data from a sequence file containing fasta like formatted */
+/*  sequence discards numbers, but keeps other characters                 */
+/*  Each sequence must begin with a title line starting with > or ;       */
+/*  any following descriptive lines must begin with ; or >.Sequence start */
+/*  is the first alphabetic character on the line following the headers   */
+/*  There is no limit to sequence length or number of sequences but       */
+/*  input lines should be less than 200 char in width                     */
+/**************************************************************************/
+
+int tidy(FILE * finput, FILE * foutput, FILE * fblkout, FILE * fcoaout)
+{
+  char            seq[MAX_GENE + LINE_LENGTH + 1];
+  char            in[LINE_LENGTH + 1];
+  int             first_line = TRUE, ic = 0;
+  int             ii = 0;
+  int             i,x;
+  long            ic_orig = 0;
+/* while still able to read data from the input file keep reading         */
+  while ((fgets(in, LINE_LENGTH, finput) != NULL)) {
+
+/* idiot error check to see if the file looks like fasta or PIR format    */ 
+    if (!num_sequence && in[0] != ';' && in[0] != '>') {
+      fprintf(stderr, "\n Error input file not in a recognised format \n"
+                      " you must convert it into FASTA/Pearson format" 
+                      " EXITING\n");
+      my_exit(99, "input file not in a recognised format:tidy");
+    }
+
+    if (in[0] == ';' || in[0] == '>') { /* if true them this is a header   */
+      if (first_line) {                 /* if true this is the first header*/
+
+	first_line = FALSE;                 /* will only be reset when reread  */
+                                        /* the next sequence               */
+	if (num_sequence) {                 /* wait till we have read the first*/
+                                        /* before writing to disk          */
+/* now if we are concatenating sequence data we need will handle it thus   */
+        if (pm->totals) {	    
+
+/* first if translating or reformatting the input file flush the read      */
+/* data to the disk                                                        */
+
+         if (strchr("RNT",(int)pm->bulk)!=NULL) output_long(fblkout, seq);
+         if (tot) {
+   /* if something we have sequence read in, then we need to process this  */
+   /* check whether the last codon of the sequence was was a stop          */
+            last_aa = codon_usage_tot(seq, tot);
+    	    if (pcu->ca[last_aa] == 11) valid_stops++;
+	     }
+/* rather re-setting everything to zero, we will just blank the array seq  */
+         tot = 0;  
+	  } else {     
+/* else matches if tot; if sequences are not being concatenated we call    */
+/* output to decide what to do with all the read data                      */
+/* then we blank all the data from memory and start again                  */
+        output(seq, foutput, fblkout, fcoaout);
+	    clean_up(ncod, naa);      
+	  }                 
+	}                                    /* matches if(num_sequence)       */
+
+/* If we get here we have read a header line, this then needs to be proc'ed*/
+/* first the header is tested to see does it contain spaces the string is  */
+/* converted from the first non space character to the title array         */
+
+ for (ii = 1; isspace( (int) in[ii]) && ii < (int) strlen(in); ii++)
+     ;
+ strncpy(title, in + ii, 99);
+
+/* Titles are cleaned up by removing newline characters and the delimiting */
+/* character p,->seperater and also null terminating the title string      */
+
+ for (i = 0; i < (int) strlen(title); i++) {
+	  if (title[i] == '\n')	
+          title[i] = '\0';          /* chops new line off                  */
+	  else if (title[i] == pm->seperator )
+	    title[i] = '_';             /* removes the separator if present    */
+	  else if (i == (int) (strlen(title) - 1))
+	    title[i] = '\0';            /* if we have reached end of title     */
+	}
+
+/* if we are reformatting the data, we print a friendly dot just in-case   */
+if (strchr("RNT", (int)pm->bulk) ==NULL || pm->totals)
+    dot((int) num_sequence, 5);       
+/* we have now finished processing our first header line and are reading   */
+/* our sequence data                                                       */
+num_sequence++; 
+      }                            /* matches if first line                */
+      continue;                    /* read another line ie. jump to while()*/
+    }                              /* if (in[0] == ';' || in[0] == '>')    */
+    else{                          /* this must be a line containing seq   */
+	first_line = TRUE;             /* so reset the first_line variable     */
+      }
+
+/* at this point we have read in the header lines and have been or about to*/
+/* process the input data, now we test how much we have read into the array*/
+/* seq, tot is equivalent to the last element in the array                 */
+/* if tot is greater than or equal to MAX_GENE then the array is quite full*/
+/* luckily we made the array seq to be MAX_GENE plus LINE_LENGTH +1        */
+
+    if (tot >= MAX_GENE) {         /* sequence is larger than seq          */
+      master_ic += MAX_GENE;       /* now remember how many bases we are   */
+      ic_orig = tot;               /* going to write to disk               */
+                                   /* and what size the array was to start */
+
+      if (strchr("RNT", (int) pm->bulk) != NULL)
+	     output_long(fblkout, seq);/* flush to disk and then continue      */
+      else if (pm->bulk == 'D')
+       	dinuc_count(seq, tot);     /* then we had better count the dinucs  */
+
+/* Debugging code in-case we are asking for something that we can't handle */
+#ifdef DEBUG
+      else if (strchr("OCASLDBX", (int) pm->bulk) != NULL) ; /* dummy      */ 
+      else if (pm->bulk)
+	fprintf(stderr, "ERROR-22 %c pm->bulk undefined\n", pm->bulk);
+
+      if (pm->cai || pm->fop || pm->cbi || pm->enc || pm->gc ||
+	  pm->gc3s || pm->sil_base || pm->bulk ||
+	  pm->coa);
+      else
+	fprintf(stderr, "Programming error");
+#endif
+
+
+/* Now count first MAX_GENE bases, luckily MAX_GENE is always a multiple of*/
+/* 3, we count the bases and amino acids in codon_usage_tot                */
+
+      last_aa = codon_usage_tot(seq, MAX_GENE);
+
+/* now we move all unprocessed/written/counted bases to the front of seq   */
+
+      for (i = MAX_GENE, x = 0; i < ic_orig; i++, x++)
+       seq[x] = seq[i];            /* i is pointing near the end of array  */
+      tot = x;                     /* x the front of the array             */
+    }                              /* Matches if (tot >= MAX_GENE)         */
+
+    ic = 0;                        /* first base of the input file         */
+    while (in[ic] != '\0') {       /* scan input line till we see a Null   */
+      if (isalpha((int)in[ic])) ;  /* do nothing if a alpha                */
+      else if (pm->bulk == 'R' && in[ic] == '-'); /* do nothing            */
+      else if (in[ic] == '*' || in[ic] == '.') ;  /* do nothing            */
+
+      else {
+    ic++;                          /* is not one above skip to next letter */
+    continue;                     
+      }                            /* while( in[ic] != '\0')               */
+
+
+   in[ic] = (char)toupper((int)in[ic]);/*   converts2capitals              */
+   if (strrchr("CG", (int) in[ic]) != NULL)
+	GC_TOT++;                          /* is it a G or C                   */
+   else if (strrchr("ATU", (int) in[ic]) != NULL)
+	AT_TOT++;                          /* is it an A or T                  */
+   else if ( in[ic] == '-' )
+	GAP_TOT++;                         /* is it a gap character            */
+   else
+	non_std_char++;                    /* then it isn't a standard base    */
+
+   if (strrchr("ABCDEFGHIKLMNPQRSTVWYZX"
+		  ,(int) in[ic]) != NULL)
+	AA_TOT++;                          /* it might be an amino acid        */
+      if (strrchr("MRWSYKVHDBXN" , (int) in[ic]) != NULL)
+	IUBC_TOT++;                        /* it might be a IUBC code          */
+
+  seq[tot] = in[ic];                       /* move base into seq array         */
+  seq[tot + 1] = '\0';                     /* make sure array is null term'ed  */
+
+
+ /* now we test that the first codon is a valid start codon                    */
+
+  if ( tot == 0 && master_ic == 0 ) {
+      	
+      in[1] = (char)toupper((int)in[1]);  /* Uppercase the first codon         */
+      in[2] = (char)toupper((int)in[2]);  
+
+       if ( in[1] == 'T' && (in[0] == 'A' || in[2] == 'G' ))
+	  valid_start=TRUE;                /* Yeup it could be a start codon   */
+	else
+	  valid_start=FALSE;               /* Nope it doesn't seem to be one   */
+  }
+      ic++;                            /* total No. of sequence bases read */
+      tot++;                           /* total currently stored in memory */
+    }
+  }                                    /* reached end of input file        */
+
+/* Idiot error catch, this file is empty, at least it looks empty to codonW*/
+
+  if ( !num_sequence ) my_exit(99,"The input file was empty");
+
+/* better make sure to write anything left in seq to disk before returning */
+
+  output(seq, foutput, fblkout, fcoaout);
+  return (int) num_sequence;
+}
+
+/************************  TOUTPUT       **********************************/
+/* toutput                                                                */
+/* Writes the tot bases held in seq to fblkout in the format chosen by    */
+/* pm->bulk: 'T' plain bases, 'R' reader format, i.e. space separated     */
+/* codons ("ACG ATT ATC"), 'N' one letter translation via get_aa().       */
+/* When output_long has already written earlier pieces (long_seq set)     */
+/* the dummy header at fl_pos_start is overwritten with the real length.  */
+/* A one line DNA/protein summary is then written to pm->my_err.          */
+/**************************************************************************/
+int toutput(FILE * fblkout, char *seq) {
+  long int        ic = 0;
+  int             space = 3;
+  char            codon[4];
+  int i,x;
+
+  if (long_seq == FALSE) {         /* then this must be a complete gene   */
+    switch (pm->bulk) {
+    case 'T':                      /* tidy or fasta formatted header      */
+      fprintf(fblkout, ">%-20.20s%6li\n",
+          title, (long int) tot + master_ic);
+      break;
+    case 'R':                      /* reader header: length comes first   */
+      fprintf(fblkout, ">%6li %-70.70s\n",
+          (long int) tot + master_ic, title);
+      break;
+    case 'N':                      /* Conceptually translated DNA header  */
+      fprintf(fblkout, ">%-20.20s%6li\n",
+           title, (long int) ((tot + master_ic) / 3));
+      break;
+    default:                       /* whoops                              */
+      printf("\nProgramming error type A2 check code \n");
+      my_exit(99, "toutput");
+      break;
+    }
+  } else {
+
+/* long_seq is true, so this is the last piece of a sequence whose earlier */
+/* pieces were written in MAX_GENE chunks by output_long. The header line  */
+/* written at that time could not hold the length because it was not yet   */
+/* known; now it is, so the header is rewritten in place. output_long      */
+/* recorded where that header line starts in the file, in fl_pos_start.    */
+/* The rewritten header has no newline so the old one is left untouched.   */
+
+    fl_pos_curr = ftell(fblkout);   /* record where we are at present     */
+    fseek(fblkout, fl_pos_start, SEEK_SET);  /* back to the header line   */
+    switch (pm->bulk) {
+    case 'T':                       /* Now update the info                */
+      fprintf(fblkout, ">%-20.20s%6li",
+          title, (long int) tot + master_ic);
+      break;
+    case 'R':
+      fprintf(fblkout, ">%6li %-70.70s",
+          (long int) tot + master_ic, title);
+      break;
+    case 'N':
+      fprintf(fblkout, ">%-20.20s%6li", title,
+              (long int) ((tot + master_ic) / 3));
+      break;
+    default:
+      printf("\nProgramming error type A3 check code \n");
+      my_exit(99, "toutput");       /* name this function, not output     */
+    }
+    fseek(fblkout, fl_pos_curr, SEEK_SET);   /* and return to where we were*/
+  }
+
+
+  while (ic < tot) {               /* keep writing till the array is empty*/
+    switch (pm->bulk) {
+    case 'T':
+      fprintf(fblkout, "%c", seq[ic++]);
+      reg++;
+      break;
+    case 'R':
+      if (space == 3) {            /* Its reader format so print a space   */
+        fprintf(fblkout, " ");     /* every third base                     */
+        space = 0;
+      } else {                     /* not the 3rd base yet so just print   */
+        fprintf(fblkout, "%c", seq[ic++]);
+        space++;
+        reg++;
+      }
+      break;
+    case 'N':
+      for (i = (int) ic, x = 0; i < (int) ic + 3 && i < tot; i++, x++)
+          codon[x] = *(seq + i);   /* get the next three bases if there    */
+      codon[x] = '\0';             /* null terminate the codon array       */
+      ic += 3;                     /* remember that we have read 3 bases   */
+      /* use the function get_aa to return the amino acid for the codon    */
+      /* 1 = is for the one letter code of the codon                       */
+      fprintf(fblkout, "%c", *get_aa(1, codon));
+      reg++;
+      break;
+    }
+    if (!(reg % 61)) {             /* every 60 bases print a new line char */
+      reg = 1;
+      fprintf(fblkout, "\n");
+    }
+  }
+
+  if (reg != 1) {                  /* reached the end of sequence so we    */
+    fprintf(fblkout, "\n");        /* print a \n char unless we just did   */
+    reg = 1;                       /* reset  number of bases printed       */
+  }
+
+/* Now that we have finished writing this sequence to disk lets have a     */
+/* closer look at it, and do a few diagnostics about the bases used        */
+
+  if (AT_TOT + GC_TOT > AA_TOT*0.5) {/* Assume its DNA then                */
+    fprintf(pm->my_err, "%3li>\t%6li %-40.40s\tDNA\tGC%%"
+        " =%5.3f\n"                 /* "%%" prints the literal percent sign*/
+        ,(long int) num_sequence    /* casts make every %li argument long  */
+        ,(long int) tot + master_ic, title
+        ,(float) GC_TOT / (GC_TOT + AT_TOT));
+
+    if (non_std_char - IUBC_TOT && pm->warn )  /* any non IUBC characters */
+      fprintf(pm->my_err, "\t\t WARNING %li non IUBC standard characters "
+          "in sequence %li\n"
+          ,(long int) (non_std_char - IUBC_TOT)
+          ,(long int) num_sequence);
+  } else {                         /* if not DNA then it must be a protein */
+    fprintf(pm->my_err, "\t%3li>\t%6li %-40.40s\tPROTEIN\n"
+        ,(long int) num_sequence
+        ,(long int) tot + master_ic
+        ,title);
+    if ( (tot+master_ic)-AA_TOT && pm->warn)  /* non IUBC AA chars        */
+      fprintf(pm->my_err, "\t\t WARNING %li non "
+          "standard AA characters "
+          "in sequence %li\n"
+          ,(long int) non_std_char
+          ,(long int) num_sequence);
+  }
+  return 1;                        /* return to calling function           */
+}
+
+
+/************************* output_long   **********************************/
+/* Writes up to MAX_GENE bases of seq to fblkout for a sequence that has  */
+/* outgrown the MAX_GENE limit. On the first call for a sequence (long_seq*/
+/* is FALSE) the file position is saved in fl_pos_start and a dummy       */
+/* header line is written there, which toutput later fills in. Returns 1. */
+/**************************************************************************/
+
+int output_long(FILE * fblkout, char *seq)
+{
+  long int        pos = 0;         /* next base of seq to be written      */
+  char            gap = 3;         /* bases written since the last blank  */
+  char            codon[4];
+  int             n;               /* number of bases copied into codon   */
+
+  if (long_seq == FALSE) {
+/* first piece of this sequence: note where its header line starts and    */
+/* put a placeholder header there                                         */
+
+    fl_pos_start = ftell(fblkout);
+    if (pm->bulk != 'R')
+      fprintf(fblkout, ">%-20.20s%9s\n", title, "    ");
+    else
+      fprintf(fblkout, ">%6s %-72.72s\n", "      ", title);
+    long_seq = TRUE;
+  }
+/* each pass of the loop writes one base, one blank or one amino acid     */
+  while (pos < tot && pos < MAX_GENE) {
+    switch (pm->bulk) {
+    case 'T':                      /* plain bases                         */
+      fprintf(fblkout, "%c", seq[pos++]);
+      reg++;
+      break;
+    case 'R':                      /* bases with a blank before each codon*/
+      if (gap != 3) {
+        fprintf(fblkout, "%c", seq[pos++]);
+        gap++;
+        reg++;
+      } else {
+        fprintf(fblkout, " ");
+        gap = 0;
+      }
+      break;
+    case 'N':                      /* one amino acid per codon            */
+      for (n = 0; n < 3 && (int) pos + n < tot; n++)
+        codon[n] = seq[(int) pos + n];
+      codon[n] = '\0';
+      fprintf(fblkout, "%c", *get_aa(1, codon));
+      pos += 3;
+      reg++;
+      break;
+    default:
+      printf("\nProgramming error type A1 check code \n");
+      my_exit(99, "output_long");
+    }
+    if (reg % 61 == 0) {           /* line is full, start a new one       */
+      fprintf(fblkout, "\n");
+      reg = 1;
+    }
+  }
+  return 1;                        /* back to the caller                  */
+}
+
+
+/*************************  output       **********************************/
+/* Called once a sequence has been read into memory by tidy. It counts    */
+/* any bases still held in seq, runs the codon_error checks and then,     */
+/* depending on pm->bulk and the index flags in pm, writes the bulk output*/
+/* to fblkout and one line of indices to foutput (coa_raw_out gets fcoaout)*/
+/**************************************************************************/
+
+
+void output(char *seq, FILE * foutput, FILE * fblkout, FILE * fcoaout)
+{
+  char sp;
+ 
+  /* column delimiter: a tab if seq_format is 'H', else pm->seperator     */
+  sp = (char) (pm->seq_format=='H')? (char) '\t': (char) pm->seperator;
+
+  if (tot) {                       /* still data in array seq..           */
+    last_aa = codon_usage_tot(seq, tot);
+    if (pcu->ca[last_aa] == 11)
+      valid_stops++;                /* check the last codon was a stop    */
+  }
+  
+  /* codon_error, if 4th parameter is 1, then checks for valid start and  */
+  /* internal stop codon, if 4th parameter is 2, checks that the last     */
+  /* codon is a stop or was partial, and for non-translatable codons      */
+  codon_error(last_aa, valid_stops, title, (char) 1); 
+  codon_error(last_aa, valid_stops, title, (char) 2);
+
+  /* if we are concatenating sequences the title becomes "Average of genes"*/
+  if(pm->totals)                   
+    (pm->seq_format=='M')?
+      strcpy(title, "Average_of_genes"):
+      strcpy(title, "Average of genes");
+  /* NOTE(review): strchr() also matches the string terminator, so a      */
+  /* pm->bulk of 0 would be sent to toutput; confirm that cannot happen   */
+  if (strchr("RNT", (int) pm->bulk) != NULL) {
+    /* better write the remaining sequence in seq to disk                 */
+    toutput(fblkout, seq);                              
+  } else if (strchr("OCASDLDBX", (int) pm->bulk) != NULL) {
+
+/* Exactly one of these bulk output routines is called for each sequence, */
+/* selected by pm->bulk. Each is handed fblkout, the bulk output file     */
+/* (gc_out is also handed foutput)                                        */
+
+    switch ((int) pm->bulk) {
+    case 'S':
+      rscu_usage_out(fblkout, ncod, naa);
+      break;
+    case 'C':
+      codon_usage_out(fblkout, ncod, last_aa, valid_stops, title);
+      break;
+    case 'L':
+      raau_usage_out(fblkout, naa);
+      break;
+    case 'D':
+      dinuc_count(seq, tot);
+      dinuc_out(fblkout, title);
+      break;
+    case 'A':
+      aa_usage_out(fblkout, naa);
+      break;
+    case 'B':
+      gc_out(foutput, fblkout, 1);
+      break;
+    case 'O':
+      cutab_out(fblkout, ncod, naa);
+      break;
+    case 'X':
+            /* X means that no bulk output is written */
+      break;
+    default:
+      fprintf(stderr, "ERROR-23 %s bulk undefined\n",  pm->prog);
+      my_exit(99, "output");
+      break;
+    }
+  } else if (pm->bulk) {            /* just a programming error catch     */
+    fprintf(stderr, "ERROR-24 %s -prog undefined\n", pm->prog);
+    my_exit(99, "output");
+  }
+  
+  
+  /* true if at least one index (or the COA) has been requested           */                    
+  if (pm->sil_base || pm->cai || pm->fop   || pm->enc  || pm->gc3s ||
+            pm->gc || pm->cbi || pm->L_sym || pm->L_aa || pm->coa  || 
+            pm->hyd|| pm->aro) {
+      /* first sequence, or totals requested: write the column headings   */
+
+    if (num_sequence == 1 || pm->totals) {
+
+      fprintf(foutput, (pm->seq_format == 'H')?
+	      "%-25.25s%c":"%-.25s%c"
+	      ,"title",sp);
+      if (pm->sil_base)
+	fprintf(foutput, "%s%c%s%c%s%c%s%c", "T3s",sp,"C3s",sp,"A3s",sp,
+"G3s",sp);
+      if (pm->cai)
+	fprintf(foutput, "%s%c", "CAI",sp);
+      if (pm->cbi)
+	fprintf(foutput, "%s%c", "CBI",sp);
+      if (pm->fop)
+	fprintf(foutput, "%s%c", "Fop",sp);
+      if (pm->enc)
+	fprintf(foutput, "%s%c", "Nc",sp);
+      if (pm->gc3s)
+	fprintf(foutput, "%s%c", "GC3s" ,sp);
+      if (pm->gc)
+	fprintf(foutput, "%s%c", "GC"   ,sp);
+      if (pm->L_sym)
+	fprintf(foutput, "%s%c", "L_sym",sp);
+      if (pm->L_aa)
+	fprintf(foutput, "%s%c", "L_aa" ,sp);
+      if (pm->hyd)
+	fprintf(foutput, "%s%c", "Gravy",sp);
+      if (pm->aro)
+	fprintf(foutput, "%s%c", "Aromo",sp);
+
+      fprintf(foutput, "\n");
+    }
+    
+    /* if output format is human readable ('H') print the title padded to */
+    /* 25 characters, else print just the title (at most 25 characters)   */
+    fprintf(foutput, (pm->seq_format == 'H')?
+	      "%-25.25s%c":"%-.25s%c"
+	      ,title,sp);
+
+    /* Separate ifs, not a switch, as several indices may be requested    */
+    /* at once; the tests are in the same order as the headings above     */
+    if (pm->sil_base)
+      base_sil_us_out(foutput, ncod, naa);
+    if (pm->cai)
+      cai_out(foutput, ncod);
+    if (pm->cbi)
+      cbi_out(foutput, ncod, naa);  
+    if (pm->fop)
+      fop_out(foutput, ncod);
+    if (pm->enc)
+      enc_out(foutput, ncod, naa);
+    if (pm->gc3s)
+      gc_out(foutput, fblkout, 3);
+    if (pm->gc)
+      gc_out(foutput, fblkout, 2);
+    if (pm->L_sym)
+      gc_out(foutput, fblkout, 4);
+    if (pm->L_aa)
+      gc_out(foutput, fblkout, 5);
+    if (pm->hyd)
+      hydro_out(foutput, naa);
+    if (pm->aro)
+      aromo_out(foutput, naa);
+    if (pm->coa)
+      coa_raw_out(fcoaout, ncod, naa, title);
+
+    fprintf(foutput, "\n");
+
+  }
+  return;
+}
+
+/************************* my_exit       **********************************/
+/* Closes every file handle held in pm, deletes any scratch binary files  */
+/* left in the working directory, reports the files that were used (or    */
+/* warns that no analysis was run) and then ends the program. error_num   */
+/* selects the final message and exit status; message is echoed in it.    */
+/**************************************************************************/
+
+int my_exit(int error_num, char *message)
+{
+  /* scratch files that the correspondence analysis can leave behind;    */
+  /* cb1raw is the binary copy of coa1_raw that main() makes with textbin */
+  static char *scratch[] = {
+    "cbrawin",
+    "cbfcco",
+    "cbfcli",
+    "cbfcpc",
+    "cbfcpl",
+    "cbfcta",
+    "cbfcvp",
+    "cb1rawin",
+    "cb1raw"
+  };
+  FILE *probe;                      /* used only to test for existence    */
+  int   i;
+
+  fileclose(&pm->inputfile);
+
+  /* if we are masquerading as another program, outputfile and tidyoutfile*/
+  /* hold the same file handle and it must not be closed twice            */
+  if ( pm->outputfile == pm->tidyoutfile ){
+   fileclose(&pm->outputfile);
+  }else{
+   fileclose(&pm->outputfile);
+   fileclose(&pm->tidyoutfile);
+  }
+
+  fileclose(&pm->cuout);
+  fileclose(&pm->fopfile);
+  fileclose(&pm->cbifile);
+  fileclose(&pm->caifile);
+  fileclose(&pm->logfile);
+  fileclose(&pm->fcoa_in);
+  fileclose(&pm->fcoa_out);
+
+  /* delete each scratch file that exists; fopen() is only an existence   */
+  /* test and uses a local handle so that pm->inputfile is never left     */
+  /* pointing at a stream that has already been closed                    */
+  for (i = 0; i < (int) (sizeof(scratch) / sizeof(scratch[0])); i++) {
+    probe = fopen(scratch[i], "r");
+    if (probe != NULL) {
+      fclose(probe);
+      deletefile(scratch[i]);
+    }
+  }
+
+  if (error_num == 2 || error_num == 0 ) {
+    if (pm->analysis_run) {
+      fprintf(stderr, "Files used:\n");
+      if (strlen(pm->curr_infilename))
+        fprintf(pm->my_err, " Input file was\t %s \n",
+                pm->curr_infilename);
+
+      if (strlen(pm->curr_outfilename)){
+        fprintf(pm->my_err, " Output file was\t %s %s",
+                pm->curr_outfilename,
+                (pm->codonW) ? " (codon usage indices, e.g. gc3s)\n":"\n");
+      }
+
+      if (strlen(pm->curr_tidyoutname)){
+        fprintf(pm->my_err, " Output file was\t %s %s",
+                pm->curr_tidyoutname,
+                (pm->codonW) ? " (bulk output e.g. raw codon usage)\n":"\n");
+      }
+
+      if (pm->coa)
+        fprintf(pm->my_err, " For more information about the COrrespondence "
+                "Analysis see summary.coa\n");
+    } else if ( pm->codonW )
+      fprintf(stderr, " \n\n WARNING You are exiting before codonW has generated any results\n"
+              "  Select 'r' from the main menu to run\n");
+  }
+
+  if ( pm->codonW )  printf("\n CodonW has finished\n");
+
+  /* error_num picks the closing message and the exit status of the       */
+  /* process: 0 (silent), 2 (user requested exit) and 99 (controlled      */
+  /* exit) all end with status 0; 1 and 3 to 7 (file and memory failures) */
+  /* and any unlisted value end with status 1. Note that tidy() uses 99   */
+  /* for fatal input errors as well, which therefore report status 0      */
+  switch ((int) error_num) {
+
+  case 0:
+    /* silent exit */
+    exit(0);
+    break;
+  case 1:
+    printf("failed to open file for output <%s>\n", message);
+    exit(1);
+    break;
+  case 2:
+    printf("user requested exit <%s>\n", message);
+    exit(0);
+    break;
+  case 3:
+    printf("failed to allocate memory <%s>\n", message);
+    exit(1);
+    break;
+  case 4:
+    printf("Write to disk failed ! <%s>\n", message);
+    exit(1);
+    break;
+  case 5:
+    printf("Read from disk failed! <%s>\n", message );
+    exit(1);
+    break;
+  case 6:
+    printf("failed to open file for reading <%s>\n", message);
+    exit(1);
+    break;
+  case 7:
+    printf("failed to close file <%s>\n", message);
+    exit(1);
+    break;
+  case 99:
+    printf(" Controlled exit <%s>\n",message);
+    exit(0);
+    break;
+  default:
+    printf("for unknown reason\n");
+    exit(1);
+    break;
+  }
+  return 0;                         /* never reached, every case exits    */
+}
+
+/************************** file_close   **********************************/
+/* fileclose: closes *file_pointer if it is open and sets it to NULL      */
+/**************************************************************************/
+
+int  fileclose(FILE ** file_pointer)
+{
+   /* a NULL argument or an already closed handle is simply ignored       */
+   if (file_pointer != NULL && *file_pointer != NULL ) {
+     if (fclose(*file_pointer) == EOF ) {
+       int saved_errno = errno;     /* fprintf() may overwrite errno      */
+       fprintf(stderr,"Failed to close file %i \n",saved_errno);
+       errno = saved_errno;         /* so that perror() reports fclose()  */
+       perror ("Unexpected condition in fileclose");
+       exit(7);                     /* my_exit() would call back in here  */
+     }
+     *file_pointer = NULL;          /* make sure file_pointer is null     */
+   }
+  return 1;                         /* always 1; failure exits above      */
+}
+
+/************************** Chelp    **************************************/
+/* chelp prints (on stderr) the help file text filed under help_keyword.  */
+/* A topic starts at the line containing #keyword# and ends at the next   */
+/* line containing //. Returns 1 if the topic was printed, otherwise 0.   */
+/**************************************************************************/
+
+int chelp ( char *help_keyword )
+{
+ char helplib [MAX_FILENAME_LEN]="";
+ char *p=NULL, inhelp=FALSE;
+ char QueryString[120];            /* limit for help phrase is 120 chars  */
+ char HelpMessage[121];
+ int  line_counter=2;              /* assume 2 blank lines to start with  */
+ FILE *hfp=NULL;
+/* The initial step is to locate the help file                            */
+/* First check if CODONW_H has been set as an environment variable        */
+/* If not then assume that the help file is in the current directory      */
+
+ p=getenv( "CODONW_H" );
+ if ( p == NULL )
+     p = "codonW.hlp";
+/* bounded copy: helplib was zero filled by its initialiser, so leaving   */
+/* its last byte alone keeps it terminated even if the path is too long   */
+ strncpy ( helplib , p , sizeof(helplib) - 1 );
+
+ hfp=open_file("",helplib, "r", FALSE);
+
+/* if we can't open the help file then explain what we were trying to do   */
+
+ if ( hfp == NULL ) {
+           fprintf ( stderr ,
+                  "Could not open help file codonw.hlp\n"
+                   "Expected to find this file in %s\n"
+                   "This can be overridden by setting the "
+                   "environmental variable\n"
+                   "CODONW_H to the help file location\n",
+                   helplib);
+                   pause;           /* make sure they Ack. the error mesg  */
+                   return 0;        /* abort                               */
+     }
+/* Now that the help file is open, build "#keyword#" without overflowing   */
+
+strcpy (QueryString , "#");
+strncat (QueryString , help_keyword , sizeof(QueryString) - 3 );
+strcat (QueryString , "#");
+fprintf(stderr,"\n\n");
+
+/* now scan the help file looking for this keyword                         */
+
+while ( fgets ( HelpMessage, 120, hfp ) )   {
+
+ if ( strstr (HelpMessage,QueryString) != NULL )
+     inhelp=TRUE;                                          /* we found it  */
+
+ else if  ( inhelp && strstr ( HelpMessage  , "//") )  {   /* found the end*/
+           fileclose(&hfp );
+           if ( line_counter )pause;
+           return 1;
+     }
+ /* inhelp is true: the keyword was found and the closing // not yet seen  */
+ else if  ( inhelp ) {
+     if ( strchr(HelpMessage,'\n') )
+        fprintf ( stderr, "%s",HelpMessage );
+                                           /*stderr,it must be interactive */
+     else
+        fprintf ( stderr, "%s\n",HelpMessage );
+                                           /*make sure there are line feeds*/
+
+
+  /* count how many lines have been printed to the terminal and compare it */
+  /* with the length of the terminal screen as defined by pm->term_length  */
+
+  if (line_counter++ >= pm->term_length-3 && line_counter ) {
+    line_counter=0;
+    pause;
+    fprintf(stderr, "%s",HelpMessage);
+   }
+ }
+}
+fileclose(&hfp);                    /* end of file: release the help file  */
+/* Either the keyword was never seen or its closing // is missing          */
+if ( inhelp == FALSE ){
+   fprintf ( stderr ," Error in help file, %s not found ", QueryString);
+   pause;
+  }
+else {
+    fprintf (stderr , "Premature end of help file ...  \n");
+    pause;
+ }
+return 0;                           /* failed for some reason             */
+}
+
+
+/******************** WasHelpCalled     ***********************************/
+/* Checks the string input to see if the user asked for help              */
+/**************************************************************************/
+
+char WasHelpCalled ( char * input ) {
+    char ans = FALSE;
+
+    if ( strlen ( input) == 1 && (char)toupper((int)input[0]) == 'H') 
+        ans = TRUE;
+    else if ( !strcmp ( input , "help") ) 
+        ans = TRUE;
+    else if ( !strcmp ( input , "HELP") ) 
+        ans = TRUE;
+
+    return ans;
+}
diff --git a/commline.c b/commline.c
new file mode 100755
index 0000000..53c3f0d
--- /dev/null
+++ b/commline.c
@@ -0,0 +1,755 @@
+/**************************************************************************/
+/* CodonW codon usage analysis package                                    */
+/* Copyright (C) 2005            John F. Peden                            */
+/* This program is free software; you can redistribute                    */
+/* it and/or modify it under the terms of the GNU General Public License  */
+/* as published by the Free Software Foundation; version 2 of the         */
+/* License,                                                               */
+/*                                                                        */
+/* This program is distributed in the hope that it will be useful, but    */
+/* WITHOUT ANY WARRANTY; without even the implied warranty of             */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           */
+/* GNU General Public License for more details.                           */
+/* You should have received a copy of the GNU General Public License along*/
+/* with this program; if not, write to the Free Software Foundation, Inc.,*/
+/* 675 Mass Ave, Cambridge, MA 02139, USA.                                */
+/*                                                                        */
+/*                                                                        */
+/* The author can be contacted by email (jfp#hanson-codonw at yahoo.com Anti-*/
+/* Spam please change the # in my email to an _)                          */
+/*                                                                        */
+/* For the latest version and information see                             */
+/* http://codonw.sourceforge.net 					  */
+/**************************************************************************/
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include "codonW.h"
+
+/************** proc_comm_line  *******************************************/
+/* Parses the command line handed over by the caller through pargc/pargv. */
+/* The basename of argv[0] selects a programme to impersonate (tidy, cai..)*/
+/* dash-prefixed arguments set fields of pm (and of pcoa/coa for the COA  */
+/* options) and unrecognised ones are reported. What is left is taken, in */
+/* order, as the input, output and bulk output filenames; missing ones are*/
+/* asked for or derived. Returns 1; fatal errors are passed to my_exit(). */
+/**************************************************************************/
+int proc_comm_line( int *pargc , char ***pargv)     
+{
+    char    *p;
+    char    c;
+    int     n;
+    char    prog_name[64] = "";          /* both zero filled so that a     */
+    char    root[MAX_FILENAME_LEN] = ""; /* short strncpy stays terminated */
+    
+/* decide how to process argv[0] which will be the name of the programme  */
+#if defined (_WINDOWS) || defined (_DOS) || defined ( WIN32 ) 
+    if ( (p = strrchr(**pargv, '\\')) != NULL )
+        strncpy(prog_name, p+1 , sizeof(prog_name) - 1);
+    else
+        strncpy(prog_name, **pargv, sizeof(prog_name) - 1);
+#elif defined (BSD) || defined(SYSV) || defined (UNIX)  || defined (LINUX) /* Must be unix   */
+    if ( (p = strrchr(**pargv, '/')) != NULL )
+        strncpy(prog_name, p+1, sizeof(prog_name) - 1);
+    else
+        strncpy(prog_name, **pargv, sizeof(prog_name) - 1);  
+#elif defined(VMS) || defined (OPENVMS)          /* maybe VMS or OPENVMS  */
+    if ( (p = strrchr(**pargv, ']')) != NULL )
+        strncpy(prog_name, p+1, sizeof(prog_name) - 1);
+    else
+        strncpy(prog_name, **pargv, sizeof(prog_name) - 1);        
+#else
+    printf("UNRECOGNISED SYSTEM type won't be able to impersonate other programmes\n");
+    strcpy(prog_name, "codon");	               /* OK I give up          */
+#endif
+
+  if ( (p = strrchr(prog_name, '.')) != NULL )  /* remove file extension  */        
+      *p = '\0';                                /* tidy.exe -> tidy       */
+
+     /* first call to garg initialises the function  with the command line*/
+     /* parameters and the number of arguments, subsequent calls strip    */
+     /* these off one by one                                              */
+ 
+     /* has the user asked for help               ????????????            */
+  if ((p = garg(*pargc, *pargv, "-h", GARG_EXACT)) || 
+      (p = garg(0, NULL, "-help", GARG_EXACT))){
+      printf(
+	       "codonW [inputfile] [outputfile] [bulkoutfile] [options]\n"
+	       "General options and defaults:\n"
+	       " -h(elp)\tThis help message\n"
+	       " -nomenu\tPrevent the menu interface being displayed\n"
+               " -nowarn\tPrevent warnings about sequences being displayed\n"
+	       " -silent\tOverwrite files silently\n"     
+	       " -totals\tConcatenate all genes in inputfile\n"
+	       " -machine\tMachine readable output\n"
+	       " -human\t\tHuman readable output\n"
+	       " -code N\tGenetic code as defined under menu 3 option 5\n"
+	       " -f_type N\tFop/CBI codons as defined by menu 3 option 6\n"
+	       " -c_type N\tCai fitness values as defined by menu 3 option 7\n"
+	       " -t (char)\tColumn separator to be used in output files "
+                    "(comma,tab,space)\n"
+               "\nCodon usage indices and Amino acid indices \n"
+	       " -cai\t\tcalculate Codon Adaptation Index (CAI)\n"
+	       " -fop\t\tcalculate Frequency of OPtimal codons index (FOP)\n"
+	       " -cbi\t\tcalculate Codon Bias Index (CBI)\n"
+	       " -enc\t\tEffective Number of Codons (ENc)\n"
+	       " -gc\t\tG+C content of gene (all 3 codon positions)\n"
+	       " -gc3s\t\tGC of synonymous codons 3rd positions\n"
+	       " -sil_base\tBase composition at synonymous third codon "
+		   "positions\n"
+
+               );
+	pause;
+	printf(
+	       " -L_sym\t\tNumber of synonymous codons\n"
+	       " -L_aa\t\tTotal number of synonymous and non-synonymous codons\n"
+	       " -all_indices\t\tAll the above indices\n"
+	       " -aro\t\tCalculate aromaticity of protein\n"
+	       " -hyd\t\tCalculate hydropathicity of protein\n"
+	       " -cai_file  {file}\tUser input file of CAI values\n"
+	       " -cbi_file  {file}\tUser input file of CBI values\n"
+	       " -fop_file  {file}\tUser input file of Fop values\n"
+               "\nCorrespondence analysis (COA) options \n"
+	       " -coa_cu \tCOA of codon usage frequencies\n"
+	       " -coa_rscu\tCOA of Relative Synonymous Codon Usage\n"
+	       " -coa_aa\tCOA of amino acid usage frequencies\n"
+	       " -coa_expert\tGenerate detailed(expert) statistics on COA\n"
+	       " -coa_axes N\tSelect number of axis to record\n"
+	       " -coa_num N\tSelect number of genes to use to identify "
+           "optimal codons\n"
+	       "\t\tvalues can be whole numbers or a percentage (5 or 10%%)\n"
+	       "\nBulk output options | only one can be selected per analysis\n"
+	       " -aau\t\tAmino Acid Usage (AAU)\n"
+	       " -raau\t\tRelative Amino Acid Usage (RAAU)\n"
+	       " -cu\t\tCodon Usage (CU) (default)\n"
+	       );
+	pause;
+	printf(
+	       " -cutab\t\tTabulation of codon usage\n"
+	       " -cutot\t\tTabulation of dataset's codon usage\n"
+	       " -rscu\t\tRelative Synonymous Codon Usage (RSCU)\n"
+	       " -fasta\t\tfasta format\n"
+	       " -tidy\t\tfasta format\n"
+	       " -reader\tReader format (codons are separated by spaces)\n"
+	       " -transl\tConceptual translation of DNA to amino acid\n"
+	       " -base\t\tDetailed report of codon G+C composition\n"
+	       " -dinuc\t\tDinucleotide usage of the three codon pos.\n"
+	       " -noblk\t\tNo bulk output to be written to file\n"
+	       "\nWhere {file} represents an input filename, and N an integer"
+           " value"
+            );
+        pause;
+        my_exit(99,"");             /* after writing out help quit         */
+    }
+    
+
+/* These parameters are normally set in menu3 ie. the defaults menu        */ 
+/* for an explanation of the various GARG_ flags see garg                  */
+
+/* -silent stops warnings about file about to be overwritten               */
+    if (garg(0, NULL, "-silent", GARG_THERE))    
+        pm->verbose = FALSE;
+
+    
+/* -total  causes sequences to be concatenated and treated as one sequence */
+    if (  garg(0, NULL, "-total" ,  GARG_THERE))
+        pm->totals = TRUE;          
+
+/* -machine or -human determines for whom the output should be formatted   */
+    if (p = garg(0, NULL, "-human", GARG_THERE)) 
+        pm->seq_format = 'H';
+    if (p = garg(0, NULL, "-mach",  GARG_THERE))
+        pm->seq_format = 'M';
+/* -code determines the genetic code. NOTE(review): '>' below accepts the  */
+/* value NumGeneticCodes itself while -f_type/-c_type use '>='; confirm    */   
+    if (p = garg(0, NULL, "-code", GARG_NEXT | GARG_EXACT)) {
+        strcpy(pm->junk, p);
+        n=0;
+        while ( isdigit( (unsigned char) pm->junk[n]) && pm->junk[n] != '\0') 
+            n++;
+        if ( n != (int)strlen(pm->junk)
+             || atoi(pm->junk) < 0 || atoi(pm->junk) > NumGeneticCodes ) {  
+            printf( "FATAL: The value for genetic code %s is invalid\n", 
+			         pm->junk);
+            my_exit(99,"Fatal error in genetic code value");
+        } else {
+            pm->code = (char) atoi(p);              /* define genetic code */
+	    initilize_point(pm->code, pm->f_type, pm->c_type);
+        }
+    }
+
+/* -f_type selects which of the predefined fop values to use               */
+/* NB. The fop is selected with an integer value, the check below accepts  */
+/* whole numbers from 0 up to NumFopSpecies-1                              */
+
+
+  if (p = garg(0, NULL, "-f_type", GARG_NEXT | GARG_EXACT))  {
+        strcpy(pm->junk, p);
+        n = 0;
+        while ( isdigit( (unsigned char) pm->junk[n])  && pm->junk[n] != '\0') 
+            n++;
+        if ( n != (int)strlen(pm->junk) || atoi(pm->junk) < 0 || 
+		atoi(pm->junk) >= NumFopSpecies ) {
+            printf("FATAL: The value for fop_type %s is not valid\n", 
+			        pm->junk);
+            my_exit(99,"Fatal error in Fop value");
+        } else { 
+            pm->f_type = (char) atoi(p);  /* define organism type for Fop  */
+	    initilize_point(pm->code, pm->f_type, pm->c_type);
+        }
+    }
+
+/* -c_type selects which of the predefined CAI values to use               */
+/* NB. The CAI is selected with an integer value, the check below accepts  */
+/* whole numbers from 0 up to NumCaiSpecies-1                              */
+    if (p = garg(0, NULL, "-c_type", GARG_NEXT | GARG_EXACT))  {
+        strcpy(pm->junk,p);
+        n = 0;
+        while ( isdigit( (unsigned char) pm->junk[n])  && pm->junk[n] != '\0') 
+            n++;
+        if ( n != (int)strlen(pm->junk) || atoi(pm->junk) < 0 || 
+		atoi(pm->junk) >= NumCaiSpecies) {
+            printf("FATAL: The value for cai_type %s is not valid\n", 
+			       pm->junk);
+            my_exit(99,"Fatal error in CAI type value");
+
+        } else {
+            pm->c_type = (char) atoi(p);  /* define organism type for CAI  */
+	    initilize_point(pm->code, pm->f_type, pm->c_type);
+        }
+    }
+
+
+/* Command line arguments for the indices menu (4)                        */
+/* The presence of any of these flags, cause the relevant indices to be   */
+/* calculated                                                             */
+/* Indices are CAI, FOP, CBI, Nc, GC, GC3s, Lsyn, Laa, silent_base        */
+/* composition, hydropathicity, aromaticity                               */
+    if (p = garg(0, NULL, "-cai" ,  GARG_EXACT))
+        pm->cai = TRUE;
+    if (p = garg(0, NULL, "-fop" ,  GARG_EXACT))
+        pm->fop = TRUE;
+    if (p = garg(0, NULL, "-cbi" ,  GARG_EXACT))
+        pm->cbi = TRUE;
+    if (p = garg(0, NULL, "-enc" ,  GARG_EXACT))
+        pm->enc = TRUE;
+    if (p = garg(0, NULL, "-gc" ,  GARG_EXACT))
+        pm->gc = TRUE;
+    if (p = garg(0, NULL, "-gc3s" ,  GARG_EXACT))
+        pm->gc3s = TRUE;
+    if (p = garg(0, NULL, "-sil_base" ,  GARG_EXACT))
+        pm->sil_base = TRUE;
+    if (p = garg(0, NULL, "-L_sym" ,  GARG_EXACT))
+        pm->L_sym = TRUE;
+    if (p = garg(0, NULL, "-L_aa" ,  GARG_EXACT))
+        pm->L_aa = TRUE;
+    if (p = garg(0, NULL, "-hyd" ,  GARG_EXACT))
+        pm->hyd = TRUE;
+    if (p = garg(0, NULL, "-aro" ,  GARG_EXACT))
+        pm->aro = TRUE; 
+/* Turns on all the above indices                                         */
+    if (p = garg(0, NULL, "-all_indices" ,  GARG_EXACT)){
+        pm->cai = TRUE;
+        pm->fop = TRUE;
+        pm->cbi = TRUE;
+        pm->enc = TRUE;
+        pm->gc = TRUE;
+        pm->gc3s = TRUE;
+        pm->sil_base = TRUE;
+        pm->L_sym = TRUE;
+        pm->L_aa = TRUE;
+        pm->hyd = TRUE;
+        pm->aro = TRUE; 
+    }
+
+/* This section is used to input the filenames for personal choices of Fop */
+/* CBI or CAI values. The file is opened to make sure it is readable, the  */
+/* file pointer is stored in pm and the name is copied from pm->junk       */
+/* (presumably filled in by open_file - confirm against open_fil.c)        */
+ 
+/* Fop                                                                     */    
+    if (p = garg(0, NULL, "-fop_file", GARG_NEXT | GARG_EXACT)) {
+        if ( (pm->fopfile = open_file( "", p, "r", FALSE)) == NULL ) {
+            printf("Could not open Fop file - %s\n", p);
+            my_exit(1,"commline open fop file");
+        } else
+            strncpy(pm->fop_filen, pm->junk, MAX_FILENAME_LEN - 1);  
+ /* idiot catch, if you load personal fop values you want to calculate fop */
+	pm->fop=TRUE;                 
+    }
+
+/* CAI                                                                     */
+    if (p = garg(0, NULL, "-cai_file", GARG_NEXT | GARG_EXACT)) {
+        if ( (pm->caifile = open_file( "", p, "r", FALSE)) == NULL ) {
+            printf("Could not open CAI file - %s\n", p);
+            my_exit(1,"commline failed error");
+        } else
+            strncpy(pm->cai_filen, pm->junk, MAX_FILENAME_LEN - 1);
+	pm->cai=TRUE;                                  /* idiot catch          */
+    }
+/* CBI                                                                     */
+     if (p = garg(0, NULL, "-cbi_file", GARG_NEXT | GARG_EXACT)) {
+        if ( (pm->cbifile = open_file( "", p, "r", FALSE)) == NULL ) {
+            printf("Could not open CBI file - %s\n", p);
+            my_exit(1,"Commline failed to open file");
+        } else
+            strncpy(pm->cbi_filen, pm->junk, MAX_FILENAME_LEN - 1);
+        pm->cbi = TRUE;                          /* idiot catch            */
+    }
+
+
+/* This section changes the default correspondence menu choices normally   */
+/* set in menu 5.                                                          */
+/* Note only one of -coa_cu -coa_rscu -coa_aa takes effect (the last test) */
+    if (p = garg(0, NULL, "-coa_cu"   ,  GARG_EXACT))
+        pm->coa = 'c';
+    if (p = garg(0, NULL, "-coa_rscu" ,  GARG_EXACT))
+        pm->coa = 'r';
+    if (p = garg(0, NULL, "-coa_aa"   ,  GARG_EXACT))
+        pm->coa = 'a';
+    if (p = garg(0, NULL, "-coa_expert",  GARG_EXACT)) /* detailed inertia */
+        (coa.level='e');                               /* analysis         */
+    
+ 
+/* These are options selectable under the advanced COA menu                */
+/* This first option -coa_axes changes the number of axis recorded to file */    
+    if (p = garg(0, NULL, "-coa_axes", GARG_NEXT | GARG_EXACT)){
+      if ( isdigit( (unsigned char) *p) ){ 
+        n = atoi(p);          /* keep the full int so 300 is not read as 44 */
+        /* at most 20 axes for the amino acid COA, 59 for the other types  */     
+        if ( (pm->coa == 'a' && n > 20) || n < 0 || n > 59 ) { 
+          fprintf(pm->my_err,"Value %d is out of range for Number COA Axis "
+            "adjusting to max value\n",n);
+          if ( pm->coa == 'a' ) pcoa->axis = 20; else  pcoa->axis = 59;
+        }else{
+          pcoa->axis = (char) n;
+        }
+      }
+    }
+            
+/* Select the size of dataset to use to identify optimal codons            */
+    if (p = garg(0, NULL, "-coa_num",  GARG_NEXT|GARG_EXACT))  {
+        strcpy (pm->junk,p) ;
+        if( (p=strchr ( pm->junk,'%')) != NULL) {
+              *p='\0';                 /* cut the % off, stored as negative */
+              pcoa->fop_gene=atoi(pm->junk)*-1;
+        }else {
+               pcoa->fop_gene=atoi(pm->junk);
+        }        
+    }        
+
+
+/* These option are mutually exclusive and are normally selected using the */
+/* the bulk output menu (menu 8)                                           */
+
+    if ( p = garg(0, NULL, "-raau", GARG_EXACT))
+        pm->bulk = 'L';
+    if ( p = garg(0, NULL, "-cu"  , GARG_EXACT))
+        pm->bulk = 'C';
+    if ( p = garg(0, NULL, "-cutab", GARG_THERE))
+        pm->bulk = 'O';
+    if ( p = garg(0, NULL, "-cutot", GARG_THERE)){
+       pm->bulk   = 'C';
+       pm->totals =TRUE;
+    }      
+    if ( p = garg(0, NULL, "-reader", GARG_EXACT))
+        pm->bulk = 'R';
+    if ( p = garg(0, NULL, "-rscu", GARG_EXACT))
+        pm->bulk = 'S';
+    if ( p = garg(0, NULL, "-tidy", GARG_EXACT))
+        pm->bulk = 'T';
+    if ( p = garg(0, NULL, "-fasta", GARG_EXACT))
+        pm->bulk = 'T';
+    if ( p = garg(0, NULL, "-aau", GARG_EXACT))
+        pm->bulk = 'A';
+    if ( p = garg(0, NULL, "-transl", GARG_THERE))
+        pm->bulk = 'N';
+    if ( p = garg(0, NULL, "-base", GARG_THERE)) 
+        pm->bulk = 'B';
+    if (p = garg(0, NULL,  "-dinuc", GARG_THERE))
+        pm->bulk = 'D';
+    if (p = garg(0, NULL,  "-noblk", GARG_EXACT))
+        pm->bulk = 'X';
+ 
+/* -t changes the column separator used in the output files; it must be a */
+/* space, tab or comma. Must occur after -transl or it misreads transl as */
+/* a separator. NB strchr() also matches the final NUL so test c first    */ 
+    if (p = garg(0, NULL, "-t"   , GARG_NEXT | GARG_SUBSQ)) {
+        strcpy(pm->junk, p);
+        n = 0;
+        do {                          /* skip any leading quote characters */
+            c = pm->junk[n++];
+        } while ( c != '\0' && strchr("'\"", (int) c) != NULL );
+        if ( c == '\0' || strchr ("\t, ", (int) c) == NULL ) {
+            printf( "WARNING: The chosen separator %s is unsuitable use "
+                    "comma, tab or space\n", pm->junk);
+        } else {
+            pm->seperator = c; 
+        }
+    }
+  
+    
+/* These options are commandline specific, ie. they do not have a         */
+/* menu option                                                            */
+
+/* prevents the menu system from being displayed, everything is           */
+/* assumed to have been given on the command line                         */
+
+    if (p = garg(0, NULL, "-nomenu", GARG_EXACT))
+        pm->menu = FALSE;
+
+/* prevents warnings about possible problems with the sequence data       */
+/* being displayed, i.e. partial codons, stop codons, start codons        */
+
+    if (p = garg(0, NULL, "-nowarn", GARG_THERE))
+        pm->warn = FALSE;
+
+
+
+/* This section tries to identify the name used to call CodonW and if that*/
+/* name concurs with one of those tested for, certain commandline options */
+/* are assumed and the programme becomes much less interactive            */
+
+/* First step is to convert programme name to lower case                  */
+    for ( n=0; *(prog_name + n) != '\0'; n++)
+        *(prog_name + n) = (char) tolower( (unsigned char) *(prog_name + n));
+
+
+/* special options designed to unify code used by several auxiliary       */
+/* programmes. In essence CodonW will impersonate other commonly used     */
+/* codon usage analysis programmes if called using a special name         */
+
+    if ( !strcmp(prog_name, "raau"       )   )
+        pm->bulk = 'L';
+    else if ( !strcmp(prog_name, "cu"    )   )
+        pm->bulk = 'C';
+    else if ( !strcmp(prog_name, "cutab" )   )
+        pm->bulk = 'O';
+    else if ( !strcmp(prog_name, "reader")   )
+        pm->bulk = 'R';
+    else if ( !strcmp(prog_name, "rscu"  )   )
+        pm->bulk = 'S';
+    else if ( !strcmp(prog_name, "tidy"  )   )
+        pm->bulk = 'T';
+    else if ( !strcmp(prog_name, "aau"   )   )
+        pm->bulk = 'A';
+    else if ( !strcmp(prog_name, "dinuc" )   )
+        pm->bulk = 'D';
+    else if ( !strcmp(prog_name, "transl")   )
+        pm->bulk = 'N';
+    else if ( !strcmp(prog_name, "bases" )   ) 
+        pm->bulk = 'B';        
+    else if ( !strcmp(prog_name, "base3s")   ) {
+        pm->prog     = 's' ;
+        pm->menu     = FALSE;
+        pm->sil_base = TRUE;
+    } else if ( !strcmp(prog_name, "cai"   )   ) {
+        pm->prog   = 'c';
+        pm->menu   = FALSE;
+        pm->cai    = TRUE;
+    } else if ( !strcmp(prog_name, "fop"   )   ) {
+        pm->prog   = 'f';
+        pm->menu   = FALSE;
+        pm->fop    = TRUE;
+    } else if ( !strcmp(prog_name, "gc3s"  )   ) {
+        pm->prog   = '3';
+	pm->menu   = FALSE;
+        pm->gc3s   = TRUE;
+    } else if ( !strcmp(prog_name, "gc"    )   ) {
+        pm->prog   = 'g';
+        pm->menu   = FALSE;
+        pm->gc     = TRUE;
+    } else if ( !strcmp(prog_name, "enc"   )   ) {
+        pm->prog   = 'e';
+        pm->menu   = FALSE;
+        pm->enc    = TRUE;
+    } else if ( !strcmp(prog_name, "cbi"   )   ) {
+        pm->prog   = 'i';
+        pm->menu   = FALSE;
+        pm->cbi    = TRUE;
+    } else if ( !strcmp(prog_name, "cutot"   )   ) {
+        pm->bulk   = 'C';
+        pm->menu   = FALSE;
+        pm->totals =TRUE;
+    } else {
+       pm->codonW=TRUE;       /* if argv[0] is not recognised assume codonW*/
+                              /* if blk output is still X then assume cu   */
+       if (pm->bulk=='X') pm->bulk='C'; 
+       }
+
+
+    if (!pm->codonW ) {       /* we appear to be impersonating another prog*/
+                              /* now we switch to the correct greeting     */ 
+        if (pm->bulk && pm->bulk!='X'){
+	    pm->seperator='\000';     /* stop chars being converted by tidy*/
+            switch (pm->bulk) {
+            case 'R':
+                printf("\t\t\tREADER Formatting Program\n");
+                break;
+            case 'T':
+                printf("\t\t\tTIDY   Formatting Program\n");
+                break;
+            case 'S':
+                printf("\tRelative Synonymous Codon Usage\n");
+                break;
+            case 'B':
+                printf("\t\t\tBase composition calculation\n");
+                break;
+            case 'C':
+	            (pm->totals)?
+                printf("\t\t\tTotal Codon Usage Tabulation\n"):
+                printf("\t\t\tCodon Usage\n");
+                break;
+            case 'L':
+                printf("\tRelative Amino Acid Usage Calculating \n");
+                break;
+            case 'D':
+                printf("\t\t\tDi-Nucleotide frequencies Program\n");
+                break;
+            case 'A':
+                printf("\t\t\tAmino Acid Usage Calculating Program\n");
+                break;
+            case 'N':
+                printf("\t\tDNA 2 AA translating Program\n");
+                break;
+            case 'O':
+                printf("\tCodon usage tabulation Program\n");
+                break;
+            case 'G':
+                printf("\tTotal Codon usage tabulation\n");
+                break;
+            default:
+                 {
+                    fprintf(stderr, "Sorry:- could not recognise BULK option"
+                                    " -%c (Use -h for help)", pm->bulk);
+                    my_exit(99,"bad option commandline");
+                }
+            }
+	 }
+        else if (pm->prog)
+            switch (pm->prog) {
+            case 's':
+                printf("\t\t\tSilent base G+C+A+T Calculating Program\n");
+                break;
+            case 'e':
+                printf("\t\tNc Calculating Program\n");
+                break;
+            case 'f':
+                printf("\t\tFop Calculating Program\n");
+                break;
+            case 'c':
+                printf("\t\t\tCAI Calculating Program\n");
+                break;
+            case '3':
+                printf("\t\t\tGC3s Calculating Program\n");
+                break;
+            case 'g':
+                printf("\t\t\tG+C Calculating Program\n");
+                break;
+            case 'i':
+                printf("\t\t\tCodon Bias Index Calculating Program\n");
+                break;
+            default:
+                 {
+                    fprintf(stderr, "Sorry:- could not recognise ");
+                    fprintf(stderr, "argument -%c (Use -h for help)",
+                        pm->prog);
+                    my_exit(99,"commline");
+                }                             /* matches default          */
+            }
+        else
+            fprintf( stderr, "unknown error type in commline.c" );
+    }                                         /* matched if (!pm->codonW) */
+
+
+/* By this point we should have processed all the command line arguments  */
+/* so now we test for any remaining, these are unrecognised               */
+
+    while (p = garg(0, NULL, "-", GARG_THERE))
+      if ( pm->menu )           
+        printf("Unrecognised argument %s\n", p);
+      else {
+          /* if we are running without a menu then abort this run         */
+          sprintf ( pm->junk,"Unrecognised argument %s", p);
+	      my_exit ( 99 , pm->junk);
+    }
+
+/* Anything remaining should be file names                                */
+/* The first name should be the input file name                           */
+
+    if ( p = garg(0, NULL, "", GARG_THERE)) {
+        if ( (pm->inputfile = open_file( "", p, "r", FALSE)) == NULL )  {
+            printf("Could not open input file - %s\n", p );
+            my_exit(1,"failed to open file in proc_commline");
+        } else
+            strncpy(pm->curr_infilename, pm->junk, MAX_FILENAME_LEN - 1);
+    }
+/* The second should be the output filename                               */
+    if ( p = garg(0, NULL, "", GARG_THERE)) {
+        if ( (pm->outputfile = open_file( "", p, "w", 
+            (int) pm->verbose)) == NULL ) {
+            printf("Could not open output file - %s\n", p );
+            my_exit(1,"commline out file");
+        } else
+            strncpy(pm->curr_outfilename, pm->junk, MAX_FILENAME_LEN - 1);
+    } 
+
+/* The third which only occurs if the programme is running as CodonW      */
+
+    if ( pm->codonW && (p = garg(0, NULL, "", GARG_THERE)) ) {
+        if ( (pm->tidyoutfile = open_file( "", p, "w", 
+            (int) pm->verbose)) == NULL ) {
+            printf("Could not open blkoutput file - %s\n", p );
+            my_exit(1,"commline blk outfile");
+        } else
+            strncpy(pm->curr_tidyoutname, pm->junk, MAX_FILENAME_LEN - 1);
+    } 
+
+/* Now check the command line is empty ... it should be at this point     */
+    while (p = garg(0, NULL, "", GARG_THERE))
+        printf("This command line parameter was not recognised %s\n", p);
+
+/* IF no file name was found on the command line and the programme is     */
+/* impersonating another programme or we decided not to use the menu      */
+/* we need to load an input file name                                     */
+
+    if ( (!pm->codonW || !pm->menu) && !pm->inputfile ){
+      if ( (pm->inputfile = open_file( "input filename", "input.dat", 
+          "r", FALSE)) == NULL )  {
+	   printf("Could not open input file\n");  /* p is NULL by now, the */
+	   my_exit(1,"commline inputfile");        /* loop above drained it */
+      } 
+     strncpy(pm->curr_infilename, pm->junk, MAX_FILENAME_LEN - 1);
+    }
+    
+/* If we have an input filename but no output then we must prompt for the */
+/* output filename                                                        */
+
+    if ( pm->inputfile  && !pm->outputfile ) {    
+      /* If we are trying to impersonate another programme use this method*/
+      /* but make sure that we know what this other programme is called   */
+      if ( !pm->codonW && strlen (prog_name) ){
+        strcpy(pm->curr_outfilename, prog_name);
+        strcat(pm->curr_outfilename, ".def");
+      } else {
+ 
+	/* Use the input filename as a root filename                      */
+	strncpy(root, pm->curr_infilename, MAX_FILENAME_LEN - 5);
+	for (n = (int) strlen(root); n && root[n]!='.'  ; --n);
+	if ( n  ) root[n] = '\0';               /* find root of filename  */
+
+	strcpy(pm->curr_outfilename, root);
+        strcat(pm->curr_outfilename, ".out");
+      }                                        /* matches else            */  
+
+      /* now we know the suggested name for the output file lets open it  */   
+      if ( pm->verbose ) {
+	if ( (pm->outputfile = open_file( "indices output filename",
+	 pm->curr_outfilename, "w",(int) pm->verbose)) == NULL )     
+	                  my_exit(1,"commline");
+	strncpy(pm->curr_outfilename, pm->junk, MAX_FILENAME_LEN - 1);
+      }else{
+	if ( (pm->outputfile = open_file( "",
+	 pm->curr_outfilename, "w",(int) pm->verbose)) == NULL )     
+	                  my_exit(1,"commline");
+	strncpy(pm->curr_outfilename, pm->junk, MAX_FILENAME_LEN - 1);
+      }
+
+    }                                          /* match if ( pm->inputfile */
+    
+
+
+    /* we had a commandline inputfile name and output filename but none    */
+    /* for bulkoutput .. we prompt to save having to use menu 1            */
+    if ( pm->inputfile && ! pm->tidyoutfile ){
+      if ( pm->codonW ) {
+                       	      /* Use the input filename as a root filename */
+	strncpy(root, pm->curr_infilename  , MAX_FILENAME_LEN - 5);
+
+	for (n = (int) strlen(root); n && root[n]!='.'  ; --n);
+	if ( n  ) root[n] = '\0';               /* find root of filename  */
+
+	strcpy(pm->curr_tidyoutname, root);
+        strcat(pm->curr_tidyoutname, ".blk");
+
+    /* now we know the suggested name for the output file lets open it     */      
+	if( pm->verbose) {
+	  if ( (pm->tidyoutfile = open_file( "bulk output filename",
+		 pm->curr_tidyoutname, "w",(int) pm->verbose)) == NULL ) 
+	    my_exit(1,"commline");
+	strncpy(pm->curr_tidyoutname, pm->junk, MAX_FILENAME_LEN - 1);
+	}else{
+	  if ( (pm->tidyoutfile = open_file( "",
+	      pm->curr_tidyoutname, "w",(int) pm->verbose)) == NULL ) 
+	    my_exit(1,"commline");
+	strncpy(pm->curr_tidyoutname, pm->junk, MAX_FILENAME_LEN - 1);
+	}
+      }else{ 
+	/* only use one output file when impersonating other programmes        */
+    /* just in case we make blkout and output the same file                */      
+	pm->tidyoutfile = pm->outputfile; 
+      }
+    }
+return 1;
+}
+/****************** Garg     ***********************************************/
+/* Hands back command line arguments one at a time. Called with argv non   */
+/* NULL it (re)loads its stored command line from argv[1..argc-1], keeping */
+/* at most MAX_ARGS entries; called with argv NULL it searches the stored  */
+/* copy. The first unused argument that matches targ is marked as used.    */
+/* This subroutine was developed as a collaboration with Colin McFarlane   */
+/* GARG_EXACT   argument must equal targ, otherwise it need only start     */
+/*              with targ (so "" matches anything, "-" any dashed option)  */
+/* GARG_THERE   the matching argument itself is returned (tested first)    */
+/* GARG_SUBSQ   the string immediately after targ is returned              */ 
+/* GARG_NEXT    the next argument is used up and returned; if targ was the */ 
+/*              last argument the matching argument is returned instead    */
+/***************************************************************************/
+char           *garg(int argc, char *argv[], const char *targ, int mode)
+{
+  static char    *argw[MAX_ARGS];
+  static int      done[MAX_ARGS];
+  static int      argn;
+
+  int             arg, nc;
+  size_t          targ_len = strlen(targ);
+  if (argv) {
+    if (--argc < 1)
+      return NULL;
+    for (argn = 0; argn < argc && argn < MAX_ARGS; argn++) {
+      argw[argn] = argv[argn + 1];      /* bounded: arrays hold MAX_ARGS  */
+      done[argn] = 0;
+    }
+  }
+  nc = mode & GARG_EXACT ? BUFSIZ : (int) targ_len;
+                          /* returns NULL when no unused argument matches */
+  for (arg = 0; arg < argn; arg++)
+    if ((0 == strncmp(targ, argw[arg], nc)) && !done[arg]) {
+      done[arg] = 1;
+      if (mode & GARG_THERE)
+        return argw[arg];
+      if (mode & GARG_SUBSQ)     /* targ_len not nc: nc is BUFSIZ if EXACT */
+        return &argw[arg][targ_len];
+      if (mode & GARG_NEXT) {
+        done[++arg < argn ? arg : --arg] = 1;
+        return argw[arg];
+      }
+      return argw[arg];
+    }
+  return NULL;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/coresp.c b/coresp.c
new file mode 100755
index 0000000..82d7cbd
--- /dev/null
+++ b/coresp.c
@@ -0,0 +1,1673 @@
+/**************************************************************************/
+/* CodonW codon usage analysis package                                    */
+/* Copyright (C) 2005            John F. Peden                            */
+/* This program is free software; you can redistribute                    */
+/* it and/or modify it under the terms of the GNU General Public License  */
+/* as published by the Free Software Foundation; version 2 of the         */
+/* License,                                                               */
+/*                                                                        */
+/* This program is distributed in the hope that it will be useful, but    */
+/* WITHOUT ANY WARRANTY; without even the implied warranty of             */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           */
+/* GNU General Public License for more details.                           */
+/* You should have received a copy of the GNU General Public License along*/
+/* with this program; if not, write to the Free Software Foundation, Inc.,*/
+/* 675 Mass Ave, Cambridge, MA 02139, USA.                                */
+/*                                                                        */
+/*                                                                        */
+/* The author can be contacted by email (jfp#hanson-codonw at yahoo.com Anti-*/
+/* Spam please change the # in my email to an _)                          */
+/*                                                                        */
+/* For the latest version and information see                             */
+/* http://codonw.sourceforge.net 					  */
+/**************************************************************************/
+/* This file contains source code for                                     */
+/* the core functions involved in correspondence                          */
+/* analysis, this code was originally written                             */
+/* by Jean Thioulouse                                                     */
+/* ADE software: multivariate analysis and graphical                      */
+/* display of environmental data                                          */
+/* IN Guariso,G and Rizzoli, A (eds),                                     */
+/* Software per l'Ambiente. Patron Editore, Bologna, pp.57-62.            */
+/*                                                                        */
+/* and is used with kind permission                                       */
+/*                                                                        */
+/* It has however been extensively modified to integrate it               */
+/* as seamlessly as practical into CodonW and as such can no              */
+/* longer be considered as a stand alone package                          */   
+/*                                                                        */
+/* Originally written as a general Multivariate analysis (MVA)            */
+/* package, it is now hardwired specifically for codon or amino           */
+/* acid usage analysis                                                    */
+/*                                                                        */
+/* All unnecessary functions have been removed                            */
+/* Originally each data file had an associated resource file              */
+/* which described required parameters                                    */
+/* The need for these files has been removed                              */
+/*                                                                        */
+/**************************************************************************/
+/* Functions                                                              */
+/* textbin      converts codon usage to binary data file                  */
+/*                                                                        */
+/**************************************************************************/
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+#include "codonW.h"
+
+
+/*************** textbin          *****************************************/
+/* Examines the struct pcoa to see which codons/amino acids are to be     */
+/* included in the analysis and counts them into pcoa->colm. It then      */
+/* reads pcoa->rows records from the text file fileread (a label followed */
+/* by 21 amino acid counts or 64 codon counts) and writes the included    */
+/* columns of each record to the binary file fileout.                     */
+/* pm->coa selects the mode                                               */
+/*   'a'  amino acid counts                                               */
+/*   'c'  codon counts                                                    */
+/*   'r'  codon count divided by the total of its amino acid and          */
+/*        multiplied by ds[codon]                                         */
+/* A record that is short or malformed is treated as a fatal read error   */
+/**************************************************************************/
+
+/* Read one integer from fp, eol is true for the last value of a record   */
+static int textbin_count(FILE *fp, int eol)
+{
+  int value = 0;
+
+  if ( fscanf(fp, eol ? "%i\n" : "%i", &value) != 1 ) {
+    fprintf(pm->my_err,"(txt2bin)");
+    my_exit(5,"txt2bin");
+  }
+  return value;
+}
+
+void textbin(char *fileread, char *fileout) 
+{
+  double     *vlec;
+  int         v2;
+  int i,j,x;
+  
+  pcoa->colm=0;
+  
+  if ( pm->coa=='a' ) {
+    for ( i=1; i<22;i++) 
+      if ( pcoa->amino[i] ) pcoa->colm++;  /* number of colms in analysis */
+  }else {
+    for ( i=1; i<65;i++) 
+      if ( pcoa->codons[i]) pcoa->colm++;  /* number of colms in analysis */    
+  }
+  
+  vecalloc(&vlec, pcoa->colm);             /* vlec[1..colm] holds one row */
+  
+  /* open the text input file and the binary output file                  */
+  if ( (pm->fcoa_in = open_file( "", fileread, "r", FALSE)) == NULL )  {
+    fprintf(pm->my_err,"(txt2bin)");
+    my_exit(1,"txt2bin");
+  }
+  if ( (pm->fcoa_out = open_file( "",fileout, "wb", FALSE)) == NULL )  {
+    fprintf(pm->my_err,"(txt2bin)");
+    my_exit(6,"fileout");
+  }     
+  
+  for (i=1;i<=pcoa->rows;i++) {            /* pcoa-rows is the No of genes */
+    if ( fscanf(pm->fcoa_in,"%s",pm->junk) != 1 ) {     /* skip the label  */
+      fprintf(pm->my_err,"(txt2bin)");
+      my_exit(5,"txt2bin");
+    }
+
+    /* read the data from coa_raw into the array vlec                      */
+    switch (pm->coa){ 
+    case 'a': 
+      for (j=1,x=1;j<=21;j++) {
+        v2 = textbin_count(pm->fcoa_in, j==21);
+        if ( pcoa->amino[j] )
+          vlec[x++] = (double) v2;
+      }
+      /* x-1 values were stored, this must be every selected column even   */
+      /* when the last amino acid is not one of them                       */
+      if ( x != pcoa->colm+1 ) my_exit (99,"Fatal Error in txt2bin");
+      break;
+    case 'c':
+      for (j=1,x=1;j<=64;j++) { 
+        v2 = textbin_count(pm->fcoa_in, j==64);
+        if( pcoa->codons[j] )
+          vlec[x++] = (double) v2;
+      }
+      if ( x != pcoa->colm+1 ) my_exit (99,"Fatal Error in txt2bin");
+      break;
+    case 'r':
+      clean_up ( ncod , naa );   
+      for (j=1;j<=64;j++) { 
+        v2 = textbin_count(pm->fcoa_in, j==64);
+        naa[pcu->ca[j]]+=v2;                         /* count amino acids */
+        ncod[j]         =v2;                         /* count codons      */
+      }
+
+      for (j=1,x=0;j<=64;j++) { 
+        if(pcoa->codons[j] ) {
+          ++x;
+          /* an amino acid that is absent from the gene scores zero       */
+          vlec[x] = (double) ((naa[pcu->ca[j]])? 
+                    (float) ncod[j]/naa[pcu->ca[j]]*(float)( *(ds+j) ):
+                    0.00);
+        } 
+      }
+      break;
+    
+#ifdef DEBUG
+    default:
+      fprintf(pm->my_err,"error in textbin %c unknown \n",pm->coa );
+      break;
+#endif
+    }                                                      /* end switch */
+    writevec(vlec, pm->fcoa_out);    
+  }
+                           /* close files and release memory and return  */
+  fileclose(&pm->fcoa_in);
+  fileclose(&pm->fcoa_out);  
+  free  (vlec); 
+}
+
+/*************** colmout          *****************************************/
+/* Converts the binary file of column coordinates (nfice) into text.      */
+/* pcoa->axis gives the number of axes that were recorded and pcoa->colm  */
+/* the number of columns (amino acids or codons) that were analysed.      */
+/* For every analysed column one line is written, holding its label and   */
+/* its position on each axis, both to the text file nfics and to the      */
+/* already open summary file. Fields are separated by pm->seperator.      */
+/**************************************************************************/
+void colmout(char *nfice, char *nfics,AMINO_STRUCT *ppaa, FILE *summary)
+{
+  double      *coord;
+  FILE        *bin_in=NULL, *txt_out=NULL;
+  const char   sep      = pm->seperator;
+  const int    ncolm    = pcoa->colm;
+  const int    naxes    = pcoa->axis;           /* number of axis        */
+  const int    by_amino = (pm->coa == 'a');
+  const char  *label;
+  float        val;
+  int          idx, written, ax;
+
+  vecalloc(&coord, naxes);
+
+  if( (bin_in =open_file("",nfice,"rb",FALSE))==NULL) my_exit(6,"nfice2");
+  if( (txt_out=open_file("",nfics, "w",FALSE))==NULL) my_exit(1,"nfics2");
+
+  fprintf(summary,"\n\nThe position of each %s by axis \n"
+          "also see %s for seperate output\n",
+          by_amino ? "amino acid":"codon",nfics);
+
+  /* header line                                                          */
+  fprintf(txt_out, "%s","label");
+  fprintf(summary, "%-20.20s","label");
+  for (ax=1;ax<=naxes;ax++) {
+    fprintf(txt_out, "%c%s%d",sep,"Axis",ax);
+    fprintf(summary, "%c%9s%d",sep, "Axis",ax);
+  }
+  fprintf(txt_out, "\n");
+  fprintf(summary, "\n");
+
+  /* one line per column that took part in the analysis                   */
+  idx = 0;
+  for (written=1; written<=ncolm; written++) {
+    if ( by_amino ) {
+      do { ++idx; } while ( !pcoa->amino[idx]  );   /* skip amino if false */
+      label = ppaa->aa3[idx];
+    } else {
+      do { ++idx; } while ( !pcoa->codons[idx] );   /* skip codon if false */
+      label = ppaa->cod[idx];
+    }
+    fprintf(txt_out, "%s%c",label,sep);
+    fprintf(summary, "%-20.20s%c",label,sep);
+
+    readvec(coord, bin_in);
+    for (ax=1;ax<naxes;ax++) {
+      val = (float) coord[ax];
+      fprintf(txt_out, "%f%c", val,sep);
+      fprintf(summary, "%10.5f%c", val,sep);
+    }
+    val = (float) coord[naxes];                 /* last axis ends the line */
+    fprintf(txt_out, "%f\n", val);
+    fprintf(summary, "%10.5f\n", val);
+  }
+
+  fileclose(&txt_out);
+  fileclose(&bin_in);
+  free(coord);
+}
+/*************** rowout           *****************************************/
+/* The position of each gene on each of the principal axes (pcoa->axis of */
+/* them) is converted from the binary file nfice to the ASCII file nfics  */
+/* as well as to the summary file. The gene labels are taken from the     */
+/* start of each line of the file ncout.                                  */
+/* Unless amino acids are being analysed the genes are then ordered by    */
+/* their position on the first axis (sorted_by_axis1) and the ordering is */
+/* passed to gen_cusort_fop.                                              */
+/**************************************************************************/
+void rowout(char *nfice, char *nfics, char *ncout, FILE *summary)
+{
+  double    *vlec, *ax1;
+  int           col, lig,*sortax1;
+  FILE      *fice=NULL, *fics=NULL, *fnam=NULL;
+  float     v2;
+  int i,j;
+  char sp=pm->seperator;
+  
+  lig=pcoa->rows;                                   /* number of genes    */
+  col=pcoa->axis;                                   /* number of axes     */
+
+  vecalloc(&vlec, col);
+  vecalloc(&ax1 , lig);
+  if( (sortax1= (int *) calloc(lig+1,sizeof(int)))==NULL) 
+      my_exit(3,"sortax1"); 
+ 
+  if( (fice=open_file("",nfice,"rb",FALSE))==NULL) my_exit(6,"nfice3");
+  if( (fics=open_file("",nfics, "w",FALSE))==NULL) my_exit(1,"nfics3");
+  if( (fnam=open_file("",ncout, "r",FALSE))==NULL) my_exit(6,"ncout3");
+
+  fprintf(summary,"\n\nThe position of each gene by axis \n" 
+      "(see also %s)\n",nfics);
+
+  fprintf(fics   , "%s%c","label",sp);
+  fprintf(summary, "%-20.20s%c","label",sp);
+
+  for (j=1;j<=col;j++) {
+    fprintf(fics   , "%s%d%c","Axis",j,sp);
+    fprintf(summary, "%9s%d%c", "Axis",j,sp);
+  }  
+  fprintf(fics   , "\n");
+  fprintf(summary, "\n");
+
+  for (i=1;i<=lig;i++) {
+
+    /* the label is the first word within the first 35 chars of the line  */
+    fgets(pm->junk,BUFSIZ,fnam);
+    pm->junk[35]='\0';
+    for ( j=35 ; j>=0; j--) 
+      if ( isspace( (unsigned char) pm->junk[j]) ) pm->junk[j]='\0';
+ 
+    fprintf(fics   , "%s%c",pm->junk,sp);
+    fprintf(summary, "%-20.20s%c",pm->junk,sp);
+    
+    readvec(vlec, fice);
+
+    /* remember the first factor, also when only one axis is recorded     */
+    if (col >= 1) ax1[i]=vlec[1];
+
+    for (j=1;j<col;j++) {
+      v2 = (float) vlec[j];
+      fprintf(fics   , "%f%c", v2,sp);
+      fprintf(summary, "%10.5f%c", v2,sp);
+    }
+    v2 = (float) vlec[col];                     /* last axis ends the line */
+    fprintf(fics    , "%f\n", v2);
+    fprintf(summary , "%10.5f\n", v2);
+  } 
+
+  if ( pm->coa != 'a' )   {  
+    sorted_by_axis1  ( ax1, sortax1, lig);
+    gen_cusort_fop ( sortax1, lig, fnam, summary );
+  }
+  fileclose(&fics);
+  fileclose(&fice);
+  fileclose(&fnam);
+  free(ax1);
+  free(sortax1);
+  free(vlec);
+}
+
+/************** vecalloc          *****************************************/
+/* Allocate a zeroed vector of n+1 doubles and store its address in *vec. */
+/* Element 0 records the length n, the data occupy elements 1..n.         */
+/* my_exit(3,..) is called when the memory cannot be allocated            */
+/**************************************************************************/
+void vecalloc (double **vec, int n)
+{
+    double *block = (double *) calloc(n+1, sizeof(double));
+
+    *vec = block;
+    if ( block == NULL ) {
+        my_exit(3,"vecalloc");
+        return;
+    }
+    block[0] = n;                           /* length lives in slot zero  */
+}
+
+/************** writevec          *****************************************/
+/* Append the elements v1[1]..v1[n] of the vector v1 (n is held in v1[0]) */
+/* to the binary file fic, each converted to a float and written as 4     */
+/* bytes. my_exit(4,..) is called if a value cannot be written            */
+/**************************************************************************/
+void writevec(double *v1, FILE *fic)
+{
+    const int count = (int) v1[0];          /* Num of values in vector    */
+    int       k;
+
+    for (k=1;k<=count;k++) {
+        float cell = (float) v1[k];
+
+        if ( fwrite((const char *)&cell, 4, 1, fic) != 1)
+            my_exit(4,"writevec");
+    }
+}
+
+/************** PrepAFC           *****************************************/
+/* Prepares the contingency table held in the binary file nfic for the    */
+/* correspondence analysis.                                               */
+/* The row and column totals are calculated and, like the table itself,   */
+/* divided by the grand total n. The row weights are written to the file  */
+/* cbfcpl and the column weights to cbfcpc. Each cell whose row and       */
+/* column weights are both non zero is then replaced by                   */
+/* cell/row weight/column weight - 1 and the table is written to cbfcta.  */
+/* Finally the total inertia is stored in pcoa->inertia                   */
+/**************************************************************************/
+
+void PrepAFC(char *nfic)
+{
+    char     bid[17];                       /* scratch output file name   */
+    int      r, c;
+    double **w;
+    double  *poili, *poico;                 /* row and column weights     */
+    double   rowsum, colw, total;
+
+    vecalloc(&poili, pcoa->rows);
+    vecalloc(&poico, pcoa->colm);
+    taballoc(&w, pcoa->rows, pcoa->colm);
+
+    lecmat(w, nfic);
+
+/* accumulate row totals, column totals and the grand total                */
+    total = 0;
+    for (r=1;r<=pcoa->rows;r++) {
+        rowsum = 0.0;
+        for (c=1;c<=pcoa->colm;c++) {
+            rowsum   = rowsum   + w[r][c];
+            poico[c] = poico[c] + w[r][c];
+        }
+        total    = total + rowsum;
+        poili[r] = rowsum;
+    }
+
+/* scale the vectors, and matrix                                           */
+    scalvec(poili, 1.0/total);
+    scalvec(poico, 1.0/total);
+    scalmat(w, 1.0/total);
+    strcpy(bid,"cbfcpl");
+    ecrvec(poili, bid);
+    strcpy(bid,"cbfcpc");
+    ecrvec(poico, bid);
+
+/* rows or columns with a zero weight are left untouched                   */
+    for (r=1;r<=pcoa->rows;r++) {
+        rowsum = poili[r];
+        if (rowsum == 0.0) continue;
+        for (c=1;c<=pcoa->colm;c++) {
+            colw = poico[c];
+            if (colw != 0) w[r][c] = w[r][c] / rowsum / colw - 1;
+        }
+    }
+    strcpy(bid,"cbfcta");
+    ecrmat(w, bid);
+
+    freetab(w);
+    freevec(poili);
+    freevec(poico);
+    pcoa->inertia = (float) inertot ();
+}
+
+/************** inertot         ********************************************/
+/* Calculate total data inertia: the table in file cbfcta is read together */
+/* with the row weights (cbfcpl) and column weights (cbfcpc) and the sum   */
+/* over all cells of cell*cell*row weight*column weight is returned        */
+/***************************************************************************/
+
+double inertot ( void )
+{
+    double **tab;
+    double  *rowwt, *colwt;
+    double   cell, total = 0;
+    int      r, c;
+
+    taballoc (&tab, pcoa->rows, pcoa->colm);
+    vecalloc (&colwt, pcoa->colm);
+    vecalloc (&rowwt, pcoa->rows);
+
+    lecmat (tab,"cbfcta");
+    lecvec(rowwt, "cbfcpl");
+    lecvec(colwt, "cbfcpc");
+
+    for (r=1;r<=pcoa->rows;r++) {
+        for (c=1;c<=pcoa->colm;c++) {
+            cell  = tab[r][c];
+            total = total + cell * cell * rowwt[r] * colwt[c];
+        }
+    }
+
+    freetab(tab);
+    freevec(rowwt);
+    freevec(colwt);
+
+    return total;
+}
+
+/************** lecmat            *****************************************/
+/* Opens the binary file nfic and reads one 4 byte float for every cell   */
+/* of the matrix tab, row by row. The dimensions of the matrix are taken  */
+/* from tab[0][0] (rows) and tab[1][0] (columns).                         */
+/* my_exit is called if the file cannot be opened or is too short         */
+/**************************************************************************/
+void lecmat (double **tab, char *nfic)
+{
+    const int nrow = (int) tab[0][0];
+    const int ncol = (int) tab[1][0];
+    FILE     *fic  = NULL;
+    float     cell;
+    int       r, c;
+
+    fic = open_file("",nfic,"rb",FALSE);
+    if ( fic == NULL ) my_exit(1,"lecmat");
+
+    for (r=1;r<=nrow;r++) {
+        for (c=1;c<=ncol;c++) {
+            if ( fread((char *)&cell, 4, 1, fic) != 1) {
+                fprintf(pm->my_err,"Error: can't read matrix (lecmat)");
+                my_exit(5,"lecmat");
+            }
+            tab[r][c] = cell;
+        }
+    }
+    fileclose(&fic);
+}
+
+/************** freetab           *****************************************/
+/* Releases the memory of a table: rows 0..n, where n is the row count    */
+/* stored in tab[0][0], followed by the array of row pointers itself      */
+/**************************************************************************/
+void freetab (double **tab)
+{
+    const int last = (int) tab[0][0];   /* number of rows in table        */
+    int       r;
+
+    for (r=0;r<=last;r++)
+        free((char *) tab[r]);
+    free((char *) tab);
+}
+
+/************** freevec           *****************************************/
+/* Releases the memory that was dynamically allocated to the vector vec   */
+/**************************************************************************/
+void freevec (double *vec)
+{
+    char *block = (char *) vec;
+
+    free(block);
+}
+
+/************** taballoc          *****************************************/
+/* Dynamically allocates a zeroed table of l1+1 rows, each of c1+1        */
+/* doubles, and stores its address in *tab. The number of rows l1 is      */
+/* recorded in cell [0][0] and the number of columns c1 in cell [1][0].   */
+/* my_exit(3,..) is called when the memory cannot be allocated            */
+/**************************************************************************/
+void taballoc (double ***tab, int l1, int c1)
+{
+    double **rows;
+    int      r;
+
+    rows = (double **) calloc(l1+1, sizeof(double *));
+    *tab = rows;
+
+    if ( rows == NULL ) {
+        fprintf(pm->my_err,"(taballoc)");
+        my_exit(3,"taballoc2");
+    } else {
+        for (r=0;r<=l1;r++) {
+            rows[r] = (double *) calloc(c1+1, sizeof(double));
+            if ( rows[r] == NULL ) {
+                fprintf(pm->my_err,"(taballoc)");
+                my_exit(3,"taballoc");
+            }
+        }
+    }
+
+    (*tab)[0][0] = l1;                      /* dimensions of the table    */
+    (*tab)[1][0] = c1;
+}
+
+/**************   lecvec          *****************************************/
+/* Opens the binary file nfic and reads v1[0] floats of 4 bytes from it   */
+/* into the elements v1[1]..v1[n] of the vector v1.                       */
+/* my_exit is called if the file cannot be opened or is too short         */
+/**************************************************************************/
+void lecvec (double *v1, char *nfic)
+{
+    FILE  *fic = NULL;
+    float  cell;
+    int    k, count;
+
+    fic = open_file("",nfic,"rb",FALSE);
+    if ( fic == NULL ) my_exit(6,"lecvec");
+
+    count = (int) v1[0];
+    for (k=1;k<=count;k++) {
+        if ( fread((char *)&cell, 4, 1, fic) != 1) {
+            fprintf(pm->my_err,"(lecvec)");
+            my_exit(5,"lecvec");
+        }
+        v1[k] = cell;
+    }
+    fileclose(&fic);
+}
+
+/************** ecrmat           ******************************************/
+/* Writes every cell of the table tab, row by row, to the binary file     */
+/* nfic as 4 byte floats. The dimensions are taken from tab[0][0] (rows)  */
+/* and tab[1][0] (columns).                                               */
+/* my_exit is called if the file cannot be opened or written              */
+/**************************************************************************/
+void ecrmat (double **tab, char *nfic)
+{
+    const int nrow = (int)tab[0][0];
+    const int ncol = (int)tab[1][0];
+    FILE     *fic  = NULL;
+    float     cell;
+    int       r, c;
+
+    fic = open_file("",nfic,"wb",FALSE);
+    if ( fic == NULL ) my_exit(1,"ecrmat");
+
+    for (r=1;r<=nrow;r++) {
+        for (c=1;c<=ncol;c++) {
+            cell = (float)tab[r][c];
+            if ( fwrite((const char *)&cell, 4, 1, fic) != 1)  {
+                fprintf(pm->my_err,"(ecrmat)");
+                my_exit(4,"ecrmat");
+            }
+        }
+    }
+
+    fileclose(&fic);
+}
+/************** ecrvec           ******************************************/
+/* Writes the elements v1[1]..v1[n] of the vector v1 (n is held in v1[0]) */
+/* to the binary file nfic as 4 byte floats.                              */
+/* my_exit is called if the file cannot be opened or written              */
+/**************************************************************************/
+void ecrvec (double *v1, char *nfic)
+{
+    const int count = (int)v1[0];
+    FILE     *fic   = NULL;
+    float     cell;
+    int       k;
+
+    fic = open_file("",nfic,"wb",FALSE);
+    if ( fic == NULL ) my_exit(1,"ecrvec");
+
+    for (k=1;k<=count;k++) {
+        cell = (float)v1[k];
+        if ( fwrite((const char *)&cell, 4, 1, fic) != 1) {
+            fprintf(pm->my_err,"(ecrvec)");
+            my_exit(4,"ecrvec");
+        }
+    }
+
+    fileclose(&fic);
+}
+
+/************** scalmat          ******************************************/
+/* Multiply every cell [1..rows][1..columns] of the matrix tab by r. The  */
+/* dimensions are taken from tab[0][0] (rows) and tab[1][0] (columns)     */
+/**************************************************************************/
+void scalmat (double **tab, double r)
+{
+    const int nrow = (int) tab[0][0];
+    const int ncol = (int) tab[1][0];
+    int       row, cell;
+
+    for (row=1;row<=nrow;row++) {
+        double *line = tab[row];
+
+        for (cell=1;cell<=ncol;cell++)
+            line[cell] = line[cell] * r;
+    }
+}
+
+/************** scalvec          ******************************************/
+/* Multiply the elements v1[1]..v1[n] of the vector v1 by r, where n is   */
+/* the length held in v1[0]                                               */
+/**************************************************************************/
+void scalvec (double *v1, double r)
+{
+    const int count = (int) v1[0];
+    int       k;
+
+    for (k=1;k<=count;k++)
+        v1[k] = v1[k] * r;
+}
+
+/************** DiagoRC          ******************************************/
+/* This function generates/calculates the correspondence analysis factors */
+/* The weights (cbfcpl, cbfcpc) and the table (cbfcta) are read back from */
+/* file, the smaller of the two cross product matrices is diagonalised    */
+/* and the results are written to                                         */
+/*   cbfcvp   eigenvalues and their share of the total inertia            */
+/*   cbfcli   coordinates of the rows                                     */
+/*   cbfcco   coordinates of the columns                                  */
+/* A description of the eigenvalues is written to summary (editvalpro)    */
+/**************************************************************************/
+void DiagoRC ( FILE *summary)
+{
+  const int     nrow = pcoa->rows;
+  const int     ncol = pcoa->colm;
+  const int     rows_fewer = (nrow < ncol);
+  const int     lcmin = rows_fewer ? nrow : ncol;  /* smaller dimension   */
+  int           rang, f1, i, j, k;
+  double      **w, **ctab, **auxi, **vp1, **vp2;
+  double       *poili, *poico, *l;
+  double        s, s1, a1, inertotal;
+
+  taballoc(&w, nrow, ncol);
+  taballoc(&ctab, lcmin, lcmin);
+  taballoc(&auxi, lcmin, 2);
+  vecalloc(&poili, nrow);
+  vecalloc(&poico, ncol);
+  vecalloc(&l, lcmin);
+
+  /* square roots of the row and column weights, and the table            */
+  lecvec(poili, "cbfcpl");
+  sqrvec(poili);
+  lecvec(poico, "cbfcpc");
+  sqrvec(poico);
+  lecmat(w, "cbfcta");
+
+  /* weight every cell; the sum of the squared cells is the total inertia */
+  inertotal=0;
+  for (i=1;i<=nrow;i++) {
+    a1 = poili[i];
+    for (j=1;j<=ncol;j++) {
+      s1 = w[i][j] * a1 * poico[j];
+      w[i][j] = s1;
+      inertotal = inertotal + s1 * s1;
+    }
+  }
+
+  fprintf(summary,"The total inertia of the data was %f\n",inertotal);
+  fprintf(summary, "\nExplanation of the variation by axis "
+                    "(see also eigen.coa)\n");
+
+/*  prodmatAAtB and prodmatAtAB calc product of the scaled distance matrix */
+/*  DiagoComp diagonalises the product matrix ctab                         */
+/*  editvalpro output the eigen values                                     */
+
+  dot(1,10);
+  if (rows_fewer) prodmatAAtB(w, ctab);
+  else            prodmatAtAB(w, ctab);
+
+  DiagoComp(lcmin, ctab, l, &rang);
+  f1=pcoa->axis;
+  editvalpro(summary, l, lcmin, inertotal);
+  for (j=1;j<=lcmin;j++) {
+    auxi[j][1] = l[j];
+    auxi[j][2] = l[j]/inertotal;
+  }
+  sqrvec(l);
+
+  /* no axis count requested: keep two axes, or one if that is all we have */
+  if (f1==0) f1 = (lcmin == 1) ? 1 : 2;
+
+  /* output the relative inertia values                                   */
+  ecrmat(auxi, "cbfcvp");
+
+  /* Calculate the factorial coordinates                                  */
+  if (rows_fewer) {
+    taballoc(&vp2, ncol, f1);
+    for (j=1;j<=ncol;j++) {
+      for (k=1;k<=f1;k++) {
+        s = 0;
+        for (i=1;i<=nrow;i++)
+          s = s + w[i][j] * ctab[i][k];
+        vp2[j][k] = s;
+      }
+    }
+    for (i=1;i<=ncol;i++) {
+      if (poico[i] == 0) continue;
+      for (j=1;j<=f1;j++)
+        vp2[i][j] = vp2[i][j] / poico[i];
+    }
+    for (i=1;i<=nrow;i++) {
+      if (poili[i] == 0) continue;
+      for (j=1;j<=nrow;j++)
+        ctab[i][j] = ctab[i][j] * l[j] / poili[i];
+    }
+    ecrmatred(ctab, f1, "cbfcli");
+    ecrmatred(vp2, f1,  "cbfcco");
+    freetab(vp2);
+  } else {
+    taballoc(&vp1, ncol, f1);
+    taballoc(&vp2, nrow, f1);
+    for (i=1;i<=ncol;i++) {
+      for (j=1;j<=f1;j++)
+        vp1[i][j] = ctab[i][j];
+    }
+    prodmatABC(w, vp1, vp2);
+    for (i=1;i<=nrow;i++) {
+      if (poili[i] == 0.0) continue;
+      for (j=1;j<=f1;j++)
+        vp2[i][j] = vp2[i][j] / poili[i];
+    }
+    for (i=1;i<=ncol;i++) {
+      if (poico[i] == 0) continue;
+      for (j=1;j<=rang;j++)
+        ctab[i][j] = ctab[i][j] * l[j] / poico[i];
+    }
+    ecrmat(vp2, "cbfcli");
+    ecrmatred(ctab, f1, "cbfcco");
+    freetab(vp1);
+    freetab(vp2);
+  }
+
+/* free memory                                                            */
+  freetab(w);
+  freetab(ctab);
+  freetab(auxi);
+  freevec(poili);
+  freevec(poico);
+  freevec(l);
+
+}                                                       /* End of DiagoRC */
+ 
+/************** sqrvec           ******************************************/
+/* Replaces each element v1[1]..v1[n] of the vector v1 by its square      */
+/* root, n being the length held in v1[0].                                */
+/* A negative element is reported and my_exit(99,..) is called            */
+/**************************************************************************/
+void sqrvec (double *v1)
+{
+    const int count = (int) v1[0];
+    int       k;
+
+    for (k=1;k<=count;k++) {
+        if (v1[k] < 0.0) {
+            fprintf(pm->my_err,"Error: Square root of negative number (sqrvec)");
+            my_exit(99,"sqrvec");
+        }
+        v1[k] = sqrt(v1[k]);
+    }
+}
+
+/************** prodmatAAtB         ***************************************/
+/* Calculate the product of matrix a and its transpose (a * a') and       */
+/* return it as matrix b. Only the upper triangle is calculated, each     */
+/* value is copied to its mirror cell. dot() is called once per row       */
+/**************************************************************************/
+void prodmatAAtB (double **a, double **b)
+{
+    const int nrow = (int) a[0][0];
+    const int ncol = (int) a[1][0];
+    int       r1, r2, c;
+    double    sum;
+
+    for (r1=1;r1<=nrow;r1++) {
+        dot ( 1 , 10 );
+        for (r2=r1;r2<=nrow;r2++) {
+            sum = 0;
+            for (c=1;c<=ncol;c++)
+                sum = sum + a[r1][c] * a[r2][c];
+            b[r1][r2] = sum;
+            b[r2][r1] = sum;
+        }
+    }
+}
+
+/************** prodmatABC          ***************************************/
+/* Calculate the product of matrix a*b and return it as matrix c.         */
+/* The rows and columns of a are taken from a[0][0] and a[1][0], the      */
+/* columns of b from b[1][0]. dot() is called once per row of a           */
+/**************************************************************************/
+void prodmatABC (double **a, double **b, double **c)
+{
+    const int nrow  = (int) a[0][0];
+    const int inner = (int) a[1][0];
+    const int ncol  = (int) b[1][0];
+    int       r, m, k;
+    double    sum;
+
+    for (r=1;r<=nrow;r++) {
+        dot(1,10);
+        for (k=1;k<=ncol;k++) {
+            sum = 0;
+            for (m=1;m<=inner;m++)
+                sum = sum + a[r][m] * b[m][k];
+            c[r][k] = sum;
+        }
+    }
+}
+
+/************** prodmatAtAB         ***************************************/
+/* Calculate the product of the transpose of matrix a and a (a' * a) and  */
+/* return it as matrix b. Only the upper triangle is calculated, each     */
+/* value is copied to its mirror cell. dot() is called once per column    */
+/**************************************************************************/
+void prodmatAtAB (double **a, double **b)
+{
+    const int nrow = (int) a[0][0];
+    const int ncol = (int) a[1][0];
+    int       c1, c2, r;
+    double    sum;
+
+    for (c1=1;c1<=ncol;c1++) {
+        dot(1,100);
+        for (c2=c1;c2<=ncol;c2++) {
+            sum = 0;
+            for (r=1;r<=nrow;r++)
+                sum = sum + a[r][c2] * a[r][c1];
+            b[c1][c2] = sum;
+            b[c2][c1] = sum;
+        }
+    }
+}
+
+/**************  editvalpro         ***************************************/
+/* Reports the eigenvalues vp[1..n]: for each of the first 40 factors the */
+/* eigenvalue, its relative inertia (eigenvalue/s) and the cumulative     */
+/* relative inertia are written two to a line to ficlist and one to a     */
+/* line, separated by pm->seperator, to the file eigen.coa.               */
+/* Negative eigenvalues are reported to ficlist and reset to zero in vp   */
+/*   ficlist   open summary file                                          */
+/*   vp        eigenvalues, modified as described above                   */
+/*   n         number of eigenvalues                                      */
+/*   s         total inertia                                              */
+/**************************************************************************/
+void editvalpro (FILE *ficlist, double *vp, int n, double s)
+{
+  double        cum = 0.0;                 /* running sum of eigenvalues  */
+  int           i, n1;
+  float         v2, v3, v4;
+  FILE *eigen=NULL;  
+  char sp;
+
+  sp=pm->seperator;
+
+  if ( (eigen=open_file("","eigen.coa","w",FALSE))==NULL ) 
+          my_exit(1,"editvalpro");
+
+  for (i=1;i<=n;i++) {
+    if (vp[i] < 0.0) {
+      v2 = (float) vp[i];
+      fprintf(ficlist, "Eigenvalue number %d is negative : %+.4E\n", i, v2);
+      vp[i] = 0.0;
+    }
+  }
+  n1 = (n > 40) ? 40 : n;
+  fprintf(ficlist, "Num. Eigenval.   R.Iner.  R.Sum    "
+			"|Num. Eigenval.   R.Iner.  R.Sum  |");
+  fprintf(ficlist, "\n");
+  for (i=1;i<=n1;i=i+2) {
+    /* left hand column of the summary table                              */
+    cum = cum + vp[i];
+    v2 = (float) vp[i];
+    v3 = (float)vp[i]/(float)s;
+    v4 = (float)cum/(float)s;
+    fprintf(ficlist, "%.2d   %+.4E %+.4f %+.4f ", i, v2, v3, v4);
+    fprintf(eigen ,"%.2d%c%.4E%c%.4f%c%.4f\n",i,sp,v2,sp,v3,sp,v4);
+
+    /* right hand column, absent on the last line when n1 is odd          */
+    if (i < n1) {
+      cum = cum + vp[i+1];
+      v2 = (float)vp[i+1];
+      v3 = (float)vp[i+1]/(float)s;
+      v4 = (float)cum/(float)s;
+      fprintf(ficlist, "  |%.2d   %+.4E %+.4f %+.4f |", i+1, v2, v3, v4);
+      fprintf(eigen ,"%.2d%c%.4E%c%.4f%c%.4f\n",i+1,sp,v2,sp,v3,sp,v4);
+    }
+    fprintf(ficlist, "\n");
+  }
+  fprintf(ficlist, "\n");
+  fileclose(&eigen);
+}
+
+/**************  ecrmatred        *****************************************/
+/* Output the first c1 columns of every row of the matrix tab to the      */
+/* binary file nfic as 4 byte floats. The number of rows is taken from    */
+/* tab[0][0]. my_exit is called if the file cannot be opened or written   */
+/**************************************************************************/
+void ecrmatred (double **tab, int c1, char *nfic)
+{
+    const int nrow = (int) tab[0][0];
+    FILE     *fic  = NULL;
+    float     cell;
+    int       r, c;
+
+    fic = open_file("",nfic,"wb",FALSE);
+    if ( fic == NULL ) my_exit(1,"ecrmatred");
+
+    for (r=1;r<=nrow;r++) {
+        for (c=1;c<=c1;c++) {
+            cell = (float) tab[r][c];
+            if ( fwrite((const char *)&cell, 4, 1, fic) != 1) {
+                fprintf(pm->my_err,"(ecrmatred)");
+                my_exit(4,"ecrmatred");
+            }
+        }
+    }
+
+    fileclose(&fic);
+}
+
+/**************  readvec            ***************************************/
+/* Read the next v1[0] floats of 4 bytes from the open binary file fic    */
+/* into the elements v1[1]..v1[n] of the vector v1.                       */
+/* my_exit(5,..) is called if the file is too short                       */
+/**************************************************************************/
+void readvec (double *v1, FILE *fic)
+{
+    const int count = (int) v1[0];
+    float     cell;
+    int       k;
+
+    for (k=1;k<=count;k++) {
+        if ( fread((char *)&cell, 4, 1, fic) != 1) {
+            fprintf(pm->my_err,"(readvec)");
+            my_exit(5,"readvec");
+        }
+        v1[k] = cell;
+    }
+}
+
+/**************  DiagoComp         ***************************************/
+/* Diagonalisation of matrix w (n0 x n0, indexed from 1). On return      */
+/* d[1..n0] is sorted in descending order, the columns of w are permuted */
+/* to match, and *rang counts the d[i] that were not zeroed.             */
+/* T. FOUCART, Analyse factorielle de tableaux multiples, Masson, Paris  */
+/* 1984, 185p., p. 62; after VPROP and TRIDI (LEBART et al.). Fortran port*/
+/*************************************************************************/
+void DiagoComp (int n0, double **w, double *d, int *rang)
+{
+    double          *s;
+    double          a, b, c, x, xp, q, bp, ab, ep, h, t, u , v;
+    double          dble;
+    int             ni, i, i2, j, k, jk, ijk, ij, l, ix, m, m1, isnou;
+    
+    vecalloc(&s, n0);                /* work vector, used as s[1..n0]     */
+    a = 0.000000001;                 /* relative tolerance on |s[j]|      */
+    ni = 100;                        /* iteration cap for each index k    */
+    if (n0 == 1) {                   /* 1 x 1 matrix: nothing to iterate  */
+        d[1] = w[1][1];
+        w[1][1] = 1.0;
+        *rang = 1;
+        freevec (s);
+        return;
+    }
+    /* Phase 1: rows i = n0..2; only entries w[r][c] with c <= r are read */
+    for (i2=2;i2<=n0;i2++) {
+       
+        b=0.0;
+        c=0.0;
+        i=n0-i2+2;                   /* i runs n0, n0-1, ..., 2           */
+        k=i-1;
+        if (k < 2) goto Et1;
+        for (l=1;l<=k;l++) {
+            c = c + fabs((double) w[i][l]);
+        }
+        if (c != 0.0) goto Et2;      /* c = sum of |w[i][1..k]|, a scale  */
+        
+Et1:    s[i] = w[i][k];
+        goto Etc;
+        
+Et2:    for (l=1;l<=k;l++) {
+            x = w[i][l] / c;
+            w[i][l] = x;
+            b = b + x * x;
+        }
+        xp = w[i][k];
+        ix = 1;
+        if (xp < 0.0) ix = -1;       /* ix = sign of w[i][k]              */
+        
+/*      q = -sqrt(b) * ix; */
+        dble = b;
+        dble = -sqrt(dble);
+        q = dble * ix;
+
+        s[i] = c * q;
+        b = b - xp * q;
+        w[i][k] = xp - q;
+        xp = 0;
+        for (m=1;m<=k;m++) {
+            w[m][i] = w[i][m] / b / c;
+            q = 0;
+            for (l=1;l<=m;l++) {
+                q = q + w[m][l] * w[i][l];
+            }
+            m1 = m + 1;
+            if (k < m1) goto Et3;
+            for (l=m1;l<=k;l++) {
+                q = q + w[l][m] * w[i][l];
+            }
+            
+Et3:        s[m] = q / b;
+            xp = xp + s[m] * w[i][m];
+        }
+        bp = xp * 0.5 / b;
+        for (m=1;m<=k;m++) {
+            xp = w[i][m];
+            q = s[m] - bp * xp;
+            s[m] = q;
+            for (l=1;l<=m;l++) {
+                w[m][l] = w[m][l] - xp * s[l] - q * w[i][l];
+            }
+        }
+        for (l=1;l<=k;l++) {
+            w[i][l] = c * w[i][l];   /* undo the scaling by c             */
+        }
+        
+Etc:    d[i] = b;
+    } /* for (i2=2;i2<=n0;i2++) */
+    
+    s[1] = 0.0;
+    d[1] = 0.0;
+    /* Phase 2: d[i] takes the diagonal w[i][i]; row/column i of w reset  */
+    for (i=1;i<=n0;i++) {
+     dot(1,100);                     /* dot(): defined elsewhere; presumably progress output - confirm */
+        k = i - 1;
+        if (d[i] == 0.0) goto Et4;
+        for (m=1;m<=k;m++) {
+            q = 0.0;
+            for (l=1;l<=k;l++) {
+                q = q + w[i][l] * w[l][m];
+            }
+            for (l=1;l<=k;l++) {
+                w[l][m] = w[l][m] - q * w[l][i];
+            }
+        }
+        
+Et4:    d[i] = w[i][i];
+        w[i][i] = 1.0;
+        if (k < 1) goto Et5;
+        for (m=1;m<=k;m++) {
+            w[i][m] = 0.0;
+            w[m][i] = 0.0;
+        }
+
+Et5:;
+    }
+    /* Phase 3: iterate on d[] and s[] (s shifted down one slot first)    */
+    for (i=2;i<=n0;i++) {
+        s[i-1] = s[i];
+    }
+    s[n0] = 0.0;
+    for (k=1;k<=n0;k++) {
+        m = 0;                       /* iterations spent on this k        */
+
+Et6:    for (j=k;j<=n0;j++) {
+     dot(1,100);
+            if (j == n0) goto Et7;
+            ab = fabs((double) s[j]);
+            ep = a * (fabs((double) d[j]) + fabs((double) d[j+1]));
+            if (ab < ep) goto Et7;   /* s[j] negligible next to d[j], d[j+1] */
+        }
+    
+Et7:    isnou = 1;
+        h = d[k];
+        if (j == k) goto Eta;        /* nothing left to do for this k     */
+        if (m < ni) goto Etd;
+        /* ni iterations used up; NOTE(review): relies on my_exit not returning */
+        fprintf(pm->my_err,"Error: can't compute matrix eigenvalues");
+        my_exit(99,"corresp");
+        
+Etd:    m = m + 1;
+        q = (d[k+1]-h) * 0.5 / s[k];
+        
+/*      t = sqrt(q * q + 1.0); */
+        dble = q * q + 1.0;
+        dble = sqrt(dble);
+        t = dble;
+        
+        if (q < 0.0) isnou = -1;     /* isnou = sign of q                 */
+        q = d[j] - h + s[k] / (q + t * isnou);
+        u = 1.0;
+        v = 1.0;
+        h = 0.0;
+        jk = j-k;
+        for (ijk=1;ijk<=jk;ijk++) {
+    dot(1,100);
+            i = j - ijk;             /* i runs j-1 down to k              */
+            xp = u * s[i];
+            b = v * s[i];
+            if (fabs((double) xp) < fabs((double) q)) goto Et8;
+            u = xp / q;
+            
+/*          t = sqrt(u * u + 1); */
+            dble = u * u + 1.0;
+            dble = sqrt(dble);
+            t = dble;
+            
+            s[i+1] = q * t;
+            v = 1 / t;
+            u = u * v;
+            goto Et9;
+
+Et8:        v = q / xp;
+
+/*          t = sqrt(1 + v * v); */
+            dble = 1.0 + v * v;
+            dble = sqrt(dble);
+            t = dble;
+            
+            s[i+1] = t * xp;
+            u = 1 / t;
+            v = v * u;
+
+Et9:
+            q = d[i+1] - h;
+            t = (d[i] - q) * u + 2.0 * v * b;
+            h = u * t;
+            d[i+1] = q + h;
+            q = v * t - b;
+            for (l=1;l<=n0;l++) {    /* combine columns i and i+1 of w    */
+                xp = w[l][i+1];
+                w[l][i+1] = u * w[l][i] + v * xp;
+                w[l][i] = v * w[l][i] - u * xp;
+            }
+        }
+        d[k] = d[k] - h;
+        s[k] = q;
+        s[j] = 0.0;
+        goto Et6;                    /* look again for a negligible s[j]  */
+
+Eta:;
+    } /* for (k=1;k<=n0;k++) */
+    /* Selection sort: largest d first, columns of w swapped alongside    */
+    for (ij=2;ij<=n0;ij++) {
+     dot(1,300);
+        i = ij - 1;
+        l = i;
+        h = d[i];
+        for (m=ij;m<=n0;m++) {
+            if (d[m] >= h) {
+                l = m;
+                h = d[m];
+            }
+        }
+        if (l == i) {
+            goto Etb;
+        } else {
+            d[l] = d[i];
+            d[i] = h;
+        }
+        for (m=1;m<=n0;m++) {
+            h = w[m][i];
+            w[m][i] = w[m][l];
+            w[m][l] = h;
+        }
+
+Etb:;
+    } /* for (ij=2;ij<=n0;ij++) */
+    /* Rank: values below 1e-5 of d[1] are zeroed; NOTE(review): d[1]==0 divides by zero */
+    *rang = 0;
+    for (i=1;i<=n0;i++) {
+        if (d[i] / d[1] < 0.00001) d[i] = 0.0;
+        if (d[i] != 0.0) *rang = *rang + 1;
+    }
+    freevec(s);
+} /* DiagoComp */
+
+/************** inertialig         ***************************************/
+/* Called when advanced correspondence analysis option has been selected */
+/* Reports the absolute and relative contribution of each gene (row) to  */
+/* the inertia of the pcoa->axis recorded factors, to summary and to the */
+/* file inertia_out; gene names are read line by line from file ncout.   */
+/*************************************************************************/
+void inertialig( char *inertia_out, char *ncout, FILE *summary)
+{
+    int     i, j, k, f1, l1,c1,lcmin;
+    double  **cooli, **w;
+    double      *vtab, *conli, *poili, *poico;
+    double      l0, inertotal, a1, a2, m2, m3, s1;
+    double      temp1=0,temp2=0;
+    FILE *inert_out=NULL,*fnam=NULL;
+
+    l1         =pcoa->rows;
+    c1         =pcoa->colm;
+    f1         =pcoa->axis;
+    inertotal  =pcoa->inertia;
+
+    if( (inert_out=open_file( "",inertia_out,"w",FALSE))==NULL)
+        my_exit(1,"inertia out");
+
+    lcmin = c1; if (l1<lcmin) lcmin=l1;
+    taballoc (&w, l1,c1);
+    vecalloc(&poili, l1);
+    vecalloc(&poico, c1);
+    taballoc(&cooli, l1, f1);
+    vecalloc(&conli, l1);
+    vecalloc(&vtab, lcmin);
+
+    lecvec(poili, "cbfcpl");
+    sqrvec(poili);
+    lecvec(poico, "cbfcpc");
+    sqrvec(poico);
+    lecmat(cooli, "cbfcli");
+    selectcol("cbfcvp", vtab, 2);
+    lecmat(w, "cbfcta");
+
+    fprintf(summary, "\n\nNumber of rows: %d, columns: %d\n", l1, c1);
+    fprintf(summary, "Total inertia: %8.6G - Number of axes: %d\n\n",
+                      inertotal, f1);
+    fprintf(summary, "Contributions of each gene to the recorded factors "
+                     "A.K.A axes\n");
+
+/* calculate the contribution; assumes vecalloc zero-fills conli - confirm */
+
+    for (i=1;i<=l1;i++) {
+        a1 = poili[i];
+        for (j=1;j<=c1;j++) {
+            s1 = w[i][j] * a1 * poico[j];
+            s1 = s1 * s1;
+            conli[i] = conli[i] + s1;
+        }
+    }
+
+/* scale the vectors by 1/inertia total                                    */
+
+    scalvec(conli, 1.0/inertotal);
+
+
+  if( (fnam=open_file("",ncout, "r",FALSE))==NULL) my_exit(6,"inertialgn");
+
+    fprintf(summary, "Row inertia\n");
+    fprintf(summary, "All contributions are in 1/10000\n\n");
+    fprintf(summary, "----------Absolute contributions----------\n");
+    fprintf(summary, "Short_Gene_Name|Num  |");
+    for (k=1;k<=f1;k++) {
+        fprintf(summary, "Fac%2d|", k);
+
+    }
+    fprintf(summary  , "\n");
+    fprintf(inert_out, "\n");
+    for (i=1;i<=l1;i++) {
+      /* an exhausted names file yields an empty name, not the previous one */
+      if (fgets(pm->junk,BUFSIZ,fnam)==NULL) pm->junk[0]='\0';
+      pm->junk[35]='\0';             /* blanks become NULs: keep first word */
+      for ( j=35 ; j>=0; j--) if ( isspace((unsigned char)pm->junk[j]) )
+          pm->junk[j]='\0';
+
+      fprintf(inert_out   ,"%-.15s%c",pm->junk,pm->seperator);
+      fprintf(summary, "%-15.15s",pm->junk);
+
+      fprintf(summary  ,"|%5d|", i);
+      fprintf(inert_out,"%d%c", i,pm->seperator);
+
+      l0 = poili[i]*poili[i]/inertotal;
+
+      for (j=1;j<=f1;j++) {
+        temp1=(cooli[i][j] * cooli[i][j]);         /* bug fix for Think C      */
+        temp2=(l0 / vtab[j]);                      /* need to split calculation*/
+        a1 = temp1 * temp2;
+        fprintf(summary, "%5d|", (int) (a1 * 10000));
+        fprintf(inert_out,"%d%c",(int) (a1 * 10000),pm->seperator);
+      }
+        fprintf(summary, "\n");
+        fprintf(inert_out,"\n");
+    }
+    fprintf(summary, "\n\nRelative contributions\nThis is the variation \n"
+            "in the %s usage of each gene that is \n"
+            "explained by each axis/factor\n"
+            "see also %s \n",
+            (pm->coa=='a')?"amino acid":"codon",inertia_out);
+
+    fclose(fnam);                    /* reopen to read the names again     */
+    if( (fnam=open_file("",ncout, "r",FALSE))==NULL)
+        my_exit(6,"inertialgn");
+
+    fprintf(summary, "----------Relative contributions----------\n");
+    fprintf(summary, "Short_gene_name|Num  |");
+    for (k=1;k<=f1;k++) {
+        fprintf(summary, "Fac%2d|", k);
+    }
+    fprintf(summary, "|Remains| Weight | Cont.|");
+    fprintf(summary, "\n");
+    fprintf(inert_out,"\n");
+
+    for (i=1;i<=l1;i++) {
+      /* same name handling as in the absolute-contribution pass            */
+      if (fgets(pm->junk,BUFSIZ,fnam)==NULL) pm->junk[0]='\0';
+      pm->junk[35]='\0';
+      for ( j=35 ; j>=0; j--) if ( isspace((unsigned char)pm->junk[j]) )
+          pm->junk[j]='\0';
+
+      fprintf(inert_out   , "%-.15s%c",pm->junk,pm->seperator);
+      fprintf(summary, "%-15.15s",pm->junk);
+
+
+        fprintf(summary, "|%5d|", i);
+        fprintf(inert_out,"%d%c", i,pm->seperator);
+        a2 = 0.;
+        m3 = poili[i]*poili[i]/inertotal;
+        m2 = conli[i];
+        if (m2 == 0.) m2 = 1.;       /* avoid dividing by a zero contribution */
+        for (j=1;j<=f1;j++) {
+            a1 = cooli[i][j] * cooli[i][j] * m3 / m2;
+            a2 = a2 + a1;
+            fprintf(summary, "%5d|", (int) (a1 * 10000));
+            fprintf(inert_out,"%d%c",(int) (a1 * 10000),pm->seperator);
+        }
+        fprintf(summary, "|%5d  ", (int) ((1-a2) * 10000));
+        fprintf(summary, "|%5d   |%5d |\n", (int) (inertotal * m3 * 10000),
+            (int) (m2 * 10000));
+        fprintf(inert_out, "\n");
+    }
+    fprintf(summary  , "\n");
+    fprintf(inert_out, "\n");
+
+                                                        /* free memory    */
+    freetab(w);
+    freevec(poili);
+    freevec(poico);
+    freetab(cooli);
+    freevec(conli);
+    freevec(vtab);
+    fileclose(&inert_out);
+    fileclose(&fnam);
+
+}                                                       /* End of Inertia */
+
+/************** inertiacol         ****************************************/
+/* Called when advanced correspondence analysis option has been selected  */
+/* Reports the absolute and relative contribution of each column (codon,  */
+/* or amino acid when pm->coa == 'a') to the inertia of the pcoa->axis    */
+/* recorded factors; output goes to summary and is appended to inertia_out*/
+/**************************************************************************/
+void inertiacol(char *inertia_out, FILE *summary )
+{
+    int             x,i, j, k, f1, l1,c1, lcmin;
+    double  **cooco, **w;
+    double      *vtab, *conco, *poili, *poico;
+    double      l0, inertotal, a1, a2, m2, m3, s1;
+    FILE *inert_out=NULL;
+
+    if( (inert_out=open_file( "",inertia_out,"a",FALSE))==NULL)
+        my_exit(1,"inertia out2");
+
+    l1      =pcoa->rows;
+    c1      =pcoa->colm;
+    f1      =pcoa->axis;
+    inertotal =pcoa->inertia;
+
+    lcmin = c1; if (l1<lcmin) lcmin=l1;
+
+    taballoc (&w, l1,c1);
+    vecalloc(&poili, l1);
+    vecalloc(&poico, c1);
+    taballoc(&cooco, c1, f1);
+    vecalloc(&conco, c1);
+    vecalloc(&vtab, lcmin);
+
+    lecvec(poili, "cbfcpl");
+    sqrvec(poili);
+    lecvec(poico, "cbfcpc");
+    sqrvec(poico);
+    lecmat(cooco, "cbfcco");
+    selectcol("cbfcvp", vtab, 2);
+    lecmat(w, "cbfcta");
+
+    fprintf(summary, "\n\nColumn inertia\nNumber of genes: %d, columns: "
+                     "%d\n\n", l1, c1);
+    fprintf(summary, "This is the fraction of the total inertia that is\n"
+            "explained for each %s by each of the recorded\n"
+            "factors or axes\n\n\n",(pm->coa=='a')? "amino acids":"codons");
+
+    /* per-column contribution; assumes vecalloc zero-fills conco - confirm */
+    for (i=1;i<=l1;i++) {
+        a1 = poili[i];
+        for (j=1;j<=c1;j++) {
+            s1 = w[i][j] * a1 * poico[j];
+            s1 = s1 * s1;
+            conco[j] = conco[j] + s1;
+        }
+    }
+
+    /* scale the vectors by 1/inertia total                                     */
+    scalvec(conco, 1.0/inertotal);
+
+    fprintf(summary, "\n\nColumn inertia\n");
+    fprintf(summary, "All contributions are in 1/10000\n\n");
+    fprintf(summary, "----------Absolute contributions----------\n");
+    fprintf(summary, "Key|Num  |");
+    for (k=1;k<=f1;k++) {
+        fprintf(summary, "Fac%2d|", k);
+    }
+    fprintf(summary, "\n");
+    for (x=0,i=1;i<=c1;i++) {
+
+      if (pm->coa == 'a' ){
+          /* advance x to the next amino acid flagged in pcoa->amino        */
+          while(pcoa->amino[++x] == FALSE);
+
+        fprintf(summary, "%s", paa->aa3[x]);
+        fprintf(inert_out,"%s%c",paa->aa3[x],pm->seperator);
+      }else{
+          /* advance x to the next codon flagged in pcoa->codons            */
+          while(pcoa->codons[++x] == FALSE);
+
+          fprintf(summary, "%s", paa->cod[x]);
+        fprintf(inert_out,"%s%c",paa->cod[x],pm->seperator);
+      }
+
+      fprintf(summary, "|%5d|", i);
+      fprintf(inert_out,"%d%c",i,pm->seperator);
+
+      l0 = poico[i]*poico[i]/inertotal;
+      for (j=1;j<=f1;j++) {
+        a1 = cooco[i][j] * cooco[i][j] * l0 / vtab[j];
+        fprintf(summary,  "%5d|", (int) (a1 * 10000));
+        fprintf(inert_out,"%i%c", (int) (a1 * 10000),pm->seperator );
+      }
+        fprintf(summary,  "\n");
+        fprintf(inert_out,"\n");
+    }
+    fprintf(summary,  "\n");
+    fprintf(inert_out,"\n");
+    fprintf(summary, "----------Relative contributions----------\n");
+    fprintf(summary, "Key|Num  |");
+    for (k=1;k<=f1;k++) {
+        fprintf(summary, "Fac%2d|", k);
+    }
+    fprintf(summary, "|Remains| Weight | Cont.|");
+    fprintf(summary, "\n");
+    for (x=0,i=1;i<=c1;i++) {
+
+
+      if (pm->coa == 'a' ){
+
+          while(pcoa->amino[++x] == FALSE);
+
+        fprintf(summary, "%s", paa->aa3[x]);
+        fprintf(inert_out,"%s%c",paa->aa3[x],pm->seperator);
+      }else{
+
+          while(pcoa->codons[++x] == FALSE);
+
+        fprintf(summary, "%s", paa->cod[x]);
+        fprintf(inert_out,"%s%c",paa->cod[x],pm->seperator);
+      }
+
+        fprintf(summary, "|%5d|", i);
+        fprintf(inert_out,"%d%c",i,pm->seperator);
+        a2 = 0.;
+        m3 = poico[i]*poico[i]/inertotal;
+        m2 = conco[i];
+        if (m2 == 0.) m2 = 1.;       /* avoid dividing by a zero contribution */
+        for (j=1;j<=f1;j++) {
+            a1 = cooco[i][j] * cooco[i][j] * m3 / m2;
+            a2 = a2 + a1;
+            fprintf(summary, "%5d|", (int) (a1 * 10000));
+            fprintf(inert_out,"%d%c",(int) (a1 * 10000),pm->seperator);
+        }
+        fprintf(summary, "|%5d  ", (int) ((1-a2) * 10000));
+        fprintf(summary, "|%5d   |%5d |\n",
+                        (int) (inertotal * m3 * 10000), (int) (m2 * 10000));
+        fprintf(inert_out,"\n");
+    }
+    fprintf(summary, "\n");
+    fileclose(&inert_out);           /* close the appended file: no handle leak */
+    freetab(w);
+    freetab(cooco);
+    freevec(poili);
+    freevec(poico);
+    freevec(conco);
+    freevec(vtab);
+}                                                                       /* End of Inertia */
+
+/**************  selectcol         ***************************************/
+/* Extract column numcol (1 or 2) from the binary file *nfic. The file   */
+/* is read through readvec as min(pcoa->rows, pcoa->colm) records of two */
+/* values; the chosen value of record i is stored in col[i]. A numcol    */
+/* outside 1..2 aborts through my_exit.                                  */
+/*************************************************************************/
+void selectcol (char *nfic , double *col, int numcol)
+{
+    FILE    *fic=NULL;
+    int i, c1,l1;
+    double *vlec;
+
+    c1=2;                                 /* values per record in *nfic  */
+    l1=( pcoa->rows < pcoa->colm)? pcoa->rows:pcoa->colm;
+
+    /* readvec takes its element count from vlec[0], so index 0 is not a */
+    /* column: reject numcol below 1 as well as above c1                 */
+    if (numcol<1 || numcol>c1) {
+        fprintf (pm->my_err,"fatal input-output error numcol out of range "
+                            "1..%d (selectcol)\n", c1);
+        my_exit(99,"corresp");
+    }
+    vecalloc(&vlec, c1);
+    if( (fic=open_file( "",nfic,"rb",FALSE))==NULL) my_exit(6,"nfic4");
+    for (i=1;i<=l1;i++) {
+        readvec(vlec, fic);
+        col[i] = vlec[numcol];
+    }
+    fileclose(&fic);
+    freevec(vlec);
+}
+ 
+/**************  suprow            ***************************************/
+/* Adds supplementary genes after the correspondence analysis has        */
+/* completed for an initial set of genes. The num_seq x pcoa->colm table */
+/* in nfictasup has each row divided by its total and by poico (read     */
+/* from "cbfcpc"), then is projected on the axes of the initial analysis */
+/* ("cbfcco", eigenvalues from nficvp). The coordinates are appended to  */
+/* file nficlisup and written to summary; gene names are read one per    */
+/* line from the file named by option.                                   */
+/*************************************************************************/
+void suprow (int num_seq, char *nficvp, char *nfictasup, char *nficlisup, 
+char*option , FILE *summary)
+{
+    int         l1,c1,l2,c2,i,j,k;
+    double      **compos, **tabsup;
+    double      *vp, *poico;
+    double      a1, a2;
+    FILE        *ficlisup=NULL;
+    FILE        *fnam=NULL;
+
+    /* l2 x c2: supplementary table; l1 x c1: table of the initial run   */
+    l2=num_seq;
+    c2=pcoa->colm;
+    l1=pcoa->rows;
+    c1=pcoa->colm;
+
+   if( (fnam=open_file("",option, "r",FALSE))==NULL) 
+       my_exit(6,"sup row corresp");
+
+
+    taballoc(&tabsup, l2, c2);
+    lecmat(tabsup, nfictasup);
+
+    taballoc(&compos, c1, pcoa->axis);
+    lecmat(compos, "cbfcco");
+
+    /* vp[j]: value read for axis j from nficvp; its square root is      */
+    /* taken below before it divides column j of compos                  */
+    vecalloc(&vp, pcoa->axis);
+    lecvalpro(vp, nficvp);
+
+    vecalloc(&poico, c1);
+    lecvec(poico, "cbfcpc");
+
+    for (j=1;j<=pcoa->axis;j++) {
+        vp[j] = sqrt((double)vp[j]);
+        a1 = vp[j];
+        for (i=1;i<=c1;i++) {
+            compos[i][j] = compos[i][j] / a1;
+        }
+    }
+    for (i=1;i<=c1;i++) {
+        a1 = poico[i];
+        for (j=1;j<=pcoa->axis;j++) {
+            compos[i][j] = compos[i][j] * a1;
+        }
+    }
+
+ /* Transform genes with the initial factor                               */
+
+    for (i=1;i<=l2;i++) {
+            a1 = 0.0;
+            for (j=1;j<=c1;j++) {
+                a1 = a1 + tabsup[i][j];
+            }
+            if (a1 != 0.) {
+                for (j=1;j<=c1;j++) {
+                    a2 = tabsup[i][j] / a1;
+                    if (poico[j]!=0) {tabsup[i][j] = a2 / poico[j];}
+                }
+            }
+        }
+
+  /* Position the suppli. genes on the original factors                    */
+
+  if( (ficlisup = open_file("",nficlisup,"a",FALSE))==NULL ) 
+         my_exit(1,"nficlisup");
+
+   fprintf(summary,"\n\nThe position of each additional gene by axis " 
+                "(see also %s )\n",option);
+
+    fprintf(summary, "Additional genes added after COA: \n");
+    fprintf(summary, "Number of genes: %d, columns: %d\n\n", l1, c1);
+
+
+    for (i=1;i<=l2;i++) { 
+      if (fgets(pm->junk,BUFSIZ,fnam) == NULL)
+          pm->junk[0]='\0';          /* names exhausted: print an empty one */
+      pm->junk[35]='\0';             /* blanks become NULs: keep first word */
+      for ( j=35 ; j>=0; j--) 
+          if ( isspace((unsigned char)pm->junk[j]) ) pm->junk[j]='\0';
+       fprintf(ficlisup, "%s%c",pm->junk,pm->seperator);
+       fprintf(summary , "%s%c",pm->junk,pm->seperator);
+
+        for (k=1;k<=pcoa->axis;k++) {
+            a1 = 0.;
+            for (j=1;j<=c1;j++) {
+                a1 = a1 + tabsup[i][j] * compos[j][k];
+            }
+            fprintf(ficlisup,"%f%c",(float)a1,pm->seperator);
+            fprintf(summary ,"%10.5f%c",(float)a1,pm->seperator);
+        }
+    fprintf(ficlisup,"\n");
+    fprintf(summary ,"\n");
+    }
+    fileclose(&ficlisup);
+
+    freetab (tabsup);
+    freetab (compos);
+    freevec (vp);
+    freevec(poico);
+    fileclose(&fnam);
+}
+
+/**************  lecvalpro         ***************************************/
+/* Read v1[1..n], n = (int) v1[0], from binary file nfic. The file is    */
+/* read as pairs of 4-byte floats: the first is kept, the second skipped */
+/*************************************************************************/
+void lecvalpro (double *v1, char *nfic)
+{
+    FILE    *in = NULL;
+    float   value;
+    int     idx, count;
+
+    in = open_file("", nfic, "rb", FALSE);
+    if (in == NULL) my_exit(6, "lecvalpro");
+    count = (int) v1[0];
+    for (idx = 1; idx <= count; idx++) {
+        if (fread((char *) &value, 4, 1, in) != 1) {     /* value to keep */
+            fprintf(pm->my_err, "(lecvalpro)");
+            my_exit(5, "lecvalpro");
+        }
+        v1[idx] = value;
+        if (fread((char *) &value, 4, 1, in) != 1) {     /* discarded     */
+            fprintf(pm->my_err, "(lecvalpro)");
+            my_exit(5, "lecvalpro2");
+        }
+    }
+    fileclose(&in);
+}
+
diff --git a/indices.txt b/indices.txt
new file mode 100755
index 0000000..d15ef25
--- /dev/null
+++ b/indices.txt
@@ -0,0 +1,139 @@
+Codon usage indices 
+
+This document describes the indices calculated by CodonW. By default only 
+the G+C content of the sequence is reported, the others being dependent on 
+the genetic code selected. More than one index may be calculated at the same 
+time.  
+
+Codon Adaptation Index (CAI) (Sharp and Li 1987). 
+CAI is a measurement of the relative adaptiveness of the codon usage of a 
+gene towards the codon usage of highly expressed genes. The relative 
+adaptiveness (w) of each codon is the ratio of the usage of each codon, to 
+that of the most abundant codon for the same amino acid. The relative 
+adaptiveness of codons for albeit a limited choice of species, can be 
+selected from Menu 3. The user can also input a personal choice of values. 
+The CAI index is defined as the geometric mean of these relative 
+adaptiveness values. Non-synonymous codons and termination codons (dependent 
+on genetic code) are excluded. 
+ 
+To prevent a codon absent from the reference set but present in other genes 
+from having a relative adaptiveness value of zero, which would cause CAI to 
+evaluate to zero for any genes which used that codon, it was suggested that 
+absent codons should be assigned a frequency of 0.5 when estimating w (Sharp 
+and Li 1987). An alternative suggestion was that w should be adjusted to 
+0.01 where otherwise it would be less than this value (Bulmer 1988). CodonW 
+does not adjust the w value if a non-zero input value is found; zero values 
+are assigned a value of 0.01. 
+
+Frequency of Optimal codons (Fop) (Ikemura 1981). 
+This index is the ratio of optimal codons to synonymous codons (genetic 
+code dependent). Optimal codons for several species are in-built and can be 
+selected using Menu 3. By default, the optimal codons of E. coli are 
+assumed. The user may also enter a personal choice of optimal codons. If 
+rare synonymous codons have been identified, there is a choice of 
+calculating the original Fop index or a modified Fop index. Fop values for 
+the original index are always between 0 (where no optimal codons are used) 
+and 1 (where only optimal codons are used). When calculating the modified 
+Fop index, negative values are adjusted to zero. 
+
+Codon Bias Index (CBI) (Bennetzen and Hall 1982). 
+Codon bias index is another measure of directional codon bias, it measures 
+the extent to which a gene uses a subset of optimal codons. CBI is similar 
+to Fop as used by Ikemura, with expected usage used as a scaling factor. In a 
+gene with extreme codon bias, CBI will equal 1.0, in a gene with random 
+codon usage CBI will equal 0.0. Note that it is possible for the number of 
+optimal codons to be less than expected by random chance. This results in a 
+negative value for CBI.
+
+The effective number of codons (NC) (Wright 1990).
+This index is a simple measure of overall codon bias and is analogous to the 
+effective number of alleles measure used in population genetics. Knowledge 
+of the optimal codons or a reference set of highly expressed genes is 
+unnecessary. Initially the homozygosity for each amino acid is estimated 
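+from the squared codon frequencies (see Equation 5).
+
+	$F = (n \sum_{i=1}^{k} p_i^2 - 1) / (n - 1)$, where $p_i$ is the frequency of the $i$-th of the $k$ synonymous codons of the amino acid and $n$ is the number of times the amino acid occurs in the gene.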
+If amino acids are rare or missing, adjustments must be made. When 
+there are no amino acids in a synonymous family, Nc is not calculated 
+as the gene is either too short or has extremely skewed amino acid 
+usage (Wright 1990). An exception to this is made for genetic codes 
+where isoleucine is the only 3-fold synonymous amino acid, and is not 
+used in the protein gene. The reported value of Nc is always between 20 
+(when only one codon is effectively used for each amino acid) and 61 
+(when codons are used randomly). If the calculated Nc is greater than 
+61 (because codon usage is more evenly distributed than expected), it 
+is adjusted to 61.
+
+G+C content of the gene. 
+The frequency of nucleotides that are guanine or cytosine.
+
+G+C content 3rd position of synonymous codons (GC3s).
+This is the fraction of codons, that are synonymous at the third codon 
+position, which have either a guanine or cytosine at that third codon 
+position. 
+
+Silent base compositions. 
+Selection of this option calculates four separate indices, i.e. G3s, C3s, 
+A3s & T3s. Although correlated with GC3s, this index is not directly 
+comparable. It quantifies the usage of each base at synonymous third codon 
+positions. When calculating GC3s each synonymous amino acid has at least one 
+synonym with G or C in the third position. Two or three fold synonymous 
+amino acids do not have an equal choice between bases in the synonymous 
+third position. The index A3s is the frequency that codons have an A at their 
+synonymous third position, relative to the amino acids that could have a 
+synonym with A in the synonymous third codon position. The codon usage 
+analysis of Caenorhabditis elegans identified a trend correlated with the 
+frequency of G3s. Though it was not clear whether it reflected variation in 
+base composition (or mutational biases) among regions of the C. elegans 
+genome, or another factor (Stenico et al. 1994).
+
+Length silent sites (Lsil). 
+Frequency of synonymous codons.
+
+Length  amino acids (Laa). 
+Equivalent to the number of translatable codons.
+
+Hydropathicity of protein. 
+The general average hydropathicity or (GRAVY) score, for the hypothetical 
+translated gene product. It is calculated as the arithmetic mean of the sum 
+of the hydropathic indices of each amino acid (Kyte and Doolittle 1982). 
+This index has been used to quantify the major COA trends in the amino acid 
+usage of E. coli genes (Lobry and Gautier 1994). 
+
+Aromaticity score
+The frequency of aromatic amino acids (Phe, Tyr, Trp) in the hypothetical 
+translated gene product. The hydropathicity and aromaticity protein scores 
+are indices of amino acid usage. The strongest trend in the variation in the 
+amino acid composition of E. coli genes is correlated with protein 
+hydropathicity, the second trend is correlated with gene expression, while 
+the third is correlated with aromaticity (Lobry and Gautier 1994). The 
+variation in amino acid composition can have applications for the analysis 
+of codon usage. If total codon usage is analysed, a component of the 
+variation will be due to differences in the amino acid composition of genes. 
+
+Bennetzen, J. L., and B. D. Hall, (1982). Codon selection in yeast. Journal 
+of Biological Chemistry 257: 3026-3031.
+Bulmer, M., (1988). Are codon usage patterns in unicellular organisms 
+determined by selection-mutation balance. Journal of Evolutionary 
+Biology 1: 15-26.
+Ikemura, T., (1981). Correlation between the abundance of Escherichia coli 
+transfer RNAs and the occurrence of the respective codons in its 
+protein genes: a proposal for a synonymous codon choice that is 
+optimal for the E. coli system. Journal of Molecular Biology 151: 389-
+409.
+Kyte, J., and R. Doolittle, (1982). A simple method for displaying the 
+hydropathic character of a protein. Journal of Molecular Biology 157: 
+105-132.
+Lobry, J. R., and C. Gautier, (1994). Hydrophobicity, expressivity and 
+aromaticity are the major trends of amino acid usage in 999 
+Escherichia coli chromosome encoded genes. Nucleic Acids Research 22: 
+3174-3180.
+Sharp, P. M., and W. H. Li, (1987). The codon adaptation index - a measure of 
+directional synonymous codon usage bias, and its potential 
+applications. Nucleic Acids Research 15: 1281-1295.
+Stenico, M., A. T. Lloyd and P. M. Sharp, (1994). Codon usage in 
+Caenorhabditis elegans: delineation of translational selection and 
+mutational biases. Nucleic Acids Research 22: 2437-2446.
+Wright, F., (1990). The effective number of codons used in a gene. Gene  87 
+: 23-29.
+
diff --git a/input.dat b/input.dat
new file mode 100755
index 0000000..740568c
--- /dev/null
+++ b/input.dat
@@ -0,0 +1,2835 @@
+>YCG9 Probable          1377 residues Pha 0 Code 0
+ATGAATATGCTCATTGTCGGTAGAGTTGTTGCTAGTGTTGGGGGAAGCGGACTTCAAACG
+CTTTGCTTTGTTATTGGTTGTACGATGGTTGGTGAAAGGTCACGTCCATTGGTGATTTCC
+ATCCTAAGTTGTGCATTTGCTGTAGCTGCTATCGTTGGTCCTATAATCGGAGGTGCCTTT
+ACAACCCATGTTACCTGGAGGTGGTGCTTCTATATCAATCTTCCTATCGGTGGTCTTGCC
+ATTATTATGTTTTTACTCACATATAAGGCCGAGAATAAGGGTATACTTCAACAAATTAAA
+GATGCTATAGGAACAATCTCGAGCTTTACTTTTAGTAAGTTCAGACACCAAGTTAATTTT
+AAAAGACTTATGAATGGCATAATCTTCAAGTTTGACTTCTTTGGTTTTGCCCTCTGCTCT
+GCAGGGCTGGTCCTTTTCCTACTGGGGCTAACCTTTGGTGGTAATAAATATAGTTGGAAC
+TCTGGCCAAGTCATCGCATATTTGGTTTTGGGTGTCTTACTTTTTATTTTTTCATTGGTG
+TACGATTTCTTCTTATTCGATAAATTCAACCCGGAACCTGATAATATATCCTACAGGCCT
+CTCCTTCTAAGAAGATTGGTAGCAAAACCAGCCATAATAATAATAAACATGGTAACATTT
+CTATTATGTACCGGTTACAATGGGCAAATGATATACTCTGTCCAGTTTTTCCAACTTATA
+TTTGCGTCGAGTGCATGGAAAGCCGGTCTTCACTTGATACCAATCGTTATTACCAACGTT
+ATTGCGGCCATTGCAAGTGGTGTGATTACCAAAAAGCTCGGTTTAGTTAAACCACTCTTA
+ATATTTGGAGGCGTTCTTGGGGTAATTGGAGCAGGGCTTATGACACTTATGACAAATACG
+TCCACGAAGTCAACTCAAATTGGTGTTTTGCTATTACCGGGGTTTTCCCTTGGATTTGCT
+CTACAAGCATCGCTCATGAGTGCACAGCTTCAAATTACCAAAGATCGTCCAGAAGCTGCT
+ATGGACTTTATTGAAGTAACAGCTTTCAATACATTCATGAAGTCATTAGGTACAACTCTT
+GGTGGTGTGCTTTCAACCACTGTTTTTTCCGCCTCCTTTCACAACAAAGTATCACGAGCT
+CATCTAGAGCCTTACGAAGGAAAAACGGTTGATGACATGATTTTGTATCGTCTTCAAAAC
+TACGACGGTTCTCATTCGACTATTGGAAACATTTTAAGCGACTCCATTAAGAACGTATTT
+TGGATGGATCTAGGGTTTTATGCCTTAGGATTTTTGTTTTGTAGTTTTTCATCCAATAAG
+AAATTAATCATACCAAAAAAGGACGAGACACCAGAAGATAATTTAGAAGACAAGTAG
+>YCG8        573 residues Pha 0 Code 0
+ATGAGAACGGCCGTACCGCAGTTGCTGGAAGCAACTGCCTGTGTCTCTAGAGAATGCCCC
+CTCGTCAAAAGAAGTCAGGACATAAAAAGAGCAAGAAAACGTCTACTCAGTGACTGGTAT
+AGGCTCGGCGCTGATGCAAACATGGATGCCGTATTACTAGTTGTTAACTCCGCCTGGAGG
+TTTCTGGCCGTCTGGCGACCCTTCGTAAACTCAATCCAACATGCAACTCAGGAATTGTAT
+CAAAATATCGCCCATTACCTTCTTCATGGCAACGTAAATATACAGAGGGTCACAGCACTA
+CTACAGCTCGTAATGGGACAGGACGATTTACTTTTTAGTATGGATGATGTTCTACAAGAG
+GTCTTCAGAATACAGCTCTATTTGAATAAGATGCTGCCGCACAACTCTCACAAATGGCAA
+AAGCCATCCCCCTTTGACTCCGCAAACTTACTACTTAACTTCAGAGACTGGACAACTGAC
+AATGCTCTCCTCCAAGAGTTGCTACTATCCTATCCCACAATTAATAAAAACAAACACAAA
+AATCACTCCGTCCCTCGTCTAATACAAGTTTGA
+>ALPHA2        633 residues Pha 0 Code 0
+ATGAATAAAATACCCATTAAAGACCTTTTAAATCCACAAATCACAGATGAGTTTAAATCC
+AGCATACTAGACATAAATAAAAAGCTCTTTTCTATTTGCTGTAATTTACCTAAGTTACCA
+GAGAGTGTAACAACAGAAGAAGAAGTTGAATTAAGGGATATATTAGGATTCTTATCTAGG
+GCCAACAAAAACCGTAAGATTAGTGATGAGGAGAAGAAGTTGTTGCAAACAACATCTCAA
+CTCACTACTACCATTACTGTATTACTCAAAGAAATGCGCAGCATAGAAAACGATAGAAGT
+AATTATCAACTTACACAGAAAAATAAATCGGCGGATGGGTTGGTATTTAATGTGGTAACT
+CAAGATATGATAAACAAAAGTACTAAACCTTACAGAGGACACCGGTTTACAAAAGAAAAT
+GTCCGAATACTAGAAAGTTGGTTTGCAAAGAACATCGAGAACCCATATCTAGATACCAAG
+GGCCTAGAGAATCTAATGAAGAATACCAGTTTATCTCGCATTCAAATCAAAAACTGGGTT
+TCGAATAGAAGAAGAAAAGAAAAAACAATAACAATCGCTCCAGAATTAGCGGACCTCTTG
+AGCGGTGAGCCTCTGGCAAAGAAGAAAGAATGA
+>ALPHA1        528 residues Pha 0 Code 0
+ATGTTTACTTCGAAGCCTGCTTTCAAAATTAAGAACAAAGCATCCAAATCATACAGAAAC
+ACAGCGGTTTCAAAAAAGCTGAAAGAAAAACGTCTAGCTGAGCATGTGAGGCCAAGCTGC
+TTCAATATTATTCGACCACTCAAGAAAGATATCCAGATTCCTGTTCCTTCCTCTCGATTT
+TTAAATAAAATCCAAATTCACAGGATAGCGTCTGGAAGTCAAAATACTCAGTTTCGACAG
+TTCAATAAGACATCTATAAAATCTTCAAAGAAATATTTAAACTCATTTATGGCTTTTAGA
+GCATATTACTCACAGTTTGGCTCCGGTGTAAAACAAAATGTCTTGTCTTCTCTGCTCGCT
+GAAGAATGGCACGCGGACAAAATGCAGCACGGAATATGGGACTACTTCGCGCAACAGTAT
+AATTTTATAAACCCTGGTTTTGGTTTTGTAGAGTGGTTGACGAATAATTATGCTGAAGTA
+CGTGGTGACGGATATTGGGAAGATGTGTTTGTACATTTGGCCTTATAG
+>CHA1         1083 residues Pha 0 Code 0
+ATGTCGATAGTCTACAATAAAACACCATTATTACGTCAATTCTTCCCCGGAAAGGCTTCT
+GCACAATTTTTCTTGAAATATGAATGCCTTCAACCAAGTGGCTCCTTCAAAAGTAGAGGA
+ATCGGTAATCTCATCATGAAAAGTGCCATTCGAATTCAAAAGGACGGTAAAAGATCTCCT
+CAGGTTTTCGCTAGTTCTGGCGGTAATGCCGGTTTTGCTGCTGCAACAGCATGTCAAAGA
+CTGTCTCTACCATGTACAGTCGTGGTTCCTACAGCGACAAAGAAGAGAATGGTAGATAAA
+ATCAGGAACACCGGTGCCCAGGTTATCGTGAGTGGTGCCTACTGGAAAGAAGCAGATACT
+TTTTTAAAAACAAATGTCATGAATAAAATAGACTCTCAGGTCATTGAGCCCATTTATGTT
+CATCCCTTCGATAATCCGGATATTTGGGAAGGACATTCATCTATGATAGATGAAATAGTA
+CAAGATTTGAAATCGCAACATATTTCCGTGAATAAGGTTAAAGGCATAGTATGCAGCGTT
+GGTGGAGGTGGTTTATACAATGGTATTATTCAAGGTTTGGAAAGGTATGGTTTAGCTGAT
+AGGATCCCTATTGTGGGGGTGGAAACGAATGGATGTCATGTTTTCAATACTTCTTTGAAA
+ATAGGCCAACCAGTTCAATTCAAGAAGATAACAAGTATTGCTACTTCTCTAGGAACGGCC
+GTGATCTCTAATCAAACTTTCGAATACGCTCGCAAATACAACACCAGATCCGTTGTAATA
+GAGGACAAAGATGTTATTGAACCCTGTCTTAAATATACACATCAATTCAATATGGTGATT
+GAACCGGCATGTGGCGCCGCATTGCATTTGGGTTACAACACTAAGATCCTAGAAAATGCA
+CTGGGCTCAAAATTAGCTGCGGATGACATTGTGATAATTATTGCTTGTGCGAGCTCCTCT
+AATACTATAAAGGACTTGGAAGAAGCGTTGGATAGCATGAGAAAAAAAGACACTCCTGTA
+ATAGAAGTCGCTGACAATTTCATATTTCCAGAAAAAAATATTGTGAATTTAAAAAGTGCT
+TGA
+>KRR1          951 residues Pha 0 Code 0
+ATGGTGTCTACACATAACAGAGATAAACCTTGGGATACGGATGATATTGATAAATGGAAG
+ATAGAGGAGTTTAAGGAAGAGGATAACGCATCCGGTCAACCTTTTGCTGAAGAGTCCAGT
+TTTATGACTTTGTTTCCTAAATACAGAGAAAGTTACTTGAAGACGATTTGGAATGATGTA
+ACAAGGGCTCTAGACAAACACAACATAGCGTGTGTTCTAGATTTAGTCGAAGGTTCTATG
+ACAGTAAAAACAACTAGAAAAACATACGATCCCGCTATCATTTTGAAAGCCAGAGATTTG
+ATCAAATTATTGGCGAGATCCGTTCCTTTCCCGCAAGCCGTTAAGATCCTACAAGATGAC
+ATGGCATGCGACGTTATTAAAATTGGTAATTTCGTTACTAACAAAGAAAGGTTTGTCAAG
+AGAAGACAACGTCTTGTAGGCCCTAACGGTAATACTTTAAAGGCTTTGGAACTTCTAACT
+AAATGTTACATTCTAGTACAAGGTAACACAGTAAGTGCCATGGGTCCCTTCAAGGGCTTG
+AAGGAGGTCCGTCGAGTAGTAGAAGATTGTATGAAAAATATTCACCCTATCTATCATATC
+AAGGAATTAATGATAAAAAGAGAATTGGCAAAAAGGCCAGAGTTAGCCAATGAAGATTGG
+TCAAGATTCTTGCCCATGTTTAAGAAGAGGAATGTGGCCAGAAAGAAACCCAAGAAGATC
+AGAAACGTCGAAAAGAAGGTCTATACTCCATTTCCTCCTGCCCAATTGCCTAGAAAGGTT
+GATTTGGAAATTGAAAGTGGTGAGTATTTCTTAAGCAAGAGAGAAAAGCAAATGAAGAAA
+TTAAATGAGCAAAAGGAAAAGCAAATGGAAAGAGAAATCGAAAGGCAGGAAGAGAGAGCA
+AAAGATTTCATAGCTCCGGAAGAAGAAGCATACAAGCCAAACCAAAATTAG
+>PRD1         2139 residues Pha 0 Code 0
+ATGCGATTGTTGCTGTGCAAGAATTGGTTTGCGTCACCTGTAATCTCACCACTACTGTAT
+ACCCGCTCCTTATATTCAATGGCTAACACTACTAGTTTCCCTATTGCTCCCCAGGCCCCG
+CCTAATTGGTCGTTCACTCCCAGCGATATTAGTGGGAAAACCAACGAAATCATCAACAAC
+AGCAACAATTTCTATGATTCTATGAGTAAGGTAGAGAGCCCTTCCGTGAGTAATTTTGTG
+GAGCCTTTCATGAAGTTTGAAAATGAATTGGGCCCAATAATTAACCAATTAACTTTCTTA
+CAGCATGTGTCGTCTGATAAAGAAATTAGGGACGCATCTGTGAACTCCTCAATGAAACTG
+GATGAGTTGAACATCGATCTATCTCTGCGTCACGACATCTTTTTGCAATTCGCCCGCGTC
+TGGCAGGATGTTCAATCGAAGGCAGATTCTGTGGAAAGAGAAACTTTCAAATACGTTGAG
+AAGTCTTACAAGGACTACATTCATTCTGGTTTGGAACTTGACGAGGGAAACCGATTGAAA
+ATCAAAGAGATCAAAAAGAAGATCTCCGTTAACTCTATTAATTTTTCGAAGAATCTGGGA
+GAACAAAAGGAATACATCACTTTCACCAAAGAACAATTGGAAGGTGTGCCGGATTCTATT
+TTGACGCAGTTCGAGACAATAAAATCTGACAAAGATAGCAATGAAACCTTGTATAAAGTC
+ACCTTCAAATATCCGGACATTTTTCCCGTGATGAAATTGGCATCCTCAGCTCAGACTAGA
+AAGCAGGCCTTTTTGGCCGACCAAAATAAGGTCCCTGAAAATGAAGCTATACTGTTGGAT
+ACATTGAAGCTGCGTGACGAATTGGCCTCGTTATTGGGCTATGACACGTATGCGAACTAC
+AACCTGTATGATAAAATGGCTGAAGATAGCACTACGGTAATGAACTTTTTGAATGATTTG
+AAGGACAAGCTAATTCCGCTGGGCAGAAAGGAACTACAGGTCTTGCAAGATATGAAAGCC
+GAAGATGTTAAGAAACTTAACCAGGGTGCAGATCCAAACTACTACATTTGGGACCACCGT
+TACTACGATAACAAATATTTGTTAGAAAACTTCAATGTGGACCTAGAAAAGATTTCTGAA
+TATTTTCCACTAGAGGCTACGATTACTGGTATGCTGGAAATATACGAAACATTGTTTAAT
+TTGAAGTTTATCGAGACGAAAGATTCTCAAAACAAATCTGTTTGGCATGACGACGTCAAA
+CAAATCGCCGTTTGGAATATGGATGATCCAAAGTCTCCAAACTTTGTTGGTTGGATTTAT
+TTCGATTTACATCCTCGTGATGGTAAATATGGCCACGCTGCCAATTTTGGTTTATCGTCA
+TCATTCATGATTGATGACACCACAAGATCGTATCCGGTTACTGCGTTGGTTTGCAATTTC
+TCCAAATCTACGAAGGATAAACCTTCTCTACTGAAGCATAACGAAATAGTGACCTTTTTC
+CATGAATTGGGCCATGGTATCCATGACCTGGTGGGACAAAACAAGGAATCGAGGTTTAAT
+GGCCCCGGATCTGTTCCATGGGATTTTGTGGAGGCACCTTCCCAAATGTTAGAATTTTGG
+ACTTGGAATAAGAATGAATTAATCAACCTCTCATCACATTACAAAACGGGCGAAAAAATT
+CCAGAATCTTTGATCAATTCATTGATCAAAACTAAACACGTAAATGGTGCTTTATTCACT
+CTAAGACAATTACATTTTGGGTTATTTGATATGAAAGTACATACTTGTAAAGACTTGCAA
+AACCTGTCAATTTGCGATACCTGGAACCAATTGAGACAGGATATTTCTTTGATTTCTAAT
+GGTGGTACGTTATCCAAGGGTTATGATTCATTTGGCCATATAATGTCAGACTCTTACTCT
+GCCGGTTATTACGGTTATCTATGGGCGGAAGTCTTTGCAACTGATATGTATCACACCAAA
+TTCGCTAAGGATCCGTTAAATGCCAAGAATGGGATACAATACCGTGATATTGTGTTGGCT
+CGTGGTGGCCTTTATGATATTAATGATAATCTGAAAGAATTTTTGGGTAGGGAACCTTCT
+AAGGATGCTTTCTTGAAGGAGCTGGGCTTACAGAACTAA
+>KAR4         1008 residues Pha 0 Code 0
+ATGGCATTCCAAGATCCAACTTACGACCAGAATAAAAGCAGACACATCAACAACAGTCAC
+TTGCAAGGGCCAAACCAGGAAACAATAGAAATGAAATCTAAACACGTATCATTCAAACCC
+TCTAGAGACTTCCATACAAACGATTACTCGAATAACTACATTCATGGGAAGTCGCTACCG
+CAACAGCATGTTACTAATATTGAGAATAGGGTTGATGGCTATCCAAAACTTCAGAAATTA
+TTTCAGGCGAAAGCTAAACAAATAAATCAATTTGCCACTACGCCATTTGGGTGTAAAATC
+GGAATAGATTCCATTGTTCCAACGTTGAATCACTGGATACAGAACGAAAATTTGACTTTC
+GACGTGGTGATGATTGGCTGCTTAACAGAAAATCAGTTTATTTACCCAATTTTAACCCAA
+TTGCCATTGGATAGATTGATCTCCAAACCAGGTTTCCTGTTCATCTGGGCCAATTCTCAA
+AAAATCAATGAACTTACTAAACTTTTGAATAATGAAATATGGGCTAAAAAGTTTAGAAGA
+AGTGAAGAATTGGTTTTTGTTCCTATTGACAAGAAATCACCGTTTTATCCAGGTTTAGAT
+CAGGACGATGAAACGTTGATGGAAAAAATGCAATGGCACTGTTGGATGTGTATCACAGGT
+ACAGTAAGGAGGTCTACAGATGGACATCTTATTCATTGTAACGTAGACACTGACTTGAGT
+ATCGAAACGAAGGACACCACTAATGGTGCTGTACCATCCCATTTGTATCGTATTGCAGAA
+AACTTCTCTACCGCGACTAGACGATTACATATTATTCCTGCAAGGACTGGTTACGAGACA
+CCCGTCAAAGTAAGACCTGGCTGGGTTATAGTGAGCCCAGATGTTATGTTGGATAACTTC
+TCACCCAAGAGATATAAAGAAGAGATAGCTAATTTAGGTTCGAATATCCCATTAAAAAAT
+GAGATTGAGCTGTTAAGACCAAGAAGTCCAGTACAAAAAGCACAATAA
+>PBN1         1251 residues Pha 0 Code 0
+ATGGTGACAAGACATAGAGTGACTGTACTCTACAATGCCCCTGAGGATATCGGTAATCAT
+ATGCGCCAAAATGACACTCATTTGACTGTTCGTGGAGGTTCTGGTGTGGTTTTACAACAA
+AGGTGGCTATTAGAGAGGACTGGAAGCTTGGATAAATCCTTTACGAGAATCACTTGGAGG
+CCCAGAGCGGACTTGGCTAGAAGTTTAAGCGTTATAGAAAATGAACTGAGTGCTGGCTTT
+TCAGTTTACTCAAATTCTTCGGATGTGCCGGAAAGGTTTATTACTAACCCAGTCTACAAT
+TCATTTCACAGTGAGAAGTTTGACATAGAGCAGTACTTGCCTCCCGAAGTAGATTTGAAT
+CTGTCATGGAATCCAGAAGATTTTACATATGATATATCAGTGGAGCCCACACAAATCCAA
+ATTGTTGAATATCGTCTGTTGAAACAGGGTGAAGAATTTACAATTGCAAGAGTGAAAGAT
+GAGAAACTCGAAGTAGGTGTATTCTTTGTGGATGCAAGTGATGAAAGTGATGTCGATATT
+GGTGGAATACGTTGTAATTGGAGGATGGACGATGGTAAAATGGAAAGATGTCAGAAAACA
+TCCTTATTGTATAAACAGGGCCATATCGCATACAATCACTCGACGACTACGACATCACTA
+TATCTGAATGAACCTATCGGTTTGCATCCAAAAATCATGATTGATCTCACAGATTTCGAA
+GAACGCCCTAAATGCATGTATCTAATGCACCTGCAATTGCCGTTAGAATTATTTATCGAT
+AAATTCCAATCCTCTCCCTTACTACTTTTTGGAGAAGACGACTTAGAATTACCAGAATAC
+TCTCTTCGAGATAAGGCATGGGGTTCTGAAAGTATCTTTGAATTGAAAGCCGGCACAATG
+AATGAAGTGACATTGCATACTAGATATATTGAGCCTTCTAATAATAAAGGGGATAAATTA
+GAAGTTTCATTTGATCCAGAAGTTATATTAGCCTGCGACACAGGTGACAATAAAGTTTCC
+CGTAATCCATTTTATAAAAAAGGTCTAGGATATGAATCTCTCTTTACAGACGATACTACA
+TTCCGCCATTTGAACTCGACAACTCTTCTAGTACCAATTCCAAGGCCTGACACAAAGGAT
+TATTCCAAGATCAAAAATGGTACGTTACTATGCTTACTCATCTCCATCATATACATTTTC
+TCCAAGGTATTTGGTAACAACAAGAAGAAAAGATCAGTAAAACGGGAATAA
+>LRE1         1761 residues Pha 0 Code 0
+ATGCCCAATACGCATACTCAACATGTGCAAATATCAGAGCCAAATCCTGTAAATACTTTG
+TCTACACCATCCAAAAGAGGTCACCGCCATCGCAGATCGCTAGCAATATCAGGAGATTTT
+GATTTTTTGAAACAGCCTGCAGCAATTGTGAATTTACCACCTCCACAGGCGGCTGAAAAT
+TGTCCTTCAACTGCCCCAACTGCTGTATCAAGTACATTATCGCCAATACGCTACAATAGA
+TTTCCTTGCAAAACCAATGAAGACGCTGGAACGTTAGATTTGCCTGAACCAAGATTTTAT
+CCGTTATCACCAAAGAACAATCTGCAAACACCAAGTCCACGATTTTTCATTAGTGAAGAG
+CCAAGTTTTTCATCGCCAGTTAAAGGCGTCCCAGATGCCATTATTAACCTTGACGATGCG
+TTGAAGACAAGGCCTAGGTCATTTAAATCACATAGAAGATCTGAATCCGCTCCTCCTGAT
+TTGGAGGTTATGGTAGATAAGGGCAATTGTGCAGCCGGTTCTAACTCTATGATTAAAGAA
+GAAGAGGACTCCTTAATTGAACCAGAATCGAAAAATGAATATTATGAGCAAAAGCTTCCA
+ACAGCACTATTATCCCCACTGCGGCCTTCCCTTTGTGTATCTGAACAGGCCATTGATGTA
+GATGATTCAGCTCTCAATGGGTCACCGACCCATCACAACCATGGGATGCAAAACGCCAAT
+GCACGGAATTCCAACACATTCAATTCGTTGAAGATCAAAGGCCAAAAGCAAAGATATTAT
+CATTATACGAAGCAGCTACCTTTGACCGTAGGCTGTGACTCGCAATCTCCAAAAGAACAA
+AGGTCGGCTGCTTCAATGACAATCAATCAGGCAATGACACCTTCTTCCCTGGCCTATACC
+CCTTCTAAACTAGCATCTACTCCCGCAACACCAGTATCCTTTTATGACAGCAATGCGGAC
+ATTAACTTAGAAAGTGATAATTTTCCACTAAAAGATAACCCTAGATATGCCAAGGATGGT
+TATCCTAAAAAGTGCGGCAATTCACAGCTTAATCGTGTGCTGGATAGCGATAAAAGACAG
+GATTTTAGTGGAGAATCGAGAAGAAGAAGATCGGGCAGTCCTATCTCCCACATGCAACAC
+CGCAACCTGATTGATAATATGAAAGGTAGACGAAACAGTAACACGATAAACTCAATCTTC
+AACTACAAGAGTCAACATTATGAAATGCCATATGATGATATGATGAAAAATGAAAACATT
+AATGCACAGTCCATGCCCTTTTCAGTCAACGGTGTCAACAATGAAAATAGTATCGGAGGG
+GTTATTACGAGAGCGGACGATGCACCCCTTCAACACTCTGTGGTCAAATCCTGTACGCCT
+GATGGCAAGGAAGAAATGAATAGGCTTAAAAGTAATGACAGTAATGAATATTCCAAGTCT
+GAAGGGCAGATCAGAACCAATTCGCAACTAAGTAAGGACATTCTCATGGGTGAACCAGGT
+GATATGGTTGATCTGTCCTCTTTTGTCAACACGCAGAGAAAAGCCTCAAATGAAACTGGT
+GACTTAGTCTTTAGTTTATCCCAGGATGATGACGCACTGAAAACGTTCCATGCGAGCAAT
+AGCGCAGCAACAAGCAATGAAAGCTGGTGTATTAGCGATGGTGCGTTAGGAAAGCAGGCG
+CAGGACAGTGAAGTTAGGAGGAAAGAAATCAAATTAGGACTCTTTAGACATATTTTCAAG
+GAAGTAATACAACAATATTAA
+>APA1          966 residues Pha 0 Code 0
+ATGAGTATCCCCGCTGACATTGCATCTTTAATTAGTGACAAGTACAAAAGTGCCTTCGAT
+AATGGTAACTTAAAATTTATCCAGACTGAAACAACGAAAACAAAGGACCCAAAAACCAGC
+ATGCCATACTTGATTAGCCACATGCCAAGTCTGATCGAAAAGCCAGAGCGTGGCCAAACT
+CCAGAAGGAGAGGATCCACTAGGCAAACCTGAGGAAGAATTAACGGTTATCCCAGAATTT
+GGTGGTGCCGATAACAAAGCGTATAAATTGCTATTAAACAAATTCCCTGTAATCCCTGGA
+CACACTTTATTGGTAACTAACGAATACCAACATCAAACTGATGCCTTGACCCCAACCGAT
+TTATTGACTGCTTATAAGTTGCTGTGTGCCTTGGACAATGAAGAATCCGACAAGAGACAC
+ATGGTCTTTTACAATTCTGGTCCAGCCAGTGGTTCTTCATTGGACCACAAACATTTGCAA
+ATTTTGCAAATGCCTGAAAAGTTCGTCACTTTCCAAGATAGACTATGTAATGGTAAAGAA
+CATTTCCTACCAACTTTCAATACTGAACCTTTGCAAGATGCTAAAGTCTCGTTCGCTCAT
+TTTGTCTTGCCAATGCCGGAGTCCGAAGAAACTGTTGATGAAGACCTATTAGCTATGTGT
+TACATCTCCATATTGCAAAGAGCTTTGACCTTTTTCCAGGACTGGTTGAACGAAAATCCA
+GAACTAAAGAAATCCTACAATCTTATGTTAACCAAGGAATGGATCTGTGTCGTTCCACGT
+TCGAAGGCCTTTTCTGATGAAATGAAGATAGGTTTCAACTCCACAGGTTATTGTGGTATG
+ATCTTAACCAAAAATGATGAAGTTTTCTCCAAGATTACTGAAAAACCTGAATTGATTAAC
+GATATCTTATTGGAATGTGGTTTCCCAAACACTTCTGGTCAAAAACCAAACGAATACAAC
+TATTGA
+>YCE9          939 residues Pha 0 Code 0
+ATGTTTAGTAAATACCTCGTAACTGCATCTTCCCTCTTTGTGGCTTTGACCTCTGCAGCA
+TCTACCGTTGATCTAGATGCTCTGCTTCTTCTACCAGGGGTCGAGTCCCACGACGGCGTT
+GATACTGTATTTTCGACCAAAGACTTTTATCAAGTGTCATTCGTCAAATCCATTGCTCCT
+GCTATCGTAAACAGCTCCGTAATCTTCCACGATGTTTCTCGTGGTGTGGCTATGGGCAAT
+GTCAAGAGCAGAGCAAGTATCTTCAACCCAGAGGAAACGTATTACGATTGGGAACAGTAC
+CAAGTAGTAAATAACGGAGACTGGCGAACCGAATGGGCACCTGCCTCTGACTGCATTTGG
+AGGGAGGAGAAGGATAACAGCGACGAAACACCGGACAGATTCCCCATCTCGGTGCCATAT
+AATTGGACGTCACAGTACTCAATTGTAGATTATGACACAGACGCTAACGAAGACAATTTA
+GATTTCAGGTTTATTAAATCATTGCTAGATAAGAAAAATTGGTTGAATAAAATTAACCAG
+ACTGTTTCCCAATCCAGTATTATGGTAGCACCAATGATTAAGCCATACAATGTGGTCCAG
+CTTTGGTATTCAAAATATATGGTTTGGGCAAACGTTCAAAGACAATATTGTAGCGGTGTT
+TATCCAGGAGGGACTCAATGTAGCGCTTGGTCCAGGTACTACCATGTTGATGCACCTACC
+TGCGATGAGCCTGTCGCCTCTTACATGACCAAAATGTCGGAAAATGAGGTTCAGTGTCCC
+AATGAGAGAAACGCAACTACCCTAGAGCCTCTCCGCCTGAATAAGCAGGGAGACTCTGAT
+TTTTCTTTGACTTTCGAGGAAGAGGAAGAGGAAGAGACAGGATCTAAATCTCTTTGGAGT
+ACATTGAAAAAAATTTTCTCTAAAAGAAGTATAAGTTGA
+>YCE8         1392 residues Pha 0 Code 0
+ATGAACCGTATTACTAGGAAAAGTTGTTTATTCGCGATTATATTTGCATCATTATTTGTG
+ACACATGCATTGGGTGCCGCTATTGATCCGCCAAGGCGACCACATAATGTGAAGCCTTTT
+CATAACGGTAATCTCGAACTTCAAAGAAGAGCAAATGAACCGTTTTTTGAAATAGATGTC
+AAGAGTCTGAACACAAACTCACCGATATCAGAGTTGTGTAAAAAAGATTTGCACGTCATT
+GAATCGTCTCATGATCTTTTTCATTTACAAAACCAATGTGAATTCATCTTGGGGTCATTA
+AAAGTCACAAACTATGATTCTAACATTTTGGATTTGAACAGCTTGAGGGCCATTGGTGGT
+GACCTGATTATTCAGGATTCACCTGAACTGATCAGAATCCAAGCCGGGAACTTGAATAAA
+ATCGAAGGGCTCTTCCAATTACAGGGACTAACCTCTTTGGTTTCTGTTGAAATTCCAACT
+TTGAAATTTTGTCAGTCACTGGAGTGGAAAGTTGTTCCCATCTTGAACTACGTCTCCATG
+GATTCTCAGAATATTGAGATTATAAAGGATATTGTCATATCGGATACTTCATTAGCAAAC
+ATCGAGAATTTCAACAAGGTTCAGGAAATTGATACTTTCAATATCAATAATAACAGATTT
+TTAGAAACTATTCATTCGAACGTTAAAACCATTAGGGGACAATTCAGTGTACATGCGAAC
+GCTAAGGAGCTAGAACTTGAAATGCCACACTTGAGAGAAGTGGAAAACATAACGATTAGG
+GACACATCATTGGTCTACCTTCCACAATTAACAAAAGTGAAAAGCTCTTTAGAGTTCATC
+GAAAATTACTTTTACGAATTGAACCTGAACAATTTGCAGAAGATTGGTGGAACATTAGGA
+ATTATCAACAATGTAAATTTAATAAAAGTTAATTTGGAGAACTTAACAGACATTCAAGGT
+GGCTTGATGATCGCCGATAACGAATCCCTCGAGGATATTACTTTCCTGCCAAACTTGAAG
+CAGATTGGAGGTGCTATTTTCTTTGAAGGTTCGTTCAAAGATATCATGTTCGATAGCTTG
+AAACTGGTGAAAGGTAGCGCTTTTATTAAGAGTTCATCAAACGTGTTGGATTGCAATAAA
+TGGACAAACCCATCAAATGGAAGATCAATCATCAGGGGTGGGAAATTCACTTGTATTTCT
+GGTAAGAAGGAAAATACGCTGAATGTTAAACAGGATGGTACAATCATAGAAAAAGGGTAC
+AAAGATTTAACGCAAGAAGGTGAAGACTCCAAGAAAAGAGTGATTTCAAAATACGCGAAC
+TCAGCAAATCCAAGCATGCAATTGGACCCCCTTCTTTTTGGTACATGCCTTGTTGCTATG
+TTATTGTTTTAA
+>YCE7          777 residues Pha 0 Code 0
+ATGAAGAAGACGTTCGAGCAGTTTCGAAAAAGCAATTTACTATTTCAGGTTCTCAAAGGA
+CCCCAGCATCTAGAATGTCAGAAGTTATTTGTCCTTGATTCTTCATTCAATCCACCACAT
+CTGGCCCATTTTCAACTACTATCGCAGACTATTAAAAACTTCAAATTGAAGGACACCCGT
+TCGCATGTTTTATTACTGTTAGCGGTGAATAATGCAGATAAGTTGCCTAAGCCGGCATCT
+TTTCCAACTCGTCTGGAAATGATGTGCTTATTCGCTGACTACCTTCAGGAGAAGCTCCCC
+CAATCTGTAGTATCTGTCGGGTTGACTGTTTTCTCGAAATTCATCGACAAGGACAAAATA
+TTACATGAGCAATTTGTTAAAGGATGCAGTGCAGATATAGGCTACTTAGTTGGTTTTGAT
+ACAATTGCTAGGATCTTTGATGAAAAATATTATCATCCTTTAAAAATCAGTGATGTAATG
+GAGAGCTTCATGTCGGGATCTCAATTATATTGCTTGGCGAGAGGCGATTGCCATCTCAGT
+GCTGAATCGCAACTAAGATACGCCAGTGACATCCTTGAGGGAAAATTCGAACCGGTAATA
+CCAAGAGAATGGGGCGCTAGGATTCATGTTATGCAAAATGATTATCCAGCATTAAGAAAT
+GTTTCATCATCCGAGATTAGGAACAAACTGAAGAATGGGCAAGTGGAGAGTTTGAAAGAC
+GAGTTGCCATTGTGCATATACGATTATTTGATCAATAATAAGACAATATTTGATTGA
+>YCE5         2283 residues Pha 0 Code 0
+ATGAAGATAACGTGTACAGACTTGGTGTACGTCTTCATTTTACTCTTCCTAAACACGAGT
+TGTGTCCAAGCCGTTTTTTCAGATGATGCATTTATCACTGATTGGCAACTGGCTAACTTA
+GGTCCTTGGGAGAAAGTCATCCCTGATTCTCGAGACCGCAACAGGGTTCTCATCTTATCG
+AACCCTACCGAAACTTCCTGCTTAGTTTCTTCGTTTAACGTTTCTTCCGGACAGATTCTT
+TTCAGAAACGTTTTACCCTTTACCATTGATGAGATTCAACTGGATAGTAATGACCATAAC
+GCAATGGTTTGTGTGAACTCTTCAAGCAACCATTGGCAGAAATATGATTTACACGATTGG
+TTTTTACTAGAGGAAGGCGTAGATAATGCCCCTTCTACGACCATTTTACCTCAATCCTCA
+TATTTAAACGATCAAGTATCTATTAAGAACAATGAACTACATATTCTCGATGAGCAGTCA
+AAACTGGCAGAATGGAAATTGGAGTTACCTCAAGGGTTCAATAAAGTGGAATATTTTCAT
+CGTGAAGATCCCCTGGCGTTAGTGTTGAACGTTAATGATACCCAATATATGGGATTCTCT
+GCCAATGGCACAGAATTGATCCCCGTTTGGCAAAGAGATGAATGGTTGACTAACGTGGTA
+GACTATGCTGTATTGGACGTCTTCGATTCTAGGGATGTGGAGTTGAACAAAGATATGAAA
+GCGGAACTTGATTCAAATTCGCTTTGGAATGCTTACTGGCTTAGATTGACAACTAATTGG
+AATCGCCTTATCAACTTATTGAAAGAAAACCAATTCTCACCAGGACGTGTCTTCACTAAA
+CTCCTAGCTCTAGACGCTAAGGATACCACGGTATCAGATTTGAAGTTCGGATTCGCCAAA
+ATCTTAATTGTTTTGACGCATGATGGCTTTATCGGCGGCCTTGATATGGTCAATAAGGGC
+CAACTTATCTGGAAACTCGATTTAGAAATTGATCAGGGCGTCAAAATGTTCTGGACGGAT
+AAAAACCATGACGAACTTGTTGTTTTTTCGCATGATGGGCATTATTTGACAATTGAAGTT
+ACTAAAGATCAACCGATTATCAAATCAAGATCCCCCCTATCTGAAAGGAAAACTGTTGAT
+TCCGTTATTAGGCTGAATGAACATGATCACCAGTATCTGATTAAGTTTGAGGATAAGGAT
+CATTTACTGTTCAAATTGAATCCCGGCAAGAATACGGATGTACCAATAGTTGCCAACAAC
+CATTCTAGTTCCCACATATTCGTCACAGAGCATGACACGAATGGCATTTATGGCTACATA
+ATCGAAAACGATACGGTAAAACAAACTTGGAAAAAAGCCGTAAATTCGAAAGAGAAAATG
+GTGGCATATAGCAAGAGGGAAACAACAAACCTAAACACTCTTGGTATTACACTAGGTGAC
+AAATCGGTTCTTTATAAATATTTGTACCCCAACCTAGCGGCTTATCTGATCGCTAATGAA
+GAACATCATACAATCACTTTTAACTTAATTGATACCATTACAGGAGAAATCCTCATTACC
+CAAGAGCACAAGGATTCTCCGGATTTTAGGTTTCCAATGGATATTGTTTTCGGTGAATAT
+TGGGTCGTTTATTCCTATTTCAGTTCTGAACCTGTTCCAGAACAAAAGTTAGTAGTGGTG
+GAATTATATGAGTCACTAACCCCAGATGAGCGTTTGTCTAACTCAAGCGACAATTTTTCT
+TATGATCCATTGACTGGACACATTAACAAACCTCAATTTCAAACTAAACAATTCATTTTT
+CCCGAGATTATCAAAACAATGTCCATTTCCAAGACAACGGATGATATTACCACAAAGGCA
+ATCGTTATGGAATTAGAAAATGGACAAATCACCTACATACCAAAGCTTTTATTGAATGCA
+AGAGGTAAACCAGCAGAAGAAATGGCCAAGGATAAGAAAAAAGAGTTTATGGCTACCCCA
+TACACGCCAGTTATCCCAATTAATGATAATTTCATTATCACTCATTTCAGAAATCTATTG
+CCAGGATCCGATTCGCAGTTGATCTCCATCCCAACCAATCTGGAATCCACAAGCATTATA
+TGTGATCTAGGCCTTGATGTATTTTGTACAAGGATCACACCTTCGGGCCAATTTGATTTA
+ATGAGTCCTACTTTCGAAAAGGGTAAATTGCTTATTACTATATTCGTCTTGTTGGTGATC
+ACGTATTTTATCCGTCCTTCTGTTTCAAACAAGAAGTTGAAATCCCAATGGCTAATTAAA
+TAG
+>YCE6          324 residues Pha 0 Code 0
+ATGGTAAAGGGTAAAACGTTTCTGAAAAGAATCTGTCCGGAAGAAACGTTAAACGAAGAA
+ACTAAGCAGGAAGTTTCGGTAGGGTTCGATAAGATGAGAACCCTGTTGCGGTCTCGAGAA
+TCAGGGATGACTTTCTCCCAAGGACCTAAGTTAGCCAGTTGCCAATCAGTGATAAATGCA
+TCATCTGAAAAAACGGCTTGGACACAACTCGTGTTTAGGAAGAGTAAAATGAAGACGTAC
+ACCAAGTCTGTACACGTTATCTTCATTGCTATGGGGGAAGGGGAGGATGAAAGTGTTGAT
+ATGAATGTAGGTATTAGTTATTAA
+>YCE4         1254 residues Pha 0 Code 0
+ATGGCTGTATTTACTCCTCCATCAGGTAATAGCAATTCCACCGACCATACTCACACACAA
+GATGACCACGACAAAGATGATAATGATATCAAGAAATTCTACATAAGGCCAAGTTTAGGC
+TTAAAACTGTGGGGTCCGCTCGTACCCGCTCCTGATAACCTACCGGGACTATACACTCTA
+ATCACTATCCAATCTGCAGTGGGTTTCTTTGCCCTTTGGAGACTGAGAAGGCTCTACAAA
+CTACCGCCACCGCGCCGCATTGCCACTGGCACTCACTCGGATTTATCCTTTGGCGAACTA
+CCCAGTGAAATGATTGTCAATGGCAAGACTAAAATCAAAAAGGATATTGCTGACTTTCCA
+ACTTTGAACCGCTTCTCCACCACCCATGGTGACATTGTGCTCGCCCCTCCTCCCATCATA
+CCTCGCCAATCTCGATTCGTCAGCGTCAGAAAGCTCTTATGGGGGTTGTTTGGCTCTTTG
+CTACTTTCTCAGTCACTGTTGGAGCTTACTCGCCTGAACTTTCTTAAATACGACCCCTGG
+TGCGACGAAATGAAATCCGTACGTGACAAGAAGTTTTTCAACAATATTGTCAAATATTAT
+CACGAGGGCATAGACCCCACCAAAATAAAAGTCAAGGATGCTATGAACGGTACTCCTCTC
+TCGACAAATATCCCTGAGGTCAAACAAAGCGTCGCTCTCGCTAGAGCGCAAGTTGAGGCG
+CAGAATCCCATTATTAAATGGTTCGGACCCTTGGAATACAAGCCCATGTCTTTCAACGAG
+TACCTCAATCGCATGGAATTTCACTTGGACATGTTCGAGTTTTTTCAAAATAAAAGAAAC
+ATTAGAGAAAATTCCATTGAACTCATCAATTCCATATCCCACAATCCGCAGTCTTCTTCT
+ACTGGCCTTGAAGGTCTTTCCGAGTCCAAAAAACTCCATCTACAAAATGTGGAAAAAAGA
+CTGCATTTCTTAGCATCTTCGGGAGATTCCATTTCCGCACCAGTAAAGAAGAGATCCAGC
+ACCACACTCTCCCGAGGTGTCATTTTGCCCCATGACACGAAAGGCCCGCAAGATATTGAT
+CTCGATACAATAAGATCGCTTTATGATCCATGGATGACTTTGGCCTTAGAAACTTCGCTA
+AGCATCAAATTCATACCAACTACCATGCCCTCCCATACCAAGACACCCACTAGCACGGAC
+CAGCCGTTACCAGGGCCTACCCCCAAGGCTCTCACTAATGAAAAGACACATTAG
+>PDI1         1569 residues Pha 0 Code 0
+ATGAAGTTTTCTGCTGGTGCCGTCCTGTCATGGTCCTCCCTGCTGCTCGCCTCCTCTGTT
+TTCGCCCAACAAGAGGCTGTGGCCCCTGAAGACTCCGCTGTCGTTAAGTTGGCCACCGAC
+TCCTTCAATGAGTACATTCAGTCGCACGACTTGGTGCTTGCGGAGTTTTTTGCTCCATGG
+TGTGGCCACTGTAAGAACATGGCTCCTGAATACGTTAAAGCCGCCGAGACTTTAGTTGAG
+AAAAACATTACCTTGGCCCAGATCGACTGTACTGAAAACCAGGATCTGTGTATGGAACAC
+AACATTCCAGGGTTCCCAAGCTTGAAGATTTTCAAAAACAGCGATGTTAACAACTCGATC
+GATTACGAGGGACCTAGAACTGCCGAGGCCATTGTCCAATTCATGATCAAGCAAAGCCAA
+CCGGCTGTCGCCGTTGTTGCTGATCTACCAGCTTACCTTGCTAACGAGACTTTTGTCACT
+CCAGTTATCGTCCAATCCGGTAAGATTGACGCCGACTTCAACGCCACCTTTTACTCCATG
+GCCAACAAACACTTCAACGACTACGACTTTGTCTCCGCTGAAAACGCAGACGATGATTTC
+AAGCTTTCTATTTACTTGCCCTCCGCCATGGACGAGCCTGTAGTATACAACGGTAAGAAA
+GCCGATATCGCTGACGCTGATGTTTTTGAAAAATGGTTGCAAGTGGAAGCCTTGCCCTAC
+TTTGGTGAAATCGACGGTTCCGTTTTCGCCCAATACGTCGAAAGCGGTTTGCCTTTGGGT
+TACTTATTCTACAATGACGAGGAAGAATTGGAAGAATACAAGCCTCTCTTTACCGAGTTG
+GCCAAAAAGAACAGAGGTCTAATGAACTTTGTTAGCATCGATGCCAGAAAATTCGGCAGA
+CACGCCGGCAACTTGAACATGAAGGAACAATTCCCTCTATTTGCCATCCACGACATGACT
+GAAGACTTGAAGTACGGTTTGCCTCAACTCTCTGAAGAGGCGTTTGACGAATTGAGCGAC
+AAGATCGTGTTGGAGTCTAAGGCTATTGAATCTTTGGTTAAGGACTTCTTGAAAGGTGAT
+GCCTCCCCAATCGTGAAGTCCCAAGAGATCTTCGAGAACCAAGATTCCTCTGTCTTCCAA
+TTGGTCGGTAAGAACCATGACGAAATCGTCAACGACCCAAAGAAGGACGTTCTTGTTTTG
+TACTATGCCCCATGGTGTGGTCACTGTAAGAGATTGGCCCCAACTTACCAAGAACTAGCT
+GATACCTACGCCAACGCCACATCCGACGTTTTGATTGCTAAACTAGACCACACTGAAAAC
+GATGTCAGAGGCGTCGTAATTGAAGGTTACCCAACAATCGTCTTATACCCAGGTGGTAAG
+AAGTCCGAATCTGTTGTGTACCAAGGTTCAAGATCCTTGGACTCTTTATTCGACTTCATC
+AAGGAAAACGGTCACTTCGACGTCGACGGTAAGGCCTTGTACGAAGAAGCCCAGGAAAAA
+GCTGCTGAGGAAGCCGATGCTGACGCTGAATTGGCTGACGAAGAAGATGCCATTCACGAT
+GAATTGTAA
+>GLK1         1503 residues Pha 0 Code 0
+ATGTCATTCGACGACTTACACAAAGCCACTGAGAGAGCGGTCATCCAGGCCGTGGACCAG
+ATCTGCGACGATTTCGAGGTTACCCCCGAGAAGCTGGACGAATTAACTGCTTACTTCATC
+GAACAAATGGAAAAAGGTCTAGCTCCACCAAAGGAAGGCCACACATTGGCCTCGGACAAA
+GGTCTTCCTATGATTCCGGCGTTCGTCACCGGGTCACCCAACGGGACGGAGCGCGGTGTT
+TTACTAGCCGCCGACCTGGGTGGTACCAATTTCCGTATATGTTCTGTTAACTTGCATGGA
+GATCATACTTTCTCCATGGAGCAAATGAAGTCCAAGATTCCCGATGATTTGCTAGACGAT
+GAGAACGTCACATCTGACGACCTGTTTGGGTTTCTAGCACGTCGTACACTGGCCTTTATG
+AAGAAGTATCACCCGGACGAGTTGGCCAAGGGTAAAGACGCCAAGCCCATGAAACTGGGG
+TTCACTTTCTCATACCCTGTAGACCAGACCTCTCTAAACTCCGGGACATTGATCCGTTGG
+ACCAAGGGTTTCCGCATCGCGGACACCGTCGGAAAGGATGTCGTGCAATTGTACCAGGAG
+CAATTAAGCGCTCAGGGTATGCCTATGATCAAGGTTGTTGCATTAACCAACGACACCGTC
+GGAACGTACCTATCGCATTGCTACACGTCCGATAACACGGACTCAATGACGTCCGGAGAA
+ATCTCGGAGCCGGTCATCGGATGTATTTTCGGTACCGGTACCAATGGGTGCTATATGGAG
+GAGATCAACAAGATCACGAAGTTGCCACAGGAGTTGCGTGACAAGTTGATAAAGGAGGGT
+AAGACACACATGATCATCAATGTCGAATGGGGGTCCTTCGATAATGAGCTCAAGCACTTG
+CCTACTACTAAGTATGACGTCGTAATTGACCAGAAACTGTCAACGAACCCGGGATTTCAC
+TTGTTTGAAAAACGTGTCTCAGGGATGTTCTTGGGTGAGGTGTTGCGTAACATTTTAGTG
+GACTTGCACTCGCAAGGCTTGCTTTTGCAACAGTACAGGTCCAAGGAACAACTTCCTCGC
+CACTTGACTACACCTTTCCAGTTGTCATCCGAAGTGCTGTCGCATATTGAAATTGACGAC
+TCGACAGGTCTACGTGAAACAGAGTTGTCATTATTACAGAGTCTCAGACTGCCCACCACT
+CCAACAGAGCGTGTTCAAATTCAAAAATTGGTGCGCGCGATTTCTAGGAGATCTGCGTAT
+TTAGCCGCCGTGCCGCTTGCCGCGATATTGATCAAGACAAATGCTTTGAACAAGAGATAT
+CATGGTGAAGTCGAGATCGGTTGTGATGGTTCCGTTGTGGAATACTACCCCGGTTTCAGA
+TCTATGCTGAGACACGCCTTAGCCTTGTCACCCTTGGGTGCCGAGGGTGAGAGGAAGGTG
+CACTTGAAGATTGCCAAGGATGGTTCCGGAGTGGGTGCCGCCTTGTGTGCGCTTGTAGCA
+TGA
+>YCD8         1587 residues Pha 0 Code 0
+ATGAGCTATGGAACTATAAATGATATGAATGAATCGGTAACGAACTATCGAATAAAAAAA
+GCCCAAAACAATATCAAGGGATGGTACGCTTACTCATTTTCTAGCGAACCATTTGTCGTT
+TCTGCGGTTTCAACGTATATTCCCTTACTACTGCAGCAATTTGCGAGTATAAATGGTGTA
+AAAGTTCACGATCACTCCATACCCTGCCTGTCAGAAACGGGTAGTGATTCAGATAAGTGT
+GTTCTTGGTTTGTTCAACAATCGGATCTTCGTAGATACTTCAAGTTTTGCATTATATGTC
+TTTTCCCTTAGCGTTTTATTCCAAACTATAATAGTCATTTCCGTTTCAGGGATAGTAGAT
+CTCTGGGGGAGCGTTAAATTCAAAGGCAGAATTCTGGTTTGGTTTGGTATTGTGGGCGCA
+TTGTCGACTGTTGCGATTTCAAAATTGAATGATACCCAGATTTATTCTCTGGCTGGGCTT
+TATATAGTGGCCAATGGTTGTTTTGGCGTTATCAATGTTGTTGGGAATTCTCTTCTGCCC
+ATTTTTGTCAAGGATTCTTTGAAATGTCAAAGTCAAGGAGCTTATGAACCTGATAAGGTA
+GACTCGTTAACTACTGTTATTAGCGGTAGAGGTGCATCTTTAGGTTATTCAAGTGCCCTC
+ATTGTTCAGATTGTATCTATGTTCTTAGTCGCATCTAAAAAGGGCAGTAAGCAGGATGTT
+CAAGTGGCTGTTCTTTTCGTTGGGATTTGGTGGTTTGTGTGGCAACTGCCCATGATCTGG
+TTGATTGACGATGTGACAATACCGATAAGAGTTGACGATTCTACATTAGCATCCGCCCGC
+AGTCCGTATCCCGGTGAGCAAGACGCCTTGGGTCAACTAAACTGGAAGAATTACCTTTCA
+TATGGTTGGGTTTCGCTTTTCGAATCGTTTAAACATGCCAGACTATTGAAAGATGTGATG
+ATTTTTCTTATTGCGTGGTTTATTATTAGTGATTCCATTACAACTATAAATTCTACAGCG
+GTTTTGTTCTCCAAGGCAGAACTGCACATGAGTACCCTCAATTTAATCATGATAAGTGTT
+TTGACCGTTGTAAATGCAATGCTGGGTGCCTTTATGATTCCACAATTTCTTGCCACAAAG
+TTTCGGTGGACTTCTAGTCAAACTTTGATGTACATTATCATTTGGGCAAGTTTCATACCA
+TTTTATGGTATTCTTGGATTTTTCTTCAATGCGTTCGGTTTAAAGCATAAGTTTGAAATG
+TTCTTATTGGCCATTTGGTATGGATTATCACTAGGTGGCCTGTCCGCGGTTTCAAGATCA
+GTTTTCAGTTTGATTGTACCTCCAGGAAAAGAATCCACGTTTTTTAGTATGTTCAGTATC
+ACAGATAAGGGGTCGTCCATCCTGGGACCCTTCCTTGTTGGACTGCTTACCGATAAAACG
+CATAATATTCGCTATTCGTTTTATTTCTTCTTTTTGCTTTTGATGCTATCATTGCCTGTG
+CTAAACTGTTTGGATGTCAAGAGAGGTAGAAGAGAGGCTGAAGAACTCAGTCAAGTTTTA
+CCTGAAAGTGAAAGAAGGTTGGATTAG
+>SRO9         1401 residues Pha 0 Code 0
+ATGAAGATCTTTTGGGATCCTAGATCGGTAATAGAACATCAGGATTACTCTGGACCTGCT
+AACGTGTTTCATCTTCTTTTCACTTCTCTGCCCACGATGTCTGCTGAAACCGCCGCCGCA
+AACACTGCTACTGCCCCAGTCCCAGAAGTGCAAGAACAAGAGAGCTCCAAGAGCAAGCAA
+GTCAACTTGACGCCGGCACCATTGCCCACATCTTCCCCATGGAAACTTGCTCCTACTGAG
+ATCCCTGTTTCTACTATCTCAATAGAAGACTTGGATGCCACAAGAAAGAAGAAGAACAGA
+ACACCCACTCCGAAATCATCGACTGCTACCAAGTGGGTTCCCATCAAGGCCTCCATTACC
+GTCTCTGGCACCAAAAGATCCGGTTCCAAGAATGGTGCAAGTAATGGCAACAGCAACAAG
+AGCAAAAACAACAAAACTGCAGCATCGTCGACATCGTCGAGTAATGCTAACAGGAAAAAG
+AAGCATCACCAACATAATGCTAAGAAGCAACAACAAATGAAGAAAGATGGCTTTGAATCG
+GCAGTAGGTGAGGAAGATTCAAAAGACGCTACCTCTCAAGAAAATGGTCAATCTACACAA
+CAGCAACAACCACCTCACCACCGTAATCATCACCACAGTCATCACCATAACAGCAATGGT
+CCTCAAAGGAGAAAGTTCCACAACAGTAATAACGCCGGTATGCCTCAGAACCAAGGCTTC
+CCACCACAGTTTAAACCTTACCAAGGACGCAACGCTCGTAATAACAACAACAACCGCTCT
+AAATACCACAACCACTTCCATCACAACCAACAACATCCTCAACAACCTATGGTCAAATTA
+CAGCAACAGTTTTATCCAGTCCAACCAGTGTTAATGGCCATCAACAACATTGCTAGACAA
+ATTGAATACTATTTCAGCGAAGAAAACTTGACCGTCGACAATTACTTAAGGTCCAAACTC
+TCCAAGGATGGTTTTGCTCCATTGTCTTTAATCTCTAAGTTTTACAGAGTTGTTAACATG
+TCCTTCGGAGGTGACACTAACCTGATTTTAGCCGCATTGAGAGAAATTGTCGCTAACGAA
+GCCGCTACCGTCAATGTTGCAGAAGGTACTTTGGCCGCCAAGGAAGGTGATAACGTTACC
+GGTGAAGCCAAAGAACCATCTCCATTGGATAAGTACTTCGTTCGTTCCAAGAGCTGGTCA
+AACTGGTTACCAGAAACTTTTGAAACTGAAATTAATATTGAAAAAGAACTGGTCGGCGAT
+GCATTGGACCAATTCATGATATCCCTACCACCTGTTCCTCAACAAGAAGAGGAATCATCC
+ACTGAACTCGCTTCTCAAGAACAAGAAACCAAAGAAGACTCTGCGCCGGTTGCTGCCGGT
+GAATCCGAGTCTTCCTTATAA
+>YCD6         1701 residues Pha 0 Code 0
+ATGCAGGTTCAAAAAATGGTGAGAGATAACAGTAATAACGGTAGCGATAAAAGCGTCCAT
+TGGGAGAGGAGGAATAATAACGGCGCAGGCCCCCGTTATCGTTCCAGAAGCGGTAATACC
+GGTGCTTTGGCAACAAAACTAAGTAATGGGACGCTCTCTGTCAGAGGATTAGTGAAGGAC
+CGAACAGGAAGCGGCAAGATCGCGGGCTGTGTGGAGGCGTTTCTGGATGCCAGGACCCAA
+TTGAATACGCCCTGGGACCGTGCTAAGTGCAATTGGCTGGACCAGATAGATTACTATGTA
+CAGTTGAGAAAGACCGCGTTTTCTAAGGAATTGGACCAACTAAGGAAGCCCATGATCGAT
+GCATATGTGGCGGAGATGAGGCAGAAGTTTGATGCCTCCTATGGACAATCCAGGGCGCAA
+TTGGAAGCCAAACTGGCGCAGGTGGACAGTGAATGGCATATGGTACATGGTGATGTGCAT
+GCAAAACTGGAAAAACTCGTGGAAGAACGCCGGTTTTTGAAAAGATTAAGCGACACGATC
+GTACCACCCAGGTCCAAAAGATCACAGCGGCTGTCTCCATTGACCAAAGAGGACCGAGCC
+AACTGTATCTGTCCGCAGCCCAAAGGAATGAGCGACACCGCTTGGTTCGAAGCCATTCAG
+AAGAAAATGTTAGGAATGAATGGTACCATCAAGCTCCTAGAGACAGAACAGAAACTACTG
+GCTGACGAGAAAAACAGCGTGAGGAAGACGTTCTGGCCCATGGTGGAAGCACATTCACGC
+TCGAATGAATTTGCTTATCTGGAGAAATGCATCAGGCTGATGGCCTCTCAGAGAGCAATA
+TGCTTTTGTCTTGATATAGAGGCTTTCGAAACAAACCAGAACGTAATCACCGAAATTGGG
+ATTTCAATTTATGACCCCAGGGAAAATATGGTGCCGTCAATGGTTCCAATTACAAAGAAT
+TACCACCTAATTATCGAGGAGTCCCTGGAACTTAGAAACCAAAAATGGGTCTGTGACTAC
+AAGGATTGCTACTTATTGGGAGAAAGCTATGTTTTGAGCTTGAAAGAGTGCGTGCATTTC
+ATTCAATCACTAATAAACTATTACTTGGTCCCGGTGACCGAAGAAGACAAGACATGGTCA
+AGGGCATTTGTTGGTCATCACGTGAGCGGGGATCTTAAGTGGCTGGAGACTATTGGTGTC
+AAATTCCCTGGCAGAGGGTATGAAGGCCATCTGGACCATACGCTGCTTTTGGCTGAAACT
+CCCGGTGATCTAGACGTGTTCATCTTGGACACTGAGCAGTTTTACAGGAAATCGTATGGC
+GAAAAGGGCAGCAGTCTGGGCAAGATTCTGCGGTTGTTCGAGATACCGCATGCGTTTCTA
+CACAATGCCGGTAACGATGCCTACTATACCCTGCATTTGTTCATGAAGTTTTGCGATGTT
+AATTTCAGGAAAATAAGCGGCATGGACGATGTTCTTAAAGTAATGGGCCAAGTAAAAGTT
+TGGGGAGAACGAGACGTACGAGAGCCTAAAGTGGTGCCCATGTCGTATGCCATCTCCATC
+GAGGAGGCAGTCAAAAATCGGACGTACCGCAAGGGCGTCAAGAGCAGTAGGAAGGAAAGA
+GTCTGCCAAACGGAATTCGGTGGGTTAACGTATTTCGGAACTGCTAAAGACGCCTTCACA
+AGCACTCTTCCGACACACTAA
+>YCD5          333 residues Pha 0 Code 0
+ATGGTATCTCAAGAAACTATCAAGCACGTCAAGGACCTTATTGCAGAAAACGAGATCTTC
+GTCGCATCCAAAACGTACTGTCCATACTGCCATGCAGCCCTAAACACGCTTTTTGAAAAG
+TTAAAGGTTCCCAGGTCCAAAGTTCTGGTTTTGCAATTGAATGACATGAAGGAAGGCGCA
+GACATTCAGGCTGCGTTATATGAGATTAATGGCCAAAGAACCGTGCCAAACATCTATATT
+AATGGTAAACATATTGGAGGCAACGACGACTTGCAGGAATTGAGGGAGACTGGTGAATTG
+GAGGAATTGTTAGAACCTATTCTTGCAAATTAA
+>YCD3          507 residues Pha 0 Code 0
+ATGAATAAGTGGAGCAGGCTGTACGTTATAACTGTACGCAGGACTTTTCCAGGGAGAAGA
+AACATTGTACTGACGCAGTACTGGAATAAGAGCAAGAAAATGAGTGACGAATCGAATGAC
+GTGAAGTGGAACGATGCCCTGACACCATTGCAGCTGATGGTGCTGAGAGATAAGGCCACT
+GAAAGGCCCAACACCGGTGCGTATTTACACACCAACGAGTCCGGTGTCTACCATTGTGCC
+AACTGCGACAGACCGTTGTATTCGAGCAAGGCCAAGTTCGACGCTCGTTGTGGATGGCCC
+GCATTCTACGAAGAGGTATCCCCTGGAGCCATCACATATCATCGTGACAATTCTTTAATG
+CCTGCGAGGGTGGAGATATGTTGTGCAAGGTGTGGTGGACACTTGGGACATGTGTTTGAA
+GGTGAAGGCTGGAAACAGTTGCTAAACTTGCCCAAGGACACCAGACACTGTGTGAACAGT
+GCGTCTTTAAACCTCAAGAAGGATTAA
+>STE50        1041 residues Pha 0 Code 0
+ATGGAGGACGGTAAACAGGCCATCAATGAGGGATCAAACGATGCTTCGCCGGATCTGGAC
+GTGAATGGCACAATATTGATGAATAATGAAGACTTTTCCCAGTGGTCGGTTGATGATGTG
+ATAACTTGGTGTATATCCACGCTGGAGGTGGAAGAAACCGATCCATTATGTCAGAGACTG
+CGAGAAAATGATATTGTAGGAGATCTTTTGCCGGAATTGTGCTTGCAAGATTGCCAGGAC
+TTGTGTGACGGTGATTTGAATAAGGCCATAAAATTCAAGATACTGATCAATAAGATGAGA
+GACAGCAAGTTGGAGTGGAAGGACGACAAGACTCAAGAGGACATGATAACGGTACTGAAA
+AACTTGTACACTACTACATCTGCGAAATTGCAAGAATTTCAATCGCAGTACACAAGGCTG
+AGGATGGATGTCTTGGACGTAATGAAGACCAGCTCAAGCTCTTCTCCGATTAACACACAT
+GGAGTGTCCACTACGGTACCTTCTTCAAACAACACAATTATACCCAGTAGTGACGGTGTG
+TCTCTTTCACAAACAGACTATTTCGACACAGTTCATAACCGACAATCACCGTCAAGGAGA
+GAATCCCCGGTAACGGTATTTAGGCAACCCAGTCTTTCCCACTCAAAATCTTTGCACAAG
+GATAGCAAAAACAAAGTACCCCAAATATCTACAAACCAATCTCACCCATCTGCCGTTTCA
+ACAGCGAACACACCGGGGCCATCACCTAACGAGGCGTTAAAACAGTTGCGTGCATCTAAA
+GAAGACTCCTGCGAACGGATCTTGAAAAACGCAATGAAAAGACATAACTTAGCAGATCAG
+GATTGGAGACAATATGTCTTGGTCATTTGCTATGGGGATCAAGAGAGGCTGTTAGAATTG
+AACGAAAAGCCTGTGATCATATTCAAGAACTTAAAGCAACAGGGTTTGCACCCCGCCATT
+ATGTTAAGAAGAAGAGGTGATTTCGAAGAAGTAGCAATGATGAACGGAAGTGACAATGTC
+ACCCCCGGTGGAAGACTCTAA
+>HIS4         2400 residues Pha 0 Code 0
+ATGGTTTTGCCGATTCTACCGTTAATTGATGATCTGGCCTCATGGAATAGTAAGAAGGAA
+TACGTTTCACTTGTTGGTCAGGTACTTTTGGATGGCTCGAGCCTGAGTAATGAAGAGATT
+CTCCAGTTCTCCAAAGAGGAAGAAGTTCCATTGGTGGCTTTGTCCTTGCCAAGTGGTAAA
+TTCAGCGATGATGAAATCATTGCCTTCTTGAACAACGGAGTTTCTTCTCTGTTCATTGCT
+AGCCAAGATGCTAAAACAGCCGAACACTTGGTTGAACAATTGAATGTACCAAAGGAGCGT
+GTTGTTGTGGAAGAGAACGGTGTTTTCTCCAATCAATTCATGGTAAAACAAAAATTCTCG
+CAAGATAAAATTGTGTCCATAAAGAAATTAAGCAAGGATATGTTGACCAAAGAAGTGCTT
+GGTGAAGTACGTACAGACCGTCCTGACGGTTTATATACCACCCTAGTTGTCGACCAATAT
+GAGCGTTGTCTAGGGTTGGTGTATTCTTCGAAGAAATCTATAGCAAAGGCCATCGATTTG
+GGTCGTGGCGTTTATTATTCTCGTTCTAGGAATGAAATCTGGATCAAGGGTGAAACTTCT
+GGCAATGGCCAAAAGCTTTTACAAATCTCTACTGACTGTGATTCGGATGCCTTAAAGTTT
+ATCGTTGAACAAGAAAACGTTGGATTTTGCCACTTGGAGACCATGTCTTGCTTTGGTGAA
+TTCAAGCATGGTTTGGTGGGGCTAGAATCTTTACTAAAACAAAGGCTACAGGACGCTCCA
+GAGGAATCTTATACTAGAAGACTATTCAACGACTCTGCATTGTTAGATGCCAAGATCAAG
+GAAGAAGCTGAAGAACTGACTGAGGCAAAGGGTAAGAAGGAGCTTTCTTGGGAGGCTGCC
+GATTTGTTCTACTTTGCACTGGCCAAATTAGTGGCCAACGATGTTTCATTGAAGGACGTC
+GAGAATAATCTGAATATGAAGCATCTGAAGGTTACAAGACGGAAAGGTGATGCTAAGCCA
+AAGTTTGTTGGACAACCAAAGGCTGAAGAAGAAAAACTGACCGGTCCAATTCACTTGGAC
+GTGGTGAAGGCTTCCGACAAAGTTGGTGTGCAGAAGGCTTTGAGGAGACCAATCCAAAAG
+ACTTCTGAAATTATGCATTTAGTCAATCCGATCATCGAAAATGTTAGAGACAAAGGTAAC
+TCTGCCCTTTTGGAGTACACAGAAAAGTTTGATGGTGTAAAATTATCCAATCCTGTTCTT
+AATGCTCCATTCCCAGAAGAATACTTTGAAGGTTTAACCGAGGAAATGAAGGAAGCTTTG
+GACCTTTCAATTGAAAACGTCCGCAAATTCCATGCTGCTCAATTGCCAACAGAGACTCTT
+GAAGTTGAAACCCAACCTGGTGTCTTGTGTTCCAGATTCCCTCGTCCTATTGAAAAAGTT
+GGTTTGTATATCCCTGGTGGCACTGCCATTTTACCAAGTACTGCATTAATGCTTGGTGTT
+CCAGCACAAGTTGCCCAATGTAAGGAGATTGTGTTTGCATCTCCACCAAGAAAATCTGAT
+GGTAAAGTTTCACCCGAAGTTGTTTATGTCGCAGAAAAAGTTGGCGCTTCCAAGATTGTT
+CTAGCTGGTGGTGCCCAAGCCGTTGCTGCTATGGCTTACGGGACAGAAACTATTCCTAAA
+GTGGATAAGATCTTGGGTCCAGGTAATCAATTTGTGACTGCCGCCAAAATGTATGTTCAA
+AATGACACTCAAGCTCTATGTTCCATTGATATGCCAGCTGGCCCAAGTGAAGTTTTGGTT
+ATTGCCGATGAAGATGCCGATGTGGATTTTGTTGCAAGTGATTTGCTATCGCAAGCTGAA
+CACGGTATTGACTCCCAAGTTATCCTTGTTGGTGTTAACTTGAGCGAAAAGAAAATTCAA
+GAGATTCAAGATGCTGTCCACAATCAAGCTTTACAACTGCCACGTGTGGATATTGTTCGT
+AAATGTATTGCTCACAGTACGATCGTTCTTTGTGACGGTTACGAAGAAGCCCTTGAAATG
+TCCAACCAATATGCACCAGAACATTTGATTCTACAAATCGCCAATGCTAACGATTATGTT
+AAATTGGTTGACAATGCAGGGTCCGTATTTGTGGGTGCTTACACTCCAGAATCGTGCGGT
+GACTATTCAAGTGGTACTAACCATACATTACCAACCTATGGTTACGCTAGGCAGTACAGT
+GGTGCCAACACTGCAACCTTCCAAAAGTTTATCACTGCCCAAAACATTACCCCTGAAGGT
+TTAGAAAACATCGGTAGAGCTGTTATGTGCGTTGCCAAGAAGGAGGGTCTAGACGGTCAC
+AGAAACGCTGTGAAAATCAGAATGAGTAAGCTTGGGTTGATCCCAAAGGATTTCCAGTAG
+>BIK1         1323 residues Pha 0 Code 0
+ATGGATAGATATCAAAGAAAGATAGGATGTTTCATACAAATCCCAAATTTGGGGCGCGGA
+CAACTGAAATACGTGGGTCCAGTGGACACGAAAGCTGGAATGTTTGCTGGTGTAGACTTA
+CTTGCCAACATTGGTAAGAACGATGGATCATTCATGGGGAAGAAGTATTTTCAAACAGAG
+TATCCTCAAAGTGGACTATTTATCCAGTTGCAAAAAGTCGCATCATTGATCGAGAAGGCA
+TCGATATCGCAAACCTCGAGAAGAACGACGATGGAACCGCTATCAATACCCAAAAACAGA
+TCTATTGTGAGGCTCACTAACCAGTTCTCTCCCATGGATGATCCTAAATCCCCCACACCC
+ATGAGAAGTTTCCGGATCACCAGTCGGCACAGCGGTAATCAACAGTCGATGGACCAGGAG
+GCATCGGATCACCATCAACAGCAAGAATTTGGTTACGATAACAGAGAAGACAGAATGGAG
+GTCGACTCTATCCTGTCATCAGACAGAAAGGCTAATCACAACACCACCAGCGATTGGAAA
+CCGGACAATGGCCACATGAATGACCTCAATAGCAGCGAAGTTACAATTGAATTACGAGAA
+GCCCAATTGACCATCGAAAAGCTACAAAGGAAACAACTACACTACAAAAGGCTACTCGAT
+GACCAAAGAATGGTCCTCGAAGAAGTGCAACCGACTTTTGATAGGTATGAAGCCACAATA
+CAAGAAAGAGAGAAAGAGATAGACCATCTCAAGCAACAATTGGAGCTCGAACGCAGACAG
+CAAGCCAAACAAAAGCAGTTTTTTGACGCTGAGAATGAACAGCTACTTGCTGTCGTAAGC
+CAACTACACGAAGAGATCAAAGAAAACGAAGAGAGAAATCTTTCTCATAATCAACCCACT
+GGTGCCAACGAAGATGTCGAACTCCTGAAAAAACAGCTGGAACAATTACGCAACATAGAA
+GACCAATTTGAGTTACACAAGACAAAGTGGGCTAAAGAACGCGAACAATTGAAAATGCAT
+AACGATTCGCTCAGTAAAGAATACCAAAATTTGAGCAAGGAACTATTTTTGACAAAACCA
+CAAGATTCCTCATCGGAAGAGGTGGCATCCTTAACGAAAAAACTTGAAGAGGCTAATGAA
+AAAATCAAACAGTTGGAACAGGCTCAAGCACAAACAGCCGTGGAATCGTTGCCAATTTTC
+GACCCCCCTGCACCAGTCGATACCACGGCAGGAAGACAACAGTGGTGTGAGCATTGCGAT
+ACGATGGGTCATAATACAGCAGAATGCCCCCATCACAATCCTGACAACCAGCAGTTCTTC
+TAG
+>FUS1         1539 residues Pha 0 Code 0
+ATGGTAGCAACAATAATGCAGACGACAACAACTGTGCTGACGACAGTCGCCGCAATGTCT
+ACTACCTTAGCATCAAATTACATATCTTCGCAAGCTAGTTCCTCGACGAGTGTAACAACA
+GTAACGACAATAGCGACATCAATACGCTCTACACCGTCTAATCTACTCTTTTCTAATGTG
+GCGGCTCAGCCAAAATCATCTTCAGCAAGCACAATTGGGCTTTCAATCGGACTTCCCATC
+GGAATATTCTGTTTCGGATTACTTATCCTTTTGTGTTATTTCTACCTTAAAAGGAATTCG
+GTGTCCATTTCAAATCCACCCATGTCAGCTACGATTCCAAGGGAAGAGGAATATTGTCGC
+CGCACTAATTGGTTCTCACGGTTATTTCGGCAGAGTAAGTGTGAGGATCAGAATTCATAT
+TCTAATCGTGATATTGAGAAGTATAACGACACCCAGTGGACCTCGGGTGATAACATGTCT
+TCAAAAATACAGTACAAAATTTCCAAACCCATAATACCGCAGCATATACTGACACCTAAG
+AAAACGGTGAAGAACCCATATGCTTGGTCTGGTAAAAACATTTCGTTAGACCCCAAAGTG
+AACGAAATGGAGGAAGAGAAAGTTGTGGATGCATTCCTGTATACTAAACCACCGAATATT
+GTCCATATTGAATCCAGCATGCCCTCGTATAATGATTTACCTTCTCAAAAAACGGTGTCC
+TCAAAGAAAACTGCGTTAAAAACGAGTGAGAAATGGAGTTACGAATCTCCACTATCTCGA
+TGGTTCTTGAGGGGTTCTACATACTTTAAGGATTATGGCTTATCAAAGACCTCTTTAAAG
+ACCCCAACTGGGGCTCCACAACTGAAGCAAATGAAAATGCTCTCCCGGATAAGTAAGGGT
+TACTTCAATGAGTCAGATATAATGCCTGACGAACGATCGCCCATCTTGGAGTATAATAAC
+ACGCCTCTGGATGCAAATGACAGCGTGAATAACTTGGGTAATACCACGCCAGATTCACAA
+ATCACATCTTATCGCAACAATAACATCGATCTAATCACGGCAAGACCCCATTCAGTGATA
+TACGGTACTACTGCACAACAAACTTTGGAAACCAACTTCAATGATCATCATGACTGCAAT
+AAAAGCACTGAGAAACACGAGTTGATAATACCCACCCCATCAAAACCACTAAAGAAAAGG
+AAAAAAAGAAGACAAAGTAAAATGTATCAGCATTTACAACATTTGTCACGTTCTAAACCA
+TTGCCGCTTACTCCAAACTCCAAATATAATGGAGAGGCTAGCGTCCAATTAGGGAAGACA
+TATACAGTTATTCAGGATTACGAGCCTAGATTGACAGACGAAATAAGAATCTCGCTGGGT
+GAAAAAGTTAAAATTCTGGCCACTCATACCGATGGATGGTGTCTGGTAGAGAAGTGTAAT
+ACACGAAAGGGTACTATTCACGTCAGTGTTGACGATAAAAGATACCTCAATGAAGATAGA
+GGCATTGTGCCTGGTGACTGTCTCCAAGAATACGACTGA
+>YC08          579 residues Pha 0 Code 0
+ATGTCCCCAACTGGAAACTACTTAAACGCTATTACAAACCGTCGTACCATCTACAATTTG
+AAGCCCGAATTACCACAAGGTGTCGGTTTGGATGATGTAAAGAGAACTGTACACGTTATT
+CTCAAGAATACGCCAACAGCTTTTAACTCACAAGTGAATCGCGCTGTCATTATCGTTGGT
+GATACACACAAAAGGATATGGGATGCTGTTGCGAGCGCAATGCCAACTGCTGAAGCCAAG
+AAGAGACCAGAGTCTTGCAGAGATGAGGCTTACGGTTCAGTCATTTTCTTCACTGATGAA
+GGACCAACTGAAAACTGCAAGAGATTTTCCAGCCTTGGCACCGCTTTCCCAACATGCGCC
+GCTCATACGACCGGTGCTGTGCAAATTCAGTCTTGGACTGCCCTCGAACTATTGGGATTG
+GGGGCTAATTTGCAACACTATAATGACTACGTCAAATCTGCTTTGCCTCAAGATGTTCCT
+ATTGCGTGGACTGTACAATCTCAATTGGTCTTTGGTGTTCCAACTGCCTTGCCAGAAGAA
+AAGACTTTTATCAATAACGTAATCAACGTTTATCACTGA
+>AGP1         1902 residues Pha 0 Code 0
+ATGTCGTCGTCGAAGTCTCTATACGAACTGAAAGACTTGAAAAATAGCTCCACAGAAATA
+CATGCCACGGGGCAGGATAATGAAATTGAATATTTCGAAACAGGCTCCAATGACCGTCCA
+TCCTCACAACCTCATTTAGGTTACGAACAGCATAACACTTCTGCCGTGCGTAGGTTTTTC
+GACTCCTTTAAAAGAGCGGATCAGGGTCCACAGGATGAAGTAGAAGCAACACAAATGAAC
+GATCTTACGTCGGCTATCTCACCTTCTTCTAGACAGGCTCAAGAACTAGAAAAAAATGAA
+AGTTCGGACAACATAGGCGCTAATACAGGTCATAAGTCGGACTCGCTGAAGAAAACCATT
+CAGCCTAGACATGTTCTGATGATTGCGTTGGGTACGGGTATCGGTACTGGGTTACTGGTC
+GGTAACGGTACCGCGTTGGTTCATGCGGGTCCAGCTGGACTACTTATTGGTTACGCTATT
+ATGGGTTCTATCTTGTACTGTATTATTCAAGCATGTGGTGAAATGGCGCTAGTGTATAGT
+AACTTGACTGGTGGCTACAATGCATACCCAGTTTCCTTGTGGATGATGGTTTTTGGGTTT
+GCAGTCGCTTGGGTTTATTGTTTGCAATGGCTGTGTGTGTGTCCTCTGGAATTGGTGACC
+GCATCCATGACTATCAAATATTGGACGACATCTGTGAACCCGGATGTGTTCGTCATTATT
+TTCTATGTTTTGGTGATTACTATTAATATTTTCGGTGCTCGTGGTTATGCAGAAGCTGAG
+TTCTTCTTCAACTGTTGCAAAATTTTGATGATGACTGGGTTCTTCATTCTTGGTATTATC
+ATCGATGTTGGTGGCGCTGGTAATGATGGTTTTATTGGTGGTAAATACTGGCACGATCCG
+GGCGCTTTCAATGGTAAACATGCCATTGACAGATTTAAAGGTGTTGTTGCAACATTAGTG
+ACTGCTGCTTTTGCCTTTGGTGGTTCAGAGTTTATTGCCATCACCACTGCAGAACAATCT
+AATCCAAGAAAGGCCATTCCAGGTGCGGCCAAACAAATGATCTACAGAATCTTATTCCTA
+TTCTTGGCTACCATTATTCTACTGGGTTTCTTGGTGCCATACAATTCCGATCAATTATTG
+GGTTCTACCGGTGGTGGTACTAAAGCCTCGCCATATGTCATTGCTGTTGCATCCCACGGT
+GTCCGTGTCGTCCCACACTTCATTAACGCCGTTATTCTACTTTCCGTGCTGTCCATGGCT
+AACTCCTCCTTCTACTCCAGTGCTCGTTTATTTTTAACTCTATCCGAGCAAGGTTACGCT
+CCTAAGGTTTTCTCCTACATCGACAGAGCCGGTAGACCATTGATTGCCATGGGTGTTTCT
+GCATTGTTTGCCGTTATTGCCTTCTGTGCTGCATCTCCCAAGGAAGAACAAGTTTTCACT
+TGGTTATTGGCCATTTCTGGTTTGTCTCAGCTTTTCACATGGACTGCCATTTGTTTATCC
+CATCTTAGATTTAGAAGAGCCATGAAAGTCCAAGGGAGATCTCTTGGAGAATTGGGTTTC
+AAATCTCAAACTGGTGTTTGGGGATCTGCCTACGCTTGCATTATGATGATTTTAATTCTT
+ATTGCCCAATTTTGGGTCGCTATCGCCCCCATTGGTGAAGGTAAGCTGGATGCACAAGCC
+TTTTTCGAAAACTACTTGGCTATGCCAATCTTGATTGCACTATATGTCGGCTACAAGGTC
+TGGCACAAGGATTGGAAACTGTTCATCAGGGCCGACAAGATCGACCTAGATTCTCATAGA
+CAAATCTTTGATGAAGAATTAATCAAGCAAGAAGACGAAGAATATAGGGAACGTTTGAGG
+AACGGACCTTATTGGAAAAGGGTCGTTGCCTTCTGGTGTTAA
+>LEU2         1095 residues Pha 0 Code 0
+ATGTCTGCCCCTAAGAAGATCGTCGTTTTGCCAGGTGACCACGTTGGTCAAGAAATCACA
+GCCGAAGCCATTAAGGTTCTTAAAGCTATTTCTGATGTTCGTTCCAATGTCAAGTTCGAT
+TTCGAAAATCATTTAATTGGTGGTGCTGCTATCGATGCTACAGGTGTCCCACTTCCAGAT
+GAGGCGCTGGAAGCCTCCAAGAAGGTTGATGCCGTTTTGTTAGGTGCTGTGGGTGGTCCT
+AAATGGGGTACCGGTAGTGTTAGACCTGAACAAGGTTTACTAAAAATCCGTAAAGAACTT
+CAATTGTACGCCAACTTAAGACCATGTAACTTTGCATCCGACTCTCTTTTAGACTTATCT
+CCAATCAAGCCACAATTTGCTAAAGGTACTGACTTCGTTGTTGTCAGAGAATTAGTGGGA
+GGTATTTACTTTGGTAAGAGAAAGGAAGACGATGGTGATGGTGTCGCTTGGGATAGTGAA
+CAATACACCGTTCCAGAAGTGCAAAGAATCACAAGAATGGCCGCTTTCATGGCCCTACAA
+CATGAGCCACCATTGCCTATTTGGTCCTTGGATAAAGCTAATGTTTTGGCCTCTTCAAGA
+TTATGGAGAAAAACTGTGGAGGAAACCATCAAGAACGAATTCCCTACATTGAAGGTTCAA
+CATCAATTGATTGATTCTGCCGCCATGATCCTAGTTAAGAACCCAACCCACCTAAATGGT
+ATTATAATCACCAGCAACATGTTTGGTGATATCATCTCCGATGAAGCCTCCGTTATCCCA
+GGTTCCTTGGGTTTGTTGCCATCTGCGTCCTTGGCCTCTTTGCCAGACAAGAACACCGCA
+TTTGGTTTGTACGAACCATGCCACGGTTCTGCTCCAGATTTGCCAAAGAATAAGGTCAAC
+CCTATCGCCACTATCTTGTCTGCTGCAATGATGTTGAAATTGTCATTGAACTTGCCTGAA
+GAAGGTAAGGCCATTGAAGATGCAGTTAAAAAGGTTTTGGATGCAGGTATCAGAACTGGT
+GATTTAGGTGGTTCCAACAGTACCACGGAAGTCGGTGATGCTGTCGCCGAAGAAGTTAAG
+AAAATCCTTGCTTAA
+>NFS1         1494 residues Pha 0 Code 0
+ATGTTGAAATCAACTGCTACAAGATCGATAACAAGATTATCTCAAGTTTACAACGTTCCA
+GCGGCCACATATAGGGCTTGTTTGGTAAGCAGGAGATTCTATTCCCCTCCTGCAGCAGGC
+GTGAAGTTAGACGACAACTTCTCTCTGGAAACGCATACCGATATTCAGGCTGCTGCAAAG
+GCACAGGCTAGTGCCCGTGCGAGTGCATCCGGTACCACCCCAGATGCTGTAGTAGCTTCT
+GGTAGCACTGCAATGAGCCATGCTTATCAAGAAAACACAGGTTTTGGTACTCGTCCCATA
+TATCTTGACATGCAAGCCACTACACCAACAGACCCTAGGGTTTTGGATACGATGTTGAAG
+TTTTATACGGGACTTTATGGTAATCCTCATTCCAACACTCACTCTTACGGTTGGGAAACA
+AATACTGCTGTGGAAAATGCTAGAGCTTACGTAGCAAAGATGATCAATGCCGACCCCAAG
+GAAATAATATTCACTTCGGGAGCGACCGAATCTAATAATATGGTTCTTAAGGGTGTCCCA
+AGATTTTATAAGAAGACTAAGAAACACATCATCACCACTAGAACGGAACACAAGTGTGTC
+TTGGAAGCCGCACGGGCCATGATGAAGGAGGGATTTGAAGTCACTTTCCTAAATGTGGAC
+GATCAAGGTCTTATCGATTTGAAGGAATTGGAAGATGCCATTAGACCAGATACCTGTCTC
+GTCTCTGTGATGGCTGTCAATAATGAAATCGGTGTCATTCAACCTATTAAAGAAATTGGT
+GCAATTTGTAGAAAGAATAAGATCTACTTTCATACTGACGCCGCACAAGCCTATGGTAAG
+ATTCACATTGATGTCAATGAAATGAACATTGATTTACTATCAATTTCTTCTCACAAGATT
+TACGGTCCAAAGGGAATAGGTGCCATCTATGTAAGAAGGAGACCAAGAGTTAGATTAGAA
+CCTTTACTATCCGGTGGTGGCCAAGAGAGAGGATTGAGATCTGGTACTTTGGCCCCCCCA
+TTGGTAGCGGGATTTGGTGAAGCTGCGAGATTGATGAAGAAAGAATTTGACAACGACCAA
+GCTCACATCAAAAGACTATCCGATAAATTAGTCAAAGGTCTATTATCCGCTGAACATACC
+ACGTTGAACGGATCTCCAGATCATCGTTATCCAGGGTGTGTTAACGTTTCTTTCGCCTAC
+GTGGAAGGAGAATCTTTATTGATGGCACTAAGGGATATCGCATTATCCTCGGGTTCAGCC
+TGTACATCTGCTTCCCTAGAACCTTCTTATGTTTTACATGCGCTGGGTAAGGATGATGCA
+TTAGCCCATTCTTCCATCAGATTTGGTATTGGTAGATTTAGTACTGAAGAGGAGGTCGAC
+TACGTCGTTAAGGCCGTTTCTGACAGAGTAAAATTCTTGAGGGAACTTTCACCATTATGG
+GAAATGGTTCAAGAAGGTATTGACTTAAACTCCATCAAATGGTCAGGTCATTGA
+>BUD3         4104 residues Pha 0 Code 0
+ATGGAGAAAGACCTGTCGTCTCTTTACTCTGAAAAGAAAGACAAAGAGAACGATGAAACC
+TTATTTAACATCAAACTATCCAAATCTGTTGTCGAGACCACACCGCTAAATGGTCATTCA
+TTGTTTGATGATGATAAATCACTTTCAGACTGGACGGATAATGTGTTCACTCAATCAGTA
+TTCTATCACGGGTCAGATGACTTGATATGGGGGAAGTTCTTTGTCTGCGTGTACAAGTCC
+CCCAACAGCAATAAGTTGAACGCTATAATATTCGACAAATTAGGAACATCATGCTTCGAA
+TCCGTCGATATATCTTCCAACTCGCAATACTATCCGGCCATTGAGAATTTGAGTCCAAGT
+GATCAGGAAAGCAATGTTAAGAAATGCATTGCTGTCATTCTGTTACAGCGCTATCCATTA
+CTTTCACCATCAGACTTATCACAAATATTGTCCAATAAATCGGAAAATTGCGACTATGAC
+CCCCCTTATGCTGGAGATTTGGCTAGTAGTTGCCAGTTGATAACAGCAGTTCCTCCAGAA
+GATCTGGGGAAGCGCTTCTTTACATCAGGACTTCTGCAAAATAGATTTGTCAGCTCTACC
+CTGTTAGATGTTATTTATGAAAACAATGAATCCACCATCGAACTAAATAATAGGTTGGTA
+TTCCATCTGGGTGAACAACTTGAACAACTTTTTAACCCAGTCACAGAATACTCACCGGAA
+CAGACAGAATATGGTTATAAGGCGCCAGAGGACGAATTACCCACAGAATCGGATGATGAT
+CTTGTCAAGGCCATTTGCAACGAGTTATTACAACTACAAACAAATTTTACTTTCAATTTG
+GTAGAATTTTTGCCAAAATTCCTGATCGCCTTGAGAGTCAGAGTACTCAATGAAGAAATT
+AATGGGTTATCCACAACCAAATTAAATCGACTCTTCCCACCTACAATAGATGAAGTCACA
+AGAATCAATTGTATTTTTCTAGACTCGCTAAAGACAGCAATCCCTTACGGTTCCCTCGAA
+GTACTGAAGGCATGCAGCATTACTATTCCTTATTTCTACAAAGCATATACAAGACACGAG
+GCGGCCACAAAGAACTTCAGCAAAGATATTAAATTGTTTATTAGGCATTTCAGCAATGTA
+ATTCCAGAAAGAGAGGTCTACACGGAAATGAAAATCGAGAGTATAATTAAGGGACCTCAG
+GAAAAACTACTGAAGCTAAACTTAATTATAGAGAGATTGTGGAAGTCGAAAAAATGGAGA
+CCGAAAAATCAAGAAATGGCAAAAAAATGCTACAACAATATCATTGATGTCATTGATTCG
+TTTGGAAAATTAGATTCCCCACTTCATTCTTATAGTACCAGAGTATTTACTCCATCGGGA
+AAAATCCTTACAGAATTAGCCAAATGCTGGCCCGTAGAACTGCAATACAAATGGCTGAAG
+AGAAGGGTAGTCGGTGTGTATGATGTAGTGGATTTGAATGATGAAAATAAGAGAAATTTA
+TTAGTCATATTCAGTGATTATGTGGTTTTCATCAATATACTGGAGGCAGAAAGTTACTAC
+ACTTCAGATGGATCAAACAGGCCCTTAATCTCAGATATTTTAATGAACTCATTGATCAAC
+GAAGTTCCGTTGCCCTCCAAGATCCCTAAGTTGAAAGTGGAGCGTCATTGCTATATAGAT
+GAGGTTCTAGTTTCTATATTAGACAAAAGCACTCTACGTTTTGATCGATTGAAGGGAAAA
+GATTCTTTCTCAATGGTATGTAAATTATCCTCTGCATTTATCTCTTCTTCGTCAGTTGCT
+GACTTGATTACGAAGGCTAGAATTTTGGAAAAAGACACTGCATTTCATTTATTTAAAGCT
+AGTAGAAGCCATTTTACATTATATTCTACTGCTCACGAGCTTTGCGCTTATGATTCCGAA
+AAAATAAAATCAAAATTTGCCTTATTCCTGAACATACCACCATCCAAGGAGATATTGGAG
+GTCAACAACCTTCATTTGGCTTTTTTTGCAAGATTTTGCAGTAACGATGGTAGAGATAAC
+ATCGTAATCTTAGACGTCTTAACCAAACATGACGATAAACATATAGAAGTTACATCCGAT
+AACATTGTTTTCACCATAATTAATCAATTGGCCATTGAAATACCGATATGCTTTTCTTCC
+TTAAACTCATCGATGGCCAAAGATTTACTCTGTGTAAATGAGAATTTGATAAAAAACTTA
+GAACATCAATTGGAAGAGGTCAAGCACCCTTCAACAGACGAACATAGGGCTGTTAATAGC
+AAACTTTCCGGTGCATCCGATTTCGATGCTACTCACGAGAAGAAAAGATCATACGGTACC
+ATAACAACATTTAGAAGCTATACAAGCGACTTGAAGGACAGTCCATCAGGCGATAATAGT
+AATGTCACCAAGGAAACTAAGGAAATTTTACCAGTGAAACCTACGAAAAAGTCTTCAAAA
+AAACCAAGAGAAATTCAAAAGAAGACCAAGACAAACGCCTCTAAAGCAGAGCACATAGAA
+AAGAAGAAGCCTAACAAAGGCAAAGGGTTTTTTGGCGTGTTAAAAAATGTTTTTGGAAGT
+AAAAGCAAGAGCAAGCCTTCACCAGTTCAAAGAGTGCCTAAAAAAATATCGCAGAGGCAT
+CCTAAGTCTCCAGTGAAGAAGCCAATGACCTCAGAAAAGAAATCCTCCCCTAAAAGGGCA
+GTCGTTTCATCTCCCAAAATTAAAAAGAAAAGTACTTCTTTTTCCACAAAAGAATCACAA
+ACTGCTAAATCTTCTCTTCGAGCAGTTGAATTCAAATCTGATGACTTGATCGGAAAACCA
+CCTGATGTTGGAAATGGCGCACATCCTCAAGAAAATACCAGAATATCTTCAGTAGTAAGG
+GATACAAAATATGTCTCCTACAATCCCTCTCAGCCTGTGACAGAAAATACCAGTAACGAA
+AAAAATGTCGAACCAAAAGCGGATCAATCCACAAAGCAGGATAACATTTCCAATTTTGCA
+GATGTAGAGGTATCTGCGTCTTCTTATCCTGAAAAACTTGATGCAGAAACAGATGATCAA
+ATAATTGGGAAGGCGACGAATTCGTCATCAGTTCATGGAAATAAAGAGCTGCCAGACCTT
+GCTGAGGTGACTACAGCAAATAGGGTTTCTACAACATCGGCTGGGGACCAACGTATTGAT
+ACCCAAAGCGAATTTTTACGTGCAGCTGATGTTGAAAACTTAAGTGATGACGATGAACAC
+AGACAGAATGAAAGTAGAGTTTTTAACGATGACCTCTTTGGTGATTTTATTCCTAAGCAT
+TACCGTAATAAACAGGAGAACATTAACAGCTCGAGTAATTTGTTTCCAGAGGGAAAGGTG
+CCCCAAGAAAAGGGCGTATCAAATGAAAACACTAACATATCTCTCAAAACTAATGAAGAT
+GCATCTACATTGACGCAGAAACTCTCTCCACAAGCGAGTAAAGTGCTGACAGAAAATTCT
+AATGAATTAAAAGATACCAACAATGAAGGGAAGGACGCAAAGGACATAAAATTAGGAGAT
+GATTACAGTGATAAAGAAACAGCGAAAGAAATAACTAAACCAAAAAATTTTGTTGAAGGA
+ATAACTGAACGGAAAGAAATATTCCCCACTATTCCTAGGTTAGCGCCGCCAGCTTCAAAA
+ATTAACTTTCAAAGGTCACCATCCTATATTGAGCTCTTTCAAGGAATGAGGGTGGTTTTA
+GATAAGCATGATGCCCATTATAACTGGAAACGCTTGGCTAGTCAAGTCTCCTTAAGTGAG
+GGACTAAAAGTCAATACTGAGGAAGATGCGGCAATTATAAATAAAAGTCAGGATGATGCC
+AAGGCGGAAAGAATGACTCAAATTTCTGAAGTGATTGAGTATGAAATGCAGCAACCTATC
+CCAACTTATTTGCCTAAGGCGCATCTAGATGACTCGGGTATTGAAAAAAGTGATGACAAA
+TTCTTCGAAATTGAAGAAGAACTTAAGGAAGAATTGAAGGGCAGCAAAACGGTAATGAAG
+ATGTCGGTAATAATAATCCATCCAATTCTATTCCAAAAATCGAGAAGCCCCCAGCATTCA
+AAGTTATTAGAACATCGCCTGTGA
+>GBP2         1284 residues Pha 0 Code 0
+ATGGAGAGAGAGCTAGGGATGTATGGAAATGATAGGAGTAGATCAAGATCACCTGTACGT
+CGTCGTTTGAGCGACGACAGAGACAGGTACGATGATTATAACGATAGTAGCAGTAATAAT
+GGTAATGGCAGTCGTCGTCAGAGACGCGACCGAGGCTCCCGTTTCAATGATCGGTACGAT
+CAGAGTTATGGTGGCAGCCGCTACCACGATGATAGGAACTGGCCCCCTCGCCGAGGAGGC
+CGTGGCAGAGGAGGAAGCAGATCATTCAGAGGGGGACGCGGTGGCGGTAGGGGTCGTACT
+TTAGGTCCAATTGTTGAAAGAGACTTAGAAAGGCAATTTGACGCGACCAAGAGAAATTTT
+GAAAATAGTATCTTCGTGAGAAACTTGACTTTTGATTGTACCCCTGAAGACCTTAAGGAA
+TTGTTTGGTACAGTGGGCGAAGTTGTGGAGGCTGACATTATCACATCAAAGGGCCATCAC
+CGTGGTATGGGGACTGTGGAATTTACCAAAAACGAATCTGTCCAAGATGCCATATCGAAG
+TTTGATGGTGCCCTCTTTATGGACCGGAAACTAATGGTAAGACAGGATAATCCTCCTCCT
+GAAGCTGCCAAGGAATTTTCTAAGAAAGCTACTAGGGAAGAAATAGATAATGGGTTTGAA
+GTGTTCATCATCAATTTACCGTACTCTATGAATTGGCAATCCTTAAAAGATATGTTTAAA
+GAATGTGGTCATGTCTTGCGTGCCGATGTAGAATTGGATTTCAACGGATTTTCAAGAGGA
+TTCGGTTCTGTCATTTATCCTACTGAGGATGAAATGATTAGAGCTATCGATACATTCAAC
+GGCATGGAAGTAGAAGGTAGAGTTTTGGAAGTTAGAGAAGGGCGTTTCAACAAGAGAAAG
+AACAATGATCGTTATAATCAAAGGCGTGAGGACCTTGAAGATACCAGAGGTACTGAACCA
+GGTCTTGCGCAGGATGCCGCTGTCCACATTGATGAAACTGCAGCAAAATTTACTGAAGGT
+GTCAATCCAGGAGGGGATAGAAACTGTTTCATTTATTGTAGTAATTTACCATTCTCAACA
+GCAAGAAGCGATTTATTCGACTTGTTTGGGCCTATCGGCAAAATCAATAACGCGGAATTG
+AAACCACAGGAAAATGGTCAACCAACTGGTGTTGCTGTTGTAGAATATGAAAATTTAGTA
+GATGCAGATTTTTGTATTCAAAAATTAAATAATTATAATTATGGTGGTTGTAGTTTACAG
+ATCTCTTATGCTAGACGTGATTAA
+>ILV6          930 residues Pha 0 Code 0
+ATGCTGAGATCGTTATTGCAAAGCGGCCACCGCAGGGTGGTTGCTTCTTCATGTGCTACC
+ATGGTGCGTTGCAGTTCCTCGTCGACCTCCGCGTTGGCGTACAAGCAGATGCACAGACAC
+GCAACAAGACCTCCCTTGCCCACACTAGACACTCCTTCCTGGAATGCCAACAGTGCCGTT
+TCATCCATCATTTACGAAACACCAGCGCCTTCTCGTCAACCAAGAAAACAGCATGTCTTG
+AACTGTTTGGTGCAAAACGAACCCGGTGTCTTGTCCAGAGTCTCGGGTACGTTAGCTGCC
+AGAGGCTTTAACATCGATTCGTTGGTCGTGTGCAACACCGAGGTCAAAGACCTAAGTAGA
+ATGACCATTGTTTTGCAAGGGCAAGATGGCGTAGTCGAACAAGCACGCAGACAAATCGAA
+GACTTGGTCCCCGTCTACGCCGTCCTAGACTATACCAATTCTGAGATCATCAAAAGAGAG
+CTAGTGATGGCCAGAATCTCTCTATTGGGTACTGAATACTTCGAAGACCTACTATTGCAC
+CACCACACTTCCACCAATGCTGGCGCCGCTGACTCCCAAGAATTGGTCGCCGAAATCAGA
+GAAAAGCAATTCCACCCTGCCAACTTGCCCGCCAGTGAGGTATTAAGGTTGAAGCACGAG
+CATTTGAACGATATCACCAACTTGACCAACAACTTTGGAGGTCGTGTCGTCGACATCAGC
+GAAACAAGCTGTATTGTGGAATTGTCTGCAAAACCCACACGTATCTCTGCCTTCTTGAAG
+TTGGTCGAGCCATTCGGTGTCCTAGAGTGTGCAAGAAGCGGTATGATGGCATTGCCAAGA
+ACTCCTTTGAAGACAAGCACCGAGGAAGCTGCCGACGAAGACGAAAAGATCAGCGAAATC
+GTCGACATTTCCCAACTACCACCTGGTTAG
+>CWH36         393 residues Pha 0 Code 0
+ATGGAGCTGGCAAAGGAACGTAATGGCCCACATCAAAAACATCATGGCCAATGTCAAAAT
+CACTGTACTTCTCCAAACACTGTACGACAAAACAAAACAAACAAACTCTTGTTAGTAAAA
+AAGAAAGGGAAACTAGTAATATGGAGACACATCGTAAAAAAAATGTTGCACATACGCTTG
+GTTGTTCTTTGGAGCCATTATCCAGAACAGCACGGACATGGCACTAACCACTATGAATAC
+ACCAACAACAGTATAGCTAAATTGGACGCGCAGAGAGTTAGTAGAAGAAGAAGGAAGAAA
+AGGGAAGCGGAGAGAAGAGATTATGACACATACAAACTACTCATTACTCTTTGTTCTTTA
+TTATTCGTTGGACCTTTGTTTCTTAAAGTATAG
+>PEL1         1251 residues Pha 0 Code 0
+ATGACGACTCGTTTGCTCCAACTCACTCGTCCTCATTACAGATTATTATCCCTACCTCTC
+CAGAAACCCTTCAATATAAAAAGGCAGATGTCCGCTGCGAACCCTTCTCCATTTGGCAAT
+TATTTGAACACGATCACTAAGTCCCTACAACAGAATTTACAAACATGCTTTCATTTCCAA
+GCAAAAGAAATCGATATAATCGAATCTCCATCTCAGTTTTACGATCTCTTGAAGACAAAA
+ATACTTAATTCACAAAATAGAATATTCATTGCGTCTCTGTATTTAGGCAAAAGCGAGACT
+GAGTTGGTGGACTGCATATCCCAGGCATTGACCAAGAACCCCAAGTTGAAAGTTTCTTTT
+CTACTTGATGGCCTTCGAGGAACAAGAGAATTGCCTTCCGCCTGTTCCGCCACTTTATTA
+TCGTCTTTAGTAGCCAAATATGGGTCAGAGAGAGTGGATTGCCGATTGTACAAGACGCCT
+GCTTATCATGGTTGGAAAAAAGTCTTGGTTCCCAAGAGATTTAATGAAGGTTTAGGCTTA
+CAACATATGAAAATATATGGGTTTGATAACGAGGTCATTCTTTCGGGAGCCAACCTTTCG
+AACGACTATTTCACCAACAGACAAGATAGATACTATCTCTTTAAATCTCGAAACTTCTCC
+AACTATTATTTTAAATTACATCAACTCATAAGTTCCTTCAGTTATCAGATTATAAAGCCA
+ATGGTGGATGGTAGCATCAACATCATTTGGCCAGATTCGAATCCTACTGTTGAACCGACG
+AAAAATAAAAGGCTGTTTTTAAGGGAAGCATCTCAATTACTAGATGGCTTTTTAAAGAGT
+TCTAAACAAAGCCTCCCGATTACTGCCGTGGGTCAATTCTCCACATTAGTTTACCCAATT
+TCTCAATTCACTCCACTTTTTCCCAAATATAATGACAAATCGACCGAAAAAAGAACAATA
+TTGTCATTGCTTTCCACTATAACAAGCAATGCCATTTCTTGGACGTTCACTGCAGGATAC
+TTCAATATTTTGCCAGACATCAAAGCAAAACTGCTGGCAACGCCGGTTGCTGAGGCAAAT
+GTAATAACAGCTTCCCCCTTTGCAAACGGCTTTTACCAATCAAAGGGCGTCTCATCAAAT
+TTACCTGGTGCTTACTTGTACCTGTCAAAAAAATTTCTACAAGATGTATGTAGGTACAGA
+CAAGATCATGCTATTACCATTAAGAGAATGGCAAAGAGGCGTAGTAAATAA
+>RER1          567 residues Pha 0 Code 0
+ATGGATTACGATAGCTCTGATACAATGAACGGTGGTTCAAGTAACCCCTTAATCACTAAG
+ATGAATACAATGAAATTATTATATCAACACTATTTGGATAAAGTCACTCCTCACGCTAAG
+GAGAGGTGGGCTGTATTGGGTGGTTTGTTATGTTTGTTTATGGTTCGTATTACAATGGCC
+GAAGGCTGGTATGTGATTTGTTATGGTCTAGGTCTATTTTTATTGAATCAATTTTTAGCC
+TTTTTGACCCCAAAATTCGATATGTCCTTACAGCAAGATGAAGAAAACAACGAATTGGAA
+GCTGGAGAAAAATCAGAAGAATTCCGTCCATTCATCAGAAGATTACCAGAGTTCAAATTC
+TGGTATAACAGCATTAGAGCCACTGTCATTTCCCTCTTGTTGTCGCTATTTTCAATCTTC
+GATATTCCAGTATTTTGGCCCATCTTATTGATGTATTTCATATTATTGTTTTTTTTAACT
+ATGAGAAGGCAGATTCAACATATGATAAAATATAGATATATACCCTTAGATATCGGTAAG
+AAGAAATATTCTCATTCTTCTAACTGA
+>CDC10         969 residues Pha 0 Code 0
+ATGGATCCTCTCAGCTCAGTACAGCCTGCTTCTTATGTTGGTTTTGATACCATCACGAAT
+CAGATCGAACATCGTCTGTTGAAGAAAGGTTTTCAATTTAATATAATGGTTGTTGGCCAA
+TCCGGATTGGGTAAAAGTACTCTAATAAATACGTTATTTGCCTCACATTTGATTGATTCT
+GCTACTGGTGATGATATTTCTGCCCTGCCTGTTACAAAAACAACTGAAATGAAAATTTCT
+ACTCATACTCTTGTGGAGGACCGCGTTCGCTTGAATATTAATGTTATAGATACACCTGGA
+TTTGGTGACTTTATTGACAATTCTAAAGCTTGGGAGCCTATTGTGAAGTACATTAAGGAA
+CAACATTCTCAATACTTACGTAAAGAATTGACAGCCCAACGTGAAAGGTTTATTACTGAT
+ACAAGAGTTCATGCAATTCTTTATTTCCTGCAACCAAATGGAAAGGAGTTGAGCCGCCTT
+GACGTTGAAGCCTTGAAAAGATTGACAGAAATAGCAAATGTTATACCAGTTATTGGCAAG
+TCGGATACATTGACTTTAGATGAAAGAACGGAGTTTAGGGAGCTTATTCAAAATGAATTC
+GAAAAATACAATTTCAAGATTTATCCTTATGATTCGGAAGAACTAACTGACGAGGAATTA
+GAACTAAACAGAAGTGTTAGATCTATCATTCCGTTTGCAGTGGTTGGTTCTGAGAATGAG
+ATTGAAATAAACGGTGAAACCTTCAGGGGAAGAAAAACTCGTTGGAGCGCTATTAATGTT
+GAGGATATCAACCAGTGTGATTTTGTATATTTAAGGGAATTTTTGATTCGAACTCATCTC
+CAAGACTTAATCGAAACAACTTCCTACATTCATTATGAAGGGTTCAGAGCAAGACAATTA
+ATTGCCTTGAAAGAAAATGCGAATAGTCGTTCCTCAGCTCATATGTCTAGCAACGCCATT
+CAACGTTGA
+>MRPL32        552 residues Pha 0 Code 0
+ATGAATTCTTTGATTTTTGGTAAACAATTAGCATTTCACAAAATTGTGCCTACCACTGCA
+ATTGGGTGGTTGGTACCGCTAGGAAATCCTTCACTGCAGATTCCAGGCCAAAAACAACTG
+GGATCTATCCACCGTTGGTTGAGAGAAAAGCTACAACAAGATCATAAGGACACTGAAGAT
+AAAGATTTTTTCTCTAATAATGGTATTCTACTAGCAGTTCCTAAAAAAAAAGTATCACAC
+CAAAAAAAAAGGCAAAAACTTTACGGTCCAGGTAAGAAGCAATTGAAGATGATTCACCAT
+TTGAATAAGTGCCCATCATGCGGCCATTATAAGAGAGCCAATACACTGTGTATGTATTGT
+GTTGGACAAATAAGTCATATATGGAAAACGCATACCGCTAAAGAAGAAATTAAGCCGAGA
+CAAGAGGAGGAACTTTCCGAACTAGACCAAAGAGTCCTATATCCTGGTAGAAGAGATACC
+AAATATACCAAGGATTTGAAAGATAAAGATAACTATTTGGAACGTCGCGTTCGGACTTTA
+AAAAAGGACTAG
+>YCP4          744 residues Pha 0 Code 0
+ATGGTAAAGATTGCGATAATTACTTACTCTACCTACGGGCACATAGACGTTTTAGCCCAA
+GCTGTTAAGAAAGGTGTGGAGGCAGCTGGTGGTAAAGCTGATATATACAGGGTCGAGGAA
+ACTTTACCTGATGAAGTCCTCACCAAGATGAACGCTCCTCAGAAACCTGAAGATATTCCT
+GTTGCCACTGAGAAAACGTTGCTCGAATATGACGCCTTTTTGTTCGGTGTTCCAACTAGG
+TTTGGTAATTTGCCGGCTCAATGGTCCGCCTTTTGGGATAAAACCGGTGGATTATGGGCC
+AAGGGCTCTTTGAACGGCAAAGCTGCGGGGATATTCGTTAGTACTTCCAGTTACGGAGGT
+GGTCAAGAAAGTACCGTTAAAGCCTGTTTGTCTTATTTAGCTCATCACGGAATTATCTTT
+TTACCACTGGGTTATAAGAATTCATTTGCTGAGTTAGCCAGTATAGAAGAGGTACACGGT
+GGCTCTCCATGGGGTGCTGGTACCCTTGCAGGACCTGACGGCTCAAGAACTGCGTCTCCA
+CTTGAATTGAGAATTGCTGAAATTCAAGGTAAAACATTCTACGAAACCGCCAAAAAACTT
+TTCCCTGCAAAAGAAGCCAAGCCCTCCACTGAAAAGAAGACCACTACTTCTGATGCGGCT
+AAGAGACAAACTAAACCTGCAGCAGCTACAACTGCAGAAAAGAAGGAGGACAAAGGATTA
+TTATCCTGCTGTACTGTCATGTAA
+>CIT2         1383 residues Pha 0 Code 0
+ATGACAGTTCCTTATCTAAATTCAAACAGAAATGTTGCATCATATTTACAATCAAATTCA
+AGCCAAGAAAAGACTCTAAAAGAGAGATTTAGCGAAATCTACCCCATCCATGCTCAAGAT
+GTAAGGCAATTCGTTAAAGAGCATGGCAAAACTAAAATTAGCGATGTTCTATTAGAACAG
+GTATATGGTGGTATGAGAGGTATTCCAGGGAGCGTATGGGAAGGTTCCGTTTTGGACCCA
+GAAGACGGTATTCGTTTCAGAGGTCGTACGATCGCCGACATTCAAAAGGACCTGCCCAAG
+GCAAAAGGAAGCTCACAACCACTACCAGAAGCTCTCTTTTGGTTATTGCTAACTGGCGAG
+GTTCCAACTCAAGCGCAAGTTGAAAACTTATCAGCTGATCTAATGTCAAGATCGGAACTA
+CCTAGTCATGTCGTTCAACTTTTGGATAATTTACCAAAGGACTTACACCCAATGGCTCAA
+TTCTCTATTGCTGTAACTGCCTTGGAAAGCGAGTCAAAGTTTGCTAAGGCTTATGCTCAA
+GGAATTTCCAAGCAAGATTATTGGAGTTATACTTTTGAAGATTCACTAGACTTGCTGGGT
+AAATTGCCAGTTATTGCAGCTAAAATTTATCGTAATGTATTCAAAGATGGCAAAATGGGT
+GAAGTGGACCCAAATGCCGATTATGCTAAAAATCTGGTCAACTTGATTGGTTCTAAGGAT
+GAAGATTTCGTGGACTTGATGAGACTTTATTTAACCATTCATTCGGATCACGAAGGTGGT
+AATGTATCTGCACATACATCCCATCTTGTGGGCTCAGCACTATCATCACCTTATCTGTCC
+CTTGCATCAGGTTTGAACGGGTTGGCTGGCCCACTTCATGGGCGTGCTAATCAAGAAGTA
+CTAGAATGGTTATTTGCACTTAAAGAAGAGGTAAATGATGACTACTCTAAAGATACGATC
+GAAAAATATTTATGGGATACTCTAAACTCAGGAAGAGTCATTCCCGGTTATGGTCATGCT
+GTGCTAAGGAAAACTGATCCTCGTTATATGGCTCAGCGTAAGTTTGCCATGGACCATTTT
+CCAGATTATGAATTATTCAAGTTAGTTTCATCAATATACGAGGTAGCACCTGGCGTATTG
+ACTGAACATGGTAAAACTAAAAATCCATGGCCAAATGTAGATGCTCACTCTGGTGTCTTA
+TTACAATATTATGGACTAAAAGAATCTTCTTTCTATACCGTTTTATTTGGCGTTTCAAGG
+GCATTTGGTATTCTTGCTCAATTGATCACTGATAGGGCCATCGGTGCTTCCATTGAAAGG
+CCAAAGTCCTATTCTACTGAGAAATACAAGGAATTGGTCAAAAACATTGAAAGCAAACTA
+TAG
+>YCP7          720 residues Pha 0 Code 0
+ATGCAGCCTCATTTAGACAACAACAGTAATAATGACGATGTCAAATTGGATACATTAGGG
+GAACAAAATGTGTTATCATCCGCAGAAAATATCACTTTACCTGAAGACACCTTTAAATCA
+TATATGACCTACTTGCTGTACGAGATGGCTCATTACAAACCGATGATATTTTCCTTCTTG
+GCACTTTCAGTTTCAATTTTAATAGTTGTGATCTTTCATAATGTTAAAGCTTGTGATGTC
+GTTTTTGGTTTTTCAATTTTCGTCACTTCTATTTTGTTTTTGTCTACGTTGATTCCGTTT
+AATGTGTATATCTCGGATGAGGGTTTCAGAATTAAGCTTTTGCTGGAAGTTATCACCCAC
+AGGCCAGCGGTAAAGGGAAAAGAATGGAGAGCAATCACAGACAATATGAATCAATATTTA
+CTTGATAATGGTTTATGGAGTACTCGCTATTACTTTTATAGTAGTGAAAGATGCTACAAA
+TTCTTCAGATTTCTTGTGAAAGAAAAACCCCCAGGTGTGAATGTAAATTCATCGGTAAAG
+GACGCCACAAGTACGCAGATAGATGCACCAGCAAATGAGGCTTCAAATGAGGTAATAAAA
+TGCTTTAGTTTCAGTTCTGACCCAATATTCGAAGCATACTTTGTTAAAGCAGTAGAAGTT
+GAGAAACAAGCACAACAGGAATATTGGAGAAAGCAATATCCTGACGCCGATATACCATGA
+>SAT4         1812 residues Pha 0 Code 0
+ATGACTGGTATGAATGATAATAATGCCGCTATTCCTCAGCAAACTCCAAGGAAACATGCG
+CTATCTTCTAAAGTTATGCAACTTTTTAGAAGCGGTTCAAGATCATCTAGGCAGGGAAAG
+GCCTCATCGAATATCCAGCCACCTTCTAATATAAACACAAACGTTCCATCGGCGTCTAAA
+TCAGCCAAATTTGGTTTACATACCCCAACCACTGCTACTCCTAGGGTAGTTTCTAATCCT
+TCTAATACTGCAGGTGTGAGTAAACCGGGCATGTATATGCCCGAATATTACCAGTCGGCA
+TCACCATCGCACTCTAGTTCATCCGCATCATTAAACAACCATATTGATATTAACACCTCT
+AAGTCATCATCAGCTGCTTCTTTAACTTCGTCAGTATCAGCTTTATCCTTATCACCCACA
+TCAGCCATAAATATTAGCTCCAAAAGTTTGAGCCCAAAGTTCTCTCATCATAGTAACAGC
+AATACTGCTATTACACCCGCGCCTACTCCCACTGCTTCAAATATTAATAATGTAAATAAG
+ATAACCAATACAAGTGCACCTATTTGTGGGAGGTTTCTTGTGCATAAAGATGGTACCCAT
+GAACATCACTTAAAAAATGCTAAGAGACAAGAAAAGCTAAGCACAATGATTAAAAACATG
+GTTGGTGCGAGCAAATTACGTGGTGAGGCAAAATCTGCTGTCCCTGATATAATAATGGAT
+CCAAAGACGACTTTAAAATCCAACAAGAATCCTCCTACTCTTTTTGCAGGCTTCATGAAG
+CAGGTCGTGGATATGGATGATAAATATCCAGAAGGCGCTCCCACAAGTGGCGCTTTAAAT
+TGTCCTGAAAGGGATATATACAGGTCAGATCAAAAAGATTCCAAAAATAATACGCATAAT
+ATCACTACTACTAAAAAAGATAGGCAATGTTTTGCCGAAAAGTATGGTCGCTGTCAAGAA
+GTCCTTGGTAAAGGTGCTTTTGGTGTAGTAAGAATATGTCAAAAGAAAAATGTTTCTTCT
+CAAGATGGTAATAAAAGTGAAAAGCTTTATGCAGTGAAAGAGTTCAAGCGTAGAACATCC
+GAATCAGCAGAAAAGTATTCTAAGAGGTTGACTTCTGAATTTTGCATTTCTTCTTCATTA
+CACCATACAAATATTGTTACTACACTAGATCTTTTCCAAGATGCCAAAGGCGAGTACTGT
+GAAGTAATGGAATATTGTGCAGGTGGCGATCTATTCACTTTGGTCGTTGCCGCCGGAAAA
+TTAGAATATATGGAAGCAGATTGTTTCTTCAAGCAGCTTATTAGAGGTGTTGTTTATATG
+CATGAAATGGGTGTTTGTCATAGAGATTTGAAGCCTGAGAACTTACTGCTTACGCACGAT
+GGTGTGCTAAAAATTACAGACTTTGGTAACAGCGAATGTTTCAAGATGGCATGGGAAAAA
+AATATTCACCTTAGTGGAGGCGTTTGCGGTTCATCGCCGTACATCGCCCCAGAGGAATAT
+ATCAAAGAAGAGTTTGATCCAAGACCCGTAGATATATGGGCATGTGGTGTCATTTATATG
+GCAATGAGAACTGGTAGACAATTGTGGAGTTCTGCTGAAAAAGACGATCCATTTTATATG
+AATTATTTAAAAGGACGTAAGGAAAAGGGAGGCTATGAGCCAATCGAAAGTTTAAAAAGA
+GCCAGGTGTAGGAATGTTATATATTCGATGTTAGATCCCGTTCCGTACAGAAGAATTAAC
+GGGAAACAAATTTTGAACAGTGAATGGGGAAGGGAGATAAAATGCTGCCATAATGGGCGC
+GCATTGAAATAA
+>RVS161        798 residues Pha 0 Code 0
+ATGAGTTGGGAAGGTTTTAAGAAAGCTATCAACAGAGCTGGTCACAGTGTGATAATTAAG
+AATGTCGACAAGACCATTGATAAAGAGTATGACATGGAAGAACGTCGTTATAAAGTTCTT
+CAAAGAGCAGGTGAGGCATTACAAAAGGAAGCCAAAGGTTTCTTGGACTCATTGAGAGCT
+GTGACAGCATCACAGACTACCATTGCCGAGGTCATCTCTAACCTCTATGACGATTCAAAA
+TATGTTGCTGGTGGTGGTTACAACGTTGGTAACTATTATTTGCAATGTGTTCAAGATTTT
+GATAGCGAAACTGTTAAGCAATTAGACGGGCCCTTAAGAGAAACCGTACTAGATCCAATA
+ACAAAGTTTTCGACGTATTTCAAAGAAATTGAGGAGGCCATAAAAAAGAGAGACCATAAG
+AAACAAGACTTCGATGCTGCGAAGGCAAAAGTTCGTAGATTAGTGGACAAACCTGCTAAA
+GATGCCTCTAAACTGCCAAGGGCTGAAAAAGAATTGAGCTTAGCTAAAGATATTTTCGAA
+AATCTTAATAACCAATTGAAAACTGAACTACCACAGTTAGTTTCATTAAGAGTACCTTAC
+TTTGACCCAAGTTTTGAAGCTTTAATCAAGATTCAGCTAAGGTTCTGTACTGATGGTTAC
+ACTCGTTTAGCGCAGATTCAACAATATTTGGACCAACAATCAAGAGACGACTATGCCAAT
+GGGTTATTAGACACTAAAATCGAAGAACTATTAGGACAAATGACAAGCCTAGATATTTGT
+GCGCTCGGGATAAAATAA
+>YCQ0          852 residues Pha 0 Code 0
+ATGTCTGACAAGGAACAAACGAGCGGAAACACAGATTTGGAGAATGCACCAGCAGGATAC
+TATAGTTCCCATGATAACGACGTTAATGGCGTTGCAGAAGATGAACGTCCATCTCATGAT
+TCGTTGGGCAAGATTTACACTGGAGGTGATAACAATGAATATATCTATATTGGGCGTCAA
+AAGTTTTTGAAGAGCGACTTATACCAAGCCTTTGGTGGTACCTTGAATCCAGGGTTAGCT
+CCTGCTCCAGTGCACAAATTTGCTAATCCTGCGCCCTTAGGTCTTTCAGCCTTCGCGTTG
+ACGACATTTGTGCTGTCCATGTTCAATGCGAGAGCGCAAGGGATCACTGTTCCTAATGTT
+GTCGTCGGTTGTGCTATGTTTTATGGTGGTTTGGTGCAATTGATTGCTGGTATTTGGGAG
+ATAGCTTTGGAAAATACTTTTGGTGGTACCGCATTATGTTCTTACGGTGGGTTTTGGTTG
+AGTTTCGCTGCAATTTACATTCCTTGGTTTGGTATCTTGGAAGCTTACGAAGACAATGAA
+TCTGATTTGAATAATGCTTTAGGATTTTATTTGTTGGGGTGGGCCATCTTTACGTTTGGT
+TTAACCGTTTGTACCATGAAATCCACTGTTATGTTCTTTTTGTTGTTCTTCTTACTAGCA
+TTAACTTTCCTACTGTTGTCTATTGGTCACTTTGCTAATAGACTTGGTGTCACAAGAGCT
+GGTGGTGTCCTGGGAGTTGTTGTTGCTTTCATTGCTTGGTACAACGCATATGCAGGTGTT
+GCTACAAAGCAGAATTCATATGTACTGGCTCGTCCATTCCCATTACCATCTACTGAAAGG
+GTAATCTTTTAA
+>ADP1         3150 residues Pha 0 Code 0
+ATGGGAAGTCATCGACGTTATCTCTACTATAGTATATTATCATTTCTATTATTATCCTGC
+TCAGTGGTACTTGCAAAACAAGATGAGACCCCATTCTTTGAAGGTACTTCTTCGAAAAAT
+TCGCGTCTAACTGCACAAGATAAGGGCAATGATACGTGCCCGCCATGTTTTAATTGTATG
+CTACCTATTTTTGAATGCAAACAGTTTTCTGAATGCAATTCGTACACTGGTAGATGTGAG
+TGTATAGAAGGGTTTGCAGGTGATGATTGCTCTCTGCCCCTCTGTGGCGGTCTATCACCG
+GATGAAAGCGGTAATAAGGATCGTCCCATAAGAGCACAAAATGACACCTGTCATTGTGAT
+AACGGATGGGGAGGGATCAATTGTGACGTTTGTCAAGAAGATTTTGTCTGTGATGCGTTC
+ATGCCTGATCCTAGTATTAAGGGGACATGTTATAAGAATGGTATGATTGTAGATAAAGTA
+TTTTCAGGTTGTAATGTGACCAATGAGAAAATTCTACAGATTTTGAACGGCAAAATACCA
+CAAATTACATTTGCCTGTGATAAACCTAATCAAGAATGTAATTTTCAGTTTTGGATAGAT
+CAGTTAGAAAGCTTCTATTGTGGCTTAAGTGATTGTGCCTTTGAATACGACTTGGAACAG
+AATACCTCCCATTATAAGTGTAATGACGTTCAATGCAAATGCGTTCCCGACACTGTGTTG
+TGTGGTGCTAAGGGGTCTATAGATATCTCGGATTTCCTGACAGAGACAATAAAAGGGCCA
+GGAGATTTCAGCTGTGATTTAGAAACAAGGCAATGTAAATTCAGTGAGCCTTCTATGAAT
+GATTTGATATTGACCGTGTTTGGTGACCCTTATATTACTTTGAAGTGTGAATCCGGTGAA
+TGTGTTCATTATAGTGAGATTCCAGGTTACAAATCTCCTTCAAAAGATCCAACAGTGTCA
+TGGCAAGGGAAATTGGTGTTGGCATTGACTGCTGTGATGGTCCTGGCACTTTTTACATTT
+GCTACCTTTTACATTTCTAAATCTCCGTTATTCAGAAATGGATTGGGTTCCTCAAAGTCT
+CCCATTCGTTTGCCAGATGAAGATGCGGTGAATAATTTCTTACAAAATGAAGATGACACA
+CTGGCGACATTAAGTTTTGAAAATATCACTTATAGTGTCCCCTCGATAAATTCAGATGGT
+GTTGAAGAAACTGTGCTGAATGAAATAAGTGGTATCGTGAAGCCCGGCCAAATATTAGCT
+ATCATGGGTGGATCTGGTGCGGGTAAAACTACTTTATTAGATATCCTAGCAATGAAACGG
+AAAACAGGTCACGTTTCGGGTTCCATAAAAGTTAACGGTATTAGTATGGACCGTAAATCT
+TTCTCGAAAATAATCGGGTTCGTCGATCAAGATGACTTTTTGCTGCCCACTTTGACTGTT
+TTTGAAACCGTATTAAATAGTGCGCTGTTAAGATTGCCAAAAGCATTGTCATTCGAGGCC
+AAGAAGGCAAGAGTTTATAAGGTGTTGGAAGAACTAAGAATTATTGATATCAAAGATCGT
+ATTATTGGTAATGAATTTGATCGTGGTATTAGTGGAGGTGAAAAACGCCGAGTTTCCATT
+GCATGTGAATTAGTGACATCTCCATTGGTTTTATTTTTGGATGAACCTACATCTGGTTTA
+GATGCTAGTAATGCCAATAATGTTATTGAATGTTTGGTAAGGTTATCCAGCGACTATAAC
+AGGACATTGGTGCTATCTATTCATCAGCCAAGATCAAATATATTTTATTTATTCGATAAA
+TTGGTCCTGTTAAGTAAAGGTGAGATGGTCTATTCCGGAAATGCCAAAAAAGTGTCAGAA
+TTTTTGAGAAATGAGGGATATATCTGTCCGGACAACTATAATATTGCTGATTATTTGATT
+GATATTACTTTTGAAGCCGGTCCTCAGGGGAAAAGGAGAAGAATCAGAAACATTTCCGAT
+TTAGAAGCTGGTACGGATACTAACGATATTGATAATACGATACACCAAACAACATTTACT
+AGCAGTGATGGTACAACACAGAGAGAGTGGGCTCATCTTGCAGCTCATAGAGATGAGATC
+AGATCTTTACTCAGAGATGAAGAAGATGTAGAGGGAACAGATGGAAGGCGAGGTGCTACT
+GAGATTGACTTAAATACCAAACTACTACACGATAAATATAAAGATAGCGTCTATTATGCA
+GAGCTTTCACAGGAGATCGAGGAAGTTTTAAGCGAAGGTGATGAGGAAAGTAACGTTTTG
+AATGGAGATTTACCCACAGGTCAACAATCTGCTGGTTTTCTGCAACAGTTATCGATATTG
+AATTCAAGAAGTTTTAAAAACATGTACAGAAACCCTAAACTATTATTGGGTAATTATTTA
+CTGACGATCCTATTGAGTTTATTCTTGGGAACACTATATTACAACGTCTCCAATGATATC
+AGCGGTTTTCAGAACAGAATGGGGCTGTTCTTCTTTATACTAACGTACTTCGGTTTTGTT
+ACATTCACAGGTCTCAGCTCGTTCGCTCTGGAAAGGATCATTTTCATAAAAGAAAGATCC
+AATAACTATTACTCGCCACTTGCATACTACATTAGTAAGATAATGAGCGAAGTGGTCCCG
+CTACGTGTTGTACCACCTATACTCTTGTCATTGATTGTTTACCCAATGACTGGTTTAAAC
+ATGAAAGACAATGCTTTTTTTAAATGTATTGGAATCCTTATACTGTTTAACCTTGGGATA
+TCGTTGGAAATCCTAACCATCGGCATAATTTTTGAAGACTTGAATAACTCCATAATATTA
+AGCGTGCTGGTGCTTTTGGGCTCACTACTGTTTAGCGGACTATTTATCAATACTAAGAAT
+ATTACAAACGTGGCCTTCAAGTACCTGAAAAACTTCTCTGTGTTTTACTACGCCTACGAA
+TCTTTATTGATCAATGAGGTCAAAACATTGATGCTGAAAGAGAGAAAGTACGGCTTAAAT
+ATTGAAGTTCCAGGCGCTACTATCTTGAGCACATTTGGATTTGTTGTCCAAAACCTTGTA
+TTTGACATCAAGATCCTGGCTCTGTTTAATGTGGTGTTTTTAATAATGGGGTATCTAGCC
+CTTAAGTGGATAGTTGTGGAACAAAAGTAG
+>PGK1         1251 residues Pha 0 Code 0
+ATGTCTTTATCTTCAAAGTTGTCTGTCCAAGATTTGGACTTGAAGGACAAGCGTGTCTTC
+ATCAGAGTTGACTTCAACGTCCCATTGGACGGTAAGAAGATCACTTCTAACCAAAGAATT
+GTTGCTGCTTTGCCAACCATCAAGTACGTTTTGGAACACCACCCAAGATACGTTGTCTTG
+GCTTCTCACTTGGGTAGACCAAACGGTGAAAGAAACGAAAAATACTCTTTGGCTCCAGTT
+GCTAAGGAATTGCAATCATTGTTGGGTAAGGATGTCACCTTCTTGAACGACTGTGTGCGT
+CCAGAAGTTGAAGCCGCTGTCAAGGCTTCTGCCCCAGGTTCCGTTATTTTGTTGGAAAAC
+TTGCGTTACCACATCGAAGAAGAAGGTTCCAGAAAGGTCGATGGTCAAAAGGTCAAGGCT
+TCCAAGGAAGATGTTCAAAAGTTCAGACACGAATTGAGCTCTTTGGCTGATGTTTACATC
+AACGATGCCTTCGGTACCGCTCACAGAGCTCACTCTTCTATGGTCGGTTTCGACTTGCCA
+CAACGTGCTGCCGGTTTCTTGTTGGAAAAGGAATTGAAGTACTTCGGTAAGGCTTTGGAG
+AACCCAACCAGACCATTCTTGGCCATCTTAGGTGGTGCCAAGGTTGCTGACAAGATTCAA
+TTGATTGACAACTTGTTGGACAAGGTCGACTCTATCATCATTGGTGGTGGTATGGCTTTC
+ACCTTCAAGAAGGTTTTGGAAAACACTGAAATCGGTGACTCCATCTTCGACAAGGCTGGT
+GCTGAAATCGTTCCAAAGTTGATGGAAAAGGCCAAGGCCAAGGGTGTCGAAGTCGTCTTG
+CCAGTCGACTTCATCATTGCTGATGCTTTCTCTGCTGATGCCAACACCAAGACTGTCACT
+GACAAGGAAGGTATTCCAGCTGGCTGGCAAGGGTTGGACAATGGTCCAGAATCTAGAAAG
+TTGTTTGCTGCTACTGTTGCAAAGGCTAAGACCATTGTCTGGAACGGTCCACCAGGTGTT
+TTCGAATTCGAAAAGTTCGCTGCTGGTACTAAGGCTTTGTTAGACGAAGTTGTCAAGAGC
+TCTGCTGCTGGTAACACCGTCATCATTGGTGGTGGTGACACTGCCACTGTCGCTAAGAAG
+TACGGTGTCACTGACAAGATCTCCCATGTCTCTACTGGTGGTGGTGCTTCTTTGGAATTA
+TTGGAAGGTAAGGAATTGCCAGGTGTTGCTTTCTTATCCGAAAAGAAATAA
+>POL4         1749 residues Pha 0 Code 0
+ATGTCTCTAAAGGGTAAATTTTTCGCCTTTTTACCTAATCCTAACACATCTTCCAATAAG
+TTCTTTAAGAGTATATTGGAGAAAAAGGGCGCCACAATTGTGTCAAGTATTCAAAATTGT
+CTTCAATCTAGCCGTAAGGAAGTTATCATTTTGATTGAGGACTCCTTTGTTGATTCTGAT
+ATGCATTTGACTCAGAAAGATATTTTCCAAAGGGAAGCAGGCTTAAATGATGTCGATGAA
+TTTCTTGGTAAGATTGAACAGTCAGGCATTCAATGTGTGAAAACCAGTTGCATCACAAAG
+TGGGTCCAGAATGATAAATTTGCGTTTCAAAAAGATGATTTGATTAAATTTCAACCATCC
+ATTATCGTTATATCAGATAACGCTGATGACGGACAAAGTTCTACTGATAAAGAGAGTGAG
+ATTTCAACTGACGTAGAAAGTGAAAGGAATGATGACAGCAACAATAAGGATATGATACAA
+GCTTCAAAACCTCTTAAGCGACTTTTACAGGAGGATAAAGGAAGAGCTTCCCTTGTTACT
+GACAAAACGAAGTACAAAAACAATGAATTGATTATCGGAGCGTTGAAAAGGTTAACAAAA
+AAATATGAGATCGAAGGTGAGAAATTTCGTGCAAGAAGTTATAGACTGGCTAAACAGTCG
+ATGGAAAATTGCGATTTCAATGTTCGTTCCGGTGAAGAAGCACATACTAAATTAAGGAAT
+ATCGGGCCTAGTATTGCCAAAAAAATACAAGTTATATTAGATACGGGAGTTTTACCAGGT
+TTAAATGATTCAGTGGGATTAGAAGACAAGTTAAAATACTTCAAAAATTGTTACGGCATT
+GGGTCGGAAATTGCTAAACGCTGGAATCTTCTAAATTTTGAAAGCTTTTGTGTTGCAGCT
+AAGAAGGATCCAGAGGAGTTTGTATCAGATTGGACAATTTTATTTGGTTGGTCATATTAC
+GACGATTGGTTATGCAAGATGTCTCGGAATGAATGTTTCACACATTTAAAGAAGGTTCAA
+AAAGCGCTGCGTGGCATTGATCCTGAATGCCAAGTCGAATTACAGGGAAGTTATAATAGG
+GGCTATTCCAAGTGTGGTGACATTGATCTTTTATTTTTCAAGCCGTTTTGTAATGACACG
+ACCGAGTTGGCAAAAATCATGGAAACGCTTTGTATTAAGTTGTACAAGGATGGCTATATC
+CATTGTTTTTTACAGCTAACGCCAAACTTGGAAAAGCTATTCTTAAAAAGAATAGTGGAG
+AGATTTCGTACAGCGAAGATTGTTGGGTATGGAGAAAGAAAGAGGTGGTATTCTTCTGAG
+ATAATCAAGAAATTTTTCATGGGAGTCAAATTCTCTCCAAGAGAATTAGAAGAACTGAAA
+GAAATGAAAAATGATGAAGGCACATTGTTAATTGAAGAAGAAGAAGAAGAAGAAACAAAA
+TTAAACCCGATTGACCAATATATGTCTCTGAATGCCAAGGATGGAAATTATTGCAGAAGA
+TTAGACTTTTTTTGTTGCAAGTGGGATGAGCTTGGAGCAGGAAGAATACACTATACTGGA
+TCTAAAGAGTACAATAGATGGATAAGAATATTGGCAGCGCAAAAAGGCTTCAAGCTTACA
+CAACACGGTTTATTTCGAAATAATATCCTTCTCGAAAGCTTTAACGAACGCAGAATTTTC
+GAGTTATTAAACTTAAAATACGCTGAACCCGAACATAGAAATATCGAATGGGAAAAAAAA
+ACTGCATAA
+>YCQ7         2862 residues Pha 0 Code 0
+ATGCTGATCATCAATGGGAAGATCATCCCTATAGCTCATACTATTTGCGCATTCTCCGCC
+TTCTTTGCAGCTTTGGTCACTGGTTATTCATTACATTTTCATAAAATTGTAACCAATGCA
+CATTATACGTATCCAGATGAGTGGTTTCCTAGTGTATCAGCCACTATCGGGGACCGCTAT
+CCGGAACGTTCTATTTTCCAAATCTTAATAGCTCTAACTGCTTTTCCAAGATTTTTACTG
+CTACTAGGTCACTACTACTTGAACCAATCTAAGGTATGCTTCCTTGTCGGTGTACTCCGG
+ACAGTCTCTTGCGGTGGTTGGGTATACATTACAAGTACAGATGACCACGATATTCATGAT
+ATATTTATGATCACATACATTGTTTTAACGTTACCATGGGATATAATGATTACCCGCTAT
+TCTAGTCCTTTAACTTCGAAGAACAAAGGGTTGACTGCTACAATTTTTTTTGGAACATTG
+TTCCCGATGATTTACTGGTACATTCAGCACTCCGTCCAACAGAGAGCTGGGGCATATTCT
+ATATATGCTTATTTCGAATGGTCTCTGATTCTTTTAGATATTGCATTTGATGCATTTGCT
+TACGCTGATTTCAAAAAGATAGATATTGTTCTCGCTTTTAATGAGAAACCCGGTAATACC
+AGTTTTTTCCAAATTAGAGACTCTAATCCCATAAATTATGGAGAAGAAAAAAGTTCAGAA
+TTGCAGAAAAGTGGTGAAAAGAAGGTTGAAAAGGAAAAACCCGTTGCTAGAAGCGCAACT
+GGTTCATATTTCAGGTTTGACTCTTTTTTTTACTTACTAACAAATATTTTTAACGGTTTT
+CTTTTCTGGTCGAACGTTACGTCCCTTTTATGTAGTATTTGGCATTTCCCGCTATGGTAT
+ATGGGAATCTCAGGTTATGAAGCTGCAATATTGGGTTATTTGGGACCCATTTTCTTATAT
+CTGCCGTTCGTTTCTGAAGCCTTCATGCAATATGGTGTACTTTTAGGAGGTATTATTGCC
+ATTGGTGCCTATATTGTTCAGATGCCAGAATTAAGGTTGATTTCTGTAGCTGTGGGAACT
+TCCATTACCGTTGCAACGTTTGTACAAAATCTAAGATATATCACAAATGCGGAGACTAGT
+TTCTCTTTTGCTCTAACTTGGCTGCTAGGTCTTGTTGCATCTGTGATCTTGAAAATGGGG
+TTCTATACCAACAACCCAACTTGGGTCATTTTAGATGAACGTAATGGTGGGTATAATAAG
+ACAGCTCTCGTGCTTACTGTTTTATTCGGCATGCTGTCGCCTTATGTTAATTCAATTAAT
+TTCGAAGGGAAAAGGAATGCTCAAGCAAAATCTGCTTCGTTGATCGGCAAATTATTTTTG
+GCTGTTGGTTTTGGCTCGTTGTTATTCGGAATTCATCAGTTATTGACGGATTCTTCTACT
+ACTATTTATTGGGCATGGGAAGGTTACAATGAATCACACGGTCCCTTGCCATGGCCTTGG
+GGCGCCTTAACTTGTACGGTCATGTTATTTGCTTCTTTGAGTTCTGTGAAGTTTATGGGC
+AAGCCATTAGTTCCATGTTTGTTGCTTCTCATATCCACTGCTGTACTTTCAGCTAGAAGC
+ATTACACAATGGCCTAAATATATTTTTGGTGGTTTATTGTACGCTATCGCTATGCTTTGG
+TTAGTTCCTTCGTATTTTTCTGCATTAGGCCAAGTTCAAAACATATGGGTTTATGTCCTA
+TCATTCTCCGTTTATATTATCTTTGTCCTTGCCCATGTTTGGGTCGTTGCATACGCATTT
+GTTCCAATGGGCTGGGTACTGAGGGAGAAGATTGAGACGGTTCTTGCCTTTTCTTCCACA
+TTTATCATTATTGGTGCTTTAACATGCAAAAACCTTAACGTTCAACTGGTGACTATGGGC
+AAAAAATTCTTCATTTATGTTTTCTTCTTTGCCGTGGCCCTACTATCACTAACAGCTAGG
+TTCGTGTATGATATTAGACCTACAGGAATTCCTCAGCCTTATCATCCAGATTCTCAGTTG
+ATTACAGCTGGTATTTGGACTATCCACTTTGGTCTCGATAATGATATGTGGGCATCTGAA
+GACAGAATGATCAACCTTATTAAAGATATGGAACTAGATGTGGTAGGTCTACTAGAAACA
+GATACACAAAGAATTACCATGGGGAACAGGGATCTAACTAGCAAACTAGCTCATGATTTG
+AATATGTATGCAGATTTCGGACCAGGTCCAAATAAACATACCTGGGGCTGTGTTCTTCTT
+TCTAAATTCCCTATCGTAAATTCTACGCATCATTTATTGCCCTCTCCAGTTGGGGAACTT
+GCGCCAGCCATTCATGCCACACTTCAAACGTACAATGACACTCTCGTTGACGTCTTTGTA
+TTCCATAGTGGACAAGAAGAGGATGAAGAGGATAGAAGACTGCAAAGTAACTACATGGCT
+AAGCTCATGGGCAATACGACTCGCCCAGCTATTTTATTAAGTTACTTAGTTGTTGATCCA
+GGTGAAGGCAACTACAATACGTACGTTAGTGAAACATCCGGAATGCACGACATTGATCCC
+TCTGACGATGATAGATGGTGTGAGTATATCTTGTATAAGGGCTTGAGAAGAACAGGATAT
+GCTAGAGTTGCAAGAGGAACGATAACCGATACGGAGCTACAAGTTGGTAAGTTCCAAGTT
+TTGAGTGAGCAAGCGTTAGTAGAGCACTCGGATTCTATGTATGAATACGGTCATATGAGT
+GAACCGGAATATGAGGACATGAAATTTCCAGATAAGTTTTTAGGCGAAGGTGAGAGGGGT
+CACTTCTACCATGTTTTTGATGAGCCACGTTATTACTTATAA
+>SRD1          678 residues Pha 0 Code 0
+ATGCGATATAATAATTATGACAACTCTGGAAGTTCCTTCTTAACTAGAGTAGTTAAAAAG
+TCAGATATGGAGAAAACGTTATTATTAAATAGAGAAATTGATGACTGGAAGTCAAACGAT
+AAAAAGAAGGCATATAAGGAACGCGGAAGAGTTTATGCAAGTTGCTCATTTATTGAAGTA
+TCCTTTTCTCAAATAAGGGCTGTTGATGTTGAAAAAAAAATTGAGAATGCCGAACAACTA
+AGAGATCTTACAAGAAATATTGTTAAGAACAAAACCAGCTCTTTGAACGAAATTACACCC
+TCAAAGAATCGTGTTATTAGTGCATGCAATTCCGAGAGACGTACGACTAGCCAAGAAGCA
+AACAATCTTGAAGGCTACCATAGTTGTGCACAAGGAACTAGTCGGTCTGCCAGTATTACG
+AAGAAATACAGCAAAAAGACTACTAGTCGTCCTAAAAGAGAAAAGAGACAAACAATCCTC
+CCAAATGGTGAGATAAAGGAATGCTCTAAATGTAAAGACACTTGGACAATTCAATGGCGT
+AGTGGACCCGACCAAAACAGGGAACTTTGTAGTCCCTGTGGACTCGCCTATGGAAAAAGA
+CTGAAGAAGGAGAATGAAAAAAAAAGGCAAGCGGCAGATAAAAGGATAGATTCGAAACAA
+TCCATAGTATCTATTTAA
+>MAK32        1092 residues Pha 0 Code 0
+ATGATGAATGAAGAGGATTCTACAGAAACGAAAAGCCTAGTCATAACTAATGGCATGTTT
+ATCATAGACGACATCGAGCGTAGTAAATATAATATTCACTATAAGAATGTCCCAGGAGGC
+GGAGGGACTTTTGCCATTTTGGGTGCATGCATAATATCTTCCGGCAATGTCACATCCAAA
+GGTTTGAAGTGGATAGTGGACAGAGGCTCTGACTTTCCAAAGGAAGTTATAAGGGAAATA
+GACTCATGGGGTACTGATGTGAGGTTTCGAGATGACTTTAGCAGATTAACTACCAAAGGG
+TTGAATTATTACGAGGGAAGTGATGATTTGAGAAAGTTCAAGTTTTTGACGCCGAAGAAG
+CAGATTAACGTCGATGACTGGATTTCCACATTTGGGCAGAAGATAATTGATGAAATGCAT
+GCGTTTCATTTGCTATGTTCTGGGTCTAGATGCTTAGACATAATAAACGATCTGCTACGG
+GTGAAAAGTTCAAAGGGCACAAAACCAATCGTGATTTGGGAGCCATTCCCAGATCTTTGC
+GACTTTGATCATCAAAATGACATTAAAAGTGTAATGCAGAGGAACGATGTTACGGTAATA
+TTATCTCCAAATGCCGAAGAATCAAGTCGCTTATTTGGTTTAAGTAGCAAGGAACCGACT
+AGTTTGGAAGAATGTCTAGCATTAGCGCATCGTTTCGATGATTTCATGGATGAAAACAAT
+ATGTGTATTCTACGATGCGGTGCCCTCGGAAGCATATCGGTAAGTGAGAAGTTTAAGAAC
+GGACGAACCTATGACCATTTCCCCGCCTACCATTTCAAAACTCAGTCTAAAGTACTAGAT
+CCTACTGGCGGGGGAAACTCGTTCCTTGGCGGCTTTGCAGTTTCTTATGCCCTAACGAAA
+AGCTTAGATATTGCTAGTATATGTGGGAACATCGCTGCAGGCGCAATAATTGAACAATTC
+GGAATACCGAGGTACGATCCAATTGCTAAAACCTGGAACGGAATCACATTCTTGGATAGA
+CTGAAATTTTACCTTTCACAGTCCGGTCTTCAATATAATATAAACGATCTTTACAAAAGT
+CTAACACGATGA
+>PET18         648 residues Pha 0 Code 0
+ATGAGCTGTACCACTGATAAGTTAATACAAAAGTACGACGCCCTTGTTAGGAAAACCACA
+GAACATAAATTCGCTAAGGAACTATGTGCCGGAACATTGAAGGACCGTAGTTTGTACATC
+TATTTATCACAAGATCTGCAATTTTTTGAAACTAGCTTAAGGTTGATATGTAAGACGACT
+TCTTTAGCACCAACTACTCACGCTTTAATAACCTTAGCCAAAAAGATTGGATTTTTTTCT
+AATGATGAAAACTCATACTTTCATGACTGCTTAGAATTATTGGCACCATCCCTCACCAAG
+GAAGAAAGAGATAATTTTGACAATAAAGCGATCCCCGGCGTTGATGCGTATATTAATTTC
+TTAGATGAGCTGAGAAAGGACGCCTCAATTACATGGCCATCCTTAGTAACCAGCTTATGG
+GTTGCTGAGGAACTCTATTGGAGATGGGCTCGTGATACTCCTAGAGCCCCAGGGTTGCAT
+TGGAAATATCAAAAATGGATTGATTTACATGATGGTGAGCATTTTCAAACTTGGTGTGAA
+TTTCTAAAGGCTGAAGTTGACAAGTTTCCCGTCGAAGAAGTGGAAAGCATATTTGTGAAG
+GTTTCACAGTTCGAGTTCGAATTTTTTGAATCTTGTTACAACGCCTAA
+>MAK31         267 residues Pha 0 Code 0
+ATGGACATCTTGAAACTGTCAGATTTTATTGGAAATACTTTAATAGTTTCCCTTACAGAA
+GATCGTATTTTAGTTGGAAGCTTGGTTGCTGTAGATGCCCAAATGAATTTGCTATTAGAT
+CATGTTGAGGAACGTATGGGCTCCAGTAGTAGAATGATGGGCCTAGTCAGCGTCCCTAGG
+CGTTCCGTTAAGACCATAATGATTGATAAGCCTGTTCTGCAGGAGCTTACTGCGAATAAA
+GTTGAATTGATGGCTAATATTGTTTAG
+>HSP30         999 residues Pha 0 Code 0
+ATGAACGATACGCTATCAAGCTTTTTAAATCGTAACGAGGCTTTAGGGCTTAATCCACCA
+CATGGCCTGGATATGCACATTACCAAGAGAGGTTCGGATTGGTTATGGGCAGTGTTTGCA
+GTCTTTGGCTTTATATTGCTATGCTATGTTGTGATGTTCTTCATTGCGGAGAACAAGGGC
+TCCAGATTGACTAGATATGCCTTAGCTCCTGCATTTTTGATCACTTTCTTTGAATTTTTT
+GCTTTCTTCACTTATGCTTCTGATTTAGGTTGGACTGGTGTTCAAGCTGAATTTAACCAC
+GTCAAGGTTAGCAAGTCTATCACAGGTGAAGTTCCCGGTATTAGACAAATCTTTTACTCG
+AAATATATTGCCTGGTTCTTGTCCTGGCCATGCCTTTTATTTTTAATCGAGTTAGCCGCT
+AGTACTACTGGTGAGAATGACGACATTTCCGCCTTGGATATGGTACATTCGCTGTTAATT
+CAAATCGTGGGTACCTTATTCTGGGTTGTTTCGCTATTAGTTGGTTCATTGATCAAGTCC
+ACCTACAAGTGGGGTTATTACACCATTGGTGCTGTCGCTATGTTGGTTACCCAAGGTGTG
+ATATGCCAACGTCAATTCTTCAATTTGAAAACTAGAGGGTTCAATGCACTTATGCTGTGT
+ACCTGCATGGTAATCGTTTGGTTGTACTTTATCTGTTGGGGTCTAAGTGATGGTGGTAAC
+CGTATTCAACCAGACGGTGAGGCTATCTTTTATGGTGTTTTGGATTTATGTGTATTTGCC
+ATTTATCCATGTTACTTGCTAATTGCAGTCAGCCGTGATGGCAAATTGCCAAGGCTATCT
+TTGACAGGAGGATTCTCTCATCACCATGCTACGGACGATGTGGAAGATGCGGCTCCTGAA
+ACAAAAGAAGCTGTTCCAGAGAGCCCAAGAGCATCTGGAGAGACTGCAATCCACGAACCC
+GAACCTGAAGCAGAGCAAGCTGTCGAAGATACTGCTTAG
+>YCR3         1836 residues Pha 0 Code 0
+ATGGCGCGTCAAAAGCTTACTTTCAAAGAACAAATGGATGGTTTCCCCTGGGTCCAACTT
+GTTGTTGTGTCCTTAGTTAGGTTCAGCGAACCAATTGCGTTTTCGTCACTATTTCCTTAT
+GTTTATTTCATGGTTAGAGATTTTAATATTGCTCCCAATGATGCTCAAGTGTCCAAATAT
+TCAGGTTATTTATCTTCATCATTTGCGTTATGCCAAGTCATATCTGCGTACCACTGGGGT
+AGATTCTCTGAAAAACATGGCAGAAAAATAACATTGACTTGCGGGCTTATAGGAACATCT
+GTATCATTGTTAATACTGGGATTTTCACACAATTTCTATCAGGCTTTGGTGGCAAGAAGT
+TTAATGGGATTGCTAAATGGTAACGTCGGCGTTATTAGAACCATTATTGGTGAAATAGCA
+ACTGAAAGAAAACATCAGGCTTTAGCTTTCAGTACTATGCCTTTATTATTTCAATTTGGT
+GCCGTTGTTGGGCCTATGATCGGTGGGTTTCTTGTATTTAGAGATGGAACAATGAATGAA
+GTGCCACTATGGTTTCCACATTTTGCAAAAAGAATAATTAGGTCATATCCGTACGCCTTG
+CCAAACGTGGTAGTGTGCATGTTTTTGATGTTTGGTTTAACTAATGCAACATTGTTTTTG
+GAAGAAACACATCCTGCTTTTAAAAATAGAAGAGATTACGGTTTAGAGGTCGGTGATTTT
+ATTAAGAAGAATATATTTGGTATACAGCCGAAAAGAAGACCCTGGCAAAAGCGCATTCAG
+GATGATTCGGAAAACATTCACCACCGTAATGAGAATGTGAACAGCAATCGAGGACAAGAT
+AGTGAAGAGGATGAAAATAGTCCCCTAGTGAATACTACCAATGACGATGATACTGAAAGC
+ATACAATCGATTGATCCTATTTTAACAAGAAGACAGTCTGTAGGCCTGATTAGGACATAT
+TCTCTGCATGAACCAACAGACGCTGTGCATGCCAATATAGATACAGCTCCAGACGGTTGT
+AAAGAAAGTAGTATATTTCATCACGTTTTTCATACAAAAGTATTTTACCCTATATCGGTG
+AATTTTATTATGGCTTTACATTTGATTGTATACAACGAATTTTTGCCTGTTTTTTTAGCT
+TATGATTTAGCCGTAGATCCAGAAAATCCAAAGAAGCTGGCTTCAAAATTTCCGTGGAAA
+ATATCTGGCGGTATAGGTTATGAACCAGAACAAACCGGTACTCTTTTGTCGACAACAGGT
+ATCTTTGGTTGTTTTGTGGTTATTTTCATTTTTCCCATAGTTGATCGAAATTTCGATTGT
+TTAACAATTTTCAGAACTTTAGTCAAGCTGTACCCTATTATGTACGTTATGGTTCCTTAC
+GTTGTTTTTCTACAGAATGAACGGATTCCTAGCTGGTATACTGTCGTCTACTTGTACATA
+ATCACAGGGATAAAAACATTTTGTGGCGCTTTAACGTCACCACAAATTATGTTATTAATT
+CATAATTCGAGTCCCTTGAGTTGTAGATCAGTCATCAATGGCGCCACCATTAGTATTTCT
+GCCTCTGCTCGTTTCATAGGTCCCTTAGTATGGGGCTATATTATGTCTTGGTCCCAGCAA
+AATGACGTCGCCTGGGTCAGTTGGTGGTCGTTAAGTCTTTTTTGTATGGTAGCTCTTTAT
+CAAAGTTATAAGATAGCACCAATTGATGATAACGAAAATGAGCTTCATGGACAGGGTAGT
+GAAGATGCCTACAATTCGCAGTCACAGTCTTCTGATTTAAGAATGGCTCATCGATCTAGT
+TTAAGCAGCTTAAGTAACCAACGCTGTACCACATGA
+>SYN         1479 residues Pha 0 Code 0
+ATGTTTCATGCTTTCACCTTCCTTAAAGGTGGTAGATTTTACTCTTCACTAACAGTTAAA
+TCATTGTACGAGCAGGTACACCATACTAGCCATGATCCCATTTCAATTAATGGATGGATC
+AAATCCATAAGACTATTAAAACGTATAGCGTTTTTGGATTTACAAGATGGGACTTCTGTG
+AACCCATTAAGAATAGTTATTCCACTCACAAATACTGATGAAGTACAGTTCCTAAAAATT
+CTGAAAACTGGTCAAACTTTATCTATATCTAATGCTACCTGGCAAAGCACCCCTAATAGA
+AAACAACCTTTTGAATTGCAAATCAAAAATCCTGTCAAGTCAATTAAACTTGTGGGTCCC
+GTTTCAGAAAACTATCCATTACAAAAGAAATATCAAACCTTACGTTATTTAAGGTCCTTA
+CCTACACTAAAATACAGAACCGCTTACTTAAGTGCAATTTTACGGTTAAGATCATTTGTA
+GAATTCCAGTTCATGCTATATTTCCAGAAAAACCACTTCACCAAAGTTTCACCACCAATA
+TTAACTTCAAACGATTGTGAAGGTGCCGGCGAGTTGTTTCAAGTCTCCACCAATACGTCG
+CCAACTGCATCCTCGTACTTTGGGAAGCCGACTTATTTGACTGTGTCCACTCAATTGCAC
+TTGGAAATTTTAGCGTTATCACTGTCAAGGTGTTGGACGTTATCTCCTTGCTTTAGAGCC
+GAAAAGAGTGATACTCCAAGACACCTTTCGGAGTTTTGGATGCTTGAAGTGGAAATGTGC
+TTTGTTAATAGCGTCAACGAGCTAACATCGTTTGTTGAGACTACAATAAAACACATAATT
+AAAGCTTGTATAGATAACCAACAAGAACTCTTGCCGAAGCAATTTATCTCTTCACAAGAA
+AATAATGCATCGTCAGAGCTATCAATAAATCAAGAGACACAACAAATTAAAACACGATGG
+GAAGATTTAATAAATGAAAAATGGCACAATATAACGTATACCAATGCAATAGAAATTCTC
+AAGAAACGCCACAATGAAGTTTCACACTTTAAGTATGAACCTAAATGGGGACAGCCTTTG
+CAAACTGAACATGAAAAATTTTTAGCCGGAGAGTATTTTAAGTCCCCAGTTTTCGTTACC
+GACTATCCACGTCTTTGTAAACCATTCTACATGAAACAAAATTCCACTCCTGACGATACT
+GTTGGATGCTTTGATCTACTGGTTCCTGGAATGGGTGAAATAATTGGTGGGAGTTTAAGG
+GAAGATGACTATGACAAGTTATGTAGAGAAATGAAAGCACGCGGGATGAATAGATCTGGA
+GAATTGGACTGGTATGTTTCTCTGAGAAAAGAAGGAAGTGCACCACACGGAGGCTTTGGT
+CTAGGGTTTGAGAGATTTATCTCATACTTATATGGCAACCATAATATAAAGGATGCCATA
+CCCTTTTATAGAACATCTGCAGAATCCATCGATTTTTGA
+>YCR6         2232 residues Pha 0 Code 0
+ATGGAACTTCAGAATGATTTAGAGTCGCTCGATAACGAGCTGAATGATTTTAGTGAAGAT
+CCATTTCGTGATGATTTCATAACGGATGAAGACGCTGTAAGATCGGGGTGGCGATCTGCG
+TGGACCAGGATGAAATATTGGTTTTATAAGAATAGACTGAAGTGGACAAACAATCCCATA
+GTGATTGGCGACGCGAAAGATAGTAGGGATGGTTCTAACTTTAGAAGGGGTATACCGCTA
+TATGAATTAGACGCGAATGGTCAACCCATTGATACTGAACTTGTTGATGAGAATGAACTT
+TCTTTTGGAACGGGATTTCGTTCCAAAGTGCCTTTTAAAATAATATTTCGCACATTGCTT
+GGCTCGCTGGTGTTTGCCATTTTTTTAATTCTGATGATTAACATAGCAAAACCCCATCAC
+TCCACGAGAGTGCTATCGCACTTTGGCAGTCCTGAATTTGACCCTTACGTGAAGTATTTT
+AACGGTACGCATGAATTTTTCCCCTTAACGATAGTAATTTCACTAGACGGTTTCCATCCT
+TCACTCATATCTAAGAGGAACACACCGTTTTTACATGACTTATATGAATTGAAATATGAT
+GGAGGTATGAATATCACGTCCACACCTTTTATGATACCCAGCTTCCCTACGGAGACCTTT
+CCCAACCATTGGACGTTGGTTACTGGACAATACCCAATACACCACGGTATAGTCTCTAAC
+GTATTTTGGGATCCTGATCTTAATGAAGAATTCCATCCAGGTGTATTGGACCCTCGAATA
+TGGAACAATAATGATACAGAACCAATATGGCAAACTGTTCAGTCTGCATTTGACGGTGAT
+ATACCATTCAAAGCTGCTACCCATATGTGGCCAGGTAGCGATGTGAATTATACCAAGTAT
+AAGACTGAAGAGAAACTACAACCTGAACATAAAAAGCCTATTGCTAGAGAGAGAACTCCA
+TTTTACTTCGACGAATTCAATGCTAAAGAACCACTTTCGCAAAAATTATCCAAGATTATT
+GAATATGTGGATATGAGTACACTGAACGAAAGACCACAGTTAATTCTCGGTTATGTACCG
+AACGTAGATGCCTTTGGACATAAGCATGGATATCCGTCAGAGTCGGAATACTATTATGAA
+GACTTCACTGAAACACTGGGGGAAGTAGATACATTTCTGAAGCAACTAGTGGAATCGCTG
+CAAGAAAGAAATTTAACCAGCTTTACTAATTTGGTCATTGTTAGCGATCATGGTATGAGC
+GATATCGTAGTTCCCTCAAATGTTATTATATGGGAAGACTTACTGGACGAAAAATTGAGG
+AAGGATTATGTATCGCACGCATATCTAGAGGGTCCGATGATGGCTATATCGTTGAAAGAT
+TCCGGAAACATCAATGAGGTTTACCACAATTTAAAGACTTCTATAGATGAAGACAAGTAT
+ACGGTTTACGTTAATGGAAATTTCCCCAAAGAATGGAACTTTAATGATGGAAAAAATCAT
+CACATGGCGTCAATCTGGATTGTGCCCGAGCCTGGGTATGCAGTGATGAAGAAAGAACAA
+TTGAAGAAGGTGGCAAAAGGTGATCATAAGGACAAAAACGAAGACAATGTGTTCACGATT
+GGATCACATGGATACGACAATAACGCGATCGATATGAGATCTGTATTTATTGGTATGGGG
+CCATATTTTCCACAGGGATACATTGAGCCGTTCCAAAATACCGAAATTTACAACCTTTTG
+TGCGATATTTGCGGTGTGGCAGAAAAGGACAGAAATTCCAATGATGGGACTGGGATGCTT
+ATGAACCAACTCCGCGAACCCCAGAGCAGCGAAGAAGTAGAGATTGAAGATGACTTTGAT
+TATTTGGTCAGTAAGTTTGGTGAATTCAGCACTTATAATATAATTTGGGGCGGGTACCCC
+GAAGAGACAGAACAAGACAATGTTGACAATGATAATGATGACAACGACGATGGAAACACT
+GATGAAATAGCCGCTATGCCATCTTCGTCATTAACGATAAAACTAGAAATGACAACTTCA
+ATACCATCAGCAACTGAGACTCTACCGGGCGAAACATCACCATCATCAAGAAGAAGCAGC
+AGCAGCAGCATACAAGCTAGCGCTACTGCTAGCACAGTGGGGGATTGGCTTCAAGACATA
+ATCAACGACGCAAAAGATCTCATTGACGACATAATTGACAGCATCGACGATTTAGTCGAT
+TCTGATACCTAA
+>GNS1         630 residues Pha 0 Code 0
+ATGGAATACGCCACTATGTCTTCTTCGAACTCCACACATAACTTTCAGAGAAAGATTGCT
+CTTATAGGAGCTAGAAATGTCGGCAAAACCACATTAACGGTTCGCTTCGTAGAATCGCGG
+TTCGTTGAATCCTATTATCCCACTATTGAAAATGAATTTACCAGGATAATTCCTTATAAA
+AGTCATGACTGTACTCTGGAAATTCTAGATACTGCAGGCCAAGATGAAGTTTCTCTATTA
+AACATTAAATCGTTGACGGGCGTACGAGGCATAATGCTGTGCTATAGTATAATAAATCGT
+GCTAGCTTTGATCTTATTCCCATTCTCTGGGACAAGCTGGTAGATCAGCTGGGTAAGGAT
+AACCTCCCGGTAATACTTGTGGGTACCAAAGCTGATTTGGGAAGGAGTACAAAAGGTGTA
+AAAAGGTGTGTCACGAAAGCTGAAGGAGAGAAACTAGCTTCGACAATTGGCAGTCAAGAT
+AAGAGGAACCAGGCAGCATTTATAGAATGCAGTGCCGAGTTAGATTATAATGTTGAAGAA
+ACTTTTATGCTCCTTTTGAAACAAATGGAACGTGTCGAAGGAACTCTGGGGCTTGATGCC
+GAAAATAATAATAAATGTTCTATAATGTGA
+>FEN2         1539 residues Pha 0 Code 0
+ATGATGAAGGAATCGAAATCTATCACTCAACATGAGGTTGAGAGAGAATCTGTTTCTTCC
+AAACGTGCCATTAAAAAGAGATTACTTCTGTTTAAAATAGACTTGTTTGTGCTATCATTT
+GTTTGCTTGCAATACTGGATTAATTATGTCGACCGTGTCGGTTTCACCAATGCATATATA
+TCGGGTATGAAGGAAGATCTTAAGATGGTCGGAAACGATTTGACCGTGTCTAACACAGTT
+TTCATGATTGGTTACATTGTAGGTATGGTCCCCAATAATTTAATGTTATTGTGTGTTCCA
+CCTAGGATATGGCTAAGTTTTTGTACGTTTGCCTGGGGTTTATTGACCTTGGGAATGTAC
+AAAGTTACATCGTTCAAACATATTTGCGCAATTAGATTCTTTCAAGCCTTATTTGAGAGT
+TGCACATTTTCAGGAACACATTTTGTTTTGGGTTCGTGGTATAAAGAAGACGAATTGCCC
+ATTAGAAGTGCTATTTTTACAGGTAGCGGTTTGGTGGGATCTATGTTCAGTGGATTTATG
+CAAACAAGTATCTTTACTCATTTGAATGGGCGGAATGGCTTGGCGGGTTGGAGATGGTTA
+TTCATTATTGATTTTTGTATCACATTACCCATTGCAATTTATGGGTTTATTTTCTTCCCC
+GGCCTTCCTGATCAAACAAGTGCTGTTAGCAAATTTTCTATGACGAGATACATTTTTAAT
+GAACAAGAGCTACATTATGCTAGGAGAAGGCTCCCCGCTAGGGACGAAAGCACCCGGTTA
+GACTGGTCGACTATTCCTAGAGTCCTAAAAAGGTGGCACTGGTGGATGTTCTCTCTTGTT
+TGGGTTCTGGGAGGTGAGAATTTGGGTTTCGCATCTAATTCTACATTTGCATTATGGTTA
+CAAAACCAAAAATATACGTTGGCGCAAAGAAATAATTATCCTTCGGGGATATTTGCCGTA
+GGTATAGTTTCTACGCTTTGTTCTGCTGTATATATGAGTAAGATCCCAAGAGCTAGGCAT
+TGGCATGTTTCTGTTTTCATATCATTGGTAATGGTTATTGTTGCGGTACTAATACGTGCA
+GACCCACTAAATCCAAAAGTCGTCTTTTCTGCACAGTATCTTGGAGGCGTAGCATACGCT
+GGACAAGCGGTTTTTTTTTCGTGGGCAAACATTATTTGTCATGCAGATCTTCAAGAACGT
+GCTATCGTTCTTGCTTCAATGAATATGTTTTCAGGGGCCGTTAACGCATGGTGGTCTATA
+TTATTCTTTGCTTCAGATATGGTGCCCAAGTTTGAGAGAGGTTGCTACGCCCTCTTGGCT
+ACGGCAATATCAAGCGGAATTGTCTCGGTCGTCATACGCTCACTACAGATAAAAGAGAAT
+TTGTCTAAGAAACAGGTTCCTTATATAGATGCTAATGACATGCCCGGGGAAGATGACGAT
+GACGACAACCAGGATAATGAAAATGATGGCGACGACGAGAGTATGGAAGTTGAACTTCAT
+AATGAGGAAATGGCCGAAATTTCAAATCCTTTCCGATAA
+>RIM1          444 residues Pha 0 Code 0
+ATGTTTTTACGTACTCAAGCTCGTTTCTTCCATGCTACTACCAAGAAGATGGACTTCTCG
+AAAATGTCCATCGTCGGCCGCATTGGCTCTGAATTCACTGAACATACTTCTGCTAATAAC
+AATCGTTATTTGAAATATAGTATCGCTTCGCAACCAAGAAGAGATGGCCAAACCAATTGG
+TATAATATCACCGTTTTCAATGAACCTCAAATCAATTTTTTGACAGAATATGTTAGAAAA
+GGCGCTTTGGTATATGTTGAAGCAGATGCTGCTAACTATGTCTTCGAGAGAGACGACGGT
+TCTAAGGGTACTACTTTGAGCTTAGTTCAAAAGGACATTAATTTATTGAAGAATGGGAAG
+AAATTAGAAGATGCTGAGGGCCAAGAAAATGCTGAGGGCCAAGAAAATGCTGAGGGCCAA
+GAAAATGCTGCTTCTTCAGAATAA
+>CRY1          414 residues Pha 0 Code 0
+ATGTCTAACGTTGTTCAAGCTCGTGACAATTCCCAAGTTTTTGGTGTTGCTAGAATTTAC
+GCTTCTTTCAACGATACTTTCGTTCATGTTACCGATTTATCTGGTAAGGAAACCATCGCC
+AGAGTTACTGGTGGTATGAAGGTTAAGGCTGACAGAGATGAATCTTCTCCATACGCTGCT
+ATGTTAGCTGCCCAAGATGTTGCCGCTAAGTGTAGGGAAGTCGGTATCACTGCCGTTCAC
+GTTAAGATCAGAGCTACCGGTGGTACTAGAACCAAGACTCCAGGTCCAGGTGGTCAAGCT
+GCTTTGAGAGCTTTGGCCAGATCTGGTTTGAGAATTGGCCGTATCGAAGATGTTACCCCA
+GTTCCATCTGACTCCACCAGAAAGAAGGGTGGTAGAAGAGGTAGAAGATTATGA
+>YCS2        6504 residues Pha 0 Code 0
+ATGAATTCAATTATTAATGCTGCTTCGAAAGTCTTAAGACTCCAAGACGATGTGAAGAAG
+GCTACTATAATATTAGGAGATATACTGATATTACAACCAATTAATCACGAAGTTGAACCA
+GATGTAGAAAACTTGGTACAGCATGAACTAACCAAGATAATACAAGGTTATCCCATACAG
+GATAATATGATTATTAATAGCAAAAAAGGCACAGTTGAAGATGACTTATGCGAACTCAAT
+AACTATACCTGTTTTGCACTTTCGAAAAGCTTTGATTTATGCCATGATAGCAGAAATTTC
+AACATAGCGCAGCCGAAACGATGGATACAATTATTAGAGACATTAACTGACTCAGTTAGT
+TTCGCAGTTATTGTTCAAATTATTCTCACTTTATCTAACATTTCGCTAATAAATAAACAA
+ACCTTGGGGAAGTTAAAAAAACTGAGGATTCGAATTTTCGAAATACTATCAAATAAAAAC
+GATAGTTGGAAATCTACATTACTACAGAAAAACCTTATAGAATGGTACATTTTTATGCTT
+TCCGTGGATTGCACACCTTTAGAATTGCAAAACTTATATCTCCATAAGGAGTTGAAATTC
+TGTAACGATATCTTGAATTCATTAACACTCCAAGTTTCTGATCCTCGCTCACAAAATTAC
+CTGCAATTTGAGAACACGTATAAGCTTTTTCAAATACAAAAGTCATCTAGAATTAACAAC
+TCGTTCCTTTTTTACATAGAATTCAATTCCGTTACCTCAAATAGGATAATGACCATAGAA
+AAACACATTTATTTGGAAATTAAGGAAGGCCAGTTTTGTATTTCAAATGATAACTACATA
+ATCGGTTTATTTGAAAACTTCGAATTCGAAGCGGGCACTTTGTACTTTATTGGAGTTTTA
+ATTGATCACAATAATCGAATAACTCTTTATGTTGATGGAAGTATGATCAATCAGCTCACG
+TTATTTGAAAACTCTATATGCCAATTAAGCACTTGTGAACTGGGATCCATGATTTGTTCA
+ATTAAAGTATATAGATTTTATTTGTGGGATGGATTATTAACAGAATTTGCGATAAATATA
+CTTCAAGCTATCGGCACCAATTACCAATATACATTTAGCAAGAAAAAAGAAGGGCCTGAA
+GTTTTATCGCTCTGCCAAGACTTTTTGATCGCTAAGGCTCATTTAATGGCCAGGCCTGCA
+ACAGAAATATCTTCCACAAAATACATCGATGAGATTGAACTTCTTGAAATGGAAAATATC
+ATTATTGATGTTAACCCAAATGATATTCTTCAAGATTTCACCGAATCGTCTAATTTTACG
+GTAAAATTTGAGGAAAGCACAAACTCGAAAAATATTCCGGAAGTGGGTAAGTGCTATTTC
+TATAGGAGTTCAAACTTGGTTTCAAAATTTGTGTCCATTGATTCTATACGGCTTGCGTTT
+TTAAACATGACAGAATCCGGTAGTATAGACGATCTGTTTCATCATGTATCACATCTGATG
+AATCTTTTACGAAATATTGATATTCTTAATTGGTTTAAAAAAGACTTTGGCTTCCCTTTA
+TTTGCTTATACTTTAAAACAAAAAATAACACAAGATTTATCTCAGCCTCTGAATATCCAA
+TTTTTCAATTTATTCTTAGAATTTTGCGGGTGGGATTTCAACGATATTTCCAAATCCATA
+ATTCTAGATACTGATGCCTACGAAAACATAGTCCTTAACTTGGATTTATGGTATATGAAT
+GAGGATCAAAGTTCTCTGGCGTCAGGCGGATTAGAAATTATCAGATTTCTTTTCTTCCAA
+ATTTCAAGTTTGATGGAAGCCTCTATTTATTCTAAGTTCAATTCCAATAAATTCAATGAT
+ATGAATATCCTAGAAAAACTATGTTTAAGCTATCAGGCTGTCACAAAAAGAGAAAATCAG
+AACAGTAAATTTAATGAGCTATCAAATGATTTAATTTCTGTATTTGTTACTTTATTGAAA
+AGCAATACTGATAAACGACACCTGCAGTGGTTTTTACATCTCTCATATTACTTTATTAAG
+AGAAAAGATGTACGTTCTACAGAAATTATACTTCAAGCGGTAGATCAACTTTTTTCGTTT
+TACTTAGATCAAGGTAGCGACGAAAATGCGAAGATACTTTCAGAGATTATACCACTTAAG
+CTAATGCTGATGATTATGGATCAAATAGTGGAAAATAATGAATCAAACCCTATTACGTGC
+TTGAATATCTTATTTAAGGTAGTTCTGACCAATAAACCGCTTTTCAAACAATTTTACAAA
+AATGATGGTTTGAAACTCATATTGACTATGCTTTGTAAGGTAGGGAAAAGCTATCGAGAG
+GAGATTATTTCTTTGCTTCTCACATATTCTATTGGCAATTATACCACAGCTAACGAAATA
+TTTTCAGGTGCTGAAGACATGATTGGAGGAATTTCAAACGACAAGATAACTGCAAAAGAA
+ATTATTTATTTGGCTGTCAACTTCATTGAGTGGCATGTGATTAATTCTAATGCCAGTGAT
+TCTTCTTCTGTATTGGACCTGAACAACCATATATTAAGATTCGTCGAAGATCTGAAATCG
+CTGAGCGCTGTTCCGATTAATGAATCTGTATTTGATCCTAAAAAAAGTTATGTGATGGTT
+TCATTATTAGATCTCTCGATAGCTTTGAATGAATCGGAGGACATCTCAAAGTTCAAGAGC
+TCTTCAAAAGTGATTTCAGAGCTCATTAAAGGTAATATAATGTGTGCTCTTACGAAATAT
+GCCGCTTATGATTTCGAAGTCTATATGAGCACATTTTTTTGTCACAGTACAGAATACAAA
+CTGGTTTATCCAAAAACTGTAATGAACAATTCCAGTTACTTAGAGCTATCATTTATAGTG
+ACACTCCTACCCGAAATACTTAATGACCTGATAGATAGCAATAACAATTTGAACCTGATG
+ATGTTGAAGCATCCATACACGATGTCAAATCTCCTTTATTTTCTTCGCAAATTTCGACCT
+GATACGTCACAGATAGTTATGCCTAAAGATTTTTATTTCTCAAGTTATACATGTCTCTTG
+CATTGTGTTATTCAGATTGATAAATCATCATTTTACCATTTCAAAAACGTTTCTAAGTCG
+CAACTGTTACAGGAATTCAAAATCTGCATAATGAACTTAATATATTCCAATACTCTAAAG
+CAGATAATCTGGGAGAAAGAAGAATACGAGATGTTTTCTGAGTCACTGATGGCGCATCAG
+GAAGTTTTATTTGCACATGGAGCATGTGATAATGAGACCGTTGGCTTATTGTTAATATTT
+TTTGCCAACAGATTACGTGATTGTGGATACAACAAAGCAGTCTTCAATTGTATGAAAGTG
+ATCATTAAGAACAAGGAAAGGAAACTAAAGGAGGTGGCGTGTTTTTTTGACGCAGCGAAT
+AAAAGTGAAGTACTCGAAGGTTTAAGTAATATCCTCTCATGCAATAACTCTGAAACAATG
+AACCTCATAACTGAACAATACCCATTTTTTTTCAACAATACACAACAGGTACGGTTCATA
+AACATTGTCACCAATATCTTGTTTAAGAACAACAATTTTTCTCCAATAAGCGTTAGACAG
+ATCAAAAACCAAGTTTACGAATGGAAAAATGCAAGATCAGAATACGTCACCCAAAACAAT
+AAAAAGTGCCTTATTTTATTTAGAAAAGACAACACATCCTTAGATTTTAAAATCAAAAAG
+TCCATATCAAGATACACTTACAACCTCAAAACGGATAGAGAAGAAAATGCAGTTTTCTAT
+CGAAATAATTTAAATCTTTTGATTTTTCATCTGAAACATACACTGGAGATACAATCAAAT
+CCAAATTCGTCCTGCAAGTGGTCATTGGACTTTGCAGAAGATTTTGATGGGATGAAACGG
+AGGCTTTTGCCTGCTTGGGAACCAAAATATGAACCACTCATTAACGAGGAAGATGCTAAT
+CAAGATACTATAACAGGTGGTAACAGACAAAGGAGAGAAAGTGGAAGCATTTTATCCTAC
+GAATTTATCGAACATATGGAGACTCTTGAGTCGGAGCCAGTTGGAGATTTGAATGAGAAT
+AGAAAAATTCTTAGACTTTTGAAGGATAACGATTCTATTGCAACTATTTGGAATTGCAGT
+TTGATTATTGGATTAGAAATTAAGGAGGGGATTTTAATTCATGGCAGTAATTACCTTTAC
+TTTGTAAGTGATTACTATTTTAGTTTAGAGGATAAAAAGATTCTAAAATTATCAGAAGTA
+TCGCAAGAATCACGGGATATGACGGTTAGCTTAATTAACGGCCCTGATGTTAAAAGGGTA
+TCAACTTTCCTAAAGCACGAAGTCTTTGTTTGGAAACTTCTCGATATCACTTTCGTTACC
+AAACGACCCTTTCTACTTCGGGATGTCGCCATCGAATTATTGTTCAAAGAGAGAGTTAGC
+GCTTTTTTTAGTTTTTACAACAAAAGAGTGAGAGATGACGTTTTACGGGTACTGAATAAG
+ATCCCGAAGCACCTTCCAGCAGATCCAATTTTTTCAAGCGTTTTACAAGAAATAAACGAC
+CGAGGAAATAGTATAGTGGCAAGAAATGGAATAGGAAAGGCAAGCATTGCTTCCAAATTC
+ACTAGCGTCTTCTCAGCGAACAACAGCCTAATAGATGGATTTGAGATCAGCAAAAAATGG
+GTTAGGGGAGAGATTTCTAATTTTTATTACCTGTTGAGTATCAACATCCTAGCGGGAAGG
+TCATTCAACGATTTGACCCAATATCCAGTGTTTCCGTGGGTTATTGCAGATTACGAAAGT
+AACGTACTCGATTTAGAGAATCCTAAAACTTACCGGGACCTATCGAAACCTATGGGCGCT
+CAAAGTGAGAAAAGGAAATTACAGTTTATAGAGCGTTATGAAGCTTTGGCTTCCCTGGAA
+AATGCTGATTCCGCACCATTTCATTATGGCACGCATTATTCCTCAGCTATGATAGTATCT
+TCATATCTGATAAGGCTGAAGCCCTTTGTCGAATCCTTTTTGTTATTGCAAGGCGGAAGT
+TTTGGCCCTGCAGATCGTTTATTTAGTTCGCTTGAAAGGGCCTGGAGCTCTGCTTCTTCT
+GAAAATACAACGGATGTCAGGGAATTGACACCTGAATTTTTTTTTCTACCTGAATTTTTG
+ATCAACGTTAATAGTTATGACTTTGGTACAGACCAAAGCGGTAAAAAAGTTGACGACGTC
+GTACTTCCACCCTGGGCAAATGGTGACCCAAAGGTTTTCATTCAAAAGAATAGAGAAGCT
+TTAGAAAGTCCTTATGTATCAGCACATTTACATGAATGGATTGATTTGATATTTGGTTAC
+AAACAAAAGGGGGAAATTGCTGTGAAATCTGTTAACGTATTCAACAGATTGAGTTACCCA
+GGCGCTGTAAATCTAGATAATATTGACGATGAAAATGAGCGCAGAGCTATCACAGGCATT
+ATTCACAACTTTGGTCAAACGCCTTTACAAATATTTCAGGAACCTCATCCGGAAAAAATA
+GCCTGCAATGTTCAACAGCTAACAACAGAGGTATGGCGTAAGGTTCCAATGAAGCCAATA
+TTTGAGAAGACAATCTTTAATTTGAATGAAAAGAACAGGTCTGTCGATTATGTTATACAC
+GATCCTAGTTACTTCGATTCATTATACTGGAGGGGCTTCGCTTTCCCAAACTTGTTTTTC
+AGAACGGAAGAATCGTTAGTGTCATTGAGAATTGTGCATAAAAATTGGTTAAAAATTGGA
+CTAGATATTTTTAAAAAGACGCATATGGCTCAGATTACATCGTTTGCGTACTGGAAGTTG
+GGCGAATTCATAACTGGTGATAAAAATGGGCTGATAAAAGTTTGGAAATATCGTAAAGAT
+AAGCATTCGGTTTCAGGTAACCTTGAGAACAAAAAAACAATGTTTGGGCACCTATGCGAG
+CTAAAGGAAATGCGCTGTTATCACGACTACAATACGCTTTTAACCTTAGACATCAGCGGC
+TTAGTATATGTCTGGGACATGATTAATTTCGAACTAGTGAGACAAATAACAAATGATGCG
+CAAAAGGTCGCAATATCTCAACATGCAGGGAGCATTATGGTATTGACTAAGAATAACGCC
+ATTTCGATCTTCAATCTAAATGGACAAATATATACATCAAAGAAATTCGAACCAGCTAAA
+ATTGTAAGCTCAATTGATTTTTTTGACTTCACTAAGTTAGACGCAGGTTACAGAAAGCAT
+ATCTATTGGAAAGAGATGGAAATACTACTAGTGGGCTTTGAAGATGGAACTATAGAAATT
+TACGAGCTCTTTTTGACTTTTCATAATGAATGGGCGATAAAGCTACTGAAACAGCTCTGT
+ACCGAAAGAGGGAAAGCCATAACTAGCATTAAGGGACAGGGGAAGACATACCTGTCCCAG
+AAAAGACGCAAGGATACAGCAGAGCCTCATGAGATAGAAGTGATTGCGGGAACATTAGAT
+GGCAGATTAGCTATTTGGTACTAG
+>YCS3        3681 residues Pha 0 Code 0
+ATGGGGTATCCGCCACCTACACGAAGGCTTGGAGATAAGAAAAGGTACCATTATTCCAAT
+AATCCTAACCGAAGGCATCCTTCCGCTGTTTATTCCAAGAATAGCTTTCCAAAATCAAGC
+AATAATGGATTTGTATCTTCTCCTACTGCCGATAATTCAACAAATCCGTCTGTAACTCCC
+AGTACTGCATCTGTACCTCTTCCTACAGCGGCACCTGGAAGCACGTTTGGTATCGAAGCA
+CCCAGGCCATCTCGATATGATCCGAGCTCAGTCAGTAGGCCTTCGTCATCATCTTATTCG
+TCAACAAGAAAAATTGGAAGCCGTTATAACCCAGATGTGGAAAGATCCTCTTCAACCACT
+AGTTCAACTCCGGAAAGTATGAATACGAGCACCATAACACACACCAATACGGATATCGGA
+AACTCACGCTATTCTCGAAAAACCATGAGCAGATATAATCCTCAATCTACTAGTTCTACA
+AACGTTACCCACTTTCCCTCGGCATTATCAAACGCTCCACCGTTTTATGTTGCCAACGGG
+AGTTCTCGGAGACCTCGATCAATGGATGATTATAGTCCTGATGTAACGAACAAGCTCGAA
+ACAAATAATGTTTCATCTGTTAATAATAACAGCCCTCATTCTTATTACTCTAGGAGCAAC
+AAATGGAGATCCATTGGAACGCCTTCCAGACCACCATTTGATAATCATGTCGGCAATATG
+ACGACCACCAGCAATACTAACTCGATCCATCAAAGGGAACCTTTTTGGAAAGCAAATAGT
+ACTACTATTTTAAAATCAACTCATTCACAGTCATCGCCTTCCCTTCATACTAAAAAATTT
+CACGATGCGAATAAATTGGACAAACCAGAGGCTTCAGTTAAAGTTGAAACACCCAGTAAA
+GATGAGACAAAAACCATATCGTACCATGATAACAATTTTCCACCAAGAAAATCAGTTTCT
+AAACCTAATGCACCTTTAGAACCCGATAATATCAAGGTTGGCGAAGAAGATGCATTGGGG
+AAAAAAGAAGTACATAAAAGTGGGCGTGAGATAGCAAAGGAACATCCTACTCCTGTAAAA
+ATGAAAGAGCATGATGAACTAGAAGCTCGCGCTAAAAAAGTAAATAAAATCAATATTGAT
+GGAAAGCAGGACGAAATTTGGACGACAGCAAAAACAGTGGCCAGTGCAGTCGAAGTTTCC
+AAAGAAAGTCATAAGGAACTAACACGCTCTGTTGAAAGGAAGGAAAGTCCAGAAATTAGA
+GATTATGAAAGAGCATACGATCCGAAAGCCCTGAAAACAGACGCAACAAAGTTGACAGTA
+GACGATGATAATAAAAGTTACGAAGAACCTCTTGAAAAAGTGGAAGGGTGTATTTTCCCA
+TTACCAAAAGCAGAAACGAGATTATGGGAATTGAAAAACCAGAAAAGAAACAAAATAATA
+AGTAAACAAAAGTACTTACTGAAAAAGGCAATTAGGAATTTCTCAGAGTATCCTTTTTAC
+GCACAGAACAAACTTATACATCAGCAGGCTACCGGACTTATCTTGACGAAAATTATATCA
+AAGATAAAAAAGGAGGAACATTTGAAAAAAATAAATTTAAAACATGATTATTTCGATCTC
+CAGAAGAAGTATGAAAAAGAATGCGAAATTTTGACTAAACTGAGTGAAAATTTAAGGAAG
+GAAGAAATCGAAAATAAACGTAAAGAGCACGAATTAATGGAGCAGAAAAGACGTGAAGAA
+GGTATCGAAACAGAAAAAGAAAAAAGCTTACGGCATCCATCCTCGTCTTCCTCATCTCGT
+CGCAGAAATAGGGCTGACTTCGTTGATGATGCGGAAATGGAAAATGTATTGCTACAAATC
+GACCCAAATTATAAACATTATCAGGCTGCTGCAACAATTCCTCCGCTAATTTTAGATCCA
+ATCCGCAAATACTCTTACAAATTCTGTGATGTAAATAACTTGGTTACAGACAAAAAGCTT
+TGGGCGTCTAGAATATTGAAAGACGCCTCTGACAACTTTACTGACCATGAGCACTCTTTA
+TTTTTGGAGGGTTATTTAATTCATCCTAAAAAATTCGGTAAAATTTCTCACTACATGGGC
+GGCTTAAGAAGTCCTGAAGAGTGTGTCCTACATTATTATAGAACAAAGAAAACTGTGAAT
+TATAAACAACTTCTTATCGATAAGAACAAGAAAAGAAAAATGTCAGCCGCTGCGAAGCGC
+CGCAAGAGGAAGGAAAGAAGTAATGACGAGGAAGTCGAAGTTGATGAGAGTAAAGAAGAG
+TCAACGAACACGATAGATAAGGAAGAAAAAAGTGAGAACAATGCCGAGGAAAATGTTCAG
+CCGGTTCTAGTTCAAGGTTCTGAAGTGAAAGGTGATCCATTAGGTACACCGGAAAAAGTT
+GAAAATATGATTGAAAAGAGAGGCGAAGAGTTTGCAGGTGAATTGGAAAATGCTGAGAGG
+GTAAATGACTTAAAAAGGGCGCATGATGAAATTGGAGAAGAGAGCAATAAGTCCAGTGTA
+ATAGAAACCAACAATGAGGTACAAATAATGGCTCCAAAAGGAGGTGTTCGGAATGGTTAT
+TATCCAGAGGAGACCAAAGAACTTGACTTCAGTTTAGAGAATGCGTTACAGAGAAAGAAA
+CACAAATCTGCACCAGAGCATAAAACAAGTTATTGGAGTGTTCGTGAATCTCAACTCTTT
+CCAGAATTGTTGAAGGAGTTTGGCTCTCAATGGTCTCTCATATCAGAAAAACTGGGTACC
+AAATCTACTACAATGGTAAGGAATTACTACCAAAGAAATGCAGCTCGCAATGGATGGAAA
+TTACTGGTTGATGAAACCGACTTAAAGCGAGATGGGACTAGTTCAGAATCTGTACAACAA
+TCTCAAATTTTGATACAACCAGAACGACCAAACATCAATGCCTATAGTAATATTCCTCCT
+CAACAAAGACCGGCTTTGGGTTATTTTGTTGGACAACCAACTCATGGGCATAATACATCT
+ATTTCATCTATCGATGGCTCTATAAGACCATTTGGGCCTGATTTTCATCGTGATACCTTT
+TCTAAAATTAGTGCTCCTTTAACCACTTTACCACCACCAAGACTACCATCTATTCAGTTT
+CCTCGTTCAGAAATGGCAGAACCTACAGTGACAGATTTGCGTAACAGGCCCTTAGACCAT
+ATTGACACGTTGGCTGATGCAGCTTCGTCAGTAACAAATAATCAAAACTTCAGTAATGAA
+AGGAATGCAATTGACATTGGCCGTAAATCGACGACAATCAGCAATCTATTGAATAATTCG
+GATCGAAGCATGAAATCTTCTTTCCAAAGCGCTTCAAGACACGAAGCACAGCTCGAAGAC
+ACTCCCAGCATGAACAATATTGTAGTACAAGAAATAAAACCGAATATTACTACGCCAAGA
+TCGAGTTCTATTTCTGCATTACTAAATCCTGTAAATGGGAATGGGCAATCAAACCCAGAT
+GGAAGGCCGTTGCTGCCATTTCAGCATGCTATTTCTCAAGGCACTCCTACTTTCCCTTTA
+CCGGCCCCTCGCACTAGTCCAATAAGTCGTGCGCCTCCAAAGTTCAATTTTTCGAATGAT
+CCGTTGGCAGCTTTGGCTGCGGTTGCCTCCGCGCCAGATGCAATGAGCAGTTTTTTATCT
+AAAAAGGAAAATAATAATTGA
+>GNS1         1044 residues Pha 0 Code 0
+ATGAATTCACTCGTTACTCAATATGCTGCTCCGTTGTTCGAGCGTTATCCCCAACTTCAT
+GACTATTTACCAACTTTGGAGCGACCATTTTTTAATATTTCGTTGTGGGAACATTTCGAT
+GATGTCGTCACTCGTGTAACTAACGGTAGATTTGTTCCAAGCGAATTCCAATTCATTGCA
+GGTGAATTACCATTAAGCACTTTGCCCCCTGTGCTATACGCCATCACTGCCTATTACGTT
+ATTATTTTTGGTGGCAGGTTTTTGTTAAGTAAGTCGAAACCATTTAAATTAAATGGCCTT
+TTCCAATTGCATAATTTGGTTTTAACTTCACTTTCATTGACGCTTTTATTGCTTATGGTT
+GAACAATTAGTGCCAATTATTGTTCAGCACGGGTTATACTTCGCTATCTGTAATATTGGT
+GCTTGGACTCAACCGCTCGTTACATTATATTACATGAATTACATTGTCAAGTTTATTGAA
+TTTATAGACACCTTTTTCTTGGTGCTAAAACATAAAAAATTGACATTTTTGCATACTTAT
+CACCATGGCGCTACTGCCTTATTATGTTACACCCAATTGATGGGCACCACATCTATTTCT
+TGGGTCCCTATTTCATTGAACCTTGGTGTTCACGTGGTTATGTATTGGTACTATTTCTTG
+GCTGCCAGAGGCATCAGGGTCTGGTGGAAGGAATGGGTTACCAGATTTCAAATTATCCAA
+TTTGTTTTGGATATCGGTTTCATATATTTTGCTGTCTACCAAAAAGCAGTTCACTTGTAT
+TTCCCAATTTTGCCACATTGTGGTGACTGTGTGGGTTCAACAACTGCCACCTTTGCAGGT
+TGTGCCATTATTTCTTCATATTTGGTACTATTTATTTCATTTTACATTAACGTTTATAAA
+CGTAAAGGCACCAAAACCAGTAGAGTGGTAAAGCGTGCCCACGGCGGTGTTGCCGCAAAG
+GTTAATGAGTATGTTAACGTTGACTTGAAAAACGTTCCTACTCCATCTCCATCACCAAAA
+CCTCAACACAGAAGAAAAAGGTAA
+>RBK1         1002 residues Pha 0 Code 0
+ATGGGTATTACAGTAATAGGTTCTCTAAACTATGATTTGGACACATTTACGGATAGATTA
+CCTAACGCTGGAGAAACTTTCAGGGCTAACCACTTCGAAACACATGCTGGTGGTAAGGGA
+TTGAACCAAGCTGCGGCCATTGGTAAATTAAAAAACCCCAGCAGCAGATATAGTGTTCGA
+ATGATTGGTAATGTTGGAAATGATACATTTGGTAAACAATTGAAGGACACTTTATCCGAT
+TGCGGAGTCGATATCACTCACGTCGGTACTTACGAAGGCATTAATACGGGTACCGCTACC
+ATATTAATTGAAGAGAAAGCTGGTGGCCAAAATAGGATATTGATTGTAGAAGGTGCTAAC
+AGCAAGACTATTTATGACCCGAAACAGTTGTGTGAAATTTTTCCAGAGGGCAAGGAGGAA
+GAAGAGTATGTTGTTTTTCAACACGAAATTCCTGATCCTCTTTCCATTATTAAATGGATA
+CATGCGAACAGGCCGAATTTTCAGATCGTATATAACCCCTCACCTTTCAAGACCATGCCT
+AAGAAAGATTGGGAGTTGGTAGACCTTTTGGTCGTTAATGAAATTGAGGGTCTTCAAATC
+GTGGAAAGTGTATTTGATAATGAACTTGTTGAAGAAATAAGGGAGAAGATAAAGGACGAC
+TTTTTAGGAGAATATCGTAAAATTTGTGAGCTTTTGTATGAAAAACTCATGAATCGAAAG
+AAAAGAGGAATTGTGGTTATGACTTTGGGTTCGAGAGGGGTGCTTTTCTGTTCGCACGAA
+AGCCCTGAAGTACAATTCCTTCCGGCTATTCAAAATGTTTCGGTTGTTGATACTACAGGA
+GCTGGAGATACTTTCCTGGGCGGTTTGGTTACTCAATTGTATCAAGGAGAGACCTTGTCT
+ATGGCTATAAAGTTCTCTACATTAGCTAGTTCATTGACCATTCAAAGAAAAGGTGCTGCT
+GAAAGCATGCCACTGTATAAAGATGTTCAGAAAGATGCATAA
+>PHO87        2772 residues Pha 0 Code 0
+ATGAGATTCTCACACTTTCTCAAATACAACGCTGTCCCTGAATGGCAGAATCATTACCTA
+GATTATAACGAATTGAAAAATTTGATCTACACATTACAGACAGATGAATTGAAACAAGAA
+ACGCCAACCGGTGACTTAAACGATGACGCTGACTCTCAGACTCCAGGTCCAATCGCTGAT
+ATAGAAAGCAACATAGCTGCAGGAGAACCATCTCCATCGAAAAGAAGATTTACACATAAA
+CTCAAGCGTAAGCTCTTTGGTTCTAAAACACCTTCAGGAAGCAAAAGGGGAGACTCCGAC
+GAAAAGGCCATAGATGGGAACAATATTAACGAGGAAACAATTGAGTTAGACGAGTTATCT
+CCTCAAGGGAAAACCACCTCTTTCAATAAGAATTTTATACGTAAGAAATTCTTTGAATCA
+CGCAGCTCATCTGTGAGTAGCGAGGGAAAGACGCTCTTCAGTTCTTATGATACATTCGTA
+ACTAACCTGAGCGACGAGAAATTGAAAGTAGATGATTTCTACAAAAGAATGGAAGCTAAG
+TTCTATGAAAGATTTGACCACTTGATTAATGATTTGGAGAAGGAAGGCATTGTAACAAGA
+TTGAATGAAACTTTCAATCCTGAAATTCAAGCATTGCCTCCTTTAAGAGAAATTATTTCT
+GGTACATCAGAGACACATTCATCTAATAACCCATTTGAAATACACTCTTCAAACATCGAC
+AGTGAATTGAGAAATAGGTTTGATTACAGCGAAGAAGAAATGGATGAAGATGATGACGTT
+GACGTGTTTGCTGACACTACCGACAATACCGCCCTCTTGAATTATTCGCAATTTAACATT
+AAATCTCAGAAAAAATCATTATTAAAACAGACAATAATAAATCTTTACATAGACCTTTGC
+CAGTTGAAATCTTTTATCGAATTGAACAGAATGGGTTTCAGTAAAATTACTAAGAAGTCT
+GATAAAGTATTGCACATGAACACTAGGCAAGAATTAATAGAAAGTGAAGAATTTTTCAAA
+GACACCTACATCTTCCAGCATGAAACTTTAAGCAGTTTAAACAGTAAAATTGCACAACTT
+ATTGAATTTTATGCTGTTCTCATGGGTCAGCCTGGGAACGTAGATTCATGCAAGCAAGAG
+TTAAAGTCGTACCTGCACGACCACATTGTTTGGGAAAGAAGCAACACATGGAAAGACATG
+TTGGGCCTCTCTTCGCAAAATAACGATATAATAACTATTGAAGATGAAGCTGAGAAACTT
+ATGCAAGAAAAGCTTCAAATTGAATATTTCAAGTATCCATTGCCTAAGCCAATTAATTTG
+AAGTTTACTAAAATTGAAAATTTGGCAGTTCCTAAGCTATTTTTTGGGAAAAGAGCAATG
+AAAATAGGCTTCATTATCATTGTCACAGGTGTTTTGTTGGGTGTTAAAACTTTCAATGAC
+CCTGTCGAACACCGGTGTATGGCATTGGTAGAATGCTGTGCTTTCTTATGGGCTAGTGAA
+GCCATTCCATTACACATCACAGGTTTATTGGTTCCCCTTCTAACTGTCCTTTTTAGGGTA
+CTAAAAGACGATGACGGTAAGGTAATGGGAGCAGCAGCTGCCTCTACAGAAATCTTAGGT
+ACAATGTGGTCGTCAACAATTATGATTTTATTAGCAGGTTTCACATTGGGTGAAGCCTTG
+TCGCAATATAACGTTGCGAAAGTTTTGGCATCGTGGTTATTGGCCCTTGCAGGTACCAAG
+CCAAGAAATGTCCTTTTAATGGCAATGAGTGTTGTATTCTTTCTTTCGATGTGGATTTCC
+AACGTTGCCTCCCCAGTATTGACATATTCTCTATTAACACCCTTACTAGATCCGCTGGAC
+TACACTTCACCGTTTGCTAAGGCATTAGTCATGGGTGTTGCACTTTCGGCAGATATTGGT
+GGTATGGCTTCACCTATTTCTTCGCCACAGAATATCATCTCCATGCAGTACTTAAAACCT
+TATGGAATCGGCTGGGGGCAATTTTTTGCTGTCGCTCTGCCTACAGGTATTCTATCGATG
+CTGTGCTCCTGGGCCTTGATGATACTCACCTTTAAAATAGGCAAAACTAAACTGGAAAAA
+TTTAAACCAATAAGGACCAGATTTACTATAAAGCAATATTTTATCATCATTGTAACTATT
+GCTACTATTCTTCTATGGTGTGTAGAGTCACAAATAGAAAGTGCTTTTGGATCGTCCGGT
+GAAATTGCAGTAATACCGATAGTCCTGTTTTTTGGTACAGGTCTACTATCAACAAAGGAT
+TTCAACACATTCCCTTGGTCAATTGTTGTTCTTGCTATGGGTGGTATAGCCCTTGGTAAG
+GCAGTTTCATCTTCAGGCTTGTTGGTAACTATTGCAAGAGCATTACAAAAGAAAATTCAG
+AACGATGGTGTTTTTGCTATCTTATGTATTTTCGGTATTTTAATGTTAGTTGTGGGCACT
+TTTGTCTCACATACTGTGTCAGCAATCATCATTATTCCCTTGGTGCAAGAAGTTGGTGAC
+AAATTATCCGATCCAAAGGCAGCTCCAATTCTTGTGTTCGGTTGCGCCTTGTTAGCCTCA
+TGCGGTATGGGGTTGGCTTCATCTGGATTTCCAAACGTTACTGCTATTTCTATGACCGAT
+AAAAAGGGTAATAGATGGCTAACTGTAGGCGCTTTTATCTCCAGAGGTGTTCCTGCTTCG
+TTGTTAGCGTTTGTCTGCGTAATTACTCTCGGTTATGGTATTAGTTCTTCCGTCTTAAAA
+GGTAGCACTTAA
+>BUD5         1617 residues Pha 0 Code 0
+ATGAGAACGGCCGTACCGCAGTTGCTGGAAGCAACTGCCTGTGTCTCTAGAGAATGCCCC
+CTCGTCAAAAGAAGTCAGGACATAAAAAGAGCAAGAAAACGTCTACTCAGTGACTGGTAT
+AGGCTCGGCGCTGATGCAAACATGGATGCCGTATTATTAGTTGTTAACTCCGCCTGGAGG
+TTTCTGGCCGTCTGGCGACCCTTCGTAAACTCAATCCAACATGCAACTCAGGAATTGTAT
+CAAAATATCGCCCATTACCTTCTTCATGGCAACGTAAATATACAGAGGGTCACAGCACTA
+ATACAGCTCGTAATGGGACAGGACGATTTACTTTTTAGTATGGATGATGTTCTACAAGAG
+GTCTTCAGAATACAGCTCTATTTGAATAAGATGCTGCCGCACAACTCTCACAAATGGCAA
+AAGCCATCCCCCTTTGACTCCGCAAACTTACTACTTAACTTCAGAGACTGGACAACTGAC
+AATGCTCTCCTCCAAGAGTTGCTACTATCCTATCCCACAATTAATAAAAACAAACACAAA
+AATCACTCCGTCCCTCGTCTAATACAAATCTGGGTAGAGTCTTATTGGCAAGATAGTGAG
+ACAACATTAAAAGATATCCTCAATTTTTGGTACAGTCACTTGGCTGAATATTATGAATAC
+CAAGAACTGTTTGCAGACATAGTTCAGCTGTTTATAAACAAAAAAAGAACGAGGCAATTG
+AAGATTCATTACATTGGTCTAACTGATAAGGAAATCGAAGAAAATAAACCGCCCCTGGAC
+TACGAAAACTTATTTCTCCAATACGAGATAGACAAAACGAACGCAAATGATGAATTGTGC
+GGTGCAACTGACCTCAGTGATTTACTTTTCCAATGGAAACAGGGTGAACCTCTAGAAGTC
+GAAGCCTTCGCTCTAAACGTATCTCCATGGTCACTTGCAAAGACATTGACTCTCTTAGAA
+TCTTCTCTTTACTTGGATATTGAAACAATAGAATTCACAAGACATTTCAAACACAACGAT
+ACAACAATTGACTCCGTGTTTACGCTTTCCAACCAGTTATCGTCCTACGTTCTTGAGACA
+ACTTTGCAGCAAACGCACACCATTTCCTACTGGTTACAAGTTGCACTTGCTTGTCTATAC
+TTACGAAACTTAAACTCACTTGCTTCAATCATTACATCATTGCAAAATCATTCAATAGAA
+AGACTATCTCTCCCGATAGATGTTAAATCAGACCACCTTTTTCAGCGCCTAAAAGTCGTC
+GTACATCCAAACAACAACTACAACGTTTATAGAAGAACAATTAAACATATTTTCCACAGT
+CAGCTTCCTTGTGTACCTTTTACATCACTGCTTATCAGGGACATTACCTTCATAAGAGAC
+GGAAACGATACATTCACTAAAGATGGTAATAACGTGAATATGCAAAAGTTCAACCAAATC
+ACAAAGATAGTCGCTTTTGCGCAATATTTACAACAAAAGCAATATGAAGATATACACTGT
+TCAAATACTACTGCAAGAAGCTTATTAGGGGCTATGATAAAGGTGCACACTTTATATAAC
+GACAACAAAGACAGGGCGTATCAAGTCAGTATAGCTAAGGTTCCAAGGCTTACCTAA
+>MATALPHA2         633 residues Pha 0 Code 0
+ATGAATAAAATACCCATTAAAGACCTTTTAAATCCACAAATCACAGATGAGTTTAAATCC
+AGCATACTAGACATAAATAAAAAGCTCTTTTCTATTTGCTGTAATTTACCTAAGTTACCA
+GAGAGTGTAACAACAGAAGAAGAAGTTGAATTAAGGGATATATTAGGATTCTTATCTAGG
+GCCAACAAAAACCGTAAGATTAGTGATGAGGAGAAGAAGTTGTTGCAAACAACATCTCAA
+CTCACTACTACCATTACTGTATTACTCAAAGAAATGCGCAGCATAGAAAACGATAGAAGT
+AATTATCAACTTACACAGAAAAATAAATCGGCGGATGGGTTGGTATTTAATGTGGTAACT
+CAAGATATGATAAACAAAAGTACTAAACCTTACAGAGGACACCGGTTTACAAAAGAAAAT
+GTCCGAATACTAGAAAGTTGGTTTGCAAAGAACATCGAGAACCCATATCTAGATACCAAG
+GGCCTAGAGAATCTAATGAAGAATACCAGTTTATCTCGCATTCAAATCAAAAACTGGGTT
+TCGAATAGAAGAAGAAAAGAAAAAACAATAACAATCGCTCCAGAATTAGCGGACCTCTTG
+AGCGGTGAGCCTCTGGCAAAGAAGAAAGAATGA
+>MATALPHA1         528 residues Pha 0 Code 0
+ATGTTTACTTCGAAGCCTGCTTTCAAAATTAAGAACAAAGCATCCAAATCATACAGAAAC
+ACAGCGGTTTCAAAAAAGCTGAAAGAAAAACGTCTAGCTGAGCATGTGAGGCCAAGCTGC
+TTCAATATTATTCGACCACTCAAGAAAGATATCCAGATTCCTGTTCCTTCCTCTCGATTT
+TTAAATAAAATCCAAATTCACAGGATAGCGTCTGGAAGTCAAAATACTCAGTTTCGACAG
+TTCAATAAGACATCTATAAAATCTTCAAAGAAATATTTAAACTCATTTATGGCTTTTAGA
+GCATATTACTCACAGTTTGGCTCCGGTGTAAAACAAAATGTCTTGTCTTCTCTGCTCGCT
+GAAGAATGGCACGCGGACAAAATGCAGCACGGAATATGGGACTACTTCGCGCAACAGTAT
+AATTTTATAAACCCTGGTTTTGGTTTTGTAGAGTGGTTGACGAATAATTATGCTGAAGTA
+CGTGGTGACGGATATTGGGAAGATGTGTTTGTACATTTGGCCTTATAG
+>TSM1         4224 residues Pha 0 Code 0
+ATGATGTCCTTTTCCAAAAACGCCACTCCTAGAGCCATTGTTAGTGAATCTAGCACTTTG
+CATGAGATGAAGTTTAGAAATTTTAGAGTTGCCCATGAAAAAATCTCGTTGGATATAGAT
+CTAGCTACTCACTGCATTACCGGTAGCGCTACTATAATAATCATTCCGTTGATCCAAAAC
+CTAGAATATGTAACTTTTGATTGCAAGGAAATGACTATTAAAGATGTTCTGGTCGAAAAT
+CGTCGATGTGATCAATTTATTCATGACGACCCACTTCAAACAAATTTGAATGGATTGACT
+TCACAAAATGTATTATACAGCGACAATTCCATTGAACAGTCACATTTTTTGAGATCTAAG
+TTTGCTAGCTTGAATGAATACCCAGAAACGGACTCTAAATCCCAGTTAACTATAAAAATA
+CCATCTTCCATCAAAATATCTTTGGAGGACGCCAATGCATTAAGTAATTACACTCCGATT
+ACTCCTTCAATTAAGACTACCCCTGGGTTTCAAGAATCTGTTTTCACTCCAATTACATTA
+CAAATTGAATATGAAATCAGAAACCCAAAGTCGGGTATTAAATTCGATACTGTGTATGCT
+GACAAGCCCTGGTTATGGAACGTTTACACTTCAAATGGTGAGATTTGCAGTTCTGCATCA
+TATTGGGTCCCATGTGTCGATTTGCTTGATGAAAAATCTACATGGGAGTTAGAATTCAGC
+GTACCGAGATTGGTTAAAAATATAGGTACTTCGAAATTAATCGGACAAAATGGAGAAGAG
+AGTGAAAAAGAGAAGGAGGATACGCCTGAGCACGATGAAGAGGAAGAGGGGAAGCCGGCA
+AGAGTTATCAAAGACGAAGATAAGGATTCTAACTTGAAAAATGACGAAGAAGGCAAAAAT
+AGTAAAAGCAAAGATGCACAAGATAATGATGAAGAAGAAGAGGAAGGCGAAAGTGACGAA
+GAGGAAGAGGAAGGGGAAGAGGAAAGGCGGAATATTGAGGAAAGCAACAATCCGAGTTTG
+AGGGATGTGATTGTGTGTTGTTCAGAATATTCAAATATTAAAGAACTTCCGCACCCGATT
+GATTTGACGAAAAAAAAATGCATATTTCAGATAATTAATCCTGTGGCTCCACATCACATT
+GGTTGGGCTATAGGCGCCTTTAATTCATGGTCTTTACCTTTGATATCACCTCCAAGTGTT
+GATGCCGAGGACGAAGTAGAGGAAGACAAGTTGAGAGAGAATGTTGTGGACAATGTTAAC
+GATACTATGGATGACGACATTGGTTCGGATATTATACCCATTCAAATTTTCACACTTCCG
+ACGCAGGAAACAGATGAGTTAACAGTTATAAATTCGACAGTTGTCTGCCAAAAAATTATA
+GATTTCTACTCGAAAGAATTTGGGTCTTATCCTTTCACTTGTTACTCTATGGTGTTTTTA
+CCTACCGCACCTTCTAAGCATATGGATTTTGCAGCATTAGGCATTTGTAATACCAGATTA
+TTGTACCCTCTAGAAGTTATTGATAAAGCATTCAGTACTACGAATGAGTTAGCATGGGCA
+CTTGCTAACCAATGGTCTTGTGTGAATATAACTCCTTTAGATATGAACGACTACTGGTGC
+TGTCTTGGTATTGCTGGTTATATGGTGTTTCAGGTAACCAAAAAATTAATGGGTAATAAC
+ACGTATAAATATCAATTAAAGCGTAATAGTGAGGCGATTGTGGAACAAGACTTCGAGAAA
+CCGCCTATTGGGAGCACTTTTACCGGCAGTTCTAGGCCAATATCTTGGTCTTCTAAAGAT
+TTGTCCTTTATACAATTGAAGGCACCGATGATACTACACATACTTGACAGAAGGATGACT
+AAAACAGAACGATCTTTCGGTATGTCTCGAGTATTACCTAAAATTTTCCTTCAAGCTATG
+TCTGGTGATTTACCGAATAATTCGTTGACTTCATCGCATTTTCAACATGTTTGCGAAAGA
+GTTAATAAAAGTAAATTAGAGAATTTTTTCAACGAATGGGTATATGGGTCTGGGGTACCC
+ATATTACGTGTCACCCAAAGATTTAATAGGAAGAGGATGGTTATAGAACTGGGTATAAGG
+CAAGTTCAAGATGAAGAACTTGGCCACGAAAAAGTGGTAGGGGAGGAAGGATTTTTCAAA
+AGTGCACTAGACCACTTAGAACATCCAGATTTGAACCGAACCGAATGCTTCACGGGCTCG
+ATGACTATAAGGATCCATGAACACGATGGTACTCCGTATGAGCATATTGTGGAAATCAAA
+GATACATTCACAAAAATAGATATTCAGTACAATACAAAGTACAGAAGATTAAGGAAAAGA
+GGTGGTGGTGCAAATGATGAAAATGGTGTTGAAAACAATAATGAGGAGAAGCCTATTGTT
+GTGGATGTGAATTGTCTAGGAAATGTATACATGTCGCCCGAAGAGTGTTCCCGATTCAGT
+TTGACGGAATTTAATCGTACGTCTGAGAGTAATGAATTGCTTAAGCAAAACGAAGCATTT
+GAGTGGATACGCATAGACTCTGATCTGGAATGGATTTGCCAAATGCACATTAATCAGCCG
+GATTACATGTTTTCTTCTCAGTTGAGACAAGATGGGGACATAGAGGCCCAACTAGAAGCC
+ATACGATATTATGAGGACGTCGTTGTTAATGGTGGTGTGAAATCACTTGTTTATTCAAGT
+ATTTTGTTTAGAACGGCGATCGACGAGCGTTACTTTTTTGGCATAAGACTCGCGGCGTGC
+GAAGCGCTTAGTAAATACGTATATGATCCGGATTTTACTGGCGGTGTTAAGCATTTAATT
+CAGATTTTTCAGATTTTGTTTTGCCTAGAAGACTCTAATATTCCAAAGAGTAATAACTTT
+GAGAATCCTAAGTTGTATTTCTTACAGTGTAATATTCCCAAATATTTGGCTAAAGTGAAA
+AATGAAAATGGTAAATGTCCAAAATTGGTGAAGCAATTTTTACTGGATATTCTTGTTTAT
+AATGAGAATGGTGAAAATAAATACAGTGATGATGCGTACGTCCGCAGCTTGATTGAAAAT
+GTTGTTAAAGTTGCTTTAAATGAGTATAAAGATAAAGCATATATGGAAAAAGTTAAGACT
+CAGTTATTGAGGTACGAAAATTTGGTGAATTGGCTTTCATCATACGAGTCTTTGATTAAG
+ACTACTATCATGTATGCTAAGTACAAATTGCATAAAGTGGGTGCTTATGACTTTACGGAA
+TTGACAGGAATGATAATGCATACATTAACATTAGGTATAAATAACGGAGATATTTCCAGG
+GAAAGCTTTCAGAATGAGTTTTTAATGGTTTTGAAAATCATGCTTTTAGAAGGTGGTTTA
+AAAAACAAGGATGCCCTTGTTTTGTTTACTGAAATACTTTGCTTCCATGAGGATTCTTAT
+ATTAGGGATAAAAGTGTTGATGTGCTTTCTGAATGTGTAAATCTAGTTGTTATGGATGGT
+AGTTTGGATACCATAAGTGACGATATTAAGTCCTCCGTCCAATCTGTGCACAATGAAGTT
+AAAAATATAAAAAGTGAGGATGATATTGAGTTGTTTTTAAGTGGTCATTACGTCGATGAT
+ATGAAAATAAAAATAGAAAAGATTGGCCGTCAAAATATTAGTGGGTTAATACAAATATGC
+CGAGATATGTTTAAAGGGTATAGCCCTTTGAAGATATTACTCTGGGATGTTTTGAATTTA
+CCTGTTCTTAGCTTGTACCAGAGGAAGCAAATACATGATCTTGTTAGGGTGATGTACACC
+CTAATCAACAGTTTTGTAGTTAGATTGGAAACACCAAGGGAGAGAAGACTTGTGGCGAAG
+ATGAATAGTAATGAAGAAGGTAAACTTGATATTGTTATAAAGCGTGAAAGTATCCTAAAA
+GTACATATTAAAAAGGAAGTAACCTCTACTGTGGAGGCACCCAAGAAGGCGAATAAGATA
+AAGATAAGTTTGAAAGGTGATAAACCTGTTAGAAAAGTGGAAAAACAAATTGTGAAGCCG
+AAGGTAACTAGCAAACAAAGGAAAGTCAAAAGTCATGTGAACCGCATGGGCAGTTTACCT
+TTACGGTTTGTTAAGATCCAACAACAACCTAGAGTAATGGTGCATTTGTCATCCGTCCCG
+TATAGCCAATTCGTTCAAATTACAAAAGTCACATCAAGATCGTTTATGGTTAAGATAAGA
+ACAAAGAATGATGCTAAGAATTGA
+>YCT5        1476 residues Pha 0 Code 0
+ATGAAGCCACAGTGCATACTCATCTCTTTGCTGGTCAACCTCGCATACGCAGAGGAGTAT
+TTGGTGAGGTTCAAAAATCCCACAGCATTCCAACAATTCACTTCGAATTCCAACAGGTCA
+TGGAGACAGTTCATCGACAACAAAATTGAGAAGAAATTCTCCATCGGATCCTTCCGCGGC
+GTGACCATGAACCTGTCCAAGAACTTAGTGAACAAGCTGAAGAAAAGCCCACTGGTGGCT
+GATATTGTGCCCAACTTCAGGTTCGAAGCTTTTGAAGGCGACAGTGTAAATAGCGCCGAG
+TCGAGTTATACGTTTAACGCTACCGCCAAATACTCGTACGAAGACGTCGAGGAAGAGCAA
+AATATAACGTATCAACCAGACGCACCCCGTCACTTGGCCCGGATTTCCCGCCACTACCAA
+CTCCCATTCGACGTTGGGGACAAGGACCGCTACAAAAGCTGGTTCAATTACTACTATGAA
+CACGACTATCAAGGTCAAGACGTCAACGCCTATATCATGGATACGGGTATCTTCGCGGAC
+CATCCGGAATTCGAAGACAGAGTCATCCAGGGGATTGACTTGACCAAAGAAGGGTTTGGC
+GACCAGAATGGCCACGGAACGCACGTGGCGGGACTCGTAGGTTCCAAAACGTATGGAGCG
+GCAAAGAGGGTCAATCTTGTGGAGGTCAAAGTCTTGGGCAAAGACGGGTCTGGCGAGGCC
+AGTAACGTTCTTAGTGGTCTGGAGTTCATCGTGGAACATTGCACAAAGGTCAGTCGCCCA
+CAGGGTAAAAAATGCGTGGCCAATCTAAGTCTAGGGAGTTTCAGGAGCCCCATAATCAAC
+ATGGCAGTGGAGGGGGCCATTGAAGAAGGTATTGTATTTGTTGCCGCGGCGGGGAACTTC
+AATTTAGACGCCTACTGGGCCTCACCTGCGTCTGCAGAAAACGTTATCACCGTAGGGGCC
+TTTGATGACCACATTGACACGATTGCCAAGTTCAGCAATTGGGGGCCCTGTGTAAACATC
+TTTGCCCCAGGCGTGGAAATTGAGTCGCTATCTCATCTGAACTACAACGACACTTTAATT
+TTGTCAGGTACATCTATGTCGACGCCCATTGTCACCGGAGTTGCAGCGATCCTACTCTCG
+AAGGGAATTGAGCCTGAAATGATAGCACAGGAGATTGAGTATTTGTCCACGCGTAATGTT
+TTCCATAGAAGAACGTTGTTTTTCAAGCCTTCTACGCCAAACCAGATTCTTTACAACGGC
+GTCGATAAACTGGACGATCCATATGACGACGAAACGTTCCCTCGATTGAACATAGAGGCA
+ATTGCTAAGGAACTGGAGGAGTACAATGCCACTTTACAAACTCCTATGTCTGAGAATCTT
+CAATCTGGTTCAAAACTGTGGGGTTGGAATAACGATGTCACACTACCTCTTGGTGAGATT
+CGATTGAAGAGGCGTGATTTTATGAAAAATTTGTAG
+>PETCR46       510 residues Pha 0 Code 0
+ATGTGGAGCAGGAACGTCAGATTGCTTGGATCATGGACAAGGTCCTACATGGTCCCCGCC
+ACCAAGAGAAAAACCATCCCCGTGTACCCACCTGTGCAGCGCATAGCTTCGTCGCAGATT
+ATGAAGCAGGTGGCCCTCTCAGAAATAGAGTCTCTGGATCCCGGGGCCGTTAAGAGGAAG
+CTCATCAGTAAAAAGAACAAGGACCGCTTGAAGGCAGGCGACGTGGTCCGGATTGTGTAC
+GACTCGTCCAAGTGCTCGTACGACACCTTTGTTGGCTACATCCTTTCCATAGACCGCAAA
+CAACTGGTGCAAGACGCCTCGTTGCTGTTGCGGAACCAGATAGCCAAGACGGCCGTCGAG
+ATTAGAGTGCCATTGTTTTCGCCGCTGATCGAGAGAATCGACTTGCTAACCCCCCACGTC
+TCGAGCAGACAAAGAAACAAACACTACTACATCAGAGGTACAAGGTTGGATGTCGGCGAC
+CTCGAGGCAGGTCTAAGAAGAAAGAAATAG
+>YCT7        828 residues Pha 0 Code 0
+ATGTCACGTCCTGAGGAGTTGGCACCACCGGAGATTTTCTATAATGATAGCGAAGCACAC
+AAGTACACGGGTTCGACCAGAGTGCAGCATATCCAGGCGAAGATGACGCTGAGGGCGTTG
+GAGCTTTTGAATCTGCAGCCGTGCAGTTTCATTCTGGATATCGGGTGCGGGTCCGGACTG
+TCTGGGGAGATTTTGACGCAGGAGGGAGACCATGTGTGGTGTGGTTTGGATATATCGCCC
+AGCATGCTTGCGACCGGTCTTAGTAGAGAGCTGGAGGGCGACTTGATGTTGCAGGATATG
+GGCACCGGGATACCGTTCCGGGCGGGCTCGTTTGACGCGGCTATTAGTATCAGTGCGATC
+CAATGGCTGTGCAATGCGGACACTTCATACAACGATCCTAAACAGCGGTTGATGAGGTTT
+TTCAACACATTGTATGCTGCACTGAAGAAGGGAGGGAAATTTGTGGCCCAGTTCTACCCG
+AAAAACGACGACCAGGTGGACGACATACTGCAGTCTGCCAAGGTGGCAGGGTTCAGTGGC
+GGGCTTGTGGTGGACGACCCAGAGTCTAAAAAGAATAAGAAGTACTACCTTGTGTTGAGC
+AGTGGGGCCCCACCGCAGGGGGAGGAGCAGGTGAATTTGGACGGTGTGACCATGGACGAG
+GAGAACGTCAACTTGAAGAAACAACTGCGCCAGCGCTTGAAGGGAGGCAAAGACAAGGAG
+TCTGCCAAGAGTTTCATTCTAAGAAAGAAGGAGCTCATGAAAAGACGTGGGAGGAAAGTT
+GCGAAGGACTCCAAGTTCACCGGGAGGAAAAGAAGACACAGGTTCTAG
+>YCT9         447 residues Pha 0 Code 0
+ATGGCGCTGTCCAGGAGCGTGGGGCGAGGATCAAAACTCACGTCCCCAAAAAACGACACA
+TACTTGCTAGCATCCTTTCGGTGGAACCTCGACCGAGACTTGCTCTTCAGGTGTGAAAGG
+TACTTTTGCATGTGGGCGTCCACAGGGTACTCCTCCTCCTGCTCCTGCTTCCCTGCCACA
+CGTTCCGCCTCAGTCGACTCCACTCCTTCAGTCGACTCCACTGGCTCCACCAGCGACGTG
+GTAGACGACCGTGGCGAAACCTCCATGGACTCCTGTGGCAGGATCACGTTATCGTACGTG
+ACCGAATGCCGTTTGTTGGCTTCTGCGGAATTGAGTCTGCGGATCTTAAGAAACTCTTCG
+TCTTGCAACAAATCCTTAGTCTCCGTCATTCTTGCAATCTGTTTTGGCGCTCTTGCTGCA
+AGCCGTGCTGAACAACCACCTGCGTGA
+>ARE1         1833 residues Pha 0 Code 0
+ATGACGGAGACTAAGGATTTGTTGCAAGACGAAGAGTTTCTTAAGATCCGCAGACTCAAT
+TCCGCAGAAGCCAACAAACGGCATTCGGTCACGTACGATAACGTGATCCTGCCACAGGAG
+TCCATGGAGGTTTCGCCACGGTCGTCTACCACGTCGCTGGTGGAGCCAGTGGAGTCGACT
+GAAGGAGTGGAGTCGACTGAGGCGGAACGTGTGGCAGGGAAGCAGGAGCAGGAGGAGGAG
+TACCCTGTGGACGCCCACATGCAAAAGTACCTTTCACACCTGAAGAGCAAGTCTCGGTCG
+AGGTTCCACCGAAAGGATGCTAGCAAGTATGTGTCGTTTTTTGGGGACGTGAGTTTTGAT
+CCTCGCCCCACGCTCCTGGACAGCGCCATCAACGTGCCCTTCCAGACGACTTTCAAAGGT
+CCGGTGCTGGAGAAACAGCTCAAAAATTTACAGTTGACAAAGACCAAGACCAAGGCCACG
+GTGAAGACTACGGTGAAGACTACGGAGAAAACGGACAAGGCAGATGCCCCCCCAGGAGAA
+AAACTGGAGTCGAACTTTTCAGGGATCTACGTGTTCGCATGGATGTTCTTGGGCTGGATA
+GCCATCAGGTGCTGCACAGATTACTATGCGTCGTACGGCAGTGCATGGAATAAGCTGGAA
+ATCGTGCAGTACATGACAACGGACTTGTTCACGATCGCAATGTTGGACTTGGCAATGTTC
+CTGTGCACTTTCTTCGTGGTTTTCGTGCACTGGCTGGTGAAAAAGCGGATCATCAACTGG
+AAGTGGACTGGGTTCGTTGCAGTGAGCATCTTCGAGTTGGCTTTCATCCCCGTGACGTTC
+CCCATTTACGTCTACTACTTTGATTTCAACTGGGTCACGAGAATCTTCCTGTTCCTGCAC
+TCCGTGGTGTTTGTTATGAAGAGCCACTCGTTTGCCTTTTACAACGGGTATCTTTGGGAC
+ATAAAGCAGGAACTCGAGTACTCTTCCAAACAGTTGCAAAAATACAAGGAATCTTTGTCC
+CCAGAGACCCGCGAGATTCTGCAAAAAAGTTGCGACTTTTGCCTTTTCGAATTGAACTAC
+CAGACCAAGGATAACGACTTCCCCAACAACATCAGTTGCAGCAATTTCTTCATGTTCTGT
+TTGTTCCCCGTCCTCGTGTACCAGATCAACTACCCAAGAACGTCGCGCATCAGATGGAGG
+TATGTGTTGGAGAAGGTGTGCGCCATCATTGGCACCATCTTCCTCATGATGGTCACGGCA
+CAGTTCTTCATGCACCCGGTGGCCATGCGCTGTATCCAGTTCCACAACACGCCCACCTTC
+GGCGGCTGGATCCCCGCCACGCAAGAGTGGTTCCACCTGCTCTTCGACATGATTCCGGGC
+TTCACTGTTCTGTACATGCTCACGTTTTACATGATATGGGACGCTTTATTGAATTGCGTG
+GCGGAGTTGACCAGGTTTGCGGACAGATATTTCTACGGCGACTGGTGGAATTGCGTTTCG
+TTTGAAGAGTTTAGCAGAATCTGGAACGTCCCCGTTCACAAATTTTTACTAAGACACGTG
+TACCACAGCTCCATGGGCGCATTGCATTTGAGCAAGAGCCAAGCTACATTATTTACTTTT
+TTCTTGAGTGCCGTGTTCCACGAAATGGCCATGTTCGCCATTTTCAGAAGGGTTAGAGGA
+TATCTGTTCATGTTCCAACTGTCGCAGTTTGTGTGGACTGCTTTGAGCAACACCAAGTTT
+CTACGGGCAAGACCGCAGTTGTCCAACGTTGTCTTTTCGTTTGGTGTCTGTTCAGGGCCC
+AGTATCATTATGACGTTGTACCTGACCTTATGA
+>RSC6         1452 residues Pha 0 Code 0
+ATGGTAACACAGACCAATCCGGTCCCTGTTACATATCCAACGGATGCTTATATCCCCACG
+TATCTGCCCGATGATAAGGTCTCCAATCTGGCAGATTTGAAAAAATTGATAGAAATGGAT
+TCCAGACTAGATTTGTATCTGACAAGAAGGAGGCTGGATACGTCCATCAATTTACCTACA
+AACACCAAGACCAAGGACCATCCCCCCAATAAAGAGATGCTGAGGATTTACGTCTACAAC
+ACTACGGAAAGCAGCCCTCGCAGCGATTCTGGCACCCCAGCGGACTCAGGCAAGACTACA
+TGGACACTGAGAATAGAAGGTAAGCTTCTGCACGAGTCCGCAAACGGAAAGCACCCATTT
+AGTGAGTTTTTGGAAGGTGTCGCGGTCGACTTTAAAAGACTGAAACCGCTGGGCATGGGC
+AAGAAGAGGAAACGCGATTCGTCATTGAGCCTTCCTTTGAATCTGCAACAACCCGAATAC
+AATGATCAAGATAGCACCATGGGCGATAACGACAACGGCGAGGATGAGGACAGTGCAGAG
+GCAGAATCCAGGGAGGAAATTGTAGACGCACTGGAATGGAACTACGATGAAAACAACGTT
+GTGGAGTTTGATGGTATCGACATCAAGAGGCAAGGCAAGGATAATTTGCGATGCAGTATA
+ACCATCCAGTTGAGGGGTGTCGACGGTGGAAAAGTACAGTACTCGCCCAACTTAGCTACC
+TTGATAGGTATGCAAACGGGCTCCGTTAATGACGCGGTTTATTCGATCTACAAGTACATT
+TTGATCAACAATCTGTTTGTTACGGAACAAACAGAGGCTCAAGATGGTTCCAACGATGCC
+GAAGACAGCAGTAACGAGAATAACAATAAAAACGGTGCTGGTGACGATGATGGCGTCGAG
+GGAAGTACTCCAAAGGATAAGCCCGAATTGGGTGAAGTGAAGCTAGATTCACTCTTACAA
+AAGGTATTGGATACAAACGCCGCGCACCTCCCCTTGATGAATGTTGTGCAAACCGTGAAC
+AAACTGGTATCACCCCTACCGCCCATCATCCTAGATTATACAATTGATCTTTCCAAAGAT
+ACCACCTATGGTGCTACCACCTTGGATGTAGATGTGTCGCACATTCTCCACCAGCCTCAA
+CCCCAGCCAAATTTACAAAAAGAGGAAGAAACAGATGCTGAAGACACAGCAAAACTACGT
+GAAATCACAAAGCTTGCCTTGCAGTTGAACTCTAGTGCTCAAAAATACCAGTTTTTCCAC
+GAACTGTCTTTGCATCCAAGAGAAACGCTGACTCACTACTTATGGTCTTCCAAGCAAAAC
+GAGCTTGTGCTGCAGGGCGACCAATACTTCAATGAAGATGCTGCAAGAACGAGTGACATA
+TACAGTAACAACAACAATGACAGGTCACTAATGGGCAATATCTCACTACTGTACTCCCAA
+GGAAGACTATAA
+>THR4         1545 residues Pha 0 Code 0
+ATGCCTAACGCTTCCCAAGTTTACAGATCTACCAGATCCAGCTCTCCAAAGACAATCTCT
+TTTGAAGAGGCTATCATTCAAGGTCTGGCCACTGACGGTGGTCTTTTCATTCCACCAACT
+ATTCCACAAGTGGACCAAGCCACTCTTTTCAATGATTGGTCAAAGCTCTCCTTCCAAGAC
+TTAGCCTTTGCTATCATGAGACTATACATTGCCCAAGAAGAGATTCCAGATGCTGATCTA
+AAGGACTTGATCAAGAGATCTTATTCTACTTTCCGTTCTGATGAAGTCACCCCCTTGGTG
+CAAAACGTCACTGGTGACAAGGAGAATTTGCACATTTTAGAATTATTCCACGGTCCTACC
+TACGCTTTCAAAGACGTTGCTTTACAATTTGTCGGTAATCTTTTTGAATACTTCTTACAA
+AGAACCAACGCCAATTTACCTGAAGGCGAGAAAAAGCAAATCACTGTGGTCGGTGCTACT
+TCCGGTGACACTGGTTCTGCAGCCATCTACGGTTTAAGAGGCAAAAAGGACGTTTCCGTT
+TTCATCTTATATCCAACCGGTAGAATTTCCCCAATTCAAGAAGAACAAATGACCACCGTT
+CCAGATGAAAACGTCCAGACTTTGTCTGTTACCGGTACTTTCGACAACTGTCAAGATATC
+GTCAAAGCTATTTTCGGTGACAAAGAATTCAACTCTAAACACAACGTCGGTGCTGTTAAC
+TCCATCAACTGGGCAAGAATCTTGGCCCAAATGACCTATTACTTTTATTCATTCTTCCAA
+GCCACCAACGGTAAGGACTCCAAGAAGGTCAAGTTCGTTGTGCCAAGTGGGAACTTCGGT
+GATATATTGGCCGGTTATTTTGCCAAGAAAATGGGTTTGCCTATTGAAAAACTGGCCATC
+GCTACCAATGAAAACGACATTTTGGACAGATTTTTGAAATCTGGTCTATACGAAAGATCA
+GACAAGGTTGCTGCTACTTTATCCCCAGCAATGGATATCTTAATCTCTTCTAACTTTGAA
+AGACTACTATGGTACCTAGCTCGTGAATACCTAGCTAATGGTGATGATTTGAAAGCCGGT
+GAAATCGTCAACAATTGGTTCCAGGAATTGAAGACCAACGGTAAGTTCCAAGTTGACAAA
+TCCATCATTGAAGGCGCATCAAAGGACTTTACATCAGAAAGAGTTTCCAATGAAGAAACA
+TCTGAAACAATCAAGAAGATCTACGAATCATCTGTAAATCCAAAACATTACATCTTAGAT
+CCTCACACAGCTGTCGGTGTTTGCGCCACAGAAAGATTGATTGCAAAAGATAATGACAAG
+TCCATCCAATACATTTCTCTATCTACCGCTCACCCAGCTAAATTTGCCGATGCTGTAAAC
+AATGCATTGTCTGGATTTTCCAATTATTCATTTGAAAAGGATGTTTTGCCTGAGGAATTG
+AAGAAACTATCCACATTAAAGAAGAAATTAAAATTCATCGAAAGAGCTGACGTTGAATTG
+GTCAAAAACGCTATTGAAGAAGAACTTGCTAAAATGAAATTATAA
+>CTR86        1692 residues Pha 0 Code 0
+ATGCCTATGAACAATTTTCTAGATGAATTCAATTTATTTGATTCAATCATTACCATGATG
+AAGAACGACCCATGTTGCGTCGAGGATTATGAGCCAATCGTCGAAAACCTGAACCGTATA
+TTTCAAAGGACGTTTAATGATGAAGAACATAGGAAATCAATGGCTAACTCCCAGCTTTTT
+TGGGAACGATTAAGAGACACCTTGGAAGCAATGCTGTTGCCAGCGTCGTTAAATGAGAAT
+AGCTCAATACCGTATACAAGAACAGTGAGGGGCCTTATCTTAATGATGAGAAACCTTGCC
+GCTGAAAACCAGGAAATACCCCAAAAGCTTTTACTACAAAACCTCGTAATTCGTGGTTTT
+CTGCATGCAACTAGTGAGTATGTCGTTGACACTCCGCTAATCAAACATCTATACATCGCA
+TGTTTAACGTGCCTTTTCAATATACAGCAGAACTACTCTACAGTGGATATGACTACTTTT
+CCAGCTCTTTTACAATTTCTTCAATACCCTTATGGGATCAAATTGGAAGACGGTGAAGAA
+GAAGAGCATTTCTGGCTACCATATTTATTTCTTTTCAAGACGTATCTCAACAATGATGAA
+TTTTCCAACGAATTTTTCAGGGATAATGATACACCCCAGAAAGACTATTATTGTGTTAGG
+GATAGAATATTTTTCGATATAGTGACAGCCAAATTCATCCAGGATCAAGAGAATTCCTTT
+TTAATTGAGAAGGGCAGAAACTATCTGGATGATTCAAAATTGGAAATAACTTCTATTGAC
+CTATCTGTCTTAGAATGTATTAGCAAAAGTCTTACAACTGCTTCTTTTGGTAAATACCTC
+AATGGGTTAGAAGAAAGACAGCCAGGAAAATTCACCACTTTGTTGCAGATATTGCAATTG
+GTTGTAACGAGTAAAGAAGATTGGAATACCTATGAGTTGACTGCAATTATGTCATGGTGC
+TACCCCATTCTGCAACGTCTTGCATGCAAGGATATTCCTGCCTTTTTCAATAAAAGTTGT
+AACGATTATGCTCCTTCAGTTGCCATCCAATTACACTCCACTTTACTTTCTTGCCTGGAC
+ATAATTTCTGACTTGTGCAAATTCAATCATGTTAGAAAATTCTTAATTTCGTATGACTCT
+GTGAAAATATTGGTATCTCTCTTGGATACTTTCCAAAAGAATTTGTTGAGGATTAATTTT
+TTGAAAGGAAACGGTGATACGGTGAATGAAATTAAAATCACAGATCATGAAGGTAACAAA
+ATCGAGGACCGGTTATTAATTTTCAACCGTGTTAATACCAACGAATCCTTTATTAGGGCT
+GATAATTTTCCCCATTGTAAATTAGTAATAATCGAAATATTGGCATCGTTAGTGTATGCA
+CATCCTGAAATCCAAGATCAAATAAGAGAATTAGGTGGTCTTGCATTAATTCTTTCCAAT
+TGTGTCATCGATGATAATGATCCGTTTATCAAGGAAAGATCTATTGTTTGCTTGAAGTTT
+TTGTTAAAGAATAATGCCAAGAATCAGGAATATGTCAAAAAAATGGAAGCTCAAGACGTT
+GTTCAAGACGATGCATTGAGCAAAGCTGGGTTTGAAATATCAGTTGAAAAGGGCGGGAAA
+GTTAGATTAGTATCTAAAGAAGAAGACCCTGGGAACGAGAATTCTGAGATTATTAGCATA
+GATGAAGATTAA
+>PWP2         2772 residues Pha 0 Code 0
+ATGAAATCCGATTTCAAGTTCTCTAACCTTTTAGGTACGGTCTACAGGCAAGGTAACATC
+ACCTTTTCCGATGATGGCAAGCAACTACTCTCACCGGTGGGGAATAGGGTCAGCGTGTTT
+GACTTAATCAACAACAAATCGTTCACGTTTGAATACGAGCATCGCAAAAATATTGCTGCC
+ATTGATCTGAACAAACAAGGCACATTGCTGATTTCTATTGACGAGGACGGTCGCGCCATC
+CTTGTCAATTTCAAAGCCCGTAACGTGCTTCACCATTTCAACTTCAAAGAAAAATGCTCC
+GCTGTGAAGTTCAGCCCTGATGGGAGACTCTTTGCATTAGCCTCAGGCAGGTTTTTACAG
+ATTTGGAAGACTCCAGATGTTAATAAAGACAGACAGTTTGCTCCCTTCGTCCGCCATAGG
+GTGCATGCGGGACACTTTCAAGACATAACGTCTTTGACGTGGTCACAAGATTCCAGATTT
+ATCCTTACGACTTCCAAAGACTTAAGCGCAAAAATATGGTCCGTAGATTCAGAGGAAAAG
+AACCTTGCGGCGACAACATTTAATGGGCACAGAGACTACGTTATGGGTGCGTTCTTCAGT
+CATGATCAGGAAAAAATCTACACTGTAAGCAAAGACGGTGCTGTCTTTGTCTGGGAATTT
+ACCAAGAGGCCATCCGATGACGACGACAATGAAAGTGAAGACGACGACAAGCAAGAAGAA
+GTAGATATTTCGAAATACAGCTGGAGAATCACAAAGAAACATTTTTTTTACGCAAACCAA
+GCCAAAGTAAAGTGTGTCACCTTCCATCCAGCAACAAGGCTTTTAGCTGTCGGATTTACT
+AGTGGGGAATTCCGTCTTTACGATTTGCCTGATTTCACTTTGATTCAACAGCTTTCTATG
+GGGCAAAACCCAGTCAACACCGTTAGCGTCAACCAAACCGGCGAATGGCTGGCGTTTGGT
+TCCAGCAAACTGGGCCAATTACTAGTTTACGAATGGCAATCGGAATCGTATATCTTGAAG
+CAGCAGGGCCATTTCGATTCCACAAATAGTCTTGCATACTCTCCGGATGGTTCACGTGTA
+GTGACAGCATCCGAAGATGGGAAAATCAAAGTTTGGGACATTACATCAGGGTTTTGTTTG
+GCCACTTTTGAAGAACACACCTCTTCAGTTACTGCTGTACAGTTTGCGAAAAGGGGTCAG
+GTCATGTTCTCATCATCGTTAGATGGTACGGTGAGAGCGTGGGACTTAATCAGGTATCGT
+AATTTTAGAACATTCACTGGTACTGAAAGAATCCAATTCAATTGTTTAGCGGTGGATCCA
+TCAGGTGAAGTGGTTTGTGCCGGGTCCCTGGACAATTTTGACATTCATGTTTGGTCCGTG
+CAAACTGGTCAATTATTAGATGCTTTGTCCGGACATGAAGGCCCTGTTTCGTGTCTTTCA
+TTTAGTCAAGAGAACAGTGTCTTAGCTTCTGCATCATGGGATAAAACAATTAGAATCTGG
+TCCATATTTGGTAGAAGCCAACAAGTAGAACCTATAGAAGTTTATTCCGATGTTTTAGCC
+TTATCAATGAGACCAGATGGTAAAGAAGTTGCAGTATCTACCTTAAAGGGTCAAATATCC
+ATTTTCAACATAGAAGATGCCAAGCAGGTGGGCAACATTGACTGTAGAAAGGATATAATA
+TCTGGTAGGTTTAATCAAGATAGGTTCACTGCCAAAAATTCTGAACGATCCAAATTTTTT
+ACTACAATACATTACAGTTTTGATGGTATGGCTATTGTGGCTGGTGGTAATAATAACTCC
+ATTTGTCTATATGATGTTCCAAATGAAGTCTTGTTAAAAAGATTCATTGTGTCCAGAAAC
+ATGGCTTTGAATGGTACTCTCGAATTTTTAAACAGTAAGAAAATGACTGAAGCAGGTTCA
+TTAGATTTGATTGACGATGCAGGCGAAAATTCAGATTTGGAGGATCGTATTGATAATTCT
+TTACCAGGGTCTCAAAGAGGTGGCGACCTGTCCACAAGAAAAATGAGACCAGAGGTTAGA
+GTTACTTCGGTGCAATTCTCCCCAACGGCGAATGCATTTGCCGCTGCTTCAACGGAAGGT
+TTATTGATATATTCCACCAATGACACGATATTATTTGATCCCTTTGATCTGGATGTGGAC
+GTCACCCCCCATTCTACTGTAGAGGCGCTACGAGAAAAGCAGTTTTTAAATGCATTAGTA
+ATGGCGTTCAGGTTAAATGAAGAATATTTGATCAATAAAGTCTATGAAGCCATACCTATT
+AAGGAAATCCCCTTGGTTGCAAGTAATATTCCTGCAATATATTTACCGAGGATTCTGAAG
+TTCATCGGTGATTTTGCCATTGAATCCCAACACATTGAGTTTAACCTAATTTGGATCAAA
+GCTCTATTATCTGCGAGCGGTGGTTACATAAATGAACACAAATATCTCTTCTCGACGGCT
+ATGAGGTCGATACAAAGATTTATTGTTAGAGTGGCTAAGGAAGTAGTCAATACCACTACT
+GATAACAAATACACCTATAGATTTTTGGTATCAACTGATGGGTCCATGGAAGATGGCGCG
+GCTGATGATGACGAGGTTCTATTAAAAGATGACGCAGATGAAGATAACGAAGAGAACGAA
+GAGAACGATGTAGTCATGGAATCTGACGACGAGGAAGGATGGATTGGTTTCAATGGGAAG
+GATAACAAATTACCCTTGTCTAATGAAAATGATTCCAGTGATGAAGAAGAAAATGAGAAA
+GAGCTTCCTTGA
+>YCU9         777 residues Pha 0 Code 0
+ATGGATGACGATCACGAACAGTTGGTCGAAGAACTGGAGGCCGTCGAGGCCATCTATCCG
+GATCTTCTCTCCAAGAAGCAGGAAGACGGAAGCATCATCGTTGTGAAAGTGCCGCAGCAT
+GAATACATGACACTGCAGATCTCCTTCCCGACACACTACCCCTCCGAGGAGGCTCCTAAT
+GTCATCGAAGTTGGTGTCTGCACTTCTTTGGCTAAGCGCGATCTCTACGATACCAAGTAC
+CTTCAGCATTTGTTCCAGGAAGTGATGGACTCTGTTTTCCACCGCGGATCTGTCTGTCTA
+TTTGACTTCCTCACAGAACTCGACGGTGTCTTGTACGTTGAACCAGAGGAGGAGACAGAA
+CCGGTCCAGCAGAGTGACATTCCCACAGACCCCTTCGAGGGCTGGACCGCGTCGGACCCC
+ATTACTGATAGAGGCTCGACTTTCATGGCCTTTGCAGCACATGTTACCTCCGAGGAACAA
+GCGTTTGCCATGCTAGACCTACTGAAGACCGACTCCAAGATGCGTAAGGCAAACCATGTC
+ATGAGTGCATGGCGAATCAAGCAGGATGGCTCTGCGGCAACATATCAAGATTCCGATGAT
+GACGGTGAAACGGCCGCCGGCTCCAGAATGCTGCACCTCATCACCATCATGGATGTGTGG
+AACGTCATCGTTGTGGTGGCCCGTTGGTTCGGCGGTGCCCACATAGGTCCCGACCGGTTT
+AAACACATCAATTCTACGGCAAGAGAAGCTGTTGTCAGGGCCGGCTTCGACTCGTAA
+>YCV1        1752 residues Pha 0 Code 0
+ATGGTGCGTTTTGTTTCAATTTTAAGTTTATTCGGCTGCGCGGCGACGCTTGTCACGGCC
+CATGATGACATGGACATGGACATGGATATGGACATGGATATGGACATGAATATCGATACG
+ACAACGTCTCAATCCATAGATGTCTCATCCACGGCTTCAATCGTCCCCGTGCCACATGAA
+CCAAAACATTTGCATGGCCTTCCTATACTGCAATCGCCCTCGCTTACCCCTGCGGAGAGA
+TTGTACTGGGAAAACTACAACACCACAACCTACTTTACTACACAGGCTGGGAATAGGTCT
+GCCCTTCGCTACCACATTATTACGCTGCTCTTGGTTGCATTTGTGCTCTACCCTGTGTCC
+CTGGCGCTAAGCGCCGCCCGTTCTAGGTGGTACTTACCCCTGCTGTTTGTTAATCTATGC
+ATTTGTATTTCGTCCGTAATGGCATTGTCCGTGTTCAAAAATACTTTCCCGGAAGAAGAC
+TGGTATGCGCATAATATCTATGGCACCACTTCTGTGCTACTTCTCGTTTTTATGCTTGTT
+CACTTCTTCGCTGCGGTGCTTTCTGTCCCCGTCTCATTAGCATCGAAAAAGGAGTACCGT
+CCGGTTGACACCATCCCTCTGAATGATCTTGAATCTACGCCCGTCATGGTGAATAGTGCA
+CGTGGCTCTCCAAGTCCTTCTTCCAACAGAGACACGTTGTTCTCGCTCTCTTCAGACACC
+ACGACCGCCACGGCCACCAATAATAATAAACGGAGACGCGCTGAAGGCGAAGACGAGGGT
+GATAACACCTCCAACCACGACACTTTGCGCGACGAAGACTACGATAATGATGACGACGAA
+ATTGCTTCCATTGAAGCGCCACCTCTGCTTCCTCAAGACATACCCGTTTTCCGAATCTTG
+TTTACCAACACGAAGTACCAGATGCTTGCCGCGCACCTCTCGTGCGTCGCCAACGTGGTC
+TTTCACATGCTTACCTACCCGCTATTCATGTACATCTTTGTAGACCTAATCATCGGCTTC
+GCTGTAGGTAACTTGCTCGGCAAGGGCATCCGCATCTTTAATCTCTTGGCCCACTGGATT
+AAGGGCGGCGTATTTTTTACTCTGGGCGTTGTCTCTTTAGCAAGATACTGCGGTTTCGCA
+GCTAAGTACGGCTGGGCATGGAACAACATCAGCTTCACCTCTCAACTCACACAAACGCGT
+TCCTCCAATCTTCTTTTCCGGTTTGCTCCTGCGGGGACTTTCACCATGGAATTCGTTGAA
+TCCTTCCTCATTTTCTTTTACGGGTCCACCAACATCTTCTTGGAGCACCTGGCAGGAAAC
+GGCGGCGCATGGACTGCCAAGGATTTACAGCATGTGTCGATAAATTCTCACCGGCCCCAA
+GGTGTGTGGGCTACTCACGGAGTACAAGCTCAACCATTGGCGATTCGAGCATGCCCGCAA
+ACGGCCACAGACCGATGTAGTTGCTGCCACACCGGGGTACTCTCCAAACCCGTTCCCCGC
+TTTCACCATATTTTGGACTGGGATTCTGATGTCCCAGCACGCACAGTCCTCGCAATTTTC
+TACTACCATTCACACGCAATGGGGATACTTGTTGTCCTATGGGTCCTTCTTCCGTCTGCT
+AACATTTTTGATTCTGTTTTTGGTGCCCAACACCAACAGTGCCGCATCCAAGCCTTTCAC
+GGAGTTGATCACCTCGTTCTGTCTCCTCTGTGGTGGTCTGGTATTTATGGAGTCCACGGA
+TCAGTCCATTGA
+>G10         474 residues Pha 0 Code 0
+ATGCCGCGCATAAAGACCAGAAGATCCAAGCCTGCACCTGACGGGTTCGAAAAAATCAAG
+CCAACCCTCACAGATTTCGAAATCCAACTCAGAGATGCCCAAAAGGACAAGTCGTCTAAG
+CTCGCAGCAAAGTCCAATGAGCAGCTCTGGGAGATAATGCAACTCCACCACCAGCGCTCT
+AGATACATATATACTCTGTACTACAAGAGAAAGGCCATCTCCAAAGACCTTTACGATTGG
+TTGATAAAGGAAAAGTATGCTGATAAATTGCTAATTGCCAAATGGCGCAAAACCGGGTAT
+GAAAAACTGTGCTGTCTGCGCTGCATTCAAAAGAACGAAACTAACAACGGTAGCACTTGC
+ATCTGCAGGGTGCCTCGTGCACAGTTAGAGGAAGAAGCACGCAAAAAGGGCACACAGGTG
+TCCTTCCATCAGTGCGTCCACTGCGGCTGCCGTGGATGTGCAAGCACAGACTAA
+>HCM1         1599 residues Pha 0 Code 0
+ATGATGAATGAAGACATATCCATCATTGATGGCCATAATAGTTTTTTAACGGAAAAAAGC
+ACCGTGCTATTAACCCAAGCCAAGAGAACACTAGAAGACGAAAAGGAAATGATTACTCCC
+CCGAGCTCAACTGTGAGAAAAACAATGAAGGAAGTAAATAAGAGGCCGTCGCATCCCCTC
+TCACCGGATCACTCGTCCCCAATTGCTCCATCTAAGGCCAAGCGCCAAAGATCGGACACA
+TGCGCTCGGTCCAATGGTAACCTAACCTTGGAAGAAATTCTTCAATCTTTGGAAAGAAGA
+AGAATAAATGGTGAACTCGCCAAGAAACCTCCATATTCGTATGCAACTTTGATTTGCTTG
+GCCATTTTGCAATCTCAGGAGGGAAAGCTAACGCTATCCCAGATATATCATTGGATCCAC
+GTTCACTTCCCTTATTACAAGCAGAAAGATGCTAGTTGGCAAAATTCAATAAGACATAAC
+TTGTCTTTAAATGATGCGTTCATCAAGACTGAAAAGTCCTGCGATGGTAAGGGTCATTTC
+TGGGAGGTCAGACCGGGTGCCGAAACAAAATTTTTCAAAGGTGAAAATCGTGGTTATGAA
+TTTGTAAAGGACTCCTTACAAGACATTGGGAAGTATTTTGAAATAGATTCTACACTTGAT
+GAATTAGAACAAGTTGAGAGTGGAGAAGGCAATGATGATCTTCCTGACGAGGAAGAAAGA
+GAGGAAGCAGGGAAATTCCCTTCCATTGAAATTCAATTGAACTCCTCCCCTATACTGAGA
+GTTTCCCAGTTACATCACATACCGCAATTGAAAACAGACAACAGTGTACTGAACCCTCAC
+GAAAACCTAGAATCGATGCGGAACATGATAGAAAACGATGTCAACAATATAGATTCCTTG
+GAACCTCCTTATGTCATGAAGAAATATCATACTTCTTTAGGCTTACCGTCGCTGGTGAAT
+GCCAAAGATCATTTCCAGGCGGGTGTGAAAAACAATAATATCACCCAGGCAAATAGATTT
+AATACACTCCCTATAACTAGCGCAAAGTCTCCTCAGAATTTCAGAAAATATTTCACCTCA
+TTCAATTCAAATTTTGAAGATTTATCTCCACTTCGAAGTAATGTAGGGGCTGGTTCTCTA
+CTCGACCCACTTCCGTATTCCCCATTGAAGCTGTACGATCAGAAAAATCTTGCGCTCATG
+TCGAAACCACAATCTCAGCAATCATATTCCAATTCTCAACTTCCACCTCCACCTTCCTCT
+CATGGTTCGGACTTACTTAAAACACCCAAGATGAGGCATTCCGATGGCTTAGAGAAAACC
+CCATCGCGGTTGATAAGCACACCTAAGGACGGTAACTCGATTTTGAGGAAATGGCAGACT
+CCTTCACACCTTTTTGAAGATTTGTACTGTTCTCCGCTATTTAGAGCTATAGAGACTCCA
+ATCAGGTATATCACGACGCCGGGGGGCAACTTTGGAAACCCAAATTTCACCAAGAAAGTC
+CTCTGCACCCGATGTCCTCACAAGCGCAACGAATTCCAAATTTGCTTCAAGCGGGCTGTT
+TGGCGTGGATGTTTATTCTGTTTGGAAGCGCGCAACTGA
+>RAD18        1464 residues Pha 0 Code 0
+ATGGACCACCAAATAACCACTGCAAGCGACTTCACGACTACTTCAATACCGAGCCTGTAC
+CAATTGGATACACTTTTGAGATGTCACATTTGTAAAGATTTTCTAAAAGTCCCCGTCTTA
+ACACCTTGTGGCCATACATTTTGTTCCCTTTGTATTAGAACACATTTGAATAACCAACCA
+AATTGTCCTCTCTGCCTTTTCGAGTTCAGAGAGTCCTTGCTGAGAAGTGAGTTCCTGGTC
+AGTGAAATAATTCAAAGTTATACATCCCTACGATCTTCCTTACTAGATGCACTAAGGATA
+CCGAAGCCTACCCCTGTCCCTGAGAATGAGGAAGTACCAGGTCCTGAAAATTCTTCATGG
+ATAGAACTCATATCAGAGTCTGAAAGTGACAGTGTAAATGCCGCTGATGATGACTTGCAA
+ATTGTTGCAACAAGTGAAAGAAAACTTGCCAAAAGATCCATGACTGATATATTACCACTG
+AGTTCCAAACCATCCAAAAGGAATTTTGCAATGTTCAGAAGTGAACGTATCAAGAAAAAA
+TCAAAGCCAAATGAACAAATGGCCCAGTGCCCCATATGTCAACAATTTTATCCTCTTAAA
+GCCCTTGAAAAAACACATTTGGATGAATGCCTAACTTTACAATCACTAGGCAAAAAACCA
+AAAATTTCTACCACTTTCCCTACAGAGTCAAATCCACATAACAAAAGTTCATCCAGATTC
+AAGGTACGAACTCCAGAAGTCGACAAAAGCTCATGTGGTGAGACCTCACATGTGGATAAG
+TATTTAAACTCAATGATGAGTGCAGAACACCAAAGATTGCCGAAGATCAATTTTACGTCT
+ATGACTCAATCCCAAATAAAACAAAAACTGTCATCGTTGGGACTGTCAACTAATGGTACT
+AGGCAAAACATGATTAAAAGATACAATCACTACGAAATGCTTTGGAATTCTAATTTTTGT
+GATTCTCTAGAACCTGTTGATGAAGCTGAACTAAAAAGACAGTTGTTAAGCTGGGATGTT
+TCACACAATAAAACCCCCCAAAATAGTAGCAACAAGGGTGGAATTTCTAAATTAATGATA
+ATGAAGAGTAATGGGAAATCTTCTTCATATAGGAAATTACTTGAAAATTTCAAAAACGAT
+AAATTTAATAGGAAAGGATGGATGGTTATGTTTCGGAAGGATTTTGCTAGGCTTATCAGG
+GAAGCAAAAATGAAAATAAAAACAGGTTCATCGGACAGTTCAGGTTCAGTGGGACATTCT
+AATGATGGAGATGGTGTTGAAAAAGTTCAAAGTGACCAGGGAACCGAGGATCAGCAAATG
+GAGAAGGATCAGGACACTGTTATCAACGAAGATAGAGTTGCTGGTGAAAGAAATTTGCCT
+AACGAAGATTCAACTGATGCTGACTTATCAAGAGAATTAATGGACTTGAATGAATATAGT
+AAAGACCCACCCGGTAACAATTAA
+>CYPR         957 residues Pha 0 Code 0
+ATGTGGTTGAAATCCTTGCTGCTCTGCCTGTACTCCTTAGTACTCTGCCAAGTCCACGCT
+GCACCTTCATCAGGGAAGCAGATTACCTCCAAGGATGTTGATCTTCAGAAAAAATATGAG
+CCCAGTCCCCCCGCCACACATCGTGGAATAATCACTATCGAATACTTTGATCCCGTTTCG
+AAGTCGATGAAAGAGGCGGATCTGACTTTTGAGTTGTACGGTACTGTCGTGCCCAAAACT
+GTGAACAACTTTGCTATGCTGGCCCATGGTGTTAAGGCAGTTATCGAAGGGAAAGATCCC
+AATGATATACATACTTACTCGTACCGTAAGACCAAAATCAACAAGGTTTACCCTAACAAG
+TATATCCAGGGTGGTGTGGTTGCCCCAGATGTGGGTCCTTTCACCGTCTATGGGCCCAAA
+TTTGATGACGAAAACTTTTACTTAAAACATGACAGGCCTGAAAGACTCGCAATGGCCTAT
+TTTGGACCTGATTCTAACACCTCGGAATTCATCATCACCACTAAAGCCGATGGAAATGAG
+GAATTGGATGGCAAAAGTGTCGTGTTTGGTCAAATAACTTCTGGTCTAGATCAACTAATG
+GATGCTATTCAATACACAGAAACAGACGAATATGGAAAGCCTCAGCATGAATTACGGTTC
+CTGTATTTCGTTCTAGAAATCTTAAAAATTAGTAACATCTTAGATTTGCACGCTGCGTAC
+ACAGAAAAAGTCGAGAAGTTTAGAAATGGCGATGTGTCTGTTGGCTCCACTTTGGAAAAC
+ATCTTCCGTAACGATAAAGCCTACACACCTTTAACCACCTCCACTGGAACCACCGCCTAT
+GATTTAAACCACCCAATTTCCAGAGCCTTGATGTGTTTAACTGTTCTTGGCCTTTGTTTC
+ATTGCCTACAAGGGCATGCACGAAAAGCCTCATACGGTTTCATTAAGACACAAGTAA
+>YCW1        366 residues Pha 0 Code 0
+ATGATCAGTTCGTGTGTTACTAGATGTTTTGGTAGGGGTAAATGCCTTCCAGGGCCTGCC
+ACTGCCTCGATATACCAAACGATAAGATGTATATCCACTAATTCAAATAAAGCTGCTGAG
+GCGCCAATATTTCCAAAGCTGGAAGACGTGAAGATGCATGAGCTCATAGGAAACAACAAT
+TTTGGTAAAAAGACCTACTACGTGGAGAGAAGCAGGACCGGAAATCTACCGGTGTATTCC
+GCTTATAAAAATGGAGGTAACAAGATTATCACGGAGATCAGAAAGATTGAAGGAGATGTA
+ATTCAACTAAGAAATGACTTGCAGGAGCAACTGCCTTTCATACCCAAAAAATCATGGCTG
+TGGTGA
+>YCW2        1548 residues Pha 0 Code 0
+ATGTCCACCCTGATTCCTCCACCTTCTAAGAAACAAAAGAAAGAGGCTCAACTTCCCAGA
+GAAGTAGCTATTATTCCGAAAGATTTACCCAATGTTTCAATCAAGTTCCAAGCTTTAGAT
+ACTGGTGACAATGTAGGTGGCGCCCTGAGAGTTCCCGGTGCTATCTCCGAGAAACAGTTA
+GAAGAACTTTTAAATCAATTGAACGGTACTTCAGACGATCCAGTGCCATATACCTTCAGC
+TGTACAATTCAAGGTAAGAAGGCCAGTGACCCTGTGAAGACGATTGATATAACAGATAAC
+CTATATTCTTCATTAATAAAACCAGGCTATAACAGTACAGAAGATCAGATCACGCTACTG
+TATACGCCAAGAGCAGTTTTCAAAGTCAAGCCGGTAACTAGAAGTTCATCAGCCATTGCA
+GGTCACGGTTCCACAATTTTGTGTTCTGCCTTCGCACCACATACGAGTTCTAGGATGGTA
+ACCGGTGCAGGTGATAATACTGCAAGGATTTGGGACTGTGACACCCAAACGCCAATGCAT
+ACTCTAAAGGGTCACTACAATTGGGTTCTCTGCGTTTCCTGGTCCCCCGATGGAGAAGTA
+ATTGCTACGGGATCCATGGACAATACCATAAGATTATGGGACCCAAAAAGCGGTCAGTGT
+CTAGGTGATGCTCTCAGAGGTCATTCCAAGTGGATCACTTCTTTAAGTTGGGAACCTATA
+CATCTTGTGAAGCCGGGCTCCAAACCAAGATTAGCTTCATCTTCTAAGGATGGTACTATT
+AAGATTTGGGACACTGTGAGCAGAGTTTGCCAGTATACGATGAGTGGTCACACAAATTCA
+GTGTCTTGTGTCAAATGGGGCGGCCAAGGTCTATTGTATAGTGGCTCTCACGATAGAACC
+GTACGTGTATGGGACATCAATTCGCAGGGCAGATGTATCAACATTTTGAAGTCGCATGCG
+CACTGGGTTAATCACTTATCTTTATCTACAGATTACGCATTGCGCATTGGTGCATTCGAT
+CATACAGGTAAGAAGCCTTCTACACCAGAAGAAGCCCAGAAAAAGGCATTGGAAAATTAT
+GAAAAAATCTGTAAAAAGAATGGAAATTCAGAAGAAATGATGGTTACTGCAAGCGATGAT
+TATACCATGTTTTTATGGAACCCACTAAAATCTACCAAGCCTATAGCAAGAATGACCGGT
+CACCAAAAATTAGTCAATCATGTGGCGTTCAGCCCTGATGGTAGGTATATTGTCTCAGCG
+TCTTTTGATAACTCTATCAAACTTTGGGACGGTAGAGATGGTAAGTTTATCTCCACATTT
+AGAGGGCATATAGCCAGCGTATACCAGGTTGCGTGGTCATCGGACTGCCGACTACTGGTG
+TCATGTTCCAAAGATACCACGTTGAAAGTGTGGGATGTAAGAACTAGAAAACTTTCTGTT
+GACCTCCCTGGTCATAAAGACGAAGTTTATACCGTCGACTGGAGTGTCGACGGTAAAAGA
+GTGTGTAGTGGTGGGAAAGACAAGATGGTAAGATTGTGGACGCATTGA
+>SSK22        3945 residues Pha 0 Code 0
+ATGATGATGGATATACTGAATACACAGCAACAAAAAGCGGCTGAAGGCGGGAGAGTTCTG
+GCTCCTCATACCATCTCAAGTAAGCTCGTGAAGAGATTATCAAGTCATTCCAGCCATAAA
+CTATCAAGATCTGATTTGAAAGCATTGGGTGGCTCGGAAACAATAAGCGACGGCCCCAGT
+CAGCTGACTTTTAAGGACCGATACGTTTTCAATGAATCGCTATACTTGAAAAAGCTAAAA
+AAGACCGCTTTAGATGACTACTACACGAGGGGCATAAAACTCACTAACCGCTACGAGGAA
+GACGACGGTGATGACGAAATTATTCGGTTGTCTAATGGCGACAGAATTGATGAAGACCTG
+CACTCAGGTGTCAAGTTTTTCTCCACTACACCTTATTGCAGGAAAATGAGGTCAGACAGT
+GATGAACTAGCTTGGAATGAAATTGCGACCGAACGGTTCAAATGGCAGTCAATGCTGGCC
+AGAGTGCTGAAGGGAGATATTGTTAAAGGTGAAAAGACGAGGATTGCTAACCAAGTCAAG
+AAACCAGGGTTAAATAAGGAGCTCTCAGATGAGATATGGCTCGAATTGAAGGCATGGCTG
+AATGGGAGGACCATGCAAGAGATGGAACAGTCGCTTACATATTTAAGAGATAGTTCAGAT
+TCCGTTTTTGAAGAGATAATGAAGTTTCAAATTCCACAGGGCAAGATATTGAGCCTGGAT
+GCACTGGAGGCCATCTTACAAGACCTCATGAACAGATATCACAGCGTTGTCTCTTATTGG
+CCTAACTTGAAAAAAATGTATAAGGATAAACCAATCACCAATACTGCAGAATTTACCGCT
+AGAATAGACGTAATGAATTCTTGGCTGAACTTTAAAACGAACTTAACGTTGAGGAGGCAA
+GAGTTGGACGACTGGATAAACCGTTTCTCACCGATAAGTAGTTCGGATAATTGCCAAGAG
+GATTTTGATGGTGTGCCCCAATGGAACTGCAAAATGAAGATTCTTGCAGAACAATTGATG
+AAGGAAAAGAACATCGAGTCTATATTCCAAAAAAAAATTTTCTATCCGCTATCACCTTGG
+ATGTTCAAACTGAAACTACATTTTATAGTCTACAGAGAAACTTTGACAAAGATGAACATA
+AAATATCCTTATGAAAGGTTAAGATCACTACTGGCGTTCCCCGTCTATTTAATCAAAGAA
+GTTATTTTGACTAGATTGTCATATGCACGAAAGCTTAAAAATCCAACAATGATGATGATC
+GATCAAATGATCGATGATTTTAACGCTTTTATTCGACTTTCTGTGCAATTGAAGTACACA
+CTGACAAAATATTGCTCCAATTTGCCGTTCGATGTGGATTTTGACCCGACGTTCGAAAAT
+ACTGTAATAGAAGCCATTCGTTATTTATTTTTTCTGTTGAATTTAAAGTTGATTGATTCC
+AGTAAACAAAATTTCAAAGCACCCGATCTACTCTTGAAATACTGGGATCACCTAAAAAAC
+ACCGGTCACTATATTAACGGTGCAGAAACCGTGATTCCAAATGAATTTCTCAAGTTAACT
+TTGAGACTCGTACATAAATTGCAATTCTATCTTTTGAAACAACAAAACTTCCCACCAACA
+TTTGCTAACGCTTCAGAAGCAGAAAAATGGCTAAGTTCCATTTTCGAAAATTTGGGTGCC
+ATGAAAAGAAAGCTGAACAGGTTCAGCAATATTCTAGTCAAGGCGTTCCAAAATTCTGCT
+GTTTATCAGATTAATCATAATGCACAACTTGTTAAAAAGTTAAAAGATGCTCACTATTTT
+TTGGTATACTCCGGTAACACTTTTGAGTCTAGTGGTGTATATATGTTTGCTGCTCCTGAA
+TTATTAGGTTGTGACAATGATACCATCTTAAGAATTTTGCGAAATAAATCCATTGGCTGT
+GATTTGGTCCCAAAGCTTGACATTGGAAATAATTTGAATGTGTATGATATAACAACAAAA
+GAAACAGATTTGAACATTCTAGTATCGAAAGGGGAGGATTCCAAAGGAATTCCTTACTAC
+CGAGTAGTAGCAAATTCGTCAAGTGATTTGGACAGGCATGCTCATCAGTCCAAAAAGAAG
+AATTTTTCAACAGACCCTTTTGATCAGCACCTTGATGAAAAGAACAATGAAGTTTTTGAA
+TTGGAAGTTGCTTTGAGCTCATTGGGTGCACTAGTTGTACTATATCCTGGAGAGCCAGTA
+GTTTGGGATGGACCAGTATATAAGCTTCCAGGTAACAACCTTTTTGCATCCAACGAAATG
+GATTTAGGGAAAATTGGTAACCCAAATACGTTGATTTTACTCAATCAAGGTTCTAATTAT
+GCACTGACTTATCAAATCGACAAGTTTAATCAAACGGTAGGTGATTCTGTTTCATTCATA
+GAGAAACGTTGTTCACTCAATTCAATTGAATCCTCCCTACAAAAAATCAATAAGGCATAT
+TACAAACTTACTTATACAGTATTGAACAACTACAAAGGAATTCTAGGTAGCTTTATGAAG
+CAATGTCCGGGAAATGAGTTGTTAAATTCGATATTCATGTTTGGAAGGGATTTTGGAAGA
+AGTTTCCTTAAATATAACGCCTTTAGCTCAAAGAGGAAGTACGTTATCATCTTTCTGATG
+GTTAAATTAGGAATGAACTGGTTGAAATTCCTTGTTGAAGAGTGTGATCCTACCGATCAG
+CGAACTTTCCGATGGTGCGTTCTTGCAATGGATTTTGCGATGCAGATGACTAGTGGTTAT
+AATATCCTGGCGCTGAATGTAAAGCAATTTCAAGAACTGAAGGAGAGGGTATCAGTATGT
+ATGTCATTATTAATTTCACATTTCGACGTTATGGGTGCACGAGCCACTGAAGCTGAAAAT
+GGCATGCAACAGGCAAGATTGAATATTGATACTGAAGAGAATATTGATGAAGAGGCCACC
+CTAGAAATAAACAGCAGGTTGAGACTGGAAGCTATAAAGACGTTGGAAAAGACTATGAAG
+AGGAATCCCAGGCAAATGGGTAAGGTATTGGATGCTACAGATCAGGGAAACAAATACCTA
+CTATCGCTAGCATCCTCATTATCGAATGTATCAATGAGGTGGCAAAAAAGAAGCTTCATT
+GGCGGTGGAACATTTGGACAGGTATACTCTGCAATTAATCTGGAAAACGGTGAAATCTTA
+GCTGTTAAGGAAATAAAGATACACGATACCACAACAATGAAGAAGATTTTTCCCCTGATT
+AAAGAAGAGATGACCGTATTGGAAATGTTAAACCATCCTAATATTGTCCAGTACTATGGT
+GTCGAAGTACATCGCGATAAAGTTAACATCTTCATGGAATACTGTGAGGGTGGTTCTTTA
+GCCTCGTTATTGGATCATGGAAGAATTGAAGATGAAATGGTAACACAAGTGTACACATTC
+GAACTATTAGAAGGTTTGGCATATTTGCACCAATCTGGCGTGGTGCATCGCGACATTAAA
+CCGGAGAATATCTTGCTGGATTTCAATGGAATCATAAAATATGTGGATTTTGGTACGGCA
+CGTACCGTTGTAGGATCTAGGACTAGAACTGTGCGGAACGCAGCCGTTCAAGATTTTGGA
+GTAGAAACAAAGTCCCTCAATGAAATGATGGGGACACCGATGTATATGGCTCCAGAGACT
+ATTTCAGGCTCGGCAGTTAAGGGAAAACTTGGAGCGGACGATGTATGGGCATTAGGATGT
+GTTGTGCTAGAAATGGCCACAGGTAGACGACCTTGGTCTAACTTGGATAATGAATGGGCC
+ATCATGTACCACGTTGCTGCAGGTCGAATACCGCAACTACCCAATAGAGACGAAATGACT
+GCAGCGGGAAGAGCCCTTCTTGGAAAGGTGTTTGGTTCAAGACCCCACTATGAGGGCTAC
+TGCTGTGGAACTACTGATAGACCCTTGGATGATACAAATCCGTGA
+>SOL2          948 residues Pha 0 Code 0
+ATGACTACGACGGTACCCAAGATATTCGCGTTTCACGAGTTTTCAGACGTGGCAGAGGCC
+GTAGCTGACCATGTAGTCCACGCGCAAGACGGTGCATTGGCTCCAAAGAACGAGAGGAAA
+CACTCTGTTCCCAACATCAGCATGAATGCACTGGATATGACGAGAGAGGCCTCTTGCAAA
+AGCACAGCATCTGCCGCGGAAGGGAAAAGTGGTAGCAGTGGTAGTGGCAGTGGTAGCAGT
+AAGCCCAAAAAGGAGAAACGGTTCAAGATTGCTCTCTCCGGTGGGTCATTGATCGAAGTG
+CTACACGAAGGTCTGCTAAAACGAGACGATGTACGGTGGGGAGACTGGGACATTTACTTT
+GCAGACGAGAGACTTGTACCCTTCAGCTCGAATGAAAGCAATTATGGATGCGCCAAAAGG
+AAGATTTTGGACCTGATAGACACGGCGAAGTATGGAACTCCGAAGGTGTACCACATTGAC
+GAGTCATTGATTGACGACCCGCAAGAATGCGTTGATAACTATGAAAAGGTGCTAATCCGC
+GGGTTTGCCGGTAGAGATTCCGTCAAACTTCCGATGTTCGACTTGTTCCTGCTTGGTTGT
+GCCCCCGATGGTCATATCGCATCACTCTTCCCTAACTTCCAGGACAATCTACGTGAGAAA
+CTTGCATGGGTGGTGCCCGTGGAGAACGCTCCTAGTGGGCCCTCGACCAGAATTTCGCTG
+ACTATACCTGTAATCTGCCATTCTCACAGGGTTACTTTCGTTGTCGAAGGTGCAACCAAG
+GCGCCCATCATCAAGACCATTATGGAAAGGCCTGAAAAGGGCCTACCTAGCAGTATTGTC
+AACGAAGGTGCTGCTGGTCGTGTATCATGGTTTGTTGACGACGATGCTCTTACGGACGTC
+CTCGTCACCAAAAAAAAGTATAAATTCCACCAAGGTTTGTCTATTTAA
+>ERS1          783 residues Pha 0 Code 0
+ATGGTGTCGTTAGACGATATACTAGGTATCGTGTATGTTACGTCATGGTCGATATCGATG
+TATCCACCGATAATCACCAATTGGCGCCATAAGTCAGCGAGCGCGATATCGATGGATTTT
+GTCATGTTAAATACGGCAGGTTACTCTTACCTGGTCATATCCATATTTTTGCAATTGTAC
+TGCTGGAAAATGACGGGTGATGAGTCTGACTTGGGCAGGCCCAAGTTGACGCAATTTGAT
+TTCTGGTATTGCCTGCATGGGTGCTTGATGAATGTTGTCTTATTGACCCAGGTGGTAGCT
+GGAGCGAGAATCTGGCGATTTCCAGGTAAAGGTCACCGCAAGATGAATCCATGGTACCTA
+AGGATTTTACTCGCATCACTGGCCATTTTTTCACTGCTAACCGTACAATTTATGTACTCC
+AACTACTGGTACGATTGGCATAACTCAAGAACTCTGGCGTATTGCAACAATTTGTTTTTA
+CTCAAAATATCGATGTCACTAATCAAGTACATCCCACAAGTGACGCATAACTCGACAAGA
+AAATCTATGGATTGTTTCCCCATTCAGGGTGTGTTTCTAGATGTCACTGGCGGTATCGCC
+TCGCTGCTCCAATTGATTTGGCAGTTGTCTAACGATCAAGGTTTCAGTCTGGATACGTTC
+GTGACAAATTTTGGAAAAGTGGGACTGTCAATGGTAACTTTAATATTCAACTTCATCTTT
+ATCATGCAGTGGTTTGTATATCGATCTCGAGGCCATGATCTGGCGTCAGAGTACCCGCTG
+TAG
+>PAT1       2394 residues Pha 0 Code 0
+ATGTCCTTCTTTGGGTTAGAAAATAGCGGTAATGCGCGGGATGGTCCTCTGGACTTTGAA
+GAGAGTTACAAGGGCTATGGCGAGCACGAACTTGAGGAGAACGACTATTTGAACGACGAA
+ACATTTGGTGATAATGTTCAGGTTGGTACCGACTTTGATTTTGGAAATCCTCACAGCAGC
+GGCAGCAGCGGCAACGCAATTGGTGGTAATGGCGTCGGTGCCACGGCTAGATCATATGTT
+GCAGCTACTGCAGAAGGAATTAGCGGCCCTAGGACCGATGGAACGGCAGCAGCAGGACCT
+CTAGACCTGAAGCCAATGGAATCTTTGTGGTCTACTGCACCACCTCCAGCAATGGCGCCT
+TCACCCCAAAGTACAATGGCTCCGGCTCCTGCTCCGCAGCAAATGGCCCCCCTACAGCCA
+ATCTTGTCGATGCAAGACTTGGAAAGACAACAACGTCAAATGCAGCAACAGTTTATGAAT
+TTCCACGCCATGGGTCATCCACAGGGTCTCCCACAGGGTCCGCCTCAGCAGCAATTTCCA
+ATGCAGCCTGCGTCGGGTCAACCAGGTCCCTCACAATTTGCGCCTCCACCTCCACCTCCT
+GGCGTTAATGTGAATATGAATCAAATGCCAATGGGTCCTGTACAAGTTCCAGTTCAAGCT
+TCGCCTTCACCCATCGGTATGTCCAACACTCCTTCTCCAGGCCCTGTGGTTGGCGCAACT
+AAAATGCCTCTGCAAAGTGGACGCAGATCGAAGAGAGATTTGTCGCCTGAAGAGCAAAGA
+CGTTTGCAGATTCGTCATGCCAAAGTGGAGAAAATCTTGAAATACTCAGGTTTAATGACT
+CCTCGTGATAAGGACTTCATCACCAGATATCAGTTGTCTCAAATTGTCACTGAGGACCCT
+TACAATGAGGATTTCTACTTCCAGGTCTACAAGATTATCCAAAGAGGCGGTATCACGTCC
+GAATCCAACAAAGGTTTGATTGCTAGGGCGTATTTGGAACATTCTGGACACAGACTCGGT
+GGTCGCTATAAGAGAACCGATATTGCCCTACAGAGAATGCAAAGTCAAGTAGAAAAGGCT
+GTCACTGTGGCTAAGGAAAGACCTTCTAAGTTGAAGGATCAACAAGCGGCTGCTGGTAAC
+TCTAGCCAGGATAATAAGCAAGCAAACACGGTTCTGGGCAAAATCTCTTCCACTTTGAAC
+AGCAAGAATCCAAGAAGACAACTGCAGATCCCCAGACAACAGCCTTCTTCTGACCCCGAT
+GCGCTAAAAGACGTCACTGACTCTCTGACCAACGTGGACTTGGCCTCTTCAGGGTCCTCC
+TCTACGGGCTCTTCTGCCGCTGCTGTTGCTTCTAAGCAAAGAAGAAGATCTTCATACGCG
+TTCAACAACGGTAATGGTGCCACAAATTTGAACAAATCTGGGGGCAAAAAATTCATTCTT
+GAGTTAATTGAAACAGTTTATGAAGAGATTTTAGACTTGGAAGCTAACTTGAGGAATGGC
+CAGCAAACTGACAGCACTGCAATGTGGGAGGCCCTTCACATCGACGACAGTTCATATGAC
+GTAAACCCTTTCATTTCGATGCTATCATTTGATAAAGGTATCAAGATTATGCCTAGAATT
+TTTAATTTCTTGGATAAGCAGCAAAAATTGAAAATCCTGCAAAAAATCTTCAATGAATTA
+TCACACTTGCAAATCATCATATTGAGTTCCTACAAGACTACACCAAAACCAACTTTGACA
+CAATTGAAGAAAGTCGATCTGTTCCAAATGATCATATTAAAGATCATTGTCTCGTTTTTG
+TCTAATAACTCCAATTTTATCGAAATTATGGGTCTGTTGCTACAGTTAATCAGAAACAAC
+AACGTTTCGTTCTTGACCACCTCCAAAATTGGTCTAAATTTGATCACCATTTTGATTTCT
+CGTGCCGCATTAATCAAGCAAGATTCATCAAGATCTAATATTCTTTCCTCTCCTGAAATC
+TCCACATGGAATGAGATTTATGATAAATTATTCACTTCATTGGAAAGTAAGATTCAGCTG
+ATTTTCCCTCCAAGGGAATATAACGTCCACATCATGCGTTTACAAAATGACAAGTTTATG
+GATGAAGCATACTTTGGCCAGTTCCTAGCTAGTTTAGCACTAAGTGGAAAGCTAAACCAC
+CAGAGAATCATTATTGATGAAGTACGTGATGAAATCTTTGCCACTATTAACGAGGCGGAG
+ACCTTACAAAAGAAAGAGAAAGAATTGAGTGTATTACCTCAGAGGTCTCAAGAATTAGAC
+ACAGAGTTAAAATCTATTATTTATAATAAAGAGAAACTATACCAAGATTTGAATTTGTTC
+CTAAACGTTATGGGGTTGGTGTATCGCGATGGTGAAATATCAGAACTAAAGTAA
+>SRB8         4284 residues Pha 0 Code 0
+ATGAATAACGGTTCTGGTCGATACTTGCTGACTCCCCCAGATGATCTTCACCCCTATGTG
+CCAAGCTCGAAACCTCAGGAACAAGTATACCCTGATTTCAAGCCTTGGGAGCACACTGCA
+GCAGAAGATCAAATCCTAGCAAACTTTGTGGCTAAGGGCTTTTACCATACACCAATGGTA
+AATTTCGAGTCCATATCTGCGAGATCATCTGTTCATGAATCATTAGTCACTCAATCCAAC
+ATTCTTTCCCAGCAATTCGACAAAATTATCAAGATTAGAGAAGACCACATTAATAAGATC
+CCCTCAAATTCCACGACGACATTACACGGGCCTGGTTTTCAGTTGCCTAATAGAATAACC
+CTTACTGATCATAGAAAGGAAACGTGGTTGCATGAATTGAGTTCGTCTCACACTTCGCTG
+GTCAAAATTGGCAAGTTTATACCTCACGGCTTGAAAAGAAGGCAAGTCATCGAGCAGTGC
+TATTTAAAATTTATACCATTGAAAAGGGCGATTTGGTTGATAAAGTGCTGCTATTTTATC
+GAATGGAAATCGAACCACAAAAAGAAGAGGTCAAATGCTGCTGGGGCAGATGATGCCATT
+TCCATGCACCTGCTAAAGGACTGGACGGATACCTTTGTATACATCCTGGAAAAGCTCATC
+TTTGATATGACAAATCACTATAACGATTCTCAACAACTGCGTACGTGGAAGAGGCAGATT
+TCTTATTTTTTAAAACTTTTGGGGAATTGCTACTCACTAAGATTGATCAATAAGGAAATC
+TTTCATCATTGGCTTGTAGAGTTTATAAATAAGATGGAAAACTTCGAATTTTTGCCATTA
+TCTTTACATATTTTGATGATTTTTTGGAACGACATCTGCCAAATTGATACAAATGCTCCT
+GTTGCGGCTACAATAACATCAAGTCAAAAAGAGCCCTTCTTTCTGGTAACAAAAATCACT
+GATATGCTATTGCACAAATATTATATTGTTTCCAGCAGCAAATCAATGATAAATGACGAG
+AACTACATCATCAATGATATAAAGAAAAACAACAAGATAAAGTTGAATATTCTCAAAATA
+TTATCCAGTTTAATTTTGAAAATTTTTCAAGAACAATCTTTAGAGGTGTTTATATTTCCC
+ACATCTAACTGGGAAATTTACAAGCCCTTACTTTTTGAAATAGTCTCAAACGCCGACACT
+AATCAAAATTCTGATATGAAGAAAAAATTAGAGTTAATTAGTTACAGAAACGAGTCATTG
+AAGAATAATTCTTCTATACGAAACGTAATAATGTCTGCCAGCAACGCAAATGACTTTCAA
+TTAACTATCGTCACCTGTAAACAATTTCCAAAACTATCATGCATTCAATTAAATTGTATA
+GATACTCAGTTCACCAAGCTACTGGACGATAACCCTACAGAATTCGATTGGCCCACTTAC
+GTTGACCAAAATCCCCTTACAATGCATAAAATTATTCAATTAATTCTCTGGTCCATACAT
+CCATCAAGGCAATTTGATCACTATGAATCTAATCAACTGGTAGCGAAATTATTACTATTG
+CGAATAAATTCAACAGATGAGGATTTGCACGAATTCCAGATAGAAGATGCCATTTGGTCA
+TTGGTTTTCCAATTAGCCAAAAATTTTTCGGCCCAAAAGAGGGTGGTATCATATATGATG
+CCTTCTTTGTATCGCCTGCTTAATATACTAATTACTTATGGCATCATTAAGGTCCCTACG
+TATATCAGAAAGCTAATCAGTTCCGGCCTACTTTATCTCCAAGATTCCAATGATAAGTTT
+GTGCATGTCCAGCTGTTAATTAACTTGAAAATTTCACCGTTGATGAAAAGTCAATACAAT
+ATGGTATTGAGGAACGTTATGGAATATGACGTTAAATTTTATGAAATTTTTAATTTCGAC
+CAACTCGTGGAAATCACAGAACAAATCAAAATGCGAATACTCTCCAATGATATAACTAAT
+TTGCAACTGTCGAAAACTCCTCTGAGCATTAAAATCATGGTTGCAGAATGGTACTTATCA
+CATTTATGTTCCGGTATTTTATCTAGTGTTAACCGCACAGTGTTGCTAAAAATATTCAAG
+ATTTTTTGTATCGATCTGGAGGTTTTCCACCACTTTTTTAAGTGGATCGAGTTTATTGTC
+TACCATCAATTGCTAAGTGATATAGAATCTCTGGAGGCATTGATGGACATCTTGCTATGC
+TACCAAAAATTGTTCTCACAATTCATTAATGACCATATTCTTTTTACGAAGACGTTCATA
+TTCATTTACAAGAAAGTTTTGAAAGAAAAAGACGTGCCTGCTTATAATGTGACTTCATTT
+ATGCCATTCTGGAAATTTTTTATGAAAAACTTCCCTTTTGTTTTAAAGGTGGATAACGAT
+TTAAGGATTGAGTTACAATCTGTTTACAATGATGAGAAATTGAAAACTGAGAAGCTGAAG
+AATGATAAATCAGAAGTCTTGAAGGTGTATTCCATGATCAATAATTCAAACCAAGCTGTT
+GGACAGACTTGGAATTTTCCCGAGGTGTTTCAAGTAAACATCAGGTTTCTACTACACAAC
+TCCGAGATCATTGATACAAATACAAGCAAACAGTTCCAGAAAGCACGAAACAATGTCATG
+CTTTTGATTGCCACTAACTTGAAGGAGTACAATAAATTTATGTCCATTTTCTTGAAAAGG
+AAAGACTTTACTAACAAAAATTTAATTCAATTGATCTCTCTAAAACTTCTAACTTTTGAA
+GTGACGCAGAATGTGTTGGGGCTCGAGTATATTATTCGATTATTACCAATAAACTTGGAA
+AATAATGACGGCTCATATGGTCTGTTTTTGAAGTATCATAAAGAACAATTCATAAAGTCA
+AATTTTGAGAAAATTTTACTTACATGTTATGAATTAGAAAAAAAATATCATGGCAACGAA
+TGTGAAATAAATTATTATGAGATCCTATTGAAAATTTTAATAACTTATGGGTCATCTCCC
+AAATTACTTGCAACATCTACAAAAATCATTATGTTGTTATTGAATGATAGCGTGGAAAAC
+TCATCTAATATTTTGGAGGATATTTTGTACTACTCAACTTGTCCGTCGGAAACCGATCTT
+AACGATATTCCATTGGGTAGTGGACAACCAGACAATGACACTGTTGTAACCAACGATGAT
+AAAAGTGACGATGATGATCACACAGTCGACGAAATTGATCATGTAGAATATTACGTTATG
+ATGGACTTTGCCAATCTTTGGGTTTTCCAAGCGTTTACCTGTTTCTGCATCAAAAAAATC
+ATGGAGAATAATGAGCCAGCAATGGCAATGGAAGACTTGAAGAACTTCATATTCCAAATT
+ATCGAAATAACTAATTCTAATGATTTATGTTCACAAATATTTGACCAACTGAAGGATATG
+CAGACCATTGAGATGATAACCCAAATAGTGGAGAAAGATTTCTGCACTTCTTGTTTGCAA
+AACAACAACCAAAAGATAGATGATAATTACATCGTTGTGGTGATCGAGATTATAACGTCA
+TTATCGATGAGGTTTCAAAGAGAAACTTCTGGTATGATAGTTATTTCCATGGAGAACTAT
+CATTTACTAATAAAGATCATAAGACAATTAAGTGAACTGAACGAAGGAAATTTATCTAAG
+AGAGAAATCCAAATAGATGCCGTCTTGAAAATTTTTAGCTTTCATCAGGATTCCATTTTC
+CAACGCATCATCGCTGATTTATCAGCTGATAAACCCACAAGTCCATTCATTGATAGCATA
+TGCAAGCTGTTTGATAAAATATCATTTAATTTAAGATTGAAGCTGTTCTTGTACGAAATT
+TTGTCTTCATTGAAATCATTCGCCATCTATTCATCCACAATTGATGCCCCAGCATTCCAC
+ACAAGCGGTAAGGTCGAACTACCGAAGAAATTGCTGAACTTACCACCATTCCAAGTGTCC
+TCTTTCGTTAAGGAAACAAAACTTCATAGTGGCGACTACGGGGAAGAAGAAGATGCAGAC
+CAAGAAGAATCGTTTAGTTTAAATTTAGGAATCGGCATAGTTGAAATAGCGCACGAAAAC
+GAACAGAAATGGCTCATTTATGACAAGAAAGATCATAAATATGTCTGCACATTTTCCATG
+GAGCCGTACCACTTCATCTCCAACTATAATACCAAGTACACAGATGACATGGCTACAGGC
+AGTAATGATACGACTGCGTTTAACGATTCCTGTGTAAACCTGAGTCTTTTTGATGCTCGG
+TTTGAGAGGAAAAATCCACATTGA
+>YCX3         384 residues Pha 0 Code 0
+ATGTTGTTCTATAAGCCTGTGATGAGGATGGCGGTGAGACCGCTAAAAAGCATAAGATTC
+CAGTCCTCATACACCAGTATTACTAAATTGACGAACCTAACAGAATTTAGGAATTTGATC
+AAGCAAAATGATAAACTAGTCATCGATTTTTATGCTACTTGGTGTGGCCCCTGTAAGATG
+ATGCAACCACACTTAACGAAATTAATTCAGGCTTATCCAGATGTAAGATTTGTCAAGTGC
+GACGTGGACGAATCACCAGATATTGCCAAAGAGTGTGAAGTGACGGCTATGCCCACCTTT
+GTTCTTGGCAAGGATGGCCAACTCATCGGCAAGATCATTGGAGCTAACCCTACTGCTTTA
+GAGAAGGGAATCAAAGATCTATAA
+>TUP1         2142 residues Pha 0 Code 0
+ATGACTGCCAGCGTTTCGAATACGCAGAATAAGCTGAATGAGCTTCTCGATGCCATCAGA
+CAGGAGTTTCTCCAAGTCTCACAAGAGGCAAATACCTACCGTCTTCAAAACCAAAAGGAT
+TACGATTTCAAAATGAACCAGCAGCTGGCTGAGATGCAGCAGATAAGAAACACCGTCTAC
+GAACTGGAACTAACTCACAGGAAAATGAAGGACGCGTACGAAGAAGAGATCAAGCACTTG
+AAACTAGGGCTGGAGCAAAGAGACCATCAAATTGCATCTTTGACCGTCCAGCAACAGCGG
+CAACAGCAACAGCAGCAACAGGTCCAGCAGCATTTACAACAGCAACAGCAGCAGCTAGCC
+GCTGCATCTGCATCTGTTCCAGTTGCGCAACAACCACCGGCTACTACTTCGGCCACCGCC
+ACTCCAGCAGCAAACACAACTACTGGTTCGCCATCGGCCTTCCCAGTACAAGCTAGCCGT
+CCTAATCTGGTTGGCTCACAGTTGCCTACCACCACTTTGCCTGTGGTGTCCTCAAACGCC
+CAACAACAACTACCACAACAGCAACTGCAACAGCAGCAACTTCAACAACAGCAACCACCT
+CCCCAGGTTTCCGTGGCACCATTGAGTAACACAGCCATCAACGGATCTCCTACTTCTAAA
+GAGACCACTACTTTACCCTCTGTCAAGGCACCTGAATCTACGTTGAAAGAAACTGAACCG
+GAAAATAATAATACCTCGAAGATAAATGACACCGGATCCGCCACCACGGCCACCACTACC
+ACCGCAACTGAAACTGAAATCAAACCTAAGGAGGAAGACGCCACCCCGGCTAGTTTGCAC
+CAGGATCACTACTTAGTCCCTTATAATCAAAGAGCAAACCACTCTAAACCTATCCCACCT
+TTCCTTTTGGATCTAGATTCCCAGTCTGTTCCCGATGCTCTGAAGAAGCAAACAAATGAT
+TATTATATTTTATACAACCCGGCACTACCAAGAGAAATTGACGTTGAGTTACACAAATCT
+TTGGATCATACTTCAGTTGTTTGTTGCGTGAAGTTCAGTAACGATGGTGAATACTTAGCC
+ACAGGCTGCAACAAAACTACTCAAGTGTATCGCGTTTCAGATGGTTCTCTGGTGGCCCGT
+CTATCTGACGATTCTGCTGCCAATAACCATCGAAATTCGATCACTGAAAATAACACCACC
+ACGTCCACGGATAACAATACAATGACAACCACTACTACCACCACAATTACTACCACAGCG
+ATGACTTCGGCAGCAGAATTGGCAAAAGATGTGGAAAACCTGAACACTTCGTCTTCCCCA
+TCATCCGACTTGTATATCCGTTCAGTGTGTTTTTCTCCAGATGGGAAATTTTTGGCAACA
+GGTGCTGAAGACAGACTGATTAGAATTTGGGATATTGAAAATAGAAAGATTGTTATGATT
+CTTCAAGGCCACGAACAAGATATTTATTCATTGGACTACTTTCCCTCAGGTGACAAATTA
+GTCTCCGGTTCTGGTGACCGTACCGTTCGTATTTGGGACTTACGTACAGGCCAGTGTTCA
+TTGACTTTATCCATTGAAGATGGTGTTACCACCGTCGCTGTATCACCAGGTGATGGTAAA
+TACATCGCTGCTGGTTCTCTAGATCGTGCTGTGAGAGTTTGGGATTCCGAGACCGGATTC
+TTGGTGGAAAGACTAGATTCGGAAAACGAATCCGGTACAGGCCACAAGGACTCTGTTTAT
+AGCGTTGTCTTCACTAGAGATGGACAAAGCGTTGTATCCGGCTCATTAGATAGATCTGTT
+AAGCTCTGGAATTTGCAGAATGCAAACAACAAGAGCGATTCGAAAACTCCAAATTCCGGC
+ACTTGTGAAGTTACGTATATCGGGCATAAAGACTTTGTATTGTCCGTGGCCACCACACAA
+AATGATGAGTACATCTTGTCCGGTTCCAAAGATCGTGGTGTCCTGTTTTGGGATAAGAAA
+TCCGGCAATCCGTTATTGATGTTGCAAGGTCATAGGAATTCAGTTATATCTGTGGCTGTG
+GCAAACGGGTCTCCGCTGGGTCCAGAATATAACGTTTTTGCTACTGGTAGCGGTGATTGT
+AAAGCAAGGATTTGGAAGTATAAAAAAATAGCGCCAAATTAA
+>YC16        462 residues Pha 0 Code 0
+ATGGTTACGTTCAACTGTGAGGTGTGTAATGATACTGTGCCCAAGAAGAATACCGAAAAG
+CATTATTATAGATGTCCTAACGCGTACTATACATGCATAGATTGCTCCAAGACGTTTGAA
+GATGGCGTGAGTTACAAGAATCACACGTCTTGCATCAGCGAGGACGAGAAGTACCAGAAA
+GCGTTGTACAAGGGCAACAAGAAGCAGAAGCAGAAGCAGCAGCAGAAGCAGCAGCAGAAG
+CAGCACCAGCACCAGCCAGTGGCAACTCCTGCAAAGAAAGTGGAGAAGCCTGTGATCAAG
+AAGGCAGAGAAAGTGGAAAAGACCTCGAACGGTATCGAGCTTCACAAGGGCAAGTCGTTG
+TACAAAATTTTGAAAACCATGAAGGATAAAGGGGCAAAAAAGACCTTCTTGAAAAGTCTG
+GTTGTGGATTCTGAGGGGCAAATCAGGTATGCAAAGGAATAA
+>ABP1         1779 residues Pha 0 Code 0
+ATGGCTTTGGAACCTATTGATTATACTACTCACTCGAGAGAGATCGACGCAGAGTACCTG
+AAGATTGTCAGAGGCTCCGATCCTGACACCACCTGGTTGATTATTTCACCCAATGCGAAA
+AAAGAATACGAACCTGAGTCTACCGGTTCCTCCTTTCACGATTTCTTGCAATTGTTTGAT
+GAAACCAAGGTCCAGTACGGACTGGCACGTGTGTCCCCACCAGGGTCAGACGTTGAGAAG
+ATTATTATCATTGGTTGGTGTCCTGATTCTGCGCCATTGAAGACAAGGGCCTCTTTCGCC
+GCCAATTTTGCTGCAGTTGCTAATAATCTGTTCAAGGGTTACCACGTTCAAGTTACCGCC
+AGAGACGAGGACGATCTTGACGAAAATGAACTGTTGATGAAAATCAGTAACGCGGCCGGT
+GCCCGTTATTCTATTCAGACTTCCTCCAAGCAACAGGGGAAGGCTTCCACTCCTCCCGTG
+AAGAAATCCTTCACACCTTCCAAGAGCCCTGCTCCAGTTTCTAAGAAGGAACCAGTCAAG
+ACTCCTTCCCCAGCACCTGCTGCTAAGATTTCTTCCCGTGTTAACGACAACAATGACGAC
+GACGATTGGAATGAGCCTGAATTAAAGGAACGCGACTTCGATCAGGCTCCCCTGAAACCA
+AATCAATCATCTTACAAACCAATTGGCAAAATCGACTTGCAAAAAGTGATTGCTGAAGAA
+AAGGCTAAGGAGGACCCACGTCTTGTTCAAAAGCCAACCGCTGCTGGTTCCAAGATTGAT
+CCTAGTTCTGATATCGCTAATTTAAAGAACGAATCAAAATTAAAGAGGGACTCCGAGTTT
+AACTCCTTTTTGGGCACCACTAAACCCCCCTCCATGACGGAATCTTCATTAAAGAATGAT
+GATGATAAAGTCATTAAGGGTTTTAGAAACGAGAAATCACCTGCTCAATTATGGGCCGAA
+AGAAAGGCAAAGCAAAACAGCGGCAACGCCGAAACTAAGGCTGAGGCACCAAAACCTGAA
+GTTCCAGAAGATGAGCCTGAAGGTGAACCTGACGTCAAAGATTTGAAATCAAAATTTGAA
+GGATTGGCCGCTTCAGAAAAAGAGGAGGAAGAAATGGAAAACAAATTTGCTCCTCCTCCA
+AAGAAATCAGAACCAACTATTATCTCACCAAAACCCTTCTCCAAGCCACAAGAACCTGTG
+AAAGCTGAAGAAGCCGAGCAGCCTAAGACTGATTACAAGAAGATCGGCAACCCATTACCC
+GGTATGCACATTGAAGCGGATAATGAGGAAGAACCAGAAGAGAATGATGATGACTGGGAT
+GATGATGAAGACGAGGCTGCTCAACCTCCTTTGCCTTCGAGGAATGTTGCGTCAGGAGCA
+CCAGTGCAAAAAGAAGAGCCTGAACAAGAAGAGATCGCCCCAAGCTTACCTTCTAGAAAC
+TCGATCCCAGCTCCAAAACAAGAAGAAGCACCTGAACAAGCACCTGAAGAAGAAATTGAA
+GAAGAAGCTGAGGAAGCCGCTCCACAGCTGCCATCAAGAAGCTCTGCAGCTCCTCCTCCG
+CCTCCAAGACGAGCAACTCCAGAGAAAAAGCCAAAGGAAAATCCTTGGGCCACAGCAGAA
+TATGATTACGATGCTGCAGAAGATAACGAACTGACCTTTGTGGAAAATGACAAGATTATC
+AATATTGAATTTGTCGACGATGACTGGTGGCTAGGGGAACTAGAGAAAGACGGCTCAAAA
+GGTCTCTTCCCCAGCAATTATGTGTCTTTGGGCAACTAG
+>KIN82        2181 residues Pha 0 Code 0
+ATGACTCAGCAAGAATACCGTTCCCCCTCACAACGCTTATCCAAGGGGAGGAGCATGTCG
+CTACCCAAAATATTTGCTCGTAATTTGAGATCTCTGCAAAACAATGCACCTCCTGGCAAA
+AACATCAATGTCAATTGTTTGAACGTCAATTCTTGTTCGTTGTCCGCAAGCCCAAGCTCA
+CAAATTAATATGGCTTGTAATGGAAACAAGCAAGATCTTCCCATACCGTTTCCCCTGCAT
+GTAGAATGCAACGATAGCTGGTCAAGCTCCAAACTTAACAAGTTCAAATCAATGTTTAAT
+CATAACAGATCAAAGAGCAGTGGTACTACAGATGCGTCAACTTCAGAAAAAGGTACGCAT
+AAGCGTGAACCCCGGTCGACGATACATACAGAGCTGTTACAAAGTTCCATTATCGGTGAG
+CCAAATGTCCATAGTACTACAAGTAGCACACTTATACCCAATGAGGCGATATGCTCCACA
+CCTAATGAGATCTCAGGTAGCTCTTCTCCGGACGCGGAGTTATTTACCTTTGACATGCCC
+ACAGACCCGTCATCCTTCCACACTCCTAGCTCCCCAAGTTATATAGCAAAGGACAGTAGA
+AACCTGAGTAATGGATCTTTGAATGATATTAACGAAAATGAAGAGCTCCAAAATTTCCAT
+AGAAAAATCAGCGAAAATGGCAGTGCCTCCCCCCTGGCTAACTTGTCATTATCCAATTCA
+CCAATTGATTCCCCAAGGAAAAATAGCGAAACCAGAAAGGATCAAATACCTATGAACATA
+ACACCACGTTTAAGGAGGGCCGCTTCCGAACCGTTCAATACGGCAAAGGATGGGTTAATG
+CGGGAAGATTACATTGCCTTGAAACAACCTCCAAGCTTGGGAGATATTGTAGAACCGAGG
+AGATCTCGTCGTTTAAGAACCAAGTCATTCGGTAACAAGTTCCAAGACATTACTGTCGAA
+CCTCAATCCTTCGAAAAAATTAGACTACTTGGCCAAGGTGACGTAGGTAAAGTGTATTTA
+GTGAGGGAACGCGATACCAACCAGATATTCGCCCTGAAAGTTTTGAATAAACATGAGATG
+ATCAAGAGGAAGAAAATTAAACGAGTACTCACTGAACAGGAAATTCTCGCGACAAGTGAT
+CATCCATTTATTGTGACACTGTATCATTCCTTTCAAACCAAAGACTATTTGTATCTCTGT
+ATGGAATACTGCATGGGAGGGGAATTCTTTAGAGCCTTACAAACAAGAAAAAGTAAATGC
+ATTGCAGAAGAAGATGCGAAGTTTTACGCCAGTGAAGTAGTAGCAGCTTTGGAATATTTA
+CACCTACTGGGCTTCATATACAGAGATTTGAAACCCGAAAACATATTACTGCATCAATCT
+GGTCATGTCATGCTTTCTGACTTTGATTTATCCATCCAAGCAACGGGATCAAAAAAACCC
+ACCATGAAAGACTCTACGTATTTAGATACAAAAATTTGTTCAGATGGATTCAGAACTAAT
+TCCTTTGTTGGTACTGAAGAGTATTTAGCTCCAGAAGTAATCAGAGGGAATGGCCACACT
+GCAGCAGTAGACTGGTGGACTTTAGGAATATTGATTTACGAGATGCTATTTGGCTGTACT
+CCATTTAAAGGAGATAATTCAAATGAAACATTCTCTAACATTTTAACCAAGGACGTCAAA
+TTTCCACATGATAAGGAAGTTTCGAAGAATTGTAAAGACCTGATAAAGAAACTACTAAAC
+AAAAACGAGGCAAAAAGGCTTGGTTCCAAATCAGGAGCTGCAGACATAAAGAGACATCCC
+TTCTTCAAAAAAGTTCAGTGGTCGTTCTTAAGAAACCAAGACCCCCCTCTAATACCTGCA
+TTAAATGATAACGGCTGCGAACTTCCTTTTATATTGTCTTGCAATAAACACCCGAAAAGG
+AACTCAGTGAGTGAACAGGAAACCAAAATGTTCTGTGAGAAAGTTGCAAACGATGATGAA
+ATTGATGAGGCTGATCCATTCCATGATTTTAATTCTATGAGTTTAACGAAGAAAGATCAC
+AATATCTTAACCTACTCTGAAAATTATACTACGGAAAAATTCTATACAAAGCAACTTGTA
+CAAGGCCAAGGCATAACAGCTCACATAGAAGTTTCTTTAAAGACATCATACCTGAACTAT
+AACATGTTTACAGAAAGATAA
+>MSH3         3144 residues Pha 0 Code 0
+ATGGTGATAGGTAATGAACCTAAACTGGTACTTTTGAGAGCCAAAAGCAGTGCAAATAGA
+TTTATTTTGTTGAATCTATTAACAATAATGGCGGGACAACCCACAATAAGCAGGTTTTTC
+AAGAAGGCGGTAAAATCAGAGCTGACGCATAAGCAAGAACAAGAAGTTGCGGTTGGAAAT
+GGCGCTGGTAGCGAATCCATCTGCCTTGACACTGATGAAGAGGACAATTTATCTTCTGTT
+GCAAGCACAACAGTAACTAATGATAGCTTTCCACTCAAAGGCAGTGTTTCTTCCAAGAAT
+TCGAAAAATTCAGAAAAGACTAGTGGTACTTCGACAACATTTAATGATATTGACTTTGCT
+AAGAAATTGGATAGGATTATGAAAAGACGAAGTGATGAAAATGTTGAGGCTGAAGATGAT
+GAGGAAGAGGGTGAGGAAGATTTCGTAAAAAAAAAAGCCAGAAAGTCCCCTACAGCGAAA
+CTTACTCCCTTGGACAAACAGGTGAAGGACCTGAAAATGCATCATAGAGATAAAGTGCTT
+GTTATTAGAGTAGGCTACAAGTACAAATGTTTTGCAGAGGATGCAGTAACGGTTAGCAGA
+ATACTTCACATCAAACTTGTGCCTGGAAAATTGACTATCGATGAGTCTAATCCTCAAGAT
+TGCAATCATAGGCAGTTTGCGTACTGTTCTTTCCCGGATGTCAGATTAAACGTTCACCTA
+GAGAGACTTGTGCATCATAATTTAAAGGTTGCCGTGGTAGAGCAAGCAGAAACAAGCGCT
+ATTAAGAAGCATGATCCAGGTGCCAGCAAATCAAGCGTTTTTGAAAGAAAGATTTCAAAT
+GTCTTTACCAAAGCTACATTTGGTGTTAATTCCACCTTTGTCCTTAGGGGGAAACGTATT
+CTCGGTGATACAAACAGTATATGGGCTTTGTCCCGTGACGTACATCAGGGAAAGGTGGCT
+AAATATTCCTTAATTTCTGTCAATTTAAATAACGGGGAAGTCGTGTATGATGAATTTGAA
+GAGCCTAATCTTGCTGATGAGAAACTACAGATACGAATCAAATATTTACAGCCCATAGAA
+GTACTGGTAAATACAGATGATCTTCCATTACATGTAGCGAAATTTTTCAAAGATATTTCA
+TGTCCTTTAATACACAAGCAGGAGTATGATTTGGAAGATCATGTAGTTCAGGCAATAAAA
+GTAATGAATGAGAAAATTCAACTCTCGCCGTCTCTCATACGCTTAGTTTCTAAGTTATAT
+TCGCATATGGTTGAGTACAATAATGAGCAGGTGATGTTGATTCCTTCTATCTATTCGCCC
+TTCGCATCAAAAATACATATGTTACTTGATCCTAACTCCCTGCAAAGTTTGGACATTTTT
+ACCCATGATGGTGGTAAAGGTTCTTTGTTTTGGTTATTGGACCATACAAGGACATCGTTT
+GGATTAAGAATGTTGAGAGAATGGATTCTCAAACCTTTGGTTGATGTACACCAAATTGAA
+GAGCGGCTTGATGCCATTGAGTGCATTACATCCGAAATCAACAACAGTATATTTTTTGAA
+TCGTTGAATCAAATGTTGAATCATACCCCTGACTTATTAAGAACTTTAAATCGCATAATG
+TATGGTACAACTTCTAGAAAAGAAGTCTATTTCTATTTAAAGCAAATAACTTCTTTCGTT
+GATCACTTCAAGATGCATCAATCTTACCTGTCAGAACATTTCAAGTCATCAGATGGAAGG
+ATAGGCAAACAATCTCCTTTACTTTTTAGACTATTTAGTGAATTGAATGAACTACTTTCT
+ACCACTCAGTTGCCTCATTTTTTGACCATGATCAACGTTTCTGCGGTAATGGAAAAAAAT
+TCAGATAAGCAAGTAATGGATTTTTTTAATTTAAATAACTATGATTGTTCAGAGGGTATA
+ATAAAAATTCAAAGGGAAAGCGAATCAGTACGGTCACAGTTAAAGGAAGAATTGGCAGAA
+ATACGAAAATATCTCAAACGTCCATATCTAAATTTTAGAGATGAAGTTGATTACTTAATC
+GAAGTGAAAAACTCGCAAATTAAGGACTTGCCAGATGATTGGATAAAAGTTAACAATACG
+AAGATGGTCAGTAGATTTACCACTCCCAGAACCCAGAAACTGACTCAAAAGCTAGAATAT
+TACAAGGACTTATTAATTCGGGAATCTGAACTACAGTATAAAGAATTCTTGAACAAAATT
+ACGGCAGAATATACAGAGCTCCGTAAAATTACACTCAATTTGGCGCAGTATGACTGTATT
+TTGTCGTTAGCAGCCACATCATGCAACGTAAATTATGTTAGACCAACTTTTGTGAATGGT
+CAACAAGCCATAATCGCAAAAAATGCAAGAAATCCAATTATCGAGTCGCTGGATGTTCAT
+TATGTACCAAATGATATCATGATGTCCCCAGAAAACGGTAAAATCAATATTATAACGGGG
+CCGAATATGGGTGGGAAATCATCTTATATTAGACAAGTGGCACTGCTTACTATAATGGCA
+CAGATCGGCTCATTTGTCCCCGCAGAAGAGATCAGATTAAGCATATTTGAAAACGTACTC
+ACTCGAATCGGTGCGCACGATGATATTATAAACGGTGATTCTACTTTTAAAGTGGAAATG
+CTTGATATCCTACACATCTTGAAAAATTGCAATAAACGGTCTTTACTATTATTAGACGAA
+GTGGGAAGAGGTACTGGCACGCACGATGGTATAGCAATTTCTTATGCTTTAATAAAGTAT
+TTTTCTGAGTTAAGTGACTGCCCCTTGATATTATTTACTACCCATTTTCCCATGCTGGGA
+GAAATCAAATCTCCGTTAATAAGGAATTATCATATGGATTACGTGGAAGAACAAAAAACT
+GGCGAGGACTGGATGAGTGTAATTTTTCTATATAAGTTAAAAAAGGGATTGACTTATAAT
+AGTTATGGGATGAATGTGGCGAAATTGGCACGCCTGGACAAAGATATTATAAATCGGGCA
+TTCAGTATTTCAGAAGAATTGCGGAAGGAATCCATTAACGAAGACGCGTTGAAATTATTC
+AGCTCTTTGAAAAGAATATTAAAAAGTGATAATATAACAGCAACGGATAAACTCGCGAAA
+TTACTATCATTGGATATCCACTGA
+>CDC39        6327 residues Pha 0 Code 0
+ATGCTATCGGCCACATACCGTGATTTGAACACAGCATCTAATTTAGAAACATCAAAGGAA
+AAACAGGCCGCTCAAATCGTCATTGCACAAATTAGTTTATTATTCACGACTCTTAACAAC
+GACAATTTTGAATCCGTGGAAAGAGAAATTAGACATATTTTAGACAGGTCGTCCGTAGAT
+ATTTACATAAAAGTTTGGGAACGATTATTAACCTTAAGTTCTCGGGATATTTTACAAGCG
+GGAAAATTTTTACTTCAAGAAAATCTACTACACAGACTACTATTAGAATTTGCGAAGGAT
+TTACCGAAGAAAAGCACAGACCTTATTGAGCTTTTGAAAGAACGAACCTTCAATAACCAG
+GAGTTTCAAAAACAAACAGGAATTACATTATCACTTTTCATTGATCTATTTGATAAATCT
+GCAAACAAGGACATTATAGAGTCACTTGACCGCTCCTCTCAGATTAACGATTTCAAGACA
+ATTAAGATGAATCATACAAATTATTTAAGGAATTTTTTTCTTCAAACCACACCAGAAACA
+CTAGAGTCCAATCTACGCGACTTATTGCATTCCTTGGAAGGTGAAAGTCTAAATGACTTA
+TTAGCTCTTTTACTGTCCGAAATACTTTCACCTGGGTCTCAGAATTTACAAAATGATCCC
+ACACGGAGTTGGTTGACACCTCCGATGGTTTTAGACGCAACGAACCGTGGGAACGTTATA
+GCAAGATCTATAAGTTCTCTGCAAGCCAACCAGATAAATTGGAATCGTGTGTTTAATTTA
+ATGTCAACAAAGTATTTCTTGAGCGCACCATTGATGCCTACTACAGCATCTTTGAGTTGC
+TTATTTGCAGCATTGCACGATGGTCCAGTTATTGATGAATTTTTCAGTTGCGACTGGAAA
+GTTATTTTCAAACTAGATTTGGCCATTCAACTTCATAAGTGGTCGGTACAGAATGGTTGC
+TTTGACTTATTAAATGCAGAAGGTACCAGGAAAGTTTCTGAAACCATCCCAAACACAAAG
+CAATCTTTACTCTACTTATTATCCATTGCATCATTGAATTTAGAATTGTTCCTACAAAGG
+GAGGAATTGTCTGATGGTCCTATGCTAGCTTATTTTCAAGAGTGCTTCTTTGAAGATTTC
+AACTACGCCCCTGAATATCTTATTTTAGCATTAGTCAAAGAAATGAAGCGGTTCGTTTTA
+TTGATAGAAAACAGGACAGTCATAGACGAAATACTTATTACCTTATTGATTCAAGTGCAT
+AATAAATCACCGTCATCGTTCAAGGACGTTATTTCTACAATAACCGATGATTCTAAAATC
+GTAGATGCAGCAAAAATCATAATCAACTCGGATGACGCACCTATTGCCAACTTTTTAAAA
+TCGTTGTTAGATACGGGAAGATTAGATACGGTCATTAATAAACTTCCTTTCAATGAAGCT
+TTTAAAATTTTGCCATGCGCAAGACAAATTGGTTGGGAGGGGTTCGATACTTTCTTAAAA
+ACAAAAGTTTCTCCATCTAATGTCGATGTAGTGCTGGAATCACTAGAGGTTCAAACGAAA
+ATGACTGATACAAACACTCCATTTAGGTCATTAAAGACATTTGACTTATTCGCTTTTCAT
+TCATTAATTGAAGTACTGAACAAATGCCCACTAGATGTTCTCCAATTACAAAGGTTTGAA
+TCCTTGGAATTTTCCTTATTAATTGCATTTCCTAGATTGATCAATTTTGGTTTTGGACAC
+GATGAAGCTATTTTAGCCAATGGTGACATCGCAGGGATTAATAATGATATTGAAAAGGAG
+ATGCAGAACTATTTACAGAAAATGTATAGTGGTGAGTTAGCCATTAAAGATGTAATCGAA
+CTTCTGAGAAGGTTAAGAGATAGCGACTTGCCAAGGGACCAGGAAGTCTTCACATGTATT
+ACCCATGCCGTTATAGCAGAATCGACATTCTTCCAAGATTATCCATTGGATGCATTGGCT
+ACTACATCTGTTCTTTTTGGATCCATGATTCTCTTTCAACTGTTACGTGGATTCGTATTA
+GACGTCGCATTTAGGATAATCATGAGGTTTGCCAAGGAGCCTCCAGAGTCCAAGATGTTT
+AAGTTTGCTGTACAAGCTATTTATGCATTTAGGATACGTTTGGCCGAATATCCACAGTAT
+TGTAAGGACCTCTTGAGAGATGTTCCGGCTTTGAAGTCTCAGGCTCAAGTTTACCAATCT
+ATCGTCGAAGCTGCTACCCTAGCAAATGCTCCAAAGGAAAGGTCAAGACCCGTCCAGGAA
+ATGATCCCATTAAAATTTTTTGCTGTAGATGAAGTTTCATGTCAGATCAATCAAGAAGGT
+GCTCCTAAAGATGTCGTAGAAAAAGTTCTTTTTGTTCTCAACAACGTTACTCTGGCTAAC
+TTGAATAATAAGGTTGATGAATTGAAAAAAAGTTTGACACCAAATTATTTTTCTTGGTTT
+TCCACATATTTAGTTACGCAAAGGGCTAAAACAGAACCTAACTATCATGATCTTTATAGC
+AAGGTTATAGTTGCTATGGGGTCAGGGTTGCTACATCAGTTCATGGTCAACGTTACTTTG
+AGACAATTATTTGTCCTACTATCTACAAAAGACGAGCAAGCCATCGATAAAAAGCACCTA
+AAGAATTTGGCTTCATGGTTAGGATGTATCACATTAGCTTTGAATAAACCAATTAAACAC
+AAGAATATCGCATTCAGGGAAATGTTAATCGAAGCTTATAAGGAAAATAGACTTGAAATA
+GTTGTGCCTTTTGTAACAAAGATTTTACAAAGGGCTTCTGAATCAAAAATTTTCAAGCCT
+CCAAATCCCTGGACTGTTGGCATATTAAAGCTGTTGATTGAGTTGAACGAAAAAGCAAAC
+TGGAAATTAAGTTTGACTTTCGAAGTTGAGGTTTTATTAAAATCTTTTAATTTGACCACC
+AAATCTCTCAAGCCCTCGAATTTCATCAATACTCCGGAAGTTATAGAAACTTTATCCGGT
+GCTTTGGGATCAATCACTCTGGAGCAACAACAAACAGAGCAACAAAGGCAAATTATACTA
+ATGCAACAACACCAGCAACAGATGCTAATATATCAACAGAGACAACAACAACAACAACAA
+AGGCAACAACAACAACAACATCATATTAGTGCAAATACAATCGCAGACCAACAAGCGGCA
+TTTGGCGGCGAGGGTTCAATTTCACACGACAATCCTTTTAACAACTTACTTGGTTCTACT
+ATTTTTGTAACCCACCCTGACTTGAAGAGGGTATTTCAAATGGCTTTAGCCAAGTCAGTT
+CGCGAAATTTTGTTGGAAGTAGTCGAAAAGTCATCAGGAATTGCTGTTGTTACGACGACA
+AAAATAATACTTAAAGACTTTGCCACTGAAGTTGATGAGTCTAAGTTGAAGACGGCTGCA
+ATCATTATGGTAAGGCATTTGGCACAAAGTTTAGCTCGAGCTACTTCAATTGAACCATTG
+AAAGAAGGCATACGTTCTACTATGCAATCACTAGCACCGAATTTAATGTCTCTTTCTTCT
+TCACCTGCAGAGGAGCTTGACACGGCAATAAATGAAAATATTGGCATTGCTCTAGTTTTG
+ATTGAGAAAGCATCTATGGACAAGTCTACTCAAGATTTAGCAGACCAATTGATGCAAGCG
+ATTGCTATTCGTCGTTATCACAAGGAAAGAAGGGCAGACCAACCATTTATTACGCAAAAT
+ACCAATCCATATTCACTGTCTTTACCAGAACCTCTTGGTTTGAAAAACACTGGTGTTACT
+CCTCAACAATTCAGGGTATACGAAGAATTTGGTAAGAATATTCCAAACTTGGATGTTATT
+CCGTTTGCAGGATTGCCCGCTCACGCTCCACCGATGACTCAAAATGTGGGTTCAACTCAG
+CCTCAGCAACAACAAGCGCAAATGCCTACCCAAATCCTAACCTCCGAACAAATAAGAGCT
+CAACAACAACAGCAGCAATTACAGAAAAGCCGTTTGAATCAGCCATCCCAGTCGGCTCAA
+CCTCCAGGAGTGAATGTCCCAAATCCTCAAGGTGGGATTGCTGCAGTTCAATCAGATTTG
+GAACAGAATCAACGTGTTCTCGTTCACCTCATGGACATTTTAGTTTCTCAAATTAAAGAA
+AATGCTACGAAGAATAACTTAGCTGAATTAGGCGATCAAAACCAAATTAAAACCATCATT
+TTTCAAATTTTGACATTCATTGCAAAAAGCGCACAAAAGGATCAATTAGCTTTAAAGGTA
+TCCCAAGCTGTCGTTAATAGCCTTTTTGCCACTAGTGAGAGTCCTCTCTGCAGAGAAGTT
+TTGTCCCTACTTTTGGAAAAGTTATGTTCTTTATCCCTCGTTGCTAGAAAAGACGTTGTC
+TGGTGGTTAGTTTATGCCTTGGACAGTAGGAAATTCAATGTTCCCGTTATCAGATCCCTT
+CTAGAAGTTAATTTAATTGATGCTACAGAATTAGATAACGTTTTAGTTACTGCAATGAAA
+AATAAAATGGAGAACTCAACTGAATTTGCTATGAAATTAATTCAGAATACTGTCTTGTCT
+GATGATCCAATTTTGATGAGAATGGACTTCATTAAAACCTTAGAACACTTGGCCTCTTCG
+GAAGATGAAAATGTAAAGAAATTCATCAAAGAGTTCGAAGATACTAAGATAATGCCAGTG
+AGGAAAGGTACCAAAACCACAAGAACAGAAAAGCTTTACTTAGTATTTACGGAATGGGTA
+AAATTACTTCAAAGAGTTGAGAATAACGACGTAATCACAACTGTTTTTATCAAGCAATTA
+GTCGAAAAGGGTGTTATCAGCGATACTGATAATTTACTTACATTTGTCAAAAGTTCTCTT
+GAGCTATCAGTTTCTTCATTCAAAGAAAGTGACCCGACTGATGAGGTTTTCATCGCTATT
+GATGCTCTAGGATCGCTAATTATAAAATTGTTGATTTTACAGGGTTTCAAAGATGATACA
+AGAAGAGATTACATAAATGCAATATTTTCTGTGATCGTTTTAGTGTTTGCTAAGGATCAT
+AGCCAAGAGGGTACCACATTCAATGAACGACCATATTTCAGACTATTTTCTAACATCTTA
+TACGAATGGGCTACCATCAGGACGCACAATTTTGTTAGAATATCTGATTCCAGCACTAGG
+CAGGAGCTGATCGAATTTGATTCTGTATTTTACAACACTTTCTCAGGATATTTGCACGCT
+CTGCAACCATTTGCCTTCCCTGGATTCTCATTTGCATGGGTGACACTATTATCACACAGA
+ATGTTATTACCAATTATGCTAAGATTACCCAATAAAATAGGTTGGGAAAAGTTAATGCTT
+TTGATTATCGATTTGTTTAAATTTTTGGACCAATACACAAGTAAACATGCAGTCTCTGAC
+GCTGTTTCGGTTGTTTATAAGGGAACACTGCGTGTTATTTTAGGCATTTCGAATGATATG
+CCATCCTTTTTGATTGAAAATCACTATGAATTAATGAACAATCTACCTCCAACATATTTC
+CAACTAAAGAATGTTATTTTATCTGCTATTCCTAAGAATATGACCGTTCCCAACCCATAT
+GACGTGGATCTTAATATGGAGGATATTCCAGCATGTAAAGAACTACCTGAAGTCTTCTTT
+GATCCTGTAATTGATTTACACTCATTGAAAAAGCCAGTTGACAACTACCTACGTATTCCC
+TCAAATTCATTATTAAGAACAATACTAAGCGCTATTTACAAGGATACCTATGACATAAAA
+AAGGGCGTAGGCTACGACTTTTTATCTGTTGATAGTAAATTAATTCGCGCTATTGTATTA
+CATGTGGGCATTGAAGCTGGAATAGAGTATAAGAGAACTTCTTCAAATGCGGTATTTAAT
+ACGAAGTCTTCTTATTATACTTTATTGTTCAATCTGATTCAAAATGGTAGCATCGAAATG
+AAATATCAAATTATTCTGTCTATTGTGGAACAATTGCGGTATCCAAACATCCACACCTAT
+TGGTTCAGCTTTGTGTTAATGAATATGTTCAAAAGTGACGAATGGAATGATCAAAAACTT
+GAAGTCCAAGAAATTATTTTAAGAAACTTTTTAAAAAGAATTATTGTTAACAAACCACAT
+ACCTGGGGTGTTTCAGTTTTCTTTACTCAGTTGATAAACAATAACGATATTAATCTTTTA
+GACCTGCCCTTTGTACAAAGTGTTCCCGAAATTAAACTAATTTTACAACAATTAGTAAAA
+TATTCCAAAAAATACACAACCAGTGAACAAGATGACCAATCCGCCACCATCAATAGAAGG
+CAAACCCCTCTACAATCCAACGCATAA
+>YCY4        1176 residues Pha 0 Code 0
+ATGGTTTCATTGTTCAAAAGAGGTAAGGCTCCACCGCTCACGAAAGAAGGCCCCACTTCT
+AAAAAGCCTCCTAACACAGCGTTTAGACAACAAAGGCTTAAGGCATGGCAACCAATACTG
+TCTCCTCAAAGTGTGCTTCCGTTGTTAATATTCGTTGCATGTATATTTACTCCTATTGGT
+ATTGGACTCATTGTAAGCGCTACTAAGGTACAAGATCTAACAATTGATTATAGTCATTGT
+GATACAAAAGCATCTACAACTGCTTTTGAAGATATACCAAAGAAGTACATTAAATATCAC
+TTTAAAAGTAAAGTTGAAAATAAACCACAATGGAGGCTAACCGAAAATGAAAATGGCGAA
+CAATCATGCGAACTGCAGTTCGAAATCCCAAACGATATCAAGAAATCCATTTTTATATAT
+TATAAAATAACCAATTTTTATCAAAATCATCGCAGATATGTCCAATCGTTTGACACAAAG
+CAAATATTAGGGGAGCCTATCAAAAAAGATGATCTGGATACAAGCTGTAGTCCAATAAGA
+AGTAGGGAAGACAAAATAATATATCCCTGTGGGTTGATCGCTAATTCCATGTTTAATGAT
+ACATTTTCTCAGGTGTTGAGTGGTATAGATGACACAGAAGACTATAATTTAACTAACAAG
+CATATATCATGGAGTATTGATCGTCACAGATTTAAAACCACCAAGTATAATGCTAGCGAT
+ATTGTTCCACCGCCAAACTGGATGAAGAAGTATCCCGATGGGTATACAGATGAAAATCTT
+CCTGATATCCATACTTGGGAAGAGTTCCAGGTATGGATGAGGACTGCAGCCTTTCCCAAG
+TTTTACAAGTTGACGTTGAAAAATGAATCTGCTTCTTTACCGAAGGGTAAATATCAAATG
+AACATTGAGTTGAATTATCCGATTTCACTCTTTGGTGGCACAAAATCATTTGTACTGACT
+ACAAATGGAGCTATTGGTGGTAGAAATATGTCACTAGGCGTACTGTACCTCATCGTTGCA
+GGGCTTTGCGCCTTATTTGGCATCATTTTTTTGGTTAAATTAATCTTCCAACCAAGAGCG
+ATGGGTGATCACACTTATTTGAATTTTGATGATGAAGAAAACGAGGATTATGAGGATGTA
+CACGCAGAGAATACAACATTGAGGGAAATTTTATAG
+>A2            360 residues Pha 0 Code 0
+ATGCGCAGCATAGAAAACGATAGAAGTAATTATCAACTTACACAGAAAAATAAATCGGCG
+GATGGGTTGGTATTTAATGTGGTAACTCAAGATATGATAAACAAAAGTACTAAACCTTAC
+AGAGGACACCGGTTTACAAAAGAAAATGTCCGAATACTAGAAAGTTGGTTTGCAAAGAAC
+ATCGAGAACCCATATCTAGATACCAAGGGCCTAGAGAATCTAATGAAGAATACCAGTTTA
+TCTCGCATTCAAATCAAAAACTGGGTTTCGAATAGAAGAAGAAAAGAAAAAACAATAACA
+ATCGCTCCAGAATTAGCGGACCTCTTGAGCGGTGAGCCTCTGGCAAAGAAGAAAGAATGA
+>GIT1         1557 residues Pha 0 Code 0
+ATGGAAGACAAAGATATCACATCGGTAAATGAGAAGGAAGTGAACGAGAACACTAATCCT
+AGAATAATAAAATATGATGCCGAGAGGCGTGCAACCCGTACTGAAACCTCAAAGAAAGAT
+AAATGGAAAAACATAGTTACAATCATTGCGTCCGGTTTTGCTCTGATAAGTGATGGTTAC
+GTAAATGGTTCAATGAGTATGCTAAACAAGGTTTTTGTTATGGAGTACGGTAAGAAAAAC
+TATAGCTCAAAAGTGTCGACTAGAGTTTCCAACGCAGCCCTAGTTGGTATTATTTTTGGC
+CAATTCTTTATGGGTATCGCTGCTGATTATTATAGTAGAAAATCTTGTATCCTTGTGGCC
+ACTGCTATCTTGGTTATTGGTAGTGCTCTGTGTGCTGCCTCTCACGGTACTACTGTACCT
+GGCATGTTTTGGATGTTAACAGTTATGAGAGGTTTGGTAGGTATTGGTGTTGGTGCAGAA
+TATCCTACCAGTACATTAAGTGCTAATGAGTCTGCTAATGAATATACCACTACCAAAAGA
+GGTGGTATCCTGGTTATGGTGACAAATTTGCCACTAGCCTTCGGTGGTCCATTTGCTACG
+ATCATCTTTTTAATCGTCTACAAAATCTGTTCAGGAACAAAACATTTAGAGGCGATCTGG
+AGGACTGTTTTTGCAATAGGGTGCTTCTGGCCATTGAGTGTGTTCTATTTTAGATGGAAG
+ACTGCTACTACAGAAGTCTATGAAAAAGGTAGAATCAAGAGAAATATACCATATTTCCTA
+GCATTGAAATTTTATTGGAAAAGGTTACTTGGTACATGTGGTACATGGTTTATGTATGAT
+TTTGTTACCTTCCCAAATGGTATTTTCAGTTCAACAATTATCAGTTCCGTTATCAAGGAC
+CAAAATGATTTAGTAAAAGTGGCAGAGTGGAACTTACTGTTGGGAGTTTTAGCTGTACTG
+GGTGTACCAATTGGTGCTTATCTGTCCGATCGTATTGGTCGTAAATATACGTTGATGTTT
+GGTTTCTCTGGGTACATCATCTTTGGTCTAATCATTGGATGTGCGTACGACCAATTGAAA
+AAAATCACCCCCTTGTTTATTATCTTCTACGCATTCATGAATATGTTAGGTAATGCTGGA
+CCAGGTGATATGCTTGGTGTTATTAGTAGTGAAGCGTCAGCAACCGCTGTTAGAGGTGTT
+TTCTATGGTTTATCTGCTGTGACTGGTAAAATCGGTTCTGTAGTAGGCGTCGAATGTTTC
+CAACCCATTAGGGATAATTTGGGTGCAAGATGGACTTTTATTATTGCTGCAATTTGTGGT
+CTTATTGGTATCATTATTACATATTTCTTTGTTCCACATTCTCTTGAAAGCGATTTAATG
+AAGCAAGACGTTGAATTTCACAACTATTTGGTATCCAATGGCTGGACTGGTAAGATGGGA
+TTTGATGAGACAGATGAAGAATCAATGGTTAGAACTATTGAAGTTGAAGAGAATGGTACT
+AATTGTAGTAAGAAAAACGCAGAAATAATTTCAGTCAGACAGGTCGATCAAAGTTGA
+>YCZ0         951 residues Pha 0 Code 0
+ATGTCATCTACGGACATCTGGATATCCAATGATGCATCTACTTTTCAAAAGGCACAGCTG
+CCTACTCAATTACGGCACGTCAAAGTGATTAAAATTCGTGAAGATTCTATCGGAAGGATC
+ATCCTTCTTATATCGACAGAAATCACAAATGAGGAAAATGCTGATCCAGATCTCTCAGAG
+ATTTTCATATCAGATTCGCAAGGGTTGAAATTCTCACCTGTTGAATGGACACCAAACCAT
+CAGTTTGGAAATTTTAGGCTCACTTTTCCTGATTTCTTGAAAGGGACAATATTTGGATCG
+TTTCATCCTTCCATTGACTATTCTAATCACCAAGTAAACTATACTGAAAATATAGCCGGA
+GGAGAAACCAAAATATCCGTTGATAACGGCCTCACATGGTCAAATTTGAAAGTTGTTGAT
+GAAGAAAATGCCGATTCGTTCGGCTGTGATATCACTAGGCCTGAGAGATGTTCACTTCAG
+GGTTATTTTTACAATCTAAAACTTTCAAATCCTTCTGCTGGGATCATATTAATGACAGGT
+TCTGTTGGCGATGACAATGAATTCGATCGGAAGGACCGAAAAACTTTCATTTCTAGAGAC
+GGTGGTCTAACATGGAGGGTGGCCCATAATTCTTCTGGATTATATGCTACTGGTGATCTG
+GGAAATATTATTGTATATATCCCGTCTCCTTCATATAAAGATGGTGATGTACAATCCAAA
+CTTTATTTTTCCTTGGACCAAGGTAGAACATGGAATCAATATGAGCTTGTTGACGCTTTA
+TTTTATATCCATCCATTAGAGTTGATTAATACAACGCCAGATGGATCAGGCTCAAAATTT
+ATTTTAAGCGGACATCTCATTACTACGGCTAGTCAAGAAGGAAACAACACCAACATCTCA
+TATATTGCAAGAAGTGTCCTGTATGCGATCGATTTTTCTGCTGCATTTTGA
+>YCZ1        549 residues Pha 0 Code 0
+ATGATATTACTTCATGCCATATATACTCTTTGGGTAATTATACTACTTCCGCTACTCAAT
+GCAGAGAAATTTGTCCCAAAAGTAACGGAGGCTCCTATAGAAACATCATTTAATCTAGTG
+AGTTTTGATGATTCCAACACTTCTATCAGATTAGATGGTTGGGGGGTTGTATGGATAAGT
+TTCGACGCTGGAGAAAATTGGGAAACGGTCAAAGAAATTGAAGAGCGCATTTTCAGATTT
+ACTGTTGATCCTTTCCATGGACAGGAAAGAGGTTTCGCTTTTATATGTGAATCACCCAAA
+TTCTACATTACCGACGACCGTGGGGAGTCATGGAGGGCTTTAACTATACCCTCATCAGAA
+GAATATTTAGATGGCGACTGTTTTATAACTACTCATCCTAGAAACAAAGAACTTCTTATT
+GCGAATTGCTATAGCTATATGATAGACGCAGACGTTTTATATGACCCAAGTGAAATTTAC
+TTGAGCAATGATGGGAATCCTTTTTTAAAATTAAACCTTCCTTGGAAAAGAAAAAAGACG
+ACGATATAA
+>YCZ2        1107 residues Pha 0 Code 0
+ATGAAGGCTGTCGTCATTGAAGACGGTAAAGCGGTTGTCAAAGAGGGCGTTCCCATTCCT
+GAATTGGAAGAAGGATTCGTATTGATTAAGACACTCGCTGTTGCTGGTAACCCGACTGAT
+TGGGCACACATTGACTACAAGGTCGGGCCTCAAGGATCTATTCTGGGATGTGACGCTGCC
+GGCCAAATTGTCAAATTGGGCCCAGCCGTCGATCCTAAAGACTTTTCTATTGGTGATTAT
+ATTTATGGGTTCATTCACGGATCTTCCGTAAGGTTTCCTTCCAATGGTGCTTTTGCTGAA
+TATTCTGCTATTTCAACTGTGGTTGCCTACAAATCACCCAATGAACTCAAATTTTTGGGT
+GAAGATGTTCTACCTGCCGGCCCTGTCAGGTCTTTGGAAGGGGCAGCCACTATCCCAGTG
+TCACTGACCACAGCTGGCTTGGTGTTGACCTATAACTTGGGCTTGAACCTGAAGTGGGAG
+CCATCAACCCCACAAAGAAACGGCCCCATCTTATTATGGGGCGGTGCAACTGCAGTAGGT
+CAGTCGCTCATCCAATTAGCCAATAAATTGAATGGCTTCACCAAGATCATTGTTGTGGCT
+TCTCGGAAACACGAAAAACTGTTGAAAGAATATGGTGCTGATCAACTATTTGATTACCAT
+GATATTGACGTGGTAGAACAAATTAAACACAAGTACAACAATATCTCGTATTTAGTCGAC
+TGTGTCGCGAATCAAAATACGCTTCAACAAGTGTACAAATGTGCGGCCGATAAACAGGAT
+GCTACCGTTGTCGAATTAACTAATTTGACAGAAGAAAACGTCAAAAAGGAGAATAGGAGG
+CAAAATGTCACTATTGACAGAACAAGACTGTATTCAATAGGCGGCCATGAAGTACCATTT
+GGTGGCATTACTTTCCCTGCTGACCCAGAAGCCAGGAGAGCTGCCACCGAATTCGTCAAG
+TTCATCAATCCAAAGATTAGTGATGGGCAAATTCACCATATTCCAGCAAGGGTCTATAAG
+AACGGGCTTTACGATGTTCCTCGTATCCTGGAAGACATTAAAATCGGTAAGAACTCTGGT
+GAAAAACTAGTTGCCGTATTAAACTAG
+>YCZ3        336 residues Pha 0 Code 0
+ATGGAGATGCTCTTGTTTCTGAACGAATCATACATCTTTCATAGGTTTCGTATGTGGAGT
+ATTGTTTTATGGCACTCATGTGTATTCGTATGCGCAGAATGTGGGAATGCCAATTATAGG
+GGTGCCGGGGTGCCTTGCAAAACCCTTTTACGCGCGCCTGTGAAGTTTCCGCTTTCGGTC
+AAAAAGAATATCCGAATTTTAGATTTGGACCCTCGTTCAGAAGCTTATTGTCTAAGCCTA
+AATTCAGTCTGCTTTAAACGGCTTCCGCGGAAGAAATATTTCCATCTCTTGAATTCGTAC
+AACATTAAACGTGTGTTGGGAGTCGTATACTGTTAG
+>PAU3          375 residues Pha 0 Code 0
+ATGGTCAAATTAACTTCAATCGCTGCTGGTGTTGCCGCCATCGCTGCCGGTATTGCCGCT
+GCCCCAGCCACTACCACTCTATCTCCATCTGACGAAAGGGTCAACTTGGTCGAATTGGGT
+GTTTACGTCTCCGATATCAGAGCTCATTTGGCTCAATACTACTTGTTTCAAGCAGCTCAT
+CCAACTGAGACCTACCCAGTTGAGATTGCTGAAGCTGTTTTCAACTATGGTGACTTCACC
+ACTATGTTGACTGGTATTCCAGCTGAACAAGTCACCAGAGTCATCACTGGTGTCCCATGG
+TACTCCACTAGATTGAGACCAGCCATCTCCAGTGCTCTATCTAAGGACGGTATCTACACT
+GCTATTCCAAAATAG
+>YCZ5        1086 residues Pha 0 Code 0
+ATGCTTTACCCAGAAAAATTTCAGGGCATCGGTATTTCCAACGCAAAGGATTGGAAGCAT
+CCTAAATTAGTGAGTTTTGACCCAAAACCCTTTGGCGATCATGACGTTGATGTTGAAATT
+GAAGCCTGTGGTATCTGCGGATCTGATTTTCATATAGCCGTTGGTAATTGGGGTCCAGTC
+CCAGAAAATCAAATCCTTGGACATGAAATAATTGGCCGCGTGGTGAAGGTTGGATCCAAG
+TGCCACACTGGGGTAAAAATCGGTGACCGTGTTGGTGTTGGTGCCCAAGCCTTGGCGTGT
+TTTGAGTGTGAACGTTGCAAAAGTGACAACGAGCAATACTGTACCAATGACCACGTTTTG
+ACTATGTGGACTCCTTACAAGGACGGCTACATTTCACAAGGAGGCTTTGCCTCCCACGTG
+AGGCTTCATGAACACTTTGCTATTCAAATACCAGAAAATATTCCAAGTCCGCTAGCCGCT
+CCATTATTGTGTGGTGGTATTACAGTTTTCTCTCCACTACTAAGAAATGGCTGTGGTCCA
+GGTAAGAGGGTAGGTATTGTTGGCATCGGTGGTATTGGGCATATGGGGATTCTGTTGGCT
+AAAGCTATGGGAGCCGAGGTTTATGCGTTTTCGCGAGGCCACTCCAAGCGGGAGGATTCT
+ATGAAACTCGGTGCTGATCACTATATTGCTATGTTGGAGGATAAAGGCTGGACAGAACAA
+TACTCTAACGCTTTGGACCTTCTTGTCGTTTGCTCATCATCTTTGTCGAAAGTTAATTTT
+GACAGTATCGTTAAGATTATGAAGATTGGAGGCTCCATCGTTTCAATTGCTGCTCCTGAA
+GTTAATGAAAAGCTTGTTTTAAAACCGTTGGGCCTAATGGGAGTATCAATCTCAAGCAGT
+GCTATCGGATCTAGGAAGGAAATCGAACAACTATTGAAATTAGTTTCCGAAAAGAATGTC
+AAAATATGGGTGGAAAAACTTCCGATCAGCGAAGAAGGCGTCAGCCATGCCTTTACAAGG
+ATGGAAAGCGGAGACGTCAAATACAGATTTACTTTGGTCGATTATGATAAGAAATTCCAT
+AAATAG
+>YCZ6       2499 residues Pha 0 Code 0
+ATGGATTCGATTACAGTAAAAAAACCTCGGTTAAGATTGGTTTGCCTGCAATGCAAAAAG
+ATCAAACGGAAATGTGATAAACTGCGGCCTGCTTGCTCGCGATGCCAACAAAATTCATTA
+CAGTGTGAATATGAAGAGAGAACAGATTTATCTGCCAATGTTGCAGCAAACGACTCTGAT
+GGATTCAATTCCTCTCATAAGCTCAATTTCGAACAGCAACCTGTACTTGAAAGGACTGGG
+CTTAGATATTCCTTACAAGTGCCTGAAGGTGTCGTTAATGCTACGCTGTCGATATGGAAC
+GCCGAAGATATGCTAGTTATAGTAGGATTAGTTACATTTCTGGATTATCCTTTTGCTGCG
+CATAGTCTGGCGCAACATGACCAGTATATCAGGGCACTTTGTGCTTCGTTGTACGGCATG
+GCGCTTGTTGACTTTAGCAATTATGCTAATGGTATTCCTTGTGAAGACACATCAAGAAGT
+ATACTAGGACCATTGTCATTCATAGAAAAGGCCATTTTTAGACGGATAGAACATAGTAAG
+CAATTTCGAGTTCAGTCTGCCGCCTTAGGGTTATTATACAATGCATTTTCAATGGAAGAA
+GAAAACTTCTCGACTCTTCTACCGTCACTCATCGCTGAAGTGGAAGACGTGTTGATGCAA
+AAAAAAGACTGTGAAATACTTTTGAGGTGTTTCTATCAAAATATTTATCCCTTCTATCCT
+TTTATGGACATTTCACTCTTTGAGAGCGATCTCACTAGTTTGCTTTTACAAGACGACAAT
+AATCGTTGGAAAATTAGTACTGAAGTTAAAAATGTGCGCAAAAAAATAGAAACTTTGTCA
+TTACTTACAATAGTAATGGCCATGGCCTTGATGCATTCAAAATTGGATGCAAATCTTCTT
+TCAATGGTAAAAGAAAATGCCTCCGAAAGTGCCAGGAAACTTTCTCTTTTATGTCATAAA
+CTATTATGCCTCCTGGATGTATTTCGCTATCCAAATGAGAACACTTTTACTTGCCTTTTA
+TATTTCTACGTTTCAGAGCATTTAGATCCCGAGAGTCCCGATTGTGTACTGAGCCCCACT
+AACTTGCTTACTCTGCACCATCTTTTAAATTTGTCCATGACCTTAGGTCTTCAATATGAG
+CCTTCGAAGTACAAACGTTTCAAAGATCCAGAAGTGATAAGGCAGAGACGGATATTATGG
+TTAGGAGTTCAGTCATTACTTTTTCAAATTTCTCTTGCTGAAGGTGATGCTGGTAAATCA
+AATAGTGAATATATGGAGGCATATTTAACAGACTTCGAAGAATATATTGAAGCTTCCTCA
+GAGTATGAAAAAAGTTCTGCGAGTGAATCGAACGTGCAAATGAATGATATTGTTTGGAAT
+AAGTACAAATTTCACGTCATTTTGAGTAAACTAATGTCTGATTGCACTTCAGTTATACAA
+CATCCGCAGCTTTTCCACATTTTAGGAAATATTAAAAGATCTGAAGATTTTATGGCTGAG
+AACTTTCCTACAAGTTCGATTTACCAACCCCTTCATGAAAAGGAACCAAATGCGATCAAA
+GTTGGCAAAAGTACGGTTCTCGATGTCATGGATATTCAAAAAACTGAAATATTTCTTACA
+AATATTGTGGGAAGTATGTGTTTTTTAAACATTTTTGATGTCCTATCGTTACATTTTGAA
+AAAAAATGTGTTATGCACTGGGAAGAATATGAAAAGAACTATCATTTCCTTACTTTGAAA
+AGTTTCAATGCATACTTAAAGCTAGCAGGGTTGATATCTGATTATCTCGAGAATAAGTTT
+CAAGGGAACATTTTAGAGAGTCGCGGTTATATCATAGATAAACAAATATGTTTTATGCTT
+GTAAGGATCTGGATGTTCCAATGTCGTATTTTGTTAAGGTTTTCATACAAGCAAGAAAGT
+CAGAAAAAATTGGCCTCTTCCAGTATATCCACTAACGATAATGAAAAAGAAGATGAAATG
+ATTGTCATTTTAGAAAGACTTATTAAACACATTCGTAACCAAATGGCACATTTAGTGGAT
+CTAGCAAAGGGAAAACTTCAAGATAGTTACTTTGGTGCTTACCAAACTGTTCCCATGTTT
+AGATACGTTGTGTATTTGATCGATGTTGGCGGCTTAGTATCTGTGACAAATGGGTTTTGG
+GATAAGATTTCCAGTGATGGTGAAATACCGCCAAAAGTACAACAAGCCGTGAGATTGAAA
+TGGGGATTGGACTGCAATAATTCGAGAAGAATCAAACAAAAGTTAATAAGCAGCCAGAGT
+TTGCAGAGTTTCAATCAAGTTCTGTTGTGCCAGATGGAGGATGCAGTTCTCTCCAGTTCC
+TTCGCAATAAAAGCCAATACCGCTATGTCCCAAAACACGGCTGAAGAATTTTTCAATATC
+AGCGAAGAAGAGGCTTTAAATCAACTATTGGAAAACAACAATTTTGATGCCTTCTGGGAT
+TTATTAGGTGAAAATCTGAGCGATATGCCTTCTTTGTGA
+>YCZ7       1092 residues Pha 0 Code 0
+ATGATTGGGTCCGCGTCCGACTCATCTAGCAAGTTAGGACGCCTCCGATTTCTTTCTGAA
+ACTGCCGCTATTAAAGTATCCCCGTTAATCCTAGGAGAAGTCTCATACGATGGAGCTCGT
+TCGGATTTTCTCAAATCAATGAACAAGAATCGAGCTTTTGAATTGCTTGATACTTTTTAC
+GAGGCAGGTGGAAATTTCATTGATGCCGCAAACAACTGCCAAAACGAGCAATCAGAAGAA
+TGGATTGGTGAATGGATACAGTCCAGAAGGTTACGTGATCAAATTGTCATTGCAACCAAG
+TTTATAAAAAGCGATAAAAAGTATAAAGCAGGTGAAAGTAACACTGCCAACTACTGTGGT
+AATCACAAGCGTAGTTTACATGTGAGTGTGAGGGATTCTCTCCGCAAATTGCAAACTGAT
+TGGATTGATATACTTTACGTTCACTGGTGGGATTATATGAGTTCAATCGAAGAATTTATG
+GATAGTTTGCATATTCTGGTCCAGCAGGGCAAGGTCCTCTATTTGGGTGTATCTGATACA
+CCTGCTTGGGTTGTTTCTGCGGCAAACTACTACGCTACATCTTATGGTAAAACTCCCTTT
+AGTATCTACCAAGGTAAATGGAACGTGTTGAACAGAGATTTTGAGCGTGATATTATTCCA
+ATGGCTAGGCATTTCGGTATGGCCCTCGCCCCATGGGATGTCATGGGAGGTGGAAGATTT
+CAGAGTAAAAAAGCAATGGAGGAACGGAGGAAGAATGGAGAGGGTATTCGTTCTTTCGTT
+GGCGCCTCCGAACAAACAGATGCAGAAATCAAGATTAGTGAAGCATTGGCCAAGATTGCT
+GAGGAACATGGCACTGAGTCTGTTACTGCTATTGCTATTGCCTATGTTCGCTCTAAGGCG
+AAAAATTTTTTTCCGTCGGTTGAAGGAGGAAAAATTGAGGATCTCAAAGAGAACATTAAG
+GCTCTCAGTATCGATCTAACGCCAGACAATATAAAATACTTAGAAAGTATAGTTCCTTTT
+GACATCGGATTTCCTAATAATTTTATCGTGTTAAATTCCTTGACTCAAAAATATGGTACG
+AATAATGTTTAG
diff --git a/menu.c b/menu.c
new file mode 100755
index 0000000..b96de7b
--- /dev/null
+++ b/menu.c
@@ -0,0 +1,1302 @@
+/**************************************************************************/
+/* CodonW codon usage analysis package                                    */
+/* Copyright (C) 2005            John F. Peden                            */
+/* This program is free software; you can redistribute                    */
+/* it and/or modify it under the terms of the GNU General Public License  */
+/* as published by the Free Software Foundation; version 2 of the         */
+/* License,                                                               */
+/*                                                                        */
+/* This program is distributed in the hope that it will be useful, but    */
+/* WITHOUT ANY WARRANTY; without even the implied warranty of             */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           */
+/* GNU General Public License for more details.                           */
+/* You should have received a copy of the GNU General Public License along*/
+/* with this program; if not, write to the Free Software Foundation, Inc.,*/
+/* 675 Mass Ave, Cambridge, MA 02139, USA.                                */
+/*                                                                        */
+/*                                                                        */
+/* The author can be contacted by email (jfp#hanson-codonw at yahoo.com Anti-*/
+/* Spam please change the # in my email to an _)                          */
+/*                                                                        */
+/* For the latest version and information see                             */
+/* http://codonw.sourceforge.net 					  */
+/**************************************************************************/
+#include <stdlib.h> 
+#ifdef _WINDOWS
+#include <process.h>
+#endif
+#include <stdio.h>    
+#include <string.h>
+#include <ctype.h>
+#include "codonW.h" 
+
+/************** main_menu ************************************************/
+/* Top-level dispatcher of the interactive menu system.                  */
+/*                                                                       */
+/* menu : number of the menu to enter                                    */
+/*          0     menu_initial()                                         */
+/*          1-8   menu_1() ... menu_8()                                  */
+/*          9     printinfo() and welcome(), then pause and              */
+/*                clearscr(pm->term_length)                              */
+/*        any other value is reported on stderr and nothing else is done */
+/*                                                                       */
+/* Returns nothing; the selected handler is called for its effects only. */
+/*************************************************************************/
+void main_menu(int menu)
+{
+    /* menus 0-8 map one-to-one onto a handler function                  */
+    if (menu == 0)
+        menu_initial();
+    else if (menu == 1)
+        menu_1();
+    else if (menu == 2)
+        menu_2();
+    else if (menu == 3)
+        menu_3();
+    else if (menu == 4)
+        menu_4();
+    else if (menu == 5)
+        menu_5();
+    else if (menu == 6)
+        menu_6();
+    else if (menu == 7)
+        menu_7();
+    else if (menu == 8)
+        menu_8();
+    else if (menu == 9) {
+        /* menu 9 has no handler of its own, its steps are run inline    */
+        printinfo();
+        welcome();
+        pause;
+        clearscr(pm->term_length);
+    } else {
+        /* not a menu number known to this dispatcher                    */
+        fprintf(stderr, "ERROR: Unrecognised menu in main_menu\n");
+    }
+}
+
+
+/* menu_initial: first menu presented when running CodonW. Repeats until  */
+/* (R)un is chosen; a numeric answer 1-9 is forwarded to main_menu().      */
+void menu_initial (void)
+{
+    int loop = TRUE;
+    int c;
+    while (loop) {                                             /* loop    */
+        printf (" Initial Menu \n");
+        printf (" Option\n\t (1) Load sequence file\n"); 
+        /* slots 2 and 6 are hidden in the listing but still accepted below*/
+/*      printf ("\t (2) Check sequence file for redundancy\n");           */
+        printf ("\t ( )\n");
+        printf ("\t (3) Change defaults\n");
+        printf ("\t (4) Codon usage indices\n");
+        printf ("\t (5) Correspondence analysis\n");
+
+/*      printf ("\t (6) Basic statistics\n");                             */
+        printf ("\t ( ) \n");
+
+        printf ("\t (7) Teach yourself codon usage\n");
+        printf ("\t (8) Change the output written to file\n");
+        printf ("\t (9) About C-codons\n");
+        printf ("\t (R) Run C-codons \n");
+        printf ("\t (Q) Quit \n");
+        printf (" Select a menu choice, (Q)uit or (H)elp -> ");
+
+        /* NOTE(review): gets() cannot bound the read into pm->junk;       */
+        /* replace with fgets() once the buffer's declared size is known.  */
+        gets(pm->junk);
+        if (isalpha((int)pm->junk[0])) {
+            c = toupper( (int) pm->junk[0]);
+            switch  (c) {
+            case 'Q':       
+                my_exit(2,"main menu");
+                break;                
+            case 'R':    
+                /* test that all the required files are opened             */
+                if ( pm->inputfile && pm->outputfile && pm->tidyoutfile)
+                    loop = FALSE;
+                else {                  
+                    printf("Not all required files are open\n");
+                    printf("About to open input and output files\n");
+                    pause;
+                    main_menu(1);   
+                    loop = FALSE;
+                 } 
+                break;
+            case 'H':                                              /* help */     
+                 chelp ( "main_menu" );       
+                 break;   
+            default:
+                fprintf( stderr, "The answer %s is not valid\n", pm->junk);
+                pause;
+                break;
+           }                                            /* end of switch c */     
+        } else if (isdigit((int) pm->junk[0])) {
+            c =    atoi( pm->junk);
+            if (c > 0 && c <= 9 )  
+                main_menu( (int) c );
+            else {
+                fprintf( stderr, "The answer %s is not valid\n", pm->junk);
+                pause;   /* as in the letter branch, before clearscr() below */
+            }
+        }
+        clearscr(pm->term_length);
+    }
+}
+
+/************************* menu_1 ******************************************/
+/* Opens the input sequence file and the two result files named after it.  */
+/* If pm->curr_infilename is set that file is closed and its name is handed */
+/* back to open_file(), otherwise "input.dat" is handed over. The ".out"    */
+/* and ".blk" names are the input name cut at its last '.' plus the suffix. */
+/* A NULL result from open_file() is passed on to my_exit(1, ...).          */
+/***************************************************************************/
+void    menu_1 (void)
+{
+    char    root[MAX_FILENAME_LEN];   /* input name minus its extension    */
+    int n;
+    
+    clearscr(pm->term_length);
+    printf (" Loading sequence menu (type h for help)\n");
+
+    if ( strlen(pm->curr_infilename) ) {
+        printf ( "The current active file is \"%s\"\n",pm->curr_infilename);
+        fileclose(&pm->inputfile);
+        if (!(pm->inputfile = open_file("input sequence file",
+            pm->curr_infilename, "r", FALSE)))
+            my_exit(1,"menu 1");
+    } else {
+        printf( " No sequence file is currently loaded\n");
+        if (!(pm->inputfile = open_file("input sequence file\t",
+            "input.dat", "r", FALSE)))
+            my_exit(1,"menu 1");
+    }
+    /* copies the filename into pm->curr_infilename                        */
+    /* next finds the root of this filename                                */
+    /* which is used to construct other filenames                          */
+    /* NOTE(review): assumes open_file() leaves the chosen name in pm->junk*/
+
+    strncpy(pm->curr_infilename, pm->junk, MAX_FILENAME_LEN - 1);
+    strncpy(root, pm->curr_infilename    , MAX_FILENAME_LEN - 5);
+    root[MAX_FILENAME_LEN - 5] = '\0';  /* strncpy may not terminate root  */
+    /* open the .out filename                                              */
+    for (n = (int) strlen(root); n && root[n] != '.' ; --n);
+    if (n)        root[n] = '\0';           /* define root of the filename */
+
+    if ( strlen(pm->curr_outfilename)) {
+        printf( "\nThe previous  output file was \"%s\"\n", 
+            pm->curr_outfilename );
+        fclose( pm->outputfile);
+    } 
+    if (!(pm->outputfile = open_file("output sequence file\t",
+        strcat(root, ".out"), "w", (int)pm->verbose)))
+        my_exit(1,"output menu1");
+
+    /* open the .blk filename                                              */
+    /* remember the output name, then rebuild root (strcat() changed it)   */
+    strncpy(pm->curr_outfilename, pm->junk, MAX_FILENAME_LEN - 1);
+    strncpy(root, pm->curr_infilename     , MAX_FILENAME_LEN - 5);
+    root[MAX_FILENAME_LEN - 5] = '\0';  /* strncpy may not terminate root  */
+    for (n = (int) strlen(root); n && root[n]!='.'  ; --n);
+    if ( n  ) root[n] = '\0';                   /* find root of filename  */
+
+    if ( strlen(pm->curr_tidyoutname)) {
+        printf( "\nThe previous bulk output file was \"%s\"\n", 
+            pm->curr_tidyoutname );
+        fclose( pm->tidyoutfile);
+    }
+    if (!(pm->tidyoutfile = open_file("bulk output file\t",
+        strcat(root, ".blk"), "w", (int) pm->verbose)))
+        my_exit(1,"tidyout menu1");
+
+    strncpy(pm->curr_tidyoutname, pm->junk, MAX_FILENAME_LEN - 1);
+
+    clearscr(pm->term_length);
+    return;
+}
+
+/************************* menu_2 ******************************************/
+/* Not currently implemented: placeholder for the "purifying sequences"    */
+/* menu. The stub is redrawn after every answer until the user enters X    */
+/* or an empty line. Q is passed to my_exit(2, ...), H calls chelp() for   */
+/* "menu_2", and any other letter is rejected with a message on stderr.    */
+/***************************************************************************/
+void    menu_2 (void)
+{
+    int answer;
+
+    clearscr(pm->term_length);
+    for (;;) {
+        printf (" Menu 2 \n");
+        printf (" Purifying sequences menu\n");
+        printf ("\t ( ) Sorry currently unimplemented \n");
+        printf ("\t (X) Exit this menu\n");
+        printf (" Select a menu choice, (Q)uit or (H)elp -> ");
+        gets(pm->junk);
+        clearscr(pm->term_length);
+
+        /* answers that are neither a letter nor empty just redraw the menu*/
+        if (pm->junk[0] != '\0' && !isalpha((int)pm->junk[0]))
+            continue;
+
+        /* toupper('\0') stays '\0', so the empty line is caught just below*/
+        answer = toupper( (int) pm->junk[0]);
+        if (answer == 'X' || answer == '\0')
+            return;
+
+        if (answer == 'Q') {
+            my_exit(2,"menu 2");
+        } else if (answer == 'H') {
+            chelp("menu_2");
+        } else {
+            fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+            pause;
+        }
+    }
+}
+
+/************************* menu_3 ******************************************/
+/* Runtime editor for the defaults that codonW.h sets at compile time; ten */
+/* numbered options are offered and the function returns on X or an empty  */
+/* line. NOTE(review): answers are read with gets(), which is unbounded.   */
+/***************************************************************************/
+void    menu_3 (void)
+{
+    int loop = TRUE;
+    int i;
+    int c;
+    
+    clearscr(pm->term_length);
+    while (loop) {
+        printf (" Changing defaults\n");
+        printf (" Options\n");
+        printf (" %-40.40s", "(1) Change the ASCII delimiter in output");
+        printf ("{%s}\n", 
+            (pm->seperator == ' ' ) ? "space" : 
+            (pm->seperator == '\t') ? "tab" : 
+            (pm->seperator == ',' ) ? "," : 
+            "ERROR" );
+
+        printf (" %-40.40s", "(2) Run silently, No Warnings");
+        printf ("{%s}\n", (pm->verbose) ? "FALSE" : "TRUE");
+        printf (" %-40.40s", "(3) Log warnings/information to a file");
+        printf ("{%s}\n", (strlen(pm->curr_logfilename) > 1) ? "TRUE" : 
+                "FALSE");
+        printf (" %-40.40s", "(4) Number of lines on screen");
+        printf ("{%d}\n", pm->term_length);
+        printf (" %-40.40s", "(5) Change the genetic code");
+        printf ("{%s}\n", cu[pm->code].des);
+        printf (" %-40.40s", "(6) Change the Fop/CBI values");
+        printf ("{%s}\n", fop[pm->f_type].des);
+        printf (" %-40.40s", "(7) Change the CAI values");
+        printf ("{%s}\n", cai[pm->c_type].des);
+        printf (" %-40.40s", "(8) Output Human or Computer readable");
+        printf ("{%s readable}\n", (pm->seq_format == 'M') ? "Computer" : 
+                "Human"); 
+        printf (" %-40.40s", "(9) Concatenate or individual genes");
+        printf ("{%s genes}\n", (pm->totals == TRUE ? "concatenate":
+                "individual"));      
+        printf (" %s", "(10) Correspondence analysis defaults\n");
+
+        printf (" (X) Return to previous menu\n");
+        printf ("Choices enclosed with curly brackets are the current "
+                "defaults\n");
+        printf (" Select a menu choice, (Q)uit or (H)elp -> ");
+        gets(pm->junk);
+        clearscr(pm->term_length);
+        /* letters and the empty line are commands, handled right here     */
+        if (isalpha((int) pm->junk[0])|| pm->junk[0]=='\0') {
+            switch (c = toupper((int) pm->junk[0])){
+              case 'Q': 
+                my_exit(2,"menu 3");           /* decided to quit program  */
+                continue;
+              case 'H':
+                chelp("menu_3");
+                continue;      /* redraw; H must not reach the option code */
+              case 'X':
+              case '\0':
+                return; /*     way out of loop is X or blank line          */
+              default:
+                fprintf(stderr,"The answer %s is not a valid\n", pm->junk);
+                pause;
+                continue;
+                }
+        }
+        /* anything else must be an option number between 1 and 10         */
+        c=0;
+        if (isdigit((int)pm->junk[0]))
+            c = atoi(pm->junk);
+        if ( c <= 0 || c > 10 ) {             /* outside the option range  */
+            fprintf( stderr, "The answer %s is not valid\n", pm->junk);
+            continue;
+        }
+
+        switch ((int) c) {
+        case 1:
+            clearscr(pm->term_length);
+            printf (" The current separator is  \"%s\"\n",  
+                (pm->seperator == ' ' ) ? "space" : 
+                (pm->seperator == '\t') ? "tab" : 
+                (pm->seperator == ',' ) ? "," : 
+                "ERROR" );
+            printf (" Please select a new separator \t:");
+            gets(pm->junk);
+            c = pm->junk[0];             /* take first character of string */
+
+            if ( strchr ("\t, ", (int)c) == NULL || c == '\0' ) {
+                                     /* remember the \0 is in every string */
+                printf( "WARNING: The chosen separator %s is unsuitable\n", 
+                        pm->junk);
+                printf( "\tSeparator is unchanged try comma,tab "
+                        "or space\n\n");
+            } else
+                pm->seperator = (char) c;  /* specify the column separator */
+
+            break;
+        case 2:                            /* warn about overwriting files?*/
+            clearscr(pm->term_length);
+            pm->verbose = (char) ((pm->verbose) ? FALSE : TRUE);
+            pm->warn         = (char) ((pm->warn        ) ? FALSE : TRUE);
+            break;
+        case 3:                            /* redirect errors to a file    */
+            if ( strlen(pm->curr_logfilename) > 1 ) {
+                strcpy(pm->curr_logfilename , "" );   /* blank logfilename */
+                pm->my_err = stderr;                  /* redirects errors  */
+                                                      /* to stderr         */
+                fclose(pm->logfile);                  /* close logfile     */ 
+            } else {
+                                       /* open logfile and redirect stderr */
+                if (!(pm->logfile = open_file("log filename        \t",
+                    "warning.log", "w", (int) pm->verbose)))
+                    my_exit(1," open log file menu 3");
+                pm->my_err = pm->logfile;
+                strncpy(pm->curr_logfilename, pm->junk, MAX_FILENAME_LEN-1);
+            }                                                 /* end of if */
+            break;
+
+        case 4:                                       /* No of line on term*/
+            printf("Please give the new height of the screen [%i] ", 
+                    pm->term_length);
+            gets(pm->junk);
+            if ( isdigit( (int) pm->junk[0]))
+                pm->term_length = atoi(pm->junk) ;
+            break;
+
+        case 5:                                      /*Change genetic code */
+            clearscr(pm->term_length);
+            printf(" Genetic codes currently supported are\n");
+           /* NumGeneticCodes is given in codonW.h                         */
+           for ( i = 0 ; i < NumGeneticCodes ; i++) {
+                (pm->code == i) ? printf ( " (%i) {%-45.45s %-17.17s}", i, 
+                    cu[i].des, cu[i].typ) : 
+                    printf ( " (%i)  %-45.45s %-17.17s ", i, cu[i].des, 
+                        cu[i].typ) ;
+                printf("\n");
+            }
+            printf("Choice enclosed with curly brackets is "
+                   "the current code\n");
+            printf("Please select a new code [no change]\n");
+            gets(pm->junk);
+            if ( isdigit( (int) pm->junk[0]) ) {
+                c = atoi(pm->junk);       /* range-check before narrowing  */
+                /* entries are listed from 0, so 0 must be selectable too  */
+                if ( c >= 0 && c < NumGeneticCodes && pm->code!= (char) c ){ 
+                    pm->code = (char) c;
+                    initilize_point(pm->code,pm->f_type, pm->c_type);  
+                    }
+            }
+            break;
+
+        case 6:                                     /*Change optimal codons*/
+            clearscr(pm->term_length);
+            printf(" Fop values pre-loaded are\n");
+            /* NumFopSpecies  defined with the Fop_struct in codonW.h      */
+            for ( i = 0 ; i < NumFopSpecies ; i++) {
+                (pm->f_type == i) ? printf (" (%i) {%-25.25s %-40.40s}", 
+                    i, fop[i].des, fop[i].ref) : 
+                    printf (" (%i)  %-25.25s %-40.40s ", i, fop[i].des, 
+                        fop[i].ref) ;
+                printf("\n");
+            }
+            printf ("Choice enclosed with curly brackets is the current "
+                "selection\n");
+            printf ("Please select a type [no change]\n");
+            gets(pm->junk);
+            if ( isdigit( (int) pm->junk[0]) ) {
+                c = atoi(pm->junk);       /* range-check before narrowing  */
+                /* entries are listed from 0, so 0 must be selectable too  */
+                if ( c >= 0 && c < NumFopSpecies && pm->f_type!=(char) c) {
+                        pm->f_type = (char) c;  
+                        initilize_point(pm->code,pm->f_type, pm->c_type);
+                }
+            }
+            break;
+
+        case 7:                                      /*Change CAI w values */
+            clearscr(pm->term_length);
+            printf(" CAI types currently supported are\n");
+
+            /*  NumCaiSpecies currently defined in codonW.h                */
+            for ( i = 0 ; i < NumCaiSpecies ; i++) {
+                (pm->c_type == i) ? printf (" (%i) {%-25.25s %-40.40s}", 
+                    i, cai[i].des, cai[i].ref) : 
+                    printf (" (%i)  %-25.25s %-40.40s ", i, cai[i].des, 
+                        cai[i].ref) ;
+                printf("\n");
+            }
+            printf ("Choice enclosed with curly brackets is the current "
+                "selection\n");
+            printf ("Please chose a new CAI [no change]\n");
+            gets(pm->junk);
+            if ( isdigit( (int) pm->junk[0]) ) {
+                c = atoi( pm->junk);      /* range-check before narrowing  */
+                /* entries are listed from 0, so 0 must be selectable too  */
+                /* if valid value and different from the current choice    */
+                if (  c >= 0 && c < NumCaiSpecies && pm->c_type!=(char) c){
+                    pm->c_type = (char) c;
+                    initilize_point(pm->code,pm->f_type, pm->c_type);
+                    }
+            }
+            break;
+       case 8:                       /* machine or human readable format  */
+             clearscr(pm->term_length);
+             pm->seq_format = 
+                (char) (  pm->seq_format == 'M' ? 'H' : 'M'); /*toggle    */
+             break;
+      case 9:                        /* concatenate genes?                */
+            clearscr(pm->term_length);
+            pm->totals    = (char) (pm->totals == TRUE ? FALSE : TRUE); 
+            break;
+     case 10:                       /* change COA default then go to menu5*/
+           clearscr(pm->term_length);
+           if( !pm->coa ) 
+                menu_5();
+           else 
+                menu_coa();           
+           break;
+     default:
+            fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+            break;
+     }
+    }
+  return;
+}
+
+/************************* menu_4 ******************************************/
+/* Select which indices to calculate                                       */
+/*                                                                         */
+/* Shows the index list with the active ones in curly brackets and reads   */
+/* one answer per pass: Q quits via my_exit, X or an empty line returns,   */
+/* H calls chelp, 1-11 toggle one index flag in pm and 12 sets them all.   */
+/* Any other answer is reported on stderr and the menu is printed again.   */
+/***************************************************************************/
+void    menu_4 (void)
+{
+    char    *choices[] = {
+        " ",
+        "Codon Adaptation Index       (CAI)",
+        "Frequency of OPtimal codons  (Fop)",
+        "Codon bias index             (CBI)",
+        "Effective Number of Codons   (ENc)",
+        "GC content of gene           (G+C)",
+        "GC of silent 3rd codon posit.(GC3s)",
+        "Silent base composition",
+        "Number of synonymous codons  (L_sym)",
+        "Total number of amino acids  (L_aa )",
+        "Hydrophobicity of protein    (Hydro)",
+        "Aromaticity of protein       (Aromo)",
+        "Select all"
+    };
+    int     NumChoices = 12;           /* last valid index of choices[]   */
+    int     i;
+    int     selected;                  /* is the entry being printed on?  */
+    unsigned char first;               /* first character of the answer   */
+
+    clearscr(pm->term_length);
+    for (;;) {
+        printf (" Codon usage indices\n");
+        printf (" Options\n");
+
+        for (i = 1; i <= NumChoices; i++) {
+            printf(" (%2i) ", i);
+            switch (i) {
+            case 1:  selected = (pm->cai      != 0);  break;
+            case 2:  selected = (pm->fop      != 0);  break;
+            case 3:  selected = (pm->cbi      != 0);  break;
+            case 4:  selected = (pm->enc      != 0);  break;
+            case 5:  selected = (pm->gc       != 0);  break;
+            case 6:  selected = (pm->gc3s     != 0);  break;
+            case 7:  selected = (pm->sil_base != 0);  break;
+            case 8:  selected = (pm->L_sym    != 0);  break;
+            case 9:  selected = (pm->L_aa     != 0);  break;
+            case 10: selected = (pm->hyd      != 0);  break;
+            case 11: selected = (pm->aro      != 0);  break;
+            case 12: selected = FALSE;                break;
+                                   /* "Select all" is an action, never on */
+            default:               /* only reachable if NumChoices and    */
+                                   /* this switch get out of step         */
+                fprintf(stderr, "programming error \n");
+                my_exit(99, "menu 4");
+                printf("\n");
+                continue;
+            }
+            if (selected)
+                printf ("{%-45.45s}", choices[i]);
+            else
+                printf (" %s ", choices[i]);
+            printf("\n");
+        }
+        printf (" (X)  Return to previous menu\n");
+        printf ("Choices enclosed with curly brackets are the current"
+                " selections\n");
+        printf (" Select a menu choice, (Q)uit or (H)elp -> ");
+
+        /* NOTE(review): gets() cannot bound what it writes into pm->junk  */
+        /* and was removed from the language in C11. The size of pm->junk  */
+        /* is not visible here, so the call is left for a file-wide fix.   */
+        gets(pm->junk);
+
+        /* the <ctype.h> functions are only defined for EOF and values     */
+        /* representable as unsigned char, so convert before classifying   */
+        first = (unsigned char) pm->junk[0];
+
+        if (isalpha(first) || first == '\0') {
+            switch (toupper(first)) {
+            case 'Q':
+                my_exit(2,"menu 4");      /* User decides to quit programme*/
+                break;
+            case 'X':
+            case '\0':
+                return;                   /* <-back to previous menu->     */
+            case 'H':
+                chelp("menu_4");
+                break;
+            default:
+                fprintf( stderr, "The answer %s is not a valid choice\n",
+                         pm->junk);
+                break;
+            }
+        } else if (isdigit(first)) {
+            switch (atoi(pm->junk)) {
+            /* User wants to calculate CAI then we explain that it is     */
+            /* dependent on the choice of CAI adaptiveness values         */
+            case 1:
+                pm->cai = (char) ((pm->cai)   ? FALSE : TRUE);
+                if (pm->cai) {
+                    clearscr(pm->term_length);
+                    printf("\nTo calculate CAI a reference set of highly ");
+                    printf("expressed genes \nmust be selected\n\n");
+                    printf("The reference set currently selected is that of "
+                           "%s\n\n",cai[pm->c_type].des);
+                    printf("See the menu 'Change defaults' to change this "
+                           "selection\n\n");
+                    printf("If you wish to use a personal choice of CAI "
+                           "vaules\n");
+                    printf("\tplease continue and you will be prompted for"
+                           " input\n\n");
+                    pause;
+                }
+                break;
+            /* User wants to calculate Fop then we explain that it is     */
+            /* dependent on the choice of optimal codons                  */
+            case 2:
+                pm->fop = (char) ((pm->fop)   ? FALSE : TRUE);
+                if (pm->fop) {
+                    clearscr(pm->term_length);
+                    printf("\n\nYou have chosen to calculate Fop\n\n");
+                    printf("To calculate Fop a set of optimal "
+                           "codons must be selected\n");
+                    printf("The optimal codons of %s are the current selection"
+                           "\n\n",fop[pm->f_type].des);
+                    printf("See the menu 'Change defaults' to change Fop "
+                           "selection\n\n");
+                    printf("If you wish to use a personal choice of Fop "
+                           "vaules\n");
+                    printf("\tplease continue and you will be prompted for "
+                           "input\n\n");
+                    pause;
+                }
+                break;
+            /* User wants to calculate CBI then we remind them that it is */
+            /* dependent on the choice of optimal codons                  */
+            case 3:
+                pm->cbi = (char) ((pm->cbi)   ? FALSE : TRUE);
+                if (pm->cbi) {
+                    clearscr(pm->term_length);
+                    printf("\n\nYou have chosen to calculate CBI\n\n");
+                    printf("To calculate CBI a set of optimal "
+                           "codons must be selected\n");
+                    printf("The optimal codons of %s are the current selection"
+                           "\n\n",fop[pm->f_type].des);
+                    printf("See the menu 'Change defaults' to change CBI "
+                           "selection\n\n");
+                    printf("If you wish to use a personal choice of CBI "
+                           "vaules\n");
+                    printf("\tplease continue and you will be prompted for "
+                           "input\n\n");
+                    pause;
+                }
+                break;
+            case 4:                                      /* calc Nc       */
+                pm->enc = (char) ((pm->enc)  ? FALSE : TRUE);
+                break;
+            case 5:                                      /* calc GC       */
+                pm->gc = (char) ((pm->gc)    ? FALSE : TRUE);
+                break;
+            case 6:                                      /* calc GC3s     */
+                pm->gc3s = (char) ((pm->gc3s) ? FALSE : TRUE);
+                break;
+            case 7:                                      /* calc sil base */
+                pm->sil_base = (char) ((pm->sil_base) ? FALSE : TRUE);
+                break;
+            case 8:                                      /* No. synonyms  */
+                pm->L_sym = (char) ((pm->L_sym) ? FALSE : TRUE);
+                break;
+            case 9:                                      /* No. AminoAcids*/
+                pm->L_aa = (char) ((pm->L_aa)  ? FALSE : TRUE);
+                break;
+            case 10:                                     /* hydropathicity*/
+                pm->hyd = (char) ((pm->hyd)    ? FALSE : TRUE);
+                break;
+            case 11:                                     /* aromatic      */
+                pm->aro = (char) ((pm->aro)    ? FALSE : TRUE);
+                break;
+            case 12:                                     /* all the above */
+                pm->cai      = (char)  TRUE;
+                pm->fop      = (char)  TRUE;
+                pm->cbi      = (char)  TRUE;
+                pm->enc      = (char)  TRUE;
+                pm->gc       = (char)  TRUE;
+                pm->gc3s     = (char)  TRUE;
+                pm->sil_base = (char)  TRUE;
+                pm->L_sym    = (char)  TRUE;
+                pm->L_aa     = (char)  TRUE;
+                pm->hyd      = (char)  TRUE;
+                pm->aro      = (char)  TRUE;
+                break;
+            default:
+                fprintf( stderr, "The answer %s is not a valid choice\n",
+                         pm->junk);
+                break;
+            }
+        } else {
+            fprintf( stderr, "The answer %s is not a valid choice\n",
+                     pm->junk);
+        }
+    }
+}
+
+/************************* menu_5 ******************************************/
+/* Select what type of COA                                                 */
+/*                                                                         */
+/* Entries 1-3 store 'c', 'r' or 'a' in pm->coa and entry 4 clears it.     */
+/* After a COA type is chosen initilize_coa(pm->code) is called and the    */
+/* user may open the advanced menu. Q quits, X or an empty line returns,   */
+/* H shows help; an invalid answer just redisplays the menu.               */
+/***************************************************************************/
+void    menu_5 (void)
+{
+    char    *choices[] = {
+        "",
+        "COA on codon usage",
+        "COA on RSCU",
+        "COA on Amino Acid usage",
+        "Do not perform a COA"
+    };
+    /* value kept in pm->coa for each menu entry; slot 0 is never used    */
+    char    coa_code[] = { '\0', 'c', 'r', 'a', FALSE };
+    int     NumChoices = 4;
+    int     i, c;
+    unsigned char first;               /* first character of the answer   */
+
+    clearscr(pm->term_length);
+
+    for (;;) {
+        printf (" Menu 5  Correspondence analysis\n");
+        printf ("  Correspondence analysis (COA) \n");
+
+        for (i = 1; i <= NumChoices; i++) {
+            printf(" (%i) ", i);
+            if (pm->coa == coa_code[i])         /* the current selection  */
+                printf ("{%-45.45s}", choices[i]);
+            else
+                printf (" %s ", choices[i]);
+            printf("\n");
+        }
+        printf (" (X) Exit this menu\n");
+        printf (" Select a menu choice, (Q)uit or (H)elp -> ");
+        /* NOTE(review): gets() is unbounded; the size of pm->junk is not  */
+        /* visible here, so the call is left for a file-wide fix.          */
+        gets(pm->junk);
+        clearscr(pm->term_length);
+
+        /* <ctype.h> functions need a value representable as unsigned char */
+        first = (unsigned char) pm->junk[0];
+
+        if (isalpha(first) || first == '\0') {
+            switch (toupper(first)) {
+            case 'Q':
+                my_exit(2,"menu 5");
+                break;
+            case 'X':
+            case '\0':
+                return;
+            case 'H':
+                chelp("menu_5_coa");
+                break;
+            default:
+                fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+                break;
+            }
+            /* no COA type was picked on this pass, so do not go on to the */
+            /* advanced-menu prompt (which would also rerun initilize_coa) */
+            continue;
+        }
+
+        c = atoi(pm->junk);
+        if ( c < 1 || c > NumChoices ) {
+            fprintf(stderr,"The answer %s is not a valid\n", pm->junk);
+            continue;            /* stay in the menu instead of leaving it */
+        }
+        pm->coa = coa_code[c];
+
+        if ( pm->coa ) {
+            printf( " Do you wish to see the advanced COA menu (Y/N) [N] ");
+            gets( pm->junk );
+
+            /* Select the default codon/AAs to analyse, based on genetic  */
+            /* code                                                       */
+            initilize_coa  (pm->code);
+
+            if ( toupper( (unsigned char) pm->junk[0]) == 'Y' ) menu_coa();
+        }
+    } /* menu loop */
+}
+
+/************************* menu_6 ******************************************/
+/* Placeholder for the basic statistics menu (correlations and similar),   */
+/* which exists only as a perl module and has not been ported to C.        */
+/* Q quits, X or an empty line returns, H shows help; a number from 1 to 9 */
+/* is handed to main_menu(), anything else is reported on stderr.          */
+/***************************************************************************/
+
+void    menu_6 (void)
+{
+    int key;                           /* upper-cased first character     */
+    int number;                        /* numeric value of the answer     */
+
+    clearscr(pm->term_length);
+    for (;;) {
+        printf (" Menu 6-Basic Stats\n");
+        printf ("\n");
+        printf ("\t ( ) Sorry currently unimplemented \n");
+        printf ("\t (X) Exit this menu\n");
+        printf (" Select a menu choice, (Q)uit or (H)elp -> ");
+        gets(pm->junk);
+        clearscr(pm->term_length);
+
+        /* anything that is neither a letter nor an empty line is read as */
+        /* a number                                                       */
+        if (!isalpha( (int) pm->junk[0]) && pm->junk[0] != '\0') {
+            number = atoi(pm->junk);
+            if ( number < 1 || number > 9 )
+                fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+            else
+                main_menu(number);
+            continue;
+        }
+
+        key = toupper( (int) pm->junk[0]);
+        if (key == 'Q') {
+            my_exit(2,"menu 6");
+        } else if (key == 'X' || key == '\0') {
+            return;
+        } else if (key == 'H') {
+            chelp("menu_6");
+        } else {
+            fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+            pause;
+        }
+    }
+}
+
+/************************* menu_7 ******************************************/
+/* A bit of fun: entry 1 calls tester(), which quizzes the user on the     */
+/* selected genetic code (see tester.c). Q quits, X or an empty line       */
+/* returns, H shows help; other answers are reported on stderr.            */
+/***************************************************************************/
+void    menu_7 (void)
+{
+    int key;                           /* upper-cased first character     */
+
+    clearscr(pm->term_length);
+    for (;;) {
+        printf (" Menu 7 A Bit of fun \n");
+        printf ("\n");
+        printf (" (1) Test your knowledge of the genetic code \n");
+        printf (" (X) Exit this menu\n");
+        printf (" Select a menu choice, (Q)uit or (H)elp -> ");
+        gets(pm->junk);
+        clearscr(pm->term_length);
+
+        /* anything that is neither a letter nor an empty line is read as */
+        /* a number                                                       */
+        if (!isalpha( (int) pm->junk[0]) && pm->junk[0] != '\0') {
+            if ( atoi(pm->junk) == 1 )
+                tester();                        /* run the quiz          */
+            else
+                fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+            continue;
+        }
+
+        key = toupper( (int) pm->junk[0]);
+        if (key == 'Q') {
+            my_exit(2,"menu 7");
+        } else if (key == 'X' || key == '\0') {
+            return;
+        } else if (key == 'H') {
+            chelp("menu_7");
+        } else {
+            fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+            pause;
+        }
+    }
+}
+
+/************************* menu_8 ******************************************/
+/* Chooses the single kind of bulk output written to the .blk file. Only   */
+/* one kind can be active per run, so CodonW may be rerun on the same      */
+/* input with another choice; for that reason, once an analysis has run,   */
+/* the user is first offered the chance to pick a new output file.         */
+/***************************************************************************/
+
+
+void    menu_8 (void)
+{
+  struct multi {                    /* one entry of the menu               */
+    char    *string;                /* text shown to the user              */
+    char    prog;                   /* code stored in pm->bulk if chosen   */
+  };
+  struct multi aii[] = {
+    { " ",                                   ' ' },  /* unused, entries    */
+                                                     /* are numbered from 1*/
+    { "Fasta format output of DNA sequence", 'T' },
+    { "Reader format output of DNA sequence",'R' },
+    { "Translate input file to AA sequence", 'N' },
+    { "Codon Usage",                         'C' },
+    { "Amino acid usage",                    'A' },
+    { "RSCU values",                         'S' },
+    { "Relative Amino Acid usage",           'L' },
+    { "Dinucleotide frequencies",            'D' },
+    { "Exhaustive base compostion analysis", 'B' },
+    { "No output written to file",           'X' }
+  };
+  int  NumChoices = 10;             /* Number of choices in Menu           */
+  int  item;                        /* entry being printed                 */
+  int  c;                           /* decoded answer                      */
+
+  clearscr(pm->term_length);
+
+  /* Only one bulk option is written per run, so a rerun on the same data
+     with another option may need another file name: when an analysis has
+     already been run, offer to change the .blk file first                 */
+  if ( !pm->analysis_run ) {
+    printf("Note: No output file has been selected !\n");
+  } else {
+    printf (" The current bulk output file is %s do you "
+            "wish to change this (y/n) [n] ", pm->curr_tidyoutname);
+    gets(pm->junk);
+
+    if ( toupper( (int) pm->junk[0]) == 'Y') {
+      fileclose(&pm->tidyoutfile);
+
+      pm->tidyoutfile = open_file("codon usage output file",
+               pm->curr_tidyoutname, "w",(int)pm->verbose);
+      if ( !pm->tidyoutfile )
+               my_exit(1, "menu 8");
+      /* NOTE(review): presumably open_file() leaves the chosen file name */
+      /* in pm->junk - confirm against open_fil.c                         */
+      strncpy(pm->curr_tidyoutname, pm->junk, MAX_FILENAME_LEN - 1);
+    }
+  }
+
+
+  for (;;) {
+    printf (" Menu 8\n");
+    printf (" This output will be saved to %s\n\n", pm->curr_tidyoutname);
+
+    for ( item = 1; item <= NumChoices; item++) {
+      if (aii[item].prog == (char) pm->bulk)     /* the current selection  */
+         printf("\n\t{(%2d) %-45.45s\t\t}", item, aii[item].string);
+      else
+         printf("\n\t (%2d) %s", item, aii[item].string);
+    }
+
+    printf ("\n\t ( X) To return to previous menu\n");
+
+    printf ("Values enclosed with curly{} brackets are the current "
+            "selection\n");
+    printf (" Select a menu choice, (Q)uit or (H)elp -> ");
+    gets(pm->junk);
+    clearscr(pm->term_length);
+
+    /* anything that is neither a letter nor an empty line is a number     */
+    if ( !isalpha( (int) pm->junk[0]) && pm->junk[0] != '\0') {
+      c = atoi(pm->junk);
+      if ( c < 1 || c > NumChoices )
+         fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+      else
+         pm->bulk = aii[c].prog;
+      continue;
+    }
+
+    c = toupper( (int) pm->junk[0]);
+    if ( c == 'Q' ) {
+      my_exit(2,"menu 8");               /* User decides to quit          */
+    } else if ( c == 'X' || c == '\0' ) {
+      return;                            /* <-back to previous menu->     */
+    } else if ( c == 'H' ) {
+      chelp("menu_8_blk");
+    } else {
+      fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+      pause;
+    }
+  }
+}
+
+
+/*********************** menu_coa   ***************************************/
+/* Advanced correspondence analysis menu, entered only on request after   */
+/* a COA type has been chosen. Options: (1) select codons/amino acids,    */
+/* (2) number of axes, (3) file of genes to add afterwards, (4) output    */
+/* level, (5) genes used to find optimal codons (absolute or percent).    */
+/* A percentage is stored in pcoa->fop_gene as a negative number.         */
+/**************************************************************************/
+void menu_coa (void)
+{
+  char *p;                        /* position of '%' in the answer        */
+  int   c;
+  int   i;                        /* requested number of axes             */
+  int   max_axis;                 /* largest number of axes accepted      */
+  int   percent;                  /* requested percentage of genes        */
+  unsigned char first;            /* first character of the answer        */
+
+  clearscr(pm->term_length);
+  for (;;) {
+    printf ("Advanced Correspondence Analysis\n");
+    printf (" (1) (Un)Select %s\n", (pm->coa=='a')? "amino acids": "codons");
+    printf (" (2) Change the number of axis (factors) recorded to file\n");
+    printf (" (3) Add additional genes after COA\n");
+    printf (" (4) Toggle level of COA output  [%s]\n",
+                  (pcoa->level=='e')? "Exhaustive":"Normal");
+
+    if (pm->coa != 'a')
+      printf (" (5) No. genes used to identify optimal codons [%i%s]\n",
+              (pcoa->fop_gene <0)? (pcoa->fop_gene*-1): pcoa->fop_gene,
+              (pcoa->fop_gene <0)? "%"                : " genes");
+
+    printf (" (X) Exit this menu\n");
+    printf (" Select a menu choice, (Q)uit or (H)elp -> ");
+    /* NOTE(review): gets() is unbounded; the size of pm->junk is not     */
+    /* visible here, so the calls are left for a file-wide fix.           */
+    gets(pm->junk);
+    clearscr(pm->term_length);
+
+    /* <ctype.h> functions need a value representable as unsigned char    */
+    first = (unsigned char) pm->junk[0];
+
+    if (isalpha(first) || first == '\0') {
+      switch (toupper(first)) {
+      case 'Q':
+        my_exit(2, "menu coa");
+        break;
+      case 'X':
+      case '\0':
+        return;
+      case 'H':
+        chelp("menu_coa");
+        break;
+      default:
+        fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
+        pause;
+        break;
+      }
+      continue;
+    }
+
+    c = atoi(pm->junk);
+    switch (c) {
+    case 1:                               /* select what to analyse       */
+      select_coa( pm->coa );
+      break;
+
+    case 2:                               /* Num of axis to record        */
+      printf ( "Changing the number of axis generated from %i "
+          "Please input new value [%i]", (int)pcoa->axis,(int)pcoa->axis);
+      gets(pm->junk);
+      if ( !strlen(pm->junk) ) break;
+      if ( isalpha( (unsigned char) pm->junk[0]) ) break;
+
+      /* keep the full int: narrowing to char first would wrap a large    */
+      /* answer back into the accepted range                              */
+      i = atoi(pm->junk);
+      max_axis = (pm->coa == 'a') ? 20 : 59;
+      if ( i < 0 || i > max_axis ) {
+        fprintf(pm->my_err,"Value is out of range adjusting to max value\n");
+        pcoa->axis = max_axis;
+      } else {
+        pcoa->axis = i;
+      }
+      break;
+
+    case 3:                               /* Add additional genes         */
+      printf("You have elected to add genes after the initial COA is complete\n"
+             "these will not affect the generation of axis (factors) but can\n"
+             "identify were these additional genes fall based on the trends \n"
+             "identified among the original genes\n"
+             "You must have a separate file containing sequence(s) that are\n"
+             "to be added (these genes must be DNA in fasta format)\n"
+             "Please input filename [cancel this option]: ");
+      gets(pm->junk);
+      if ( !strlen(pm->junk) ) break;
+      strncpy(pcoa->add_row,pm->junk,MAX_FILENAME_LEN-1);
+      break;
+
+    case 4:                               /* toggle level of COA output   */
+      pcoa->level = (char) ( (pcoa->level=='n')? 'e':'n');
+      break;
+
+    case 5:                               /* how to identify optimal codons */
+      printf ("You have elected to alter the number of genes used \n"
+              "to identify the optimal codons\n"
+              "You can input either an absolute number of genes or a\n"
+              "percentage (example 10%%)\n "
+              "\tPlease input your choice []");
+      gets ( pm->junk);
+      if ( !strlen(pm->junk) ) break;
+
+      if ( (p = strchr ( pm->junk,'%')) != NULL ) {
+        *p = '\0';                        /* cut the answer at the '%'    */
+        percent = atoi(pm->junk);
+        /* validate the percentage itself, before it is negated           */
+        if ( percent <= 0 || percent >= 50 ) {
+          printf ( " Limits are >0%% and less than 50%%\n");
+          pcoa->fop_gene = (-10);                        /* assume default */
+        } else {
+          pcoa->fop_gene = -percent;      /* negative marks a percentage  */
+        }
+      } else {
+        pcoa->fop_gene = atoi(pm->junk);                 /* set No. genes */
+      }
+      break;
+
+    default:
+      fprintf(pm->my_err,"Answer out of range\n");
+      break;
+    }
+  }
+}
+
+/*********************** select_coa ****************************************/
+/* Lets the user toggle which codons (or amino acids when choice is 'a')   */
+/* are included in the COA; bracketed entries are excluded. Called from    */
+/* menu_coa. Numbers typed on one line are toggled in turn; X or an        */
+/* empty line leaves the menu and H shows help.                            */
+/***************************************************************************/
+void select_coa ( char choice )
+{
+ int   last_row[4];       /* amino acid shown in each column one row up   */
+ int   x;
+ long  toggle;            /* number currently being processed             */
+
+ char  *startpoint, *endpoint;
+
+ clearscr(pm->term_length);
+
+ for (;;) {
+   if ( choice == 'a' ) {                   /* if AA analysis then         */
+     for ( x = 1 ; x < 22 ; x++ ) {
+       if ( !pcoa->amino[x] )
+         printf("[(%2i)_%s_%s] ", x, paa->aa3[x],paa->aa1[x] );
+       else
+         printf(" (%2i)_%s_%s  ", x, paa->aa3[x],paa->aa1[x] );
+
+       if ( !(x % 4) ) printf( "\n");
+     }
+     printf( "\n");
+
+/*************** Sample of aa choice output    ****************************/
+/* ( 1)_Phe_F   ( 2)_Leu_L   ( 3)_Ile_I   ( 4)_Met_M                      */
+/* ( 5)_Val_V   ( 6)_Ser_S   ( 7)_Pro_P   ( 8)_Thr_T                      */
+/* ( 9)_Ala_A   (10)_Tyr_Y  [(11)_TER_*]  (12)_His_H                      */
+/* (13)_Gln_Q   (14)_Asn_N   (15)_Lys_K   (16)_Asp_D                      */
+/* (17)_Glu_E   (18)_Cys_C   (19)_Trp_W   (20)_Arg_R                      */
+/* (21)_Gly_G                                                             */
+
+   } else {
+     /* Forget the previous table: pcu->ca[] values index paa->aa3[] and  */
+     /* so are never negative, which makes -1 differ from every entry and */
+     /* forces the amino acid name to be printed on the first row         */
+     for ( x = 0 ; x < 4 ; x++ ) last_row[x] = -1;
+
+     printf ( "Using %s \n", pcu->des );
+     for ( x = 1 ; x < 65 ; x++ ) {
+
+       if ( !pcoa->codons[x] )         printf("[");
+       else                            printf(" ");
+
+       /* name the amino acid only where it changes down a column         */
+       if ( last_row[x%4] != pcu->ca[x] )
+         printf( "(%2i) %s\t%s", x,paa->aa3[pcu->ca[x]],paa->cod[x]);
+       else
+         printf( "(%2i)    \t%s", x,paa->cod[x]);
+
+       if ( !pcoa->codons[x] )         printf("]");
+       else                            printf(" ");
+
+       last_row[x%4] = pcu->ca[x];
+
+       if ( !(x % 4) )
+         printf( "\n");
+       if ( !(x % 16))
+         printf( "\n");
+     }
+   }
+
+/*************** Sample of codon choice output      ***********************/
+/*   Using Universal Genetic code                                         */
+/* ( 1) Phe       UUU  ( 2) Ser   UCU  ( 3) Tyr   UAU  ( 4) Cys   UGU     */
+/* ( 5)           UUC  ( 6)       UCC  ( 7)       UAC  ( 8)       UGC     */
+/* ( 9) Leu       UUA  (10)       UCA [(11) TER   UAA][(12) TER   UGA]    */
+/* (13)           UUG  (14)       UCG [(15)       UAG][(16) Trp   UGG]    */
+
+   /* the prompt and the toggling follow choice, like the table above     */
+   printf("%s bracketed will be excluded from the COA. ",
+      (choice == 'a')? "Amino Acids": "Codons" );
+   printf("Select number(s) that\nidentify the %s you wish to toggle "
+          "(X to exit, H for help) [X] ",
+      (choice == 'a')? "Amino Acids": "Codons" );
+
+   /* NOTE(review): gets() is unbounded; the size of pm->junk is not      */
+   /* visible here, so the call is left for a file-wide fix.              */
+   gets(pm->junk);
+
+   if ( !strlen(pm->junk) || toupper( (unsigned char) pm->junk[0]) == 'X' )
+     return;
+
+   if ( toupper( (unsigned char) pm->junk[0]) == 'H' ) {
+     chelp("select");
+     continue;
+   }
+
+   /* now toggle the codons and amino acids to be analysed; parsing stops */
+   /* at the first item that is not a number or that is zero              */
+   startpoint = pm->junk;
+   for (;;) {
+     toggle = strtol(startpoint, &endpoint, 10);
+     if ( toggle == 0 || endpoint == startpoint ) break;
+     startpoint = endpoint;
+
+     /* range-check the long itself so that an oversized number cannot be */
+     /* truncated into a valid index; out-of-range values are skipped     */
+     if ( choice == 'a' ) {
+       if ( toggle >= 1 && toggle <= 21 )
+         pcoa->amino [toggle]= (char)((pcoa->amino [toggle])?FALSE:TRUE);
+     } else {
+       if ( toggle >= 1 && toggle <= 64 )
+         pcoa->codons[toggle]= (char)((pcoa->codons[toggle])?FALSE:TRUE);
+     }
+   }
+ }
+}
+
+/************************* Welcome *****************************************/
+/* Writes the ASCII-art program banner to stdout, one row per string.      */
+/* Each backslash of the picture is doubled because C needs the escape.    */
+/***************************************************************************/
+void    welcome ( void )
+{
+ static const char *banner[] = {
+   "\n\n",
+   "  //   \\   //    \\  |I    \\   //    \\  |I\\    I  /     \n",
+   " |I       |I      I |I     I |I      I |I\\\\   I  \\___  \n",
+   " |I       |I      I |I     I |I      I |I \\\\  I      \\ \n",
+   " |I       |I      I |I     I |I      I |I  \\\\ I       |\n",
+   "  \\\\___/   \\\\____/  |I____/   \\\\____/  |I   \\\\I  \\___/\n"
+ };
+ int row;
+
+ for ( row = 0 ; row < (int) (sizeof banner / sizeof banner[0]) ; row++ )
+   printf ("%s", banner[row]);
+}
+
+/********************** printinfo  *****************************************/
+/* Prints the source file name, a debug marker when DEBUG is defined,      */
+/* the author, version and revision details, the compile date/time and     */
+/* a reminder of the expected input format. Always returns 1.              */
+/***************************************************************************/
+int  printinfo(void) {
+# if defined (__FILE__ )
+  printf("\n\tSource   : %s", __FILE__);
+# endif
+# if defined  (DEBUG)
+  printf("(Debug version)");
+# endif
+
+  printf("\n\tAuthor   : John Peden\n");
+  /* a '*' precision consumes an int argument, so the size_t returned by  */
+  /* strlen has to be converted explicitly                                */
+  printf("\tVersion  : %.*s\n", (int) strlen(Revision) , Revision );
+  /* NOTE(review): the fixed offsets below assume Update and Author hold  */
+  /* at least 7 and 10 characters - confirm where they are defined        */
+  printf("\tRevised  :%.*s %s %.*s\n",(int) strlen(Update) - 7, Update + 6,
+	 (*(Update + 7) ? "\n\t     by  :" : ""),
+	 (int) strlen(Author) - 10, Author + 9);
+
+#if defined(__DATE__ ) && defined(__TIME__)
+  printf("\n\tCompiled : %s %s\n", __DATE__, __TIME__);
+#endif
+
+  printf("\n\t-------------------------------\n\n");
+
+  printf(" All sequences must be in a single file separated by title "
+      " lines whose\n first character is either ; or > \n\t any number"
+      " or length of genes is acceptable\n\n");
+  return 1;
+}
+
+
diff --git a/open_fil.c b/open_fil.c
new file mode 100755
index 0000000..09ae020
--- /dev/null
+++ b/open_fil.c
@@ -0,0 +1,236 @@
+/**************************************************************************/
+/* CodonW codon usage analysis package                                    */
+/* Copyright (C) 2005            John F. Peden                            */
+/* This program is free software; you can redistribute                    */
+/* it and/or modify it under the terms of the GNU General Public License  */
+/* as published by the Free Software Foundation; version 2 of the         */
+/* License,                                                               */
+/*                                                                        */
+/* This program is distributed in the hope that it will be useful, but    */
+/* WITHOUT ANY WARRANTY; without even the implied warranty of             */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           */
+/* GNU General Public License for more details.                           */
+/* You should have received a copy of the GNU General Public License along*/
+/* with this program; if not, write to the Free Software Foundation, Inc.,*/
+/* 675 Mass Ave, Cambridge, MA 02139, USA.                                */
+/*                                                                        */
+/*                                                                        */
+/* The author can be contacted by email (jfp#hanson-codonw at yahoo.com Anti-*/
+/* Spam please change the # in my email to an _)                          */
+/*                                                                        */
+/* For the latest version and information see                             */
+/* http://codonw.sourceforge.net 					  */
+/**************************************************************************/
+
+/* This is a general subroutine, so we might as well redefine TRUE & FALSE*/
+#ifndef TRUE
+#define TRUE 1
+#endif
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+/* What to do if we can't locate the file we were asked to open           */
+/* On most systems we will try and be nice and show a choice of filenames */
+/* by running the platform's directory-listing command.                   */
+/* The macro bodies deliberately carry no trailing semicolon: the caller  */
+/* writes "no_file_found();" and so gets exactly one statement, which     */
+/* keeps the macro safe inside an unbraced if/else.                       */
+
+#ifdef _DOS
+#define no_file_found() system("dir/w")
+#elif BSD || SYSV            
+#define no_file_found() system("ls -F")
+#elif defined (WIN32) || defined (_WIN) 
+#define no_file_found() system("dir/w")
+#else
+#define no_file_found() printf("This would have presented a list of files\n\tbut I do not know howto your operating system\n")
+#endif
+
+/* Include header files                                                   */
+#include <stdio.h>             
+#include <limits.h> 
+#include <stdlib.h>  
+#include <string.h>
+#include <ctype.h>         
+#include "codonW.h"
+
+/************** open_file **************************************************/
+/* Interactive front end to fopen().                                       */
+/*                                                                         */
+/* file_needed      Description of the file, used in the prompt.  If it is */
+/*                  empty the user is not asked for a name and             */
+/*                  default_filename is used directly.                     */
+/* default_filename Name suggested in the prompt and used when the user    */
+/*                  just presses return.  May be empty (no default).       */
+/* write_perm       fopen() mode.  "r","r+","rb" / "a","a+","ab" /         */
+/*                  "w","w+","wb" are recognised, anything else gives NULL.*/
+/*                  The string is only read, never modified.               */
+/* verbose          For the write modes only: when TRUE an existing file   */
+/*                  is not overwritten without asking the user first.      */
+/*                                                                         */
+/* Returns the stream from fopen() (which may be NULL for the append and   */
+/* write modes if the open fails), or NULL if the user quits at the        */
+/* "another filename" prompt or the mode is not recognised.                */
+/*                                                                         */
+/* Side effect: the name of the file finally used is copied to pm->junk.   */
+/* That copy means pm->junk is already required to hold a name of up to    */
+/* MAX_FILENAME_LEN bytes, so the same limit is used when reading replies  */
+/* into it.                                                                */
+/***************************************************************************/
+
+/* Reads one line from stdin into buf (capacity size bytes), dropping the  */
+/* newline and discarding whatever does not fit.  Returns TRUE if a line   */
+/* was read; on end of input / read error buf becomes "" and FALSE is      */
+/* returned.                                                               */
+static int open_file_read_line(char *buf, int size)
+{
+    size_t len;
+
+    fflush(stdout);                       /* make sure the prompt is shown */
+
+    if ( fgets(buf, size, stdin) == NULL ) {
+        buf[0] = '\0';
+        return FALSE;
+    }
+
+    len = strcspn(buf, "\n");
+    if ( buf[len] == '\n' )
+        buf[len] = '\0';
+    else if ( len == (size_t) size - 1 ) {
+        /* buffer filled before the newline: swallow the rest of the line  */
+        /* so that it is not mistaken for the answer to the next prompt    */
+        int c;
+        while ( (c = getchar()) != EOF && c != '\n' )
+            ;
+    }
+    return TRUE;
+}
+
+FILE *open_file(char *file_needed, char *default_filename, 
+char *write_perm, int  verbose )
+{
+    char   infile_name[MAX_FILENAME_LEN]="";
+    FILE  *input=NULL;
+    char   temp[4]="";
+    char  *answer = pm->junk;
+    const char *mode = write_perm;      /* may be switched to "a+" below  */
+
+    /* Treat missing strings as empty ones rather than dereferencing NULL */
+    if ( file_needed == NULL )      file_needed = "";
+    if ( default_filename == NULL ) default_filename = "";
+    if ( write_perm == NULL )       return (NULL);
+
+    /**********************************************************************/
+    /* If a string has been given for file_needed the user is prompted    */
+    /* for a filename, with default_filename offered as the suggestion.   */
+    /* Typing help shows the help text and asks again.                    */
+    /**********************************************************************/
+
+    if ( strlen(file_needed)) {
+        while (!strlen(infile_name) )  {
+            int got_line;
+
+            printf("\nName of %s (h for help) [%s] ", 
+                      file_needed,default_filename);
+            got_line = open_file_read_line(infile_name,
+                                           (int) sizeof infile_name);
+            
+            if ( got_line && WasHelpCalled ( infile_name ) ) {
+                     chelp("open_file_query");          /* Help ....      */
+                     infile_name[0]='\0';
+                     continue;
+                }
+
+            if ( !strlen(infile_name) ) {
+                if ( strlen(default_filename) ) {       /* take default   */
+                    strncpy(infile_name, default_filename,
+                            sizeof infile_name - 1);
+                    infile_name[sizeof infile_name - 1] = '\0';
+                } else if ( !got_line ) {
+                    /* no input left and no default: asking again would   */
+                    /* loop for ever                                      */
+                    fprintf(stderr,
+                        "\nEnd of input while waiting for a filename\n");
+                    my_exit (2,"open_file");
+                    return (NULL);
+                }
+            }
+        }                                         /* end of get filename  */
+    } else if ( strlen(default_filename) ) {      /* use default filename */
+        strncpy(infile_name, default_filename, sizeof infile_name - 1);
+        infile_name[sizeof infile_name - 1] = '\0';
+    } else {                                      /* not enough info      */
+        fprintf(stderr, "Programming error: no filename supplied\n");
+        my_exit (0,"open file");
+    }
+
+
+    /**********************************************************************/
+    /* At this point infile_name contains a possible filename             */
+    /* Depending on the mode string this is tested 3 ways                 */
+    /*                                                                    */
+    /* (r, r+, rb) Try to open the file; while that fails the files in    */
+    /* the current directory are listed and the user is prompted for an   */
+    /* alternative name, help, or to quit the programme                   */
+    /*                                                                    */
+    /* (a, a+, ab) Not tested, just open the file                         */
+    /*                                                                    */
+    /* (w, w+, wb) If verbose is not TRUE there is no test.               */
+    /* If verbose == TRUE the file is checked to see if it already        */
+    /* exists; if it does the user is asked for permission to overwrite   */
+    /* it, or for an alternative filename which is then tested as well.   */
+    /**********************************************************************/
+
+    if ( !strcmp(mode, "r") || !strcmp(mode, "r+") 
+       ||!strcmp(mode, "rb") ){
+        while ( !(input = fopen (infile_name , mode )))  {
+            /* "%.*s" wants an int precision, hence the cast of strlen()  */
+            fprintf(stderr,"\nThese are the files in the current directory "
+                "I cannot find %.*s \n\n",
+                (int) strlen(infile_name),infile_name);
+            no_file_found();
+            fprintf(stderr, "\n\nPlease enter another filename, "
+                " (Q)uit, (H)elp [%s] ",infile_name);
+
+            if ( !open_file_read_line(answer, MAX_FILENAME_LEN) ) {
+                /* end of input: nothing more can be asked, so give up    */
+                my_exit(2,"open_file");
+                return (NULL);
+            }
+            
+            if (strlen (answer)==1 && 
+                   toupper((unsigned char)answer[0])=='Q')
+                   my_exit(2,"open_file");
+            else if (WasHelpCalled ( answer )){   /* test the new reply   */
+                   chelp ("File_not_found");
+                }
+            else if (strlen (answer))
+                strcpy (infile_name, answer);     /* fits: same max size  */
+        }                                         /* end of while loop    */
+        strcpy ( answer,infile_name);             /* allow transfer       */
+        return input;
+    }                                               
+
+    /************************* Append  ***********************************/
+    else if ( !strcmp(mode, "a") || !strcmp(mode, "a+")
+           || !strcmp(mode, "ab") ) {
+        input = fopen (infile_name, mode);
+        strcpy ( answer,infile_name);      
+        return input;
+    }                                              
+    /************************* Write    **********************************/
+    else if ( !strcmp( mode, "w") || !strcmp(mode, "w+") 
+            ||!strcmp( mode, "wb") ) {
+
+         while ( verbose == TRUE ) {
+            if ( (input = fopen (infile_name , "r")) ) {
+                fclose(input);                  /* close the filehandle  */
+                fprintf(stderr, "\nWarning :File %.*s "
+                    "exists already \n\tDo you wish to"
+                    " overwrite ? (y/n/h/q)\t [y] ",
+                    (int) strlen(infile_name), infile_name);
+
+                /* An empty reply (or end of input) leaves temp as ""    */
+                /* which selects the default answer, y.                  */
+                open_file_read_line(temp, (int) sizeof temp);
+
+                switch (toupper( (unsigned char) temp[0])) {
+                case 'Y': 
+                case '\0': 
+                case '\n':
+                    verbose = FALSE;            /* overwrite: leave loop */
+                    continue;
+                case 'Q':
+                    my_exit(2,"open_file2");
+                    return (NULL);              /* if my_exit returns    */
+                case 'H':
+                    chelp("file_exists");
+                    continue;
+                default:
+                    fprintf(stderr, 
+                        "\nYou decided not to overwrite, please enter\n"
+                        " another filename, (q)uit, (a)ppend, (h)elp \n"
+                        " (a/q/h/filename)\t[a] ");
+                    open_file_read_line(answer, MAX_FILENAME_LEN);
+                }
+
+                /* A reply of 'a' (or nothing) opens the current file    */
+                /* for appending, 'q' gives up, 'h' shows help.  Any     */
+                /* other single character asks again; a longer reply is  */
+                /* taken as a new filename and tested in turn.           */
+
+                if ( strlen(answer) <= 1 ) {
+                    switch (toupper( (unsigned char) answer[0])) {
+                    case 'Q':
+                        return (NULL);
+                    case 'A':
+                    case '\0':
+                        verbose = FALSE;        /* leave the while loop  */
+                        /* Switch mode locally; the caller's string may  */
+                        /* be a literal and must not be written to.      */
+                        mode = "a+";
+                        break;
+                    case 'H':
+                        chelp("file_append");        
+                        continue;
+                    default:
+                        continue;
+                    }                             /* end of switch       */
+                } else {
+                    strcpy(infile_name, answer);  /* fits: same max size */
+                }
+            } else                              /* filename is unique    */
+                verbose = FALSE;                /* exit the while loop   */
+        }                                       /* end of while verbose  */
+        input = fopen (infile_name,mode);       /* opens filehandle      */
+        strcpy ( answer,infile_name);         
+        return input;
+    }                                           /* matches if w, w+, wb  */
+    return (NULL);
+}
+
+/************** Main just for testing purposes ***************************/
+/* uncomment to test function as a standalone subroutine                 */
+/* will also need to replace my_exit with exit calls                     */
+/*************************************************************************/
+/*  main ()
+ {
+ FILE *test=NULL;
+ if( test = open_file( "test file","","r",NULL))
+    printf( "Success\n");
+ else 
+    printf( "Failed\n");
+ } */
+/*************************************************************************/
+
+
+
diff --git a/tester.c b/tester.c
new file mode 100755
index 0000000..016d673
--- /dev/null
+++ b/tester.c
@@ -0,0 +1,239 @@
+/**************************************************************************/
+/* CodonW codon usage analysis package                                    */
+/* Copyright (C) 2005            John F. Peden                            */
+/* This program is free software; you can redistribute                    */
+/* it and/or modify it under the terms of the GNU General Public License  */
+/* as published by the Free Software Foundation; version 2 of the         */
+/* License,                                                               */
+/*                                                                        */
+/* This program is distributed in the hope that it will be useful, but    */
+/* WITHOUT ANY WARRANTY; without even the implied warranty of             */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           */
+/* GNU General Public License for more details.                           */
+/* You should have received a copy of the GNU General Public License along*/
+/* with this program; if not, write to the Free Software Foundation, Inc.,*/
+/* 675 Mass Ave, Cambridge, MA 02139, USA.                                */
+/*                                                                        */
+/*                                                                        */
+/* The author can be contacted by email (jfp#hanson-codonw at yahoo.com Anti-*/
+/* Spam please change the # in my email to an _)                          */
+/*                                                                        */
+/* For the latest version and information see                             */
+/* http://codonw.sourceforge.net 					  */
+/******** Tester      *****************************************************/
+/* This function is used to teach the genetic code, it generates a random */
+/* series of questions about the selected genetic code.                   */
+/* The questions include                                                  */
+/*  1 and 3 letter amino acid names                                       */
+/*  The translation of each codon                                         */
+/*  The size of each amino acid family                                    */
+/**************************************************************************/
+
+/* rand_num(z): pseudo-random integer in the closed range 1 .. z.          */
+/* rand() is divided by RAND_MAX + 1 (in double precision) so the ratio    */
+/* stays strictly below 1 and the result can never come out as z + 1.      */
+#define rand_num(z) ((int)(((double)rand() / ((double)RAND_MAX + 1.0)) * (double)(z)) + 1)
+
+#ifdef _WINDOWS
+#define beeep Beep(150,150)
+#include <time.h>
+#include <conio.h>
+#else
+#define beeep printf("\007")
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>  
+#include <time.h>
+#include <ctype.h>
+#include "codonW.h"
+
+/* The accuracy of the answers is recorded using these three variables.   */
+/* tester() updates them while the quiz runs and asummary() prints them.  */
+int num_questions = 0;                      /* questions counted so far    */
+int num_cheats = 0;                         /* hints asked for with "?"    */
+int num_wrong = 0;                          /* wrong answers given         */
+
+
+void    tester ( void ) {
+    char    loop;
+    char    main_loop=TRUE;
+    char    tmp_AA [4];
+    char    tmp_AA2[4];
+
+    srand( (unsigned)time( NULL ) );          /* initialise random num gen */
+
+    printf(" Welcome to TESTER \n(which just tests your "
+	     "knowledge of the Genetic code)\n"
+           " The genetic code used is dependant on\n what"
+	     " code is selected in menu 3\n"
+           " The current code is %s %s\n"
+           "\n If you get stuck try typing ? for a hint\n"
+           " To leave type exit or quit\n", pcu->des, pcu->typ);
+
+    /*******************  main loop            ****************************/
+    while ( main_loop )   {
+      int i,x;
+
+        i = rand_num(10);           /*  random number to between 1 and 10 */
+
+        printf("Type Help for help:");
+        /* the switch biases the questions so their freq is not equal     */
+        switch (i) {
+        case 1: 
+        case 2:                     /*  amino acid question               */
+            i = rand_num(21);
+            loop = TRUE;
+            while ( loop ) {
+                printf("\nWhat is the three letter equivalent for the AA"
+                    " %s ", paa->aa1[i]);
+                gets( pm->junk ) ;
+                strcpy ( tmp_AA, paa->aa3[i] );
+                for ( x = 0 ; x < (int)strlen(tmp_AA); x++) 
+                    tmp_AA[x] = (char) toupper( (int) tmp_AA[x]);
+                for ( x = 0 ; x < (int)strlen(pm->junk  ); x++) 
+                    pm->junk  [x] =  (char) toupper(  (int) pm->junk[x]);
+                if ( !strcmp ( pm->junk, "QUIT" ) || 
+                     !strcmp ( pm->junk, "EXIT" )) {
+                    asummary();                 
+                    main_loop = FALSE;
+                    break;
+                }
+
+                if ( !strcmp ( pm->junk,"HELP")) {
+                    chelp("fun");
+                    continue;
+                    }
+
+                if ( !strcmp (pm->junk, "?" ) ) {
+                    printf( "Cheat %s", paa->aa3[i]);
+                    num_cheats++;             /*     The user cheated     */
+                    continue; 
+                }
+                if ( !strcmp (pm->junk  , tmp_AA )) {
+                    loop = FALSE;
+                } else {
+                    num_wrong++;              /*     Wrong answer       */
+                    printf("Wrong answer (try ?)\n");
+                }
+            }
+            break;
+        case 3:                             /* How big is this AA family*/
+            i = rand_num(21);
+            loop = TRUE;
+            while ( loop ) {
+                printf("\nHow many codons encode the Amino Acid %s ",
+                        paa->aa1[i]);
+                gets( pm->junk ) ;
+                for ( x = 0 ; x < (int)strlen(pm->junk); x++) 
+                    pm->junk[x] = (char) toupper( (int) pm->junk[x]);
+  
+                if ( !strcmp ( pm->junk, "QUIT" ) || 
+                     !strcmp ( pm->junk, "EXIT" )) {
+                    asummary();
+                    main_loop = FALSE;
+                    break;
+                }
+  
+                if ( !strcmp ( pm->junk,"HELP")) {
+                    chelp("fun");
+                    continue;
+                    }
+
+                if ( !strcmp (pm->junk, "?" ) ) {
+                    printf( "Cheat %i\n", *(da + i) );
+                    num_cheats++;
+                    continue;
+
+               }
+                
+               
+
+                if ( atoi(pm->junk) == *(da + i) )
+                    loop = FALSE;
+                
+                else {
+                    num_wrong++;
+                    printf("Wrong answer (try ?)\n");
+                }
+            }
+            break;
+        case 4:                                 /* 60% of the time ask    */
+        case 5:                                 /* ask questions about    */     
+        case 6:                                 /* codon to aa translation*/ 
+        case 7: 
+        case 8:
+        case 9: 
+        case 10:
+            i = rand_num(64);
+            loop = TRUE;
+            while ( loop ) {
+                printf("\nName the Amino Acid encoded by the codon %s ", paa->cod[i]);
+                gets( pm->junk );
+                for ( x = 0 ; x < (int)strlen(pm->junk ); x++) 
+                    pm->junk[x] = (char) toupper( (int) pm->junk[x]);
+                if ( !strcmp ( pm->junk, "QUIT" ) || 
+                     !strcmp ( pm->junk, "EXIT" )) {
+                    asummary();
+                    main_loop = FALSE;
+                    break;
+                }
+
+                
+                if ( !strcmp ( pm->junk,"HELP")) {
+                    chelp("fun");
+                    continue;
+                    }
+
+                if ( !strcmp (pm->junk, "?" ) ) {
+                    printf( "Cheat %s (%s)", paa->aa1[pcu->ca[i]]
+                        , paa->aa3[pcu->ca[i]]);
+                    num_cheats++;             /* tell me the answer      */
+                    continue;
+                }
+                /* allow 1 or 3 letter amino acid code as the ans        */
+                strcpy ( tmp_AA, paa->aa1[pcu->ca[i]] );
+                strcpy ( tmp_AA2, paa->aa3[pcu->ca[i]] );
+
+                /* uppercase everything, the AA names and the answer     */
+                for ( x = 0 ; x < (int)strlen(tmp_AA); x++) 
+                    tmp_AA[x] = (char)toupper( (int) tmp_AA[x]);
+                for ( x = 0 ; x < (int)strlen(tmp_AA2); x++) 
+                    tmp_AA2[x] = (char)toupper((int) tmp_AA2[x]);
+                for ( x = 0 ; x < (int)strlen(pm->junk  ); x++) 
+                    pm->junk  [x] = (char)toupper((int) pm->junk[x]);
+
+                if ( !strcmp(tmp_AA, pm->junk) || 
+                     !strcmp(tmp_AA2,pm->junk)  ) {         
+                    loop = FALSE;
+                } else {
+                    printf("Wrong answer (try ?)\n");
+                    num_wrong++;
+                }
+            }
+            break;
+        default:
+            printf("mistake == %i \n", i);
+            exit(0);                             /* error catch            */ 
+            break;
+        }                                        /* end of switch          */
+        num_questions++;
+
+    }                                            /* end of while           */
+
+    return;
+}                                                /* end of main            */
+
+/*********** Asummary ******************************************************/
+/* Writes a summary of the user's results to stdout: the number of         */
+/* questions, of wrong answers and of hints, plus a percentage accuracy,   */
+/* then runs the pause macro.                                              */
+/*                                                                         */
+/* Accuracy is 100 * (questions - wrong) / questions.  Every wrong attempt */
+/* is counted, so a question answered wrongly several times can push the   */
+/* wrong total past the number of questions; the figure is then reported   */
+/* as 0 rather than as a negative percentage.  With no questions counted   */
+/* it is also 0.                                                           */
+/***************************************************************************/
+void    asummary (void) {
+    float accuracy = 0;
+
+    if ( num_questions > 0 && num_wrong < num_questions )
+        accuracy = (float)100 * (num_questions - num_wrong) / 
+                   (float)num_questions;
+
+    printf ( " You answered\n \t %5i questions\n", num_questions);
+    printf ( " \t %5i answers were wrong\n", num_wrong);
+    printf ( " \t %5i times you had to ask for a hint\n", num_cheats);
+    printf ( " \t  %3.0f%c accuracy \n", accuracy, '%');
+    pause;
+    return;
+}
+
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/codonw.git



More information about the debian-med-commit mailing list