[med-svn] r91 - in trunk/packages: . poa poa/branches poa/branches/upstream poa/branches/upstream/current

Charles Plessy charles-guest at costa.debian.org
Wed Aug 23 13:45:43 UTC 2006


Author: charles-guest
Date: 2006-08-23 13:45:39 +0000 (Wed, 23 Aug 2006)
New Revision: 91

Added:
   trunk/packages/poa/
   trunk/packages/poa/branches/
   trunk/packages/poa/branches/upstream/
   trunk/packages/poa/branches/upstream/current/
   trunk/packages/poa/branches/upstream/current/Makefile
   trunk/packages/poa/branches/upstream/current/README
   trunk/packages/poa/branches/upstream/current/align_lpo.c
   trunk/packages/poa/branches/upstream/current/align_lpo2.c
   trunk/packages/poa/branches/upstream/current/align_lpo_po.c
   trunk/packages/poa/branches/upstream/current/align_lpo_po2.c
   trunk/packages/poa/branches/upstream/current/align_score.c
   trunk/packages/poa/branches/upstream/current/black_flag.c
   trunk/packages/poa/branches/upstream/current/black_flag.h
   trunk/packages/poa/branches/upstream/current/blosum80.mat
   trunk/packages/poa/branches/upstream/current/buildup_lpo.c
   trunk/packages/poa/branches/upstream/current/create_seq.c
   trunk/packages/poa/branches/upstream/current/default.h
   trunk/packages/poa/branches/upstream/current/fasta_format.c
   trunk/packages/poa/branches/upstream/current/heaviest_bundle.c
   trunk/packages/poa/branches/upstream/current/lpo.c
   trunk/packages/poa/branches/upstream/current/lpo.h
   trunk/packages/poa/branches/upstream/current/lpo_format.c
   trunk/packages/poa/branches/upstream/current/main.c
   trunk/packages/poa/branches/upstream/current/multidom.seq
   trunk/packages/poa/branches/upstream/current/numeric_data.c
   trunk/packages/poa/branches/upstream/current/poa.h
   trunk/packages/poa/branches/upstream/current/project.h
   trunk/packages/poa/branches/upstream/current/remove_bundle.c
   trunk/packages/poa/branches/upstream/current/seq_util.c
   trunk/packages/poa/branches/upstream/current/seq_util.h
   trunk/packages/poa/branches/upstream/current/stringptr.c
   trunk/packages/poa/tags/
Log:
[svn-inject] Installing original source of poa

Added: trunk/packages/poa/branches/upstream/current/Makefile
===================================================================
--- trunk/packages/poa/branches/upstream/current/Makefile	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/Makefile	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,52 @@
+
+AR=ar rc
+
+TARGETS=poa liblpo.a poa_doc libbflag.a
+
+# align_score.c CAN BE USED TO ADD CUSTOMIZED SCORING FUNCTIONS
+OBJECTS= \
+	align_score.o \
+	main.o
+
+
+LIBOBJECTS= \
+	black_flag.o \
+	seq_util.o \
+	fasta_format.o \
+	align_lpo2.o \
+	align_lpo_po2.o \
+	buildup_lpo.o \
+	lpo.o \
+	heaviest_bundle.o \
+	lpo_format.o \
+	create_seq.o \
+	remove_bundle.o \
+	numeric_data.o \
+	stringptr.o
+
+
+CC = gcc
+#CFLAGS= -g -ansi-strict -W -Wall -DUSE_WEIGHTED_LINKS -DUSE_PROJECT_HEADER -I.
+CFLAGS= -g -DUSE_WEIGHTED_LINKS -DUSE_PROJECT_HEADER -I. -DTRUNCATE_GAP_LENGTH=8
+# -I$(HOME)/lib/include
+# -DREPORT_MAX_ALLOC
+
+clean:
+	rm -f $(OBJECTS) $(LIBOBJECTS) $(TARGETS)
+
+liblpo.a: $(LIBOBJECTS)
+	rm -f $@
+	$(AR) $@ $(LIBOBJECTS)
+	ranlib $@
+
+
+
+# NB: LIBRARY MUST FOLLOW OBJECTS OR LINK FAILS WITH UNRESOLVED REFERENCES!!
+poa: $(OBJECTS) liblpo.a
+	$(CC) -o $@ $(OBJECTS) -lm liblpo.a
+
+what:
+	@echo poa: partial-order based sequence alignment program
+	@echo liblpo.a: partial-order alignment and utilities function library
+
+

Added: trunk/packages/poa/branches/upstream/current/README
===================================================================
--- trunk/packages/poa/branches/upstream/current/README	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/README	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,235 @@
+POA installation notes Sept. 2001
+Chris Lee
+Dept. of Chemistry & Biochemistry
+UCLA
+
+I. COMPILATION
+To compile this program, simply type 'make poa'.
+
+This produces an executable for sequence alignment (poa) and also a linkable 
+library liblpo.a.  The software has been compiled and tested on LINUX.
+
+II. RUNNING POA
+
+Poa has a variety of command line options.  Running poa without any arguments 
+will print a list of the possible command line arguments.  POA may be used to
+construct a PO-MSA, or to analyze a PO-MSA.   
+
+A.  Constructing a PO-MSA
+-------------------------
+
+1.  Required Input:
+
+i. An Alignment Score Matrix File:
+A score matrix file is required, because poa uses it to get the residue
+alphabet and indexing.  Even if poa is not being used to perform 
+multiple sequence alignment, this file must be provided. Any basic alignment 
+matrix which may be used with BLAST may be used here.  This file must be the 
+first command line argument without a flag in order to be interpreted by poa 
+as the score matrix file.  An example score matrix file, blosum80.mat, is 
+provided in this directory. 
+
+Note:  Poa is Case Sensitive.
+Poa can align amino acid sequences to nucleotide sequences.  In order to
+distinguish amino acid residues from nucleic acid residues, poa is case 
+sensitive.  Residues that are upper case are interpreted as amino acids
+while residues that are lower case are interpreted as nucleotides.  Poa
+can handle mixed score matrices containing scores for aligning amino acid 
+residues to nucleotide residues as long as the column and row labels of the 
+matrix are case sensitive.  See blosum80.mat file for an example of just
+such a mixed score matrix.
+
+ii. A FASTA File:
+A FASTA file is required only if poa is being used to construct a 
+new PO-MSA from a list of sequences, or to align a list of sequences to
+an already existing PO-MSA (see Analyzing a PO-MSA below).  This FASTA file 
+should contain sequences to be aligned by poa.  The command line argument
+to get poa to accept a FASTA file as input is '-read_fasta FILENAME'.  Poa 
+will interpret FILENAME as the FASTA sequence file.  An example file, 
+multidom.seq, is provided in this directory.  
+
+Poa is case sensitive (see note above).  All residues in protein sequences 
+in the FASTA file must be upper case to be interpreted as amino acids by poa, 
+while all residues in nucleotide sequences in the FASTA file must be lower 
+case to be interpreted as nucleotides by poa.  To switch the case of all of 
+the letters in the FASTA file to upper case, use the '-toupper' command 
+line argument.  To switch the case of all the letters in the FASTA file to 
+lower case, use the '-tolower' command line argument.
+
+2.  MSA Construction Options:
+
+i.  Aggressive Fusion:
+During the building up of a PO-MSA, if a node i with label 'a' is aligned
+to an align ring which already contains a node j with label 'a', poa simply
+adds the node to the align ring.  It is possible to force poa to do 
+aggressive fusion, so that when a node i with label 'a' is aligned to
+an align ring which already contains a node j with label 'a', node i is
+fused to node j.  The command line argument for accomplishing this is 
+'-fuse_all'.
+
+3.  MSA Output Formats:
+Poa can output a PO-MSA in several formats simultaneously including CLUSTAL, 
+PIR, and PO.  The PO format is the best format since it contains all of the 
+information in the PO-MSA.  The other formats accurately represent the MSA, 
+but since they are RC-MSA formats, they may lose some of the information in the 
+full PO-MSA.
+
+i.  CLUSTAL format:
+This format is the standard CLUSTAL format.  The command line argument to get
+the MSA output in this format is '-clustal FILENAME'.
+
+ii.  PIR format:
+This format is the standard PIR format, which is like FASTA with a '.'
+character representing gaps.  The command line argument to get the MSA
+output in this format is '-pir FILENAME'.
+
+iii.  PO format:
+This format is the standard PO format.  It is described below in the section
+PO format.  The command line argument to get the MSA output in this format is 
+'-po FILENAME'.
+
+Example:  Constructing a MSA of Four Protein Sequences
+Running poa with the following statement will take the fasta formatted 
+sequences in the multidom.seq file, construct a PO-MSA using the scoring 
+matrix in the file blosum80.mat, and then output the PO-MSA in CLUSTAL format
+in the file multidom.aln.
+
+poa -clustal multidom.aln blosum80.mat multidom.seq
+
+The output should be identical to the results of figs. 6 & 7 in the paper.
+
+4.  Other Output:
+
+i. Score Matrix
+Poa will also print to stdout the score matrix stored in the '.mat' file.  
+The command line argument to get poa to do this is '-printmatrix LETTERSET',
+where LETTERSET is a string of letters to be printed with the score matrix.
+For example, if the score matrix is designed for protein alignment the 
+letter set might be 'ARNDCQEGHILKMFPSTWYV'.
+
+ii. Verbose Mode
+Poa will run in verbose mode, printing additional information generated  
+during the run to stdout.  The command line argument to get poa to do run in 
+verbose mode is '-v'.
+
+B.  Analyzing a PO-MSA
+-----------------------
+Poa can also take a PO format file as input and rebuild the PO-MSA data
+structure.  Once this data structure has been rebuilt, it may be analyzed
+for features.  In 'liblpo.a', the linkable poa library, we have included the
+functions necessary to do heaviest bundling and thereby find consensus
+sequences in the PO-MSA (the details of the heaviest bundling algorithm 
+are described elsewhere).  Poa has been written so that users may create 
+their own functions for analyzing a PO-MSA.  We have not included in the 
+'liblpo.a' library the functions that we wrote to analyze PO-MSAs 
+constructed with ESTs and genome sequence to find snps and alternative 
+splice sites.  However, it is possible to design modular library functions 
+that will look for highly specific  biological features in any PO-MSA data 
+structure.
+
+1.  Required Input:
+
+Before the PO-MSA data structure can be analyzed it must be built.  It can
+be built either from a PO file or from a FASTA file, or from both a PO file 
+and a FASTA file.
+
+Note:  POA Requires Either A PO File or a FASTA File
+If neither files are read in by POA it will terminate early, since it 
+has not received any sequence data.
+
+i. A PO file:
+Poa will read in a PO formatted file.  The command line argument to get
+poa to read in a PO formatted file and rebuild the PO-MSA data structure
+is '-read_po FILENAME'.  
+
+It is possible to filter the PO-MSA data structure as it is being rebuilt.  In 
+order to filter the PO-MSA in the PO file to include only a subset of sequences 
+use the command line argument '-subset FILENAME', where the file named FILENAME 
+contains the list of sequence names to be included in the new PO-MSA. In order 
+to filter the PO-MSA in the PO file to exclude a subset of sequences use the 
+command line argument '-remove FILENAME', where the file named FILENAME contains 
+the list of sequences to be excluded from the new PO-MSA.  The names of 
+sequences to be included or excluded should be in the format 'SOURCENAME= *"
+as they are in the PO file.  Lists of sequence source names can be created by 
+using the unix grep utility on the PO file.     
+
+ii. A FASTA File:
+The FASTA file should contain sequences to be aligned by poa.  The command line 
+argument to get poa to accept a FASTA file as input is '-read_fasta FILENAME'.  
+Poa will interpret FILENAME as the FASTA sequence file.  An example file, 
+multidom.seq, is provided in this directory.  (See note above on case sensitivity).
+
+Note:  POA Can Take Both A PO File And A FASTA File As Input
+If both the '-read_po FILENAME' argument and the '-read_fasta FILENAME' 
+argument are given to poa on the command line, then poa will first rebuild the 
+PO-MSA in the PO file, and then it will align the sequences in the FASTA file 
+to this PO-MSA.   
+
+2.  Additional PO Utilities:
+
+i.  Consensus Generation Via Heaviest Bundling Algorithm:
+The heaviest bundling algorithm finds consensus sequences in the
+PO-MSA.  The command line argument for heaviest bundling is '-hb'.  This
+function adds the new consensus sequences to the PO-MSA by storing new
+consensus sequence indices on the in the PO-MSA nodes corresponding to 
+the consensus sequence paths.  The sequence source names for consensus 
+sequences generated by heaviest bundling is CONSENS'i' where 'i' is the
+index of the bundle corresponding to the consensus sequence.
+
+The heaviest bundling algorithm can also take as input a bundling 
+threshold value.  The command line argument for setting a bundling 
+threshold value for heaviest bundling is '-hbmin VALUE'.  This threshold 
+is used during the process of associating sequences with bundles.  If a 
+sequence has a percentage of nodes shared with bundle 'i' greater than this 
+threshold value, it is associated with bundle 'i'.  Iterative heaviest 
+bundling can also be affected by the bundling threshold.  A detailed 
+description of heaviest bundling and heaviest bundling thresholds is given 
+elsewhere.  The consensus sequences corresponding to bundles generated
+by heaviest bundling are listed in the sequence source list.  Additionally,
+in the SOURCEINFO line for each sequence the index of the bundle to which
+that sequence belongs is give.  Finally, using the command line argument 
+'-best' restricts the MSA output to the consensus sequences generated 
+by heaviest bundling.   
+
+III.  PO FILE FORMAT
+
+****************************HEADER****************************************
+VERSION= ~Current version of poa~
+NAME=  ~Name of PO-MSA.  Defaults to name of 1st sequence in PO-MSA~
+TITLE=  ~Title of PO-MSA.  Defaults to title of 1st sequence in PO-MSA~
+LENGTH=  ~Number of nodes in PO-MSA~
+SOURCECOUNT=  ~Number of sequences in PO-MSA~
+
+*********************SEQUENCE SOURCE LIST*********************************
+
+/* For each sequence in the PO-MSA: */
+SOURCENAME= ~Name of sequence taken from FASTA sequence header~
+SOURCEINFO= ~Number of nodes in sequence~ 
+            ~Index of first node containing sequence~
+            ~Sequence weight~
+            ~Index of bundle containing sequence~
+            ~Title of sequence taken from FASTA sequence header~
+
+/* Example: */
+SOURCENAME=GRB2_HUMAN
+SOURCEINFO=217 10 0 3 GROWTH FACTOR RECEPTOR-BOUND PROTEIN 2 (GRB2 ADAPTOR PROTEIN)(SH2)
+
+********************PO-MSA DATA STRUCTURE*********************************
+
+/* For each node in the PO-MSA:  */
+~Residue label~:~'L' delimited index list of other nodes with edges into node~
+                ~'S' delimited index list of sequences stored in each node~
+                ~'A' index of next node in same align ring~ 
+                     NB: align ring indices must form a cycle.
+                     e.g. if two nodes 121 and 122 are aligned, then 
+                     the line for node 121 indicates "A122", and
+                     the line for node 122 indicates "A121".
+
+/* Example: */
+F:L156L155L22S2S3S7A158
+
+********************END***************************************************
+
+
+For more information, see http://www.bioinformatics.ucla.edu/poa.
+

Added: trunk/packages/poa/branches/upstream/current/align_lpo.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/align_lpo.c	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/align_lpo.c	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,329 @@
+
+
+#include "default.h"
+#include "poa.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+
+
+typedef struct {
+  unsigned char x:6;
+  unsigned char y:1;
+  unsigned char is_aligned:1;
+} LPOMove_T;
+
+
+typedef unsigned char LPOGapLength_T;
+
+
+
+
+
+void trace_back_lpo_alignment(int len_x,LPOLetter_T seq_x[],
+			      int len_y,LPOLetter_T seq_y[],
+			      LPOMove_T **move,
+			      LPOLetterRef_T best_x,
+			      LPOLetterRef_T best_y,
+			      LPOLetterRef_T **x_to_y,
+			      LPOLetterRef_T **y_to_x)
+{
+  int i;
+  LPOLetterRef_T new_x,*x_al=NULL,*y_al=NULL;
+  LPOLetterLink_T *left;
+
+  
+  CALLOC(x_al,len_x,LPOLetterRef_T);
+  CALLOC(y_al,len_y,LPOLetterRef_T);
+  LOOP (i,len_x) x_al[i]= INVALID_LETTER_POSITION;
+  LOOP (i,len_y) y_al[i]= INVALID_LETTER_POSITION;
+
+  while (best_x>=0 && best_y>=0) {
+    if (move[best_x][best_y].is_aligned) {/* ALIGNED! MAP best_x <--> best_y */
+      x_al[best_x]=best_y;
+      y_al[best_y]=best_x;
+    }
+
+    if (0==move[best_x][best_y].x /* HIT START OF THE ALIGNMENT, SO QUIT */
+	&& 0==move[best_x][best_y].y)
+      break;
+
+    if ((i=move[best_x][best_y].x)>0) { /* TRACE BACK ON X */
+      for (left= &seq_x[best_x].left;i-- >0;left=left->more) /* USE iTH MOVE*/
+	new_x = left->ipos;
+    }
+    else /* NO MOVE ON X */
+      new_x=best_x;
+
+    if (move[best_x][best_y].y>0) /* ASSUMING seq_y A LINEAR SEQUENCE*/
+      best_y=SEQ_Y_LEFT(best_y); /* TRACE BACK ON Y */
+    best_x=new_x;
+  }
+
+
+  if (x_to_y) /* HAND BACK ALIGNMENT RECIPROCAL MAPPINGS */
+    *x_to_y = x_al;
+  else
+    free(x_al);
+  if (y_to_x)
+    *y_to_x = y_al;
+  else
+    free(y_al);
+  return;
+}
+
+
+
+
+
+/* FOR CONTROLLING THE FREEING OF score[] ARRAYS */
+typedef struct {
+  LPOLetterRef_T last_right; /* LAST POSITION WHICH REFERENCES THIS POSITION*/
+  LPOLetterRef_T my_pos; /* INDEX OF THIS POSITION */
+} LastRightList_T;
+
+
+/* SORT IN ASCENDING ORDER BY last_right */
+int last_right_qsort_cmp(const void *void_a,const void *void_b)
+{
+  const LastRightList_T *a=(const LastRightList_T *)void_a,
+  *b=(const LastRightList_T *)void_b;
+  if (a->last_right < b->last_right)
+    return -1;
+  if (a->last_right == b->last_right)
+    return 0;
+  else
+    return 1;
+}
+
+
+
+
+LastRightList_T *last_right_list(int len,LPOLetter_T seq[])
+{
+  int i;
+  LastRightList_T *list=NULL;
+  LPOLetterLink_T *right;
+
+  CALLOC(list,len,LastRightList_T);
+  LOOP (i,len) {
+    list[i].last_right=list[i].my_pos=i; /* DEFAULT: NOTHING RIGHT OF THIS*/
+    for (right= &seq[i].right;right && right->ipos>=0;right=right->more)
+      if (right->ipos>list[i].last_right)
+	list[i].last_right=right->ipos;
+  }
+  qsort(list,len,sizeof(LastRightList_T),last_right_qsort_cmp);
+  return list;
+}
+
+
+
+
+
+typedef struct {
+  LPOScore_T *score;
+  LPOGapLength_T *gap_length;
+} LPORowFreeList_T;
+
+
+void free_row_list(int nfree_list,LPORowFreeList_T free_list[])
+{
+/*  fprintf(stderr,"Maximum #rows allocated was %d\n\n",nfree_list);*/
+  while (nfree_list-- >0) { /* DUMP EVERYTHING STORED ON THE FREE LIST */
+    free(free_list[nfree_list].score);
+    free(free_list[nfree_list].gap_length);
+  }
+}
+
+
+
+
+
+/** performs partial order alignment:
+  seq_x[] may be a partial order;
+  seq_y[] is assumed to be a linear order (regular sequence);
+  returns the alignment in x_to_y[] and y_to_x, and also 
+  returns the alignment score as the return value */
+LPOScore_T align_lpo (LPOSequence_T *lposeq_x,
+		      LPOSequence_T *lposeq_y,
+		      ResidueScoreMatrix_T *m,
+		      LPOLetterRef_T **x_to_y,
+		      LPOLetterRef_T **y_to_x,
+		      int use_global_alignment)
+{
+  int len_x = lposeq_x->length;
+  int len_y = lposeq_y->length;
+  LPOLetter_T *seq_x = lposeq_x->letter;
+  LPOLetter_T *seq_y = lposeq_y->letter;
+
+  int i,j,j_left,best_x,best_y,nfree_list=0,ilast_right=0;
+  LPOScore_T **score=NULL,*my_score,*my_matrix;
+  LPOMove_T **move=NULL,*move_base=NULL,*my_move;
+  LPOGapLength_T **gap_length=NULL,*my_gap_length;
+  LPOLetterLink_T *left,*my_left;
+  int new_gap_len,insert_x,previous_x,previous_y,
+    i_x,new_score,new_x,new_y,current_gap_length;
+  LPOScore_T match_score,previous_score,
+    insert_x_try,insert_x_score,insert_y_score,best_score= -999999;
+  LPORowFreeList_T *free_list=NULL;
+  LastRightList_T *last_right=NULL;
+  
+  LPOScore_T *gap_penalty_x, *gap_penalty_y;
+  double gap_decrement;
+  int L_trunc = m->trunc_gap_length, L_decay = m->decay_gap_length;
+  
+  CALLOC (gap_penalty_x, L_trunc + L_decay + 1, LPOScore_T);
+  CALLOC (gap_penalty_y, L_trunc + L_decay + 1, LPOScore_T);
+
+  gap_penalty_x[0] = m->gap_penalty_set[0][0];  /* INITIALIZE GAP PENALTY LOOKUP */
+  for (i=1;i<L_trunc;i++)
+    gap_penalty_x[i] = m->gap_penalty_set[0][1];
+  gap_decrement = (m->gap_penalty_set[0][1] - m->gap_penalty_set[0][2]) / ((double)(L_decay + 1));
+  for (i=L_trunc;i<L_trunc+L_decay+1;i++)
+    gap_penalty_x[i] = m->gap_penalty_set[0][1] - (i-L_trunc+1) * gap_decrement;
+  gap_penalty_x[L_trunc + L_decay] = m->gap_penalty_set[0][2];  /* (REDUNDANT) */
+
+  gap_penalty_y[0] = m->gap_penalty_set[1][0];  /* DIFFERENT PENALTY FOR Y-GAP */
+  for (i=1;i<L_trunc;i++)
+    gap_penalty_y[i] = m->gap_penalty_set[1][1];
+  gap_decrement = (m->gap_penalty_set[1][1] - m->gap_penalty_set[1][2]) / ((double)(L_decay + 1));
+  for (i=L_trunc;i<L_trunc+L_decay+1;i++)
+    gap_penalty_y[i] = m->gap_penalty_set[1][1] - (i-L_trunc+1) * gap_decrement;
+  gap_penalty_y[L_trunc + L_decay] = m->gap_penalty_set[1][2];  /* (REDUNDANT) */
+
+  CALLOC(score,len_x,LPOScore_T *); /* ALLOCATE MATRIX STORAGE: ROW POINTERS */
+  CALLOC(move,len_x,LPOMove_T *);
+  CALLOC(gap_length,len_x,LPOGapLength_T *);
+  CALLOC(free_list,len_x,LPORowFreeList_T);
+  CALLOC(move_base,len_x*(len_y+1),LPOMove_T); /*ALLOCATE MATRIX RECTANGLE */
+  last_right=last_right_list(len_x,seq_x); /* GET SORTED LIST TO CONTROL FREE*/
+
+  LOOPF (i,len_x) {/* BUILD UP DP MATRIX, ROW BY ROW */
+    if (nfree_list>0) { /* TAKE THE NEW ROW FROM THE FREE LIST */
+      nfree_list--; /* MOVE BACK TO LAST FREE LIST ENTRY */
+      score[i]=free_list[nfree_list].score+1; /* LEAVE SPACE FOR [-1] ENTRY */
+      gap_length[i]=free_list[nfree_list].gap_length+1;
+    }
+    else { /* NEED TO ALLOCATE A NEW ROW */
+      CALLOC(score[i],len_y+1,LPOScore_T);
+      score[i]++; /* LEAVE SPACE FOR [-1] ENTRY */
+      CALLOC(gap_length[i],len_y+1,LPOGapLength_T);
+      gap_length[i]++; /* LEAVE SPACE FOR [-1] ENTRY */
+    }
+    move[i]=move_base+i*(len_y+1)+1; /* LEAVE SPACE FOR [-1] ENTRY */
+    my_move=move[i]; /* USED TO SPEED UP MATRIX ACCESS INSIDE INNER LOOP*/
+    my_score=score[i];
+    my_gap_length=gap_length[i];
+    score[i][-1]= -999999; /* UNACCEPTABLE SCORE ENSURES -1 NEVER CHOSEN*/
+    my_matrix=m->score[seq_x[i].letter];
+    if (seq_x[i].left.ipos>=0) /* AT LEAST ONE VALID POSITION TO THE LEFT */
+      my_left= &seq_x[i].left;
+    else /* THERE IS NO POSITION TO THE LEFT */
+      my_left=NULL;
+    LOOPF (j,len_y) {
+      j_left=SEQ_Y_LEFT(j); /* POSITION TO THE LEFT OF j */
+      previous_score=previous_x=previous_y=0;
+      i_x=1;
+      insert_x_score= -999999;
+      for (left=my_left;left;left=left->more) {
+	if (move[left->ipos][j].x>0) /* COULD BE [X,0] GAP CONTINUATION */
+	  current_gap_length=gap_length[left->ipos][j];
+	else/*NOT AN EXTENSION OF A [X,0] GAP, SO TREAT AS START OF NEW GAP*/
+	  current_gap_length=0;
+	insert_x_try=score[left->ipos][j] /* FIND BEST [X,0] MOVE */
+	  + left->score /* INCLUDE WEIGHTING FROM THIS EDGE */
+	  - gap_penalty_x[current_gap_length];
+	if (insert_x_try>insert_x_score) { /* IF BEST insert_x MOVE, SAVE*/
+	  insert_x=i_x;
+	  insert_x_score=insert_x_try;
+	  new_gap_len=current_gap_length+1;
+	  if (new_gap_len>L_trunc+L_decay)  /* PREVENT OVERFLOW */
+	    new_gap_len=L_trunc+L_decay;
+	}
+
+	 /* FIND BEST [X,1] MOVE */
+	if (score[left->ipos][j_left]+left->score>previous_score) {
+	  previous_score=score[left->ipos][j_left]+left->score;
+	  previous_x=i_x;
+	  previous_y=1; /* ASSUMING seq_y JUST LINEAR SEQUENCE */
+	}
+	i_x++; /* ADVANCE X MOVE INDEX */
+      } /* DONE SCANNING PREDECESSORS ON X */
+
+      match_score = previous_score /* TAKE BEST PREDECESSOR */
+	+ my_matrix[seq_y[j].letter];
+
+      if (match_score>insert_x_score) { /* PREFER [X,1] MOVE */
+	new_score=match_score;
+	new_x=previous_x;
+	new_y=previous_y;
+	new_gap_len=0;
+      }
+      else { /* PREFER [X,0] MOVE */
+	new_score=insert_x_score;
+	new_x=insert_x;
+	new_y=0;
+      }
+
+       /* [0,1] MOVE */
+      if (my_move[j_left].y==1) /* COULD BE [0,1] GAP CONTINUATION */
+	current_gap_length=my_gap_length[j_left];
+      else /* NOT AN EXTENSION OF A [0,1] GAP, SO TREAT AS START OF NEW GAP*/
+	current_gap_length=0;
+      insert_y_score=my_score[j_left]-gap_penalty_y[current_gap_length];
+
+      if (insert_y_score<new_score) { /* [new_x,new_y] MOVE IS BEST */
+	my_score[j]=new_score;
+	my_move[j].x=new_x;
+	my_move[j].y=new_y;
+	my_gap_length[j]=new_gap_len;
+	if (new_gap_len==0)
+	  my_move[j].is_aligned=1;
+      }
+      else { /* [0,1] MOVE IS BEST */
+	my_score[j]=insert_y_score;
+	my_move[j].x=0;
+	my_move[j].y=1;
+	my_gap_length[j]=current_gap_length+1;
+	if (my_gap_length[j]>L_trunc+L_decay) /* PREVENT OVERFLOW */
+	  my_gap_length[j]=L_trunc+L_decay;
+      }
+      if (my_score[j]>best_score) { /* RECORD BEST MOVE */
+	best_score=my_score[j];
+	best_x=i;
+	best_y=j;
+      }
+    }
+    while (ilast_right<len_x && last_right[ilast_right].last_right<=i) {
+      free_list[nfree_list].gap_length= /* PUSH ROW ONTO FREE LIST FOR REUSE*/
+	gap_length[last_right[ilast_right].my_pos]-1;/*READJUST TO BASE ADDR!*/
+      free_list[nfree_list].score=score[last_right[ilast_right].my_pos]-1;
+      score[last_right[ilast_right].my_pos]=NULL; /*PREVENT DANGLING POINTER!*/
+      gap_length[last_right[ilast_right].my_pos]=NULL;
+      nfree_list++; /* INCREMENT FREE LIST SIZE */
+      ilast_right++; /* MOVE TO THE NEXT POTENTIAL ROW FOR FREEING */
+    }
+  } /* DYNAMIC PROGRAMING MATRIX COMPLETE, NOW TRACE BACK FROM best_x,best_y*/
+
+  IF_GUARD(best_x>=len_x || best_y>=len_y,1.1,(ERRTXT,"Bounds exceeded!\nbest_x,best_y:%d,%d\tlen:%d,%d\n",best_x,best_y,len_x,len_y),CRASH);
+  trace_back_lpo_alignment(len_x,seq_x,len_y,seq_y,move,best_x,best_y,
+			   x_to_y,y_to_x);
+
+
+  FREE (gap_penalty_x);
+  FREE (gap_penalty_y);
+
+  free_row_list(nfree_list,free_list);
+  FREE(move_base); /* DUMP ALLOCATED MATRIX */
+  FREE(score);
+  FREE(move);
+  FREE(gap_length);
+  FREE(free_list);
+  FREE(last_right);
+  return best_score;
+}
+
+
+
+
+

Added: trunk/packages/poa/branches/upstream/current/align_lpo2.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/align_lpo2.c	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/align_lpo2.c	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,389 @@
+
+#include "default.h"
+#include "poa.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+
+/** set nonzero for old scoring (gap-opening penalty for X-Y transition) */
+#define DOUBLE_GAP_SCORING (0)
+
+
+typedef struct {
+  unsigned char x:7;
+  unsigned char y:1;
+}
+DPMove_T;
+
+typedef struct {
+  LPOScore_T score;
+  short gap_x, gap_y;
+}
+DPScore_T;
+
+/** is node 'i' the first node in any sequence in lposeq? */
+static int is_initial_node (int i, LPOSequence_T *lposeq)
+{
+  LPOLetterSource_T *src = &((lposeq->letter[i]).source);
+  while (src != NULL && src->iseq >= 0) {
+    if (src->ipos == 0) {
+      return 1;
+    }
+    src = src->more;
+  }
+  return 0;
+}
+
+/** is node 'i' the last node in any sequence in lposeq? */
+static int is_final_node (int i, LPOSequence_T *lposeq)
+{
+  LPOLetterSource_T *src = &((lposeq->letter[i]).source);
+  while (src != NULL && src->iseq >= 0) {
+    if (src->ipos == (lposeq->source_seq[src->iseq]).length - 1) {
+      return 1;
+    }
+    src = src->more;
+  }
+  return 0;
+}
+
+
+static void get_seq_left_and_final (LPOSequence_T *lposeq_x,
+				    LPOLetterLink_T ***x_left_ptr,
+				    int **is_final_node_ptr)
+{
+  int i, len_x = lposeq_x->length;
+  LPOLetter_T *seq_x = lposeq_x->letter;
+  int *is_final_node_x = NULL;
+  LPOLetterLink_T **x_left = NULL;
+  
+  CALLOC (is_final_node_x, len_x, int);
+  
+  for (i=0; i<len_x; i++) {
+    is_final_node_x[i] = is_final_node (i, lposeq_x);
+  }
+  
+  CALLOC (x_left, len_x, LPOLetterLink_T *);
+  
+  for (i=0; i<len_x; i++) {
+    if (is_initial_node (i, lposeq_x) && seq_x[i].left.ipos != -1) {
+      CALLOC (x_left[i], 1, LPOLetterLink_T);
+      x_left[i]->ipos = -1;
+      x_left[i]->score = 0;
+      x_left[i]->more = &seq_x[i].left;
+    }
+    else {
+      x_left[i] = &seq_x[i].left;
+    }
+  }
+
+  (*is_final_node_ptr) = is_final_node_x;
+  (*x_left_ptr) = x_left;
+}
+
+  
+static void trace_back_lpo_alignment (int len_x, int len_y,
+				      DPMove_T **move,
+				      LPOLetterLink_T **x_left,
+				      LPOLetterRef_T best_x, LPOLetterRef_T best_y,
+				      LPOLetterRef_T **x_to_y,
+				      LPOLetterRef_T **y_to_x)
+{
+  int i, xmove, ymove;
+  LPOLetterRef_T *x_al = NULL, *y_al = NULL;
+  LPOLetterLink_T *left;
+  
+  CALLOC (x_al, len_x, LPOLetterRef_T);
+  CALLOC (y_al, len_y, LPOLetterRef_T);
+  LOOP (i,len_x) x_al[i] = INVALID_LETTER_POSITION;
+  LOOP (i,len_y) y_al[i] = INVALID_LETTER_POSITION;
+  
+  while (best_x >= 0 && best_y >= 0) {
+
+    xmove = move[best_y][best_x].x;
+    ymove = move[best_y][best_x].y;
+    
+    if (xmove>0 && ymove>0) { /* ALIGNED! MAP best_x <--> best_y */
+      x_al[best_x]=best_y;
+      y_al[best_y]=best_x;
+    }
+
+    if (xmove == 0 && ymove == 0) { /* FIRST ALIGNED PAIR */
+      x_al[best_x]=best_y;
+      y_al[best_y]=best_x;
+      break;  /* FOUND START OF ALIGNED REGION, SO WE'RE DONE */
+    }
+
+    if (xmove>0) { /* TRACE BACK ON X */
+      left = x_left[best_x];
+      while ((--xmove)>0) {
+	left = left->more;
+      }
+      best_x = left->ipos;
+    }
+    
+    if (ymove>0) { /* TRACE BACK ON Y */
+      best_y--;
+    }
+  }
+    
+  if (x_to_y) /* HAND BACK ALIGNMENT RECIPROCAL MAPPINGS */
+    *x_to_y = x_al;
+  else
+    free(x_al);
+  if (y_to_x)
+    *y_to_x = y_al;
+  else
+    free(y_al);
+  
+  return;
+}
+
+
+/** performs partial order alignment:
+    lposeq_x may be a partial order;
+    lposeq_y is assumed to be a linear order (regular sequence);
+    returns the alignment in x_to_y[] and y_to_x[], and also 
+    returns the alignment score as the return value.
+*/
+
+LPOScore_T align_lpo (LPOSequence_T *lposeq_x,
+		      LPOSequence_T *lposeq_y,
+		      ResidueScoreMatrix_T *m,
+		      LPOLetterRef_T **x_to_y,
+		      LPOLetterRef_T **y_to_x,
+		      int use_global_alignment)
+{
+  int len_x = lposeq_x->length;
+  int len_y = lposeq_y->length;
+  LPOLetter_T *seq_x = lposeq_x->letter;
+  LPOLetter_T *seq_y = lposeq_y->letter;
+  
+  int i, j, xcount, prev_gap, next_gap;
+  int best_x = -2, best_y = -2;
+  LPOScore_T best_score = -999999;
+  int *is_final_node_x;
+  LPOLetterLink_T **x_left = NULL, *xl;
+  DPMove_T **move = NULL, *my_move;
+  
+  DPScore_T *curr_score = NULL, *prev_score = NULL, *init_col_score = NULL, *my_score, *swap;
+  
+  LPOScore_T *gap_penalty_x, *gap_penalty_y;
+  double gap_decrement;
+  int *next_gap_array, *next_perp_gap_array;
+  int L_trunc = m->trunc_gap_length, L_decay = m->decay_gap_length;
+    
+  LPOScore_T try_score, insert_x_score, insert_y_score, match_score;
+  int insert_x_x, insert_x_gap;
+  int insert_y_y, insert_y_gap;
+  int match_x, match_y;
+  
+  long n_edges = 0;
+  
+  CALLOC (gap_penalty_x, L_trunc + L_decay + 2, LPOScore_T);
+  CALLOC (gap_penalty_y, L_trunc + L_decay + 2, LPOScore_T);
+  CALLOC (next_gap_array, L_trunc + L_decay + 2, int);
+  CALLOC (next_perp_gap_array, L_trunc + L_decay + 2, int);
+  
+  gap_penalty_x[0] = m->gap_penalty_set[0][0];  /* INITIALIZE GAP PENALTY LOOKUP */
+  for (i=1;i<L_trunc;i++)
+    gap_penalty_x[i] = m->gap_penalty_set[0][1];
+  gap_decrement = (m->gap_penalty_set[0][1] - m->gap_penalty_set[0][2]) / ((double)(L_decay + 1));
+  for (i=L_trunc;i<L_trunc+L_decay+1;i++)
+    gap_penalty_x[i] = m->gap_penalty_set[0][1] - (i-L_trunc+1) * gap_decrement;
+  gap_penalty_x[L_trunc + L_decay] = m->gap_penalty_set[0][2];  /* (REDUNDANT) */
+  gap_penalty_x[L_trunc + L_decay + 1] = 0;  /* INITIAL STATE FOR LOCAL ALIGNMENT */
+
+  gap_penalty_y[0] = m->gap_penalty_set[1][0];  /* DIFFERENT PENALTY FOR Y-GAP */
+  for (i=1;i<L_trunc;i++)
+    gap_penalty_y[i] = m->gap_penalty_set[1][1];
+  gap_decrement = (m->gap_penalty_set[1][1] - m->gap_penalty_set[1][2]) / ((double)(L_decay + 1));
+  for (i=L_trunc;i<L_trunc+L_decay+1;i++)
+    gap_penalty_y[i] = m->gap_penalty_set[1][1] - (i-L_trunc+1) * gap_decrement;
+  gap_penalty_y[L_trunc + L_decay] = m->gap_penalty_set[1][2];  /* (REDUNDANT) */
+  gap_penalty_y[L_trunc + L_decay + 1] = 0;  /* INITIAL STATE FOR LOCAL ALIGNMENT */
+  
+  for (i=0; i<L_trunc+L_decay+2; i++) {
+    /* GAP LENGTH EXTENSION RULE: */
+    /* 0->1, 1->2, 2->3, ..., M-1->M, M->M, M+1->M+1. */
+    next_gap_array[i] = (i < L_trunc+L_decay) ? i+1 : i;
+    next_perp_gap_array[i] = (DOUBLE_GAP_SCORING && i < L_trunc+L_decay+1) ? 0 : next_gap_array[i];
+  }
+  
+  get_seq_left_and_final (lposeq_x, &x_left, &is_final_node_x);
+  
+  CALLOC (move, len_y, DPMove_T *);
+  for (i=0; i<len_y; i++) {
+    CALLOC (move[i], len_x, DPMove_T);
+  }
+  
+  CALLOC (init_col_score, len_y+1, DPScore_T);
+  init_col_score = &(init_col_score[1]);
+  
+  CALLOC (curr_score, len_x+1, DPScore_T);
+  curr_score = &(curr_score[1]);
+  CALLOC (prev_score, len_x+1, DPScore_T);
+  prev_score = &(prev_score[1]);
+  
+  /* FILL INITIAL ROW. */
+  /* GLOBAL ALIGNMENT: no free gaps */
+  /* LOCAL ALIGNMENT: free initial gap */
+  
+  curr_score[-1].score = 0;
+  curr_score[-1].gap_x = curr_score[-1].gap_y = 
+    (use_global_alignment) ? 0 : TRUNCATE_GAP_LENGTH+1;
+  
+  for (i=0; i<len_x; i++) {
+    for (xcount = 1, xl = x_left[i]; xl != NULL; xcount++, xl = xl->more) {
+      prev_gap = curr_score[xl->ipos].gap_x;
+      try_score = curr_score[xl->ipos].score + xl->score - gap_penalty_x[prev_gap];
+      if (xcount == 1 || try_score > curr_score[i].score) {
+	curr_score[i].score = try_score;
+	curr_score[i].gap_x = next_gap_array[prev_gap];
+	curr_score[i].gap_y = next_perp_gap_array[prev_gap];
+      }
+    }
+  }
+  
+  /* FILL INITIAL COLUMN. */
+  
+  init_col_score[-1] = curr_score[-1];
+  for (i=0; i<len_y; i++) {
+    prev_gap = init_col_score[i-1].gap_y;
+    init_col_score[i].score = init_col_score[i-1].score - gap_penalty_y[prev_gap];
+    init_col_score[i].gap_x = next_perp_gap_array[prev_gap];
+    init_col_score[i].gap_y = next_gap_array[prev_gap];
+  }
+  
+  
+  /** MAIN DYNAMIC PROGRAMMING LOOP **/
+
+
+  /* OUTER LOOP (i-th position in linear seq y): */
+  for (i=0; i<len_y; i++) {
+    
+    swap = prev_score; prev_score = curr_score; curr_score = swap;
+    
+    curr_score[-1] = init_col_score[i];
+    
+    /* INNER LOOP (j-th position in LPO x): */
+    for (j=0; j<len_x; j++) {
+      
+      /* ONLY POSSIBLE Y-INSERTION: trace back to (i-1, j). */
+      prev_gap = prev_score[j].gap_y;
+      insert_y_score = prev_score[j].score - gap_penalty_y[prev_gap];
+      insert_y_gap = prev_gap;
+      
+      /* ONLY ONE Y-PREDECESSOR */
+      match_y = insert_y_y = 1;
+
+      /* LOOP OVER x-predecessors: */
+      for (xcount = 1, xl = x_left[j]; xl != NULL; xcount++, xl = xl->more) {
+	
+	/* IMPROVE XY-MATCH?: trace back to (i-1, j'=xl->ipos) */
+	try_score = prev_score[xl->ipos].score + xl->score;
+	if (xcount == 1 || try_score > match_score) {
+	  match_score = try_score;
+	  match_x = xcount;
+	}
+	
+	/* IMPROVE X-INSERTION?: trace back to (i, j'=xl->ipos) */
+	prev_gap = curr_score[xl->ipos].gap_x;
+	try_score = curr_score[xl->ipos].score + xl->score - gap_penalty_x[prev_gap];
+	if (xcount == 1 || try_score > insert_x_score) {
+	  insert_x_score = try_score;
+	  insert_x_x = xcount;
+	  insert_x_gap = prev_gap;
+	}
+      }
+
+      if (0 == use_global_alignment && match_score <= 0) {
+	match_score = 0;
+	match_x = match_y = 0;  /* FIRST ALIGNED PAIR */
+      }
+      
+      n_edges += (xcount-1);
+      match_score += m->score[(int)seq_x[j].letter][(int)seq_y[i].letter];
+      
+      my_score = &curr_score[j];
+      my_move = &move[i][j];
+      
+      if (match_score > insert_y_score && match_score > insert_x_score) {
+	/* XY-MATCH */
+	my_score->score = match_score;
+	my_score->gap_x = 0;
+	my_score->gap_y = 0;
+	my_move->x = match_x;
+	my_move->y = match_y;
+      }
+      else if (insert_x_score > insert_y_score) {
+	/* X-INSERTION */
+	my_score->score = insert_x_score;
+	my_score->gap_x = next_gap_array[insert_x_gap];
+	my_score->gap_y = next_perp_gap_array[insert_x_gap];
+	my_move->x = insert_x_x;
+	my_move->y = 0;
+      }
+      else {
+	/* Y-INSERTION */
+	my_score->score = insert_y_score;
+	my_score->gap_x = next_perp_gap_array[insert_y_gap];
+	my_score->gap_y = next_gap_array[insert_y_gap];
+	my_move->x = 0;
+	my_move->y = insert_y_y;
+      }
+
+      /* RECORD BEST START FOR TRACEBACK */
+      /* KEEPING ONLY FINAL-FINAL BESTS FOR GLOBAL ALIGNMENT */    
+      if (my_score->score >= best_score && (0 == use_global_alignment || (is_final_node_x[j] && i==len_y-1))) {
+	if (my_score->score > best_score || (j == best_x && i < best_y) || j < best_x) {
+	  best_score = my_score->score;
+	  best_x = j;
+	  best_y = i;
+	}
+      }
+    }
+  }
+  
+  IF_GUARD(best_x>=len_x || best_y>=len_y,1.1,(ERRTXT,"Bounds exceeded!\nbest_x,best_y:%d,%d\tlen:%d,%d\n",best_x,best_y,len_x,len_y),CRASH);
+  
+  /*
+    fprintf (stderr, "aligned (%d nodes, %ld edges) to (%d nodes)\n", len_x, n_edges/len_y, len_y);
+    fprintf (stderr, "best score %d @ (%d %d)\n", best_score, best_x, best_y);
+  */
+  
+  /* DYNAMIC PROGRAMING MATRIX COMPLETE, NOW TRACE BACK FROM best_x, best_y */
+  trace_back_lpo_alignment (len_x, len_y, move, x_left,
+			    best_x, best_y,
+			    x_to_y, y_to_x);
+  
+  
+  FREE (gap_penalty_x);
+  FREE (gap_penalty_y);
+  FREE (next_gap_array);
+  FREE (next_perp_gap_array);
+  
+  prev_score = &(prev_score[-1]);
+  FREE (prev_score);
+  curr_score = &(curr_score[-1]);
+  FREE (curr_score);
+  
+  init_col_score = &(init_col_score[-1]);
+  FREE (init_col_score);
+  
+  FREE (is_final_node_x);
+  
+  for (i=0; i<len_x; i++) {
+    if (x_left[i] != NULL && x_left[i] != &seq_x[i].left) {
+      FREE (x_left[i]);
+    }
+  }
+  FREE (x_left);
+  
+  for (i=0; i<len_y; i++) {
+    FREE (move[i]);
+  }
+  FREE (move);
+  
+  return best_score;
+}

Added: trunk/packages/poa/branches/upstream/current/align_lpo_po.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/align_lpo_po.c	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/align_lpo_po.c	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,272 @@
+
+
+
+#include "default.h"
+#include "poa.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+
+
+
+
+typedef struct {
+  unsigned char x;
+  unsigned char y:7;
+  unsigned char is_aligned:1;
+} LPOMove_T;
+
+
+typedef unsigned char LPOGapLength_T;
+
+
+
+
+
+void trace_back_lpo_po_alignment(int len_x,LPOLetter_T seq_x[],
+				 int len_y,LPOLetter_T seq_y[],
+				 LPOMove_T **move,
+				 LPOLetterRef_T best_x,
+				 LPOLetterRef_T best_y,
+				 LPOLetterRef_T **x_to_y,
+				 LPOLetterRef_T **y_to_x)
+{
+  int i;
+  LPOLetterRef_T new_x,*x_al=NULL,*y_al=NULL;
+  LPOLetterLink_T *left;
+
+  
+  CALLOC(x_al,len_x,LPOLetterRef_T);
+  CALLOC(y_al,len_y,LPOLetterRef_T);
+  LOOP (i,len_x) x_al[i]= INVALID_LETTER_POSITION;
+  LOOP (i,len_y) y_al[i]= INVALID_LETTER_POSITION;
+
+  while (best_x>=0 && best_y>=0) {
+    if (move[best_x][best_y].is_aligned) {/* ALIGNED! MAP best_x <--> best_y */
+      x_al[best_x]=best_y;
+      y_al[best_y]=best_x;
+    }
+
+    if (0==move[best_x][best_y].x /* HIT START OF THE ALIGNMENT, SO QUIT */
+	&& 0==move[best_x][best_y].y)
+      break;
+
+    if ((i=move[best_x][best_y].x)>0) { /* TRACE BACK ON X */
+      for (left= &seq_x[best_x].left;--i >0;left=left->more); /* USE iTH MOVE*/
+      new_x = left->ipos;
+    }
+    else new_x=best_x;
+    if ((i=move[best_x][best_y].y)>0) { /* TRACE BACK ON Y */
+      for (left= &seq_y[best_y].left;--i >0;left=left->more); /* USE iTH MOVE*/
+      best_y = left->ipos;
+    }
+    best_x=new_x;
+  }
+
+
+  if (x_to_y) /* HAND BACK ALIGNMENT RECIPROCAL MAPPINGS */
+    *x_to_y = x_al;
+  else
+    free(x_al);
+  if (y_to_x)
+    *y_to_x = y_al;
+  else
+    free(y_al);
+  return;
+}
+
+
+
+/** performs partial order alignment:
+  seq_x[] may be a partial order;
+  seq_y[] may be a partial order;
+  returns the alignment in x_to_y[] and y_to_x, and also 
+  returns the alignment score as the return value */
+LPOScore_T align_lpo_po (LPOSequence_T *lposeq_x,
+			 LPOSequence_T *lposeq_y,
+			 ResidueScoreMatrix_T *m,
+			 LPOLetterRef_T **x_to_y,
+			 LPOLetterRef_T **y_to_x,
+			 LPOScore_T (*scoring_function)
+			 (int,int,LPOLetter_T [],LPOLetter_T [],ResidueScoreMatrix_T *),
+			 int use_global_alignment)
+{
+  int len_x = lposeq_x->length;
+  int len_y = lposeq_y->length;
+  LPOLetter_T *seq_x = lposeq_x->letter;
+  LPOLetter_T *seq_y = lposeq_y->letter;
+  
+  int i,j,j_left,best_x,best_y,nfree_list=0,ilast_right=0;
+  LPOScore_T **score=NULL,*my_score,*my_matrix,*score_base=NULL;
+  LPOMove_T **move=NULL,*move_base=NULL,*my_move;
+  LPOGapLength_T **gap_length=NULL,*my_gap_length,*gap_length_base=NULL;
+  LPOLetterLink_T *left,*my_left,*y_left;
+  int new_gap_len,insert_x,previous_x,previous_y,
+    i_x,new_score,new_x,new_y,current_gap_length,i_y,insert_y;
+  LPOScore_T match_score,previous_score,
+    insert_x_try,insert_x_score,insert_y_score,best_score= -999999;
+
+  LPOScore_T *gap_penalty_x, *gap_penalty_y;
+  double gap_decrement;
+  int L_trunc = m->trunc_gap_length, L_decay = m->decay_gap_length;
+    
+  CALLOC (gap_penalty_x, L_trunc + L_decay + 1, LPOScore_T);
+  CALLOC (gap_penalty_y, L_trunc + L_decay + 1, LPOScore_T);
+
+  gap_penalty_x[0] = m->gap_penalty_set[0][0];  /* INITIALIZE GAP PENALTY LOOKUP */
+  for (i=1;i<L_trunc;i++)
+    gap_penalty_x[i] = m->gap_penalty_set[0][1];
+  gap_decrement = (m->gap_penalty_set[0][1] - m->gap_penalty_set[0][2]) / ((double)(L_decay + 1));
+  for (i=L_trunc;i<L_trunc+L_decay+1;i++)
+    gap_penalty_x[i] = m->gap_penalty_set[0][1] - (i-L_trunc+1) * gap_decrement;
+  gap_penalty_x[L_trunc + L_decay] = m->gap_penalty_set[0][2];  /* (REDUNDANT) */
+
+  gap_penalty_y[0] = m->gap_penalty_set[1][0];  /* DIFFERENT PENALTY FOR Y-GAP */
+  for (i=1;i<L_trunc;i++)
+    gap_penalty_y[i] = m->gap_penalty_set[1][1];
+  gap_decrement = (m->gap_penalty_set[1][1] - m->gap_penalty_set[1][2]) / ((double)(L_decay + 1));
+  for (i=L_trunc;i<L_trunc+L_decay+1;i++)
+    gap_penalty_y[i] = m->gap_penalty_set[1][1] - (i-L_trunc+1) * gap_decrement;
+  gap_penalty_y[L_trunc + L_decay] = m->gap_penalty_set[1][2];  /* (REDUNDANT) */
+
+  CALLOC(score,len_x,LPOScore_T *); /* ALLOCATE MATRIX STORAGE: ROW POINTERS */
+  CALLOC(move,len_x,LPOMove_T *);
+  CALLOC(gap_length,len_x,LPOGapLength_T *);
+  CALLOC(score_base,len_x*(len_y+1),LPOScore_T); /*ALLOCATE MATRIX RECTANGLE */
+  CALLOC(move_base,len_x*(len_y+1),LPOMove_T); /*ALLOCATE MATRIX RECTANGLE */
+  CALLOC(gap_length_base,len_x*(len_y+1),LPOGapLength_T); /*ALLOCATE MATRIX RECTANGLE */
+
+
+  LOOPF (i,len_x) {/* BUILD UP DP MATRIX, ROW BY ROW */
+    score[i]=score_base+i*(len_y+1)+1; /* LEAVE SPACE FOR [-1] ENTRY */
+    move[i]=move_base+i*(len_y+1)+1; /* LEAVE SPACE FOR [-1] ENTRY */
+    gap_length[i]=gap_length_base+i*(len_y+1)+1; /*LEAVE SPACE FOR [-1] ENTRY*/
+    my_move=move[i]; /* USED TO SPEED UP MATRIX ACCESS INSIDE INNER LOOP*/
+    my_score=score[i];
+    my_gap_length=gap_length[i];
+    score[i][-1]= -999999; /* UNACCEPTABLE SCORE ENSURES -1 NEVER CHOSEN*/
+    /* my_matrix=m->score[seq_x[i].letter]; NOT USED */
+    if (seq_x[i].left.ipos>=0) /* AT LEAST ONE VALID POSITION TO THE LEFT */
+      my_left= &seq_x[i].left;
+    else /* THERE IS NO POSITION TO THE LEFT */
+      my_left=NULL;
+    LOOPF (j,len_y) {
+      j_left=SEQ_Y_LEFT(j); /* POSITION TO THE LEFT OF j */
+      previous_score=previous_x=previous_y=0;
+      i_x=1;
+      insert_x_score= -999999;
+      for (left=my_left;left;left=left->more) {
+	if (move[left->ipos][j].x>0) /* COULD BE [X,0] GAP CONTINUATION */
+	  current_gap_length=gap_length[left->ipos][j];
+	else/*NOT AN EXTENSION OF A [X,0] GAP, SO TREAT AS START OF NEW GAP*/
+	  current_gap_length=0;
+	insert_x_try=score[left->ipos][j] /* FIND BEST [X,0] MOVE */
+	  + left->score /* INCLUDE WEIGHTING FROM THIS EDGE */
+	  - gap_penalty_x[current_gap_length];
+	if (insert_x_try>insert_x_score) { /* IF BEST insert_x MOVE, SAVE*/
+	  insert_x=i_x;
+	  insert_x_score=insert_x_try;
+	  new_gap_len=current_gap_length+1;
+	  if (new_gap_len>TRUNCATE_GAP_LENGTH) /* PREVENT OVERFLOW */
+	    new_gap_len=TRUNCATE_GAP_LENGTH;
+	}
+
+	 /* FIND BEST [X,Y] MOVE */
+	if (seq_y[j].left.ipos>=0){/*AT LEAST ONE VALID POSITION TO THE LEFT*/
+	  i_y=1;
+	  for (y_left= &seq_y[j].left;y_left;y_left=y_left->more) {
+	    if (score[left->ipos][y_left->ipos] + left->score + y_left->score
+		>previous_score) {
+	      previous_score=score[left->ipos][y_left->ipos] 
+		+ left->score + y_left->score;
+	      previous_x=i_x;
+	      previous_y=i_y;
+	    }
+	    i_y++;
+	  }
+	}
+	i_x++; /* ADVANCE X MOVE INDEX */
+      } /* DONE SCANNING PREDECESSORS ON X */
+
+      match_score = previous_score /* TAKE BEST PREDECESSOR */
+	+ scoring_function(i,j,seq_x,seq_y,m);
+#ifdef USE_LOCAL_NEUTRALITY_CORRECTION /* NO LONGER USED */
+      if (seq_x[i].score<seq_y[j].score) /* USE STRONGEST NEUTRALITY ADJ'T*/
+	match_score += seq_x[i].score;
+      else
+	match_score += seq_y[j].score;
+#endif
+      if (match_score>insert_x_score) { /* PREFER [X,Y] MOVE */
+	new_score=match_score;
+	new_x=previous_x;
+	new_y=previous_y;
+	new_gap_len=0;
+      }
+      else { /* PREFER [X,0] MOVE */
+	new_score=insert_x_score;
+	new_x=insert_x;
+	new_y=0;
+      }
+
+       /* [0,Y] MOVE */
+      insert_y_score= -999999;
+      if (seq_y[j].left.ipos>=0){/*AT LEAST ONE VALID POSITION TO THE LEFT*/
+	i_y=1;
+	for (y_left= &seq_y[j].left;y_left;y_left=y_left->more) {
+	  if (my_move[y_left->ipos].y>0) /* COULD BE [0,1] GAP CONTINUATION */
+	    current_gap_length=my_gap_length[y_left->ipos];
+	  else/*NOT AN EXTENSION OF A [0,1] GAP, SO TREAT AS START OF NEW GAP*/
+	    current_gap_length=0;
+	  if (insert_y_score <
+	      my_score[y_left->ipos]-gap_penalty_y[current_gap_length]) {
+	    insert_y_score=my_score[y_left->ipos]-gap_penalty_y[current_gap_length];
+	    insert_y=i_y;
+	  }
+	  i_y++;
+	}
+      }
+
+      if (insert_y_score<new_score) { /* [new_x,new_y] MOVE IS BEST */
+	my_score[j]=new_score;
+	my_move[j].x=new_x;
+	my_move[j].y=new_y;
+	my_gap_length[j]=new_gap_len;
+	if (new_gap_len==0)
+	  my_move[j].is_aligned=1;
+      }
+      else { /* [0,Y] MOVE IS BEST */
+	my_score[j]=insert_y_score;
+	my_move[j].x=0;
+	my_move[j].y=insert_y;
+	my_gap_length[j]=current_gap_length+1;
+	if (my_gap_length[j]>TRUNCATE_GAP_LENGTH) /* PREVENT OVERFLOW */
+	  my_gap_length[j]=TRUNCATE_GAP_LENGTH;
+      }
+      if (my_score[j]>best_score) { /* RECORD BEST MOVE */
+	best_score=my_score[j];
+	best_x=i;
+	best_y=j;
+      }
+    }
+  } /* DYNAMIC PROGRAMING MATRIX COMPLETE, NOW TRACE BACK FROM best_x,best_y*/
+
+  IF_GUARD(best_x>=len_x || best_y>=len_y,1.1,(ERRTXT,"Bounds exceeded!\nbest_x,best_y:%d,%d\tlen:%d,%d\n",best_x,best_y,len_x,len_y),CRASH);
+  trace_back_lpo_po_alignment(len_x,seq_x,len_y,seq_y,move,best_x,best_y,
+			      x_to_y,y_to_x);
+  
+  FREE (gap_penalty_x);
+  FREE (gap_penalty_y);
+
+  FREE(score_base); /* FREE MEMORY */
+  FREE(move_base);
+  FREE(gap_length_base);
+  FREE(score);
+  FREE(move);
+  FREE(gap_length);
+  return best_score;
+}
+
+
+
+
+

Added: trunk/packages/poa/branches/upstream/current/align_lpo_po2.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/align_lpo_po2.c	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/align_lpo_po2.c	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,427 @@
+
+#include "default.h"
+#include "poa.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+
+/** set nonzero for old scoring (gap-opening penalty for X-Y transition) */
+#define DOUBLE_GAP_SCORING (0)
+
+
+typedef struct {
+  unsigned char x;
+  unsigned char y;
+}
+DPMove_T;
+
+typedef struct {
+  LPOScore_T score;
+  short gap_x, gap_y;
+}
+DPScore_T;
+
+/** is node 'i' the first node in any sequence in lposeq? */
+static int is_initial_node (int i, LPOSequence_T *lposeq)
+{
+  LPOLetterSource_T *src = &((lposeq->letter[i]).source);
+  while (src != NULL && src->iseq >= 0) {
+    if (src->ipos == 0) {
+      return 1;
+    }
+    src = src->more;
+  }
+  return 0;
+}
+
+/** is node 'i' the last node in any sequence in lposeq? */
+static int is_final_node (int i, LPOSequence_T *lposeq)
+{
+  LPOLetterSource_T *src = &((lposeq->letter[i]).source);
+  while (src != NULL && src->iseq >= 0) {
+    if (src->ipos == (lposeq->source_seq[src->iseq]).length - 1) {
+      return 1;
+    }
+    src = src->more;
+  }
+  return 0;
+}
+
+
+static void get_seq_left_and_final (LPOSequence_T *lposeq_x,
+				    LPOLetterLink_T ***x_left_ptr,
+				    int **is_final_node_ptr)
+{
+  int i, len_x = lposeq_x->length;
+  LPOLetter_T *seq_x = lposeq_x->letter;
+  int *is_final_node_x = NULL;
+  LPOLetterLink_T **x_left = NULL;
+  
+  CALLOC (is_final_node_x, len_x, int);
+  
+  for (i=0; i<len_x; i++) {
+    is_final_node_x[i] = is_final_node (i, lposeq_x);
+  }
+  
+  CALLOC (x_left, len_x, LPOLetterLink_T *);
+  
+  for (i=0; i<len_x; i++) {
+    if (is_initial_node (i, lposeq_x) && seq_x[i].left.ipos != -1) {
+      CALLOC (x_left[i], 1, LPOLetterLink_T);
+      x_left[i]->ipos = -1;
+      x_left[i]->score = 0;
+      x_left[i]->more = &seq_x[i].left;
+    }
+    else {
+      x_left[i] = &seq_x[i].left;
+    }
+  }
+
+  (*is_final_node_ptr) = is_final_node_x;
+  (*x_left_ptr) = x_left;
+}
+
+
+static void trace_back_lpo_alignment (int len_x, int len_y,
+				      DPMove_T **move,
+				      LPOLetterLink_T **x_left,
+				      LPOLetterLink_T **y_left,
+				      LPOLetterRef_T best_x, LPOLetterRef_T best_y,
+				      LPOLetterRef_T **x_to_y,
+				      LPOLetterRef_T **y_to_x)
+{
+  int i, xmove, ymove;
+  LPOLetterRef_T *x_al = NULL, *y_al = NULL;
+  LPOLetterLink_T *left;
+  
+  CALLOC (x_al, len_x, LPOLetterRef_T);
+  CALLOC (y_al, len_y, LPOLetterRef_T);
+  LOOP (i,len_x) x_al[i] = INVALID_LETTER_POSITION;
+  LOOP (i,len_y) y_al[i] = INVALID_LETTER_POSITION;
+  
+  while (best_x >= 0 && best_y >= 0) {
+
+    xmove = move[best_y][best_x].x;
+    ymove = move[best_y][best_x].y;
+    
+    if (xmove>0 && ymove>0) { /* ALIGNED! MAP best_x <--> best_y */
+      x_al[best_x]=best_y;
+      y_al[best_y]=best_x;
+    }
+
+    if (xmove == 0 && ymove == 0) { /* FIRST ALIGNED PAIR */
+      x_al[best_x]=best_y;
+      y_al[best_y]=best_x;
+      break;  /* FOUND START OF ALIGNED REGION, SO WE'RE DONE */
+    }
+    
+    if (xmove>0) { /* TRACE BACK ON X */
+      left = x_left[best_x];
+      while ((--xmove)>0) {
+	left = left->more;
+      }
+      best_x = left->ipos;
+    }
+    
+    if (ymove>0) { /* TRACE BACK ON Y */
+      left = y_left[best_y];
+      while ((--ymove)>0) {
+	left = left->more;
+      }
+      best_y = left->ipos;
+    }
+  }
+  
+  if (x_to_y) /* HAND BACK ALIGNMENT RECIPROCAL MAPPINGS */
+    *x_to_y = x_al;
+  else
+    free(x_al);
+  if (y_to_x)
+    *y_to_x = y_al;
+  else
+    free(y_al);
+  
+  return;
+}
+
+
+/** performs partial order alignment:
+    lposeq_x and lposeq_y are partial orders;
+    returns the alignment in x_to_y[] and y_to_x[], and also 
+    returns the alignment score as the return value.
+*/
+
+LPOScore_T align_lpo_po (LPOSequence_T *lposeq_x,
+			 LPOSequence_T *lposeq_y,
+			 ResidueScoreMatrix_T *m,
+			 LPOLetterRef_T **x_to_y,
+			 LPOLetterRef_T **y_to_x,
+			 LPOScore_T (*scoring_function)
+			 (int, int, LPOLetter_T *, LPOLetter_T *, ResidueScoreMatrix_T *),
+			 int use_global_alignment)
+{
+  int len_x = lposeq_x->length;
+  int len_y = lposeq_y->length;
+  LPOLetter_T *seq_x = lposeq_x->letter;
+  LPOLetter_T *seq_y = lposeq_y->letter;
+  
+  int i, j, xcount, ycount, prev_gap, next_gap;
+  int best_x = -1, best_y = -1;
+  LPOScore_T best_score = -999999;
+  int *is_final_node_x, *is_final_node_y;
+  LPOLetterLink_T **x_left = NULL, **y_left = NULL, *xl, *yl;
+  DPMove_T **move = NULL, *my_move;
+  
+  DPScore_T *curr_score = NULL, *prev_score = NULL, *init_col_score = NULL, *my_score;
+  DPScore_T **score_rows = NULL;
+  
+  LPOScore_T *gap_penalty_x, *gap_penalty_y;
+  double gap_decrement;
+  int *next_gap_array, *next_perp_gap_array;
+  int L_trunc = m->trunc_gap_length, L_decay = m->decay_gap_length;
+  
+  LPOScore_T try_score, insert_x_score, insert_y_score, match_score;
+  int insert_x_x, insert_x_gap;
+  int insert_y_y, insert_y_gap;
+  int match_x, match_y;
+  
+  long n_edges = 0;
+
+  CALLOC (gap_penalty_x, L_trunc + L_decay + 2, LPOScore_T);
+  CALLOC (gap_penalty_y, L_trunc + L_decay + 2, LPOScore_T);
+  CALLOC (next_gap_array, L_trunc + L_decay + 2, int);
+  CALLOC (next_perp_gap_array, L_trunc + L_decay + 2, int);  
+
+  gap_penalty_x[0] = m->gap_penalty_set[0][0];  /* INITIALIZE GAP PENALTY LOOKUP */
+  for (i=1;i<L_trunc;i++)
+    gap_penalty_x[i] = m->gap_penalty_set[0][1];
+  gap_decrement = (m->gap_penalty_set[0][1] - m->gap_penalty_set[0][2]) / ((double)(L_decay + 1));
+  for (i=L_trunc;i<L_trunc+L_decay+1;i++)
+    gap_penalty_x[i] = m->gap_penalty_set[0][1] - (i-L_trunc+1) * gap_decrement;
+  gap_penalty_x[L_trunc + L_decay] = m->gap_penalty_set[0][2];  /* (REDUNDANT) */
+  gap_penalty_x[L_trunc + L_decay + 1] = 0;  /* INITIAL STATE FOR LOCAL ALIGNMENT */
+
+  gap_penalty_y[0] = m->gap_penalty_set[1][0];  /* DIFFERENT PENALTY FOR Y-GAP */
+  for (i=1;i<L_trunc;i++)
+    gap_penalty_y[i] = m->gap_penalty_set[1][1];
+  gap_decrement = (m->gap_penalty_set[1][1] - m->gap_penalty_set[1][2]) / ((double)(L_decay + 1));
+  for (i=L_trunc;i<L_trunc+L_decay+1;i++)
+    gap_penalty_y[i] = m->gap_penalty_set[1][1] - (i-L_trunc+1) * gap_decrement;
+  gap_penalty_y[L_trunc + L_decay] = m->gap_penalty_set[1][2];  /* (REDUNDANT) */
+  gap_penalty_y[L_trunc + L_decay + 1] = 0;  /* INITIAL STATE FOR LOCAL ALIGNMENT */
+  
+  for (i=0; i<L_trunc+L_decay+2; i++) {
+    /* GAP LENGTH EXTENSION RULE: */
+    /* 0->1, 1->2, 2->3, ..., M-1->M, M->M, M+1->M+1. */
+    next_gap_array[i] = (i < L_trunc+L_decay) ? i+1 : i;
+    next_perp_gap_array[i] = (DOUBLE_GAP_SCORING && i < L_trunc+L_decay+1) ? 0 : next_gap_array[i];
+  }
+  
+  get_seq_left_and_final (lposeq_x, &x_left, &is_final_node_x);
+  get_seq_left_and_final (lposeq_y, &y_left, &is_final_node_y);
+  
+  CALLOC (move, len_y, DPMove_T *);
+  for (i=0; i<len_y; i++) {
+    CALLOC (move[i], len_x, DPMove_T);
+  }
+
+  CALLOC (init_col_score, len_y+1, DPScore_T);
+  init_col_score = &(init_col_score[1]);
+  
+  CALLOC (score_rows, len_y+1, DPScore_T *);
+  score_rows = &(score_rows[1]);
+  for (i=-1; i<len_y; i++) {
+    CALLOC (score_rows[i], len_x+1, DPScore_T);
+    score_rows[i] = &(score_rows[i][1]);
+  }
+  curr_score = score_rows[-1];
+  
+  /* FILL INITIAL ROW. */
+  /* GLOBAL ALIGNMENT: no free gaps */
+  /* LOCAL ALIGNMENT: free initial gap */
+  
+  curr_score[-1].score = 0;
+  curr_score[-1].gap_x = curr_score[-1].gap_y =
+    (use_global_alignment) ? 0 : TRUNCATE_GAP_LENGTH+1;
+  
+  for (i=0; i<len_x; i++) {
+    for (xcount = 1, xl = x_left[i]; xl != NULL; xcount++, xl = xl->more) {
+      prev_gap = curr_score[xl->ipos].gap_x;
+      try_score = curr_score[xl->ipos].score + xl->score - gap_penalty_x[prev_gap];
+      if (xcount == 1 || try_score > curr_score[i].score) {
+	curr_score[i].score = try_score;
+	curr_score[i].gap_x = next_gap_array[prev_gap];
+	curr_score[i].gap_y = next_perp_gap_array[prev_gap];
+      }
+    }
+  }
+  
+  /* FILL INITIAL COLUMN. */
+  
+  init_col_score[-1] = curr_score[-1];
+  for (i=0; i<len_y; i++) {
+    for (ycount = 1, yl = y_left[i]; yl != NULL; ycount++, yl = yl->more) {
+      prev_gap = init_col_score[yl->ipos].gap_y;
+      try_score = init_col_score[yl->ipos].score + yl->score - gap_penalty_y[prev_gap];
+      if (ycount == 1 || try_score > init_col_score[i].score) {
+	init_col_score[i].score = try_score;
+	init_col_score[i].gap_x = next_perp_gap_array[prev_gap];
+	init_col_score[i].gap_y = next_gap_array[prev_gap];
+      }
+    }
+  }
+
+  
+  /** MAIN DYNAMIC PROGRAMMING LOOP **/
+
+
+  /* OUTER LOOP (i-th position in LPO y): */
+  for (i=0; i<len_y; i++) {
+    
+    curr_score = score_rows[i];
+    
+    curr_score[-1] = init_col_score[i];
+          
+    /* INNER LOOP (j-th position in LPO x): */
+    for (j=0; j<len_x; j++) {
+      
+      /* LOOP OVER y-predecessors: */
+      for (ycount = 1, yl = y_left[i]; yl != NULL; ycount++, yl = yl->more) {
+	
+	prev_score = score_rows[yl->ipos];
+	
+	/* IMPROVE Y-INSERTION?: trace back to (i'=yl->ipos, j) */
+	prev_gap = prev_score[j].gap_y;
+	try_score = prev_score[j].score + yl->score - gap_penalty_y[prev_gap];
+	if (ycount == 1 || try_score > insert_y_score) {
+	  insert_y_score = try_score;
+	  insert_y_y = ycount;
+	  insert_y_gap = prev_gap;
+	}
+	
+	/* LOOP OVER x-predecessors: */
+	for (xcount = 1, xl = x_left[j]; xl != NULL; xcount++, xl = xl->more) {
+	  
+	  /* IMPROVE XY-MATCH?: trace back to (i'=yl->ipos, j'=xl->ipos) */
+	  try_score = prev_score[xl->ipos].score + xl->score + yl->score;
+	  if ((xcount == 1 && ycount == 1) || try_score > match_score) {
+	    match_score = try_score;
+	    match_x = xcount;
+	    match_y = ycount;
+	  }
+	}
+      }
+      
+      /* LOOP OVER x-predecessors */
+      /* IMPROVE X-INSERTION?: trace back to (i, j'=xl->ipos) */
+      for (xcount = 1, xl = x_left[j]; xl != NULL; xcount++, xl = xl->more) {
+	prev_gap = curr_score[xl->ipos].gap_x;
+	try_score = curr_score[xl->ipos].score + xl->score - gap_penalty_x[prev_gap];
+	if (xcount == 1 || try_score > insert_x_score) {
+	  insert_x_score = try_score;
+	  insert_x_x = xcount;
+	  insert_x_gap = prev_gap;
+	}
+      }
+      
+      if (0 == use_global_alignment && match_score <= 0) {
+	match_score = 0;
+	match_x = match_y = 0;  /* FIRST ALIGNED PAIR */
+      }
+      
+      n_edges += (xcount-1)*(ycount-1);
+      match_score += scoring_function (j, i, seq_x, seq_y, m);
+      
+      my_score = &curr_score[j];
+      my_move = &move[i][j];
+      
+      if (match_score > insert_y_score && match_score > insert_x_score) {
+	/* XY-MATCH */
+	my_score->score = match_score;
+	my_score->gap_x = 0;
+	my_score->gap_y = 0;
+	my_move->x = match_x;
+	my_move->y = match_y;
+      }
+      else if (insert_x_score > insert_y_score) {
+	/* X-INSERTION */
+	my_score->score = insert_x_score;
+	my_score->gap_x = next_gap_array[insert_x_gap];
+	my_score->gap_y = next_perp_gap_array[insert_x_gap];
+	my_move->x = insert_x_x;
+	my_move->y = 0;
+      }
+      else {
+	/* Y-INSERTION */
+	next_gap = next_gap_array[insert_y_gap];
+	my_score->score = insert_y_score;
+	my_score->gap_x = next_perp_gap_array[insert_y_gap];
+	my_score->gap_y = next_gap_array[insert_y_gap];
+	my_move->x = 0;
+	my_move->y = insert_y_y;
+      }
+
+      /* RECORD BEST START FOR TRACEBACK */
+      /* KEEPING ONLY FINAL-FINAL BESTS FOR GLOBAL ALIGNMENT */    
+      if (my_score->score >= best_score && (0 == use_global_alignment || (is_final_node_x[j] && is_final_node_y[i]))) {
+	if (my_score->score > best_score || (j == best_x && i < best_y) || j < best_x) {
+	  best_score = my_score->score;
+	  best_x = j;
+	  best_y = i;
+	}
+      }
+    }
+  }
+  
+  IF_GUARD(best_x>=len_x || best_y>=len_y,1.1,(ERRTXT,"Bounds exceeded!\nbest_x,best_y:%d,%d\tlen:%d,%d\n",best_x,best_y,len_x,len_y),CRASH);
+  
+  /*
+    fprintf (stderr, "aligned (%d nodes, %ld edges) to (%d nodes)\n", len_x, n_edges, len_y);
+    fprintf (stderr, "best score %d @ (%d %d)\n", best_score, best_x, best_y);
+  */
+  
+  /* DYNAMIC PROGRAMING MATRIX COMPLETE, NOW TRACE BACK FROM best_x, best_y */
+  trace_back_lpo_alignment (len_x, len_y, move, x_left, y_left,
+			    best_x, best_y,
+			    x_to_y, y_to_x);
+
+
+  FREE (gap_penalty_x);
+  FREE (gap_penalty_y);
+  FREE (next_gap_array);
+  FREE (next_perp_gap_array);
+  
+  for (i=-1; i<len_y; i++) {
+    score_rows[i] = &(score_rows[i][-1]);
+    FREE (score_rows[i]);
+  }
+  score_rows = &(score_rows[-1]);
+  FREE (score_rows);
+  
+  init_col_score = &(init_col_score[-1]);
+  FREE (init_col_score);
+  
+  FREE (is_final_node_x);
+  FREE (is_final_node_y);
+  
+  for (i=0; i<len_x; i++) {
+    if (x_left[i] != &seq_x[i].left) {
+      FREE (x_left[i]);
+    }
+  }
+  FREE (x_left);
+  
+  for (i=0; i<len_y; i++) {
+    if (y_left[i] != &seq_y[i].left) {
+      FREE (y_left[i]);
+    }
+  }
+  FREE (y_left);
+  
+  for (i=0; i<len_y; i++) {
+    FREE (move[i]);
+  }
+  FREE (move);
+  
+  return best_score;
+}

Added: trunk/packages/poa/branches/upstream/current/align_score.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/align_score.c	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/align_score.c	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,31 @@
+
+
+
+
+#include "default.h"
+#include "poa.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+
+
+typedef struct {
+  unsigned char x;
+  unsigned char y:7;
+  unsigned char is_aligned:1;
+} LPOMove_T;
+
+
+
+/* CATIE: YOU CAN PUT ANY SCORING METHOD YOU WANT INSIDE THIS
+   FUNCTION. JUST REPLACE THE CONTENTS OF THE FUNCTION WITH
+   YOUR SCORING METHOD */
+LPOScore_T caties_scoring_function(int i,
+				   int j,
+				   LPOLetter_T seq_x[],
+				   LPOLetter_T seq_y[],
+				   ResidueScoreMatrix_T *m)
+{
+  return m->score[seq_x[i].letter][seq_y[j].letter]; /*TRIVIAL SCORING FUNC:
+						       JUST USE MATRIX VALUE*/
+}

Added: trunk/packages/poa/branches/upstream/current/black_flag.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/black_flag.c	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/black_flag.c	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,125 @@
+
+
+
+#include "default.h" /* ~~I */
+
+
+
+
+char ERRTXT[1024]
+  ="";
+char *Program_name="black_flag";
+char *Program_version="unknown";
+int Already_reported_crash=0;
+
+int black_flag(int bug_level,
+	       char sourcefile[],
+	       int sourceline,
+	       char sourcefile_revision[])
+{
+  static int last_line= -1;
+  char *error_names[max_black_flag_type]
+    ={"CRASH","DIED","EXCEPTION","BAD_DATA","WARNING","DEBUG"};
+
+  switch (bug_level) {
+#ifndef DEBUG_USER_VERSION
+  case DEBUG_black_flag_type:  /* IN DEV'T VERSION JUST CRASH!!! */
+#ifdef DEBUG_VERSION
+  case TRAP_black_flag_type: /* FOR DEBUG VERSION AND TRAP, CAUSE A CORE DUMP*/
+#endif
+#endif
+  case CRASH_black_flag_type:  /* COULD INCLUDE MECHANISMS TO SEND EMAIL? */
+    /* print message at level 1, i.e. if we are printing anything at all */
+    PRINT_DEBUG(1,(DBOUT,"black_flag: %s %s:%s %s %s,%d\n%s\nend_black_flag\n",
+		   error_names[bug_level],
+		   Program_name,Program_version,
+		   sourcefile,sourcefile_revision,sourceline,
+		   ERRTXT[0]? ERRTXT:""));
+    if (Already_reported_crash)
+      return 1;
+    Already_reported_crash=1;
+    abort();  /* FORCE A CORE DUMP */
+
+
+  default: /* JUST PRINT THE ERROR */
+    PRINT_DEBUG(1,(DBOUT,"black_flag: %s %s:%s %s %s,%d\n%s\nend_black_flag\n",
+		   error_names[bug_level],
+		   Program_name,Program_version,
+		   sourcefile,sourcefile_revision,sourceline,
+		   ERRTXT[0]? ERRTXT:""));
+    break;
+    
+  }
+
+  ERRTXT[0]='\0'; /* RESET THE ERROR TEXT */
+  last_line=sourceline;
+  return 1;  /* SEND SIGNAL TO HANDLER CLAUSE TO DEAL WITH THIS ERROR */
+}
+
+
+
+
+
+
+
+
+void handle_crash(int sigcode)
+{
+  char *crash_mode;
+  
+  if (Already_reported_crash)
+    exit(-1); /* IN ENDLESS LOOP REPORTING CRASH OVER & OVER? */
+
+  Already_reported_crash=1;
+  black_flag(CRASH_black_flag_type,"",0,"");
+  if ((crash_mode=getenv("HANDLE_CRASH")) && 0==strcmp(crash_mode,"NOCORE"))
+    exit(-1);
+  else
+    return; /*ENVIRONMENT SETTING ASKS US TO DUMP A CORE IMMEDIATELY */
+}
+
+
+
+
+
+int handle_crash_init(void (*crash_fun)())
+{
+#define HANDLE_CRASH_MAX 5
+  int i,signal_type[HANDLE_CRASH_MAX]
+    ={SIGSEGV,
+#ifdef SIGBUS /* LINUX DOESN'T HAVE BUS ERRORS? */
+	SIGBUS,
+#else
+	SIGSEGV, /* REUSE SEGV AS DUMMY ENTRY */
+#endif
+	SIGABRT,SIGFPE,SIGTRAP}; /*LIST OF SIGNALS TO HANDLE*/
+  if (!crash_fun) /* NO CRASH FUNCTION SUPPLIED, SO RESET TO DEFAULT */
+    crash_fun = SIG_DFL; /* RESET TO STANDARD CRASH BEHAVIOR */
+
+  LOOP (i,HANDLE_CRASH_MAX) /* SET THIS HANDLER FOR ALL OUR SIGNALS */
+    signal(signal_type[i],crash_fun);
+  return 0;
+}
+
+void black_flag_init_args(int narg,char *arg[],char progversion[])
+{
+  int i,len=0;
+  LOOP (i,narg)
+    len+=strlen(arg[i])+2;
+  CALLOC(Program_name,len,char);
+  LOOPF (i,narg) {
+    strcat(Program_name,arg[i]);
+    strcat(Program_name," ");
+  }
+  if (progversion)
+    Program_version=progversion;
+  handle_crash_init(handle_crash);
+}
+
+
+
+void black_flag_init(char progname[],char progversion[])
+{
+  black_flag_init_args(1,&progname,progversion);
+}
+

Added: trunk/packages/poa/branches/upstream/current/black_flag.h
===================================================================
--- trunk/packages/poa/branches/upstream/current/black_flag.h	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/black_flag.h	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,245 @@
+
+
+
+#ifndef BLACK_FLAG_HEADER_INCLUDED
+#define BLACK_FLAG_HEADER_INCLUDED 1
+
+#include <signal.h>
+
+extern char ERRTXT[];
+#define DBOUT stderr
+
+enum {
+  CRASH_black_flag_type,
+  TRAP_black_flag_type,
+  COPE_black_flag_type,
+  USERR_black_flag_type,
+  WARN_black_flag_type,
+  DEBUG_black_flag_type,
+  max_black_flag_type
+  };
+
+
+#define NOERRMSG (ERRTXT,"")
+
+
+
+
+
+/********************************************************************
+  *
+  *     IF_PARANOID:
+  *     IF_PARANOID(CONDITION,REVISION,MESSAGE)
+  *
+  *     error checking that will significantly slow execution or
+  *     otherwise is desirable only in situations of extreme
+  *     unction, e.g. during our debugging!!!
+  *
+  *     The beauty of the IF_PARANOID idea is that you should sprinkle
+  *     it liberally EVERYWHERE in your code without thought for
+  *     whether it is necessary or might hurt performance,
+  *     because it will NOT even be compiled into the program in
+  *     the production version!!!!!!
+  *
+  *     use IF_PARANOID checks EVERYWHERE you can think of
+  *     definite error signals, even if you think "That shouldn't
+  *     EVER happen".
+  *
+  *     if the IF_PARANOID CONDITIONAL is TRUE, the program will abort();
+  *
+  *
+  *
+  *     Also, our emacs custom highlighting system has been programmed
+  *     to show IF_PARANOID and IF_DEBUG lines in a dim gray, so
+  *     that these extensive error checks will not visually obscure
+  *     the layout & organization of your algorithms.
+  *
+  *     black_flag() is smart about printing both version information
+  *     such as the vmake version name the executable was created by,
+  *     and also the exact revision number of the file in which the
+  *     error occured.
+  *
+  *
+  *-------------------------------------------------
+  *
+  *     IF_DEBUG:
+  *     IF_DEBUG(CONDITION,REVISION,MESSAGE)
+  *
+  *     also not expected to be included in a final production
+  *     release, but should not impact performance so significantly that
+  *     it's unpleasant to test such a version in regular use patterns.
+  *     Classic examples would be fairly pedantic checks at the entry
+  *     and exit of all functions, testing for conditions "that shouldn't
+  *     ever happen."
+  *
+  *     if the IF_DEBUG CONDITIONAL is TRUE, the program will abort();
+  *
+  *
+  *-------------------------------------------------
+  *
+  *     IF_GUARD:
+  *     IF_GUARD(CONDITION,REVISION,MESSAGE,LEVEL)
+  *
+  *     checks INCLUDED in final production versions, but taking
+  *     advantage of the black_flag system to allow us to easily
+  *     control what will be done in response to an error
+  *     in any given executable, via compile-time flags:
+  *
+  *     e.g.
+  *     print error info on stderr, or to special log files,
+  *
+  *     force a core dump,
+  *
+  *     run dbx via an auto script to generate a stack frame,
+  *     and mail the results to develop-support at mag.com,
+  *     etc.
+  *
+  *     IF_GUARD differs from IF_PARANOID and IF_DEBUG in that it
+  *     requires an error_level argument, which must be one of
+  *
+  *     CRASH                     ... the error is fatal, abort()
+  *
+  *     TRAP                      ... the error is being trapped, but
+  *                                   not handled. e.g. quiting from
+  *                                   a function because some essential
+  *                                   file was missing...
+  *
+  *     COPE                      ... the error is being handled nicely.
+  *                                   the handler code following IF_GUARD
+  *                                   knows how to correct for the situation.
+  *
+  *     USERR                     ... the user appears to have done something
+  *                                   that makes no sense; they must be
+  *                                   confused by our interface.
+  *
+  *     WARN                      ... suspicious data or input, not
+  *                                   definitely an error.
+  *
+  *
+  *
+  *-------------------------------------------------
+  *
+  *     Examples:
+  * 
+  *     IF_PARANOID((iatom<0),4.6,(ERRTXT,"wacky iatom=%d",iatom));
+  *
+  *     IF_DEBUG((iatom<0),4.6,(ERRTXT,"wacky iatom=%d",iatom));
+  *
+  *     IF_GUARD((iatom<0),4.6,(ERRTXT,"wacky iatom=%d",iatom),CRASH);
+  *
+  *     IF_GUARD((iatom<0),4.6,(ERRTXT,"wacky iatom=%d",iatom),COPE) {
+  *       put some code to handle the error condition here;
+  *     }
+  *
+  *     USE THE 4.6 KEYWORD TO PUT IN VERSION NUMBERS AUTOMATICALLY.
+  *
+  *******************************************************************/
+
+
+#if defined(PARANOID_VERSION)  /*???????????????????*/
+#ifndef DEBUG_VERSION
+#define DEBUG_VERSION
+#endif
+
+#define IF_PARANOID(CONDITION,REVISION,MESSAGE) \
+  if (CONDITION) {\
+    sprintf MESSAGE ;\
+    black_flag(DEBUG_black_flag_type,__FILE__,__LINE__,STRINGIFY(REVISION));\
+  }
+
+#else /*????????????????????????????????????????????*/
+#define IF_PARANOID(CONDITION,REVISION,MESSAGE)
+#endif  /* !PARANOID_VERSION  ??????????????????????*/
+
+
+
+
+
+
+
+
+
+
+#if defined(DEBUG_VERSION) /*??????????*/
+#define IF_DEBUG(CONDITION,REVISION,MESSAGE) \
+  if (CONDITION) {\
+    sprintf MESSAGE ;\
+    black_flag(DEBUG_black_flag_type,__FILE__,__LINE__,STRINGIFY(REVISION));\
+  }
+
+#else  /*???????????????????????????????????????????????????????????*/
+#define IF_DEBUG(CONDITION,REVISION,MESSAGE)
+#endif  /* !DEBUG_VERSION   ????????????????????????????????????????*/
+
+
+
+
+
+
+
+
+/*********************************************************************
+  *
+  *     IF_GUARD:
+  *     the basic black_flag macro, for production version trapping
+  *     and handling of errors.
+  *
+  *     Essentially adds the flexibility of handling / recording
+  *     errors however you like in black_flag().  Also, the programmer
+  *     can provide, or omit, a clause following this macro that
+  *     will only be executed if the CONDITION is true, allowing
+  *     any kind of handling you wish.
+  *
+  ********************************************************************/
+
+
+#define IF_GUARD(CONDITION,REVISION,MESSAGE,LEVEL) \
+  if ((CONDITION) ? \
+      (sprintf MESSAGE,\
+       black_flag(CONCAT_MACRO(LEVEL,_black_flag_type),__FILE__,__LINE__,\
+		  STRINGIFY(REVISION))) : 0)
+
+
+
+
+#define WARN_DEBUG(REVISION,MESSAGE,LEVEL) \
+(sprintf MESSAGE,\
+ black_flag(CONCAT_MACRO(LEVEL,_black_flag_type),__FILE__,__LINE__,\
+	    STRINGIFY(REVISION)))
+
+
+#define WARN_MSG(LEVEL,MESSAGE,REVISION) \
+(sprintf MESSAGE,\
+ black_flag(CONCAT_MACRO(LEVEL,_black_flag_type),__FILE__,__LINE__,\
+	    REVISION))
+
+
+
+/*********************************************************************
+  *
+  *     OUT_OF_BOUNDS:
+  *     checks if
+  *       MIN <= INDEX < MAX
+  *
+  *     e.g.
+  *     OUT_OF_BOUNDS(ivar,0,nvar)
+  *
+  ********************************************************************/
+
+#define OUT_OF_BOUNDS(INDEX,MINIMUM_BOUND,MAXIMUM_BOUND) \
+((INDEX)<(MINIMUM_BOUND) || (INDEX)>=(MAXIMUM_BOUND))
+
+void handle_crash(int sigcode);
+int handle_crash_init(void (*crash_fun)());
+int black_flag(int bug_level,
+	       char sourcefile[],
+	       int sourceline,
+	       char sourcefile_revision[]);
+
+char *Program_name;
+char *Program_version;
+
+void black_flag_init(char progname[],char progversion[]);
+void black_flag_init_args(int narg,char *arg[],char progversion[]);
+
+#endif

Added: trunk/packages/poa/branches/upstream/current/blosum80.mat
===================================================================
--- trunk/packages/poa/branches/upstream/current/blosum80.mat	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/blosum80.mat	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,40 @@
+#  Blosum80
+#  Matrix made by matblas from blosum80.iij
+#  * column uses minimum score
+#  BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
+#  Blocks Database = /data/blocks_5.0/blocks.dat
+#  Cluster Percentage: >= 80
+#  Entropy =   0.9868, Expected =  -0.7442
+GAP-PENALTIES=12 6 6
+   A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  Z  X  ?  a  g  t  c  u  ]  n
+A  7 -3 -3 -3 -1 -2 -2  0 -3 -3 -3 -1 -2 -4 -1  2  0 -5 -4 -1 -3 -2 -1 -9 -9 -9 -9 -9 -9 -9 -9
+R -3  9 -1 -3 -6  1 -1 -4  0 -5 -4  3 -3 -5 -3 -2 -2 -5 -4 -4 -2  0 -2 -9 -9 -9 -9 -9 -9 -9 -9
+N -3 -1  9  2 -5  0 -1 -1  1 -6 -6  0 -4 -6 -4  1  0 -7 -4 -5  5 -1 -2 -9 -9 -9 -9 -9 -9 -9 -9
+D -3 -3  2 10 -7 -1  2 -3 -2 -7 -7 -2 -6 -6 -3 -1 -2 -8 -6 -6  6  1 -3 -9 -9 -9 -9 -9 -9 -9 -9
+C -1 -6 -5 -7 13 -5 -7 -6 -7 -2 -3 -6 -3 -4 -6 -2 -2 -5 -5 -2 -6 -7 -4 -9 -9 -9 -9 -9 -9 -9 -9
+Q -2  1  0 -1 -5  9  3 -4  1 -5 -4  2 -1 -5 -3 -1 -1 -4 -3 -4 -1  5 -2 -9 -9 -9 -9 -9 -9 -9 -9
+E -2 -1 -1  2 -7  3  8 -4  0 -6 -6  1 -4 -6 -2 -1 -2 -6 -5 -4  1  6 -2 -9 -9 -9 -9 -9 -9 -9 -9
+G  0 -4 -1 -3 -6 -4 -4  9 -4 -7 -7 -3 -5 -6 -5 -1 -3 -6 -6 -6 -2 -4 -3 -9 -9 -9 -9 -9 -9 -9 -9
+H -3  0  1 -2 -7  1  0 -4 12 -6 -5 -1 -4 -2 -4 -2 -3 -4  3 -5 -1  0 -2 -9 -9 -9 -9 -9 -9 -9 -9
+I -3 -5 -6 -7 -2 -5 -6 -7 -6  7  2 -5  2 -1 -5 -4 -2 -5 -3  4 -6 -6 -2 -9 -9 -9 -9 -9 -9 -9 -9
+L -3 -4 -6 -7 -3 -4 -6 -7 -5  2  6 -4  3  0 -5 -4 -3 -4 -2  1 -7 -5 -2 -9 -9 -9 -9 -9 -9 -9 -9
+K -1  3  0 -2 -6  2  1 -3 -1 -5 -4  8 -3 -5 -2 -1 -1 -6 -4 -4 -1  1 -2 -9 -9 -9 -9 -9 -9 -9 -9
+M -2 -3 -4 -6 -3 -1 -4 -5 -4  2  3 -3  9  0 -4 -3 -1 -3 -3  1 -5 -3 -2 -9 -9 -9 -9 -9 -9 -9 -9
+F -4 -5 -6 -6 -4 -5 -6 -6 -2 -1  0 -5  0 10 -6 -4 -4  0  4 -2 -6 -6 -3 -9 -9 -9 -9 -9 -9 -9 -9
+P -1 -3 -4 -3 -6 -3 -2 -5 -4 -5 -5 -2 -4 -6 12 -2 -3 -7 -6 -4 -4 -2 -3 -9 -9 -9 -9 -9 -9 -9 -9
+S  2 -2  1 -1 -2 -1 -1 -1 -2 -4 -4 -1 -3 -4 -2  7  2 -6 -3 -3  0 -1 -1 -9 -9 -9 -9 -9 -9 -9 -9
+T  0 -2  0 -2 -2 -1 -2 -3 -3 -2 -3 -1 -1 -4 -3  2  8 -5 -3  0 -1 -2 -1 -9 -9 -9 -9 -9 -9 -9 -9
+W -5 -5 -7 -8 -5 -4 -6 -6 -4 -5 -4 -6 -3  0 -7 -6 -5 16  3 -5 -8 -5 -5 -9 -9 -9 -9 -9 -9 -9 -9
+Y -4 -4 -4 -6 -5 -3 -5 -6  3 -3 -2 -4 -3  4 -6 -3 -3  3 11 -3 -5 -4 -3 -9 -9 -9 -9 -9 -9 -9 -9
+V -1 -4 -5 -6 -2 -4 -4 -6 -5  4  1 -4  1 -2 -4 -3  0 -5 -3  7 -6 -4 -2 -9 -9 -9 -9 -9 -9 -9 -9
+B -3 -2  5  6 -6 -1  1 -2 -1 -6 -7 -1 -5 -6 -4  0 -1 -8 -5 -6  6  0 -3 -9 -9 -9 -9 -9 -9 -9 -9
+Z -2  0 -1  1 -7  5  6 -4  0 -6 -5  1 -3 -6 -2 -1 -2 -5 -4 -4  0  6 -1 -9 -9 -9 -9 -9 -9 -9 -9
+X -1 -2 -2 -3 -4 -2 -2 -3 -2 -2 -2 -2 -2 -3 -3 -1 -1 -5 -3 -2 -3 -1 -2 -9 -9 -9 -9 -9 -9 -9 -9
+? -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9
+a -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9  4 -2 -2 -2 -2 -9  0
+g -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2  4 -2 -2 -2 -9  0
+t -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2  4 -2  4 -9  0
+c -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2 -2  4 -2 -9  0
+u -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2  4 -2  4 -9  0
+] -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9
+n -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9  0  0  0  0  0 -9  0

Added: trunk/packages/poa/branches/upstream/current/buildup_lpo.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/buildup_lpo.c	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/buildup_lpo.c	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,376 @@
+
+
+#include "default.h"
+#include "poa.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+
+
+void fuse_ring_identities(int len_x,LPOLetter_T seq_x[],
+			  int len_y,LPOLetter_T seq_y[],
+			  LPOLetterRef_T al_x[],
+			  LPOLetterRef_T al_y[])
+{
+  int i,j;
+  LOOP (i,len_y) {
+    if (al_y[i]<0 || seq_x[al_y[i]].letter == seq_y[i].letter)
+      continue; /* NOT ALIGNED, OR ALREADY IDENTICAL, SO SKIP */
+    for (j=seq_x[al_y[i]].align_ring;j!=al_y[i];j=seq_x[j].align_ring)
+      if (seq_x[j].letter == seq_y[i].letter) { /* IDENTICAL! SO FUSE! */
+	al_x[al_y[i]]= INVALID_LETTER_POSITION; /* DISCONNECT FROM OLD */
+	al_y[i]=j; /* CONNECT TO NEW IDENTITY */
+	al_x[j]=i;
+	break; /* SEARCH YE NO FURTHER */
+      }
+  }
+}
+
+
+
+
+/** aligns the sequences in seq[] to the sequence or partial order in
+  new_seq; seq[] must be linear orders (regular sequences);
+  the alignment is built up by iterative partial order alignment,
+  and the resulting partial order is returned in new_seq */
+LPOSequence_T *buildup_lpo(LPOSequence_T *new_seq,
+			   int nseq,LPOSequence_T seq[],
+			   ResidueScoreMatrix_T *score_matrix,
+			   int use_aggressive_fusion,
+			   int use_global_alignment)
+{
+  int i,max_alloc=0,total_alloc;
+  LPOLetterRef_T *al1=NULL,*al2=NULL;
+
+  lpo_index_symbols(new_seq,score_matrix); /* MAKE SURE LPO IS TRANSLATED */
+  for (i=0;i<nseq;i++) { /* ALIGN ALL SEQUENCES TO my_lpo ONE BY ONE */
+    if (seq[i].letter == NULL) /* HMM.  HASN'T BEEN INITIALIZED AT ALL YET */
+      initialize_seqs_as_lpo(1,seq+i,score_matrix);
+    total_alloc=new_seq->length*seq[i].length
+      + sizeof(LPOLetter_T)*new_seq->length;
+    if (total_alloc>max_alloc) { /* DP RECTANGLE ARRAY SIZE */
+      max_alloc=total_alloc;
+#ifdef REPORT_MAX_ALLOC
+      fprintf(stderr,"max_alloc: %d bytes\n",max_alloc);
+#endif
+      if (max_alloc>POA_MAX_ALLOC) {
+	WARN_MSG(TRAP,(ERRTXT,"Exceeded memory bound: %d\n Exiting!\n\n",max_alloc),"$Revision: 1.2.2.1 $");
+	break; /* JUST RETURN AND FINISH */
+      }
+    }
+    align_lpo(new_seq,&seq[i],
+	      score_matrix,&al1,&al2,use_global_alignment); /* ALIGN ONE MORE SEQ */
+    if (use_aggressive_fusion) 
+      fuse_ring_identities(new_seq->length,new_seq->letter,
+			   seq[i].length,seq[i].letter,al1,al2);
+    fuse_lpo(new_seq,seq+i,al1,al2); /* BUILD COMPOSITE LPO */
+
+    free_lpo_letters(seq[i].length,seq[i].letter,TRUE);/*NO NEED TO KEEP*/
+    seq[i].letter=NULL; /* MARK AS FREED... DON'T LEAVE DANGLING POINTER! */
+    FREE(al1); /* DUMP TEMPORARY MAPPING ARRAYS */
+    FREE(al2);
+  }
+
+  return new_seq;
+}
+/**@memo example: aligning a set of sequences to a partial order: 
+      lpo_out=buildup_lpo(lpo_in,nseq,seq,&score_matrix,0);
+*/
+
+
+
+/** CLIPS seq->letter[] TO JUST THE SEGMENT ALIGNED TO letter_x[] via al_x[]
+ DOES *NOT* FREE existing seq->letter[]; YOU MUST KEEP IT OR FREE IT YOURSELF*/
+int clip_unaligned_ends(LPOSequence_T *seq,
+			LPOLetterRef_T al[],
+			int len_x,LPOLetter_T letter_x[],
+			LPOLetterRef_T al_x[],int *offset,int *match_length)
+{
+  int i,j=0,start,end,new_length,allow_end_length=0,nidentity=0;
+  LPOLetter_T *temp=NULL;
+  CALLOC(temp,seq->length,LPOLetter_T); /* ALLOCATE NEW letter[] COPY */
+  for (start=0;start<seq->length;start++) /* FIND 1ST ALIGNED POS */
+    if (al[start]>=0)
+      break;
+
+  for (end=seq->length -1;end>=0;end--) /* FIND LAST ALIGNED POS */
+    if (al[end]>=0)
+      break;
+
+  for (i=start;i<=end;i++) /* COUNT IDENTITIES TO letter_x[] */
+    if (al[i]>=0 && seq->letter[i].letter==letter_x[al[i]].letter)
+      nidentity++;
+  if (match_length) /* RETURN THE MATCH LENGTH TO THE CALLER */
+    *match_length = end-start+1;
+
+  if (start>allow_end_length) /* ALLOW EXTRA RESIDUES ON EITHER END*/
+    start-=allow_end_length;
+  else /* KEEP IN BOUNDS */
+    start=0;
+  if (end+allow_end_length<seq->length)
+    end+=allow_end_length;
+  else /* KEEP IN BOUNDS */
+    end=seq->length-1;
+
+  LOOP (i,len_x) /* WE ARE SHIFTING al TO THE RIGHT BY start POSITIONS */
+    if (al_x[i]>=0) /* SO WE HAVE TO TRANSLATE al_x CORRESPONDINGLY */
+      al_x[i]-= start;
+
+  seq->length=end-start+1; /* NOW TRANSLATE left, right, align_ring, ring_id*/
+  memcpy(temp,seq->letter+start,sizeof(LPOLetter_T)*(seq->length));
+  LOOP (i,seq->length) { /* THIS *ONLY* WORKS FOR PURE LINEAR SEQUENCE!!! */
+    temp[i].left.ipos -= start; /*IF <0, BECOMES INVALID BY DEFINITION, OK*/
+    temp[i].right.ipos -= start;
+    if (temp[i].right.ipos>=seq->length) /* PAST THE NEW, CLIPPED END */
+      temp[i].right.ipos= INVALID_LETTER_POSITION;
+    temp[i].ring_id=temp[i].align_ring=i;
+  }
+
+  if (offset) /* RETURN THE OFFSET TO THE CALLER */
+    *offset = start;
+
+  seq->letter=temp; /* NEW START: FIRST ALIGNED POSITION */
+  return nidentity; /* NEW LENGTH: FROM 1ST TO LAST ALIGNED POS*/
+}
+
+
+
+
+
+void restore_lpo_size(LPOSequence_T *seq,int length,LPOLetter_T *letter)
+{
+
+  free_lpo_letters(seq->length,seq->letter,TRUE); /* DUMP CLIPPED VERSION*/
+  seq->length=length; /* RESTORE ORIGINAL length AND letter[] */
+  seq->letter=letter;
+}
+
+
+
+/** BUILDS UP ALIGNMENT, BUT CLIPS UNALIGNED ENDS OF EACH NEW SEQUENCE ADDED  
+-------------------------------------------------------
+---------------------------------------------------------------------------
+*/
+LPOSequence_T *buildup_clipped_lpo(LPOSequence_T *new_seq,
+				   int nseq,LPOSequence_T seq[],
+				   ResidueScoreMatrix_T *score_matrix,
+				   int use_global_alignment)
+{
+  int i,ntemp,offset=0,nidentity,length_max=0,match_length=0;
+  int total_alloc,max_alloc=0;
+  LPOLetterRef_T *al1=NULL,*al2=NULL;
+  LPOLetter_T *temp;
+  float identity_max=0.,f;
+
+  lpo_index_symbols(new_seq,score_matrix); /* MAKE SURE LPO IS TRANSLATED */
+  for (i=0;i<nseq;i++) { /* ALIGN ALL SEQUENCES TO new_seq ONE BY ONE */
+    if (seq[i].letter == NULL) /* HMM.  HASN'T BEEN INITIALIZED AT ALL YET */
+      initialize_seqs_as_lpo(1,seq+i,score_matrix);
+    total_alloc=new_seq->length*seq[i].length
+      + sizeof(LPOLetter_T)*new_seq->length;
+    if (total_alloc>max_alloc) { /* DP RECTANGLE ARRAY SIZE */
+      max_alloc=total_alloc;
+#ifdef REPORT_MAX_ALLOC
+      fprintf(stderr,"max_alloc: %d bytes (%d x %d)\n",max_alloc,
+	      new_seq->length,seq[i].length);
+#endif
+      if (max_alloc>POA_MAX_ALLOC) {
+	WARN_MSG(TRAP,(ERRTXT,"Exceeded memory bound: %d\n Exiting!\n\n",max_alloc),"$Revision: 1.2.2.1 $");
+	break; /* JUST RETURN AND FINISH */
+      }
+    }
+    align_lpo(new_seq, &seq[i],
+	      score_matrix,&al1,&al2,use_global_alignment); /* ALIGN ONE MORE SEQ */
+    ntemp=seq[i].length; /* SAVE letter[] BEFORE CLIPPING IT TO ALIGNED AREA*/
+    temp=seq[i].letter;
+    if ((nidentity=clip_unaligned_ends(seq+i,al2,/*THERE IS AN ALIGNED REGION*/
+			new_seq->length,new_seq->letter,al1,&offset,
+				       &match_length))>0) {
+      f=nidentity/(float)match_length; /* CALCULATE IDENTITY FRACTION */
+      if (0==i /*f>identity_max*/) { /* REPORT IDENTITY OF TOP HIT */
+	identity_max=nidentity;
+	length_max=match_length;
+      }
+      fuse_lpo(new_seq,seq+i,al1,al2+offset); /*ADD CLIPPED REGION TO LPO*/
+    }
+    restore_lpo_size(seq+i,ntemp,temp); /* REVERT FROM CLIPPED TO ORIGINAL*/
+    FREE(al1); /* DUMP TEMPORARY MAPPING ARRAYS FROM align_lpo() */
+    FREE(al2);
+  }
+
+  fprintf(stderr,"%s\tmaximum identity\t%3.1f%%\t%.0f/%d\n",new_seq->name,
+	  100*identity_max/length_max,identity_max,length_max);
+  return new_seq;
+}
+
+
+int find_seq_name(int nseq,LPOSequence_T seq[],char name[])
+{
+  int i;
+  LOOP (i,nseq)
+    if (0==strcmp(seq[i].name,name))
+      return i;
+  return -1;
+}
+
+
+typedef struct {
+  double score;
+  double bitscore;
+  int i;
+  int j;
+} SeqPairScore_T;
+
+
+/* SORT IN ASCENDING ORDER BY score, THEN DESCENDING ORDER by bitscore */
+int seqpair_score_qsort_cmp(const void *void_a,const void *void_b)
+{
+  const SeqPairScore_T *a=(const SeqPairScore_T *)void_a,
+  *b=(const SeqPairScore_T *)void_b;
+  if (a->score < b->score)
+    return -1;
+  else if (a->score == b->score) {
+    if (a->bitscore > b->bitscore)
+      return -1;
+    else if (a->bitscore == b->bitscore)
+      return 0;
+  }
+  else
+    return 1;
+}
+
+
+
+
+SeqPairScore_T *read_seqpair_scorefile(int nseq,LPOSequence_T seq[],
+			       FILE *ifile,int *p_nscore)
+{
+  int i,j,nscore=0;
+  SeqPairScore_T *score=NULL;
+  double v,x;
+  char name1[256],name2[256];
+
+  CALLOC(score,nseq*nseq/2,SeqPairScore_T);
+  while (fscanf(ifile," %s %s %lf %lf",name1,name2,
+		&v,&x)==4) { /*READ SCORE FILE*/
+    i=find_seq_name(nseq,seq,name1);      
+    j=find_seq_name(nseq,seq,name2);
+    if (i<0 || j<0) {
+      WARN_MSG(USERR,(ERRTXT,"invalid sequence pair, not found: %s,%s",name1,name2),"$Revision: 1.2.2.1 $");
+      FREE(score);
+      return NULL;
+    }
+
+    /*    fprintf(stderr,"i=%d,j=%d,x=%e\n",i,j,x);*/
+    if (i<j) { /* DON'T SAVE UPPER, DUPLICATE HALF OF THE MATRIX */
+      /*      fprintf(stderr,"Saving score %s,%s:%e\n",name1,name2,x);*/
+      score[nscore].score=x; /* SAVE THE SCORE INTO THE MATRIX */
+      score[nscore].bitscore=v;
+      score[nscore].i=i;
+      score[nscore].j=j;
+      nscore++;
+    }
+  }
+
+  /* NOW SORT THESE IN ASCENDING ORDER AND HAND BACK TO CALLER */
+  qsort(score,nscore,sizeof(SeqPairScore_T),seqpair_score_qsort_cmp);
+  if (p_nscore) /* RETURN LENGTH OF PAIR SCORE TABLE IF REQUESTED */
+    *p_nscore=nscore;
+  return score;
+}
+
+
+LPOSequence_T *buildup_progressive_lpo(int nseq,LPOSequence_T seq[],
+				       ResidueScoreMatrix_T *score_matrix,
+				       int use_aggressive_fusion,
+				       char score_file[],
+				       LPOScore_T (*scoring_function)
+				       (int,int,LPOLetter_T [],LPOLetter_T [],
+					ResidueScoreMatrix_T *),
+				       int use_global_alignment)
+{
+  int i,j,max_alloc=0,total_alloc;
+  LPOLetterRef_T *al1=NULL,*al2=NULL;
+  SeqPairScore_T *score=NULL;
+  FILE *ifile;
+  LPOSequence_T *new_seq=NULL;
+  int *seq_cluster=NULL,cluster_i,cluster_j,nscore=0,iscore;
+  
+  ifile=fopen(score_file,"r");
+  if (ifile) {
+    if ((score=read_seqpair_scorefile(nseq,seq,ifile,&nscore))==NULL)
+      goto free_and_exit;
+    fclose(ifile);
+  }
+  else
+    goto free_and_exit;
+
+  CALLOC(seq_cluster,nseq,int); /* MAPS SEQS TO CLUSTER THEY'RE IN */
+  LOOP (i,nseq) /* CREATE TRIVIAL MAPPING, EACH SEQ ITS OWN CLUSTER */
+    seq_cluster[i]=i;
+
+  for (iscore=0;iscore<nscore;iscore++) {
+    if (seq_cluster[score[iscore].i] < seq_cluster[score[iscore].j]) {
+      cluster_i=seq_cluster[score[iscore].i];
+      cluster_j=seq_cluster[score[iscore].j];
+    }
+    else if (seq_cluster[score[iscore].j] < seq_cluster[score[iscore].i]) {
+      cluster_i=seq_cluster[score[iscore].j];
+      cluster_j=seq_cluster[score[iscore].i];
+    }
+    else /* CLUSTERS ALREADY FUSED, SO SKIP THIS PAIR */
+      continue;
+
+    fprintf(stderr,"Fusing cluster %s --> %s... score %e,%lf\n",
+	    seq[cluster_j].name,seq[cluster_i].name,score[iscore].score,
+	    score[iscore].bitscore);
+    new_seq=seq+cluster_i; /* THIS WILL BECOME THE NEW MASTER CLUSTER */
+    if (seq[cluster_i].letter == NULL) /* NOT INITIALIZED AT ALL YET */
+      initialize_seqs_as_lpo(1,seq+cluster_i,score_matrix);
+    if (seq[cluster_j].letter == NULL) /* NOT INITIALIZED AT ALL YET */
+      initialize_seqs_as_lpo(1,seq+cluster_j,score_matrix);
+    total_alloc=new_seq->length*seq[cluster_j].length
+      + sizeof(LPOLetter_T)*new_seq->length;
+    if (total_alloc>max_alloc) { /* DP RECTANGLE ARRAY SIZE */
+      max_alloc=total_alloc;
+#ifdef REPORT_MAX_ALLOC
+      fprintf(stderr,"max_alloc: %d bytes\n",max_alloc);
+#endif
+      if (max_alloc>POA_MAX_ALLOC) {
+	WARN_MSG(TRAP,(ERRTXT,"Exceeded memory bound: %d\n Exiting!\n\n",max_alloc),"$Revision: 1.2.2.1 $");
+	break; /* JUST RETURN AND FINISH */
+      }
+    }
+
+#ifdef USE_LOCAL_NEUTRALITY_CORRECTION /* NO LONGER USED */
+    if (score_matrix->nfreq>0) { /* CALCULATE BALANCED SCORING ON EACH PO */
+      balance_matrix_score(new_seq->length,new_seq->letter,score_matrix);
+      balance_matrix_score(seq[cluster_j].length,seq[cluster_j].letter,
+			   score_matrix);
+    }
+#endif
+
+    align_lpo_po(new_seq, &seq[cluster_j],
+		 score_matrix,&al1,&al2,
+		 scoring_function,use_global_alignment); /* ALIGN ONE MORE SEQ */
+    if (use_aggressive_fusion) 
+      fuse_ring_identities(new_seq->length,new_seq->letter,
+			   seq[cluster_j].length,seq[cluster_j].letter,
+			   al1,al2);
+    fuse_lpo(new_seq,seq+cluster_j,al1,al2); /* BUILD COMPOSITE LPO */
+
+    free_lpo_letters(seq[cluster_j].length,seq[cluster_j].letter,TRUE);
+    seq[cluster_j].letter=NULL; /*MARK AS FREED. DON'T LEAVE DANGLING POINTER*/
+    FREE(al1); /* DUMP TEMPORARY MAPPING ARRAYS */
+    FREE(al2);
+
+    LOOP (i,nseq) /* REINDEX ALL MEMBERS OF cluster_j TO JOIN cluster_i */
+      if (seq_cluster[i]==cluster_j)
+	seq_cluster[i]=cluster_i;
+  }
+
+ free_and_exit:
+  FREE(score);
+  FREE(seq_cluster);
+  return new_seq; /* RETURN THE FINAL MASTER CLUSTER */
+}

Added: trunk/packages/poa/branches/upstream/current/create_seq.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/create_seq.c	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/create_seq.c	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,53 @@
+
+
+#include "default.h"
+#include "seq_util.h"
+
+
+
+
+void save_sequence_fields(Sequence_T *seq,
+			  char seq_name[],char seq_title[],int length)
+{
+  STRNCPY(seq->name,seq_name,SEQUENCE_NAME_MAX);
+  if (seq_title)
+    seq->title=strdup(seq_title);
+  else
+    seq->title=strdup("untitled");
+  seq->length=length; /* SAVE LENGTH */
+}
+
+
+
+int create_seq(int nseq,Sequence_T **p_seq,
+	       char seq_name[],char seq_title[],char tmp_seq[],
+	       int do_switch_case)
+{
+  int i,j;
+  Sequence_T *seq;
+
+  REBUFF(*p_seq,nseq,SEQUENCE_BUFFER_CHUNK,Sequence_T); /* ALLOCATE MEMORY*/
+  seq= (*p_seq)+nseq; /* SET POINTER TO NEWLY ALLOCATED ELEMENT */
+
+  for (i=j=0;tmp_seq[i];i++) /* ELIMINATE WHITE SPACE */
+    if (!isspace(tmp_seq[i]))
+      tmp_seq[j++]=tmp_seq[i];
+  tmp_seq[j]='\0'; /* TERMINATE COMPRESSED STRING*/
+  seq->sequence=strdup(tmp_seq); /* SAVE A DYNAMIC COPY */
+  save_sequence_fields(seq,seq_name,seq_title,j);
+
+  switch (do_switch_case) {
+  case switch_case_to_lower:
+    LOOP (i,seq->length)
+      seq->sequence[i]=tolower(tmp_seq[i]);
+    break;
+  case switch_case_to_upper:
+    LOOP (i,seq->length)
+      seq->sequence[i]=toupper(tmp_seq[i]);
+    break;
+  }
+
+  return 1;
+}
+
+

Added: trunk/packages/poa/branches/upstream/current/default.h
===================================================================
--- trunk/packages/poa/branches/upstream/current/default.h	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/default.h	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,253 @@
+#ifndef DEFAULT_HEADER_INCLUDED
+#define DEFAULT_HEADER_INCLUDED 1
+
+#ifndef MODULE_NAME
+#define MODULE_NAME "main"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <math.h>
+#include <string.h>
+#include <ctype.h>
+
+
+#include "black_flag.h"
+
+
+
+typedef void *voidptr;  /* ~~e: should be moved out to generic typing header
+			   --- */
+typedef int (*funptr)();
+
+#define LOOPB(i,size) for ((i)=(size);(i)-- >0;)
+#define LOOP(i,size) for ((i)=(size);(i)-- >0;)
+#define LOOPF(i,size) for ((i)=0;(i)<(size);(i)++)
+#define LOOP_FINISHED(i,size) ((i)<0 || (i)>=(size))
+
+
+/**@memo example usage of argument reading macros 
+  FOR_ARGS(i,argc) {
+    ARGMATCH_VAL("-tolower",do_switch_case,switch_case_to_lower);
+    ARGMATCH("-seq_err_report",count_sequence_errors);
+    ARGGET("-printmatrix",print_matrix_letters);
+    NEXTARG(matrix_filename);
+  } */
+
+#define FOR_ARGS(INDEX,ARGC) for (INDEX=1;INDEX<ARGC;INDEX++)
+#define ARGMATCH(FLAGSTR,VAR) if (0==strcmp(argv[i],FLAGSTR)) (VAR)=1
+#define ARGMATCH_VAL(FLAGSTR,VAR,VAL) if (0==strcmp(argv[i],FLAGSTR)) (VAR)=(VAL)
+#define ARGGET(FLAGSTR,VAR) if (0==strcmp(argv[i],FLAGSTR)) {(VAR)=argv[++i];continue;}
+#define NEXTARG(VAR) if ('-'!=argv[i][0] && !(VAR)) {(VAR)=argv[i];continue;}
+
+
+
+
+#ifndef TRUE  /* DEFINE true AND false SYMBOLS IN CASE NOT ALREADY THERE*/
+#define TRUE 1
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+  
+#if defined(__STDC__) || defined(__ANSI_CPP__)  /*###################*/
+/* ~~a: ansi-dependent concatenation macro */
+#define CONCAT_MACRO0(a,b)  a ## b
+#else
+#define CONCAT_MACRO0(a,b) a/**/b
+#endif /* !__STDC__ ##################################################*/
+  
+#define CONCAT_MACRO(a,b)  CONCAT_MACRO0(a,b)
+
+#if defined(__STDC__) || defined(__ANSI_CPP__)  /*###################*/
+#define STRINGIFY(NAME) # NAME
+#else
+#define STRINGIFY(NAME) "NAME"
+#endif /* !__STDC__ ##################################################*/
+
+/* #define PRINT_DEBUG(MESSAGE)  printf MESSAGE */
+
+
+/***********************************************************************
+  *
+  *     STRNCPY: guaranteed overflow-protected, null-terminated strcpy
+  *
+  *     NB: this macro forces termination at end position S1[LEN-1]
+  *     which will cause non-intuitive side-effects ESPECIALLY
+  *     if LEN is incorrect!!!!  ie. STRNCPY will garbage your
+  *     memory by sticking zeroes into it if you give it the wrong
+  *     LEN.
+  *
+  *     Make sure that the LEN you give it is correct!!!!!!!
+  *
+  **********************************************************************/
+
+#define STRNCPY(S1,S2,LEN) \
+  (strncpy(S1,S2,LEN),(((char *)(S1))[(LEN)-1]='\0'))
+
+
+
+
+
+
+
+
+/*******************************************************************
+  *
+  *     STRINGPTR:
+  *     holds string and count of last alloc'd size.  Conveniently
+  *     manages static and dynamic strings, allowing auto-resize
+  *     without worrying about memory management.
+  *
+  ***********************************************************************/
+
+typedef struct { /* ~~d */
+  char *p;
+  int last_alloc;
+} stringptr; /* --- */
+
+
+#define STRINGPTR_BUFFER_CHUNK 4096
+#define STRINGPTR_EMPTY_INIT {NULL,0}
+
+char *stringptr_cat(stringptr *s1,const char s2[]);
+char *stringptr_cpy(stringptr *s1,const char s2[]);
+int stringptr_free(stringptr *s);
+
+
+
+
+
+
+#ifdef DEBUG
+#define PRINT_DEBUG(CODE,MESSAGE) {if (CODE<=2) fprintf MESSAGE ;}
+#else
+#define PRINT_DEBUG(CODE,MESSAGE) {if (CODE<=1) fprintf MESSAGE ;}
+#endif
+
+
+
+
+#define MEM0_REQUEST_BAD 0
+
+/* DEFAULT ACTION TO PERFORM IF malloc FAILS */
+#define MALLOC_FAILURE_ACTION abort()
+/* DEFAULT ACTION TO PERFORM IF realloc FAILS */
+#define REALLOC_FAILURE_ACTION abort()
+
+
+
+#define CALLOC(memptr,N,ATYPE) \
+/*  printf("CALLOC(%s=%d,%s=%d,sizeof(%s)=%d), %s %d\n",STRINGIFY(memptr),(memptr), \
+	 STRINGIFY(N),(N),STRINGIFY(ATYPE),sizeof(ATYPE),__FILE__,__LINE__);*/ \
+  if ((N)<=0)   {                     \
+    if (MEM0_REQUEST_BAD)  {\
+      fprintf(stderr,"%s, line %d: *** invalid memory request: %s[%d].\n",\
+	      __FILE__,__LINE__,STRINGIFY(memptr),(N));   \
+      MALLOC_FAILURE_ACTION;                                            \
+    }\
+  }                                                                 \
+  else if (NULL == ((memptr)=(ATYPE *)calloc((size_t)(N),sizeof(ATYPE))))  { \
+    fprintf(stderr,"%s, line %d: *** out of memory \n",__FILE__,__LINE__);                \
+    fprintf(stderr,"Unable to meet request: %s[%d]\n",STRINGIFY(memptr),(N));    \
+    fprintf(stderr,"requested %d x %d bytes \n",(N),sizeof(ATYPE));   \
+    MALLOC_FAILURE_ACTION;                                            \
+  }
+
+
+
+
+
+
+
+
+
+/*******************************************************************
+  *
+  *     FREE:
+  *     dumps storage if a non-zero pointer, and resets pointer to
+  *     NULL to indicate its data freed.
+  *
+  ******************************************************************/
+
+#define FREE(P) if (P) {free(P);(P)=NULL;}
+
+
+#define REALLOC(memptr,NUM,ATYPE) \
+/*  printf("REALLOC(%s=%d,%s=%d,sizeof(%s)=%d), %s %d\n",STRINGIFY(memptr), \
+    (memptr),STRINGIFY(NUM),(NUM),STRINGIFY(ATYPE),sizeof(ATYPE),\
+    __FILE__,__LINE__);*/ \
+  if ((NUM)<=0)   {                            \
+    if (MEM0_REQUEST_BAD)  {\
+      fprintf(stderr,"%s, line %d: *** invalid memory request: %s[%d].\n",\
+	      __FILE__,__LINE__,STRINGIFY(memptr),(NUM));   \
+      REALLOC_FAILURE_ACTION;                                             \
+    }\
+  }                                                                 \
+  else { \
+    voidptr temp_ptr=realloc((void *)(memptr),sizeof(ATYPE)*(size_t)(NUM));\
+    if (temp_ptr) \
+      (memptr)=(ATYPE *)temp_ptr;\
+    else { \
+      fprintf(stderr,"%s, line %d: *** out of memory \n",__FILE__,__LINE__); \
+      fprintf(stderr,"Unable to meet request: %s\n",STRINGIFY(memptr));  \
+      fprintf(stderr,"requested %d x %d bytes \n",(NUM),sizeof(ATYPE));   \
+      REALLOC_FAILURE_ACTION;                                             \
+    } \
+  }
+
+
+/**********************************************************************
+  *
+  *     REBUFF
+  *     GUARANTEES ALLOCATION OF NULL-INITIALIZED BLOCK OF BUF ELEMENTS
+  *     OF THE CORRECT DATA TYPE & SIZE, SUFFICIENT TO HOLD AT LEAST ONE
+  *     ELEMENT ADDED TO THE CURRENT SIZE OF THE ARRAY
+  *
+  **********************************************************************/
+
+#define REBUFF(memptr,NUM,BUF,ATYPE) if ((NUM)<=0) { \
+    CALLOC(memptr,BUF,ATYPE) } \
+  else if (0 == (NUM)%(BUF)) { \
+    char *p_rebuff_size;\
+    REALLOC(memptr,(NUM)+(BUF),ATYPE); \
+    p_rebuff_size=(char *)(memptr)+(NUM)*sizeof(ATYPE); \
+    memset((voidptr)p_rebuff_size,0,(size_t)((BUF)*sizeof(ATYPE)));\
+  }
+
+
+
+
+
+
+
+
+
+/*******************************************************************
+  *
+  *     GETMEM:
+  *     allocates memory either via malloc or realloc, guaranteeing
+  *     space to store at least one more data entry.  Data is
+  *     alloc'd in blocks of size BUF; the total # alloc'd is
+  *     kept in LAST.  The algorithm can go wrong if LAST
+  *     is incorrect.
+  *
+  ******************************************************************/
+
+#define GETMEM(memptr,NUM,LAST,BUF,TYPE) \
+  if ((LAST)<=0) {\
+    CALLOC(memptr,((NUM)+1)-((NUM)+1)%(BUF)+(BUF),TYPE);\
+    (LAST)=((NUM)+1)-((NUM)+1)%(BUF)+(BUF);\
+  }\
+  else if (((NUM)+1)>(LAST))  { \
+    REALLOC(memptr,((NUM)+1)-((NUM)+1)%(BUF)+(BUF),TYPE);\
+    (LAST)=((NUM)+1)-((NUM)+1)%(BUF)+(BUF);\
+  }
+
+
+
+
+
+#endif

Added: trunk/packages/poa/branches/upstream/current/fasta_format.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/fasta_format.c	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/fasta_format.c	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,89 @@
+
+#include "default.h"
+#include "seq_util.h"
+
+
+
+/** reads FASTA formatted sequence file, and saves the sequences to
+  the array seq[]; any comment line preceded by a hash-mark will be saved
+  to comment */
+int read_fasta(FILE *seq_file,Sequence_T **seq,
+	       int do_switch_case,char **comment)
+{
+  int c,nseq=0,length=0;
+  char seq_name[FASTA_NAME_MAX]="",
+  line[SEQ_LENGTH_MAX],seq_title[FASTA_NAME_MAX]="";
+  char *p;
+  stringptr tmp_seq=STRINGPTR_EMPTY_INIT;
+	
+ /* read in sequences */
+  while (fgets(line,sizeof(line)-1,seq_file)) {
+    if ((p=strrchr(line,'\n'))) /* REMOVE NEWLINE FROM END OF LINE */
+      *p= '\0'; /* TRUNCATE THE STRING */
+    switch (line[0]) {
+    case '#':  /* SEQUENCE COMMENT, SAVE IT */
+      if (comment) /* SAVE COMMENT FOR CALLER TO USE */
+	*comment = strdup(line+1);
+      break;
+
+    case '>':  /* SEQUENCE HEADER LINE */
+      if (seq_name[0] && tmp_seq.p && tmp_seq.p[0]) { /* WE HAVE A SEQUENCE, SO SAVE IT! */
+	if (create_seq(nseq,seq,seq_name,seq_title,tmp_seq.p,do_switch_case))
+	  nseq++;
+      }
+      seq_name[0]='\0';
+      if (sscanf(line+1,"%s %[^\n]",  /* SKIP PAST > TO READ SEQ NAME*/
+	     seq_name,seq_title)<2)
+	strcpy(seq_title,"untitled"); /* PROTECT AGAINST MISSING NAME */
+      if (tmp_seq.p)
+	tmp_seq.p[0]='\0'; /* RESET TO EMPTY SEQUENCE */
+      length=0;
+      break;
+
+    case '*': /* IGNORE LINES STARTING WITH *... DON'T TREAT AS SEQUENCE! */
+      break;
+
+    default:  /* READ AS ACTUAL SEQUENCE DATA, ADD TO OUR SEQUENCE */
+      if (seq_name[0]) /* IF WE'RE CURRENTLY READING A SEQUENCE, SAVE IT */
+	stringptr_cat_pos(&tmp_seq,line,&length);
+    }
+
+    c=getc(seq_file); /* ?FIRST CHARACTER IS UNIGENE CLUSTER TERMINATOR? */
+    if (c==EOF)
+      break;
+    else {
+      ungetc(c,seq_file); /* PUT THE CHARACTER BACK */
+      if (c=='#' && nseq>0) /* UNIGENE CLUSTER TERMINATOR, SO DONE!*/
+	break;
+    }
+  }
+  if (seq_name[0] && tmp_seq.p && tmp_seq.p[0]) { /* WE HAVE A SEQUENCE, SO SAVE IT! */
+    if (create_seq(nseq,seq,seq_name,seq_title,tmp_seq.p,do_switch_case))
+      nseq++;
+  }
+  stringptr_free(&tmp_seq);
+  return nseq; /* TOTAL NUMBER OF SEQUENCES CREATED */
+}
+
+/**@memo example: reading FASTA format file: 
+    seq_ifile=fopen(seq_filename,"r");
+    if (seq_ifile) {
+      nseq=read_fasta(seq_ifile,&seq,do_switch_case,&comment);
+      fclose(seq_ifile);
+    }
+*/
+
+
+
+/** writes a FASTA formatted file, saving the sequence given in seq[] */
+void write_fasta(FILE *ifile,char name[],char title[],char seq[])
+{
+  int j;
+
+  fprintf(ifile,">%s %s\n",name,title? title : "untitled");
+  for (j=0;j<strlen(seq);j+=60)
+    fprintf(ifile,"%.60s\n",seq+j);
+
+  return;
+}
+

Added: trunk/packages/poa/branches/upstream/current/heaviest_bundle.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/heaviest_bundle.c	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/heaviest_bundle.c	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,173 @@
+
+
+#include "default.h"
+#include "poa.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+
+
+
+/** finds the heaviest traversal of the LPO seq[], using dynamic programming;
+  at each node the heaviest link is chosen to buildup traversals; finally,
+  the traversal with the heaviest overall link weight is returned as an
+  array of position indices.  The length of the array is stored in 
+  *p_best_len*/
+LPOLetterRef_T *heaviest_bundle(int len,LPOLetter_T seq[],
+				int nsource_seq,LPOSourceInfo_T source_seq[],
+				int *p_best_len)
+{
+  int i,j,best_right,iright,ibest= -1,best_len=0;
+  LPOLetterRef_T *best_path=NULL,*path=NULL;
+  LPOLetterLink_T *right;
+  LPOLetterSource_T *source;
+  LPOScore_T *score=NULL,best_score= -999999,right_score;
+  int *contains_pos=NULL,my_overlap,right_overlap;
+
+  CALLOC(path,len,LPOLetterRef_T); /* GET MEMORY FOR DYNAMIC PROGRAMMING */
+  CALLOC(score,len,LPOScore_T);
+  CALLOC(contains_pos,nsource_seq,int);
+  LOOP (i,nsource_seq) /* RESET TO NOT MATCH ANY POSITIONS */
+    contains_pos[i]= INVALID_LETTER_POSITION;
+
+  LOOPB (i,len) { /* FIND HEAVIEST PATH BY DYNAMIC PROGRAMMING */
+    source= &seq[i].source;  /* MARK SEQUENCES CONTAINING THIS POSITION */
+    memset(contains_pos,0,nsource_seq*sizeof(int)); /* ERASE ARRAY */
+    do
+      if (source_seq[source->iseq].weight>0) /* EXCLUDE ZERO WEIGHT SEQS */
+	contains_pos[source->iseq]=source->ipos+1; /* right MUST BE ADJACENT*/
+    while (source=source->more); /* KEEP COUNTING TILL NO more */
+
+    right_score=right_overlap=0;  /*DEFAULT MOVE: NOTHING TO THE RIGHT*/
+    best_right= INVALID_LETTER_POSITION;
+    for (right= &seq[i].right;right && right->ipos>=0;right=right->more) {
+      my_overlap=0; /* OVERLAP CALCULATION */
+      source= &seq[right->ipos].source;/*COUNT SEQS SHARED IN i AND right*/
+      do /* BIAS OVERLAP CALCULATION BY SEQUENCE WEIGHTING */
+	if (contains_pos[source->iseq]==source->ipos) /* YES, ADJACENT! */
+	  my_overlap += source_seq[source->iseq].weight;
+      while (source=source->more); /* KEEP COUNTING TILL NO more */
+      
+      if (my_overlap>right_overlap /* FIND BEST RIGHT MOVE: BEST OVERLAP */
+	  || (my_overlap==right_overlap && score[right->ipos]>right_score)) {
+	right_overlap=my_overlap;
+	right_score=score[right->ipos];
+	best_right=right->ipos;
+      }
+    }
+
+    path[i]=best_right; /* SAVE THE BEST PATH FOUND */
+    score[i]=right_score+right_overlap; /* SAVE THE SCORE */
+    if (score[i]>best_score) { /* RECORD BEST SCORE IN WHOLE LPO */
+      ibest=i;
+      best_score=score[i];
+    }
+  }
+
+  CALLOC(best_path,len,LPOLetterRef_T); /* MEMORY FOR STORING BEST PATH */
+  for (;ibest>=0;ibest=path[ibest])  /* BACK TRACK THE BEST PATH */
+    best_path[best_len++]=ibest;
+
+  FREE(path); /* DUMP SCRATCH MEMORY */
+  FREE(score);
+  FREE(contains_pos);
+
+  if (p_best_len) /* RETURN best_path AND ITS LENGTH */
+    *p_best_len = best_len;
+  return best_path;
+}
+
+
+
+
+int assign_sequence_bundle_id(int path_length,LPOLetterRef_T path[],
+			      LPOSequence_T *seq,int bundle_id,
+			      float minimum_fraction)
+{
+  int i,*bundle_count=NULL,nseq_in_bundle=0;
+  LPOLetterSource_T *source;
+  
+  CALLOC(bundle_count,seq->nsource_seq,int);
+  LOOP (i,path_length) /* COUNT #POSITIONS OF EACH SEQ ARE IN path */
+    for (source= &seq->letter[path[i]].source;source;source=source->more)
+      bundle_count[source->iseq]++;
+
+  LOOP (i,seq->nsource_seq) {/* FOR EACH SEQ OVER THRESHOLD, ASSIGN bundle_id*/
+/*    printf("bundle %d:\t%s\t%d/%d %d",bundle_id,seq->source_seq[i].name,
+	   bundle_count[i],seq->source_seq[i].length,seq->source_seq[i].weight);*/
+    if (seq->source_seq[i].bundle_id<0 /* NOT YET BUNDLED */
+	&& seq->source_seq[i].length*minimum_fraction <= bundle_count[i]) {
+/*      printf("   +++++++++++++++++");*/
+      seq->source_seq[i].bundle_id = bundle_id; /* ASSIGN TO THIS BUNDLE */
+      seq->source_seq[i].weight = 0; /* REMOVE FROM FUTURE heaviest_bundle */
+      nseq_in_bundle++;
+    }
+  /*  printf("\n");*/
+  }
+
+  FREE(bundle_count);
+  return nseq_in_bundle; /* RETURN COUNT OF SEQUENCES IN BUNDLE */
+}
+
+
+
+
+/** assigns weights for bundling based upon /hb_weight arguments
+    in source_seq titles */
+
+void assign_hb_weights(int nsource_seq,LPOSourceInfo_T source_seq[])
+{
+  int i,weight;
+  char *p;
+  LOOP (i,nsource_seq) {
+    if (source_seq[i].title &&
+	(p=strstr(source_seq[i].title,"/hb_weight="))) {
+      weight=atoi(p+11);
+      if (weight!=0){ /* 0 COULD MEAN atoi FAILED TO PARSE ARG.  IGNORE IT*/
+	source_seq[i].weight = weight;
+	fprintf(stderr,"assigned weight=%d to %s\n",source_seq[i].weight,source_seq[i].name);
+      }
+      else
+	WARN_MSG(USERR,(ERRTXT,"hb_weight zero or unreadable: %s\nIgnored",p),"$Revision: 1.2 $");
+    }
+  }
+}
+
+
+
+/** generates the complete set of heaviest_bundle traversals of the the LPO
+ seq, using iterative heaviest_bundle() and requiring that at least
+ minimum_fraction of the positions in a sequence match the heaviest
+ bundle path, for that sequence to be assigned to that bundle 
+---------------------------------------------------------------
+------------------------------------------------------------*/
+void generate_lpo_bundles(LPOSequence_T *seq,float minimum_fraction)
+{
+  int nbundled=0,ibundle=0,path_length,iseq,count;
+  LPOLetterRef_T *path=NULL;
+  char name[256],title[1024];
+
+  /*  assign_hb_weights(seq->nsource_seq,seq->source_seq); TURN THIS ON!!*/
+  while (nbundled < seq->nsource_seq) {/* PULL OUT BUNDLES ONE BY ONE */
+    path=heaviest_bundle(seq->length,seq->letter,/*GET NEXT HEAVIEST BUNDLE*/
+			 seq->nsource_seq,seq->source_seq,&path_length);
+    if (!path || path_length<10) /* ??!? FAILED TO FIND A BUNDLE ??? */
+      goto premature_warning;
+    sprintf(name,"CONSENS%d",ibundle);
+    /* NEXT, MARK SEQUENCES THAT FIT THIS BUNDLE ADEQUATELY */
+    count=assign_sequence_bundle_id(path_length,path,seq,ibundle,
+				    minimum_fraction);
+    sprintf(title,"consensus produced by heaviest_bundle, containing %d seqs",
+	    count); /* DON'T INCLUDE CONSENSUS ITSELF IN THE COUNT! */
+    iseq=add_path_sequence(path_length,path,seq,name,title);/*BUILD CONSENSUS*/
+    seq->source_seq[iseq].bundle_id=ibundle++; /* INCREMENT BUNDLE ID */
+    nbundled+=count; /* KEEP TRACK OF TOTAL SEQUENCES BUNDLED */
+
+    if (count<1) {
+    premature_warning:
+      fprintf(stderr,"*** WARNING: bundling ended prematurely after %d bundles.\nNo sequences fit inside this last bundle.\nA total of %d sequences incuding consensus were bundled.\n\n",ibundle,nbundled);
+      break;
+    }
+  }
+}
+

Added: trunk/packages/poa/branches/upstream/current/lpo.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/lpo.c	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/lpo.c	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,743 @@
+
+
+#include "default.h"
+#include "poa.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+
+/** INITIALIZES LINEARIZED PARTIAL ORDER DATA STRUCTURES FOR
+   A LINEAR SEQUENCE */
+void lpo_init(LPOSequence_T *seq)
+{
+  int i;
+
+  CALLOC(seq->letter,seq->length,LPOLetter_T);
+
+  LOOP (i,seq->length) {
+    seq->letter[i].left.ipos = SEQ_Y_LEFT(i); /* JUST A LINEAR SEQ */
+    seq->letter[i].right.ipos= SEQ_Y_RIGHT(i);
+    seq->letter[i].source.iseq=0; /* TRIVIAL SOURCE: POINT TO SELF */
+    seq->letter[i].source.ipos=i;
+    seq->letter[i].align_ring = seq->letter[i].ring_id=i; /* POINT AT SELF */
+    seq->letter[i].letter = seq->sequence[i]; /* COPY OUR AA LETTER */
+  }
+  seq->letter[seq->length -1].right.ipos= INVALID_LETTER_POSITION;
+  /* NB: letter[0].left.ipos IS INVALID_LETTER_POSITION THANKS TO SEQ_Y_LEFT()
+     ABOVE */
+  /* BECAUSE PO CAN CONTAIN MULTIPLE SEQUENCES, WE ALSO KEEP A SOURCE LIST.
+     FOR PURE LINEAR SEQUENCE, THE LIST IS JUST OUR STARTING SEQUENCE */
+  save_lpo_source(seq,seq->name,seq->title,seq->length,1,NO_BUNDLE,0,NULL);
+  return;
+}
+
+
+
+
+
+/**@memo initialize one or more regular sequences (linear orders) to LPO form.
+  This step is REQUIRED before running partial order alignment.  
+  This routine processes each sequence with limit_residues() and 
+  index_symbols(), then builds a linear LPO using lpo_init(). */
+  
+void initialize_seqs_as_lpo(int nseq, Sequence_T seq[],ResidueScoreMatrix_T *m)
+{
+  int i;
+  LOOP (i,nseq) {/* EXCLUDE LETTERS THAT AREN'T IN MATRIX */
+    limit_residues(seq[i].sequence,m->symbol);
+    /* TRANSLATE FROM ASCII LETTERS TO COMPACTED NUMBERICAL INDEX*/
+    index_symbols(seq[i].length,seq[i].sequence,seq[i].sequence,
+		  m->nsymbol,m->symbol);
+    lpo_init(seq+i); /* CREATE TRIVIAL, LINEAR SEQUENCE LPO */
+  }
+}
+
+
+/** translates letter symbols on lpo.letter[i] to indexes used in scoring 
+    matrix*/
+void lpo_index_symbols(Sequence_T *lpo,ResidueScoreMatrix_T *m)
+{
+  int i;
+
+  if (lpo->letter == NULL) { /* HMM.  HASN'T BEEN INITIALIZED AT ALL YET */
+    initialize_seqs_as_lpo(1,lpo,m);
+    return;
+  }
+  if (lpo->letter[0].letter < m->nsymbol)
+    return; /* LOOKS LIKE IT'S ALREADY TRANSLATED TO INDEXES */
+  LOOP (i,lpo->length) /* READ FROM FILE, MAY NOT BE TRANSLATED YET */
+    index_symbols(1,&lpo->letter[i].letter,&lpo->letter[i].letter,
+		  m->nsymbol,m->symbol);
+}
+
+
+
+
+
+/** finds ipos for the designated sequence in the designated letter,
+    or INVALID_LETTER_POSITION if iseq is not found. */
+int find_letter_source(LPOLetter_T *letter,int iseq)
+{
+  LPOLetterSource_T *source;
+  for (source= &letter->source;source;source=source->more)/*SCAN SOURCES*/
+    if (source->iseq == iseq) /*MATCH! */
+      return source->ipos; /* RETURN ITS SEQUENCE POSITION */
+  return INVALID_LETTER_POSITION;
+}
+
+
+
+
+
+/**@memo finds links to iseq, starting from letter[ipos] and proceeding in the 
+    specified direction up to max_step links.  Optionally, the search 
+    can be constrained to only match node iseq:match_ipos.  
+    If iseq is found, the number 
+    of links connecting its letter to letter[ipos] is reported.  Otherwise 
+    it returns max_step+1 to indicate iseq was not found within the given 
+    distance.  If p_ipos or p_letter_pos are non-NULL, it will return
+    the ipos of the position found in iseq, or the index of the found 
+    letter[], respectively. */
+int find_sequence_link(int iseq,
+		       int match_ipos,
+		       int start_pos,
+		       LPOLetter_T letter[],
+		       int max_step,
+		       int please_go_right,
+		       int *p_ipos,
+		       int *p_letter_pos)
+{
+  int nstep,ipos;
+  LPOLetterLink_T *link,*link0;
+  
+  if (please_go_right) /* CHOOSE THE DESIRED DIRECTION */
+    link0= &letter[start_pos].right;
+  else
+    link0= &letter[start_pos].left;
+
+  for (link=link0;link && link->ipos>=0;link=link->more) /*SCAN ALL LINKS*/
+    if ((ipos=find_letter_source(letter+link->ipos,iseq))>=0
+	&& (match_ipos<0 /* NO CONSTRAINT ON match_ipos */
+	    || match_ipos==ipos)) { /* POSITION MATCHES */
+      if (p_ipos) /* HAND BACK THE SEQUENCE ipos TO CALLER */
+	*p_ipos = ipos;
+      if (p_letter_pos) /* HAND BACK THE letter[] INDEX TO CALLER */
+	*p_letter_pos = link->ipos;
+      return 1; /* FOUND SEQUENCE ONLY ONE LINK AWAY FROM HERE! */
+    }
+
+  if (max_step>1) { /* OK TO TRY ANOTHER LAYER OF RECURSION */
+    for (link=link0;link && link->ipos>=0;link=link->more) { /*SCAN ALL LINKS*/
+      nstep=find_sequence_link(iseq,match_ipos,link->ipos,letter,max_step-1,
+			       please_go_right,p_ipos,p_letter_pos);
+      if (nstep<max_step) /* FOUND IT! SO RETURN LINK COUNT */
+	return nstep+1;
+    }
+  }
+
+  return max_step+1; /* INDICATE iseq NOT FOUND IN THE SPECIFIED RANGE */
+}
+
+
+
+/** constructs a mapping from [iseq,ipos] -> iletter and back.  This is 
+  saved as an index seq_to_po[ipos]==>iletter and 
+  po_to_seq[iletter]==>ipos .
+  At the same time, it also constructs the actual sequence of the 
+  individual source sequences from the data stored in the LPO. */
+void build_seq_to_po_index(LPOSequence_T *seq)
+{
+  int i,j;
+  LPOLetterSource_T *source;
+
+  LOOP (i,seq->nsource_seq) { /* DUMP EXISTING INDEX, ALLOC NEW*/
+    FREE(seq->source_seq[i].seq_to_po);
+    FREE(seq->source_seq[i].po_to_seq);
+    FREE(seq->source_seq[i].sequence);
+    CALLOC(seq->source_seq[i].seq_to_po,seq->source_seq[i].length,int);
+    CALLOC(seq->source_seq[i].po_to_seq,seq->length,int);
+    CALLOC(seq->source_seq[i].sequence,seq->source_seq[i].length+1,char);
+    LOOP (j,seq->length) /*DEFAULT: PO LETTERS DON'T MAP TO ANY LETTER IN SEQ*/
+      seq->source_seq[i].po_to_seq[j]= INVALID_LETTER_POSITION;
+  }
+
+  LOOP (i,seq->length) { /* MAP EVERY LETTER ONTO SOURCE INDEXES */
+    for (source= &seq->letter[i].source;source;source=source->more) {
+      seq->source_seq[source->iseq].seq_to_po[source->ipos]=i;/*INVERSE*/
+      seq->source_seq[source->iseq].po_to_seq[i]=source->ipos;/*MAPPING*/
+      seq->source_seq[source->iseq].sequence[source->ipos]=seq->letter[i].letter;
+    }
+  }
+}
+
+
+
+
+/** CREATES A NEW SOURCEINFO ENTRY ON seq->source_seq[], SAVING THE 
+ FIELDS PASSED BY THE CALLER */
+int save_lpo_source(LPOSequence_T *seq, 
+		     char name[],
+		     char title[],
+		     int length,
+		     int weight,
+		     int bundle_id,
+		    int ndata,
+		    LPONumericData_T data[])
+{
+  int i,j;
+
+  REBUFF(seq->source_seq,seq->nsource_seq,SOURCE_SEQ_BUFFER_CHUNK,
+	 LPOSourceInfo_T);
+  i=seq->nsource_seq;
+  seq->source_seq[i].title=strdup(title? title:"untitled");
+  seq->source_seq[i].length=length;
+  seq->source_seq[i].weight=weight; /* DEFAULT WEIGHTING */
+  seq->source_seq[i].bundle_id= bundle_id;
+  STRNCPY(seq->source_seq[i].name,name,SEQUENCE_NAME_MAX);
+
+  LOOPF(j,ndata) /* SAVE NUMERIC DATA FOR THIS SOURCE */
+    cp_numeric_data(seq->source_seq+i,data+j);
+
+  return seq->nsource_seq++; /* INCREMENT SOURCE SEQUENCE LIST COUNT */
+}
+
+
+
+/** SAVE source_seq[] ENTRIES FROM ONE LPO TO ANOTHER */
+int *save_lpo_source_list(LPOSequence_T *seq,
+			  int nsource_seq,
+			  LPOSourceInfo_T source_seq[])
+{ 
+  int i,*list=NULL;
+
+  CALLOC(list,nsource_seq,int);
+  LOOPF (i,nsource_seq)  
+    list[i]=save_lpo_source(seq,source_seq[i].name,source_seq[i].title,
+			    source_seq[i].length,source_seq[i].weight,
+			    source_seq[i].bundle_id,
+			    source_seq[i].ndata,source_seq[i].data);
+
+  return list; /*HAND BACK LIST OF NEW INDICES ASSIGNED TO THESE source_seq*/
+}
+
+
+
+/** ADDS A LINK TO ipos TO THE LINKED LIST STORED IN list,
+ ALLOCATING A NEW LPOLetterLink IF NEEDED */
+LPOLetterLink_T *add_lpo_link(LPOLetterLink_T *list,LPOLetterRef_T ipos)
+{
+  if (list->ipos <0) { /* FIRST ENTRY IS EMPTY, SO USE IT */
+    list->ipos = ipos;
+    return list;/*RETURNS PTR TO LINK IN WHICH ipos STORED */
+  }
+  do { /* SCAN LIST CHECKING IF ALREADY STORED */
+    if (list->ipos == ipos) /* ALREADY STORED, NO NEED TO STORE AGAIN */
+      return list;/*RETURNS PTR TO LINK IN WHICH ipos STORED */
+  } while (list->more? (list=list->more):0);
+
+  CALLOC(list->more,1,LPOLetterLink_T); /* ADD ENTRY TO LINKED LIST */
+  list->more->ipos=ipos; /* SAVE THE LETTER REFERENCE */
+  return list->more;/*RETURNS PTR TO LINK IN WHICH ipos STORED */
+}
+
+
+
+
+void add_lpo_sources(LPOLetterSource_T *new_s,LPOLetterSource_T *old_s,
+		     int iseq_new[])/*TRANSLATION TO NEW source_seq[] INDEX*/
+{
+  for (;new_s->more;new_s=new_s->more);/* GO TO END*/
+  for (;old_s;old_s=old_s->more) {/*SAVE SOURCES*/
+    if (new_s->ipos>=0) { /* ALREADY A SOURCE HERE, SO CREATE NEW ENTRY */
+      CALLOC(new_s->more,1,LPOLetterSource_T);
+      new_s=new_s->more;
+    }
+    new_s->iseq=iseq_new[old_s->iseq]; /* SAVE SEQUENCE ID, POSITION */
+    new_s->ipos=old_s->ipos;
+  }
+}
+
+
+
+
+void copy_lpo_letter(LPOLetter_T *new,LPOLetter_T *old,
+		     LPOLetterRef_T old_to_new[],
+		     int iseq_new[])
+{
+  LPOLetterLink_T *link;
+  new->letter=old->letter; /* SAVE ITS SEQUENCE LETTER */
+  add_lpo_sources(&new->source,&old->source,iseq_new); /* SAVE SOURCES */
+  for (link= &old->left;link && link->ipos>=0;link=link->more) /*SAVE left*/
+    add_lpo_link(&new->left,old_to_new[link->ipos]);
+  for (link= &old->right;link && link->ipos>=0;link=link->more)/*SAVE right*/
+    add_lpo_link(&new->right,old_to_new[link->ipos]);
+  return;
+}
+
+
+
+ /** FUSES RING a AND RING b BY CROSSLINK OPERATION */
+void crosslink_rings(LPOLetterRef_T a,LPOLetterRef_T b,LPOLetter_T seq[])
+{
+  LPOLetterRef_T align_ring;
+
+  if (seq[a].ring_id==seq[b].ring_id) /* ALREADY ON SAME RING, DO NOTHING! */
+    return;
+  else if (seq[a].ring_id<seq[b].ring_id) { /* a<b SO RESET b.ring_id */
+    align_ring=b; /* TRAVERSE RING b */
+    do seq[align_ring].ring_id=seq[a].ring_id; /* ENFORCE a.ring_id == b.ring_id*/
+    while ((align_ring=seq[align_ring].align_ring) != b);
+  }
+  else { /* a>b SO RESET a.ring_id */
+    align_ring=a; /* TRAVERSE RING a */
+    do seq[align_ring].ring_id=seq[b].ring_id; /* ENFORCE a.ring_id == b.ring_id*/
+    while ((align_ring=seq[align_ring].align_ring) != a);
+  }
+
+  align_ring=seq[a].align_ring; /* JUST LIKE A SWAP */
+  seq[a].align_ring=seq[b].align_ring; /* CROSSLINK THE TWO RINGS */
+  seq[b].align_ring=align_ring;
+  return;
+}
+
+
+
+
+void copy_old_ring_to_new(LPOLetterRef_T start, /* START OF RING TO COPY*/
+			  LPOLetter_T old_lpo[],
+			  LPOLetter_T new_lpo[],
+			  LPOLetterRef_T old_to_new[]) /* MAPPING */
+{
+  LPOLetterRef_T ipos,next_pos;
+  for (ipos=start;(next_pos=old_lpo[ipos].align_ring) != start;ipos=next_pos)
+    crosslink_rings(old_to_new[ipos],old_to_new[next_pos],new_lpo);
+}
+
+
+
+/* TEMPORARY: THESE CONSTANTS CONTROL SEGMENT FUSION
+   TESTING THE FOLLOWING NUMBERS ON DNA ASSEMBLY*/
+#define FISSION_BREAK 5 /* LENGTH OF MISMATCH THAT SPLITS INTO SEGMENTS */
+#define MINIMUM_FUSION 10 /* MINIMUM #IDENTITIES FOR SEGMENT TO FUSE */
+#define FUSION_PERCENT 0.8 /* MINIMUM OVERALL IDENTITY FOR SEGMENT TO FUSE*/
+
+void mark_fusion_segments(int len_x,LPOLetter_T seq_x[],
+			  int len_y,LPOLetter_T seq_y[],
+			  LPOLetterRef_T y_to_x[],
+			  int fission_break,
+			  int minimum_fusion_length,
+			  float minimum_fusion_identity,
+			  char do_fuse[])
+{
+  int i,i_x,i_y,mismatch_length=0,identity_count=0,fission_break_point= -1;
+
+  LOOP (i_y,len_y) {
+    if ((i_x=y_to_x[i_y])>=0 && seq_x[i_x].letter==seq_y[i_y].letter)
+      do_fuse[i_y]=1;
+  }
+#ifdef SOURCE_EXCLUDED
+  LOOPF (i_y,len_y) {
+    if ((i_x=y_to_x[i_y])<0 /* NOT ALIGNED AT ALL */
+	|| seq_x[i_x].letter!=seq_y[i_y].letter /* MISMATCH */
+	|| i_y==len_y-1) { /* END OF SEQ, MUST CHECK LAST SEGMENT! */
+      if (++mismatch_length>=fission_break /* BREAK POINT */
+	  || i_y==len_y-1) { /* END OF SEQ, MUST CHECK LAST SEGMENT! */
+	if (identity_count>=minimum_fusion_length /* END OF A FUSION SEGMENT?*/
+	    && minimum_fusion_identity
+	       *(i_y-mismatch_length-fission_break_point)<= identity_count) {
+	  /* YES, MARK PRECEEDING SEGMENT FOR FUSION*/
+	  for (i=i_y-fission_break;i>fission_break_point;i--)/*MARK SEGMENT!*/
+	    if ((i_x=y_to_x[i])>=0 && seq_x[i_x].letter==seq_y[i].letter)
+	      do_fuse[i]=1; /* IDENTITY! MARK THIS POSITION TO BE FUSED */
+	}
+	fission_break_point=i_y; /* NO FUSION SEGMENT CAN EXTEND PAST HERE */
+	identity_count=0; /* RESET IDENTITY COUNTER FOR STARTING NEXT SEGMENT*/
+      }
+    }
+    else { /* PERFECT IDENTITY */
+      mismatch_length=0;
+      identity_count++;
+    }
+  }
+#endif
+}
+
+
+
+
+int reindex_lpo_fusion(int len_x,LPOLetter_T seq_x[],
+		       int len_y,LPOLetter_T seq_y[],
+		       LPOLetterRef_T x_to_y[],
+		       LPOLetterRef_T y_to_x[],
+		       LPOLetterRef_T new_x[],
+		       LPOLetterRef_T new_y[],
+		       int fission_break,
+		       int minimum_fusion_length,
+		       float minimum_fusion_identity)
+{
+  int new_len=0;
+  LPOLetterRef_T i_x,i_y,i_ring,end_of_ring= -1;
+  char *do_fuse=NULL;
+
+  CALLOC(do_fuse,len_y,char); /* MARK POSITIONS TO FUSE seq_y TO seq_x */
+  mark_fusion_segments(len_x,seq_x,len_y,seq_y,y_to_x,fission_break,
+		       minimum_fusion_length,minimum_fusion_identity,do_fuse);
+
+  for (i_x=i_y=new_len=0;i_x<len_x;i_x++) { /* BUILD new_x, new_y */
+    for (i_ring=i_x;i_ring<len_x && seq_x[i_ring].ring_id==seq_x[i_x].ring_id;
+	 i_ring++) /* SCAN X RING, AND SEE IF ANYTHING ALIGNS TO Y */
+      if (x_to_y[i_ring]>=0) { /* IF SO, INSERT Y NOW TO KEEP X RING TOGETHER*/
+	while (i_y<x_to_y[i_ring]) /* CONCATENATE LBEFORE ALIGNED LETTER */
+	  new_y[i_y++]=new_len++; /* ADD TO new_lpo */
+	break;
+      }
+	
+    if (x_to_y[i_x]>=0   /* ALIGNED, SO FIRST INSERT PRECEEDING FROM seq_y */
+	&& i_y<len_y) {
+      for (i_ring=seq_y[i_y].align_ring;i_ring!=i_y; /* CIRCLE THE RING*/
+	   i_ring=seq_y[i_ring].align_ring) /*FIND END OF i_y ALIGNMENT RING*/
+	if (i_ring>end_of_ring) /* FIND MAXIMUM INDEX ON THIS RING */
+	  end_of_ring=i_ring; /* RING GUARANTEED TO BE ONE CONTIGUOUS BLOCK*/
+
+      if (do_fuse[i_y]) /* THIS POSITION MEETS OUR FUSION CRITERIA, SO FUSE*/
+	new_y[i_y++]=new_len; /* USE SAME INDEX AS WILL BE USED FOR i_x */
+      else /* NOT IDENTICAL, SO GIVE IT ITS OWN LETTER */
+	new_y[i_y++]=new_len++;  /* ADD TO new_lpo */
+    }
+    new_x[i_x]=new_len++; /* ADD TO new_lpo */
+
+    while (i_y<=end_of_ring) /* CONCATENATE LETTERS ALIGNED TO i_y */
+      new_y[i_y++]=new_len++; /*KEEP ALIGNED LETTERS AS ONE CONTIGUOUS BLOCK!*/
+  }
+  while (i_y<len_y) { /* CONCATENATE TAIL OF seq_y ONTO new_lpo */
+    new_y[i_y++]=new_len++; /* ADD TO new_lpo */
+  }
+
+  FREE(do_fuse);
+  return new_len;
+}
+
+
+void remap_x_to_new(int nremap_x,/*TRANSLATE INDICES IN remap_x USING new_x*/
+		    LPOLetterRef_T remap_x[],/* ARRAY OF INDICES TO TRANSLATE*/
+		    int len_x,
+		    LPOLetterRef_T new_x[]) /* MAPPING FROM OLD -> NEW */
+{
+  int i;
+
+  LOOP (i,nremap_x) {
+    if (remap_x[i]>=0 && remap_x[i]<len_x) /* KEEP IN BOUNDS */
+      remap_x[i]=new_x[remap_x[i]];
+    else /* BOOM!! OUT OF RANGE!! */
+      abort(); /* DELIBERATELY CRASH TO TRAP THE ERROR! */
+  }
+}
+
+
+
+LPOSequence_T *copy_fuse_lpo_remap(LPOSequence_T *holder_x, /*FUSES TWO LPOs*/
+			     LPOSequence_T *holder_y, /*MAKES *NEW* HOLDER */
+			     LPOLetterRef_T x_to_y[],
+			     LPOLetterRef_T y_to_x[],
+			     int nremap_x,
+			     LPOLetterRef_T remap_x[])
+{
+  int i,new_len,*iseq_new=NULL,len_x,len_y;
+  LPOLetterRef_T i_x,i_y,*new_x=NULL,*new_y=NULL;
+  LPOLetter_T *new_lpo=NULL,*seq_x,*seq_y;
+  LPOSequence_T *new_seq=NULL;
+
+  len_x=holder_x->length; /* GET letter ARRAY FROM BOTH x AND y */
+  seq_x=holder_x->letter;
+  len_y=holder_y->length;
+  seq_y=holder_y->letter;
+
+  CALLOC(new_seq,1,LPOSequence_T); /* CREATE A NEW HOLDER */
+  CALLOC(new_x,len_x,LPOLetterRef_T);
+  CALLOC(new_y,len_y,LPOLetterRef_T);
+  new_len=reindex_lpo_fusion(len_x,seq_x,len_y,seq_y,x_to_y,y_to_x,new_x,new_y,
+			     FISSION_BREAK,MINIMUM_FUSION,FUSION_PERCENT);
+  CALLOC(new_lpo,new_len,LPOLetter_T); /* ALLOCATE NEW LINEARIZED PO */
+  LOOP (i,new_len) { /* INITIALIZE ALL LINKS TO INVALID */
+    new_lpo[i].left.ipos=new_lpo[i].right.ipos=new_lpo[i].source.ipos
+      = INVALID_LETTER_POSITION;
+    new_lpo[i].align_ring=new_lpo[i].ring_id=i; /* POINT TO SELF */
+  }
+  new_seq->length=new_len; /* SAVE NEW LPO ARRAY IN NEW HOLDER */
+  new_seq->letter=new_lpo;
+
+  iseq_new=save_lpo_source_list(new_seq,holder_x->nsource_seq,/*COPY x SOURCE*/
+				holder_x->source_seq);
+  LOOP (i_x,len_x) /* COPY LETTER DATA TO CORRESPONDING LETTERS OF NEW LPO */
+    copy_lpo_letter(new_lpo+new_x[i_x],seq_x+i_x,new_x,iseq_new);
+  FREE(iseq_new);
+  iseq_new=save_lpo_source_list(new_seq,holder_y->nsource_seq,/*COPY y SOURCE*/
+				holder_y->source_seq);
+  LOOP (i_y,len_y)
+    copy_lpo_letter(new_lpo+new_y[i_y],seq_y+i_y,new_y,iseq_new);
+  FREE(iseq_new);
+
+  LOOP (i_x,len_x) /* COPY OLD ALIGNMENT RINGS TO THE NEW LPO */
+    copy_old_ring_to_new(i_x,seq_x,new_lpo,new_x);
+  LOOP (i_y,len_y)
+    copy_old_ring_to_new(i_y,seq_y,new_lpo,new_y);
+
+  LOOP (i_x,len_x) /* SAVE ALIGNMENT OF seq_x AND seq_y TO new_lpo.align_ring*/
+    if (x_to_y[i_x]>=0) /* seq_x[i_x] ALIGNED TO seq_y, SO SAVE! */
+      crosslink_rings(new_x[i_x],new_y[x_to_y[i_x]],new_lpo);
+
+  if (remap_x) /* CONVERT OLD INDEX TABLE TO NEW REFERENCE SYSTEM */
+    remap_x_to_new(nremap_x,remap_x,len_x,new_x);
+  FREE(new_x); /* DUMP SCRATCH MEMORY AND RETURN */
+  FREE(new_y);
+  return new_seq; /* HAND BACK THE NEW LPO CONTAINING THE FUSION */
+}
+
+
+/** fuses the two partial orders holder_x and holder_y, based upon the 
+ letter_x <--> letter_y mapping specified by x_to_y and y_to_x (which
+ must be consistent!)  A new LPO is created to store the result;
+ neither holder_x or holder_y are changed */
+LPOSequence_T *copy_fuse_lpo(LPOSequence_T *holder_x, /*WRAPPER: NO REMAPPING*/
+			LPOSequence_T *holder_y,
+			LPOLetterRef_T x_to_y[],
+			LPOLetterRef_T y_to_x[])
+{
+  return copy_fuse_lpo_remap(holder_x,holder_y,x_to_y,y_to_x,0,NULL);
+}
+
+
+
+
+LPOSequence_T *copy_lpo(LPOSequence_T *holder_x)
+{
+  int i;
+  LPOSequence_T dummy,*new_copy;
+  LPOLetterRef_T *x_to_y=NULL;
+  
+  memset(&dummy,0,sizeof(dummy)); /* BLANK ALL FIELDS */
+  CALLOC(x_to_y,holder_x->length,LPOLetterRef_T);
+  LOOP (i,holder_x->length)
+    x_to_y[i]= INVALID_LETTER_POSITION;
+
+  new_copy=copy_fuse_lpo(holder_x,&dummy,x_to_y,x_to_y);
+  FREE(x_to_y);
+  return new_copy;
+}
+
+
+
+
+
+void translate_lpo(int old_len,LPOLetterRef_T old_to_new[],
+			 LPOLetter_T seq[])
+{
+  int i,block_end;
+  LPOLetterLink_T *link;
+  block_end=old_len;
+  LOOPB (i,old_len) { /* TRANSLATE AND SHIFT THE WHOLE LPO */
+    seq[i].align_ring=old_to_new[seq[i].align_ring]; /* TRANSLATE INDICES*/
+    seq[i].ring_id=old_to_new[seq[i].ring_id];
+    for (link= &seq[i].left;link && link->ipos>=0;link=link->more)
+      link->ipos = old_to_new[link->ipos];
+    for (link= &seq[i].right;link && link->ipos>=0;link=link->more)
+      link->ipos = old_to_new[link->ipos];
+
+    if ((0==i || /* HIT SEQ START, SO COPY THE BLOCK NOW!*/
+	old_to_new[i] != old_to_new[i-1]+1) /*BLOCK BOUNDARY*/
+	&& old_to_new[i] != i) { /* ACTUALLY REQUIRES A SHIFT */
+      memmove(seq+old_to_new[i],seq+i,(block_end-i)*sizeof(LPOLetter_T));
+      block_end=i; /* RESET TO POINT TO END OF NEXT BLOCK TO COPY */
+    } /* BLOCK NOW SHIFTED TO ITS NEW LOCATION */
+  }
+}
+
+
+
+LPOSequence_T *fuse_lpo_remap(LPOSequence_T *holder_x,
+			LPOSequence_T *holder_y,
+			LPOLetterRef_T x_to_y[],
+			LPOLetterRef_T y_to_x[],
+			int nremap_x,
+			LPOLetterRef_T remap_x[])
+{
+  int i,new_len,*iseq_new=NULL,len_x,len_y;
+  LPOLetterRef_T i_x,i_y,*new_x=NULL,*new_y=NULL;
+  LPOLetter_T *new_lpo=NULL,*seq_x,*seq_y;
+
+  len_x=holder_x->length; /* GET letter ARRAY FROM BOTH x AND y */
+  seq_x=holder_x->letter;
+  len_y=holder_y->length;
+  seq_y=holder_y->letter;
+
+  CALLOC(new_x,len_x,LPOLetterRef_T);
+  CALLOC(new_y,len_y,LPOLetterRef_T);
+  new_len=reindex_lpo_fusion(len_x,seq_x,len_y,seq_y,x_to_y,y_to_x,new_x,new_y,
+			     FISSION_BREAK,MINIMUM_FUSION,FUSION_PERCENT);
+  REALLOC(seq_x,new_len,LPOLetter_T); /* EXPAND seq_x TO HOLD FUSED LPO */
+  translate_lpo(len_x,new_x,seq_x); /* SHIFT ALL THE LETTERS TO NEW LOCATIONS*/
+  new_lpo=seq_x; /* THE EXPANDED VERSION OF seq_x IS OUR NEW LPO */
+  holder_x->length=new_len; /* SAVE NEW LPO ARRAY IN NEW HOLDER */
+  holder_x->letter=new_lpo;
+
+  LOOP (i_y,len_y) { /* INITIALIZE ALL LETTERS FOR STORING seq_y TO BLANK */
+    if (y_to_x[i_y]>=0 && new_x[y_to_x[i_y]]==new_y[i_y])/*i_y FUSED TO i_x*/
+      continue; /* THIS LETTER IS ALREADY PART OF seq_x SO DON'T OVERWRITE!!*/
+    i=new_y[i_y]; /* TRANSLATE TO NEW INDEXING */
+    memset(new_lpo+i,0,sizeof(LPOLetter_T)); /* NULL INITIALIZE IT! */
+    new_lpo[i].left.ipos=new_lpo[i].right.ipos=new_lpo[i].source.ipos
+      = INVALID_LETTER_POSITION; /* RESET TO UNLINKED STATE */
+    new_lpo[i].align_ring=new_lpo[i].ring_id=i; /* POINT TO SELF */
+  }
+
+  iseq_new=save_lpo_source_list(holder_x,holder_y->nsource_seq,/*COPY y SRC*/
+				holder_y->source_seq);
+  LOOP (i_y,len_y) /* COPY LETTER DATA TO CORRESPONDING LETTERS OF NEW LPO */
+    copy_lpo_letter(new_lpo+new_y[i_y],seq_y+i_y,new_y,iseq_new);
+  FREE(iseq_new);
+
+  LOOP (i_y,len_y) /* COPY OLD ALIGNMENT RINGS TO THE NEW LPO */
+    copy_old_ring_to_new(i_y,seq_y,new_lpo,new_y);
+
+  LOOP (i_x,len_x) /* SAVE ALIGNMENT OF seq_x AND seq_y TO new_lpo.align_ring*/
+    if (x_to_y[i_x]>=0) /* seq_x[i_x] ALIGNED TO seq_y, SO SAVE! */
+      crosslink_rings(new_x[i_x],new_y[x_to_y[i_x]],new_lpo);
+
+  if (remap_x) /* CONVERT OLD INDEX TABLE TO NEW REFERENCE SYSTEM */
+    remap_x_to_new(nremap_x,remap_x,len_x,new_x);
+  FREE(new_x); /* DUMP SCRATCH MEMORY AND RETURN */
+  FREE(new_y);
+  return holder_x; /* HAND BACK x LPO CONTAINING THE FUSION */
+}
+
+
+/** fuses the two partial orders holder_x and holder_y, based upon the 
+ letter_x <--> letter_y mapping specified by x_to_y and y_to_x (which
+ must be consistent!)  The result is returned in holder_x */
+LPOSequence_T *fuse_lpo(LPOSequence_T *holder_x, /*WRAPPER: NO REMAPPING*/
+			LPOSequence_T *holder_y,
+			LPOLetterRef_T x_to_y[],
+			LPOLetterRef_T y_to_x[])
+{
+  return fuse_lpo_remap(holder_x,holder_y,x_to_y,y_to_x,0,NULL);
+}
+
+
+
+/** FREES the linked list link including all nodes beneath it; NB: link 
+ itself is freed, so DO NOT pass a static LPOLetterLink */
+void free_lpo_link_list(LPOLetterLink_T *link)
+{
+  LPOLetterLink_T *next;
+  for (;link;link=next) { /* DUMP ALL THE LINKS */
+    next=link->more;
+    free(link);
+  }
+}
+
+/** FREES the linked list source including all nodes beneath it; NB: source 
+ itself is freed, so DO NOT pass a static LPOLetterSource */
+void free_lpo_source_list(LPOLetterSource_T *source)
+{
+  LPOLetterSource_T *next;
+  for (;source;source=next) { /* DUMP ALL THE SOURCE ENTRIES */
+    next=source->more;
+    free(source);
+  }
+}
+
+
+/** FREES ALL DATA ASSOCIATED WITH letter[], AND OPTIONALLY letter ITSELF*/
+void free_lpo_letters(int nletter,LPOLetter_T *letter,int please_free_block)
+{
+  int i;
+  if (!letter) /* NOTHING TO FREE... */
+    return;
+  LOOP (i,nletter) { /* DUMP ALL LINKED LISTS */
+    if (letter[i].left.more)
+      free_lpo_link_list(letter[i].left.more);
+    if (letter[i].right.more)
+      free_lpo_link_list(letter[i].right.more);
+    if (letter[i].source.more)
+      free_lpo_source_list(letter[i].source.more);
+  }
+  if (please_free_block) /*DON'T ALWAYS WANT TO FREE... MIGHT BE IN AN ARRAY*/
+    free(letter);
+}
+
+
+
+
+void free_lpo_sourceinfo(int nsource_seq,LPOSourceInfo_T *source_seq,
+			int please_free_block)
+{
+  int i;
+  LOOP (i,nsource_seq) {
+    FREE(source_seq[i].title);
+    FREE(source_seq[i].sequence);
+    FREE(source_seq[i].seq_to_po);
+    FREE(source_seq[i].po_to_seq);
+    free_lpo_numeric_data(source_seq[i].ndata,source_seq[i].data,TRUE);
+    source_seq[i].data=NULL; /* DON'T LEAVE DANGLING POINTER! */
+  }
+  if (please_free_block)
+    free(source_seq);
+}
+
+
+
+
+/** FREES ALL DATA FROM seq, AND OPTIONALLY seq ITSELF */
+void free_lpo_sequence(LPOSequence_T *seq,int please_free_holder)
+{
+  int i;
+  if (!seq) /* NOTHING TO FREE... */
+    return;
+  free_lpo_letters(seq->length,seq->letter,TRUE);
+  seq->letter=NULL; /* MARK AS FREED... DON'T LEAVE DANGLING POINTER! */
+  FREE(seq->title);
+  FREE(seq->sequence);
+  if (seq->source_seq) {
+    free_lpo_sourceinfo(seq->nsource_seq,seq->source_seq,TRUE);
+    seq->source_seq=NULL; /* MARK AS FREED... DON'T LEAVE DANGLING POINTER! */
+  }
+  if (please_free_holder) /*DON'T ALWAYS WANT TO FREE... MIGHT BE IN AN ARRAY*/
+    free(seq);
+}
+
+/**@memo {\bfEXAMPLE}: dump the LPO dna_lpo, and all its associated data: \begin{verbatim}
+  if (dna_lpo) 
+    free_lpo_sequence(dna_lpo,TRUE);
+\end{verbatim}
+*/
+
+
+/** creates a new sequence which follows path[] through the LPO seq,
+ and gives it the specified name and title  */
+int add_path_sequence(int path_length,
+		      LPOLetterRef_T path[],
+		      LPOSequence_T *seq,
+		      char name[],
+		      char title[])
+{
+  int i,iseq_new;
+  LPOSourceInfo_T *new_seq;
+  LPOLetterSource_T save_source={0,0,NULL};
+
+  /* CREATE SOURCE ENTRY FOR THIS SEQUENCE */
+  iseq_new=save_lpo_source(seq,name,title,path_length,0,NO_BUNDLE,0,NULL);
+
+  LOOP (i,path_length) { /* ADD THIS AS SOURCE TO ALL POSITIONS IN path */
+    save_source.ipos=i;
+    add_lpo_sources(&seq->letter[path[i]].source,&save_source,&iseq_new);
+  } /* NB: THIS DOESN'T CHECK THAT path IS A VALID WALK THRU THE PARTIAL ORDER
+       MIGHT BE A GOOD IDEA TO CATCH POSSIBLE ERRORS IN path */
+  return iseq_new; /* RETURN INDEX OF NEWLY CREATED ENTRY */
+}
+
+/**@memo EXAMPLE: create a consensus sequence from a path: 
+    iseq=add_path_sequence(path_length,path,seq,name,title); 
+------------------------------------------------------- 
+-------------------------------------------------
+*/
+

Added: trunk/packages/poa/branches/upstream/current/lpo.h
===================================================================
--- trunk/packages/poa/branches/upstream/current/lpo.h	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/lpo.h	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,171 @@
+
+
+#ifndef LPO_HEADER_INCLUDED
+#define LPO_HEADER_INCLUDED
+
+#include <default.h>
+#include <poa.h>
+#include <seq_util.h>
+
+/*********************************************************** lpo.c */
+void lpo_init(LPOSequence_T *seq);
+
+void initialize_seqs_as_lpo(int nseq, Sequence_T seq[],ResidueScoreMatrix_T *m);
+void lpo_index_symbols(LPOSequence_T *lpo,ResidueScoreMatrix_T *m);
+
+int save_lpo_source(LPOSequence_T *seq,
+		     char name[],
+		     char title[],
+		     int length,
+		     int weight,
+		     int bundle_id,
+		    int ndata,
+		    LPONumericData_T data[]);
+
+int *save_lpo_source_list(LPOSequence_T *seq,
+			  int nsource_seq,
+			  LPOSourceInfo_T source_seq[]);
+
+LPOLetterLink_T *add_lpo_link(LPOLetterLink_T *list,LPOLetterRef_T ipos);
+
+void add_lpo_sources(LPOLetterSource_T *new_s,LPOLetterSource_T *old_s,
+		     int iseq_new[]);
+
+void crosslink_rings(LPOLetterRef_T a,LPOLetterRef_T b,LPOLetter_T seq[]);
+
+LPOSequence_T *copy_fuse_lpo(LPOSequence_T *holder_x,
+			     LPOSequence_T *holder_y,
+			     LPOLetterRef_T x_to_y[],
+			     LPOLetterRef_T y_to_x[]);
+
+LPOSequence_T *copy_lpo(LPOSequence_T *holder_x);
+
+LPOSequence_T *fuse_lpo(LPOSequence_T *holder_x,
+			LPOSequence_T *holder_y,
+			LPOLetterRef_T x_to_y[],
+			LPOLetterRef_T y_to_x[]);
+
+void free_lpo_letters(int nletter,LPOLetter_T *letter,int please_free_block);
+
+void free_lpo_sequence(LPOSequence_T *seq,int please_free_holder);
+
+int add_path_sequence(int path_length,
+		      LPOLetterRef_T path[],
+		      LPOSequence_T *seq,
+		      char name[],
+		      char title[]);
+
+
+/************************************************** FROM align_lpo.c */
+int align_lpo(LPOSequence_T *lposeq_x,
+	      LPOSequence_T *lposeq_y,
+	      ResidueScoreMatrix_T *m,
+	      LPOLetterRef_T **x_to_y,
+	      LPOLetterRef_T **y_to_x,
+	      int use_global_alignment);
+
+
+
+/************************************************** FROM align_lpo_po.c */
+LPOScore_T align_lpo_po(LPOSequence_T *lposeq_x,
+			LPOSequence_T *lposeq_y,
+			ResidueScoreMatrix_T *m,
+			LPOLetterRef_T **x_to_y,
+			LPOLetterRef_T **y_to_x,
+			LPOScore_T (*scoring_function)
+			 (int,int,LPOLetter_T [],LPOLetter_T [],
+			  ResidueScoreMatrix_T *),
+			int use_global_alignment);
+
+
+/************************************************** FROM buildup_lpo.c */
+LPOSequence_T *buildup_lpo(LPOSequence_T *new_seq,
+			   int nseq,LPOSequence_T seq[],
+			   ResidueScoreMatrix_T *score_matrix,
+			   int use_aggressive_fusion,
+			   int use_global_alignment);
+
+LPOSequence_T *buildup_clipped_lpo(LPOSequence_T *new_seq,
+				  int nseq,LPOSequence_T seq[],
+				  ResidueScoreMatrix_T *score_matrix,
+				   int use_global_alignment);
+
+LPOSequence_T *buildup_progressive_lpo(int nseq,LPOSequence_T seq[],
+				       ResidueScoreMatrix_T *score_matrix,
+				       int use_aggressive_fusion,
+				       char score_file[],
+				       LPOScore_T (*scoring_function)
+				       (int,int,LPOLetter_T [],LPOLetter_T [],
+					ResidueScoreMatrix_T *),
+				       int use_global_alignment);
+				       
+/**************************************************** lpo_format.c */
+void write_lpo(FILE *ifile,LPOSequence_T *seq,
+	       ResidueScoreMatrix_T *score_matrix);
+
+LPOSequence_T *read_lpo(FILE *ifile);
+LPOSequence_T *read_lpo_select(FILE *ifile,FILE *select_ifile,
+			       int keep_all_links,int remove_listed_sequences);
+
+void write_lpo_as_fasta(FILE *ifile,LPOSequence_T *seq,
+			int nsymbol,char symbol[]);
+
+void write_lpo_bundle_as_fasta(FILE *ifile,LPOSequence_T *seq,
+			       int nsymbol,char symbol[],int ibundle);
+void export_clustal_seqal(FILE *ifile,
+			  LPOSequence_T *seq,
+			  int nsymbol,char symbol[]);
+
+
+/****************************************************** heaviest_bundle.c */
+void generate_lpo_bundles(LPOSequence_T *seq,float minimum_fraction);
+
+
+/****************************************************** make_frame.c */
+LPOSequence_T *build_3_frames(char dna_seq[],char name[],char title[],
+			      LPOScore_T frameshift_score,
+			      ResidueScoreMatrix_T *m);
+LPOSequence_T *map_protein_to_dna(char dna_name[],
+				  LPOSequence_T *lpo_dna,
+				  int nseq,
+				  Sequence_T seq[],
+				  ResidueScoreMatrix_T *score_matrix,
+				  int use_aggressive_fusion);
+
+
+/****************************************************** remove_bundle.c */
+int remove_bundle(LPOSequence_T *seq,int ibundle,int delete_all_others);
+
+
+
+/******************************************************* numeric_data.c */
+LPONumericData_T *new_numeric_data(LPOSourceInfo_T *source_seq,
+				   char name[],
+				   char title[],
+				   double initial_value);
+
+LPONumericData_T *find_numeric_data(LPOSourceInfo_T *source_seq,
+				   char name[]);
+
+void free_lpo_numeric_data(int ndata,LPONumericData_T *data,
+			   int please_free_block);
+
+void new_numeric_data_sets(LPOSourceInfo_T *source_seq,
+			   int nset,char *set_names[],
+			   char source_name_fmt[],
+			   char target_name_fmt[],
+			   char title_fmt[]);
+void read_numeric_data(int nsource_seq,
+		       LPOSourceInfo_T source_seq[],
+		       FILE *ifile);
+LPONumericData_T *cp_numeric_data(LPOSourceInfo_T *source_seq,
+				  LPONumericData_T *data);
+
+/******************************************************* balance_matrix.c */
+int read_aa_frequencies(char filename[],ResidueScoreMatrix_T *score_matrix)
+     ;
+void balance_matrix_score(int nletter,LPOLetter_T letter[],
+			  ResidueScoreMatrix_T *score_matrix);
+
+
+#endif

Added: trunk/packages/poa/branches/upstream/current/lpo_format.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/lpo_format.c	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/lpo_format.c	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,528 @@
+
+
+#include "default.h"
+#include "poa.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+
+/** writes the LPO in seq to the stream ifile; optionally a symbol table
+ may be given for translating the letters in the LPO to text */
+void write_lpo(FILE *ifile,LPOSequence_T *seq,
+	       ResidueScoreMatrix_T *score_matrix)
+{
+  int i;
+  LPOLetterLink_T *link;
+  LPOLetterSource_T *source;
+
+  fprintf(ifile,"VERSION=LPO.0.1\n");
+  fprintf(ifile,"NAME=%s\nTITLE=%s\nLENGTH=%d\nSOURCECOUNT=%d\n",
+	  seq->name,seq->title,seq->length,seq->nsource_seq);
+
+  LOOPF (i,seq->nsource_seq)
+    fprintf(ifile,"SOURCENAME=%s\nSOURCEINFO=%d %d %d %d %s\n",
+	    seq->source_seq[i].name,seq->source_seq[i].length,
+	    seq->source_seq[i].istart,seq->source_seq[i].weight,
+	    seq->source_seq[i].bundle_id,seq->source_seq[i].title);
+
+  LOOPF (i,seq->length) {
+    fprintf(ifile,"%c:",
+	    seq->letter[i].letter < score_matrix->nsymbol ? 
+	    score_matrix->symbol[seq->letter[i].letter] 
+	    : seq->letter[i].letter);
+    for (link= &seq->letter[i].left;link && link->ipos>=0;link=link->more)
+      fprintf(ifile,"L%d",link->ipos);
+    for (source= &seq->letter[i].source;source;source=source->more)
+      fprintf(ifile,"S%d",source->iseq); /* SOURCE ID */
+    if (seq->letter[i].align_ring!=i) /* ALIGNED TO SOMETHING ELSE */
+      fprintf(ifile,"A%d",seq->letter[i].align_ring);
+    fputc('\n',ifile);
+  }
+}
+/**@memo example: writing a PO file: 
+  if (lpo_file_out) 
+    write_lpo(lpo_file_out,lpo_out,score_matrix.symbol);
+*/
+
+
+
+
+/** reads an LPO from the stream ifile, dynamically allocates memory for
+it, and returns a pointer to the LPO */ 
+LPOSequence_T *read_lpo(FILE *ifile)
+{
+  int i,j,length,nsource_seq,istart,field_id,*pos_count=NULL,value;
+  int weight,bundle_id,last_alloc=0;
+  LPOSequence_T *seq=NULL;
+  char c,name[1024],title[4096],version[256];
+  LPOLetterSource_T save_source={0,0,NULL};
+
+  CALLOC(seq,1,LPOSequence_T);
+  fscanf(ifile,"VERSION=%s",version);
+  if (fscanf(ifile," NAME=%[^\n] TITLE=%[^\n] LENGTH=%d SOURCECOUNT=%d",
+	     name,title,&length,&nsource_seq)!=4)
+    return NULL;
+  STRNCPY(seq->name,name,SEQUENCE_NAME_MAX);
+  seq->title=strdup(title);
+  seq->length=length;
+  seq->nsource_seq=nsource_seq;
+  CALLOC(seq->letter,length,LPOLetter_T);
+  GETMEM(seq->source_seq,nsource_seq,last_alloc,SOURCE_SEQ_BUFFER_CHUNK,LPOSourceInfo_T);
+  CALLOC(pos_count,nsource_seq,int);
+  LOOP (i,length) { /* INITIALIZE ALL LINKS TO INVALID */
+    seq->letter[i].align_ring=i; /* POINT TO SELF */
+    seq->letter[i].ring_id= INVALID_LETTER_POSITION; /* BLANK! */
+    seq->letter[i].left.ipos=seq->letter[i].right.ipos=
+      seq->letter[i].source.ipos= INVALID_LETTER_POSITION;
+  }
+
+  LOOPF(i,nsource_seq) { /* SAVE SOURCE INFO LIST */
+    if (fscanf(ifile," SOURCENAME=%[^\n] SOURCEINFO=%d %d %d %d",
+	       name,&length,&istart,&weight,&bundle_id)!=5)
+      return NULL;
+    /* SKIP WHITESPACE BEFORE TITLE; ALLOW EMPTY TITLE. */
+    fscanf(ifile,"%*[ \t]");
+    if (fscanf(ifile,"%[^\n]",title)!=1)
+      title[0] = '\0';
+    STRNCPY(seq->source_seq[i].name,name,SEQUENCE_NAME_MAX);
+    seq->source_seq[i].length=length;
+    seq->source_seq[i].istart=istart;
+    seq->source_seq[i].weight=weight;
+    seq->source_seq[i].bundle_id=bundle_id;
+    seq->source_seq[i].title=strdup(title);
+  }
+
+  LOOPF (i,seq->length) { /* NOW READ THE ACTUAL PARTIAL ORDER */
+    if (fscanf(ifile," %c:",&c)!=1) /* READ SEQUENCE LETTER */
+      return NULL;
+    seq->letter[i].letter=c;
+    while ((field_id=getc(ifile))!=EOF && '\n'!=field_id) {/* READ FIELDS*/
+      if (1!=fscanf(ifile,"%d",&value))
+	return NULL;
+      switch (field_id) {
+      case 'L':
+	add_lpo_link(&seq->letter[i].left,value); /* ADD LEFT-RIGHT LINKS*/
+	add_lpo_link(&seq->letter[value].right,i);
+	break;
+      case 'S':  /* SAVE THE SOURCE ID */
+	save_source.ipos=pos_count[value]++;
+	add_lpo_sources(&seq->letter[i].source,&save_source,&value);
+	break;
+      case 'A': /* SAVE THE ALIGN RING POINTER */
+	seq->letter[i].align_ring=value;
+	break;
+      }
+    }
+  }
+
+  LOOPF (i,seq->length) { /* SET ring_id TO MINIMUM VALUE ON EACH RING */
+    if (seq->letter[i].ring_id<0) {/* NEW RING, UPDATE IT! */
+      j=i; /* GO AROUND THE ENTIRE RING, SETTING ring_id TO i */
+      do seq->letter[j].ring_id=i; /* i IS MINIMUM VALUE ON THIS RING */
+      while ((j=seq->letter[j].align_ring)!=i);
+    }
+  }
+
+  FREE(pos_count);
+  return seq;
+}
+
+
+
+#define INVALID_LPO_LINK (-99)
+
+enum {
+  default_retention_mode,
+  default_no_retention_mode
+};
+
+/** reads an LPO from the stream ifile, dynamically allocates memory for
+it, and returns a pointer to the LPO */ 
+LPOSequence_T *read_lpo_select(FILE *ifile,FILE *select_file,
+			       int keep_all_links,int remove_listed_sequences)
+{
+  int i,j,k,length,nsource_seq,istart,field_id,*pos_count=NULL,value;
+  int weight,bundle_id,last_alloc=0,*iseq_compact=NULL,*last_pos=NULL;
+  int nlink,*link_list=NULL,*match_pos=NULL,*ring_old=NULL;
+  int *pos_compact=NULL,npos_compact=0,keep_this_letter,retention_mode;
+  LPOSequence_T *seq=NULL;
+  char c,name[1024],title[4096],version[256];
+  LPOLetterSource_T save_source={0,0,NULL},*source=NULL;
+
+  if (remove_listed_sequences)
+    retention_mode=default_retention_mode;/*KEEP SEQS AS DFLT, SKIP IF LISTED*/
+  else /* SKIP SEQS UNLESS LISTED IN select_file */
+    retention_mode=default_no_retention_mode;
+
+  CALLOC(seq,1,LPOSequence_T);
+  fscanf(ifile,"VERSION=%s",version);
+  if (fscanf(ifile," NAME=%[^\n] TITLE=%[^\n] LENGTH=%d SOURCECOUNT=%d",
+	     name,title,&length,&nsource_seq)!=4)
+    return NULL;
+  STRNCPY(seq->name,name,SEQUENCE_NAME_MAX);
+  seq->title=strdup(title);
+  seq->length=length;
+  seq->nsource_seq=nsource_seq;
+  CALLOC(seq->letter,length,LPOLetter_T);
+  GETMEM(seq->source_seq,nsource_seq,last_alloc,SOURCE_SEQ_BUFFER_CHUNK,LPOSourceInfo_T);
+  CALLOC(pos_count,nsource_seq,int);
+  LOOP (i,length) { /* INITIALIZE ALL LINKS TO INVALID */
+    seq->letter[i].align_ring=i; /* POINT TO SELF */
+    seq->letter[i].ring_id= INVALID_LETTER_POSITION; /* BLANK! */
+    seq->letter[i].left.ipos=seq->letter[i].right.ipos=
+      seq->letter[i].source.ipos= INVALID_LETTER_POSITION;
+  }
+
+  LOOPF(i,nsource_seq) { /* SAVE SOURCE INFO LIST */
+    if (fscanf(ifile," SOURCENAME=%[^\n] SOURCEINFO=%d %d %d %d %[^\n]",
+	       name,&length,&istart,&weight,&bundle_id,title)!=6)
+      return NULL;
+    STRNCPY(seq->source_seq[i].name,name,SEQUENCE_NAME_MAX);
+    seq->source_seq[i].length=length;
+    seq->source_seq[i].istart=istart;
+    seq->source_seq[i].weight=weight;
+    seq->source_seq[i].bundle_id=bundle_id;
+    seq->source_seq[i].title=strdup(title);
+  }
+
+  CALLOC(iseq_compact,nsource_seq,int);
+  CALLOC(last_pos,nsource_seq,int);
+  CALLOC(match_pos,nsource_seq,int);
+  if (select_file) {
+    LOOP (i,nsource_seq) /* DEFAULT: MARKED AS INVALID */
+      iseq_compact[i]= -retention_mode;
+    while (fscanf(select_file,"SOURCENAME=%[^\n]\n",name)==1) {
+      LOOP (i,nsource_seq)
+	if (strcmp(seq->source_seq[i].name,name)==0) {
+	  iseq_compact[i]= retention_mode-1;
+	  break;
+	}
+    }
+  }
+
+  j=0;
+  LOOPF (i,nsource_seq) {
+    last_pos[i]= -1; /* DEFAULT: INVALID */
+    if (iseq_compact[i]>=0) {
+      iseq_compact[i]=j;
+      if (i>j)
+	memcpy(seq->source_seq+j,seq->source_seq+i,sizeof(LPOSourceInfo_T));
+      match_pos[j]= INVALID_LPO_LINK; /* DEFAULT: NO VALID LINK! */
+      j++;
+    }
+    else { /* EXCLUDE THIS SEQUENCE FROM THE FILTERED LPO */
+      iseq_compact[i]= INVALID_LETTER_POSITION;
+      if (seq->source_seq[i].title) 
+	free(seq->source_seq[i].title);
+    }
+  }
+  seq->nsource_seq=nsource_seq=j; /* COMPACTED COUNT OF SEQUENCES TO KEEP*/
+
+  CALLOC(link_list,seq->length,int); /*TEMPORARY DATA FOR COMPACTION MAPPING */
+  CALLOC(pos_compact,seq->length,int);
+  CALLOC(ring_old,seq->length,int);
+  npos_compact=0;
+
+  LOOPF (i,seq->length) { /* NOW READ THE ACTUAL PARTIAL ORDER */
+    if (fscanf(ifile," %c:",&c)!=1) /* READ SEQUENCE LETTER */
+      return NULL;
+    seq->letter[npos_compact].letter=c;
+    nlink=0;
+    keep_this_letter=0; /*DEFAULT */
+    while ((field_id=getc(ifile))!=EOF && '\n'!=field_id) {/* READ FIELDS*/
+      if (1!=fscanf(ifile,"%d",&value))
+	return NULL;
+      switch (field_id) {
+      case 'L':
+	if (pos_compact[value]>=0) /*COULD BE VALID LINK: WAIT TO CHECK SRCs*/
+	  link_list[nlink++]=pos_compact[value]; /* SAVE IT TEMPORARILY */
+	break;
+      case 'S':  /* SAVE THE SOURCE ID */
+	if (iseq_compact[value]>=0) { /*KEEP THIS SOURCE, SO KEEP THIS LETTER*/
+	  keep_this_letter=1;
+	  value=iseq_compact[value]; /* TRANSLATE TO ITS COMPACTED INDEX*/
+	  if (last_pos[value]>=0) { /* MAKE SURE WE HAVE LINK TO LAST POSITION */
+	    LOOP (j,nlink) /* CHECK TO SEE IF LINK ALREADY SAVED */
+	      if (link_list[j]==last_pos[value])
+		break;
+	    if (LOOP_FINISHED(j,nlink)) { /* NO LINK??? ADD IT!!! */
+	      link_list[nlink++]=last_pos[value];
+	    }
+	  }
+	  last_pos[value]=npos_compact; /* THIS SEQ POS IS AT THIS NODE */
+	  save_source.ipos=pos_count[value]++;/* COUNT LENGTH OF THIS SEQ */
+	  add_lpo_sources(&seq->letter[npos_compact].source,
+			  &save_source,&value);
+	  seq->letter[npos_compact].ring_id= INVALID_LETTER_POSITION;
+	  seq->letter[npos_compact].align_ring=i;
+	  ring_old[i]=i; /* DEFAULT: SELF-RING OF ONE LETTER*/
+	}
+	break;
+      case 'A': /* SAVE THE ALIGN RING POINTER */
+	ring_old[i]=value; /* SAVE OLD ALIGN RING INDICES */
+	if (keep_this_letter) /* TEMP'Y: SAVE REVERSE MAPPING TO A-R INDICES*/
+	  seq->letter[npos_compact].align_ring=i;
+	break;
+      }
+    }
+    if (keep_this_letter) {
+      for (source= &seq->letter[npos_compact].source;source;source=source->more)
+	match_pos[source->iseq]=source->ipos - 1; /*VALID LINK MUST MATCH m_p*/
+
+      LOOPF (j,nlink) { /*ADD LEFT-RIGHT LINKS*/
+	for (source= &seq->letter[link_list[j]].source;source;source=source->more)
+	  if (keep_all_links /* KEEP LINKS EVEN IF NOT FROM SELECTED SEQS*/
+	      || source->ipos == match_pos[source->iseq]) { /*VALID LINK! */
+	    add_lpo_link(&seq->letter[npos_compact].left,link_list[j]);
+	    add_lpo_link(&seq->letter[link_list[j]].right,npos_compact);
+	    break; /* SAVED THIS LINK! */
+	  }
+      }
+      for (source= &seq->letter[npos_compact].source;source;source=source->more)
+	match_pos[source->iseq]= INVALID_LPO_LINK;/*DFLT:NO VALID LINK*/
+
+      pos_compact[i]=npos_compact++;
+    }
+    else 
+      pos_compact[i]= INVALID_LETTER_POSITION;
+  }
+  seq->length=npos_compact;
+
+  LOOPF (i,seq->length) { /* SET ring_id TO MINIMUM VALUE ON EACH RING */
+    if (seq->letter[i].ring_id<0) {/* NEW RING, UPDATE IT! */
+      j=seq->letter[i].align_ring; /* GO AROUND THE ENTIRE RING, SETTING ring_id TO i */
+      do {
+	/*printf("i=%d\tj=%d\tpos_compact[j]=%d\tring_old[j]=%d\n",i,j,pos_compact[j],ring_old[j]);*/
+	if (pos_compact[j]>=0) {
+	  k=pos_compact[j];
+	  seq->letter[k].ring_id=i; /* i IS MINIMUM VALUE ON THIS RING */
+	}
+	j=ring_old[j]; /* ADVANCE TO NEXT LETTER ON THE RING */
+	if (pos_compact[j]>=0) /* IF VALID, POINT IT BACK TO PREVIOUS LETTER*/
+	  seq->letter[pos_compact[j]].align_ring=k;
+      }
+      while (pos_compact[j]!=i); /* STOP WHEN WE'VE COMPLETED THE RING, BACK TO START*/
+    }
+  }
+
+  FREE(pos_count);
+  FREE(pos_compact);
+  FREE(iseq_compact);
+  FREE(last_pos);
+  FREE(link_list);
+  FREE(ring_old);
+  FREE(match_pos);
+  return seq;
+}
+
+/*
+LPOSequence_T *read_lpo(FILE *ifile)
+{
+  return read_lpo_select(ifile,NULL);
+}
+*/
+
+
+
+
+#define FASTA_GAP_CHARACTER '.'
+int xlate_lpo_to_al(LPOSequence_T *seq,
+		    int nsymbol,char symbol[],int ibundle,
+		    char gap_character,
+		    char ***p_seq_pos,char **p_p,char **p_include)
+{
+  int i,j,iring=0,nring=0,current_ring=0,iprint;
+  char **seq_pos=NULL,*p=NULL,*include_in_save=NULL;
+  LPOLetterSource_T *source;
+
+  LOOPF (i,seq->length) /* COUNT TOTAL #ALIGNMENT RINGS IN THE LPO */
+    if (seq->letter[i].ring_id != current_ring) { /* NEXT RING */
+      current_ring=seq->letter[i].ring_id;
+      nring++;
+    }
+  nring++; /* DON'T FORGET TO COUNT THE LAST RING!!! */
+  
+  CALLOC(seq_pos,seq->nsource_seq,char *); /* ALLOCATE MAP ARRAY*/
+  CALLOC(p,seq->nsource_seq*nring,char);
+  LOOP (i,seq->nsource_seq) /* BUILD POINTER ARRAY INTO MAP ARRAY */
+    seq_pos[i]=p+i*nring;
+  memset(p,gap_character,seq->nsource_seq*nring);
+  /* DEFAULT IS NO SEQUENCE PRESENT AT THIS POSITION */
+
+  current_ring=0; /* RESET TO BEGINNING */
+  LOOPF (i,seq->length) { /* NOW MAP THE LPO TO A FLAT LINEAR ORDER */
+    if (seq->letter[i].ring_id != current_ring) { /* NEXT RING */
+      current_ring=seq->letter[i].ring_id;
+      iring++;
+    } /* MAP EACH SOURCE SEQ ONTO LINEAR ORDER INDEXED BY iring */
+    for (source= &seq->letter[i].source;source;source=source->more)
+      if (symbol && seq->letter[i].letter<nsymbol)  /* TRANSLATE TO symbol */
+	seq_pos[source->iseq][iring]= symbol[seq->letter[i].letter];
+      else  /* NO NEED TO TRANSLATE */
+	seq_pos[source->iseq][iring]= seq->letter[i].letter;
+  }
+
+  if (ibundle>=0) { /* ONLY SAVE SEQS THAT ARE IN THIS BUNDLE */
+    CALLOC(include_in_save,nring,char); /* BLANK FLAGS: WHAT RINGS TO SHOW*/
+    LOOP (iring,nring) { /* CHECK EACH RING TO SEE IF IT'S IN BUNDLE */
+      LOOP (i,seq->nsource_seq) {
+	if (seq_pos[i][iring]!=gap_character /* ALIGNED HERE! */
+	    && seq->source_seq[i].bundle_id == ibundle) { /* PART OF BUNDLE!*/
+	  include_in_save[iring]=1; /* SO INCLUDE THIS RING */
+	  break;
+	}
+      }
+    }
+  }
+  if (p_seq_pos)
+    *p_seq_pos = seq_pos;
+  if (p_seq_pos)
+    *p_seq_pos = seq_pos;
+  if (p_seq_pos)
+    *p_seq_pos = seq_pos;
+  return nring;
+}
+
+
+/** writes the LPO in FASTA format, including all sequences in the 
+  specified bundle */
+void write_lpo_bundle_as_fasta(FILE *ifile,LPOSequence_T *seq,
+			       int nsymbol,char symbol[],int ibundle)
+{
+  int i,j,nring=0,iprint;
+  char **seq_pos=NULL,*p=NULL,*include_in_save=NULL;
+
+  nring=xlate_lpo_to_al(seq,nsymbol,symbol,ibundle, /* TRANSLATE TO */
+			FASTA_GAP_CHARACTER,        /* RC-MSA FMT */
+			&seq_pos,&p,&include_in_save);
+  LOOPF (i,seq->nsource_seq) { /* NOW WRITE OUT FASTA FORMAT */
+    if (ibundle<0 /* PRINT ALL BUNDLES */
+	|| seq->source_seq[i].bundle_id == ibundle) { /* OR JUST THIS BUNDLE*/
+      fprintf(ifile,">%s %s",seq->source_seq[i].name,seq->source_seq[i].title);
+      iprint=0;
+      LOOPF (j,nring) { /* WRITE OUT 60 CHARACTER SEQUENCE LINES */
+	if (NULL==include_in_save || include_in_save[j]) {
+	  fprintf(ifile,"%s%c",iprint%60? "":"\n", seq_pos[i][j]);
+	  iprint++; /* KEEP COUNT OF PRINTED CHARACTERS */
+	}
+      }
+      fputc('\n',ifile);
+    }
+  }
+
+  FREE(p); /* DUMP TEMPORARY MEMORY */
+  FREE(include_in_save);
+  FREE(seq_pos);
+}
+/**@memo example: writing FASTA format file: 
+    if (seq_ifile=fopen(fasta_out,"w")) {
+      write_lpo_bundle_as_fasta(seq_ifile,lpo_out,
+      score_matrix.nsymbol,score_matrix.symbol,ibundle);
+      fclose(seq_ifile);
+    }
+*/
+
+
+/** writes the LPO in FASTA format, including all sequences in all bundles 
+-------------------------------------------------------
+---------------------------------------------------------------------------
+*/
+void write_lpo_as_fasta(FILE *ifile,LPOSequence_T *seq,
+			int nsymbol,char symbol[])
+{ /* WRAPPER FUNCTION FOR SAVING ALL BUNDLES!! */
+  write_lpo_bundle_as_fasta(ifile,seq,nsymbol,symbol,ALL_BUNDLES);
+}
+
+
+
+
+/****************************************************************
+  *
+  *     WRITE_SEQUENCES
+  *
+  *     This function writes out sequences.  Each line of sequnces
+  *     will be written out in blocks with spacing in between
+  *     each block.  
+  *
+  ***************************************************************/
+
+int write_sequences(FILE *ifile,
+		    LPOSequence_T *seq,
+		    int indent,
+		    int nblock,  /* # OF BLOCKS */
+		    int block_size, /* SIZE OF BLOCKS */
+		    int block_spacing, /* SPACING BETWEEN BLOCKS */
+		    int paragraph_spacing, /* SPACING BETWEEN PRAGRAPHS */
+		    int names, /* BOOLEAN: PRINT NAMES AFTER 1ST PARAGRAPH? */
+		    char gap_char,
+		    int nsymbol,char symbol[])
+{
+  int i,ip,ial,iseq,iblock,nparagraph,remainder,ipos,ipos_local,broken;
+  int len,nring;
+  char **seq_pos=NULL,*p=NULL,*include_in_save=NULL;
+
+  nring=xlate_lpo_to_al(seq,nsymbol,symbol,ALL_BUNDLES, /* TRANSLATE TO */
+		        gap_char,                   /* RC-MSA FMT */
+			&seq_pos,&p,&include_in_save);
+
+  nparagraph = nring/(nblock*block_size);/*# OF FULL PARAGRAPHS */
+  remainder = nring%(nblock*block_size);
+  if (remainder != 0)
+    nparagraph++;
+  LOOPF(ip,nparagraph){  /* FOR EACH PARAGRAPH */
+    LOOPF(iseq,seq->nsource_seq){ /* FOR EACH SEQUENCE */
+      if (ip == 0 || names){
+	if ((len=strlen(seq->source_seq[iseq].name))>indent-1){ /*MUST WE TRUNCATE NAME? */
+	  LOOPF(i,indent-1) /* PRINT AS MUCH OF NAME AS POSSIBLE  */
+	    putc(seq->source_seq[iseq].name[i],ifile);
+	  putc(' ',ifile); /* LEAVE A SPACE */
+	}
+	else{ /* PRINT WHOLE NAME */
+	  fprintf(ifile,"%s",seq->source_seq[iseq].name); /* PRINT NAME */
+	  LOOP(i,indent-len) /* INDENT LINE */
+	    putc(' ',ifile);
+	}
+      }
+      else{
+	LOOP(i,indent) /* INDENT LINE */
+	  putc(' ',ifile);
+      }
+      LOOPF(iblock,nblock){ /* FOR EACH BLOCK */
+	broken=0;
+	LOOPF(i,block_size){
+	  ipos = i + (iblock*block_size) + (ip*nblock*block_size);
+	  if (ipos>=nring){
+	    broken=1;
+	    break;
+	  }
+	  putc(seq_pos[iseq][ipos],ifile); /* APPROP SYMBOL FOR THIS POS*/
+	}
+	if (broken)
+	  break;
+	LOOP(i,block_spacing) /* ADD SPACING BETWEEN BLOCKS */
+	  putc(' ',ifile);
+      } /* END OF BLOCK LOOP */
+      putc('\n',ifile); /* NEW LINE AT END OF SEQUENCE LINE */
+    } /* END OF SEQ LOOP */
+    LOOP(i,paragraph_spacing) /* ADD SPACING BETWEEN PARAGRAPHS */
+      putc('\n',ifile);
+  } /* END OF PARAGRAPH LOOP */
+ done:
+  FREE(p); /* DUMP TEMPORARY MEMORY */
+  FREE(include_in_save);
+  FREE(seq_pos);
+  return 0;
+}
+
+
+
+void export_clustal_seqal(FILE *ifile,
+			  LPOSequence_T *seq,
+			  int nsymbol,char symbol[])
+{
+  fprintf(ifile,"CLUSTAL W (1.74) multiple sequence alignment\n\n\n");
+  /* WRITE OUT SEQUNENCES: INDENT 36, 1 BLOCK OF 50 CHARS 0 CHARS BLOCK
+     SPACING, 2 LINES PARAGRAPH SPACING, PRINT NAMES ON ALL LINES. */
+  write_sequences(ifile,seq,36,1,50,0,2,1,'-',nsymbol,symbol);
+}
+

Added: trunk/packages/poa/branches/upstream/current/main.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/main.c	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/main.c	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,255 @@
+
+
+#include <lpo.h>
+
+extern LPOScore_T caties_scoring_function(int i,
+				   int j,
+				   LPOLetter_T seq_x[],
+				   LPOLetter_T seq_y[],
+				   ResidueScoreMatrix_T *m)
+     ;  /* INCLUDE THIS HERE SO WE CAN PASS IT TO buildup_progressive_lpo() */
+
+
+
+int main(int argc,char *argv[])
+{
+  int i,ibundle=ALL_BUNDLES,nframe_seq=0,use_reverse_complement=0;
+  int nseq=0,do_switch_case=dont_switch_case,do_analyze_bundles=0;
+  char score_file[256],seq_file[256],*comment=NULL,*al_name="test align";
+  ResidueScoreMatrix_T score_matrix; /* DEFAULT GAP PENALTIES*/
+  Sequence_T *seq=NULL,*lpo_out=NULL,*frame_seq=NULL,*dna_lpo=NULL,
+  *lpo_in=NULL;
+  FILE *seq_ifile=NULL,*errfile=stderr,*logfile=NULL,*lpo_file_out=NULL,
+    *subset_ifile=NULL;
+  char *print_matrix_letters=NULL,*fasta_out=NULL,*po_out=NULL,*matrix_filename=NULL,
+  *seq_filename=NULL,*frame_dna_filename=NULL,*po_filename=NULL,
+  *hbmin=NULL,*numeric_data=NULL,*numeric_data_name="Nmiscall",*dna_to_aa=NULL,
+    *pair_score_file=NULL,*aafreq_file=NULL,*termval_file=NULL,
+    *bold_seq_name=NULL,*subset_file=NULL,*rm_subset_file=NULL;
+  float bundling_threshold=0.9;
+  int exit_code=0,count_sequence_errors=0,please_print_snps=0,
+    report_consensus_seqs=0,report_major_allele=0,use_aggressive_fusion=0;
+  int show_allele_evidence=0,please_collapse_lines=0,keep_all_links=0;
+  int remove_listed_seqs=0,please_report_similarity;
+  char *reference_seq_name="CONSENS%d",*clustal_out=NULL;
+  int use_global_alignment=0;
+  
+
+  black_flag_init(argv[0],PROGRAM_VERSION);
+
+  if (argc<2) {
+    fprintf(stderr,"\nUsage: %s [OPTIONS] matrixfile\n\n"
+"  OPTIONS\t\t\tFUNCTION\n"
+"  -------\t\t\t________\n"
+"INPUT:\n"
+"  -read_fasta FILENAME\t\tReads in FASTA sequence file.\n"
+"  -tolower\t\t\tForces FASTA sequences to lowercase\n"
+"\t\t\t\t(nucleotides in our matrix files)\n"
+"  -toupper\t\t\tForces FASTA sequences to UPPERCASE\n"
+"\t\t\t\t(amino acids in our matrix files)\n"
+"  -read_po FILENAME\t\tReads in PO file.\n"
+"  -subset FILENAME\t\tFilters PO-MSA to include list of seqs in file.\n"
+"  -remove FILENAME\t\tFilters PO-MSA to exclude list of seqs in file.\n"
+"\n"
+"\nALIGNMENT:\n"
+"  -pair_score FILENAME\t\tReads tab-delimited file of sequence-sequence\n"
+"\t\t\t\tsimilarity scores for constructing a guide-tree\n"
+"\t\t\t\tand performing progressive alignment using\n"
+"\t\t\t\tPO-PO alignment steps.\n"
+"  -fuse_all\t\t\tFuses identical letters on align rings.\n"
+"\nANALYSIS:\n"
+"  -hb\t\t\t\tPerforms heaviest bundling to generate consensi.\n"
+"  -hbmin VALUE\t\t\tBundles into heaviest bundle seqs with percent id >= value.\n"
+"\nOUTPUT:\n"
+"  -best\t\t\t\tRestricts MSA output to heaviest bundles.\n"
+"  -pir FILENAME\t\t\tWrites out MSA in PIR format.\n"
+"  -clustal FILENAME\t\tWrites out MSA in CLUSTAL format.\n"
+"  -po FILENAME\t\t\tWrites out MSA in PO format.\n"
+"  -printmatrix LETTERSET\tPrints score matrix to stdout.\n"
+"  -v\t\t\t\tRuns in verbose mode.\n\n"
+"  Note:  Either the -read_fasta or -read_po argument must be used with poa,\n" 
+"         since a FASTA file or PO file must be read in by poa.\n\n"
+"For more information, see http://www.bioinformatics.ucla.edu/poa\n\n"
+	    ,argv[0]);
+    exit(-1);
+  }
+
+  FOR_ARGS(i,argc) { /* READ ALL THE ARGUMENTS */
+    ARGMATCH_VAL("-tolower",do_switch_case,switch_case_to_lower);
+    ARGMATCH_VAL("-toupper",do_switch_case,switch_case_to_upper);
+    ARGMATCH_VAL("-v",logfile,stdout);
+    ARGMATCH_VAL("-best",ibundle,0); /*RESTRICT FASTA OUTPUT TO HB */
+    ARGMATCH_VAL("-hb",do_analyze_bundles,1);/*CALCULATE HEAVIEST BUNDLING*/
+    ARGGET("-printmatrix",print_matrix_letters);
+    ARGGET("-read_po",po_filename); /* READ A PO FILE FOR ALIGNMENT/ANALYSIS*/
+    ARGGET("-pir",fasta_out); /* SAVE FASTA-PIR FORMAT ALIGNMENT FILE */
+    ARGGET("-clustal",clustal_out); /* SAVE CLUSTAL FORMAT ALIGNMENT FILE */
+    ARGGET("-po",po_out); /* SAVE PO FORMAT ALIGNMENT FILE */
+    ARGGET("-hbmin",hbmin); /* SET THRESHOLD FOR BUNDLING */
+    ARGMATCH("-fuse_all",use_aggressive_fusion);
+    ARGMATCH("-global",use_global_alignment);
+    ARGGET("-pair_score",pair_score_file); /* FILENAME TO READ PAIR SCORES*/
+    ARGGET("-subset",subset_file); /* FILENAME TO READ SEQ SUBSET LIST*/
+    ARGGET("-remove",rm_subset_file); /* FILENAME TO READ SEQ REMOVAL LIST*/
+    ARGGET("-read_fasta",seq_filename); /* READ FASTA FILE FOR ALIGNMENT */
+    NEXTARG(matrix_filename); /* NON-FLAG ARG SHOULD BE MATRIX FILE */
+   
+  }
+
+  if (rm_subset_file) { /* TREAT SUBSET FILE AS LIST OF SEQS TO REMOVE */
+    subset_file=rm_subset_file;
+    remove_listed_seqs=1;
+  }
+
+  if (hbmin)
+    bundling_threshold=atof(hbmin);
+
+  if (!matrix_filename ||
+      read_score_matrix(matrix_filename,&score_matrix)<=0){/* READ MATRIX */
+    WARN_MSG(USERR,(ERRTXT,"Error reading matrix file %s.  Exiting",
+		    matrix_filename? matrix_filename: "(null)"),"$Revision: 1.2 $");
+    exit_code=1; /* SIGNAL ERROR CONDITION */
+    goto free_memory_and_exit;
+  }
+
+  if (logfile) {
+    fprintf(logfile, "X-Gap Penalties: %d %d %d\n",
+	    score_matrix.gap_penalty_set[0][0],
+	    score_matrix.gap_penalty_set[0][1],
+	    score_matrix.gap_penalty_set[0][2]);
+    fprintf(logfile, "Y-Gap Penalties: %d %d %d\n",
+	    score_matrix.gap_penalty_set[1][0],
+	    score_matrix.gap_penalty_set[1][1],
+	    score_matrix.gap_penalty_set[1][2]);
+  }
+  
+  if (print_matrix_letters) /* USER WANTS US TO PRINT A MATRIX */
+    print_score_matrix(stdout,&score_matrix,print_matrix_letters
+		       /*"ARNDCQEGHILKMFPSTWYV"*/);
+
+  if (po_filename) { /* READ PARTIAL ORDER FILE */
+    if (seq_ifile=fopen(po_filename,"r")) {
+      if (subset_file) { /* LIST OF SEQS TO FILTER THE PO WITH */
+	subset_ifile=fopen(subset_file,"r");
+	if (!subset_ifile) {
+	  WARN_MSG(USERR,(ERRTXT,"Error reading subset file %s.  Exiting",
+			  subset_file),"$Revision: 1.2 $");
+	  exit_code=1; /* SIGNAL ERROR CONDITION */
+	  goto free_memory_and_exit;
+	}
+	lpo_in=read_lpo_select(seq_ifile,subset_ifile,keep_all_links,
+			       remove_listed_seqs);
+	fclose(subset_ifile);
+      }
+      else
+	lpo_in=read_lpo(seq_ifile); /* READ LPO NORMALLY W/O FILTERING */
+      fclose(seq_ifile);
+    }
+    if (!lpo_in) {
+      WARN_MSG(USERR,(ERRTXT,"Error reading PO file %s!!!\nExiting.",
+		      po_filename),"$Revision: 1.2 $");
+      exit_code=1; /* SIGNAL ERROR CONDITION */
+      goto free_memory_and_exit;
+    }
+  }
+
+  if (seq_filename) { /* READ SEQUENCES TO ALIGN */
+    seq_ifile=fopen(seq_filename,"r"); /* READ SEQUENCE DATABASE */
+    if (seq_ifile) {
+      nseq=read_fasta(seq_ifile,&seq,do_switch_case,&comment);
+      fclose(seq_ifile);
+    }
+  }
+  
+  if (nseq>0) { /* INITIALIZE THE SEQUENCES AND RUN THE ALIGNMENT */
+    fprintf(errfile,"...Read %d sequences from %s...\n",nseq,seq_filename);
+    /*initialize_seqs_as_lpo(nseq,seq,&score_matrix); */
+    if (lpo_in) { /* ADD TO OUR EXISTING ALIGNMENT */
+      lpo_out=buildup_lpo(lpo_in,nseq,seq,&score_matrix,
+			  use_aggressive_fusion,
+			  use_global_alignment);/*BUILD ALIGNMENT*/
+      lpo_in=NULL; /* DATA STRUCTURE NOW POINTED TO BY lpo_out, 
+		      SO DON'T FREE IT TWICE!! */
+    }
+    else if (pair_score_file) {
+      lpo_out=buildup_progressive_lpo(nseq,seq,&score_matrix,
+				      use_aggressive_fusion,
+				      pair_score_file,
+				      caties_scoring_function,
+				      use_global_alignment);
+    }
+    else /* OTHERWISE JUST BUILDUP ON TOP OF FIRST READ SEQUENCE */
+      lpo_out=buildup_lpo(seq,nseq-1,seq+1,&score_matrix,
+			  use_aggressive_fusion,
+			  use_global_alignment);
+  }
+  else if (lpo_in) /* FILTERED LPO... CAN PRINT OUT AS LPO */
+    lpo_out=lpo_in;
+  else { /* HMM... NO DATA TO WORK WITH AT ALL.  MUST BE AN ERROR. COMPLAIN!*/
+    WARN_MSG(USERR,(ERRTXT,"Error reading sequence file %s and PO file %s. Exiting.", 
+            seq_filename? seq_filename: "because none specified",
+            po_filename? po_filename:  "because none specified"),"$Revision: 1.2 $");
+    exit_code=1; /* SIGNAL ERROR CONDITION */
+    goto free_memory_and_exit;
+  }
+  
+  if (comment) { /* SAVE THE COMMENT LINE AS TITLE OF OUR LPO */
+    FREE(lpo_out->title);
+    lpo_out->title=strdup(comment);
+  }
+
+  if (do_analyze_bundles) /* DIVIDE INTO BUNDLES W/ CONSENSUS */
+    generate_lpo_bundles(lpo_out,bundling_threshold);
+
+  if (po_out) { /* WRITE FINAL PARTIAL ORDER ALIGNMENT TO OUTPUT */
+    if (lpo_file_out=fopen(po_out, "w")) {
+       write_lpo(lpo_file_out,lpo_out,&score_matrix);
+       fclose(lpo_file_out);
+    }
+    else {
+      WARN_MSG(USERR,(ERRTXT,"*** Could not save PO file %s.  Exiting.",
+	      po_out),"$Revision: 1.2 $");
+      exit_code=1; /* SIGNAL ERROR CONDITION */ 
+    }
+  }
+
+  if (fasta_out) { /* WRITE FINAL ALIGNMENT IN FASTA FORMAT */
+    if (seq_ifile=fopen(fasta_out,"w")) { /* FASTA ALIGNMENT*/
+      write_lpo_bundle_as_fasta(seq_ifile,lpo_out,score_matrix.nsymbol,
+				score_matrix.symbol,ibundle);
+      fclose(seq_ifile);
+    }
+    else {
+      WARN_MSG(USERR,(ERRTXT,"*** Could not save FASTA file %s.  Exiting.",
+	      fasta_out),"$Revision: 1.2 $");
+      exit_code=1; /* SIGNAL ERROR CONDITION */ 
+   }
+  }
+
+  if (clustal_out) { /* WRITE FINAL ALIGNMENT IN FASTA FORMAT */
+    if (seq_ifile=fopen(clustal_out,"w")) { /* FASTA ALIGNMENT*/
+      export_clustal_seqal(seq_ifile,lpo_out,score_matrix.nsymbol,
+			   score_matrix.symbol);
+      fclose(seq_ifile);
+    }
+    else {
+      WARN_MSG(USERR,(ERRTXT,"*** Could not save CLUSTAL file %s.  Exiting.",
+	      fasta_out),"$Revision: 1.2 $");
+      exit_code=1; /* SIGNAL ERROR CONDITION */ 
+   }
+  }
+
+
+
+ free_memory_and_exit: /* FREE ALL DYNAMICALLY ALLOCATED DATA!!!! */
+  if (dna_lpo)
+    free_lpo_sequence(dna_lpo,TRUE);
+  if (lpo_in)
+    free_lpo_sequence(lpo_in,TRUE);
+  if (lpo_out != seq && lpo_out != lpo_in)
+    free_lpo_sequence(lpo_out,TRUE);
+  LOOPB (i,nseq) /* seq[] WAS ALLOCATED AS ONE ARRAY, SO FREE seq[0] LAST */
+    free_lpo_sequence(seq+i,i==0); /* FREE BLOCK AFTER ALL HOLDERS EMPTY*/
+  exit(exit_code);
+}
+

Added: trunk/packages/poa/branches/upstream/current/multidom.seq
===================================================================
--- trunk/packages/poa/branches/upstream/current/multidom.seq	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/multidom.seq	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,42 @@
+>ABL1_HUMAN PROTO-ONCOGENE TYROSINE-PROTEIN KINASE ABL (EC 2.7.1.112) (P150) (C-ABL).
+MLEICLKLVG CKSKKGLSSS SSCYLEEALQ RPVASDFEPQ GLSEAARWNS KENLLAGPSE
+NDPNLFVALY DFVASGDNTL SITKGEKLRV LGYNHNGEWC EAQTKNGQGW VPSNYITPVN
+SLEKHSWYHG PVSRNAAEYL LSSGINGSFL VRESESSPGQ RSISLRYEGR VYHYRINTAS
+DGKLYVSSES RFNTLAELVH HHSTVADGLI TTLHYPAPKR NKPTVYGVSP NYDKWEMERT
+DITMKHKLGG GQYGEVYEGV WKKYSLTVAV KTLKEDTMEV EEFLKEAAVM KEIKHPNLVQ
+LLGVCTREPP FYIITEFMTY GNLLDYLREC NRQEVNAVVL LYMATQISSA MEYLEKKNFI
+HRDLAARNCL VGENHLVKVA DFGLSRLMTG DTYTAHAGAK FPIKWTAPES LAYNKFSIKS
+DVWAFGVLLW EIATYGMSPY PGIDLSQVYE LLEKDYRMER PEGCPEKVYE LMRACWQWNP
+SDRPSFAEIH QAFETMFQES SISDEVEKEL GKQGVRGAVS TLLQAPELPT KTRTSRRAAE
+HRDTTDVPEM PHSKGQGESD PLDHEPAVSP LLPRKERGPP EGGLNEDERL LPKDKKTNLF
+SALIKKKKKT APTPPKRSSS FREMDGQPER RGAGEEEGRD ISNGALAFTP LDTADPAKSP
+KPSNGAGVPN GALRESGGSG FRSPHLWKKS STLTSSRLAT GEEEGGGSSS KRFLRSCSAS
+CVPHGAKDTE WRSVTLPRDL QSTGRQFDSS TFGGHKSEKP ALPRKRAGEN RSDQVTRGTV
+TPPPRLVKKN EEAADEVFKD IMESSPGSSP PNLTPKPLRR QVTVAPASGL PHKEEAEKGS
+ALGTPAAAEP VTPTSKAGSG APGGTSKGPA EESRVRRHKH SSESPGRDKG KLSRLKPAPP
+PPPAASAGKA GGKPSQSPSQ EAAGEAVLGA KTKATSLVDA VNSDAAKPSQ PGEGLKKPVL
+PATPKPQSAK PSGTPISPAP VPSTLPSASS ALAGDQPSST AFIPLISTRV SLRKTRQPPE
+RIASGAITKG VVLDSTEALC LAISRNSEQM ASHSAVLEAG KNLYTFCVSY VDSIQQMRNK
+FAFREAINKL ENNLRELQIC PATAGSGPAA TQDFSKLLSS VKEISDIVQR
+>CRKL_HUMAN CRK-LIKE PROTEIN.
+MSSARFDSSD RSAWYMGPVS RQEAQTRLQG QRHGMFLVRD SSTCPGDYVL SVSENSRVSH
+YIINSLPNRR FKIGDQEFDH LPALLEFYKI HYLDTTTLIE PAPRYPSPPM GSVSAPNLPT
+AEDNLEYVRT LYDFPGNDAE DLPFKKGEIL VIIEKPEEQW WSARNKDGRV GMIPVPYVEK
+LVRSSPHGKH GNRNSNSYGI PEPAHAYAQP QTTTPLPAVS GSPGAAITPL PSTQNGPVFA
+KAIQKRVPCA YDKTALALEV GDIVKVTRMN INGQWEGEVN GRKGLFPFTH VKIFDPQNPD
+ENE
+>GRB2_HUMAN GROWTH FACTOR RECEPTOR-BOUND PROTEIN 2 (GRB2 ADAPTOR PROTEIN)(SH2)
+MEAIAKYDFK ATADDELSFK RGDILKVLNE ECDQNWYKAE LNGKDGFIPK NYIEMKPHPW
+FFGKIPRAKA EEMLSKQRHD GAFLIRESES APGDFSLSVK FGNDVQHFKV LRDGAGKYFL
+WVVKFNSLNE LVDYHRSTSV SRNQQIFLRD IEQVPQQPTY VQALFDFDPQ EDGELGFRRG
+DFIHVMDNSD PNWWKGACHG QTGMFPRNYV TPVNRNV
+>MATK_HUMAN MEGAKARYOCYTE-ASSOCIATED TYROSINE-PROTEIN KINASE (EC 2.7.1.112) (TYROSINE-PROTEIN KINASE CTK) (PROTEIN KINASE HYL) (HEMATOPOIETIC CONSENSUS TYROSINE-LACKING KINASE).
+MAGRGSLVSW RAFHGCDSAE ELPRVSPRFL RAWHPPPVSA RMPTRRWAPG TQCITKCEHT
+RPKPGELAFR KGDVVTILEA CENKSWYRVK HHTSGQEGLL AAGALREREA LSADPKLSLM
+PWFHGKISGQ EAVQQLQPPE DGLFLVRESA RHPGDYVLCV SFGRDVIHYR VLHRDGHLTI
+DEAVFFCNLM DMVEHYSKDK GAICTKLVRP KRKHGTKSAE EELARAGWLL NLQHLTLGAQ
+IGEGEFGAVL QGEYLGQKVA VKNIKCDVTA QAFLDETAVM TKMQHENLVR LLGVILHQGL
+YIVMEHVSKG NLVNFLRTRG RALVNTAQLL QFSLHVAEGM EYLESKKLVH RDLAARNILV
+SEDLVAKVSD FGLAKAERKG LDSSRLPVKW TAPEALKHGK FTSKSDVWSF GVLLWEVFSY
+GRAPYPKMSL KEVSEAVEKG YRMEPPEGCP GPVHVLMSSC WEAEPARRPP FRKLAEKLAR
+ELRSAGAPAS VSGQDADGST SPRSQEP

Added: trunk/packages/poa/branches/upstream/current/numeric_data.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/numeric_data.c	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/numeric_data.c	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,145 @@
+
+
+#include <lpo.h>
+
+/**@memo create a new LPONumericData_T record for the designated source_seq.  
+ Dynamically allocates data storage array equal in length to the length of 
+ the source_seq.  Initializes the array values to initial_value if non-zero.  
+ Returns a pointer to the new LPONumericData_T and also adds it to the 
+ source_seq->data[] list.
+*/
+LPONumericData_T *new_numeric_data(LPOSourceInfo_T *source_seq,
+				   char name[],
+				   char title[],
+				   double initial_value)
+{
+  int i;
+  LPONumericData_T *data=NULL;
+  REBUFF(source_seq->data,source_seq->ndata,NUMDATA_BUFFER_CHUNK,LPONumericData_T);
+  data=source_seq->data + source_seq->ndata++;
+
+  STRNCPY(data->name,name,SEQUENCE_NAME_MAX); /* COPY NAME AND TITLE */
+  if (title)
+    data->title=strdup(title);
+
+  CALLOC(data->data,source_seq->length,double); /* ALLOCATE THE ARRAY */
+  if (initial_value) /* INITIALIZE VALUES IF DESIRED */
+    LOOP (i,source_seq->length)
+      data->data[i]=initial_value;
+  return data; /* RETURN POINTER TO THE NEW DATA HOLDER */
+}
+
+
+
+LPONumericData_T *cp_numeric_data(LPOSourceInfo_T *source_seq,
+				  LPONumericData_T *data)
+{
+  int i;
+  LPONumericData_T *new_data;
+  new_data=new_numeric_data(source_seq,data->name,data->title,0);
+  LOOP (i,source_seq->length) /* COPY ALL THE VALUES */
+    new_data->data[i]=data->data[i];
+  return new_data;
+}
+
+
+
+/**@memo finds LPONumericData from the source_seq, matching the specified 
+   name, or returns NULL if not found. */
+LPONumericData_T *find_numeric_data(LPOSourceInfo_T *source_seq,
+				   char name[])
+{
+  int i;
+  LOOP (i,source_seq->ndata)
+    if (0==strcmp(source_seq->data[i].name,name)) /*FOUND IT. RETURN POINTER*/
+      return source_seq->data+i;
+  return NULL; /* NOT FOUND! */
+}
+
+
+
+/**@memo frees the set of numeric_data passed as arguments.  If requested, 
+ will also free the block of memory for the array of entries data[]. */
+void free_lpo_numeric_data(int ndata,LPONumericData_T *data,
+			   int please_free_block)
+{
+  int i;
+  LOOP (i,ndata) { /* DUMP ASSOCIATED ARRAYS */
+    FREE(data[i].title);
+    FREE(data[i].data);
+  }
+  if (please_free_block) /* DUMP THE BLOCK ITSELF */
+    free(data);
+}
+
+
+
+
+
+/**@memo creates one or more new numeric_data for a given sequence, 
+  based on the presence of corresponding named numeric_data.  Specifically, 
+  the list of set_names[] is processed one by one, creating a source_name
+  according to the source_name_fmt string, and finding a numeric_data 
+  entry with that name.  If it is not found, the routine calls exit(-1).  
+  If it is found, a new set of numeric_data is created, with a name 
+  created according to target_name_fmt, and titled according to 
+  the title_fmt string and source_data->title. */
+void new_numeric_data_sets(LPOSourceInfo_T *source_seq,
+			   int nset,char *set_names[],
+			   char source_name_fmt[],
+			   char target_name_fmt[],
+			   char title_fmt[])
+{
+  int j;
+  LPONumericData_T *data;
+  char name[256],title[4096];
+
+  LOOPF (j,nset) {
+    sprintf(name,source_name_fmt,set_names[j]); /* GENERATE NAME TO MATCH*/
+    data=find_numeric_data(source_seq,name); /*FIND SOURCE DATA*/
+    if (!data) {
+      WARN_MSG(USERR,(ERRTXT,"*** could not find dataset %s for seq %s.\nExiting\n\n",
+	      name,source_seq->name),"$Revision: 1.2 $");
+      exit(-1);
+    }
+    sprintf(name,target_name_fmt,set_names[j]); /* GENERATE NEW NAME */
+    sprintf(title,title_fmt,data->title);
+    new_numeric_data(source_seq,name,title,0.); /* CREATE NEW ARRAY */
+  }
+}
+
+
+
+
+/**@memo reads a stream of FASTA-formatted numeric data, and stores them in 
+the corresponding set of source_seq, matching the sequences by name.  
+Multiple numeric data entries can be read from a single stream. */
+void read_numeric_data(int nsource_seq,
+		       LPOSourceInfo_T source_seq[],
+		       FILE *ifile)
+{
+  int i,j;
+  char line[4096],seq_name[128],data_name[1024],title[2048];
+  LPONumericData_T *data;
+
+  while (fgets(line,sizeof(line),ifile)) {
+    title[0]='\0';
+    if (sscanf(line,">%s NUMERIC_DATA=%s %s",seq_name,data_name,title)>=2) {
+      LOOP (i,nsource_seq) /* FIND THE MATCHING SEQUENCE */
+	if (0==strcmp(seq_name,source_seq[i].name)) /* MATCH */
+	  break;
+      if (LOOP_FINISHED(i,nsource_seq)) /* SEQ NOT FOUND!! */
+	WARN_MSG(USERR,(ERRTXT,"Error! NUMERIC_DATA %s, sequence %s does not exist.  Skipping.\n\n",data_name,seq_name),"$Revision: 1.2 $");
+      else { /* FOUND THE SEQ, SAVE THE DATA */
+	if (data=find_numeric_data(source_seq+i,data_name)) /*REUSE EXISTING*/
+	  WARN_MSG(WARN,(ERRTXT,"NUMERIC_DATA %s already exists on sequence %s.  Overwriting.\n",data_name,seq_name),"$Revision: 1.2 $");
+	else /* CREATE A NEW DATA HOLDER */
+	  data=new_numeric_data(source_seq+i,data_name,title,0.);
+	LOOPF (j,source_seq[i].length) /* READ IN THE VALUES */
+	  fscanf(ifile," %lf",data->data+j);
+      }
+    }
+  }
+}
+
+

Added: trunk/packages/poa/branches/upstream/current/poa.h
===================================================================
--- trunk/packages/poa/branches/upstream/current/poa.h	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/poa.h	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,252 @@
+
+
+#ifndef POA_HEADER_INCLUDED
+#define POA_HEADER_INCLUDED
+
+
+
+
+/** MAXIMUM GAP LENGTH TRACKED IN align_lpo;
+    LARGER THAN THIS WILL BE CAPPED
+    (DEFAULT VALUE) */
+#ifndef TRUNCATE_GAP_LENGTH
+#define TRUNCATE_GAP_LENGTH 16
+#endif
+
+/** LENGTH OVER WHICH GAP PENALTY DECAYS IN align_lpo
+    (DEFAULT VALUE) */
+#ifndef DECAY_GAP_LENGTH
+#define DECAY_GAP_LENGTH 0
+#endif
+
+#define REV_COMP_STRING "/rev_comp"
+
+
+/** THE NULL LETTER-REFERENCE */
+#define INVALID_LETTER_POSITION (-1)
+
+typedef int LPOLetterRef_T;
+
+
+typedef int LPOScore_T;
+
+ /** NEEDED FOR seq_util.h */
+typedef LPOScore_T ResidueScore_T;
+#define RESIDUE_SCORE_DEFINED
+
+/** linked list for storing source origin (sequence position)
+ from which this letter was derived */
+struct LPOLetterSource_S {
+ /** index of the sequence, referencing the source_seq[] array*/
+  int iseq;
+ /** index of the corresponding position in that sequence */
+  LPOLetterRef_T ipos;
+ /** next node in the linked list */
+  struct LPOLetterSource_S *more;
+} ;
+
+typedef struct LPOLetterSource_S LPOLetterSource_T;
+
+
+/** linked list for connecting an LPOLetter to either right
+ or left */
+struct LPOLetterLink_S {
+ /** ADJACENT LETTER LINKED TO THIS LETTER */	
+  LPOLetterRef_T ipos;
+#ifdef USE_WEIGHTED_LINKS
+ /** transition cost for traversing this link */
+  LPOScore_T score;
+#endif
+ /** next node in the linked list */
+  struct LPOLetterLink_S *more;
+} ;
+
+typedef struct LPOLetterLink_S LPOLetterLink_T;
+
+
+/** the chunk size for allocating additional
+    letters in an LPOLetter_T array */
+#define LPO_LETTER_BUFFER_CHUNK 64
+
+
+/** Structure for storing individual LPO Letters*/
+struct LPOLetter_S {
+ /** ADJACENT LETTER(S) TO THE LEFT */	
+  LPOLetterLink_T left;
+ /** ADJACENT LETTER(S) TO THE RIGHT */
+  LPOLetterLink_T right;
+ /** SOURCE SEQ POSITION(S) */
+  LPOLetterSource_T source;
+ /** CIRCULAR LIST OF ALIGNED POSITIONS */
+  LPOLetterRef_T align_ring;
+ /** MINIMUM INDEX OF ALL POSITIONS ON THE RING */
+  LPOLetterRef_T ring_id;
+  /** SCORE FOR BALANCING PARTIAL ORDER EFFECTS ON MATRIX NEUTRALITY */
+  float score;
+ /** THE ACTUAL RESIDUE CODE! */
+  char letter;
+} ;
+
+
+
+typedef struct LPOLetter_S LPOLetter_T;
+
+
+/** maximum length of a sequence name */
+#define SEQUENCE_NAME_MAX 32
+/** buffer chunk size for expanding a block of seq storage */
+#define SEQUENCE_BUFFER_CHUNK 8
+
+/** buffer chunk size for expanding a source_seq[] array */
+#define SOURCE_SEQ_BUFFER_CHUNK 16
+
+
+
+#define NUMDATA_BUFFER_CHUNK 4
+/** storage for quantitative data attached to a sequence */
+struct LPONumericData_S { /** */
+  char name[SEQUENCE_NAME_MAX]; /** */
+  char *title; /** */
+  double *data;
+};
+typedef struct LPONumericData_S LPONumericData_T;
+
+
+/** Structure for storing individual source sequence information,
+ stuff like name, title etc. */
+struct LPOSourceInfo_S { /** */
+  char name[SEQUENCE_NAME_MAX]; /** */
+  char *title; /** */
+  char *sequence; /** */
+  int *seq_to_po; /** */
+  int *po_to_seq; /** */
+  LPONumericData_T *data; /** */
+  int ndata; /** */
+  int length; /** */
+  int istart;
+ /** FOR PURPOSES OF HEAVIEST BUNDLE CALCULATION */
+  int weight;
+ /** WHAT BUNDLE IS THIS A MEMBER OF? */
+  int bundle_id;
+};
+
+typedef struct LPOSourceInfo_S LPOSourceInfo_T;
+
+/** the NULL bundle-reference */
+#define NO_BUNDLE (-1)
+
+/** bundle-reference meaning "include all bundles" */
+#define ALL_BUNDLES (-1)
+
+
+/** holder for an LPO sequence, its letters, 
+  and associated information */
+struct LPOSequence_S {/** */
+  int length;/** */
+  LPOLetter_T *letter;/** */
+  char *title;/** */
+  char *sequence;/** */
+  char name[SEQUENCE_NAME_MAX];/** */
+  int nsource_seq;/** */
+  LPOSourceInfo_T *source_seq;
+};
+
+typedef struct LPOSequence_S LPOSequence_T;
+
+
+typedef LPOSequence_T Sequence_T;
+
+
+/**@memo GENERAL FORM IS seq_y[j].left.ipos */
+#define SEQ_Y_LEFT(j) (j-1)
+#define SEQ_Y_RIGHT(j) (j+1)
+
+
+
+/**@memo Data structure for analyzing sequence differences in MSA*/
+struct LPOLetterCount_S {
+  unsigned int is_error:2;
+  unsigned int meets_criteria:1;
+  unsigned int seq_count:29;
+};
+
+typedef struct LPOLetterCount_S LPOLetterCount_T;
+
+/** classification of sequence differences */
+enum {
+  no_error,
+  substitution_error,
+  insertion_error,
+  deletion_error,
+  max_error_states
+};
+
+
+
+
+
+
+
+/** DON'T ALLOCATE MORE THAN THIS TOTAL AMOUNT OF MEMORY 
+---------------------------------------------------------------
+---------------------------------------------------------------
+*/
+#define POA_MAX_ALLOC 300000000
+
+
+#endif
+
+
+
+
+
+
+/**@name The lpo library*/
+/*@{*/
+/**@memo This set of web pages documents the functionality of the lpo 
+function library.  This is a set of C functions for reading, writing,
+creating, manipulating, and aligning partial order sequences.  These 
+functions divide into several groups:
+\begin{itemize} 
+\item \URL[File utilities]{General.html#read_fasta}: reading and writing 
+FASTA and po files
+\item \URL[lpo utilities]{General.html#add_lpo_link}: creating, fusing, 
+freeing, manipulating lpo data
+\item \URL[alignment]{General.html#align_lpo}: aligning one or more linear 
+sequences to an lpo
+\item analysis: analyzing lpo structure, e.g. to find consensus
+\end{itemize}
+*/
+
+/**@memo Click \URL[here]{../poa} for more information about partial 
+order alignment.*/
+
+/*@}*/
+
+
+
+/**@name linking to the lpo library */
+/*@{*/
+/**@memo To use function from this library in your code, you must 
+ do two things.  First you must 
+include <lpo.h> in your source files, to access the prototypes.  
+ Second, when you compile, you must tell the compiler where the 
+ lpo header and library files are located.  {\bfNB it appears gcc 
+loads libraries in reverse order of the command line arguments, so 
+you have to specify your source files BEFORE the -llpo library argument 
+on the command line, or the linker will give you unresolved reference 
+errors}.  e.g.  \begin{verbatim}
+
+gcc -o myprog myfile.c -I~leec/lib/include -L~leec/lib -llpo
+\end{verbatim}
+*/
+
+/**@memo Click \URL[here]{../poa} for more information about partial 
+order alignment.*/
+
+/*@}*/
+
+
+
+
+

Added: trunk/packages/poa/branches/upstream/current/project.h
===================================================================
--- trunk/packages/poa/branches/upstream/current/project.h	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/project.h	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,11 @@
+
+#ifndef PROGRAM_NAME
+#define PROGRAM_NAME "poa"
+#endif
+#ifndef PROGRAM_VERSION
+#define PROGRAM_VERSION "v1.0.0"
+#endif
+
+
+#include "poa.h"
+

Added: trunk/packages/poa/branches/upstream/current/remove_bundle.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/remove_bundle.c	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/remove_bundle.c	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,158 @@
+
+
+
+#include "default.h"
+#include "poa.h"
+#include "seq_util.h"
+#include "lpo.h"
+
+
+
+
+int compact_links(LPOLetterLink_T *list,int old_to_new[])
+{
+  LPOLetterLink_T *link=NULL,*link_last=NULL,*next_link,*link_head=NULL;
+
+  CALLOC(link,1,LPOLetterLink_T);
+  memcpy(link,list,sizeof(LPOLetterLink_T));
+
+  for (;link && link->ipos>=0;link=next_link){
+    next_link=link->more;
+    if (old_to_new[link->ipos]<0) /* THIS POSITION NO LONGER EXISTS! */
+      free(link); /* DELETE THIS LINK ENTRY */
+    else { /* COPY THIS BACK TO PREVIOUS LINK ENTRY: COMPACT THE LIST*/
+      link->ipos = old_to_new[link->ipos]; /* REMAP TO NEW INDEX SYSTEM */
+      if (link_last) /* CONNECT TO PREVIOUS NODE IN LIST */
+	link_last->more=link;
+      else /* THIS IS THE NEW HEAD OF THE LIST */
+	link_head=link;
+      link_last=link;
+    }
+  }
+  if (link) /* AN EMPTY LINK I.E. link->ipos<0 ... JUNK IT */
+    free(link);
+  if (link_last) /* TERMINATE LAST NODE IN LIST */
+    link_last->more=NULL;
+  if (link_head) {
+    memcpy(list,link_head,sizeof(LPOLetterLink_T));
+    free(link_head);
+    return 1; /* COMPACTED LINK LIST IS NON-EMPTY */
+  }
+  else { /* NOTHING LEFT IN LIST, SO BLANK IT */
+    list->more=NULL;
+    list->ipos= INVALID_LETTER_POSITION;
+    return 0; /* COMPACTED LINK LIST IS EMPTY */
+  }
+}
+
+
+
+
+
+
+
+int compact_sources(LPOLetterSource_T *list,int ibundle_delete,
+		    LPOSourceInfo_T source_seq[])
+{
+  LPOLetterSource_T *source=NULL,*source_last=NULL,*next_source,*source_head=NULL;
+
+  CALLOC(source,1,LPOLetterSource_T);
+  memcpy(source,list,sizeof(LPOLetterSource_T));
+
+  for (;source;source=next_source){
+    next_source=source->more;
+    if (source_seq[source->iseq].bundle_id == ibundle_delete)
+      free(source); /* DELETE THIS SOURCE ENTRY */
+    else { /* COPY THIS BACK TO PREVIOUS SOURCE ENTRY: COMPACT THE LIST*/
+      if (source_last) /* CONNECT TO PREVIOUS NODE IN LIST */
+	source_last->more=source;
+      else /* THIS IS THE NEW HEAD OF THE LIST */
+	source_head=source;
+      source_last=source;
+    }
+  }
+  if (source_last) /* TERMINATE LAST NODE IN LIST */
+    source_last->more=NULL;
+  if (source_head) {
+    memcpy(list,source_head,sizeof(LPOLetterSource_T));
+    free(source_head);
+    return 1; /* COMPACTED SOURCE LIST IS NON-EMPTY */
+  }
+  else { /* NOTHING LEFT IN LIST, SO BLANK IT */
+    list->more=NULL;
+    list->ipos= INVALID_LETTER_POSITION;
+    return 0; /* COMPACTED SOURCE LIST IS EMPTY */
+  }
+}
+
+
+
+
+
+
+void reindex_compact_rings(LPOSequence_T *seq)
+{
+  int i,iring= -1,ring_start= -1;
+  LOOPF (i,seq->length) { /* REMOVE ALL LINKS TO OLD, DELETED POSITIONS */
+    if (iring==seq->letter[i].ring_id) {
+      seq->letter[i].ring_id=ring_start; /* USE FIRST INDEX ON RING */
+      seq->letter[i].align_ring= i-1; /* LINK TO LETTER TO LEFT */
+    }
+    else { /* START OF A NEW RING */
+      if (ring_start>=0)
+	seq->letter[ring_start].align_ring = i-1;
+      iring=seq->letter[i].ring_id;
+      seq->letter[i].ring_id=seq->letter[i].align_ring=ring_start=i;
+    }
+  }
+  if (ring_start>=0)
+    seq->letter[ring_start].align_ring = i-1;
+}
+
+
+
+
+#define DELETE_THIS_BUNDLE (-2)
+
+int remove_bundle(LPOSequence_T *seq,int ibundle,int delete_all_others)
+{
+  int i,j=0,*old_to_new=NULL,new_length;
+
+  if (delete_all_others) { /* INSTEAD OF DELETING THIS BUNDLE, */
+    LOOP (i,seq->nsource_seq) /*MARK ALL OTHERS TO BE DELETED! */
+      if (seq->source_seq[i].bundle_id!=ibundle)
+	seq->source_seq[i].bundle_id = DELETE_THIS_BUNDLE;
+    ibundle=DELETE_THIS_BUNDLE;
+  }
+
+  CALLOC(old_to_new,seq->length,int); /* CREATE MAPPING ARRAY */
+  LOOPF (i,seq->length) {
+    if (compact_sources(&seq->letter[i].source,ibundle,seq->source_seq)){
+      if (i>j) /* COPY LETTER TO COMPACTED POSITION */
+	memcpy(seq->letter+j,seq->letter+i,sizeof(LPOLetter_T));
+      old_to_new[i]=j++; /* SAVE MAPPING FROM OLD TO NEW, COMPACTED POSITION*/
+    }
+    else {
+      free_lpo_letters(1,seq->letter+i,FALSE); /* DUMP DATA FOR THIS LETTER */
+      old_to_new[i]= INVALID_LETTER_POSITION;
+    }
+  }
+  new_length=j;
+  if (new_length<seq->length) /* ERASE UNUSED PORTIONS AFTER COMPACTED ARRAY*/
+    memset(seq->letter+new_length,0,(seq->length - new_length)*sizeof(LPOLetter_T));
+
+  LOOP (i,new_length) { /* REMOVE ALL LINKS TO OLD, DELETED POSITIONS */
+    compact_links(&seq->letter[i].left,old_to_new);
+    compact_links(&seq->letter[i].right,old_to_new);
+  }
+  seq->length=new_length;
+  reindex_compact_rings(seq);
+
+  FREE(old_to_new);
+  return new_length;
+}
+
+
+
+
+

Added: trunk/packages/poa/branches/upstream/current/seq_util.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/seq_util.c	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/seq_util.c	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,265 @@
+
+#include "default.h"
+#include "seq_util.h"
+
+
+
+/** randomizes seq[] by shuffling, and places the result in randseq[];
+ if randseq[] and seq[] are distinct, seq[] is left unchanged */
+void shuffle_seq(int len,
+		char seq[],
+		char randseq[])
+{
+  int i,j;
+  char c;
+
+  for (i=0;i<len;i++)  {
+    j=rand()%len; /* CHOOSE RANDOM POSITION j */
+    c=seq[i]; /* SWAP POSITIONS i AND j */
+    randseq[i]=seq[j];
+    randseq[j]=c;
+  }
+
+  return;
+}
+
+
+
+
+
+
+/**@memo TRANSLATE FROM ASCII LETTERS TO COMPACTED NUMBERICAL INDEX: 
+    index_symbols(seq[i].length,seq[i].sequence,seq[i].sequence,
+		  m->nsymbol,m->symbol);
+*/
+/** converts characters in seq[] to the INDEX of the matching character in
+ symbols[], and returns the result in out[] */
+void index_symbols(int nseq,char seq[],char out[],
+		   int nsymbs,char symbols[])
+{
+  int i,j,k;
+  LOOP (i,nseq) {
+    k=nsymbs-1; /* DEFAULT: UNMATCHABLE SYMBOL */
+    LOOP (j,nsymbs) {  /* FIND MATCHING SYMBOL */
+      if (symbols[j]==seq[i]) {  /* FOUND IT! */
+	k=j;
+	break;
+      }
+    }
+    out[i]=k; /* SAVE THE TRANSLATED CODE */
+  }
+  return;
+}
+
+
+
+
+
+
+int *Score_matrix_row=NULL;
+
+int best_match_qsort_cmp(const void *void_a,const void *void_b)
+{
+  int *a=(int *)void_a,*b=(int *)void_b;
+
+  if (Score_matrix_row[*a]>Score_matrix_row[*b])
+    return -1;
+  else if (Score_matrix_row[*a]<Score_matrix_row[*b])
+    return 1;
+  else /* EQUAL */
+    return 0;
+}
+
+
+
+
+#ifdef SOURCE_EXCLUDED
+char DNA_symbols[1024];
+float DNA_rescale_score;
+#endif
+
+/** reads an alignment scoring matrix in the pam format */
+int read_score_matrix(char filename[],ResidueScoreMatrix_T *m)
+{
+  int i,j,k,nsymb=0,found_symbol_line=0,isymb;
+  char line[1024],dna_codes[256];
+  FILE *ifile;
+
+   /* GAP PENALTY DEFAULTS */
+  m->gap_penalty_set[0][0]=m->gap_penalty_set[1][0]=12; /*SAVE PENALTIES*/
+  m->gap_penalty_set[0][1]=m->gap_penalty_set[1][1]=2;
+  m->gap_penalty_set[0][2]=m->gap_penalty_set[1][2]=0;
+  m->trunc_gap_length = TRUNCATE_GAP_LENGTH;
+  m->decay_gap_length = DECAY_GAP_LENGTH;
+  
+  ifile=fopen(filename,"r");
+  if (!ifile) {
+    WARN_MSG(USERR,(ERRTXT,"Can't open alignment matrix from %s\n",filename),"$Revision: 1.2.2.1 $");
+    return -2; /* FAILED TO FIND FILE TO READ */
+  }
+
+  while (fgets(line,1023,ifile)) {
+    if ('#'==line[0] || '\n'==line[0]) /* SKIP COMMENT OR BLANK LINES */
+      continue;
+    
+    else if (1==sscanf(line,"GAP-TRUNCATION-LENGTH=%d",&i)) {
+      m->trunc_gap_length = i;
+    }
+    
+    else if (1==sscanf(line,"GAP-DECAY-LENGTH=%d",&i)) {
+      m->decay_gap_length = i;
+    }
+    
+    else if (3==sscanf(line,"GAP-PENALTIES=%d %d %d",&i,&j,&k)) {
+      m->gap_penalty_set[0][0]=m->gap_penalty_set[1][0]=i; /*SAVE PENALTIES*/
+      m->gap_penalty_set[0][1]=m->gap_penalty_set[1][1]=j;
+      m->gap_penalty_set[0][2]=m->gap_penalty_set[1][2]=k;
+    }
+
+    else if (3==sscanf(line,"GAP-PENALTIES-X=%d %d %d",&i,&j,&k)) {
+      m->gap_penalty_set[1][0]=i; /*SAVE PENALTIES ONLY FOR X DIRECTION*/
+      m->gap_penalty_set[1][1]=j;
+      m->gap_penalty_set[1][2]=k;
+    }
+
+#ifdef SOURCE_EXCLUDED
+    else if (1==sscanf(line,"DNACODES=%99s",dna_codes)) { /* READ DNACODES*/
+      strcpy(DNA_symbols,dna_codes);/*SYMBOLS COUNTED AS DNA FOR AUTORECOG*/
+    }
+
+    else if (1==sscanf(line,"DNASCALE=%f",&DNA_rescale_score))
+      continue;
+#endif
+    
+    else if (!found_symbol_line) { /* READ THIS LINE AS LIST OF SEQ SYMBOLS*/
+      for (i=0;'\0'!=line[i];i++)
+	if (!isspace(line[i])) /* IGNORE WHITESPACE */
+	  m->symbol[nsymb++]=line[i]; /* SAVE TO LIST OF SYMBOLS */
+      found_symbol_line=1; /* SET FLAG SO WE NOW READ MATRIX SCORE VALUES */
+    }
+    
+    else { /* READ SCORING MATRIX LINES */
+      found_symbol_line=0; /* DEFAULT: FAILED TO FIND MATCHING SYMBOL IN LIST*/
+      LOOP (isymb,nsymb) /* FIND MATCH TO THIS SYMBOL */
+	if (m->symbol[isymb]==line[0]) {
+	  found_symbol_line=1; /* SIGNAL THAT WE SUCCESFULLY FOUND MATCH */
+	  j=1; /* SKIP FIRST CHARACTER: OUR SEQUENCE SYMBOL */
+	  LOOPF (i,nsymb) { /* READ ALL THE SCORE VALUES ON THIS LINE */
+	    if (1==sscanf(line+j,"%d%n",&(m->score[isymb][i]),&k))
+	      j+=k; /* ADVANCE THE READING POSITION */
+	    else { /* MISSING SCORE DATA: ERROR! */
+	      IF_GUARD(1,5.23,(ERRTXT,"Missing score value for pair %c:%c",
+			      m->symbol[isymb],m->symbol[i]),TRAP)
+		;
+	      fclose(ifile); /* CLOSE OUR STREAM */
+	      return -1;
+	    }
+	  }
+	  break;
+	}
+      IF_GUARD(!found_symbol_line,1.5,(ERRTXT,"Missing or unknown sequence symbol: %c",line[0]),TRAP) { /* ERROR: AN INVALID SYMBOL, NOT IN LIST */
+	fclose(ifile); /* CLOSE OUR STREAM */
+	return -1;
+      }
+    }
+  }
+  fclose(ifile);
+
+  LOOPF (i,nsymb) {
+    Score_matrix_row= m->score[i]; /* ROW TO USE FOR SORTING best_match */
+    LOOP (j,nsymb)
+      m->best_match[i][j] = j;
+    qsort(m->best_match[i],nsymb,sizeof(m->best_match[0][0]),
+	  best_match_qsort_cmp);
+#ifdef SOURCE_EXCLUDED
+    printf("%c SORT",m->symbol[i]); /* TEST: PRINT OUT SORTED TABLE */
+    LOOPF (j,nsymb)
+      printf("\t%c:%d",m->symbol[m->best_match[i][j]],
+	     m->score[i][m->best_match[i][j]]);
+    printf("\n");
+#endif
+  }
+
+  m->symbol[nsymb]='\0'; /* TERMINATE THE SYMBOL STRING */
+  m->nsymbol=nsymb;
+  return nsymb;
+}
+
+
+
+/** prints a scoring matrix, only including those symbols in subset[] */
+void print_score_matrix(FILE *ifile,ResidueScoreMatrix_T *m,char subset[])
+{
+  int i,i_m,j,j_m,nsubset;
+
+  nsubset=strlen(subset);
+
+  printf(" ");
+  LOOPF (i,nsubset)
+    printf("  %c",subset[i]);
+  printf("\n");
+
+  LOOPF (i,nsubset) {
+    LOOP (i_m,m->nsymbol)
+      if (m->symbol[i_m]==subset[i])
+	break;
+    printf("%c",subset[i]);
+    LOOPF (j,nsubset) {
+      LOOP (j_m,m->nsymbol)
+	if (m->symbol[j_m]==subset[j])
+	  break;
+      printf("%3d",m->score[i_m][j_m]);
+    }
+    printf("\n");
+  }
+  return;
+}
+
+
+
+/** restricts seq[] to the set of allowed characters given by symbol[];
+ other characters will be replaced by the default symbol[0] */
+int limit_residues(char seq[],char symbol[])
+{
+  int i,len,nreplace=0;
+
+  len=strlen(seq);
+  for (i=strspn(seq,symbol);i<len;i=i+1+strspn(seq+i+1,symbol)) {
+    seq[i]=symbol[0]; /* FORCE IT TO BE A VALID SYMBOL */
+    nreplace++; /* COUNT TOTAL REPLACEMENTS */
+  }
+  return nreplace;
+}
+
+
+
+
+
+/** RETURNS THE COMPLEMENTARY BASE *IN LOWER CASE* */
+char complementary_base(char base) 
+{
+  switch (base) {
+  case 'A': case 'a':                     return 't';
+  case 'T': case 't': case 'U': case 'u': return 'a';
+  case 'G': case 'g':                     return 'c';
+  case 'C': case 'c':                     return 'g';
+  default: return base;
+  }
+}
+
+/** REVERSE COMPLEMENTS seq[] IN PLACE, AND RETURNS POINTER TO seq  
+---------------------------------------------------------------
+-----------------------------------------------------------
+*/
+char *reverse_complement(char seq[])
+{
+  int i,j;
+  char c;
+  for ((i=0),(j=strlen(seq)-1);i<=j;i++,j--) {/* SWAP FROM ENDS TO CENTER*/
+    c=complementary_base(seq[i]); /* SWAP ENDS AND COMPLEMENT */
+    seq[i]=complementary_base(seq[j]);
+    seq[j]=c;
+  }
+  return seq;
+}
+

Added: trunk/packages/poa/branches/upstream/current/seq_util.h
===================================================================
--- trunk/packages/poa/branches/upstream/current/seq_util.h	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/seq_util.h	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,72 @@
+
+#ifndef SEQ_UTIL_HEADER_INCLUDED
+#define SEQ_UTIL_HEADER_INCLUDED
+
+/* NB: YOU *MUST* TYPEDEF Sequence_T ELSEWHERE, IN ORDER FOR THE PROTOTYPES
+   BELOW TO WORK... */
+
+/* USE_PROJECT_HEADER IS A GENERAL WAY TO SNEAK IN CUSTOMIZATION
+   BEFORE PROCESSING DEFINITIONS IN THIS FILE */
+#ifdef USE_PROJECT_HEADER
+#include "project.h"
+#endif
+
+#ifndef RESIDUE_SCORE_DEFINED
+typedef int ResidueScore_T; /* DEFAULT: USE int FOR SCORING */
+#endif
+
+
+
+
+#define FASTA_NAME_MAX   4096  /*define the max length of the name */
+#define SEQ_LENGTH_MAX  32768 /* define the maximum length of each seq */
+
+enum {
+  dont_switch_case,
+  switch_case_to_lower,
+  switch_case_to_upper
+};
+
+
+
+#define MATRIX_SYMBOL_MAX 128
+typedef struct {
+  int nsymbol;
+  char symbol[MATRIX_SYMBOL_MAX];
+  ResidueScore_T score[MATRIX_SYMBOL_MAX][MATRIX_SYMBOL_MAX];
+  int best_match[MATRIX_SYMBOL_MAX][MATRIX_SYMBOL_MAX];
+  ResidueScore_T gap_penalty_set[2][3];
+  int trunc_gap_length;
+  int decay_gap_length;
+  int nfreq; /* STORE FREQUENCIES OF AMINO ACIDS FOR BALANCING MATRIX...*/
+  char freq_symbol[MATRIX_SYMBOL_MAX];
+  float freq[MATRIX_SYMBOL_MAX];
+} ResidueScoreMatrix_T;
+
+/****************************************************** seq_util.c */
+void shuffle_seq(int len,
+		char seq[],
+		char randseq[]);
+
+void index_symbols(int nseq,char seq[],char out[],
+		   int nsymbs,char symbols[]);
+
+int read_score_matrix(char filename[],ResidueScoreMatrix_T *m);
+
+void print_score_matrix(FILE *ifile,ResidueScoreMatrix_T *m,char subset[]);
+
+int limit_residues(char seq[],char symbol[]);
+
+int create_seq(int nseq,Sequence_T **seq,char seq_name[],char seq_title[],char tmp_seq[],int do_switch_case);
+
+char *reverse_complement(char seq[]);
+
+
+
+/******************************************************* fasta_format.c */
+int read_fasta(FILE *seq_file,Sequence_T **seq,
+	       int do_switch_case,char **comment);
+
+void write_fasta(FILE *ifile,char name[],char title[],char seq[]);
+
+#endif

Added: trunk/packages/poa/branches/upstream/current/stringptr.c
===================================================================
--- trunk/packages/poa/branches/upstream/current/stringptr.c	2006-08-07 14:01:18 UTC (rev 90)
+++ trunk/packages/poa/branches/upstream/current/stringptr.c	2006-08-23 13:45:39 UTC (rev 91)
@@ -0,0 +1,95 @@
+
+#include "default.h"
+
+
+/*******************************************************************
+  *
+  *     STRINGPTR_CAT:
+  *     holds string and count of last alloc'd size.  Conveniently
+  *     manages static and dynamic strings, allowing auto-resize
+  *     without worrying about memory management.
+  *
+  *     This function adds any string to the string ptr.  It even
+  *     insulates against possibility that S2 is a substring of S1
+  *     by pre-copying it.
+  *
+  ******************************************************************/
+
+char *stringptr_cat_pos(stringptr *s1,const char s2[],int *pos) /* ~~g --- */
+{
+  int s2_len,total_len;
+  char *stringptr_cat_temp_p=NULL,*s2_temp=NULL;
+  
+  if (s2 == NULL)					/* it might be better to make this a debug cope */
+  	return s1->p;
+  
+  if (0 == s1->last_alloc) /* SAVE STATIC STRING */
+    stringptr_cat_temp_p = s1->p; /* SAVE OLD */
+
+  total_len=s2_len=strlen(s2)+1;
+  CALLOC(s2_temp,s2_len,char); /* ALLOCATE TEMP STORAGE */
+  memcpy(s2_temp,s2,s2_len); /* COPY THE STRING */
+
+  if (s1->p) { /* CALCULATE ADDITIONAL SPACE NEEDED FOR ORIGINAL STRING */
+    if (pos) /* USE THE CALLER-SUPPLIED STRING LENGTH */
+      total_len += *pos;
+    else /* OTHERWISE MEASURE IT */
+      total_len += strlen(s1->p);
+  }
+  
+  GETMEM(s1->p,total_len,
+	 s1->last_alloc,STRINGPTR_BUFFER_CHUNK,char);
+
+  if (stringptr_cat_temp_p) /* PUT OLD STATIC STRING BACK IN */ 
+    strcpy(s1->p,stringptr_cat_temp_p); 
+
+  if (pos) { /* ATTACH NEW STRING AT CALLER-SUPPLIED END-POSITION */
+    strcpy(s1->p + *pos,s2_temp);
+    *pos = total_len-1; /* EXCLUDE TERMINATOR */
+  }
+  else /* OTHERWISE strcat AS USUAL */
+    strcat(s1->p,s2_temp);
+
+  FREE(s2_temp);  /* FREE THE TEMP STORAGE */
+
+  return s1->p;
+}
+
+
+char *stringptr_cat(stringptr *s1,const char s2[]) /* ~~g --- */
+{
+  return stringptr_cat_pos(s1,s2,NULL);
+}
+
+
+char *stringptr_cpy(stringptr *s1,const char s2[]) /* ~~g --- */
+{
+  GETMEM(s1->p,strlen(s2)+1,s1->last_alloc,STRINGPTR_BUFFER_CHUNK,char);
+  strcpy(s1->p,s2);
+
+  return s1->p;
+}
+
+
+
+
+
+
+/**stringptr_free*************************************************
+  *
+  *     stringptr_free:
+  *     AUTHOR: tal
+  *     Wed Jul 27 03:04:41 PDT 1994
+  *
+  *     Frees stringptr type.
+  *
+  ***************************************************************/
+
+int stringptr_free(stringptr *s) /* ~~g --- */
+{
+  FREE(s->p);
+  s->last_alloc=0;
+  return 0;
+}
+
+




More information about the debian-med-commit mailing list