[med-svn] [piler] 01/02: Imported Upstream version 0~20140707

Afif Elghraoui afif-guest at moszumanska.debian.org
Fri Nov 20 07:14:13 UTC 2015


This is an automated email from the git hooks/post-receive script.

afif-guest pushed a commit to branch master
in repository piler.

commit 9a9b955cfa32a189eb9d8c039e6f82c34f84db89
Author: Afif Elghraoui <afif at ghraoui.name>
Date:   Thu Nov 19 20:35:29 2015 -0800

    Imported Upstream version 0~20140707
---
 Makefile           |  25 ++
 annot.cpp          |  47 ++++
 annotedge.cpp      |  47 ++++
 bitfuncs.h         |  20 ++
 cons.cpp           | 134 +++++++++++
 contigs.cpp        | 178 ++++++++++++++
 crisp.cpp          | 640 +++++++++++++++++++++++++++++++++++++++++++++++++
 findcc.cpp         | 233 ++++++++++++++++++
 gff.cpp            | 114 +++++++++
 gff2.cpp           | 253 ++++++++++++++++++++
 gffset.cpp         |  98 ++++++++
 gffset.h           |  32 +++
 glix.cpp           | 304 ++++++++++++++++++++++++
 glix.h             |  60 +++++
 hash.cpp           | 164 +++++++++++++
 iix.cpp            | 160 +++++++++++++
 iix.h              |  42 ++++
 log.cpp            |  51 ++++
 main.cpp           |  87 +++++++
 makeannot.cpp      | 373 +++++++++++++++++++++++++++++
 mem.cpp            |  53 +++++
 options.cpp        | 156 ++++++++++++
 params.h           |   8 +
 piler2.h           | 148 ++++++++++++
 progress.cpp       |  67 ++++++
 quit.cpp           |  38 +++
 readafa.cpp        |  86 +++++++
 readhits.cpp       | 121 ++++++++++
 readmfa.cpp        |  98 ++++++++
 readmotif.cpp      |  74 ++++++
 readreps.cpp       | 126 ++++++++++
 readtrs.cpp        |  94 ++++++++
 tan.cpp            | 503 +++++++++++++++++++++++++++++++++++++++
 tanmotif2fasta.cpp | 114 +++++++++
 tr.cpp             | 354 +++++++++++++++++++++++++++
 trs.cpp            | 682 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 trs2fasta.cpp      | 142 +++++++++++
 types.h            | 152 ++++++++++++
 usage.cpp          |  40 ++++
 utils.cpp          |  89 +++++++
 utils_linux.cpp    |  76 ++++++
 utils_unix.cpp     |  38 +++
 utils_win32.cpp    |  34 +++
 writecrisp.cpp     |  61 +++++
 writefasta.cpp     |  62 +++++
 writeimages.cpp    |  50 ++++
 writepiles.cpp     |  31 +++
 writetrs.cpp       |  44 ++++
 48 files changed, 6603 insertions(+)

diff --git a/Makefile b/Makefile
new file mode 100755
index 0000000..d6852f8
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,25 @@
+CFLAGS = -O3 -DNDEBUG=1
+LDLIBS = -lm -static
+# LDLIBS = -lm
+
+OBJ = .o
+EXE =
+
+RM = rm -f
+CP = cp
+
+GPP = g++
+LD = $(GPP) $(CFLAGS)
+CPP = $(GPP) -c $(CFLAGS) 
+CC = gcc -c $(CFLAGS) 
+
+all: piler2
+
+CPPSRC = $(sort $(wildcard *.cpp))
+CPPOBJ	= $(subst .cpp,.o,$(CPPSRC))
+
+$(CPPOBJ): %.o: %.cpp
+	$(CPP) $< -o $@
+
+piler2: $(CPPOBJ)
+	$(LD) -o piler2 $(CPPOBJ) $(LDLIBS)
diff --git a/annot.cpp b/annot.cpp
new file mode 100755
index 0000000..c42edf5
--- /dev/null
+++ b/annot.cpp
@@ -0,0 +1,47 @@
+#include "piler2.h"
+
+void Annot()
+	{
+	const char *InputFileName = RequiredValueOpt("annot");
+	const char *RepeatFileName = RequiredValueOpt("rep");
+	const char *OutputFileName = RequiredValueOpt("out");
+
+	ProgressStart("Reading repeat file");
+	int RepCount;
+	RepData *Reps = ReadReps(RepeatFileName, &RepCount);
+	ProgressDone();
+
+	Progress("%d records", RepCount);
+
+	FILE *fInput = OpenStdioFile(InputFileName);
+	FILE *fOutput = OpenStdioFile(OutputFileName, FILEIO_MODE_WriteOnly);
+
+	ProgressStart("Transferring annotation");
+	GFFRecord Rec;
+	while (GetNextGFFRecord(fInput, Rec))
+		{
+		const bool Rev = Rec.Strand == '-';
+		const char *Annot = MakeAnnot(Rec.SeqName, Rec.Start-1, Rec.End-1, Rev,
+		  Reps, RepCount);
+
+		fprintf(fOutput, "%s\t%s\t%s\t%d\t%d\t%.3g\t%c",
+		//                0   1   2   3   4   5     6
+		  Rec.SeqName,	// 0
+		  Rec.Source,	// 1
+		  Rec.Feature,	// 2
+		  Rec.Start,	// 3
+		  Rec.End,		// 4
+		  Rec.Score,	// 5
+		  Rec.Strand);	// 6
+
+		if (-1 == Rec.Frame)
+			fprintf(fOutput, "\t.");
+		else
+			fprintf(fOutput, "\t%d", Rec.Frame);
+
+		fprintf(fOutput, "\t%s ; Annot \"%s\"\n", Rec.Attrs, Annot);
+		}
+	fclose(fInput);
+	fclose(fOutput);
+	ProgressDone();
+	}
diff --git a/annotedge.cpp b/annotedge.cpp
new file mode 100755
index 0000000..c534305
--- /dev/null
+++ b/annotedge.cpp
@@ -0,0 +1,47 @@
+#include "piler2.h"
+
+void AnnotEdge()
+	{
+	const char *InputFileName = RequiredValueOpt("annotedge");
+	const char *RepeatFileName = RequiredValueOpt("rep");
+	const char *OutputFileName = RequiredValueOpt("out");
+
+	ProgressStart("Reading repeat file");
+	int RepCount;
+	RepData *Reps = ReadReps(RepeatFileName, &RepCount);
+	ProgressDone();
+
+	Progress("%d records", RepCount);
+
+	FILE *fInput = OpenStdioFile(InputFileName);
+	FILE *fOutput = OpenStdioFile(OutputFileName, FILEIO_MODE_WriteOnly);
+
+	ProgressStart("Transferring annotation");
+	GFFRecord Rec;
+	while (GetNextGFFRecord(fInput, Rec))
+		{
+		const bool Rev = (Rec.Strand == '-');
+		const char *Annot = MakeAnnotEdge(Rec.SeqName, Rec.Start-1, Rec.End-1, Rev,
+		  Reps, RepCount);
+
+		fprintf(fOutput, "%s\t%s\t%s\t%d\t%d\t%.3g\t%c",
+		//                0   1   2   3   4   5     6
+		  Rec.SeqName,	// 0
+		  Rec.Source,	// 1
+		  Rec.Feature,	// 2
+		  Rec.Start,	// 3
+		  Rec.End,		// 4
+		  Rec.Score,	// 5
+		  Rec.Strand);	// 6
+
+		if (-1 == Rec.Frame)
+			fprintf(fOutput, "\t.");
+		else
+			fprintf(fOutput, "\t%d", Rec.Frame);
+
+		fprintf(fOutput, "\t%s ; Annot \"%s\"\n", Rec.Attrs, Annot);
+		}
+	fclose(fInput);
+	fclose(fOutput);
+	ProgressDone();
+	}
diff --git a/bitfuncs.h b/bitfuncs.h
new file mode 100755
index 0000000..d831cb1
--- /dev/null
+++ b/bitfuncs.h
@@ -0,0 +1,20 @@
+#ifndef bitfuncs_h
+#define bitfuncs_h
+
+static inline void SetBit(int *BitVec, int Bit)
+	{
+	int IntIndex = Bit/BITS_PER_INT;
+	int BitIndex = Bit%BITS_PER_INT;
+	int i = BitVec[IntIndex];
+	BitVec[IntIndex] = (i | (1 << BitIndex));
+	}
+
+static inline int BitIsSet(int *BitVec, int Bit)
+	{
+	int IntIndex = Bit/BITS_PER_INT;
+	int BitIndex = Bit%BITS_PER_INT;
+	int i = BitVec[IntIndex];
+	return (i & (1 << BitIndex));
+	}
+
+#endif // bitfuncs_h
diff --git a/cons.cpp b/cons.cpp
new file mode 100755
index 0000000..d555fd4
--- /dev/null
+++ b/cons.cpp
@@ -0,0 +1,134 @@
+#include "piler2.h"
+
+static const char Letter[5] = { 'A', 'C', 'G', 'T', '-'};
+
+static void GetCounts(const char *Seqs, int ColCount, int SeqCount,
+  int ColIndex, int Counts[5])
+	{
+	memset(Counts, 0, 5*sizeof(unsigned));
+	for (int SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
+		{
+		char c = Seqs[SeqIndex*ColCount + ColIndex];
+		c = toupper(c);
+		switch (c)
+			{
+		case 'a':
+		case 'A':
+			++(Counts[0]);
+			break;
+		case 'c':
+		case 'C':
+			++(Counts[1]);
+			break;
+		case 'g':
+		case 'G':
+			++(Counts[2]);
+			break;
+		case 't':
+		case 'T':
+			++(Counts[3]);
+			break;
+		case '-':
+			++(Counts[4]);
+			break;
+			}
+		}
+	}
+
+static bool Conserved(const char *Seqs, int ColCount, int SeqCount, int Col)
+	{
+	int Counts[5];
+	GetCounts(Seqs, ColCount, SeqCount, Col, Counts);
+	if (Counts[4] > 0)
+		return false;
+
+	for (int i = 0; i < 4; ++i)
+		if (Counts[i] != 0 && Counts[i] != SeqCount)
+			return false;
+
+	return true;
+	}
+
+static void FindStartEndExact(const char *Seqs, int ColCount, int SeqCount,
+  int *ptrStart, int *ptrEnd)
+	{
+	int BestLength = 0;
+	int BestStart = 0;
+	int BestEnd = 0;
+	int Length = 0;
+	int Start = 0;
+	for (int Col = 0; Col <= ColCount; ++Col)
+		{
+		if (Col != ColCount && Conserved(Seqs, ColCount, SeqCount, Col))
+			{
+			if (Start == -1)
+				{
+				Start = Col;
+				Length = 1;
+				}
+			else
+				++Length;
+			}
+		else
+			{
+			if (Length > BestLength)
+				{
+				BestStart = Start;
+				BestEnd = Col - 1;
+				if (BestEnd - BestStart + 1 != Length)
+					Quit("BestEnd");
+				}
+			Length = -1;
+			Start = -1;
+			}
+		}
+	*ptrStart = BestStart;
+	*ptrEnd = BestEnd;
+	}
+
+void Cons()
+	{
+	const char *InputFileName = RequiredValueOpt("cons");
+	const char *OutputFileName = RequiredValueOpt("out");
+	const char *Label = RequiredValueOpt("label");
+	bool Exact = FlagOpt("exact");
+
+	ProgressStart("Reading alignment");
+	int ColCount;
+	int SeqCount;
+	const char *Seqs = ReadAFA(InputFileName, &ColCount, &SeqCount);
+	ProgressDone();
+	
+	Progress("%d seqs, length %d", SeqCount, ColCount);
+
+	char *ConsSeq = all(char, ColCount+1);
+	int ConsSeqLength = 0;
+
+	int StartCol = 0;
+	int EndCol = ColCount - 1;
+
+	if (Exact)
+		FindStartEndExact(Seqs, ColCount, SeqCount, &StartCol, &EndCol);
+
+	for (int Col = StartCol; Col <= EndCol; ++Col)
+		{
+		int Counts[5];
+		GetCounts(Seqs, ColCount, SeqCount, Col, Counts);
+		int MaxCount = 0;
+		char MaxLetter = 'A';
+		for (int i = 0; i < 4; ++i)
+			{
+			if (Counts[i] > MaxCount)
+				{
+				MaxLetter = Letter[i];
+				MaxCount = Counts[i];
+				}
+			}
+		if (MaxLetter == '-')
+			continue;
+		ConsSeq[ConsSeqLength++] = MaxLetter;
+		}
+
+	FILE *f = OpenStdioFile(OutputFileName, FILEIO_MODE_WriteOnly);
+	WriteFasta(f, ConsSeq, ConsSeqLength, Label);
+	}
diff --git a/contigs.cpp b/contigs.cpp
new file mode 100755
index 0000000..951f2dc
--- /dev/null
+++ b/contigs.cpp
@@ -0,0 +1,178 @@
+#include "piler2.h"
+
+const int INSANE_LENGTH = 500000000;
+
+// Arbitrarily chosen prime number
+static ContigData *HashTable[HASH_TABLE_SIZE];
+static ContigData **ContigMap;
+static int SeqLength;
+
+void SetSeqLength(int Length)
+	{
+	SeqLength = Length;
+	}
+
+static ContigData *HashLookup(const char *Label)
+	{
+	unsigned h = Hash(Label);
+	assert(h < HASH_TABLE_SIZE);
+	ContigData *p = HashTable[h];
+	while (p != 0)
+		{
+		if (0 == strcmp(Label, p->Label))
+			return p;
+		p = p->Next;
+		}
+	return 0;
+	}
+
+void LogContigs()
+	{
+	Log("               Label    Hash         Pos     Length\n");
+	Log("--------------------  ------  ---------- ----------\n");
+	for (int h = 0; h < HASH_TABLE_SIZE; ++h)
+		{
+		for (const ContigData *p = HashTable[h]; p != 0; p = p->Next)
+			{
+			const ContigData &Contig = *p;
+			Log("%20.20s  %6d  %10d  %10d",
+			  Contig.Label,
+			  h,
+			  Contig.From,
+			  Contig.Length);
+			if (Hash(Contig.Label) != h)
+				Log(" ** Hash=%d", Hash(Contig.Label));
+			Log("\n");
+			}
+		}
+	}
+
+static void AppendContig(const char *Label, int Pos)
+	{
+	ContigData *Contig = all(ContigData, 1);
+	Contig->Label = strsave(Label);
+	char *Space = strchr(Contig->Label, ' ');
+	if (0 != Space)
+		*Space = 0;
+
+	unsigned h = Hash(Label);
+	assert(h < HASH_TABLE_SIZE);
+	ContigData *p = HashTable[h];
+	Contig->Length = Pos + 1;
+	Contig->Next = p;
+	HashTable[h] = Contig;
+	}
+
+void AddContigPos(const char *Label, int Pos)
+	{
+	ContigData *Contig = HashLookup(Label);
+	if (0 == Contig)
+		AppendContig(Label, Pos);
+	else
+		{
+		if (Pos > Contig->Length)
+			Contig->Length = Pos;
+		}
+	}
+
+void AddContig(const char *Label, int GlobalPos, int Length)
+	{
+	ContigData *Contig = all(ContigData, 1);
+	Contig->Label = strsave(Label);
+	char *Space = strchr(Contig->Label, ' ');
+	if (0 != Space)
+		*Space = 0;
+
+	unsigned h = Hash(Contig->Label);
+	assert(h < HASH_TABLE_SIZE);
+	ContigData *p = HashTable[h];
+	Contig->From = GlobalPos;
+	Contig->Length = Length;
+	Contig->Next = p;
+	HashTable[h] = Contig;
+	}
+
+int GlobalizeContigs()
+	{
+	int GlobalPos = 0;
+	for (int h = 0; h < HASH_TABLE_SIZE; ++h)
+		{
+		for (ContigData *p = HashTable[h]; p != 0; p = p->Next)
+			{
+			ContigData &Contig = *p;
+			const int Length = p->Length;
+			if (Length < 1 || Length >= INSANE_LENGTH)
+				Quit("GlobalizeContigs, insane length %d", Length);
+			p->From = GlobalPos;
+			GlobalPos += Length;
+
+		// Pad up to start of next bin
+		// (required for contig map)
+			int BinRemainder = GlobalPos%CONTIG_MAP_BIN_SIZE;
+			if (BinRemainder > 0)
+				GlobalPos += CONTIG_MAP_BIN_SIZE - BinRemainder;
+			if (GlobalPos%CONTIG_MAP_BIN_SIZE)
+				Quit("Dumb mistake rounding contig");
+			}
+		}
+	SeqLength = GlobalPos;
+	return SeqLength;
+	}
+
+void MakeContigMap()
+	{
+	if (SeqLength%CONTIG_MAP_BIN_SIZE)
+		Quit("MakeContigMap: expects rounded size");
+
+	const int BinCount = SeqLength/CONTIG_MAP_BIN_SIZE;
+	ContigMap = all(ContigData *, BinCount);
+	zero(ContigMap, ContigData *, BinCount);
+
+	for (int h = 0; h < HASH_TABLE_SIZE; ++h)
+		{
+		for (ContigData *p = HashTable[h]; p != 0; p = p->Next)
+			{
+			ContigData &Contig = *p;
+			int From = p->From;
+			int To = From + p->Length - 1;
+
+			if (From%CONTIG_MAP_BIN_SIZE)
+				Quit("MakeContigMap: expected rounded contig from");
+
+			int BinFrom = From/CONTIG_MAP_BIN_SIZE;
+			int BinTo = To/CONTIG_MAP_BIN_SIZE;
+			for (int Bin = BinFrom; Bin <= BinTo; ++Bin)
+				{
+				if (ContigMap[Bin] != 0)
+					Quit("MakeContigMap: overlap error");
+				ContigMap[Bin] = p;
+				}
+			}
+		}
+	}
+
+const char *GlobalToContig(int GlobalPos, int *ptrContigPos)
+	{
+	if (GlobalPos < 0 || GlobalPos >= SeqLength)
+		Quit("GlobalToContig: invalid pos");
+
+	const int Bin = GlobalPos/CONTIG_MAP_BIN_SIZE;
+	ContigData *Contig = ContigMap[Bin];
+	if (0 == Contig)
+		Quit("GlobalToContig: doesn't map");
+	int ContigPos = GlobalPos - Contig->From;
+	if (ContigPos < 0 || ContigPos >= Contig->Length)
+		Quit("GlobalToContig: out of bounds");
+	*ptrContigPos = ContigPos;
+	return Contig->Label;
+	}
+
+int ContigToGlobal(int ContigPos, const char *Label)
+	{
+	const ContigData *Contig = HashLookup(Label);
+	if (0 == Contig)
+		Quit("ContigToGlobal: contig not found (%s)", Label);
+	if (ContigPos < 0 || ContigPos >= Contig->Length)
+		Quit("ContigToGlobal: out of bounds");
+	return Contig->From + ContigPos;
+	}
diff --git a/crisp.cpp b/crisp.cpp
new file mode 100755
index 0000000..399923f
--- /dev/null
+++ b/crisp.cpp
@@ -0,0 +1,640 @@
+#include "piler2.h"
+#include "bitfuncs.h"
+#include <algorithm>
+
+#define TRACE		0
+
+#define GFFRecord GFFRecord2
+
+/***
+Find CRISPR families.
+
+CRISPR candidate:
+
+              <-----spacer------->
+
+    ------====--------------------====------------ genome
+
+	      Pile                    Pile
+           ^-----------------------^
+                      Hit
+
+Candidate if
+    (a) hit length >= MIN_LENGTH_CRISPR and <= MAX_LENGTH_CRISPR, and
+    (b) offset is >= MIN_LENGTH_SPACER and <= MAX_LENGTH_SPACER.
+
+***/
+
+static int MIN_CRISPR_LENGTH = 10;
+static int MAX_CRISPR_LENGTH = 200;
+static int MIN_SPACER_LENGTH = 10;
+static int MAX_SPACER_LENGTH = 200;
+static double MIN_CRISPR_RATIO = 0.8;
+static double MIN_SPACER_RATIO = 0.8;
+static int MIN_FAM_SIZE = 3;
+static int MAX_SPACE_DIFF = 20;
+
+static int g_paramMinFamSize = 3;
+static int g_paramMaxLengthDiffPct = 5;
+static bool g_paramSingleHitCoverage = true;
+
+static PileData *Piles;
+static int PileCount;
+static int EdgeCount;
+
+static int MaxImageCount = 0;
+static int SeqLength;
+static int SeqLengthChunks;
+
+static int GetHitLength(const HitData &Hit)
+	{
+	const int QueryHitLength = Hit.QueryTo - Hit.QueryFrom + 1;
+	const int TargetHitLength = Hit.TargetTo - Hit.TargetFrom + 1;
+	return (QueryHitLength + TargetHitLength)/2;
+	}
+
+static int GetSpacerLength(const HitData &Hit)
+	{
+	if (Hit.QueryFrom > Hit.TargetTo)
+		return Hit.QueryFrom - Hit.TargetTo;
+	else
+		return Hit.TargetFrom - Hit.QueryTo;
+	}
+
+static bool IsCand(const HitData &Hit)
+	{
+	if (Hit.Rev)
+		return false;
+
+	int HitLength = GetHitLength(Hit);
+	if (HitLength > MAX_CRISPR_LENGTH || HitLength < MIN_CRISPR_LENGTH)
+		return false;
+
+	int SpacerLength = GetSpacerLength(Hit);
+	if (SpacerLength > MAX_SPACER_LENGTH || SpacerLength < MIN_SPACER_LENGTH)
+		return false;
+
+	return true;
+	}
+
+static PILE_INDEX_TYPE *IdentifyPiles(int *CopyCount)
+	{
+	PILE_INDEX_TYPE *PileIndexes = all(PILE_INDEX_TYPE, SeqLengthChunks);
+
+#if	DEBUG
+	memset(PileIndexes, 0xff, SeqLengthChunks*sizeof(PILE_INDEX_TYPE));
+#endif
+
+	int PileIndex = -1;
+	bool InPile = false;
+	for (int i = 0; i < SeqLengthChunks; ++i)
+		{
+		if (BitIsSet(CopyCount, i))
+			{
+			if (!InPile)
+				{
+				++PileIndex;
+				if (PileIndex > MAX_STACK_INDEX)
+					Quit("Too many stacks");
+				InPile = true;
+				}
+			PileIndexes[i] = PileIndex;
+			}
+		else
+			InPile = false;
+		}
+	PileCount = PileIndex + 1;
+	return PileIndexes;
+	}
+
+static void IncCopyCountImage(int *CopyCount, int From, int To)
+	{
+	if (From < 0)
+		Quit("From < 0");
+
+	From /= CHUNK_LENGTH;
+	To /= CHUNK_LENGTH;
+
+	if (From >= SeqLengthChunks)
+		{
+		Warning("IncCopyCountImage: From=%d, SeqLength=%d", From, SeqLengthChunks);
+		From = SeqLengthChunks - 1;
+		}
+	if (To >= SeqLengthChunks)
+		{
+		Warning("IncCopyCountImage: To=%d, SeqLength=%d", To, SeqLengthChunks);
+		To = SeqLengthChunks - 1;
+		}
+
+	if (From > To)
+		Quit("From > To");
+
+	for (int i = From; i <= To; ++i)
+		SetBit(CopyCount, i);
+	}
+
+static void IncCopyCount(int *CopyCount, const HitData &Hit)
+	{
+	IncCopyCountImage(CopyCount, Hit.TargetFrom, Hit.TargetTo);
+	IncCopyCountImage(CopyCount, Hit.QueryFrom, Hit.QueryTo);
+	}
+
+static int CmpHits(const void *ptrHit1, const void *ptrHit2)
+	{
+	HitData *Hit1 = (HitData *) ptrHit1;
+	HitData *Hit2 = (HitData *) ptrHit2;
+	return Hit1->QueryFrom - Hit2->QueryFrom;
+	}
+
+static int CmpImages(const void *ptrImage1, const void *ptrImage2)
+	{
+	PileImageData *Image1 = (PileImageData *) ptrImage1;
+	PileImageData *Image2 = (PileImageData *) ptrImage2;
+	return Image1->SIPile - Image2->SIPile;
+	}
+
+static void AssertImagesSorted(PileImageData *Images, int ImageCount)
+	{
+	for (int i = 0; i < ImageCount - 1; ++i)
+		if (Images[i].SIPile > Images[i+1].SIPile)
+			Quit("Images not sorted");
+	}
+
+static void SortImagesPile(PileImageData *Images, int ImageCount)
+	{
+	qsort(Images, ImageCount, sizeof(PileImageData), CmpImages);
+	}
+
+static void SortImages()
+	{
+	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
+		{
+		PileData &Pile = Piles[PileIndex];
+		SortImagesPile(Pile.Images, Pile.ImageCount);
+#if	DEBUG
+		AssertImagesSorted(Pile.Images, Pile.ImageCount);
+#endif
+		}
+	}
+
+static bool FamMemberLt(const FamMemberData &Mem1, const FamMemberData &Mem2)
+	{
+	const PileData &Pile1 = Piles[Mem1.PileIndex];
+	const PileData &Pile2 = Piles[Mem2.PileIndex];
+	return Pile1.From < Pile2.From;
+	}
+
+static void CreatePiles(const HitData *Hits, int HitCount,
+  PILE_INDEX_TYPE *PileIndexes)
+	{
+	Piles = all(PileData, PileCount);
+	zero(Piles, PileData, PileCount);
+	for (int i = 0; i < PileCount; ++i)
+		{
+		Piles[i].FamIndex = -1;
+		Piles[i].SuperFamIndex = -1;
+		Piles[i].Rev = -1;
+		}
+
+// Count images in stack
+	ProgressStart("Create piles: count images");
+	for (int HitIndex = 0; HitIndex < HitCount; ++HitIndex)
+		{
+		const HitData &Hit = Hits[HitIndex];
+
+		int Pos = Hit.QueryFrom/CHUNK_LENGTH;
+		PILE_INDEX_TYPE PileIndex = PileIndexes[Pos];
+		assert(PileIndex == PileIndexes[Hit.QueryTo/CHUNK_LENGTH]);
+		assert(PileIndex >= 0 && PileIndex < PileCount);
+		++(Piles[PileIndex].ImageCount);
+
+		Pos = Hit.TargetFrom/CHUNK_LENGTH;
+		PileIndex = PileIndexes[Pos];
+		assert(PileIndex >= 0 && PileIndex < PileCount);
+		assert(PileIndex == PileIndexes[Hit.TargetTo/CHUNK_LENGTH]);
+		++(Piles[PileIndex].ImageCount);
+		}
+	ProgressDone();
+
+// Allocate memory for image list
+	int TotalImageCount = 0;
+	ProgressStart("Create piles: allocate image memory");
+	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
+		{
+		PileData &Pile = Piles[PileIndex];
+		const int ImageCount = Pile.ImageCount;
+		TotalImageCount += ImageCount;
+		assert(ImageCount > 0);
+		Pile.Images = all(PileImageData, ImageCount);
+		}
+	ProgressDone();
+
+// Build image list
+	ProgressStart("Create piles: build image list");
+	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
+		{
+		PileData &Pile = Piles[PileIndex];
+		Pile.ImageCount = 0;
+		Pile.From = -1;
+		Pile.To = -1;
+		}
+
+	for (int HitIndex = 0; HitIndex < HitCount; ++HitIndex)
+		{
+		const HitData &Hit = Hits[HitIndex];
+
+		const bool Rev = Hit.Rev;
+
+		const int Length1 = Hit.QueryTo - Hit.QueryFrom;
+		const int Length2 = Hit.TargetTo - Hit.TargetFrom;
+
+		const int From1 = Hit.QueryFrom;
+		const int From2 = Hit.TargetFrom;
+
+		const int To1 = Hit.QueryTo;
+		const int To2 = Hit.TargetTo;
+
+		const int Pos1 = From1/CHUNK_LENGTH;
+		const int Pos2 = From2/CHUNK_LENGTH;
+
+		PILE_INDEX_TYPE PileIndex1 = PileIndexes[Pos1];
+		PILE_INDEX_TYPE PileIndex2 = PileIndexes[Pos2];
+
+		assert(PileIndex1 == PileIndexes[(From1 + Length1 - 1)/CHUNK_LENGTH]);
+		assert(PileIndex1 >= 0 && PileIndex1 < PileCount);
+
+		assert(PileIndex2 == PileIndexes[(From2 + Length2 - 1)/CHUNK_LENGTH]);
+		assert(PileIndex2 >= 0 && PileIndex2 < PileCount);
+
+		PileData &Pile1 = Piles[PileIndex1];
+		PileImageData &Image1 = Pile1.Images[Pile1.ImageCount++];
+		Image1.SILength = Length2;
+		Image1.SIPile = PileIndex2;
+		Image1.SIRev = Rev;
+
+		PileData &Pile2 = Piles[PileIndex2];
+		PileImageData &Image2 = Pile2.Images[Pile2.ImageCount++];
+		Image2.SILength = Length1;
+		Image2.SIPile = PileIndex1;
+		Image2.SIRev = Rev;
+
+		if (Pile1.From == -1 || From1 < Pile1.From)
+			Pile1.From = From1;
+		if (Pile1.To == -1 || To1 > Pile1.To)
+			Pile1.To = To1;
+
+		if (Pile2.From == -1 || From2 < Pile2.From)
+			Pile2.From = From2;
+		if (Pile2.To == -1 || To2 > Pile2.To)
+			Pile2.To = To2;
+
+		if (Pile1.ImageCount > MaxImageCount)
+			MaxImageCount = Pile1.ImageCount;
+		if (Pile2.ImageCount > MaxImageCount)
+			MaxImageCount = Pile2.ImageCount;
+		}
+	ProgressDone();
+	}
+
+static int PileDist(const PileData &Pile1, const PileData &Pile2)
+	{
+	if (Pile1.From > Pile2.To)
+		return Pile1.From - Pile2.To;
+	else
+		return Pile2.From - Pile1.From;
+	}
+
+static int FindEdgesPile(PileData &Pile, int PileIndex,
+  PILE_INDEX_TYPE Partners[], bool PartnersRev[])
+	{
+	const int PileLength = Pile.To - Pile.From + 1;
+	if (PileLength < MIN_CRISPR_LENGTH || PileLength > MAX_CRISPR_LENGTH)
+		return 0;
+
+	const int ImageCount = Pile.ImageCount;
+
+	int PartnerCount = 0;
+	for (int ImageIndex = 0; ImageIndex < ImageCount; ++ImageIndex)
+		{
+		const PileImageData &Image = Pile.Images[ImageIndex];
+		const int PartnerImageLength = Image.SILength;
+		const int PartnerPileIndex = Image.SIPile;
+		const PileData &PartnerPile = Piles[PartnerPileIndex];
+		const int PartnerPileLength = PartnerPile.To - PartnerPile.From + 1;
+		const int Dist = PileDist(Pile, PartnerPile);
+
+		if (PartnerPileLength >= MIN_CRISPR_LENGTH &&
+		  PartnerPileLength <= MAX_CRISPR_LENGTH &&
+		  Dist >= MIN_SPACER_LENGTH &&
+		  Dist <= MAX_SPACER_LENGTH)
+			{
+			PartnersRev[PartnerCount] = Image.SIRev;
+			Partners[PartnerCount] = PartnerPileIndex;
+			++PartnerCount;
+			}
+		}
+	return PartnerCount;
+	}
+
+static void AddEdges(EdgeList &Edges, PILE_INDEX_TYPE PileIndex,
+  PILE_INDEX_TYPE Partners[], bool PartnersRev[], int PartnerCount)
+	{
+	EdgeCount += PartnerCount;
+	for (int i = 0; i < PartnerCount; ++i)
+		{
+		int PileIndex2 = Partners[i];
+		EdgeData Edge;
+		Edge.Node1 = PileIndex;
+		Edge.Node2 = PileIndex2;
+		Edge.Rev = PartnersRev[i];
+		Edges.push_back(Edge);
+		}
+	}
+
+static void FindCandEdges(EdgeList &Edges, int MaxImageCount)
+	{
+	Edges.clear();
+
+	PILE_INDEX_TYPE *Partners = all(PILE_INDEX_TYPE, MaxImageCount);
+	bool *PartnersRev = all(bool, MaxImageCount);
+	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
+		{
+		PileData &Pile = Piles[PileIndex];
+		int PartnerCount;
+		PartnerCount = FindEdgesPile(Pile, PileIndex, Partners, PartnersRev);
+		AddEdges(Edges, PileIndex, Partners, PartnersRev, PartnerCount);
+		}
+	freemem(Partners);
+	freemem(PartnersRev);
+	}
+
+static void AssignFamsToPiles(FamList &Fams)
+	{
+	int FamIndex = 0;
+	for (PtrFamList p = Fams.begin(); p != Fams.end(); ++p)
+		{
+		FamData *Fam = *p;
+		if (Fam->size() < (size_t) g_paramMinFamSize)
+			Quit("Fam size");
+		for (PtrFamData q = Fam->begin(); q != Fam->end(); ++q)
+			{
+			FamMemberData &FamMember = *q;
+			int PileIndex = FamMember.PileIndex;
+			PileData &Pile = Piles[PileIndex];
+			Pile.FamIndex = FamIndex;
+			Pile.Rev = (int) FamMember.Rev;
+			}
+		++FamIndex;
+		}
+	}
+
+// Discard families that don't have regular spacing
+typedef std::vector<FamMemberData> FAMVEC;
+
+static void FilterCrispFam(const FamData &Fam, FamList &OutFams)
+	{
+	int PileCount = (int) Fam.size();
+
+	FAMVEC FamVec;
+	FamVec.reserve(PileCount);
+
+	for (FamData::const_iterator p = Fam.begin(); p != Fam.end(); ++p)
+		FamVec.push_back(*p);
+
+	std::sort(FamVec.begin(), FamVec.end(), FamMemberLt);
+
+#if	TRACE
+	Log("\n");
+	Log("FilterCrispFam fam size=%d\n", (int) Fam.size());
+	Log("\n");
+	Log("After sort:\n");
+	Log(" Pile     From       To\n");
+	Log("=====  =======  =======\n");
+	for (int i = 0; i < PileCount; ++i)
+		{
+		const FamMemberData &Mem = FamVec[i];
+		int PileIndex = Mem.PileIndex;
+		const PileData &Pile = Piles[PileIndex];
+		Log("%5d  %7d  %7d\n", PileIndex, Pile.From, Pile.To);
+		}
+	Log("\n");
+#endif
+
+	FamData OutFam;
+	for (int i = 0; i < PileCount; ++i)
+		{
+		const FamMemberData &Mem = FamVec[i];
+
+		if (i == 0 || i == 1)
+			{
+			OutFam.push_back(Mem);
+			continue;
+			}
+
+		const FamMemberData &Mem_1 = FamVec[i-1];
+		const FamMemberData &Mem_2 = FamVec[i-2];
+
+		int PileIndex = Mem.PileIndex;
+		int PileIndex_1 = Mem_1.PileIndex;
+		int PileIndex_2 = Mem_2.PileIndex;
+
+		const PileData &Pile = Piles[PileIndex];
+		const PileData &Pile_1 = Piles[PileIndex_1];
+		const PileData &Pile_2 = Piles[PileIndex_2];
+
+		int Space_12 = Pile_1.From - Pile_2.To;
+		int Space_1 = Pile.From - Pile_1.To;
+#if	TRACE
+		{
+		Log("<Pile %d %d-%d> <space %d> <Pile %d %d-%d> <space %d> <Pile %d %d-%d>\n",
+		  PileIndex_2,
+		  Pile_2.From,
+		  Pile_2.To,
+		  Space_12,
+		  PileIndex_1,
+		  Pile_1.From,
+		  Pile_2.To,
+		  Space_1,
+		  PileIndex,
+		  Pile.From,
+		  Pile.To);
+		}
+#endif
+
+		if (iabs(Space_12 - Space_1) <= MAX_SPACE_DIFF)
+			{
+#if	TRACE
+			Log("Add %d to current family\n", PileIndex);
+#endif
+			OutFam.push_back(Mem);
+			}
+		else
+			{
+#if	TRACE
+			Log("Space difference too big, fam size so far %d\n", (int) OutFam.size());
+#endif
+			if (OutFam.size() >= (size_t) g_paramMinFamSize)
+				{
+				FamData *NewFam = new FamData;
+				*NewFam = OutFam;
+				OutFams.push_back(NewFam);
+				OutFam = *new FamData;
+				}
+			else
+				OutFam.clear();
+			}
+		}
+
+	if (OutFam.size() >= (size_t) g_paramMinFamSize)
+		{
+		FamData *NewFam = new FamData;
+		*NewFam = OutFam;
+		OutFams.push_back(NewFam);
+		}
+	}
+
+static void FilterCrispFams(const FamList &InFams, FamList &OutFams)
+	{
+	OutFams.clear();
+
+	for (FamList::const_iterator p = InFams.begin(); p != InFams.end(); ++p)
+		{
+		FamData *Fam = *p;
+		FilterCrispFam(*Fam, OutFams);
+		}
+	}
+
+void Crisp()
+	{
+	const char *InputFileName = RequiredValueOpt("crisp");
+
+	const char *OutputFileName = ValueOpt("out");
+	const char *PilesFileName = ValueOpt("piles");
+	const char *ImagesFileName = ValueOpt("images");
+	const char *ArraysFileName = ValueOpt("arrays");
+
+	const char *strMinFamSize = ValueOpt("famsize");
+
+	if (0 == OutputFileName && 0 == PilesFileName && 0 == ImagesFileName)
+		Quit("No output file specified, must be at least one of -out, -piles, -images");
+
+	if (0 != strMinFamSize)
+		g_paramMinFamSize = atoi(strMinFamSize);
+
+	ProgressStart("Read hit file");
+	int HitCount;
+	int SeqLength;
+	HitData *Hits = ReadHits(InputFileName, &HitCount, &SeqLength);
+	ProgressDone();
+	Progress("%d hits", HitCount);
+
+	ProgressStart("Filter candidate hits");
+	int NewHitCount = 0;
+	for (int i = 0; i < HitCount; ++i)
+		{
+		HitData &Hit = Hits[i];
+		if (IsCand(Hit))
+			Hits[NewHitCount++] = Hit;
+		}
+	ProgressDone();
+	Progress("%d of %d candidate hits", NewHitCount, HitCount);
+	HitCount = NewHitCount;
+
+	SeqLengthChunks = (SeqLength + CHUNK_LENGTH - 1)/CHUNK_LENGTH;
+
+	const int BitVectorLength = (SeqLengthChunks + BITS_PER_INT - 1)/BITS_PER_INT;
+	int *CopyCount = all(int, BitVectorLength);
+	zero(CopyCount, int, BitVectorLength);
+
+	ProgressStart("Compute copy counts");
+	for (int i = 0; i < HitCount; ++i)
+		IncCopyCount(CopyCount, Hits[i]);
+	ProgressDone();
+
+	ProgressStart("Identify piles");
+	PILE_INDEX_TYPE *PileIndexes = IdentifyPiles(CopyCount);
+	ProgressDone();
+
+	Progress("%d stacks", PileCount);
+
+	freemem(CopyCount);
+	CopyCount = 0;
+
+	CreatePiles(Hits, HitCount, PileIndexes);
+
+	if (0 != ImagesFileName)
+		{
+		ProgressStart("Writing images file");
+		WriteImages(ImagesFileName, Hits, HitCount, PileIndexes);
+		ProgressDone();
+		}
+
+	freemem(Hits);
+	Hits = 0;
+
+	if (0 != PilesFileName)
+		{
+		ProgressStart("Writing piles file");
+		WritePiles(PilesFileName, Piles, PileCount);
+		ProgressDone();
+		}
+
+	freemem(PileIndexes);
+	PileIndexes = 0;
+
+	if (0 == OutputFileName)
+		return;
+
+	ProgressStart("Find edges");
+	EdgeList Edges;
+	FindCandEdges(Edges, MaxImageCount);
+	ProgressDone();
+
+	Progress("%d edges", (int) Edges.size());
+
+	Progress("Find connected components");
+	FamList Fams;
+	FindConnectedComponents(Edges, Fams, g_paramMinFamSize);
+
+	Progress("Filter");
+	FamList OutFams;
+	FilterCrispFams(Fams, OutFams);
+
+	AssignFamsToPiles(OutFams);
+	ProgressDone();
+
+	Progress("%d arrays", (int) OutFams.size());
+
+	ProgressStart("Write crisp file");
+	WriteCrispFile(OutputFileName, Piles, PileCount);
+	ProgressDone();
+
+	if (0 != ArraysFileName)
+		{
+		FILE *fArray = OpenStdioFile(ArraysFileName, FILEIO_MODE_WriteOnly);
+		ProgressStart("Writing arrays file");
+		int FamIndex = 0;
+		for (PtrFamList p = OutFams.begin(); p != OutFams.end(); ++p)
+			{
+			int Lo = -1;
+			int Hi = -1;
+			FamData *Fam = *p;
+			if (Fam->size() < (size_t) g_paramMinFamSize)
+				Quit("Fam size");
+			for (PtrFamData q = Fam->begin(); q != Fam->end(); ++q)
+				{
+				FamMemberData &FamMember = *q;
+				int PileIndex = FamMember.PileIndex;
+				PileData &Pile = Piles[PileIndex];
+				if (Lo == -1 || Pile.From < Lo)
+					Lo = Pile.From;
+				if (Hi == -1 || Pile.To > Hi)
+					Hi = Pile.To;
+				}
+			WriteArray(fArray, FamIndex, Lo, Hi);
+			++FamIndex;
+			}
+		fclose(fArray);
+		ProgressDone();
+		}
+	}
diff --git a/findcc.cpp b/findcc.cpp
new file mode 100755
index 0000000..46a7cb7
--- /dev/null
+++ b/findcc.cpp
@@ -0,0 +1,233 @@
+#include "piler2.h"
+
+enum REVSTATE
+	{
+	REVSTATE_Unknown = 0,
+	REVSTATE_Normal = 1,
+	REVSTATE_Reversed = 2
+	};
+
+struct NeighborData
+	{
+	int NodeIndex;
+	bool Rev;
+	};
+
+typedef std::list<NeighborData> NeighborList;
+typedef NeighborList::iterator PtrNeighborList;
+
+struct NodeData
+	{
+	REVSTATE Rev;
+	int Index;
+	NeighborList *Neighbors;
+	NodeData *Next;
+	NodeData *Prev;
+	NodeData **List;
+	};
+
+static NodeData *Nodes;
+
+static FamData *MakeFam(NodeData *Nodes)
+	{
+	FamData *Fam = new FamData;
+	for (const NodeData *Node = Nodes; Node; Node = Node->Next)
+		{
+		FamMemberData FamMember;
+		FamMember.PileIndex = Node->Index;
+		switch (Node->Rev)
+			{
+		case REVSTATE_Unknown:
+			Quit("REVSTATE_Unknown");
+
+		case REVSTATE_Normal:
+			FamMember.Rev = false;
+			break;
+
+		case REVSTATE_Reversed:
+			FamMember.Rev = true;
+			break;
+			}
+
+		Fam->push_back(FamMember);
+		}
+	return Fam;
+	}
+
+static void AddNodeToList(NodeData *Node, NodeData **List)
+	{
+	NodeData *Head = *List;
+
+	Node->Next = Head;
+	Node->Prev = 0;
+
+	if (Head != 0)
+		Head->Prev = Node;
+
+	Node->List = List;
+	*List = Node;
+	}
+
+static void DeleteNodeFromList(NodeData *Node, NodeData **List)
+	{
+	assert(Node->List == List);
+
+	NodeData *Head = *List;
+
+	if (Node->Next != 0)
+		Node->Next->Prev = Node->Prev;
+
+	if (Node->Prev != 0)
+		Node->Prev->Next = Node->Next;
+	else
+		*List = Node->Next;
+
+	Node->List = 0;
+	}
+
+static NodeData *ListHead(NodeData **List)
+	{
+	return *List;
+	}
+
+static void MoveBetweenLists(NodeData *Node, NodeData **FromList, NodeData **ToList)
+	{
+	DeleteNodeFromList(Node, FromList);
+	AddNodeToList(Node, ToList);
+	}
+
+static bool ListIsEmpty(NodeData **List)
+	{
+	return 0 == *List;
+	}
+
+static bool NodeIsInList(NodeData *Node, NodeData **List)
+	{
+	return Node->List == List;
+	}
+
+static void LogList(NodeData **List)
+	{
+	for (const NodeData *Node = *List; Node; Node = Node->Next)
+		Log(" %d", Node->Index);
+	Log("\n");
+	}
+
+static int GetMaxIndex(EdgeList &Edges)
+	{
+	int MaxIndex = -1;
+	for (PtrEdgeList p = Edges.begin(); p != Edges.end(); ++p)
+		{
+		EdgeData &Edge = *p;
+		if (Edge.Node1 > MaxIndex)
+			MaxIndex = Edge.Node1;
+		if (Edge.Node2 > MaxIndex)
+			MaxIndex = Edge.Node2;
+		}
+	return MaxIndex;
+	}
+
+static REVSTATE RevState(REVSTATE Rev1, bool Rev2)
+	{
+	switch (Rev1)
+		{
+	case REVSTATE_Normal:
+		if (Rev2)
+			return REVSTATE_Reversed;
+		return REVSTATE_Normal;
+
+	case REVSTATE_Reversed:
+		if (Rev2)
+			return REVSTATE_Normal;
+		return REVSTATE_Reversed;
+		}
+	assert(false);
+	return REVSTATE_Unknown;
+	}
+
+int FindConnectedComponents(EdgeList &Edges, FamList &Fams, int MinComponentSize)
+	{
+	Fams.clear();
+
+	if (0 == Edges.size())
+		return 0;
+
+	int NodeCount = GetMaxIndex(Edges) + 1;
+	Nodes = new NodeData[NodeCount];
+
+	for (int i = 0; i < NodeCount; ++i)
+		{
+		Nodes[i].Neighbors = new NeighborList;
+		Nodes[i].Rev = REVSTATE_Unknown;
+		Nodes[i].Index = i;
+		}
+	
+	NodeData *NotVisitedList = 0;
+	NodeData *PendingList = 0;
+	NodeData *CurrentList = 0;
+
+	for (PtrEdgeList p = Edges.begin(); p != Edges.end(); ++p)
+		{
+		EdgeData &Edge = *p;
+		int From = Edge.Node1;
+		int To = Edge.Node2;
+
+		assert(From >= 0 && From < NodeCount);
+		assert(To >= 0 && From < NodeCount);
+
+		NeighborData NTo;
+		NTo.NodeIndex = To;
+		NTo.Rev = Edge.Rev;
+		Nodes[From].Neighbors->push_back(NTo);
+
+		NeighborData NFrom;
+		NFrom.NodeIndex = From;
+		NFrom.Rev = Edge.Rev;
+		Nodes[To].Neighbors->push_back(NFrom);
+		}
+
+	for (int i = 0; i < NodeCount; ++i)
+		AddNodeToList(&Nodes[i], &NotVisitedList);
+
+	int FamCount = 0;
+	while (!ListIsEmpty(&NotVisitedList))
+		{
+		int ClassSize = 0;
+		NodeData *Node = ListHead(&NotVisitedList);
+
+	// This node becomes the first in the family
+	// By convention, the first member defines reversal or lack thereof.
+		Node->Rev = REVSTATE_Normal;
+		assert(ListIsEmpty(&PendingList));
+		MoveBetweenLists(Node, &NotVisitedList, &PendingList);
+		while (!ListIsEmpty(&PendingList))
+			{
+			Node = ListHead(&PendingList);
+			assert(REVSTATE_Normal == Node->Rev || REVSTATE_Reversed == Node->Rev);
+			NeighborList *Neighbors = Node->Neighbors;
+			for (PtrNeighborList p = Neighbors->begin(); p != Neighbors->end(); ++p)
+				{
+				NeighborData &Neighbor = *p;
+				int NeighborIndex = Neighbor.NodeIndex;
+				NodeData *NeighborNode = &(Nodes[NeighborIndex]);
+				if (NodeIsInList(NeighborNode, &NotVisitedList))
+					{
+					NeighborNode->Rev = RevState(Node->Rev, Neighbor.Rev);
+					MoveBetweenLists(NeighborNode, &NotVisitedList, &PendingList);
+					}
+				}
+			++ClassSize;
+			MoveBetweenLists(Node, &PendingList, &CurrentList);
+			}
+
+		if (ClassSize >= MinComponentSize)
+			{
+			FamData *Fam = MakeFam(CurrentList);
+			Fams.push_back(Fam);
+			++FamCount;
+			}
+
+		CurrentList = 0;
+		}
+	return FamCount;
+	}
diff --git a/gff.cpp b/gff.cpp
new file mode 100755
index 0000000..6098dd1
--- /dev/null
+++ b/gff.cpp
@@ -0,0 +1,114 @@
+#include "piler2.h"
+
+// GFF fields are:
+// <seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes] [comments]
+//     0         1         2        3      4      5        6       7         8           9
+
+int GFFLineNr;
+
+// Destructive read -- pokes nuls onto FS
+int GetFields(char *Line, char **Fields, int MaxFields, char FS)
+	{
+	char *p = Line;
+	for (int FieldIndex = 0; FieldIndex < MaxFields; ++FieldIndex)
+		{
+		Fields[FieldIndex] = p;
+		char *Tab = strchr(p, FS);
+		char *End = Tab;
+		if (0 == End)
+			End = strchr(p, '\0');
+		size_t FieldLength = End - p;
+		if (FieldLength > MAX_GFF_FEATURE_LENGTH)
+			Quit("Max GFF field length exceeded, field is %d chars, max=%d, line %d",
+			  FieldLength, MAX_GFF_FEATURE_LENGTH, GFFLineNr);
+		if (0 == Tab)
+			return FieldIndex + 1;
+		*Tab = 0;
+		p = Tab + 1;
+		}
+	return MaxFields;
+	}
+
+bool GetNextGFFRecord(FILE *f, GFFRecord &Rec)
+	{
+	for (;;)
+		{
+		++GFFLineNr;
+		const char TAB = '\t';
+		char Line[MAX_GFF_LINE+1];
+		char *Ok = fgets(Line, sizeof(Line), f);
+		if (NULL == Ok)
+			{
+			if (feof(f))
+				return false;
+			Quit("Error reading GFF file, line=%d feof=%d ftell=%d ferror=%d errno=%d",
+			  GFFLineNr, feof(f), ftell(f), ferror(f), errno);
+			}
+		if ('#' == Line[0])
+			continue;
+		size_t n = strlen(Line);
+		if (0 == n)
+			Quit("fgets returned zero-length line");
+		if (Line[n-1] != '\n' && !feof(f))
+			Quit("Max line length in GFF file exceeded, line %d is %d chars long, max=%d",
+			  GFFLineNr, n - 1, MAX_GFF_LINE);
+		Line[n-1] = 0;	// delete newline
+
+		char *Fields[9];
+		int FieldCount = GetFields(Line, Fields, 9, '\t');
+		if (FieldCount < 8)
+			Quit("GFF record has < 8 fields, line %d", GFFLineNr);
+
+		const char *SeqName = Fields[0];
+		const char *Source = Fields[1];
+		const char *Feature = Fields[2];
+		const char *Start = Fields[3];
+		const char *End = Fields[4];
+		const char *Score = Fields[5];
+		const char *Strand = Fields[6];
+		const char *Frame = Fields[7];
+		const char *Attrs = "";
+		if (FieldCount > 8)
+			{
+	// Truncate attrs if comment found
+			Attrs = Fields[8];
+	//		char *Pound = strchr(Attrs, '#');
+	//		if (0 != Pound)
+	//			*Pound = 0;
+			}
+
+		strcpy(Rec.SeqName, SeqName);
+		strcpy(Rec.Source, Source);
+		strcpy(Rec.Feature, Feature);
+		Rec.Start = atoi(Start);
+		Rec.End = atoi(End);
+		Rec.Score = (float) atof(Score);
+		Rec.Strand = Strand[0];
+		Rec.Frame = Frame[0] == '.' ? -1 : atoi(Frame);
+		strcpy(Rec.Attrs, Attrs);
+		return true;
+		}
+	}
+
+// GFF fields are:
+// <seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes] [comments]
+//     0         1         2        3      4      5        6       7         8           9
+void WriteGFFRecord(FILE *f, const GFFRecord &Rec)
+	{
+	fprintf(f, "%s\t%s\t%s\t%d\t%d\t%.3g\t%c",
+	//           0   1   2   3   4   5     6   7   8
+	  Rec.SeqName,	// 0
+	  Rec.Source,	// 1
+	  Rec.Feature,	// 2
+	  Rec.Start,	// 3
+	  Rec.End,		// 4
+	  Rec.Score,	// 5
+	  Rec.Strand);	// 6
+
+	if (-1 == Rec.Frame)
+		fprintf(f, "\t.");
+	else
+		fprintf(f, "\t%d", Rec.Frame);
+
+	fprintf(f, "\t%s\n", Rec.Attrs);
+	}
diff --git a/gff2.cpp b/gff2.cpp
new file mode 100755
index 0000000..4a799f4
--- /dev/null
+++ b/gff2.cpp
@@ -0,0 +1,253 @@
+#include "piler2.h"
+
+#define GFFRecord GFFRecord2
+
+// GFF fields are:
+// <seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes] [comments]
+//     0         1         2        3      4      5        6       7         8           9
+
+static int GFFLineNr2;
+
+bool HasTargetAttrs(const char *Attrs)
+	{
+	const char *ptrTarget = strstr(Attrs, "Target ");
+	return 0 != ptrTarget;
+	}
+
+//  Piles 123 456
+//  0123456
+void ParsePilesAttrs(const char *Attrs, int *ptrQueryPile, int *ptrTargetPile)
+	{
+	const char *ptrPiles = strstr(Attrs, "Piles ");
+	if (0 == ptrPiles)
+		Quit("Piles attribute not found");
+
+	const char *ptrValues = ptrPiles + 6;
+	int n = sscanf(ptrValues, "%d %d", ptrQueryPile, ptrTargetPile);
+	if (n != 2)
+		Quit("ParsePilesAttrs, sscanf(%s)=%d",  ptrValues, n);
+	}
+
+//  BandClust 123
+//  01234567890
+void ParseBandClustAttrs(const char *Attrs, int *ptrBandClustIndex)
+	{
+	const char *ptrBandClust = strstr(Attrs, "BandClust ");
+	if (0 == ptrBandClust)
+		Quit("BandClust attribute not found");
+
+	const char *ptrIndex = ptrBandClust + 10;
+	*ptrBandClustIndex = atoi(ptrIndex);
+	}
+
+//  Pyramid 123
+//  01234567890
+void ParsePyramidAttrs(const char *Attrs, int *ptrClustIndex)
+	{
+	const char *p = strstr(Attrs, "Pyramid ");
+	if (0 == p)
+		Quit("Pyramid attribute not found");
+
+	const char *ptrIndex = p + 8;
+	*ptrClustIndex = atoi(ptrIndex);
+	}
+
+//  Target SeqName 123 456
+//  01234567
+void ParseTargetAttrs(const char *Attrs, char SeqName[],
+  int SeqNameBytes, int *ptrStart, int *ptrEnd)
+	{
+	const char *ptrTarget = strstr(Attrs, "Target ");
+	if (0 == ptrTarget)
+		Quit("Target attribute not found");
+
+	const char *ptrRest = ptrTarget + 7;
+	const char *ptrBlank = strchr(ptrRest, ' ');
+	if (0 == ptrBlank)
+		Quit("Invalid Target attributes '%s', missing blank", Attrs);
+	size_t NameLength = ptrBlank - ptrRest;
+	if (NameLength >= (size_t) SeqNameBytes)
+		Quit("Target name length too long '%s'", Attrs);
+	memcpy(SeqName, ptrRest, NameLength);
+	SeqName[NameLength] = 0;
+	int n = sscanf(ptrBlank+1, "%d %d", ptrStart, ptrEnd);
+	if (n != 2)
+		Quit("Invalid Target start/end attributes '%s', sscanf=%d", ptrBlank+1);
+	}
+
+// Destructive read -- pokes nuls onto FS
+static int GetFields2(char *Line, char **Fields, int MaxFields, char FS)
+	{
+	char *p = Line;
+	for (int FieldIndex = 0; FieldIndex < MaxFields; ++FieldIndex)
+		{
+		Fields[FieldIndex] = p;
+		char *Tab = strchr(p, FS);
+		char *End = Tab;
+		if (0 == End)
+			End = strchr(p, '\0');
+		size_t FieldLength = End - p;
+		if (FieldLength > MAX_GFF_FEATURE_LENGTH)
+			Quit("Max GFF field length exceeded, field is %d chars, max=%d, line %d",
+			  FieldLength, MAX_GFF_FEATURE_LENGTH, GFFLineNr2);
+		if (0 == Tab)
+			return FieldIndex + 1;
+		*Tab = 0;
+		p = Tab + 1;
+		}
+	return MaxFields;
+	}
+
+// WARNING: Line[] and Fields[]are overwritten
+// by each call to GetNextGFFRecord.
+// The Rec agument to GetNextGFFRecord returns
+// pointers into Line[].
+static char Line[MAX_GFF_LINE+1];
+static char *Fields[9];
+
+bool GetNextGFFRecord(FILE *f, GFFRecord &Rec)
+	{
+	for (;;)
+		{
+		++GFFLineNr2;
+		const char TAB = '\t';
+		char *Ok = fgets(Line, sizeof(Line), f);
+		if (NULL == Ok)
+			{
+			if (feof(f))
+				return false;
+			Quit("Error reading GFF file, line=%d feof=%d ftell=%d ferror=%d errno=%d",
+			  GFFLineNr2, feof(f), ftell(f), ferror(f), errno);
+			}
+		if ('#' == Line[0])
+			continue;
+		size_t n = strlen(Line);
+		if (0 == n)
+			Quit("fgets returned zero-length line");
+		if (Line[n-1] != '\n')
+			Quit("Max line length in GFF file exceeded, line %d is %d chars long, max=%d",
+			  GFFLineNr2, n - 1, MAX_GFF_LINE);
+		Line[n-1] = 0;	// delete newline
+
+		int FieldCount = GetFields2(Line, Fields, 9, '\t');
+		if (FieldCount < 8)
+			Quit("GFF record has < 8 fields, line %d", GFFLineNr2);
+
+		char *SeqName = Fields[0];
+		char *Source = Fields[1];
+		char *Feature = Fields[2];
+		char *Start = Fields[3];
+		char *End = Fields[4];
+		char *Score = Fields[5];
+		char *Strand = Fields[6];
+		char *Frame = Fields[7];
+		char *Attrs = Fields[8];
+
+	// Truncate attrs if comment found
+		char *Pound = strchr(Attrs, '#');
+		if (0 != Pound)
+			*Pound = 0;
+
+		Rec.SeqName = SeqName;
+		Rec.Source = Source;
+		Rec.Feature = Feature;
+		Rec.Start = atoi(Start);
+		Rec.End = atoi(End);
+		Rec.Score = (float) atof(Score);
+		Rec.Strand = Strand[0];
+		Rec.Frame = Frame[0] == '.' ? -1 : atoi(Frame);
+		Rec.Attrs = Attrs;
+
+		return true;
+		}
+	}
+
+// GFF fields are:
+// <seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes] [comments]
+//     0         1         2        3      4      5        6       7         8           9
+void WriteGFFRecord(FILE *f, const GFFRecord &Rec)
+	{
+	fprintf(f, "%s\t%s\t%s\t%d\t%d\t%.3g\t%c",
+	//           0   1   2   3   4   5     6   7   8
+	  Rec.SeqName,	// 0
+	  Rec.Source,	// 1
+	  Rec.Feature,	// 2
+	  Rec.Start,	// 3
+	  Rec.End,		// 4
+	  Rec.Score,	// 5
+	  Rec.Strand);	// 6
+
+	if (-1 == Rec.Frame)
+		fprintf(f, "\t.");
+	else
+		fprintf(f, "\t%d", Rec.Frame);
+
+	fprintf(f, "\t%s\n", Rec.Attrs);
+	}
+
+void SaveGFFStrings(GFFRecord &Rec)
+	{
+	Rec.SeqName = strsave(Rec.SeqName);
+	Rec.Feature = strsave(Rec.Feature);
+	Rec.Source = strsave(Rec.Source);
+	Rec.Attrs = strsave(Rec.Attrs);
+	}
+
+void FreeGFFStrings(GFFRecord &Rec)
+	{
+	free((void *) Rec.SeqName);
+	free((void *) Rec.Feature);
+	free((void *) Rec.Source);
+	free((void *) Rec.Attrs);
+
+	Rec.SeqName = 0;
+	Rec.Feature = 0;
+	Rec.Source = 0;
+	Rec.Attrs = 0;
+	}
+
+void GFFRecordToHit(const GLIX &Glix, const GFFRecord &Rec, HitData &Hit)
+	{
+	if (0 != strcmp(Rec.Feature, "hit"))
+		Quit("GFFRecordToHit: feature=%s", Rec.Feature);
+
+	const int QueryLength = Rec.End - Rec.Start + 1;
+	Hit.QueryFrom = Glix.LocalToGlobal(Rec.SeqName, Rec.Start - 1);
+	Hit.QueryTo = Hit.QueryFrom + QueryLength - 1;
+
+	char TargetName[MAX_GFF_FEATURE_LENGTH+1];
+	int TargetStart;
+	int TargetEnd;
+	ParseTargetAttrs(Rec.Attrs, TargetName, sizeof(TargetName), &TargetStart, &TargetEnd);
+
+	const int TargetLength = TargetEnd - TargetStart + 1;
+	Hit.TargetFrom = Glix.LocalToGlobal(TargetName, TargetStart - 1);
+	Hit.TargetTo = Hit.TargetFrom + TargetLength - 1;
+
+	Hit.Rev = (Rec.Strand == '-');
+	}
+
+void HitToGFFRecord(const GLIX &Glix, const HitData &Hit, GFFRecord &Rec, char AnnotBuffer[])
+	{
+	Rec.Source = "piler";
+	Rec.Feature = "hit";
+	Rec.Score = 0;
+	Rec.Frame = '.';
+	Rec.Strand = (Hit.Rev ? '-' : '+');
+
+	const int QueryLength = Hit.QueryTo - Hit.QueryFrom + 1;
+	int SeqQueryFrom;
+	const char *QueryLabel = Glix.GlobalToSeqPadded(Hit.QueryFrom, &SeqQueryFrom);
+	const int SeqQueryTo = SeqQueryFrom + QueryLength - 1;
+
+	Rec.SeqName = QueryLabel;
+	Rec.Start = SeqQueryFrom + 1;
+	Rec.End = SeqQueryTo + 1;
+
+	const int TargetLength = Hit.TargetTo - Hit.TargetFrom + 1;
+	int SeqTargetFrom;
+	const char *TargetLabel = Glix.GlobalToSeqPadded(Hit.TargetFrom, &SeqTargetFrom);
+	const int SeqTargetTo = SeqTargetFrom + TargetLength - 1;
+	sprintf(AnnotBuffer, "Target %s %d %d", TargetLabel, SeqTargetFrom + 1, SeqTargetTo + 1);
+	Rec.Attrs = AnnotBuffer;
+	}
diff --git a/gffset.cpp b/gffset.cpp
new file mode 100755
index 0000000..59232b0
--- /dev/null
+++ b/gffset.cpp
@@ -0,0 +1,98 @@
+/***
+GFFSet is a container class for a set of GFF
+records. This is to hide issues to do with
+retrieving from a file, efficient memory use
+etc. The initial implementation here is
+designed for simplicity; by hiding the details
+we can improve it later as needed without
+a big impact on the calling code.
+***/
+
+#include "piler2.h"
+
+#define GFFRecord GFFRecord2
+
+GFFSet::GFFSet()
+	{
+	m_Recs = 0;
+	m_RecordCount = 0;
+	m_RecordBufferSize = 0;
+	m_GLIX = 0;
+	m_IIX = 0;
+	}
+
+GFFSet::~GFFSet()
+	{
+	Free();
+	}
+
+void GFFSet::Free()
+	{
+	for (int i = 0; i < m_RecordCount; ++i)
+		FreeGFFStrings(m_Recs[i]);
+	freemem(m_Recs);
+
+	delete m_GLIX;
+	delete m_IIX;
+
+	m_Recs = 0;
+	m_RecordCount = 0;
+	m_RecordBufferSize = 0;
+	m_GLIX = 0;
+	m_IIX = 0;
+	}
+
+void GFFSet::FromFile(FILE *f)
+	{
+	Free();
+
+	GFFRecord Rec;
+	while (GetNextGFFRecord(f, Rec))
+		{
+		SaveGFFStrings(Rec);
+		Add(Rec);
+		}
+	}
+
+// Caller's respsonsibility to allocate memory
+// for string fields, Add() doesn't make copies.
+// SaveGFFStrings() makes copies if needed.
+void GFFSet::Add(const GFFRecord &Rec)
+	{
+	if (m_RecordCount > m_RecordBufferSize)
+		Quit("GFFSet::Add, m_RecordCount > m_BufferSize");
+	if (m_RecordCount == m_RecordBufferSize)
+		{
+		m_RecordBufferSize += BUFFER_SIZE_INC;
+		reall(m_Recs, GFFRecord, m_RecordBufferSize);
+		}
+	m_Recs[m_RecordCount++] = Rec;
+	}
+
+const GFFRecord &GFFSet::Get(int RecordIndex) const
+	{
+	if (RecordIndex < 0 || RecordIndex >= m_RecordCount)
+		Quit("GFFRecord::Get(%d) out of range %d", RecordIndex, m_RecordCount);
+	return m_Recs[RecordIndex];
+	}
+
+void GFFSet::MakeGLIX()
+	{
+	m_GLIX = new GLIX;
+	m_GLIX->Init();
+	m_GLIX->FromGFFSet(*this);
+	}
+
+void GFFSet::MakeIIX()
+	{
+	const int GlobalLength = m_GLIX->GetGlobalLength();
+	m_IIX = new IIX;
+	m_IIX->Init(GlobalLength);
+	m_IIX->SetGLIX(m_GLIX);
+
+	for (int RecordIndex = 0; RecordIndex < m_RecordCount; ++RecordIndex)
+		{
+		const GFFRecord &Rec = Get(RecordIndex);
+		m_IIX->AddLocal(Rec.SeqName, Rec.Start - 1, Rec.End - 1, RecordIndex);
+		}
+	}
diff --git a/gffset.h b/gffset.h
new file mode 100755
index 0000000..efad211
--- /dev/null
+++ b/gffset.h
@@ -0,0 +1,32 @@
+#define GFFRecord	GFFRecord2
+
+const int BUFFER_SIZE_INC = 4096;
+
+class GFFSet
+	{
+public:
+	GFFSet();
+	virtual ~GFFSet();
+	void Free();
+	
+	void FromFile(FILE *f);
+	void Add(const GFFRecord &Rec);
+
+	void MakeGLIX();
+	void MakeIIX();
+
+	GLIX *GetGLIX() const { return m_GLIX; }
+	IIX *GetIIX() const { return m_IIX; }
+
+	const GFFRecord &Get(int RecordIndex) const;
+	int GetRecordCount() const { return m_RecordCount; }
+
+private:
+	GFFRecord *m_Recs;
+	int m_RecordCount;
+	int m_RecordBufferSize;
+	GLIX *m_GLIX;
+	IIX *m_IIX;
+	};
+
+#undef GFFRecord
diff --git a/glix.cpp b/glix.cpp
new file mode 100755
index 0000000..dfc73ca
--- /dev/null
+++ b/glix.cpp
@@ -0,0 +1,304 @@
+/***
+GLIX = Global Local IndeX
+
+This index provides an efficient mapping of a
+set of sequences to and from a single set of 
+global coordinates such that each sequence has
+a unique range of positions. Global coordinates
+are convenient in a number of algorithms, and
+there may be quite large numbers of sequences
+(e.g., contigs or scaffolds in an early assembly),
+so efficiency is useful here.
+
+"Local" coordinate is (Label, SeqPos), where
+SeqPos is 0-based, i.e. is 0, 1 ... (SeqLength-1)
+for each sequence.
+
+"Global" coordindate is GlobalPos, where
+GlobalPos = 0, 1 ... (GlobalLength - 1).
+
+To convert from Local to Global, a hash table
+is used to look up by sequence name.
+
+To convert from Global to Local, the global
+sequence is divided into blocks of length B.
+A vector of length GlobalLength/B contains
+the sequence index for each block. Padding
+is therefore required to ensure that there
+are no blocks that map to two different
+sequences.
+***/
+
+#include "piler2.h"
+
+#define GFFRecord GFFRecord2
+
+GLIX::GLIX()
+	{
+	m_SeqCount = 0;
+	m_GlobalLength = 0;
+	m_HashTableSize = 0;
+	m_Pad = 0;
+	m_HashTable = 0;
+	m_SeqMap = 0;
+	}
+
+GLIX::~GLIX()
+	{
+	}
+
+void GLIX::Free()
+	{
+	// TODO@@
+	}
+
+void GLIX::Init(int Pad, int HashTableSize)
+	{
+	Free();
+
+	m_Pad = Pad;
+	m_HashTableSize = HashTableSize;
+	m_HashTable = all(SEQDATA *, HashTableSize);
+	zero(m_HashTable, SEQDATA *, HashTableSize);
+	}
+
+SEQDATA *GLIX::SeqIndexLookup(const char *Label) const
+	{
+	int h = Hash(Label, m_HashTableSize);
+	assert(h >= 0 && h < m_HashTableSize);
+	for (SEQDATA *i = m_HashTable[h]; i; i = i->Next)
+		if (0 == strcmp(i->Label, Label))
+			return i;
+	return 0;
+	}
+
+SEQDATA *GLIX::AddToIndex(const char *Label, int Length)
+	{
+	int h = Hash(Label, m_HashTableSize);
+	assert(h >= 0 && h < m_HashTableSize);
+
+	SEQDATA *i = all(SEQDATA, 1);
+	i->Label = strsave(Label);
+	i->Length = Length;
+	i->Next = m_HashTable[h];
+	m_HashTable[h] = i;
+	++m_SeqCount;
+	return i;
+	}
+
+void GLIX::AssignOffsets()
+	{
+	int Offset = 0;
+	for (int h = 0; h < m_HashTableSize; ++h)
+		{
+		for (SEQDATA *i = m_HashTable[h]; i; i = i->Next)
+			{
+			i->Offset = Offset;
+			Offset += i->Length;
+			const int NewOffset = RoundUp(Offset, m_Pad, SEQMAP_BLOCKSIZE);
+			i->RoundedLength = i->Length + NewOffset - Offset;
+			Offset = NewOffset;
+			}
+		}
+	m_GlobalLength = Offset;
+	}
+
+// Add() is typically used when creating a GLIX from GFF records.
+// It can be called multiple times with the same label, the
+// maximum position determines the sequence length.
+void GLIX::Add(const char *Label, int Pos)
+	{
+	SEQDATA *IE = SeqIndexLookup(Label);
+	if (0 == IE)
+		AddToIndex(Label, Pos + 1);
+	else
+		{
+		if (Pos + 1 > IE->Length)
+			IE->Length = Pos + 1;
+		}
+	}
+
+// Insert() is typically used when creating a GLIX from
+// a sequence file. It must be called exactly once per label.
+void GLIX::Insert(const char *Label, int Offset, int Length)
+	{
+	const SEQDATA *IE = SeqIndexLookup(Label);
+	if (0 != IE)
+		Quit("Duplicate sequence label %s", Label);
+
+	int h = Hash(Label, m_HashTableSize);
+	assert(h >= 0 && h < m_HashTableSize);
+
+	SEQDATA *i = all(SEQDATA, 1);
+	i->Label = strsave(Label);
+	i->Length = Length;
+	i->Offset = Offset;
+	i->Next = m_HashTable[h];
+	m_HashTable[h] = i;
+	++m_SeqCount;
+	}
+
+int GLIX::FromGFFFile(FILE *f)
+	{
+	int RecordCount = 0;
+	GFFRecord Rec;
+	while (GetNextGFFRecord(f, Rec))
+		{
+		++RecordCount;
+		Add(Rec.SeqName, Rec.End - 1);
+		if (HasTargetAttrs(Rec.Attrs))
+			{
+			char TargetName[MAX_GFF_FEATURE_LENGTH+1];
+			int Start;
+			int End;
+			ParseTargetAttrs(Rec.Attrs, TargetName, sizeof(TargetName), &Start, &End);
+			Add(TargetName, End - 1);
+			}
+		}
+	AssignOffsets();
+	return RecordCount;
+	}
+
+int GLIX::FromGFFSet(const GFFSet &Set)
+	{
+	const int RecordCount = Set.GetRecordCount();
+	for (int i = 0; i < RecordCount; ++i)
+		{
+		const GFFRecord &Rec = Set.Get(i);
+		Add(Rec.SeqName, Rec.End - 1);
+		}
+	AssignOffsets();
+	return RecordCount;
+	}
+
+void GLIX::LogMe() const
+	{
+	Log("m_SeqCount=%d\n", m_SeqCount);
+	Log("               Label      Length     Hash\n");
+	Log("--------------------  ----------  -------\n");
+	int Count = 0;
+	for (int h = 0; h < m_HashTableSize; ++h)
+		{
+		for (SEQDATA *i = m_HashTable[h]; i; i = i->Next)
+			{
+			Log("%20.20s  %10d  %7d\n", i->Label, i->Length, h);
+			++Count;
+			}
+		}
+	if (Count != m_SeqCount)
+		Log("\n**** ERROR ****   Count=%d\n", Count);
+	}
+
+int GLIX::GetGlobalLength() const
+	{
+	return m_GlobalLength;
+	}
+
+int GLIX::GetSeqCount() const
+	{
+	return m_SeqCount;
+	}
+
+int GLIX::GetSeqOffset(const char *Label) const
+	{
+	const SEQDATA *IE = SeqIndexLookup(Label);
+	if (0 == IE)
+		Quit("Label not found %s", Label);
+	return IE->Offset;
+	}
+
+int GLIX::GetSeqLength(const char *Label) const
+	{
+	const SEQDATA *IE = SeqIndexLookup(Label);
+	if (0 == IE)
+		Quit("Label not found %s", Label);
+	return IE->Length;
+	}
+
+void GLIX::MakeGlobalToLocalIndex()
+	{
+	if (m_GlobalLength%SEQMAP_BLOCKSIZE)
+		Quit("MakeGlobalToLocalIndex: expects rounded size");
+
+	const int BinCount = (m_GlobalLength + SEQMAP_BLOCKSIZE - 1)/SEQMAP_BLOCKSIZE;
+	m_SeqMap = all(SEQDATA *, BinCount);
+	zero(m_SeqMap, SEQDATA *, BinCount);
+
+	for (int h = 0; h < m_HashTableSize; ++h)
+		{
+		for (SEQDATA *p = m_HashTable[h]; p != 0; p = p->Next)
+			{
+			SEQDATA &IE = *p;
+			int From = p->Offset;
+			int To = From + p->Length - 1;
+
+			if (From%SEQMAP_BLOCKSIZE)
+				Quit("MakeGlobalToLocalIndex: expected rounded contig from");
+
+			int BinFrom = From/SEQMAP_BLOCKSIZE;
+			int BinTo = To/SEQMAP_BLOCKSIZE;
+			for (int Bin = BinFrom; Bin <= BinTo; ++Bin)
+				{
+				if (m_SeqMap[Bin] != 0)
+					Quit("MakeGlobalToLocalIndex: overlap error");
+				m_SeqMap[Bin] = p;
+				}
+			}
+		}
+	}
+
+const char *GLIX::GlobalToSeq(int GlobalPos, int *ptrSeqPos) const
+	{
+	if (GlobalPos < 0 || GlobalPos >= m_GlobalLength)
+		Quit("GlobalToSeqPos: invalid pos");
+	if (0 == m_SeqMap)
+		Quit("GLIX::MakeGlobalToLocalIndex not called");
+
+	const int Bin = GlobalPos/SEQMAP_BLOCKSIZE;
+	SEQDATA *IE = m_SeqMap[Bin];
+	if (0 == IE)
+		Quit("GlobalToSeqPos: doesn't map");
+	int SeqPos = GlobalPos - IE->Offset;
+	if (SeqPos < 0 || SeqPos >= IE->Length)
+		Quit("GlobalToSeqPos: out of bounds");
+	*ptrSeqPos = SeqPos;
+	return IE->Label;
+	}
+
+const char *GLIX::GlobalToSeqPadded(int GlobalPos, int *ptrSeqPos) const
+	{
+	if (GlobalPos < 0 || GlobalPos >= m_GlobalLength)
+		Quit("GlobalToSeqPos: invalid pos");
+	if (0 == m_SeqMap)
+		Quit("GLIX::MakeGlobalToLocalIndex not called");
+
+	const int Bin = GlobalPos/SEQMAP_BLOCKSIZE;
+	SEQDATA *IE = m_SeqMap[Bin];
+	if (0 == IE)
+		Quit("GlobalToSeqPos: doesn't map");
+	int SeqPos = GlobalPos - IE->Offset;
+	if (SeqPos < 0 || SeqPos >= IE->RoundedLength)
+		Quit("GlobalToSeqPos: out of bounds");
+	*ptrSeqPos = SeqPos;
+	return IE->Label;
+	}
+
+int GLIX::LocalToGlobal(const char *Label, int LocalPos) const
+	{
+	const SEQDATA *IE = SeqIndexLookup(Label);
+	if (0 == IE)
+		Quit("ContigToGlobal: contig not found (%s)", Label);
+	if (LocalPos < 0 || LocalPos >= IE->Length)
+		Quit("ContigToGlobal: out of bounds");
+	return IE->Offset + LocalPos;
+	}
+
+int GLIX::LocalToGlobalNoFail(const char *Label, int LocalPos) const
+	{
+	const SEQDATA *IE = SeqIndexLookup(Label);
+	if (0 == IE)
+		return -1;
+	if (LocalPos < 0 || LocalPos >= IE->Length)
+		return -1;
+	return IE->Offset + LocalPos;
+	}
diff --git a/glix.h b/glix.h
new file mode 100755
index 0000000..e210613
--- /dev/null
+++ b/glix.h
@@ -0,0 +1,60 @@
+#define GFFRecord GFFRecord2
+
+const int SEQMAP_BLOCKSIZE = 1024;
+const int DEFAULT_HASH_TABLE_SIZE = 17909;
+const int DEFAULT_PAD = 0;
+
+struct SEQDATA
+	{
+	char *Label;
+	int Length;
+	int RoundedLength;
+	int Offset;
+	SEQDATA *Next;
+	};
+
+class GLIX
+	{
+public:
+	GLIX();
+	virtual ~GLIX();
+	void Free();
+
+	void Init(int Pad = DEFAULT_PAD, int HashTableSize = DEFAULT_HASH_TABLE_SIZE);
+
+	const char *GlobalToSeq(int GlobalPos, int *ptrSeqPos) const;
+	const char *GlobalToSeqPadded(int GlobalPos, int *ptrSeqPos) const;
+	int LocalToGlobal(const char *Label, int LocalPos) const;
+	int LocalToGlobalNoFail(const char *Label, int LocalPos) const;
+
+	int FromGFFFile(FILE *f);
+	int FromGFFSet(const GFFSet &Set);
+
+	void Add(const char *Label, int LocalPos);
+	void Insert(const char *Label, int Offset, int Length);
+
+	void MakeGlobalToLocalIndex();
+
+	int GetGlobalLength() const;
+	int GetSeqCount() const;
+	int GetSeqOffset(const char *Label) const;
+	int GetSeqLength(const char *Label) const;
+	void LogMe() const;
+
+private:
+	SEQDATA *AddToIndex(const char *Label, int Length);
+	void AssignOffsets();
+
+	SEQDATA *SeqIndexLookup(const char *Label) const;
+
+private:
+	SEQDATA **m_HashTable;
+	SEQDATA **m_SeqMap;
+
+	int m_SeqCount;
+	int m_HashTableSize;
+	int m_GlobalLength;
+	int m_Pad;
+	};
+
+#undef	GFFRecord
diff --git a/hash.cpp b/hash.cpp
new file mode 100755
index 0000000..14dd92b
--- /dev/null
+++ b/hash.cpp
@@ -0,0 +1,164 @@
+#include "piler2.h"
+
+typedef  unsigned long  int  ub4;   /* unsigned 4-byte quantities */
+typedef  unsigned       char ub1;   /* unsigned 1-byte quantities */
+
+#define hashsize(n) ((ub4)1<<(n))
+#define hashmask(n) (hashsize(n)-1)
+
+/*
+--------------------------------------------------------------------
+mix -- mix 3 32-bit values reversibly.
+For every delta with one or two bits set, and the deltas of all three
+  high bits or all three low bits, whether the original value of a,b,c
+  is almost all zero or is uniformly distributed,
+* If mix() is run forward or backward, at least 32 bits in a,b,c
+  have at least 1/4 probability of changing.
+* If mix() is run forward, every bit of c will change between 1/3 and
+  2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
+mix() was built out of 36 single-cycle latency instructions in a 
+  structure that could supported 2x parallelism, like so:
+      a -= b; 
+      a -= c; x = (c>>13);
+      b -= c; a ^= x;
+      b -= a; x = (a<<8);
+      c -= a; b ^= x;
+      c -= b; x = (b>>13);
+      ...
+  Unfortunately, superscalar Pentiums and Sparcs can't take advantage 
+  of that parallelism.  They've also turned some of those single-cycle
+  latency instructions into multi-cycle latency instructions.  Still,
+  this is the fastest good hash I could find.  There were about 2^^68
+  to choose from.  I only looked at a billion or so.
+--------------------------------------------------------------------
+*/
+#define mix(a,b,c) \
+{ \
+  a -= b; a -= c; a ^= (c>>13); \
+  b -= c; b -= a; b ^= (a<<8); \
+  c -= a; c -= b; c ^= (b>>13); \
+  a -= b; a -= c; a ^= (c>>12);  \
+  b -= c; b -= a; b ^= (a<<16); \
+  c -= a; c -= b; c ^= (b>>5); \
+  a -= b; a -= c; a ^= (c>>3);  \
+  b -= c; b -= a; b ^= (a<<10); \
+  c -= a; c -= b; c ^= (b>>15); \
+}
+
+/*
+--------------------------------------------------------------------
+hash() -- hash a variable-length key into a 32-bit value
+  k       : the key (the unaligned variable-length array of bytes)
+  len     : the length of the key, counting by bytes
+  initval : can be any 4-byte value
+Returns a 32-bit value.  Every bit of the key affects every bit of
+the return value.  Every 1-bit and 2-bit delta achieves avalanche.
+About 6*len+35 instructions.
+
+The best hash table sizes are powers of 2.  There is no need to do
+mod a prime (mod is sooo slow!).  If you need less than 32 bits,
+use a bitmask.  For example, if you need only 10 bits, do
+  h = (h & hashmask(10));
+In which case, the hash table should have hashsize(10) elements.
+
+If you are hashing n strings (ub1 **)k, do it like this:
+  for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
+
+By Bob Jenkins, 1996.  bob_jenkins at burtleburtle.net.  You may use this
+code any way you wish, private, educational, or commercial.  It's free.
+
+See http://burtleburtle.net/bob/hash/evahash.html
+Use for hash table lookup, or anything where one collision in 2^^32 is
+acceptable.  Do NOT use for cryptographic purposes.
+--------------------------------------------------------------------
+*/
+
+unsigned Hash(const char *key)
+{
+	register ub1 *k = (ub1 *) key;
+	register ub4  length = (ub4) strlen(key);   /* the length of the key */
+	const ub4  initval = 12345; /* the previous hash, or an arbitrary value */
+   register ub4 a,b,c,len;
+
+   /* Set up the internal state */
+   len = length;
+   a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
+   c = initval;         /* the previous hash value */
+
+   /*---------------------------------------- handle most of the key */
+   while (len >= 12)
+   {
+      a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24));
+      b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24));
+      c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24));
+      mix(a,b,c);
+      k += 12; len -= 12;
+   }
+
+   /*------------------------------------- handle the last 11 bytes */
+   c += length;
+   switch(len)              /* all the case statements fall through */
+   {
+   case 11: c+=((ub4)k[10]<<24);
+   case 10: c+=((ub4)k[9]<<16);
+   case 9 : c+=((ub4)k[8]<<8);
+      /* the first byte of c is reserved for the length */
+   case 8 : b+=((ub4)k[7]<<24);
+   case 7 : b+=((ub4)k[6]<<16);
+   case 6 : b+=((ub4)k[5]<<8);
+   case 5 : b+=k[4];
+   case 4 : a+=((ub4)k[3]<<24);
+   case 3 : a+=((ub4)k[2]<<16);
+   case 2 : a+=((ub4)k[1]<<8);
+   case 1 : a+=k[0];
+     /* case 0: nothing left to add */
+   }
+   mix(a,b,c);
+   /*-------------------------------------------- report the result */
+   return c%HASH_TABLE_SIZE;
+}
+
+unsigned Hash(const char *key, unsigned TableSize)
+{
+	register ub1 *k = (ub1 *) key;
+	register ub4  length = (ub4) strlen(key);   /* the length of the key */
+	const ub4  initval = 12345; /* the previous hash, or an arbitrary value */
+   register ub4 a,b,c,len;
+
+   /* Set up the internal state */
+   len = length;
+   a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
+   c = initval;         /* the previous hash value */
+
+   /*---------------------------------------- handle most of the key */
+   while (len >= 12)
+   {
+      a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24));
+      b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24));
+      c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24));
+      mix(a,b,c);
+      k += 12; len -= 12;
+   }
+
+   /*------------------------------------- handle the last 11 bytes */
+   c += length;
+   switch(len)              /* all the case statements fall through */
+   {
+   case 11: c+=((ub4)k[10]<<24);
+   case 10: c+=((ub4)k[9]<<16);
+   case 9 : c+=((ub4)k[8]<<8);
+      /* the first byte of c is reserved for the length */
+   case 8 : b+=((ub4)k[7]<<24);
+   case 7 : b+=((ub4)k[6]<<16);
+   case 6 : b+=((ub4)k[5]<<8);
+   case 5 : b+=k[4];
+   case 4 : a+=((ub4)k[3]<<24);
+   case 3 : a+=((ub4)k[2]<<16);
+   case 2 : a+=((ub4)k[1]<<8);
+   case 1 : a+=k[0];
+     /* case 0: nothing left to add */
+   }
+   mix(a,b,c);
+   /*-------------------------------------------- report the result */
+   return c%TableSize;
+}
diff --git a/iix.cpp b/iix.cpp
new file mode 100755
index 0000000..2578534
--- /dev/null
+++ b/iix.cpp
@@ -0,0 +1,160 @@
+/***
+IIX = Interval IndeX.
+
+Given a fixed set of intervals on a 
+sequence of length L, provides an efficient
+way to retrieve all intervals in the set that
+have non-zero overlap with given interval.
+
+This implementation is designed to be simple
+to implement and efficient for the typical
+sets of intervals that I will be dealing
+with -- this is not a good general solution
+to the problem. The main assumption is that
+there are few long intervals.
+
+The sequence is divided into blocks of length B.
+For each block, there is a list of intervals
+that overlap that block. So an indexed interval
+(ii) of length > B will appear in more than one
+list, and in fact any ii of length > 1 may appear
+in more than one list if it happens to cross
+a block boundary. Intervals of length >> B are a
+problem because they will appear in many lists,
+but for my purposes they are rare.
+
+Each interval is associated with a user-defined
+integer. This can be used by the calling code to
+map an ii to some data, e.g. a GFF record. How
+this is done is up to the caller.
+***/
+
+#include "piler2.h"
+
+IIX::IIX()
+	{
+	m_GlobalLength = 0;
+	m_BlockLength = 0;
+	m_BlockCount = 0;
+	m_Blocks = 0;
+	m_GLIX = 0;
+	}
+
+IIX::~IIX()
+	{
+	Free();
+	}
+
+void IIX::Free()
+	{
+	// TODO@@
+	m_GlobalLength = 0;
+	}
+
+int IIX::PosToBlockIndex(int Pos) const
+	{
+	return Pos/m_BlockLength;
+	}
+
+void IIX::Init(int GlobalLength, int BlockLength)
+	{
+	m_GlobalLength = GlobalLength;
+	m_BlockLength = BlockLength;
+	m_BlockCount = (GlobalLength + BlockLength - 1)/BlockLength;
+	m_Blocks = all(INTERVAL *, m_BlockCount);
+	zero(m_Blocks, INTERVAL *, m_BlockCount);
+	m_GLIX = 0;
+	}
+
+bool IIX::ValidInterval(int From, int To) const
+	{
+	if (From < 0 || From >= m_GlobalLength)
+		return false;
+	if (To < 0 || To >= m_GlobalLength)
+		return false;
+	if (From > To)
+		return false;
+	return true;
+	}
+
+void IIX::AddGlobal(int GlobalFrom, int GlobalTo, int User)
+	{
+	assert(ValidInterval(GlobalFrom, GlobalTo));
+
+	const int BlockFrom = PosToBlockIndex(GlobalFrom);
+	const int BlockTo = PosToBlockIndex(GlobalTo);
+	for (int BlockIndex = BlockFrom; BlockIndex <= BlockTo; ++BlockIndex)
+		{
+		INTERVAL *ii = all(INTERVAL, 1);
+
+		ii->From = GlobalFrom;
+		ii->To = GlobalTo;
+		ii->User = User;
+		ii->Next = m_Blocks[BlockIndex];
+
+		m_Blocks[BlockIndex] = ii;
+		}
+	}
+
+void IIX::AddLocal(const char *Label, int LocalFrom, int LocalTo, int User)
+	{
+	if (0 == m_GLIX)
+		Quit("IIX::AddLocal: no GLIX");
+
+	int GlobalFrom = m_GLIX->LocalToGlobal(Label, LocalFrom);
+	int GlobalTo = GlobalFrom + (LocalTo - LocalFrom + 1);
+	return AddGlobal(GlobalFrom, GlobalTo, User);
+	}
+
+int IIX::LookupGlobal(int GlobalFrom, int GlobalTo, int **ptrHits) const
+	{
+	*ptrHits = 0;
+	if (!ValidInterval(GlobalFrom, GlobalTo))
+		return 0;
+
+	int *Hits = 0;
+	int HitCount = 0;
+	int HitBufferSize = 0;
+	const int BlockFrom = PosToBlockIndex(GlobalFrom);
+	const int BlockTo = PosToBlockIndex(GlobalTo);
+	for (int BlockIndex = BlockFrom; BlockIndex <= BlockTo; ++BlockIndex)
+		{
+		for (INTERVAL *ii = m_Blocks[BlockIndex]; ii; ii = ii->Next)
+			{
+			if (Overlap(GlobalFrom, GlobalTo, ii->From, ii->To) > 0)
+				{
+				if (HitCount >= HitBufferSize)
+					{
+					HitBufferSize += 128;
+					reall(Hits, int, HitBufferSize);
+					}
+				Hits[HitCount++] = ii->User;
+				}
+			}
+		}
+	*ptrHits = Hits;
+	return HitCount;
+	}
+
+int IIX::LookupLocal(const char *Label, int LocalFrom, int LocalTo, int **ptrHits) const
+	{
+	int GlobalFrom = m_GLIX->LocalToGlobalNoFail(Label, LocalFrom);
+	if (-1 == GlobalFrom)
+		return 0;
+
+	int GlobalTo = GlobalFrom + (LocalTo - LocalFrom + 1);
+	return LookupGlobal(GlobalFrom, GlobalTo, ptrHits);
+	}
+
+void IIX::LogMe() const
+	{
+	Log("IIX %d blocks\n", m_BlockCount);
+	Log(" Block        From          To        User\n");
+	Log("======  ==========  ==========  ==========\n");
+	for (int BlockIndex = 0; BlockIndex < m_BlockCount; ++BlockIndex)
+		{
+		for (INTERVAL *ii = m_Blocks[BlockIndex]; ii; ii = ii->Next)
+			Log("%6d  %10d  %10d  %10d\n",
+			  BlockIndex, ii->From, ii->To, ii->User);
+		}
+	}
diff --git a/iix.h b/iix.h
new file mode 100755
index 0000000..e49e882
--- /dev/null
+++ b/iix.h
@@ -0,0 +1,42 @@
+const int DEFAULT_BLOCK_LENGTH = 10000;
+
+// From and To are 0-based coordinates,
+// i.e. are >= 0 and < m_GlobalLength.
+struct INTERVAL
+	{
+	int From;
+	int To;
+	int User;
+	INTERVAL *Next;
+	};
+
+class IIX
+	{
+public:
+	IIX();
+	virtual ~IIX();
+
+	void Init(int GlobalLength, int BlockLength = DEFAULT_BLOCK_LENGTH);
+	void SetGLIX(GLIX *ptrGLIX) { m_GLIX = ptrGLIX; }
+
+	void Free();
+	void AddGlobal(int GlobalFrom, int GlobalTo, int User);
+	void AddLocal(const char *Label, int LocalFrom, int LocalTo, int User);
+
+	int LookupGlobal(int GlobalFrom, int GlobalTo, int **ptrHits) const;
+	int LookupLocal(const char *Label, int LocalFrom, int LocalTo, int **ptrHits) const;
+
+	bool ValidInterval(int From, int To) const;
+
+	void LogMe() const;
+
+private:
+	int PosToBlockIndex(int Pos) const;
+
+private:
+	int m_GlobalLength;
+	int m_BlockLength;
+	int m_BlockCount;
+	INTERVAL **m_Blocks;
+	GLIX *m_GLIX;
+	};
diff --git a/log.cpp b/log.cpp
new file mode 100755
index 0000000..f598ca9
--- /dev/null
+++ b/log.cpp
@@ -0,0 +1,51 @@
+#include "piler2.h"
+
+static FILE *g_fLog = 0;
+
+void SetLog()
+	{
+	bool Append = false;
+	const char *FileName = ValueOpt("log");
+	if (0 == FileName)
+		{
+		FileName = ValueOpt("loga");
+		if (0 == FileName)
+			return;
+		Append = true;
+		}
+
+	g_fLog = fopen(FileName, Append ? "a" : "w");
+	if (NULL == g_fLog)
+		{
+		fprintf(stderr, "\n*** FATAL ERROR *** Cannot open log file\n");
+		perror(FileName);
+		exit(1);
+		}
+	}
+
+void Log(const char Format[], ...)
+	{
+	if (0 == g_fLog)
+		return;
+
+	char Str[4096];
+	va_list ArgList;
+	va_start(ArgList, Format);
+	vsprintf(Str, Format, ArgList);
+	fprintf(g_fLog, "%s", Str);
+	fflush(g_fLog);
+	}
+
+void Warning(const char Format[], ...)
+	{
+	char Str[4096];
+	va_list ArgList;
+	va_start(ArgList, Format);
+	vsprintf(Str, Format, ArgList);
+	fprintf(stderr, "\n** WARNING ** %s\n", Str);
+	if (0 != g_fLog)
+		{
+		fprintf(g_fLog, "** WARNING ** %s\n", Str);
+		fflush(g_fLog);
+		}
+	}
diff --git a/main.cpp b/main.cpp
new file mode 100755
index 0000000..07e9959
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,87 @@
+#include "piler2.h"
+
+#if	WIN32
+#include <windows.h>
+#endif
+
+bool g_Quiet = false;
+const char *g_ProcessName = "piler2";
+
+int main(int argc, char *argv[])
+	{
+	g_ProcessName = argv[0];
+
+#if	WIN32
+// Multi-tasking does not work well in CPU-bound
+// console apps running under Win32.
+// Reducing the process priority allows GUI apps
+// to run responsively in parallel.
+	SetPriorityClass(GetCurrentProcess(), BELOW_NORMAL_PRIORITY_CLASS);
+#endif
+
+	ProcessArgVect(argc - 1, argv + 1);
+	SetLog();
+	g_Quiet = FlagOpt("quiet");
+
+	for (int i = 0; i < argc; ++i)
+		Log("%s ", argv[i]);
+	Log("\n");
+
+	if (ValueOpt("trs") != 0)
+		{
+		TRS();
+		return 0;
+		}
+	else if (ValueOpt("tan") != 0)
+		{
+		Tan();
+		return 0;
+		}
+	else if (ValueOpt("tr") != 0)
+		{
+		TR();
+		return 0;
+		}
+	else if (ValueOpt("trs2fasta"))
+		{
+		TRS2Fasta();
+		return 0;
+		}
+	else if (ValueOpt("tanmotif2fasta"))
+		{
+		Tanmotif2Fasta();
+		return 0;
+		}
+	else if (ValueOpt("cons"))
+		{
+		Cons();
+		return 0;
+		}
+	else if (ValueOpt("annot"))
+		{
+		Annot();
+		return 0;
+		}
+	else if (ValueOpt("annotedge"))
+		{
+		AnnotEdge();
+		return 0;
+		}
+	else if (ValueOpt("crisp"))
+		{
+		Crisp();
+		return 0;
+		}
+	else if (FlagOpt("help"))
+		{
+		Usage();
+		exit(0);
+		}
+	else if (FlagOpt("version"))
+		{
+		fprintf(stderr, PILER_LONG_VERSION "\n");
+		exit(0);
+		}
+	else
+		CommandLineError("No command specified");
+ 	}
diff --git a/makeannot.cpp b/makeannot.cpp
new file mode 100755
index 0000000..c99b40f
--- /dev/null
+++ b/makeannot.cpp
@@ -0,0 +1,373 @@
+#include "piler2.h"
+
+#define MAX(i, j)	((i) >= (j) ? (i) : (j))
+#define MIN(i, j)	((i) <= (j) ? (i) : (j))
+
+static double MIN_OVERLAP = 0.05;
+const int MAX_HITS = 8;
+const int MAX_STR = 256;
+const int MAP_CHARS = 20;
+const double MAX_MARGIN = 0.05;
+const int EDGE_BASES = 100;
+const int MIN_OVERLAP_EDGE = 32;
+
+struct AnnotHit
+	{
+	int Overlap;
+	int RepIndex;
+	};
+static AnnotHit AHs[MAX_HITS];
+static int AnnotHitCount;
+
+static char Str[MAX_STR+1];
+static int StrPos;
+
+static const RepData *g_Reps;
+static int g_RepCount;
+static bool g_Rev;
+static int g_Mid;
+
+static int InRegion(int ContigFrom, int ContigTo, int Pos)
+	{
+	if (Pos >= ContigFrom && Pos <= ContigTo)
+		return true;
+	if (Pos < ContigFrom && ContigFrom - Pos <= MAX_MARGIN)
+		return true;
+	if (Pos > ContigTo && Pos - ContigTo <= MAX_MARGIN)
+		return true;
+	return false;
+	}
+
+static int CmpAH(const void *a1, const void *a2)
+	{
+	const AnnotHit *h1 = (const AnnotHit *) a1;
+	const AnnotHit *h2 = (const AnnotHit *) a2;
+	int i1 = h1->RepIndex;
+	int i2 = h2->RepIndex;
+	assert(i1 >= 0 && i1 < g_RepCount);
+	assert(i2 >= 0 && i2 < g_RepCount);
+	if (g_Rev)
+		return g_Reps[i2].ContigFrom - g_Reps[i1].ContigFrom;
+	return g_Reps[i1].ContigFrom - g_Reps[i2].ContigFrom;
+	}
+
+static int CmpAHEdge(const void *a1, const void *a2)
+	{
+	const AnnotHit *h1 = (const AnnotHit *) a1;
+	const AnnotHit *h2 = (const AnnotHit *) a2;
+	int i1 = h1->RepIndex;
+	int i2 = h2->RepIndex;
+	assert(i1 >= 0 && i1 < g_RepCount);
+	assert(i2 >= 0 && i2 < g_RepCount);
+
+	const RepData &Rep1 = g_Reps[i1];
+	const RepData &Rep2 = g_Reps[i2];
+
+	int df1 = iabs(Rep1.ContigFrom - g_Mid);
+	int df2 = iabs(Rep2.ContigFrom - g_Mid);
+
+	int dt1 = iabs(Rep1.ContigTo - g_Mid);
+	int dt2 = iabs(Rep2.ContigTo - g_Mid);
+
+	int d1 = MIN(df1, dt1);
+	int d2 = MIN(df2, dt2);
+
+	return d1 - d2;
+	}
+
+static void SafeCatChar(char c)
+	{
+	if (StrPos >= MAX_STR)
+		return;
+	Str[StrPos++] = c;
+	Str[StrPos] = 0;
+	}
+
+static void SafeCat(const char *s)
+	{
+	while (char c = *s++)
+		{
+		if (StrPos >= MAX_STR)
+			break;
+		Str[StrPos++] = c;
+		}
+	Str[StrPos] = 0;
+	}
+
+static int GetAnnots(const char *Label, int ContigFrom, int ContigTo,
+  const RepData *Reps, int RepCount, int MinOverlap)
+	{
+	int AnnotHitCount = 0;
+	for (int RepIndex = 0; RepIndex < RepCount; ++RepIndex)
+		{
+		const RepData &Rep = Reps[RepIndex];
+		if (0 != strcmp(Label, Rep.ContigLabel))
+			continue;
+		int ov = Overlap(ContigFrom, ContigTo, Rep.ContigFrom, Rep.ContigTo);
+		if (ov < MinOverlap)
+			continue;
+
+	// Unconditionally accept if not maxed out
+		if (AnnotHitCount < MAX_HITS)
+			{
+			AHs[AnnotHitCount].Overlap = ov;
+			AHs[AnnotHitCount].RepIndex = RepIndex;
+			++AnnotHitCount;
+			}
+		else
+			{
+		// Overwrite shortest hit if this is longer
+			int SmallestOverlap = AHs[0].Overlap;
+			int SmallestIndex = 0;
+			for (int i = 1; i < MAX_HITS; ++i)
+				{
+				if (AHs[i].Overlap < SmallestOverlap)
+					{
+					SmallestOverlap = AHs[i].Overlap;
+					SmallestIndex = 0;
+					}
+				}
+			if (ov > SmallestOverlap)
+				{
+				AHs[SmallestIndex].RepIndex = RepIndex;
+				AHs[SmallestIndex].Overlap = ov;
+				}
+			}
+		}
+	return AnnotHitCount;
+	}
+
+static int PosToCharIndex(int RegionFrom, int RegionTo, int Pos, int MapChars)
+	{
+	if (MapChars <= 1)
+		return 0;
+
+	int RegionLength = RegionTo - RegionFrom + 1;
+	if (RegionLength <= 1)
+		return 0;
+
+	if (Pos < RegionFrom)
+		return 0;
+	if (Pos >= RegionTo)
+		return MapChars - 1;
+
+	int CharIndex = ((Pos - RegionFrom)*MapChars)/RegionLength;
+	if (CharIndex < 0 || CharIndex >= MapChars)
+		Quit("PosToCharIndex messed up");
+	return CharIndex;
+	}
+
+static void RevStr()
+	{
+	for (int i = 0; i < MAP_CHARS/2; ++i)
+		{
+		char c1 = Str[i];
+		char c2 = Str[MAP_CHARS-i-1];
+		Str[i] = c2;
+		Str[MAP_CHARS-i-1] = c1;
+		}
+	}
+
+const char *MakeAnnotEdge(const char *Label, int SeqFrom, int SeqTo, bool Rev,
+  const RepData *Reps, int RepCount)
+	{
+	StrPos = 0;
+	for (int i = 0; i < MAP_CHARS; ++i)
+		SafeCatChar('-');
+
+	const int Mid = (SeqFrom + SeqTo)/2;
+	int ContigFrom = SeqFrom - EDGE_BASES/2;
+	int ContigTo = SeqFrom + EDGE_BASES/2;
+
+	int From = ContigFrom;
+	int To = ContigTo;
+	if (From < 0)
+		From = 0;
+
+	AnnotHitCount = GetAnnots(Label, From, To, Reps, RepCount, MIN_OVERLAP_EDGE);
+	if (0 == AnnotHitCount)
+		return Str;
+
+// Global vars needed by CmpAHEdge
+	g_Rev = Rev;
+	g_Reps = Reps;
+	g_RepCount = RepCount;
+	g_Mid = Mid;
+// Sort by edge
+	qsort((void *) AHs, AnnotHitCount, sizeof(AnnotHit), CmpAHEdge);
+
+	for (int AnnotIndex = 0; AnnotIndex < AnnotHitCount; ++AnnotIndex)
+		{
+		const int RepIndex = AHs[AnnotIndex].RepIndex;
+		const RepData &Rep = Reps[RepIndex];
+
+		bool LeftEnd = false;
+		bool RightEnd = false;
+		int RepFrom = Rep.RepeatFrom;
+		if (-1 != RepFrom)
+			{
+			int RepTo = Rep.RepeatTo;
+			int RepLeft = Rep.RepeatLeft;
+			int RepLength = RepTo + RepLeft - 1;
+			if (RepLength <= 0)
+				RepLength = 9999;	// hack to avoid div by 0
+
+			int RepContigFrom = Rep.ContigFrom;
+			int RepContigTo = Rep.ContigTo;
+			if (Rep.Rev)
+				{
+				int Tmp = RepFrom;
+				RepFrom = RepLeft;
+				RepLeft = Tmp;
+				}
+			const double LeftMargin = (double) RepFrom / RepLength;
+			const double RightMargin = (double) RepLeft / RepLength;
+			bool LeftInRegion = InRegion(ContigFrom, ContigTo, Rep.ContigFrom);
+			bool RightInRegion = InRegion(ContigFrom, ContigTo, Rep.ContigTo);
+			LeftEnd = (LeftInRegion && LeftMargin <= MAX_MARGIN);
+			RightEnd = (RightInRegion && RightMargin <= MAX_MARGIN);
+			}
+
+		int MapFrom = PosToCharIndex(ContigFrom, ContigTo, Rep.ContigFrom, MAP_CHARS);
+		int MapTo = PosToCharIndex(ContigFrom, ContigTo, Rep.ContigTo, MAP_CHARS);
+
+		const char cLower = 'a' + AnnotIndex;
+		const char cUpper = 'A' + AnnotIndex;
+
+		if (LeftEnd)
+			Str[MapFrom] = cUpper;
+		else
+			Str[MapFrom] = cLower;
+
+		for (int i = MapFrom + 1; i <= MapTo - 1; ++i)
+			Str[i] = cLower;
+
+		if (RightEnd)
+			Str[MapTo] = cUpper;
+		else
+			Str[MapTo] = cLower;
+
+		SafeCat(" ");
+		SafeCat(Rep.RepeatName);
+
+		if (Rep.RepeatFrom >= 0)
+			{
+			int FullRepLength = Rep.RepeatTo + Rep.RepeatLeft;
+			int RepMissing = Rep.RepeatFrom + Rep.RepeatLeft;
+			double Pct = ((FullRepLength - RepMissing)*100.0)/FullRepLength;
+			char s[32];
+			sprintf(s, "(%.0f%%)", Pct);
+			SafeCat(s);
+			}
+		}
+
+	if (Rev)
+		RevStr();
+	return Str;
+	}
+
+const char *MakeAnnot(const char *Label, int SeqFrom, int SeqTo, bool Rev,
+  const RepData *Reps, int RepCount)
+	{
+	int ContigFrom = SeqFrom;
+	int ContigTo = SeqTo;
+	int Length = ContigTo - ContigFrom + 1;
+	const char *strMinRatio = ValueOpt("minratio");
+	if (strMinRatio != 0)
+		MIN_OVERLAP = atof(strMinRatio)/100.0;
+	const int MinOverlap = (int) (Length*MIN_OVERLAP);
+
+	StrPos = 0;
+	for (int i = 0; i < MAP_CHARS; ++i)
+		SafeCatChar('-');
+
+	AnnotHitCount = GetAnnots(Label, ContigFrom, ContigTo, Reps, RepCount,
+	  MinOverlap);
+	if (0 == AnnotHitCount)
+		return Str;
+
+// Global vars needed by CmpAH
+	g_Rev = Rev;
+	g_Reps = Reps;
+	g_RepCount = RepCount;
+// Sort by start pos
+	qsort((void *) AHs, AnnotHitCount, sizeof(AnnotHit), CmpAH);
+
+	for (int AnnotIndex = 0; AnnotIndex < AnnotHitCount; ++AnnotIndex)
+		{
+		const int RepIndex = AHs[AnnotIndex].RepIndex;
+		const RepData &Rep = Reps[RepIndex];
+		const int From = MAX(Rep.ContigFrom, ContigFrom);
+		const int To = MIN(Rep.ContigTo, ContigTo);
+
+		bool LeftEnd = false;
+		bool RightEnd = false;
+		int RepFrom = Rep.RepeatFrom;
+		if (-1 != RepFrom)
+			{
+			int RepTo = Rep.RepeatTo;
+			int RepLeft = Rep.RepeatLeft;
+			int RepLength = RepTo + RepLeft - 1;
+			if (RepLength <= 0)
+				RepLength = 9999;	// hack to avoid div by 0
+
+			if (Rep.ContigFrom < ContigFrom)
+				RepFrom += (ContigFrom - Rep.ContigFrom);
+			if (Rep.ContigTo > ContigTo)
+				RepTo -= Rep.ContigTo - ContigTo;
+
+			const double LeftMargin = (double) RepFrom / RepLength;
+			const double RightMargin = (double) RepLeft / RepLength;
+			LeftEnd = (LeftMargin <= MAX_MARGIN);
+			RightEnd = (RightMargin <= MAX_MARGIN);
+			}
+
+		int MapFrom = ((From - ContigFrom)*MAP_CHARS)/Length;
+		int MapTo = ((To - ContigFrom)*MAP_CHARS)/Length;
+
+		if (Rev)
+			{
+			int Tmp = MapFrom;
+			MapFrom = MAP_CHARS - MapTo - 1;
+			MapTo = MAP_CHARS - Tmp - 1;
+			}
+
+		if (MapFrom < 0 || MapFrom >= MAP_CHARS || MapTo < 0 || MapTo >= MAP_CHARS)
+			Quit("MakeAnnot: failed to map");
+
+		const char cLower = 'a' + AnnotIndex;
+		const char cUpper = 'A' + AnnotIndex;
+
+		if (LeftEnd)
+			Str[MapFrom] = cUpper;
+		else
+			Str[MapFrom] = cLower;
+
+		for (int i = MapFrom + 1; i <= MapTo - 1; ++i)
+			Str[i] = cLower;
+
+		if (RightEnd)
+			Str[MapTo] = cUpper;
+		else
+			Str[MapTo] = cLower;
+
+		SafeCat(" ");
+		SafeCat(Rep.RepeatName);
+
+		if (Rep.RepeatFrom >= 0)
+			{
+			int FullRepLength = Rep.RepeatTo + Rep.RepeatLeft;
+			int RepMissing = Rep.RepeatFrom + Rep.RepeatLeft;
+			if (ContigFrom > Rep.ContigFrom)
+				RepMissing += ContigFrom - Rep.ContigFrom;
+			if (Rep.ContigTo > ContigTo)
+				RepMissing += Rep.ContigTo - ContigTo;
+			double Pct = ((FullRepLength - RepMissing)*100.0)/FullRepLength;
+			char s[32];
+			sprintf(s, "(%.0f%%)", Pct);
+			SafeCat(s);
+			}
+		}
+
+	return Str;
+	}
diff --git a/mem.cpp b/mem.cpp
new file mode 100755
index 0000000..93bf2a8
--- /dev/null
+++ b/mem.cpp
@@ -0,0 +1,53 @@
+#include "piler2.h"
+
+#ifdef	_MSC_VER
+#include <crtdbg.h>
+#endif
+
+static int AllocatedBytes;
+static int PeakAllocatedBytes;
+
+// Allocate memory, fail on error, track total
+void *allocmem(int bytes)
+	{
+	assert(bytes < 0xfffffff0);		// check for overlow
+	char *p = (char *) malloc((size_t) (bytes + 4));
+	if (0 == p)
+		Quit("Out of memory (%d)", bytes);
+	int *pSize = (int *) p;
+	*pSize = bytes;
+	AllocatedBytes += bytes;
+	if (AllocatedBytes > PeakAllocatedBytes)
+		PeakAllocatedBytes = AllocatedBytes;
+	return p + 4;
+	}
+
+void freemem(void *p)
+	{
+	if (0 == p)
+		return;
+	int *pSize = (int *) ((char *) p - 4);
+	int bytes = *pSize;
+	assert(bytes <= AllocatedBytes);
+	AllocatedBytes -= bytes;
+	free(((char *) p) - 4);
+	}
+
+void chkmem()
+	{
+#ifdef	_MSC_VER
+	assert(_CrtCheckMemory());
+#endif
+	}
+
+void *reallocmem(void *p, int bytes)
+	{
+	if (0 == p)
+		return allocmem(bytes);
+
+	int *pSize = (int *) ((char *) p - 4);
+	int oldbytes = *pSize;
+	void *newp = allocmem(bytes);
+	memcpy(newp, p, oldbytes);
+	return newp;
+	}
diff --git a/options.cpp b/options.cpp
new file mode 100755
index 0000000..7342aca
--- /dev/null
+++ b/options.cpp
@@ -0,0 +1,156 @@
+#include "piler2.h"
+
+struct VALUE_OPT
+	{
+	const char *m_pstrName;
+	const char *m_pstrValue;
+	};
+
+struct FLAG_OPT
+	{
+	const char *m_pstrName;
+	bool m_bSet;
+	};
+
+static VALUE_OPT ValueOpts[] =
+	{
+	"trs",					0,
+	"trs2",					0,
+	"trs2fasta",			0,
+	"tanmotif2fasta",		0,
+	"cons",					0,
+	"arrays",				0,
+	"out",					0,
+	"piles",				0,
+	"images",				0,
+	"log",					0,
+	"loga",					0,
+	"seq",					0,
+	"path",					0,
+	"rep",					0,
+	"mincover",				0,
+	"maxlengthdiffpct",		0,
+	"maxfam",				0,
+	"label",				0,
+	"annot",				0,
+	"annotedge",			0,
+	"famsize",				0,
+	"prefix",				0,
+	"tan",					0,
+	"pyramid",				0,
+	"motif",				0,
+	"minhits",				0,
+	"maxmargin",			0,
+	"minratio",				0,
+	"tr",					0,
+	"cand",					0,
+	"mintrspacing",			0,
+	"maxtrspacing",			0,
+	"mintrlength",			0,
+	"minspacingratio",		0,
+	"minfam",				0,
+	"minhitratio",			0,
+	"mindistpairs",			0,
+	"crisp",				0,
+	};
+static int ValueOptCount = sizeof(ValueOpts)/sizeof(ValueOpts[0]);
+
+static FLAG_OPT FlagOpts[] =
+	{
+	"multihit",				0,
+	"quiet",				0,
+	"version",				0,
+	"help",					0,
+	"edge",					0,
+	"pilesonly",			0,
+	"exact",				0,
+	};
+static int FlagOptCount = sizeof(FlagOpts)/sizeof(FlagOpts[0]);
+
+void CommandLineError(const char *Format, ...)
+	{
+	va_list ArgList;
+	va_start(ArgList, Format);
+
+	char Str[4096];
+	vsprintf(Str, Format, ArgList);
+	Usage();
+	fprintf(stderr, "\n** Command line error** %s\n", Str);
+	exit(1);
+	}
+
+static bool TestSetFlagOpt(const char *Arg)
+	{
+	for (int i = 0; i < FlagOptCount; ++i)
+		if (!stricmp(Arg, FlagOpts[i].m_pstrName))
+			{
+			FlagOpts[i].m_bSet = true;
+			return true;
+			}
+	return false;
+	}
+
+static bool TestSetValueOpt(const char *Arg, const char *Value)
+	{
+	for (int i = 0; i < ValueOptCount; ++i)
+		if (!stricmp(Arg, ValueOpts[i].m_pstrName))
+			{
+			if (0 == Value)
+				CommandLineError("Option -%s must have value\n", Arg);
+			ValueOpts[i].m_pstrValue = strsave(Value);
+			return true;
+			}
+	return false;
+	}
+
+bool FlagOpt(const char *Name)
+	{
+	for (int i = 0; i < FlagOptCount; ++i)
+		if (!stricmp(Name, FlagOpts[i].m_pstrName))
+			return FlagOpts[i].m_bSet;
+	Quit("FlagOpt(%s) invalid", Name);
+	return false;
+	}
+
+const char *ValueOpt(const char *Name)
+	{
+	for (int i = 0; i < ValueOptCount; ++i)
+		if (!stricmp(Name, ValueOpts[i].m_pstrName))
+			return ValueOpts[i].m_pstrValue;
+	Quit("ValueOpt(%s) invalid", Name);
+	return 0;
+	}
+
+const char *RequiredValueOpt(const char *Name)
+	{
+	const char *s = ValueOpt(Name);
+	if (0 == s)
+		CommandLineError("Required option -%s not specified\n", Name);
+	return s;
+	}
+
+void ProcessArgVect(int argc, char *argv[])
+	{
+	for (int iArgIndex = 0; iArgIndex < argc; )
+		{
+		const char *Arg = argv[iArgIndex];
+		if (Arg[0] != '-')
+			Quit("Command-line option \"%s\" must start with '-'\n", Arg);
+		const char *ArgName = Arg + 1;
+		if (TestSetFlagOpt(ArgName))
+			{
+			++iArgIndex;
+			continue;
+			}
+		
+		char *Value = 0;
+		if (iArgIndex < argc - 1)
+			Value = argv[iArgIndex+1];
+		if (TestSetValueOpt(ArgName, Value))
+			{
+			iArgIndex += 2;
+			continue;
+			}
+		CommandLineError("Invalid command line option \"%s\"\n", ArgName);
+		}
+	}
diff --git a/params.h b/params.h
new file mode 100755
index 0000000..1345644
--- /dev/null
+++ b/params.h
@@ -0,0 +1,8 @@
+const int BITS_PER_INT = 32;
+const int CONTIG_MAP_BIN_SIZE = 1024;
+const int CHUNK_LENGTH = 1;//TODO?
+const int MAX_GFF_LINE = 1023;
+const int MAX_GFF_FEATURE_LENGTH = 128;
+const int HASH_TABLE_SIZE = 17909;
+const int FASTA_BLOCK = 60;
+const int DEFAULT_MAX_FAM = 16;
diff --git a/piler2.h b/piler2.h
new file mode 100755
index 0000000..7f10059
--- /dev/null
+++ b/piler2.h
@@ -0,0 +1,148 @@
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <assert.h>
+#include <errno.h>
+#include <stdarg.h>
+
+#define PILER_LONG_VERSION "PILER v1.0"
+
+#if	_MSC_VER
+#pragma warning(disable:4800)	// don't warn about bool->int conversion
+#endif
+
+#ifdef _DEBUG
+#define DEBUG	1
+#endif
+
+#ifdef	WIN32
+#define FILEIO_BINARY_MODE	1
+#else
+#define FILEIO_BINARY_MODE	0
+#define stricmp strcasecmp
+#define strnicmp strncasecmp
+#endif
+
+#include "params.h"
+#include "types.h"
+
+extern bool g_Quiet;
+extern const char *g_ProcessName;
+
+// Memory wrappers.
+// Macro hacks, but makes code more readable
+// by hiding cast and sizeof.
+#define	all(t, n)		(t *) allocmem((n)*sizeof(t))
+#define	reall(p, t, n)		p = (t *) reallocmem(p, (n)*sizeof(t))
+#define zero(p,	t, n)	memset(p, 0, (n)*sizeof(t))
+void *allocmem(int bytes);
+void freemem(void *p);
+void *reallocmem(void *p, int bytes);
+
+char *strsave(const char *s);
+
+FILE *OpenStdioFile(const char *FileName, FILEIO_MODE Mode = FILEIO_MODE_ReadOnly);
+void Rewind(FILE *f);
+int GetFileSize(FILE *f);
+
+// FASTA input / output
+// MFA=multi FASTA (>= 1 sequence in file)
+// AFA=aligned FASTA (>= 1 sequence with gaps, must be same length)
+char *ReadMFA(const char FileName[], int *ptrSeqLength);
+char *ReadAFA(const char FileName[], int *ptrSeqLength, int *ptrSeqCount);
+void WriteFasta(FILE *f, const char *Seq, int Length, const char *Label, bool Rev = false);
+
+void Usage();
+void Quit(const char szFormat[], ...);
+
+void SetLog();
+void Log(const char Format[], ...);
+void Warning(const char Format[], ...);
+
+void ProgressStart(const char *Format, ...);
+void Progress(const char *Format, ...);
+void ProgressDone();
+int GetElapsedSecs();
+unsigned GetMaxMemUseBytes();
+
+void ProcessArgVect(int argc, char *argv[]);
+const char *ValueOpt(const char *Name);
+const char *RequiredValueOpt(const char *Name);
+bool FlagOpt(const char *Name);
+void CommandLineError(const char *Format, ...);
+
+unsigned GetPeakMemUseBytes();
+unsigned GetMemUseBytes();
+double GetRAMSize();
+
+int FindConnectedComponents(EdgeList &Edges, FamList &Fams,
+  int MinComponentSize);
+
+void AddContig(const char *Label, int GlobalPos, int Length);
+void AddContigPos(const char *Label, int Pos);
+void LogContigs();
+int GlobalizeContigs();
+const char *GlobalToContig(int GlobalPos, int *ptrContigPos);
+int ContigToGlobal(int ContigPos, const char *Label);
+void MakeContigMap();
+void SetSeqLength(int Length);
+unsigned Hash(const char *key);
+unsigned Hash(const char *key, unsigned TableSize);
+int Overlap(int From1, int To1, int From2, int To2);
+int RoundUp(int i, int MinInc, int MultipleOf);
+
+static inline int iabs(int i)
+	{
+	return i > 0 ? i : -i;
+	}
+
+const char *MakeAnnot(const char *Label, int Start, int End, bool Rev,
+  const RepData *Reps, int RepCount);
+const char *MakeAnnotEdge(const char *Label, int SeqFrom, int SeqTo, bool Rev,
+  const RepData *Reps, int RepCount);
+
+// GFF helpers
+int GetFields(char *Line, char **Fields, int MaxFields, char FS);
+bool GetNextGFFRecord(FILE *f, GFFRecord &Rec);
+void WriteGFFRecord(FILE *f, const GFFRecord &Rec);
+extern int GFFLineNr;
+
+bool GetNextGFFRecord(FILE *f, GFFRecord2 &Rec);
+void WriteGFFRecord(FILE *f, const GFFRecord2 &Rec);
+
+void GFFRecordToHit(const GLIX &Glix, const GFFRecord2 &Rec, HitData &Hit);
+void HitToGFFRecord(const GLIX &Glix, const HitData &Hit, GFFRecord2 &Rec, char AnnotBuffer[]);
+void FreeGFFStrings(GFFRecord2 &Rec);
+void SaveGFFStrings(GFFRecord2 &Rec);
+
+bool HasTargetAttrs(const char *Attrs);
+void ParseTargetAttrs(const char *Attrs, char SeqName[],
+  int SeqNameBytes, int *ptrStart, int *ptrEnd);
+void ParsePilesAttrs(const char *Attrs, int *ptrQueryPile, int *ptrTargetPile);
+
+// GFF input
+HitData *ReadHits(const char *FileName, int *ptrHitCount, int *ptrSeqLength);
+TRSData *ReadTRS(const char *FileName, int *ptrTRSCount);
+RepData *ReadReps(const char *FileName, int *ptrRepCount);
+MotifData *ReadMotif(const char *FileName, int *ptrMotifCount);
+
+// GFF output
+void WriteTRSFile(const char *OutputFileName, const PileData *Piles, int PileCount);
+void WritePiles(const char *OutputFileName, const PileData *Piles, int PileCount);
+void WriteImages(const char *OutputFileName, const HitData *Hits, int PileCount,
+  const PILE_INDEX_TYPE *PileIndexes);
+void WriteCrispFile(const char *OutputFileName, const PileData *Piles, int PileCount);
+void WriteArray(FILE *f, int ArrayIndex, int Lo, int Hi);
+
+// Commands
+void TR();
+void TRS();
+//void TRS2();
+void TRS2Fasta();
+void Cons();
+void Annot();
+void AnnotEdge();
+void Tanmotif2Fasta();
+void Tan();
+void Crisp();
diff --git a/progress.cpp b/progress.cpp
new file mode 100755
index 0000000..c800f39
--- /dev/null
+++ b/progress.cpp
@@ -0,0 +1,67 @@
+#include "piler2.h"
+#include <time.h>
+
+static time_t g_StartTime = time(0);
+static time_t g_StartTimeStep;
+static char *g_Desc = 0;
+
+int GetElapsedSecs()
+	{
+	return (int) (time(0) - g_StartTime);
+	}
+
+static void ProgressPrefix()
+	{
+	int ElapsedSecs = (int) (time(0) - g_StartTime);
+	unsigned MemUseMB = (GetMemUseBytes() + 500000)/1000000;
+	unsigned RAMMB = (unsigned) ((GetRAMSize() + 500000)/1000000);
+	unsigned MemUsePct = (MemUseMB*100)/RAMMB;
+	Log("%4d s  %4d Mb (%3d%%) ", ElapsedSecs, MemUseMB, MemUsePct);
+
+	if (!g_Quiet)
+		fprintf(stderr, "%4d s  %6d Mb (%3d%%) ", ElapsedSecs, MemUseMB, MemUsePct);
+	}
+
+void ProgressStart(const char *Format, ...)
+	{
+	ProgressPrefix();
+
+	g_StartTimeStep = time(0);
+	char Str[4096];
+	va_list ArgList;
+	va_start(ArgList, Format);
+	vsprintf(Str, Format, ArgList);
+	if (g_Desc != 0)
+		free(g_Desc);
+	g_Desc = strsave(Str);
+	Log("%s\n", Str);
+
+	if (!g_Quiet)
+		fprintf(stderr, "%s\n", Str);
+	}
+
+void ProgressDone()
+	{
+	if (0 == g_Desc)
+		return;
+
+	ProgressPrefix();
+
+	int Secs = (int) (time(0) - g_StartTimeStep);
+	Log("%s done (%d s).\n", g_Desc, Secs);
+
+	if (!g_Quiet)
+		fprintf(stderr, "%s done (%d s).\n", g_Desc, Secs);
+	}
+
+void Progress(const char *Format, ...)
+	{
+	char Str[4096];
+	va_list ArgList;
+	va_start(ArgList, Format);
+	vsprintf(Str, Format, ArgList);
+	Log("%s\n", Str);
+
+	if (!g_Quiet)
+		fprintf(stderr, "%s\n", Str);
+	}
diff --git a/quit.cpp b/quit.cpp
new file mode 100755
index 0000000..084014c
--- /dev/null
+++ b/quit.cpp
@@ -0,0 +1,38 @@
+#include "piler2.h"
+
+#ifdef WIN32
+#define _WIN32_WINNT 0x0400
+#include <windows.h>
+
+void Break()
+	{
+	DebugBreak();
+	}
+#endif
+
+// Exit immediately with error message, printf-style.
+void Quit(const char szFormat[], ...)
+	{
+	va_list ArgList;
+	char szStr[4096];
+
+	va_start(ArgList, szFormat);
+	vsprintf(szStr, szFormat, ArgList);
+
+	fprintf(stderr, "\n*** FATAL ERROR ***  %s %s\n", g_ProcessName, szStr);
+
+	Log("\n*** FATAL ERROR ***  ");
+	Log("%s\n", szStr);
+
+#if	DEBUG
+#ifdef WIN32
+	if (IsDebuggerPresent())
+		{
+		int iBtn = MessageBox(NULL, szStr, "piler", MB_ICONERROR | MB_OKCANCEL);
+		if (IDCANCEL == iBtn)
+			Break();
+		}
+#endif
+#endif
+	exit(1);
+	}
diff --git a/readafa.cpp b/readafa.cpp
new file mode 100755
index 0000000..1587780
--- /dev/null
+++ b/readafa.cpp
@@ -0,0 +1,86 @@
+#include "piler2.h"
+
+char *ReadAFA(FILE *f, int *ptrSeqLength, int *ptrSeqCount)
+	{
+	rewind(f);
+	int FileSize = GetFileSize(f);
+	int BufferSize = FileSize;
+	char *Buffer = all(char, BufferSize);
+
+	char prev_c = '\n';
+	bool InLabel = false;
+	int Pos = 0;
+	int SeqStart = 0;
+	int SeqCount = 0;
+	int SeqLength;
+
+	for (;;)
+		{
+		int c = fgetc(f);
+		if (EOF == c)
+			{
+			if (feof(f))
+				break;
+			Quit("Stream error");
+			}
+		if (InLabel)
+			{
+			if (c == '\r')
+				continue;
+			if ('\n' == c)
+				InLabel = false;
+			}
+		else
+			{
+			if ('>' == c && '\n' == prev_c)
+				{
+				int ThisSeqLength = Pos - SeqStart;
+				if (0 == SeqCount)
+					;
+				else if (1 == SeqCount)
+					SeqLength = ThisSeqLength;
+				else if (SeqCount > 1)
+					{
+					if (SeqLength != ThisSeqLength)
+						Quit("ReadAFA: %u seqs, sequence lengths differ (a) %d %d",
+						  SeqCount, SeqLength, ThisSeqLength);
+					}
+				++SeqCount;
+				SeqStart = Pos;
+				InLabel = true;
+				}
+			else if (!isspace(c))
+				{
+				if (Pos >= BufferSize)
+					Quit("ReadAFA: buffer too small");
+				Buffer[Pos++] = (c);
+				}
+			}
+		prev_c = c;
+		}
+
+	int ThisSeqLength = Pos - SeqStart;
+	if (1 == SeqCount)
+		SeqLength = ThisSeqLength;
+	else
+		{
+		if (SeqLength != ThisSeqLength)
+			Quit("ReadAFA: %u seqs, sequence lengths differ (b) %d %d",
+			  SeqCount, SeqLength, ThisSeqLength);
+		}
+
+	*ptrSeqCount = SeqCount;
+	*ptrSeqLength = SeqLength;
+
+	if (Pos != SeqCount*SeqLength)
+		Quit("ReadAFA: Internal error");
+	return Buffer;
+	}
+
+char *ReadAFA(const char FileName[], int *ptrSeqLength, int *ptrSeqCount)
+	{
+	FILE *f = OpenStdioFile(FileName);
+	char *Seqs = ReadAFA(f, ptrSeqLength, ptrSeqCount);
+	fclose(f);
+	return Seqs;
+	}
diff --git a/readhits.cpp b/readhits.cpp
new file mode 100755
index 0000000..9d6356a
--- /dev/null
+++ b/readhits.cpp
@@ -0,0 +1,121 @@
+#include "piler2.h"
+
+// Destructive: pokes Rec.Attrs
+static char *GetTarget(GFFRecord &Rec, int *ptrStart, int *ptrEnd)
+	{
+	char *Attrs = Rec.Attrs;
+	char *SemiColon = strchr(Attrs, ';');
+	if (0 != SemiColon)
+		*SemiColon = 0;
+	char *Space = strchr(Attrs, ' ');
+	if (0 == Space)
+		Quit("GFF hit record line %d, missing space in attrs", GFFLineNr);
+	*Space = 0;
+	if (0 != strcmp(Attrs, "Target"))
+		Quit("GFF hit record line %d, expected Target as first attr", GFFLineNr);
+	char *Label = Space + 1;
+	Space = strchr(Label, ' ');
+	if (0 == Space)
+		Quit("GFF hit record line %d, missing space following Target label", GFFLineNr);
+	*Space = 0;
+	char *Start = Space + 1;
+	Space = strchr(Start, ' ');
+	if (0 == Space)
+		Quit("GFF hit record line %d, missing space following Target start", GFFLineNr);
+	*Space = 0;
+	char *End = Space + 1;
+	*ptrStart = atoi(Start);
+	*ptrEnd = atoi(End);
+	return Label;
+	}
+
+static int ReadHitsPass1(FILE *f)
+	{
+	GFFLineNr = 0;
+	int HitCount = 0;
+	GFFRecord Rec;
+	while (GetNextGFFRecord(f, Rec))
+		{
+		if (0 != strcmp(Rec.Feature, "hit"))
+			continue;
+		if (Rec.Start <= 0 || Rec.End <= 0 || Rec.Start > Rec.End)
+			Warning("GFF line %d: invalid start %d / end %d",
+			  GFFLineNr, Rec.Start, Rec.End);
+
+		int TargetStart;
+		int TargetEnd;
+		const char *TargetLabel = GetTarget(Rec, &TargetStart, &TargetEnd);
+		if (TargetStart <= 0 || TargetEnd <= 0 || TargetStart > TargetEnd)
+			Warning("GFF line %d: invalid target start %d / end %d",
+			  GFFLineNr, Rec.Start, Rec.End);
+
+		++HitCount;
+		AddContigPos(Rec.SeqName, Rec.End - 1);
+		AddContigPos(TargetLabel, TargetEnd - 1);
+		}
+	return HitCount;
+	}
+
+static int ReadHitsPass2(FILE *f, HitData *Hits)
+	{
+	rewind(f);
+
+	GFFLineNr = 0;
+	int HitCount = 0;
+	GFFRecord Rec;
+	while (GetNextGFFRecord(f, Rec))
+		{
+		if (0 != strcmp(Rec.Feature, "hit"))
+			continue;
+		if (Rec.Start <= 0 || Rec.End <= 0 || Rec.Start > Rec.End)
+			Warning("GFF line %d: invalid start %d / end %d",
+			  GFFLineNr, Rec.Start, Rec.End);
+
+		int TargetStart;
+		int TargetEnd;
+		const char *TargetLabel = GetTarget(Rec, &TargetStart, &TargetEnd);
+		if (TargetStart <= 0 || TargetEnd <= 0 || TargetStart > TargetEnd)
+			Warning("GFF line %d: invalid target start %d / end %d",
+			  GFFLineNr, Rec.Start, Rec.End);
+
+		HitData &Hit = Hits[HitCount];
+
+		const int QueryFrom = ContigToGlobal(Rec.Start, Rec.SeqName);
+		const int TargetFrom = ContigToGlobal(TargetStart, TargetLabel);
+
+		const int QueryLength = Rec.End - Rec.Start + 1;
+		const int TargetLength = TargetEnd - TargetStart + 1;
+
+		Hit.QueryFrom = QueryFrom;
+		Hit.QueryTo = QueryFrom + QueryLength - 1;
+		Hit.TargetFrom = TargetFrom;
+		Hit.TargetTo = TargetFrom + TargetLength - 1;
+
+		if (Rec.Strand == '+')
+			Hit.Rev = false;
+		else if (Rec.Strand == '-')
+			Hit.Rev = true;
+		else
+			Quit("GFF line %d, Invalid strand %c", GFFLineNr, Rec.Strand);
+
+		++HitCount;
+		}
+	return HitCount;
+	}
+
+HitData *ReadHits(const char *FileName, int *ptrHitCount, int *ptrSeqLength)
+	{
+	FILE *f = OpenStdioFile(FileName);
+
+	int HitCount = ReadHitsPass1(f);
+	int SeqLength = GlobalizeContigs();
+	MakeContigMap();
+
+	HitData *Hits = all(HitData, HitCount);
+	ReadHitsPass2(f, Hits);
+	fclose(f);
+
+	*ptrSeqLength = SeqLength;
+	*ptrHitCount = HitCount;
+	return Hits;
+	}
diff --git a/readmfa.cpp b/readmfa.cpp
new file mode 100755
index 0000000..4a3d753
--- /dev/null
+++ b/readmfa.cpp
@@ -0,0 +1,98 @@
+#include "piler2.h"
+
+#define APPEND_CHAR(c)							\
+	{											\
+	if (Pos >= BufferSize)						\
+		Quit("ReadMFA: buffer too small");		\
+	Buffer[Pos++] = (c);						\
+	}
+
+#define APPEND_LABEL(c)							\
+	{											\
+	if (LabelLength >= LabelBufferLength-1)		\
+		{										\
+		LabelBufferLength += 128;				\
+		char *NewLabel = all(char, LabelBufferLength); \
+		memcpy(NewLabel, Label, LabelLength);	\
+		freemem(Label);							\
+		Label = NewLabel;						\
+		}										\
+	Label[LabelLength] = c;						\
+	++LabelLength;								\
+	}
+
+char *ReadMFA(FILE *f, int *ptrLength)
+	{
+	rewind(f);
+	int FileSize = GetFileSize(f);
+	int BufferSize = FileSize;
+	char *Buffer = all(char, BufferSize);
+
+	char prev_c = '\n';
+	bool InLabel = false;
+	int ContigFrom = 0;
+	char *Label = 0;
+	int LabelLength = 0;
+	int LabelBufferLength = 0;
+	int Pos = 0;
+	int ContigStart = 0;
+
+	for (;;)
+		{
+		int c = fgetc(f);
+		if (EOF == c)
+			{
+			if (feof(f))
+				break;
+			Quit("Stream error");
+			}
+		if (InLabel)
+			{
+			if (c == '\r')
+				continue;
+			if ('\n' == c)
+				{
+				Label[LabelLength] = 0;
+				InLabel = false;
+				}
+			else
+				{
+				APPEND_LABEL(c)
+				}
+			}
+		else
+			{
+			if ('>' == c && '\n' == prev_c)
+				{
+				int ContigLength = Pos - ContigStart;
+				if (ContigLength > 0)
+					AddContig(Label, ContigStart, ContigLength);
+
+				ContigStart = Pos;
+				InLabel = true;
+				LabelLength = 0;
+				}
+			else if (!isspace(c))
+				{
+				APPEND_CHAR(c)
+				}
+			}
+		prev_c = c;
+		}
+
+	int ContigLength = Pos - ContigStart;
+	if (ContigLength > 0)
+		AddContig(Label, ContigStart, ContigLength);
+
+	*ptrLength = Pos;
+	SetSeqLength(Pos);
+	return Buffer;
+	}
+
+char *ReadMFA(const char FileName[], int *ptrSeqLength)
+	{
+	FILE *f = OpenStdioFile(FileName);
+	char *Seq = ReadMFA(f, ptrSeqLength);
+	fclose(f);
+	return Seq;
+	}
diff --git a/readmotif.cpp b/readmotif.cpp
new file mode 100755
index 0000000..a662c60
--- /dev/null
+++ b/readmotif.cpp
@@ -0,0 +1,74 @@
+#include "piler2.h"
+
+static int GetFam(GFFRecord &Rec)
+	{
+	char *Attrs = Rec.Attrs;
+	char *Pyr = strstr(Attrs, "Pyramid");
+	if (0 == Pyr)
+		Quit("GFF tandemmotif record line %d, failed to find Pyramid attr", GFFLineNr);
+
+// Pyramid 123
+// 012345678
+	return atoi(Pyr + 8);
+	}
+
+static int ReadMotifPass1(FILE *f)
+	{
+	GFFLineNr = 0;
+	int MotifCount = 0;
+	GFFRecord Rec;
+	while (GetNextGFFRecord(f, Rec))
+		{
+		if (0 != strcmp(Rec.Feature, "tandemmotif"))
+			continue;
+		if (Rec.Start <= 0 || Rec.End <= 0 || Rec.Start > Rec.End)
+			Warning("GFF line %d: invalid start %d / end %d",
+			  GFFLineNr, Rec.Start, Rec.End);
+		++MotifCount;
+		}
+	return MotifCount;
+	}
+
+static int ReadMotifPass2(FILE *f, MotifData *Motifs)
+	{
+	rewind(f);
+
+	GFFLineNr = 0;
+	int MotifCount = 0;
+	GFFRecord Rec;
+	while (GetNextGFFRecord(f, Rec))
+		{
+		if (0 != strcmp(Rec.Feature, "tandemmotif"))
+			continue;
+		if (Rec.Start <= 0 || Rec.End <= 0 || Rec.Start > Rec.End)
+			Warning("GFF line %d: invalid start %d / end %d",
+			  GFFLineNr, Rec.Start, Rec.End);
+
+		int FamIndex = GetFam(Rec);
+
+		MotifData &Motif = Motifs[MotifCount];
+
+		const int Length = Rec.End - Rec.Start + 1;
+
+		Motif.ContigLabel = strsave(Rec.SeqName);
+		Motif.ContigFrom = Rec.Start - 1;
+		Motif.ContigTo = Motif.ContigFrom + Length - 1;
+		Motif.FamIndex = FamIndex;
+
+		++MotifCount;
+		}
+	return MotifCount;
+	}
+
+MotifData *ReadMotif(const char *FileName, int *ptrMotifCount)
+	{
+	FILE *f = OpenStdioFile(FileName);
+
+	int MotifCount = ReadMotifPass1(f);
+	MotifData *Motifs = all(MotifData, MotifCount);
+	ReadMotifPass2(f, Motifs);
+	fclose(f);
+
+	*ptrMotifCount = MotifCount;
+	return Motifs;
+	}
diff --git a/readreps.cpp b/readreps.cpp
new file mode 100755
index 0000000..113b7e3
--- /dev/null
+++ b/readreps.cpp
@@ -0,0 +1,126 @@
+#include "piler2.h"
+
+// Destructive: pokes Rec.Attrs
+static char *GetRepeat(GFFRecord &Rec)
+	{
+	char *Attrs = Rec.Attrs;
+	char *Start = strstr(Attrs, "Repeat");
+	if (0 == Start)
+		Quit("GFF line %d, repeat record does not have Repeat attribute", GFFLineNr);
+
+	char *SemiColon = strchr(Start, ';');
+	if (0 != SemiColon)
+		*SemiColon = 0;
+	char *Space = strchr(Start, ' ');
+	if (0 == Space)
+		Quit("GFF repeat record line %d, missing space in attrs", GFFLineNr);
+	return Space + 1;
+	}
+
+// "Repeat HETRP_DM Satellite 1518 1669 0"
+//            0         1       2    3  4
+void ParseRepeat(char *Str, RepData &Rep)
+	{
+	char *Fields[5];
+	int FieldCount = GetFields(Str, Fields, 5, ' ');
+	if (FieldCount != 5)
+		Quit("GFF line %d, Repeat attribute does not have 5 fields");
+
+	const char *RepeatName = Fields[0];
+	const char *RepeatClass = Fields[1];
+	const char *RepeatFrom = Fields[2];
+	const char *RepeatTo = Fields[3];
+	const char *RepeatLeft = Fields[4];
+
+	Rep.RepeatName = strsave(RepeatName);
+	Rep.RepeatClass = strsave(RepeatClass);
+	if (0 == strcmp(RepeatFrom, "."))
+		{
+		Rep.RepeatFrom = -1;
+		Rep.RepeatTo = -1;
+		Rep.RepeatLeft = -1;
+		}
+	else
+		{
+		Rep.RepeatFrom = atoi(RepeatFrom) - 1;
+		Rep.RepeatTo = atoi(RepeatTo) - 1;
+		Rep.RepeatLeft = atoi(RepeatLeft);
+		}
+	}
+
+static int ReadRepsPass1(FILE *f)
+	{
+	GFFLineNr = 0;
+	int RepCount = 0;
+	GFFRecord Rec;
+	while (GetNextGFFRecord(f, Rec))
+		{
+		if (0 != strcmp(Rec.Feature, "repeat"))
+			{
+			static bool WarningIssued = false;
+			if (!WarningIssued)
+				{
+				Warning("GFF record feature '%s' is not a repeat", Rec.Feature);
+				WarningIssued = true;
+				}
+			continue;
+			}
+		if (Rec.Start <= 0 || Rec.End <= 0 || Rec.Start > Rec.End)
+			Warning("GFF line %d: invalid start %d / end %d",
+			  GFFLineNr, Rec.Start, Rec.End);
+		++RepCount;
+		}
+	return RepCount;
+	}
+
+static int ReadRepsPass2(FILE *f, RepData *Reps)
+	{
+	rewind(f);
+
+	GFFLineNr = 0;
+	int RepCount = 0;
+	GFFRecord Rec;
+	while (GetNextGFFRecord(f, Rec))
+		{
+		if (0 != strcmp(Rec.Feature, "repeat"))
+			continue;
+
+		static char *Repeat = GetRepeat(Rec);
+
+		RepData &Rep = Reps[RepCount];
+		ParseRepeat(Repeat, Rep);
+
+		if (Rec.Start <= 0 || Rec.End <= 0 || Rec.Start > Rec.End)
+			Warning("GFF line %d: invalid start %d / end %d",
+			  GFFLineNr, Rec.Start, Rec.End);
+
+		const int Length = Rec.End - Rec.Start + 1;
+
+		Rep.ContigLabel = strsave(Rec.SeqName);
+		Rep.ContigFrom = Rec.Start - 1;
+		Rep.ContigTo = Rep.ContigFrom + Length - 1;
+
+		if (Rec.Strand == '+')
+			Rep.Rev = false;
+		else if (Rec.Strand == '-')
+			Rep.Rev = true;
+		else
+			Quit("GFF line %d, Invalid strand %c", GFFLineNr, Rec.Strand);
+
+		++RepCount;
+		}
+	return RepCount;
+	}
+
+RepData *ReadReps(const char *FileName, int *ptrRepCount)
+	{
+	FILE *f = OpenStdioFile(FileName);
+
+	int RepCount = ReadRepsPass1(f);
+	RepData *Reps = all(RepData, RepCount);
+	ReadRepsPass2(f, Reps);
+	fclose(f);
+
+	*ptrRepCount = RepCount;
+	return Reps;
+	}
diff --git a/readtrs.cpp b/readtrs.cpp
new file mode 100755
index 0000000..d097340
--- /dev/null
+++ b/readtrs.cpp
@@ -0,0 +1,94 @@
+#include "piler2.h"
+
+// Destructive: pokes Rec.Attrs
+static char *GetFam(GFFRecord &Rec)
+	{
+	char *Attrs = Rec.Attrs;
+	char *SemiColon = strchr(Attrs, ';');
+	if (0 != SemiColon)
+		*SemiColon = 0;
+	char *Space = strchr(Attrs, ' ');
+	if (0 == Space)
+		Quit("GFF trs record line %d, missing space in attrs", GFFLineNr);
+	*Space = 0;
+	if (0 != strcmp(Attrs, "Family"))
+		Quit("GFF trs record line %d, expected Family as first attr", GFFLineNr);
+	return Space + 1;
+	}
+
+static int ReadTRSPass1(FILE *f)
+	{
+	GFFLineNr = 0;
+	int TRSCount = 0;
+	GFFRecord Rec;
+	while (GetNextGFFRecord(f, Rec))
+		{
+		if (0 != strcmp(Rec.Feature, "trs"))
+			continue;
+		if (Rec.Start <= 0 || Rec.End <= 0 || Rec.Start > Rec.End)
+			Warning("GFF line %d: invalid start %d / end %d",
+			  GFFLineNr, Rec.Start, Rec.End);
+		++TRSCount;
+		}
+	return TRSCount;
+	}
+
+static int ReadTRSPass2(FILE *f, TRSData *TRSs)
+	{
+	rewind(f);
+
+	GFFLineNr = 0;
+	int TRSCount = 0;
+	GFFRecord Rec;
+	while (GetNextGFFRecord(f, Rec))
+		{
+		if (0 != strcmp(Rec.Feature, "trs"))
+			continue;
+		if (Rec.Start <= 0 || Rec.End <= 0 || Rec.Start > Rec.End)
+			Warning("GFF line %d: invalid start %d / end %d",
+			  GFFLineNr, Rec.Start, Rec.End);
+
+		static char *Fam = GetFam(Rec);
+
+		int FamIndex = 0;
+		int SuperFamIndex = 0;
+		int n = sscanf(Fam, "%d.%d", &FamIndex, &SuperFamIndex);
+		if (n == 1)
+			SuperFamIndex = -1;
+		else if (n != 2)
+			Quit("Invalid Family %s", Fam);
+
+		TRSData &TRS = TRSs[TRSCount];
+
+		const int Length = Rec.End - Rec.Start + 1;
+
+		TRS.ContigLabel = strsave(Rec.SeqName);
+		TRS.ContigFrom = Rec.Start - 1;
+		TRS.ContigTo = TRS.ContigFrom + Length - 1;
+		TRS.FamIndex = FamIndex;
+		TRS.SuperFamIndex = SuperFamIndex;
+
+		if (Rec.Strand == '+')
+			TRS.Rev = false;
+		else if (Rec.Strand == '-')
+			TRS.Rev = true;
+		else
+			Quit("GFF line %d, Invalid strand %c", GFFLineNr, Rec.Strand);
+
+		++TRSCount;
+		}
+	return TRSCount;
+	}
+
+TRSData *ReadTRS(const char *FileName, int *ptrTRSCount)
+	{
+	FILE *f = OpenStdioFile(FileName);
+
+	int TRSCount = ReadTRSPass1(f);
+	TRSData *TRSs = all(TRSData, TRSCount);
+	ReadTRSPass2(f, TRSs);
+	fclose(f);
+
+	*ptrTRSCount = TRSCount;
+	return TRSs;
+	}
diff --git a/tan.cpp b/tan.cpp
new file mode 100755
index 0000000..72ca8c9
--- /dev/null
+++ b/tan.cpp
@@ -0,0 +1,503 @@
+#include "piler2.h"
+
+#define GFFRecord GFFRecord2
+
+static int MIN_HIT_COUNT = 2;
+static double MAX_FRACT_MARGIN = 0.05;
+static double MIN_RATIO = 0.5;
+
+struct TanPile
+	{
+	char *Label;
+	int From;
+	int To;
+	HitData *Hits;
+	int HitCount;
+	};
+static TanPile *Piles;
+static int PileCount;
+static int PileBufferSize;
+static int PyramidIndex;
+FILE *fOut;
+FILE *fPyramid;
+FILE *fMotif;
+
+static inline int min3(int i, int j, int k)
+	{
+	return min(i, min(j, k));
+	}
+
+static inline int max3(int i, int j, int k)
+	{
+	return max(i, max(j, k));
+	}
+
+static void TouchPiles(int PileIndex)
+	{
+	if (PileIndex < PileBufferSize)
+		{
+		if (PileIndex >= PileCount)
+			PileCount = PileIndex + 1;
+		return;
+		}
+
+	int NewPileBufferSize = PileBufferSize + 10000;
+	TanPile *NewPiles = all(TanPile, NewPileBufferSize);
+	zero(NewPiles, TanPile, NewPileBufferSize);
+	memcpy(NewPiles, Piles, PileCount*sizeof(TanPile));
+	Piles = NewPiles;
+	PileBufferSize = NewPileBufferSize;
+	PileCount = PileIndex + 1;
+	}
+
+static void AssignHit(int PileIndex, const char *Label, int QueryFrom, int QueryTo,
+  int TargetFrom, int TargetTo, bool Rev)
+	{
+	TanPile &Pile = Piles[PileIndex];
+
+	int HitIndex = Pile.HitCount;
+	HitData &Hit = Pile.Hits[HitIndex];
+
+	Hit.QueryFrom = QueryFrom;
+	Hit.QueryTo = QueryTo;
+	Hit.TargetFrom = TargetFrom;
+	Hit.TargetTo = TargetTo;
+	Hit.Rev = Rev;
+
+	++(Pile.HitCount);
+	}
+
+static void AddHit(int PileIndex, const char *Label, int QueryFrom, int QueryTo,
+  int TargetFrom, int TargetTo, bool Rev)
+	{
+	TouchPiles(PileIndex);
+
+	TanPile &Pile = Piles[PileIndex];
+	if (0 == Pile.HitCount)
+		{
+		Pile.Label = strsave(Label);
+		Pile.From = min(QueryFrom, TargetFrom);
+		Pile.To = max(QueryTo, TargetTo);
+		}
+	else
+		{
+		if (0 != strcmp(Label, Pile.Label))
+			Quit("Labels disagree");
+		Pile.From = min3(Pile.From, QueryFrom, TargetFrom);
+		Pile.To = max3(Pile.To, QueryTo, TargetTo);
+		}
+	++(Pile.HitCount);
+	}
+
+bool TandemPair(const HitData &Hit1, const HitData &Hit2)
+	{
+	int Length1 = (Hit1.QueryTo - Hit1.QueryFrom + Hit1.TargetTo- Hit1.TargetFrom)/2;
+	int Length2 = (Hit2.QueryTo - Hit2.QueryFrom + Hit2.TargetTo - Hit2.TargetFrom)/2;
+	int ShorterLength = min(Length1, Length2);
+	int LongerLength = max(Length1, Length2);
+	if ((double) ShorterLength / (double) LongerLength < MIN_RATIO)
+		return false;
+
+	int StartDist = iabs(Hit1.TargetFrom - Hit2.TargetFrom);
+	int EndDist = iabs(Hit1.QueryTo - Hit2.QueryTo);
+
+	double StartMargin = (double) StartDist / (double) ShorterLength;
+	double EndMargin = (double) EndDist / (double) ShorterLength;
+	return StartMargin <= MAX_FRACT_MARGIN && EndMargin <= MAX_FRACT_MARGIN;
+	}
+
+static void WritePyramid(int PyramidIndex, const char *Label, int From, int To)
+	{
+	if (0 == fPyramid)
+		return;
+
+	GFFRecord Rec;
+	Rec.Source = "piler";
+	Rec.Feature = "pyramid";
+	Rec.Score = 0;
+	Rec.Frame = -1;
+	Rec.Strand = '.';
+	Rec.SeqName = Label;
+	Rec.Start = From + 1;
+	Rec.End = To + 1;
+
+	char s[32];
+	sprintf(s, "PyramidIndex %d", PyramidIndex);
+	Rec.Attrs = s;
+
+	WriteGFFRecord(fPyramid, Rec);
+	}
+
+static void InferMotif(int PyramidIndex, const TanPile &Pile, FamData *Fam)
+	{
+	const int MAX_PAIRS = 1024;
+	int DiagDists[MAX_PAIRS];
+	int PairCount = 0;
+	int MinDiagDist = -1;
+	for (PtrFamData q1 = Fam->begin(); q1 != Fam->end(); ++q1)
+		{
+		FamMemberData &FamMember1 = *q1;
+		int HitIndex1 = FamMember1.PileIndex;
+		if (HitIndex1 < 0 || HitIndex1 >= Pile.HitCount)
+			Quit("Hit index out of range");
+		const HitData &Hit1 = Pile.Hits[HitIndex1];
+		int Diag1Start = Hit1.TargetFrom - Hit1.QueryFrom;
+		int Diag1End = Hit1.TargetTo - Hit1.QueryTo;
+		int Diag1 = (Diag1Start + Diag1End)/2;
+
+		PtrFamData q2 = q1;
+		for (++q2; q2 != Fam->end(); ++q2)
+			{
+			FamMemberData &FamMember2 = *q2;
+			int HitIndex2 = FamMember2.PileIndex;
+			if (HitIndex2 < 0 || HitIndex2 >= Pile.HitCount)
+				Quit("Hit index out of range");
+			const HitData &Hit2 = Pile.Hits[HitIndex2];
+
+			int Diag2Start = Hit2.TargetFrom - Hit2.QueryFrom;
+			int Diag2End = Hit2.TargetTo - Hit2.QueryTo;
+			int Diag2 = (Diag2Start + Diag2End)/2;
+
+			int DiagDist = iabs(Diag1 - Diag2);
+			if (-1 == MinDiagDist || DiagDist < MinDiagDist)
+				MinDiagDist = DiagDist;
+			DiagDists[PairCount++] = DiagDist;
+			if (PairCount == MAX_PAIRS)
+				goto Done;
+			}
+		}
+Done:
+// Find all dists that are no more than 10% bigger than MinDist.
+// The average of these distances is the estimated repeat length.
+	int Count = 0;
+	int Sum = 0;
+	int MaxCandidateDist = (MinDiagDist*110)/100;
+	for (int i = 0; i < PairCount; ++i)
+		{
+		const int Dist = DiagDists[i];
+		if (Dist <= MaxCandidateDist)
+			{
+			Sum += Dist;
+			++Count;
+			}
+		}
+	if (0 == Count)
+		Quit("Huh?");
+
+	const int EstimatedRepeatLength = Sum/Count;
+	Log("\n");
+	Log("Pyramid %d: min dist = %d ", PyramidIndex, MinDiagDist);
+	Log(" Count=%d Sum=%d Estd=%d\n", Count, Sum, EstimatedRepeatLength);
+
+	GFFRecord Rec;
+	Rec.Source = "piler";
+	Rec.Feature = "tandemmotif";
+	Rec.Score = 0;
+	Rec.Frame = -1;
+	Rec.SeqName = Pile.Label;
+	Rec.Strand = '.';
+
+// By definition, a Pyramid is set of hits for which QueryTo
+// and TargetFrom values are approximately equal.
+// We arbitrarily choose the canonical motif to end
+// at QueryTo. We output this motif and the Target motifs
+// to which it aligns.
+	FamMemberData &FamMember = *(Fam->begin());
+	int HitIndex = FamMember.PileIndex;
+	if (HitIndex < 0 || HitIndex >= Pile.HitCount)
+		Quit("Hit index out of range");
+	const HitData &Hit = Pile.Hits[HitIndex];
+
+	const int MotifQueryTo = Hit.QueryTo;
+	const int MotifQueryFrom = MotifQueryTo - EstimatedRepeatLength + 1;
+	const int TargetTo = Hit.TargetTo;
+	const int TargetFrom = TargetTo - EstimatedRepeatLength + 1;
+
+	char s[1024];
+	sprintf(s, "Target %s %d %d ; Pyramid %d",
+		Pile.Label,
+		TargetFrom + 1,
+		TargetTo + 1,
+		PyramidIndex);
+
+	Rec.Start = MotifQueryFrom + 1;
+	Rec.End = MotifQueryTo + 1;
+	Rec.Attrs = s;
+
+	WriteGFFRecord(fMotif, Rec);
+
+	for (PtrFamData q = Fam->begin(); q != Fam->end(); ++q)
+		{
+		FamMemberData &FamMember = *q;
+		int HitIndex = FamMember.PileIndex;
+		if (HitIndex < 0 || HitIndex >= Pile.HitCount)
+			Quit("Hit index out of range");
+		const HitData &Hit = Pile.Hits[HitIndex];
+
+		const int To = Hit.TargetTo;
+		const int From = To - EstimatedRepeatLength + 1;
+
+		char s[1024];
+		sprintf(s, "Target %s %d %d ; Pyramid %d",
+		  Pile.Label,
+		  MotifQueryFrom + 1,
+		  MotifQueryTo + 1,
+		  PyramidIndex);
+
+		Rec.Start = From + 1;
+		Rec.End = To + 1;
+		Rec.Attrs = s;
+
+		WriteGFFRecord(fMotif, Rec);
+		}
+	}
+
+static void LogPyramid(int PyramidIndex, const TanPile &Pile, FamData *Fam)
+	{
+	Log("\n");
+	Log("Pyramid %d\n", PyramidIndex);
+
+	Log(
+"     Label   QStart1     QEnd1   TStart1     TEnd1   QStart1     QEnd1   TStart1     TEnd1    Diag   Start     End\n");
+	Log(
+"----------  --------  --------  --------  --------  --------  --------  --------  --------  ------  ------  ------\n");
+	for (PtrFamData q1 = Fam->begin(); q1 != Fam->end(); ++q1)
+		{
+		FamMemberData &FamMember1 = *q1;
+		int HitIndex1 = FamMember1.PileIndex;
+		if (HitIndex1 < 0 || HitIndex1 >= Pile.HitCount)
+			Quit("Hit index out of range");
+		const HitData &Hit1 = Pile.Hits[HitIndex1];
+		int Diag1Start = Hit1.TargetFrom - Hit1.QueryFrom;
+		int Diag1End = Hit1.TargetTo - Hit1.QueryTo;
+		int Diag1 = (Diag1Start + Diag1End)/2;
+
+		PtrFamData q2 = q1;
+		for (++q2; q2 != Fam->end(); ++q2)
+			{
+			FamMemberData &FamMember2 = *q2;
+			int HitIndex2 = FamMember2.PileIndex;
+			if (HitIndex2 < 0 || HitIndex2 >= Pile.HitCount)
+				Quit("Hit index out of range");
+			const HitData &Hit2 = Pile.Hits[HitIndex2];
+
+			int Diag2Start = Hit2.TargetFrom - Hit2.QueryFrom;
+			int Diag2End = Hit2.TargetTo - Hit2.QueryTo;
+			int Diag2 = (Diag2Start + Diag2End)/2;
+
+			int DiagDist = iabs(Diag1 - Diag2);
+
+			int StartDist = iabs(Hit1.TargetFrom - Hit2.TargetFrom);
+			int EndDist = iabs(Hit1.QueryTo - Hit2.QueryTo);
+			Log("%10.10s  %8d  %8d  %8d  %8d  %8d  %8d  %8d  %8d  %6d",
+				Pile.Label,
+				Hit1.QueryFrom,
+				Hit1.QueryTo,
+				Hit1.TargetFrom,
+				Hit1.TargetTo,
+				Hit2.QueryFrom,
+				Hit2.QueryTo,
+				Hit2.TargetFrom,
+				Hit2.TargetTo,
+				DiagDist);
+			Log("  %6d", StartDist);
+			Log("  %6d", EndDist);
+			Log("\n");
+			}
+		}
+	}
+
+static void FindPyramids(int PileIndex)
+	{
+	const TanPile &Pile = Piles[PileIndex];
+	const int HitCount = Pile.HitCount;
+	if (HitCount < MIN_HIT_COUNT)
+		return;
+
+	EdgeList Edges;
+	EdgeData Edge;
+	Edge.Rev = false;
+	const HitData *Hits = Pile.Hits;
+	for (int HitIndex1 = 0; HitIndex1 < HitCount; ++HitIndex1)
+		{
+		const HitData &Hit1 = Hits[HitIndex1];
+		Edge.Node1 = HitIndex1;
+		for (int HitIndex2 = 0; HitIndex2 < HitIndex1; ++HitIndex2)
+			{
+			const HitData &Hit2 = Hits[HitIndex2];
+			if (TandemPair(Hit1, Hit2))
+				{
+				Edge.Node2 = HitIndex2;
+				Edges.push_back(Edge);
+				}
+			}
+		}
+
+	FamList Fams;
+	FindConnectedComponents(Edges, Fams, MIN_HIT_COUNT);
+
+	if (0 == Fams.size())
+		return;
+
+	GFFRecord Rec;
+	Rec.Source = "piler";
+	Rec.Feature = "hit";
+	Rec.Score = 0;
+	Rec.Frame = -1;
+	Rec.SeqName = Pile.Label;
+
+	for (PtrFamList p = Fams.begin(); p != Fams.end(); ++p)
+		{
+		FamData *Fam = *p;
+		if (0 != fMotif)
+			InferMotif(PyramidIndex, Pile, Fam);
+		LogPyramid(PyramidIndex, Pile, Fam);
+
+		int PyramidFrom = -1;
+		int PyramidTo = -1;
+
+		for (PtrFamData q = Fam->begin(); q != Fam->end(); ++q)
+			{
+			FamMemberData &FamMember = *q;
+			int HitIndex = FamMember.PileIndex;
+			if (HitIndex < 0 || HitIndex >= Pile.HitCount)
+				Quit("Hit index out of range");
+			const HitData &Hit = Pile.Hits[HitIndex];
+
+			char s[1024];
+			sprintf(s, "Target %s %d %d ; Pile %d ; Pyramid %d",
+			  Pile.Label,
+			  Hit.TargetFrom + 1,
+			  Hit.TargetTo + 1,
+			  PileIndex,
+			  PyramidIndex);
+
+			Rec.Strand = (Hit.Rev ? '-' : '+');
+			Rec.Start = Hit.QueryFrom + 1;
+			Rec.End = Hit.QueryTo + 1;
+			Rec.Attrs = s;
+
+			WriteGFFRecord(fOut, Rec);
+
+			if (PyramidFrom == -1)
+				PyramidFrom = min(Hit.QueryFrom, Hit.TargetFrom);
+			else
+				PyramidFrom = min3(PyramidFrom, Hit.QueryFrom, Hit.TargetFrom);
+
+			if (PyramidTo == -1)
+				PyramidTo = max(Hit.QueryTo, Hit.TargetTo);
+			else
+				PyramidTo = max3(PyramidTo, Hit.QueryTo, Hit.TargetTo);
+			}
+		WritePyramid(PyramidIndex, Pile.Label, PyramidFrom, PyramidTo);
+		++PyramidIndex;
+		}
+	}
+
+void Tan()
+	{
+// Image file annotated with from-to pile indexes
+// Produced by:
+//		piler2 -trs banded_hits.gff -images mainband_images.gff
+	const char *HitFileName = RequiredValueOpt("tan");
+	const char *OutFileName = RequiredValueOpt("out");
+	const char *PyramidFileName = ValueOpt("pyramid");
+	const char *MotifFileName = ValueOpt("motif");
+	const char *strMinHits = ValueOpt("minhits");
+	const char *strMaxMargin = ValueOpt("maxmargin");
+	const char *strMinRatio = ValueOpt("minratio");
+
+	if (0 != strMinHits)
+		MIN_HIT_COUNT = atoi(strMinHits);
+	if (0 != strMaxMargin)
+		MAX_FRACT_MARGIN = atof(strMaxMargin);
+	if (0 != strMinRatio)
+		MIN_RATIO = atof(strMinRatio);
+
+	FILE *fInput = OpenStdioFile(HitFileName);
+
+	ProgressStart("Initialize piles");
+	GFFRecord Rec;
+	int HitCount = 0;
+	while (GetNextGFFRecord(fInput, Rec))
+		{
+		if (0 != strcmp(Rec.Feature, "hit"))
+			continue;
+
+		int QueryPileIndex = -1;
+		int TargetPileIndex = -1;
+		ParsePilesAttrs(Rec.Attrs, &QueryPileIndex, &TargetPileIndex);
+		if (QueryPileIndex != TargetPileIndex)
+			continue;
+
+		char TargetLabel[128];
+		int TargetStart;
+		int TargetEnd;
+		ParseTargetAttrs(Rec.Attrs, TargetLabel, sizeof(TargetLabel), &TargetStart, &TargetEnd);
+		if (0 != strcmp(Rec.SeqName, TargetLabel))
+			Quit("Labels don't match");
+
+		const int QueryFrom = Rec.Start - 1;
+		const int QueryTo = Rec.End - 1;
+		const int TargetFrom = TargetStart - 1;
+		const int TargetTo = TargetEnd - 1;
+		const bool Rev = (Rec.Strand == '-');
+
+		AddHit(QueryPileIndex, Rec.SeqName, QueryFrom, QueryTo, TargetFrom, TargetTo, Rev);
+		++HitCount;
+		}
+	ProgressDone();
+
+	Progress("%d hits, %d piles", HitCount, PileCount);
+
+	ProgressStart("Allocate piles");
+	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
+		{
+		TanPile &Pile = Piles[PileIndex];
+		Pile.Hits = all(HitData, Pile.HitCount);
+		Pile.HitCount = 0;
+		}
+	ProgressDone();
+
+	ProgressStart("Assign hits to piles");
+	Rewind(fInput);
+	while (GetNextGFFRecord(fInput, Rec))
+		{
+		if (0 != strcmp(Rec.Feature, "hit"))
+			continue;
+
+		int QueryPileIndex = -1;
+		int TargetPileIndex = -1;
+		ParsePilesAttrs(Rec.Attrs, &QueryPileIndex, &TargetPileIndex);
+		if (QueryPileIndex != TargetPileIndex)
+			continue;
+
+		char TargetLabel[128];
+		int TargetStart;
+		int TargetEnd;
+		ParseTargetAttrs(Rec.Attrs, TargetLabel, sizeof(TargetLabel), &TargetStart, &TargetEnd);
+		if (0 != strcmp(Rec.SeqName, TargetLabel))
+			Quit("Labels don't match");
+
+		const int QueryFrom = Rec.Start - 1;
+		const int QueryTo = Rec.End - 1;
+		const int TargetFrom = TargetStart - 1;
+		const int TargetTo = TargetEnd - 1;
+		const bool Rev = (Rec.Strand == '-');
+
+		AssignHit(QueryPileIndex, Rec.SeqName, QueryFrom, QueryTo, TargetFrom, TargetTo, Rev);
+		}
+	ProgressDone();
+
+	fOut = OpenStdioFile(OutFileName, FILEIO_MODE_WriteOnly);
+	fPyramid = (0 == PyramidFileName ? 0 : OpenStdioFile(PyramidFileName, FILEIO_MODE_WriteOnly));
+	fMotif = (0 == PyramidFileName ? 0 : OpenStdioFile(MotifFileName, FILEIO_MODE_WriteOnly));
+
+	ProgressStart("Find pyramids");
+	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
+		FindPyramids(PileIndex);
+	int PyramidCount = PyramidIndex;
+	ProgressDone();
+
+	Progress("%d pyramids", PyramidCount);
+	}
diff --git a/tanmotif2fasta.cpp b/tanmotif2fasta.cpp
new file mode 100755
index 0000000..848c23d
--- /dev/null
+++ b/tanmotif2fasta.cpp
@@ -0,0 +1,114 @@
+#include "piler2.h"
+
+static void LogRecords(const MotifData *Motifs, int MotifCount)
+	{
+	Log("              Contig        From          To  Length  Family\n");
+	Log("--------------------  ----------  ----------  ------  ------\n");
+	for (int MotifIndex = 0; MotifIndex < MotifCount; ++MotifIndex)
+		{
+		const MotifData &Motif = Motifs[MotifIndex];
+		Log("%20.20s  %10d  %10d  %6d  %d\n",
+		  Motif.ContigLabel,
+		  Motif.ContigFrom,
+		  Motif.ContigTo,
+		  Motif.ContigTo - Motif.ContigFrom + 1,
+		  Motif.FamIndex);
+		}
+	}
+
+static int CmpMotif(const void *s1, const void *s2)
+	{
+	const MotifData *Motif1 = (const MotifData *) s1;
+	const MotifData *Motif2 = (const MotifData *) s2;
+	return Motif1->FamIndex - Motif2->FamIndex;
+	}
+
+static char *FamFileName(const char *Path, int Fam)
+	{
+	static char FileName[256];
+	char s[128];
+	sprintf(s, "/%d", Fam);
+	if (strlen(Path) + strlen(s) + 3 >= sizeof(FileName))
+		Quit("Path name too long");
+	strcpy(FileName, Path);
+	strcat(FileName, s);
+	fprintf(stderr, "\n\n*** FILE NAME *** '%s'\n", FileName);
+	return FileName;
+	}
+
+static char *MotifLabel(const char *Prefix, const MotifData &Motif)
+	{
+	int n = (int) strlen(Motif.ContigLabel) + 128;
+	char *s = all(char, n);
+	if (0 != Prefix)
+		sprintf(s, "%s.%d %s:%d",
+		  Prefix,
+		  Motif.FamIndex,
+		  Motif.ContigLabel,
+		  Motif.ContigFrom+1);
+	else
+		sprintf(s, "%d %s:%d",
+		  Motif.FamIndex,
+		  Motif.ContigLabel,
+		  Motif.ContigFrom+1);
+	return s;
+	}
+
+void Tanmotif2Fasta()
+	{
+	const char *MotifFileName = RequiredValueOpt("tanmotif2fasta");
+	const char *SeqFileName = RequiredValueOpt("seq");
+	const char *Path = ValueOpt("path");
+	const char *strMaxFam = ValueOpt("maxfam");
+	const char *Prefix = ValueOpt("prefix");
+
+	int MaxFam = DEFAULT_MAX_FAM;
+	if (strMaxFam != 0)
+		MaxFam = atoi(strMaxFam);
+
+	if (0 == Path)
+		Path = ".";
+
+	ProgressStart("Reading seq file");
+	int SeqLength;
+	const char *Seq = ReadMFA(SeqFileName, &SeqLength);
+	ProgressDone();
+
+	Progress("Seq length %d bases, %.3g Mb", SeqLength, SeqLength/1e6);
+
+	ProgressStart("Read Motif file");
+	int MotifCount;
+	MotifData *Motifs = ReadMotif(MotifFileName, &MotifCount);
+	ProgressDone();
+
+	Progress("%d records", MotifCount);
+
+	ProgressStart("Sorting by family");
+	qsort((void *) Motifs, MotifCount, sizeof(MotifData), CmpMotif);
+	ProgressDone();
+
+	FILE *f = 0;
+	int CurrentFamily = -1;
+	int MemberCount = 0;
+	for (int MotifIndex = 0; MotifIndex < MotifCount; ++MotifIndex)
+		{
+		const MotifData &Motif = Motifs[MotifIndex];
+		if (Motif.FamIndex != CurrentFamily)
+			{
+			if (f != 0)
+				fclose(f);
+			char *FastaFileName = FamFileName(Path, Motif.FamIndex);
+			f = OpenStdioFile(FastaFileName, FILEIO_MODE_WriteOnly);
+			CurrentFamily = Motif.FamIndex;
+			MemberCount = 0;
+			}
+		++MemberCount;
+		if (MemberCount > MaxFam)
+			continue;
+		const int From = ContigToGlobal(Motif.ContigFrom, Motif.ContigLabel);
+		const int Length = Motif.ContigTo - Motif.ContigFrom + 1;
+		char *Label = MotifLabel(Prefix, Motif);
+		WriteFasta(f, Seq + From, Length, Label, false);
+		freemem(Label);
+		}
+	}
diff --git a/tr.cpp b/tr.cpp
new file mode 100755
index 0000000..e6d1690
--- /dev/null
+++ b/tr.cpp
@@ -0,0 +1,354 @@
+#include "piler2.h"
+#if defined(DEBUG) && defined(_MSC_VER)
+#include <crtdbg.h>
+#endif
+
+#define GFFRecord GFFRecord2
+
+/***
+Find LTR families.
+
+1. LTR candidates
+
+    ------===--------------------===------------ genome
+           ^----------------------^
+                      Hit
+
+Hit is candidate pair of LTRs bounding a candidate LINE if:
+    (a) hit length >= MIN_LENGTH_LTR and <= MAX_LENGTH_LTR, and
+    (b) offset is >= MIN_LENGTH_LINE and <= MAX_LENGTH_LINE.
+
+2. Same family if a local alignment connects two LTR candidates
+of similar length.
+
+
+              Cand A                  Cand B
+    ------===........===----------===........===----- genome
+                      ^------------^
+                            Hit
+***/
+
+static int MIN_LENGTH_LINE = 50;
+static int MAX_LENGTH_LINE = 12000;
+static int MIN_LENGTH_LTR = 50;
+static int MAX_LENGTH_LTR = 2000;
+static double MIN_LINE_RATIO = 0.9;
+static int MIN_FAM_SIZE = 3;
+static double MIN_HIT_LENGTH_RATIO = 0.5;
+static int MIN_DIST_EDGE = 50000;
+
+static EdgeList Edges;
+
+static double Ratio(int i, int j)
+	{
+	if (i == 0 && j == 0)
+		return 0;
+	if (i < j)
+		return (double) i / (double) j;
+	else
+		return (double) j / (double) i;
+	}
+
+static int GetHitLength(const HitData &Hit)
+	{
+	const int QueryHitLength = Hit.QueryTo - Hit.QueryFrom + 1;
+	const int TargetHitLength = Hit.TargetTo - Hit.TargetFrom + 1;
+	return (QueryHitLength + TargetHitLength)/2;
+	}
+
+static int GetLTRLength(const HitData &Hit)
+	{
+	const int QueryLTRLength = Hit.QueryTo - Hit.QueryFrom + 1;
+	const int TargetLTRLength = Hit.TargetTo - Hit.TargetFrom + 1;
+	return (QueryLTRLength + TargetLTRLength)/2;
+	}
+
+static int GetHitFrom(const HitData &Hit)
+	{
+	return min(Hit.QueryFrom, Hit.TargetFrom);
+	}
+
+static int GetHitTo(const HitData &Hit)
+	{
+	return max(Hit.QueryTo, Hit.TargetTo);
+	}
+
+static int GetLINELength(const HitData &Hit)
+	{
+	return GetHitTo(Hit) - GetHitFrom(Hit) + 1;
+	}
+
+static bool IsCandLTR(const HitData &Hit)
+	{
+	const int LTRLength = GetLTRLength(Hit);
+	if (LTRLength < MIN_LENGTH_LTR || LTRLength > MAX_LENGTH_LTR)
+		return false;
+
+	const int LINELength = GetLINELength(Hit);
+	if (LINELength < MIN_LENGTH_LINE || LINELength > MAX_LENGTH_LINE)
+		return false;
+
+	return true;
+	}
+
+static HitData *Cands;
+static int CandCount = 0;
+static int CandBufferSize = 0;
+static void AddCand(const HitData &Hit, IIX &IntervalIndex)
+	{
+	if (CandCount >= CandBufferSize)
+		{
+		CandBufferSize += 10000;
+		Cands = reall(Cands, HitData, CandBufferSize);
+		}
+
+	int From;
+	int To;
+	if (Hit.QueryFrom < Hit.TargetFrom)
+		{
+		From = Hit.QueryFrom;
+		To = Hit.TargetTo;
+		}
+	else
+		{
+		From = Hit.TargetFrom;
+		To = Hit.TargetTo;
+		}
+
+	IntervalIndex.AddGlobal(From, To, CandCount);
+	Cands[CandCount++] = Hit;
+	}
+
+static bool TooClose(const GLIX &HitGlix, const HitData &Hit1, const HitData &Hit2)
+	{
+	const int Hit1From = Hit1.QueryFrom;
+	const int Hit2From = Hit2.QueryFrom;
+	int SeqFrom1;
+	int SeqFrom2;
+	const char *Label1 = HitGlix.GlobalToSeq(Hit1From, &SeqFrom1);
+	const char *Label2 = HitGlix.GlobalToSeq(Hit2From, &SeqFrom2);
+	if (0 != strcmp(Label1, Label2))
+		return false;
+	return iabs(SeqFrom2 - SeqFrom1) < MIN_DIST_EDGE;
+	}
+
+static bool IsEdge(const GLIX &HitGlix, const HitData &Hit, int i, int j)
+	{
+	if (i == j)
+		return false;
+
+	assert(i >= 0 && i < CandCount);
+	assert(j >= 0 && j < CandCount);
+
+	const HitData &Hit_i = Cands[i];
+	const HitData &Hit_j = Cands[j];
+
+	const int HitLength = GetHitLength(Hit);
+	const int HitLength_i = GetHitLength(Hit_i);
+	const int HitLength_j = GetHitLength(Hit_j);
+
+	if (Ratio(HitLength, HitLength_i) < MIN_HIT_LENGTH_RATIO ||
+	  Ratio(HitLength, HitLength_j) < MIN_HIT_LENGTH_RATIO ||
+	  Ratio(HitLength_i, HitLength_j) < MIN_HIT_LENGTH_RATIO)
+		return false;
+
+	const int LINELength_i = GetLINELength(Hit_i);
+	const int LINELength_j = GetLINELength(Hit_j);
+
+	if (Ratio(LINELength_i, LINELength_j) < MIN_LINE_RATIO)
+		return false;
+
+	if (TooClose(HitGlix, Hit_i, Hit_j))
+		return false;
+
+	return true;
+	}
+
+static void FindEdges(const HitData &Hit, const GLIX &HitGlix, const IIX &IntervalIndex)
+	{
+	int *QueryMatches;
+	int *TargetMatches;
+	const int QueryMatchCount = IntervalIndex.LookupGlobal(Hit.QueryFrom, Hit.QueryTo, &QueryMatches);
+	const int TargetMatchCount = IntervalIndex.LookupGlobal(Hit.TargetFrom, Hit.TargetTo, &TargetMatches);
+	const int MatchCount = QueryMatchCount + TargetMatchCount;
+	int *Matches = all(int, MatchCount);
+	memcpy(Matches, QueryMatches, QueryMatchCount*sizeof(int));
+	memcpy(Matches + QueryMatchCount, TargetMatches, TargetMatchCount*sizeof(int));
+
+	for (int i = 0; i < MatchCount; ++i)
+		{
+		const int m = Matches[i];
+		if (m < 0 || m > CandCount)
+			Quit("m=%d count=%d", m, CandCount);
+		}
+
+	for (int i = 0; i < MatchCount; ++i)
+		{
+		const int CandIndex1 = Matches[i];
+		for (int j = 0; j < i; ++j)
+			{
+			const int CandIndex2 = Matches[j];
+			if (IsEdge(HitGlix, Hit, CandIndex1, CandIndex2))
+				{
+				EdgeData Edge;
+				Edge.Node1 = CandIndex1;
+				Edge.Node2 = CandIndex2;
+				Edge.Rev = Hit.Rev;
+				Edges.push_back(Edge);
+				}
+			}
+		}
+	}
+
+static void WriteOutputFile(FILE *fOut, const GLIX &HitGlix, FamList &Fams)
+	{
+	GFFRecord Rec;
+	Rec.Feature = "tr";
+	Rec.Source = "piler";
+	Rec.Score = 0;
+	Rec.Strand = '.';
+	Rec.Frame = -1;
+
+	int FamIndex = 0;
+	for (PtrFamList p = Fams.begin(); p != Fams.end(); ++p)
+		{
+		FamData *Fam = *p;
+		for (PtrFamData q = Fam->begin(); q != Fam->end(); ++q)
+			{
+			FamMemberData &FamMember = *q;
+			int CandIndex = FamMember.PileIndex;
+			assert(CandIndex >= 0 && CandIndex < CandCount);
+			const HitData &Hit = Cands[CandIndex];
+
+			const int GlobalFrom = GetHitFrom(Hit);
+			const int GlobalTo = GetHitTo(Hit);
+			const int Length = GlobalTo - GlobalFrom + 1;
+
+			int SeqFrom;
+			const char *Label = HitGlix.GlobalToSeq(GlobalFrom, &SeqFrom);
+			int SeqTo = SeqFrom + Length - 1;
+
+			Rec.SeqName = Label;
+			Rec.Start = SeqFrom + 1;
+			Rec.End = SeqTo + 1;
+			Rec.Strand = (FamMember.Rev ? '-' : '+');
+
+			char Attrs[1024];
+			sprintf(Attrs, "Family %d ; Cand %d", FamIndex, CandIndex);
+
+			Rec.Attrs = Attrs;
+
+			WriteGFFRecord(fOut, Rec);
+			}
+		++FamIndex;
+		}
+	}
+
+static void WriteCands(FILE *f, const GLIX &HitGlix)
+	{
+	GFFRecord Rec;
+	for (int CandIndex = 0; CandIndex < CandCount; ++CandIndex)
+		{
+		const HitData &Hit = Cands[CandIndex];
+		char AnnotBuffer[1024];
+		HitToGFFRecord(HitGlix, Hit, Rec, AnnotBuffer);
+		char s[4096];
+		sprintf(s, "%s ; Cand %d", Rec.Attrs, CandIndex);
+		Rec.Attrs = s;
+		WriteGFFRecord(f, Rec);
+		}
+	}
+
+void TR()
+	{
+#if defined(DEBUG) && defined(_MSC_VER)
+	_CrtSetDbgFlag(0);	// too expensive
+#endif
+
+	const char *HitFileName = RequiredValueOpt("tr");
+	const char *OutFileName = RequiredValueOpt("out");
+	const char *CandFileName = ValueOpt("cand");
+
+	const char *strMinTrSpacing = ValueOpt("mintrspacing");
+	const char *strMaxTrSpacing = ValueOpt("maxtrspacing");
+	const char *strMinTrLength = ValueOpt("mintrlength");
+	const char *strMaxTrLength = ValueOpt("minspacingratio");
+	const char *strMinFam = ValueOpt("minfam");
+	const char *strMinHitRatio = ValueOpt("minhitratio");
+	const char *strMinDistPairs = ValueOpt("mindistpairs");
+
+	if (0 != strMinTrSpacing)
+		MIN_LENGTH_LINE = atoi(strMinTrSpacing);
+	if (0 != strMaxTrSpacing)
+		MAX_LENGTH_LINE = atoi(strMaxTrSpacing);
+	if (0 != strMinTrLength)
+		MIN_LENGTH_LTR = atoi(strMinTrLength);
+	if (0 != strMaxTrLength)
+		MAX_LENGTH_LTR = atoi(strMaxTrLength);
+	if (0 != strMinFam)
+		MIN_FAM_SIZE = atoi(strMinFam);
+	if (0 != strMinHitRatio)
+		MIN_HIT_LENGTH_RATIO = atoi(strMinHitRatio);
+	if (0 != strMinDistPairs)
+		MIN_DIST_EDGE = atoi(strMinDistPairs);
+
+	FILE *fHit = OpenStdioFile(HitFileName, FILEIO_MODE_ReadOnly);
+
+	ProgressStart("Index hits");
+	GLIX HitGlix;
+	HitGlix.Init();
+	HitGlix.FromGFFFile(fHit);
+	HitGlix.MakeGlobalToLocalIndex();
+	ProgressDone();
+
+	const int GlobalLength = HitGlix.GetGlobalLength();
+	IIX IntervalIndex;
+	IntervalIndex.Init(GlobalLength);
+
+	ProgressStart("Find candidate TRs");
+	Rewind(fHit);
+	GFFRecord Rec;
+	while (GetNextGFFRecord(fHit, Rec))
+		{
+		HitData Hit;
+		GFFRecordToHit(HitGlix, Rec, Hit);
+		if (IsCandLTR(Hit))
+			AddCand(Hit, IntervalIndex);
+		}
+	ProgressDone();
+
+	Progress("%d candidates", CandCount);
+
+	if (0 != CandFileName)
+		{
+		ProgressStart("Write candidates");
+		FILE *fCand = OpenStdioFile(CandFileName, FILEIO_MODE_WriteOnly);
+		WriteCands(fCand, HitGlix);
+		ProgressDone();
+		}
+
+	ProgressStart("Make graph");
+	Rewind(fHit);
+	while (GetNextGFFRecord(fHit, Rec))
+		{
+		HitData Hit;
+		GFFRecordToHit(HitGlix, Rec, Hit);
+		FindEdges(Hit, HitGlix, IntervalIndex);
+		}
+	fclose(fHit);
+	fHit = 0;
+
+	ProgressDone();
+
+	Progress("%d edges", (int) Edges.size());
+
+	ProgressStart("Find families");
+	FamList Fams;
+	FindConnectedComponents(Edges, Fams, MIN_FAM_SIZE);
+	ProgressDone();
+
+	Progress("%d families", (int) Fams.size());
+
+	FILE *fOut = OpenStdioFile(OutFileName, FILEIO_MODE_WriteOnly);
+	WriteOutputFile(fOut, HitGlix, Fams);
+	}
diff --git a/trs.cpp b/trs.cpp
new file mode 100755
index 0000000..4dc047a
--- /dev/null
+++ b/trs.cpp
@@ -0,0 +1,682 @@
+#include "piler2.h"
+#include "bitfuncs.h"
+
+#define TRACE		0
+
+static int g_paramMinFamSize = 3;
+static int g_paramMaxLengthDiffPct = 5;
+static bool g_paramSingleHitCoverage = true;
+
+static PileData *Piles;
+static int PileCount;
+static int EdgeCount;
+
+static int MaxImageCount = 0;
+static int SeqLength;
+static int SeqLengthChunks;
+
+static PILE_INDEX_TYPE *IdentifyPiles(int *CopyCount)
+	{
+	PILE_INDEX_TYPE *PileIndexes = all(PILE_INDEX_TYPE, SeqLengthChunks);
+
+#if	DEBUG
+	memset(PileIndexes, 0xff, SeqLengthChunks*sizeof(PILE_INDEX_TYPE));
+#endif
+
+	int PileIndex = -1;
+	bool InPile = false;
+	for (int i = 0; i < SeqLengthChunks; ++i)
+		{
+		if (BitIsSet(CopyCount, i))
+			{
+			if (!InPile)
+				{
+				++PileIndex;
+				if (PileIndex > MAX_STACK_INDEX)
+					Quit("Too many stacks");
+				InPile = true;
+				}
+			PileIndexes[i] = PileIndex;
+			}
+		else
+			InPile = false;
+		}
+	PileCount = PileIndex + 1;
+	return PileIndexes;
+	}
+
+static void IncCopyCountImage(int *CopyCount, int From, int To)
+	{
+	if (From < 0)
+		Quit("From < 0");
+
+	From /= CHUNK_LENGTH;
+	To /= CHUNK_LENGTH;
+
+	if (From >= SeqLengthChunks)
+		{
+		Warning("IncCopyCountImage: From=%d, SeqLength=%d", From, SeqLengthChunks);
+		From = SeqLengthChunks - 1;
+		}
+	if (To >= SeqLengthChunks)
+		{
+		Warning("IncCopyCountImage: To=%d, SeqLength=%d", To, SeqLengthChunks);
+		To = SeqLengthChunks - 1;
+		}
+
+	if (From > To)
+		Quit("From > To");
+
+	for (int i = From; i <= To; ++i)
+		SetBit(CopyCount, i);
+	}
+
+static void IncCopyCount(int *CopyCount, const HitData &Hit)
+	{
+	IncCopyCountImage(CopyCount, Hit.TargetFrom, Hit.TargetTo);
+	IncCopyCountImage(CopyCount, Hit.QueryFrom, Hit.QueryTo);
+	}
+
+static int CmpHits(const void *ptrHit1, const void *ptrHit2)
+	{
+	HitData *Hit1 = (HitData *) ptrHit1;
+	HitData *Hit2 = (HitData *) ptrHit2;
+	return Hit1->QueryFrom - Hit2->QueryFrom;
+	}
+
+static int CmpImages(const void *ptrImage1, const void *ptrImage2)
+	{
+	PileImageData *Image1 = (PileImageData *) ptrImage1;
+	PileImageData *Image2 = (PileImageData *) ptrImage2;
+	return Image1->SIPile - Image2->SIPile;
+	}
+
+static void AssertImagesSorted(PileImageData *Images, int ImageCount)
+	{
+	for (int i = 0; i < ImageCount - 1; ++i)
+		if (Images[i].SIPile > Images[i+1].SIPile)
+			Quit("Images not sorted");
+	}
+
+static void SortImagesPile(PileImageData *Images, int ImageCount)
+	{
+	qsort(Images, ImageCount, sizeof(PileImageData), CmpImages);
+	}
+
+static void SortImages()
+	{
+	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
+		{
+		PileData &Pile = Piles[PileIndex];
+		SortImagesPile(Pile.Images, Pile.ImageCount);
+#if	DEBUG
+		AssertImagesSorted(Pile.Images, Pile.ImageCount);
+#endif
+		}
+	}
+
+static void CreatePiles(const HitData *Hits, int HitCount,
+  PILE_INDEX_TYPE *PileIndexes)
+	{
+	Piles = all(PileData, PileCount);
+	zero(Piles, PileData, PileCount);
+	for (int i = 0; i < PileCount; ++i)
+		{
+		Piles[i].FamIndex = -1;
+		Piles[i].SuperFamIndex = -1;
+		Piles[i].Rev = -1;
+		}
+
+// Count images in stack
+	ProgressStart("Create stacks: count images");
+	for (int HitIndex = 0; HitIndex < HitCount; ++HitIndex)
+		{
+		const HitData &Hit = Hits[HitIndex];
+
+		int Pos = Hit.QueryFrom/CHUNK_LENGTH;
+		PILE_INDEX_TYPE PileIndex = PileIndexes[Pos];
+		assert(PileIndex == PileIndexes[Hit.QueryTo/CHUNK_LENGTH]);
+		assert(PileIndex >= 0 && PileIndex < PileCount);
+		++(Piles[PileIndex].ImageCount);
+
+		Pos = Hit.TargetFrom/CHUNK_LENGTH;
+		PileIndex = PileIndexes[Pos];
+		assert(PileIndex >= 0 && PileIndex < PileCount);
+		assert(PileIndex == PileIndexes[Hit.TargetTo/CHUNK_LENGTH]);
+		++(Piles[PileIndex].ImageCount);
+		}
+	ProgressDone();
+
+// Allocate memory for image list
+	int TotalImageCount = 0;
+	ProgressStart("Create stacks: allocate image memory");
+	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
+		{
+		PileData &Pile = Piles[PileIndex];
+		const int ImageCount = Pile.ImageCount;
+		TotalImageCount += ImageCount;
+		assert(ImageCount > 0);
+		Pile.Images = all(PileImageData, ImageCount);
+		}
+	ProgressDone();
+
+// Build image list
+	ProgressStart("Create stacks: build image list");
+	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
+		{
+		PileData &Pile = Piles[PileIndex];
+		Pile.ImageCount = 0;
+		Pile.From = -1;
+		Pile.To = -1;
+		}
+
+	for (int HitIndex = 0; HitIndex < HitCount; ++HitIndex)
+		{
+		const HitData &Hit = Hits[HitIndex];
+
+		const bool Rev = Hit.Rev;
+
+		const int Length1 = Hit.QueryTo - Hit.QueryFrom;
+		const int Length2 = Hit.TargetTo - Hit.TargetFrom;
+
+		const int From1 = Hit.QueryFrom;
+		const int From2 = Hit.TargetFrom;
+
+		const int To1 = Hit.QueryTo;
+		const int To2 = Hit.TargetTo;
+
+		const int Pos1 = From1/CHUNK_LENGTH;
+		const int Pos2 = From2/CHUNK_LENGTH;
+
+		PILE_INDEX_TYPE PileIndex1 = PileIndexes[Pos1];
+		PILE_INDEX_TYPE PileIndex2 = PileIndexes[Pos2];
+
+		assert(PileIndex1 == PileIndexes[(From1 + Length1 - 1)/CHUNK_LENGTH]);
+		assert(PileIndex1 >= 0 && PileIndex1 < PileCount);
+
+		assert(PileIndex2 == PileIndexes[(From2 + Length2 - 1)/CHUNK_LENGTH]);
+		assert(PileIndex2 >= 0 && PileIndex2 < PileCount);
+
+		PileData &Pile1 = Piles[PileIndex1];
+		PileImageData &Image1 = Pile1.Images[Pile1.ImageCount++];
+		Image1.SILength = Length2;
+		Image1.SIPile = PileIndex2;
+		Image1.SIRev = Rev;
+
+		PileData &Pile2 = Piles[PileIndex2];
+		PileImageData &Image2 = Pile2.Images[Pile2.ImageCount++];
+		Image2.SILength = Length1;
+		Image2.SIPile = PileIndex1;
+		Image2.SIRev = Rev;
+
+		if (Pile1.From == -1 || From1 < Pile1.From)
+			Pile1.From = From1;
+		if (Pile1.To == -1 || To1 > Pile1.To)
+			Pile1.To = To1;
+
+		if (Pile2.From == -1 || From2 < Pile2.From)
+			Pile2.From = From2;
+		if (Pile2.To == -1 || To2 > Pile2.To)
+			Pile2.To = To2;
+
+		if (Pile1.ImageCount > MaxImageCount)
+			MaxImageCount = Pile1.ImageCount;
+		if (Pile2.ImageCount > MaxImageCount)
+			MaxImageCount = Pile2.ImageCount;
+		}
+	ProgressDone();
+	}
+
+static int FindGlobalEdgesPileMulti(PileData &Pile, int PileIndex,
+  PILE_INDEX_TYPE Partners[], bool PartnersRev[])
+	{
+	const int PileLength = Pile.To - Pile.From + 1;
+	const int MinLength = (PileLength*(100 - g_paramMaxLengthDiffPct))/100;
+	const int MaxLength = (PileLength*(100 + g_paramMaxLengthDiffPct))/100;
+
+	const int ImageCount = Pile.ImageCount;
+	SortImagesPile(Pile.Images, ImageCount);
+
+#if	TRACE
+	Log("Pile1  Pile2  Pile1L  Pile2L  Fract1  Fract2  Global [Multi]\n");
+	Log("------  ------  -------  -------  ------  ------  ------\n");
+#endif
+
+	int CurrentPartnerPileIndex = -1;
+	int BasesCovered = 0;
+	int PartnerCount = 0;
+	for (int ImageIndex = 0; ; ++ImageIndex)
+		{
+		int PartnerPileIndex;
+		int ImageLength = 0;
+		bool Rev = false;
+		if (ImageIndex < ImageCount)
+			{
+			const PileImageData &Image = Pile.Images[ImageIndex];
+			PartnerPileIndex = Image.SIPile;
+			ImageLength = Image.SILength;
+			Rev = Image.SIRev;
+			}
+		else
+			PartnerPileIndex = -1;
+
+		if (PartnerPileIndex == CurrentPartnerPileIndex)
+			BasesCovered += ImageLength;
+		else
+			{
+			if (CurrentPartnerPileIndex != -1)
+				{
+				const PileData &PartnerPile = Piles[CurrentPartnerPileIndex];
+				const int PartnerPileLength = PartnerPile.To - PartnerPile.From + 1;
+				bool IsGlobalMatch = 
+				  PartnerPileLength >= MinLength && PartnerPileLength <= MaxLength &&
+				  BasesCovered >= MinLength && PartnerPileIndex != PileIndex;
+#if	TRACE
+				Log("%6d  %6d  %7d  %7d  %5.0f%%  %5.0f%%  %c\n",
+				  PileIndex,
+				  CurrentPartnerPileIndex,
+				  PileLength,
+				  PartnerPileLength,
+				  (BasesCovered*100.0)/PileLength,
+				  (BasesCovered*100.0)/PartnerPileLength,
+				  IsGlobalMatch ? 'Y' : 'N');
+#endif
+				if (IsGlobalMatch)
+					{
+					PartnersRev[PartnerCount] = Rev; // TODO
+					Partners[PartnerCount] = CurrentPartnerPileIndex;
+					++PartnerCount;
+					}
+				}
+			CurrentPartnerPileIndex = PartnerPileIndex;
+			BasesCovered = ImageLength;
+			}
+		if (ImageIndex == ImageCount)
+			break;
+		}
+	return PartnerCount;
+	}
+
+static int FindGlobalEdgesPileSingle(PileData &Pile, int PileIndex,
+  PILE_INDEX_TYPE Partners[], bool PartnersRev[])
+	{
+	const int ImageCount = Pile.ImageCount;
+	const int PileLength = Pile.To - Pile.From + 1;
+
+	const int MinLength = (PileLength*(100 - g_paramMaxLengthDiffPct))/100;
+	const int MaxLength = (PileLength*(100 + g_paramMaxLengthDiffPct))/100;
+
+#if	TRACE
+	Log("Pile1  Pile2  Pile1L  Pile2L  Fract1  Fract2  Global [Single]\n");
+	Log("------  ------  -------  -------  ------  ------  ------\n");
+#endif
+	int PartnerCount = 0;
+	for (int ImageIndex = 0; ImageIndex < ImageCount; ++ImageIndex)
+		{
+		const PileImageData &Image = Pile.Images[ImageIndex];
+		const int PartnerImageLength = Image.SILength;
+		const int PartnerPileIndex = Image.SIPile;
+		const PileData &PartnerPile = Piles[PartnerPileIndex];
+		const int PartnerPileLength = PartnerPile.To - PartnerPile.From + 1;
+
+		bool IsGlobalImage = 
+		  PartnerPileLength >= MinLength && PartnerPileLength <= MaxLength &&
+		  PartnerImageLength >= MinLength && PartnerImageLength <= MaxLength &&
+		  PartnerPileIndex != PileIndex;
+#if	TRACE
+		Log("%6d  %6d  %7d  %7d  %5.0f%%  %5.0f%%  %c\n",
+		  PileIndex,
+		  PartnerPileIndex,
+		  PileLength,
+		  PartnerPileLength,
+		  (PartnerImageLength*100.0)/PileLength,
+		  (PartnerImageLength*100.0)/PartnerPileLength,
+		  IsGlobalImage ? 'Y' : 'N');
+#endif
+		if (IsGlobalImage)
+			{
+			PartnersRev[PartnerCount] = Image.SIRev;
+			Partners[PartnerCount] = PartnerPileIndex;
+			++PartnerCount;
+			}
+		}
+	return PartnerCount;
+	}
+
+static void AddEdges(EdgeList &Edges, PILE_INDEX_TYPE PileIndex,
+  PILE_INDEX_TYPE Partners[], bool PartnersRev[], int PartnerCount)
+	{
+	EdgeCount += PartnerCount;
+	for (int i = 0; i < PartnerCount; ++i)
+		{
+		int PileIndex2 = Partners[i];
+		EdgeData Edge;
+		Edge.Node1 = PileIndex;
+		Edge.Node2 = PileIndex2;
+		Edge.Rev = PartnersRev[i];
+		Edges.push_back(Edge);
+		}
+	}
+
+static void FindGlobalEdges(EdgeList &Edges, int MaxImageCount)
+	{
+	Edges.clear();
+
+	PILE_INDEX_TYPE *Partners = all(PILE_INDEX_TYPE, MaxImageCount);
+	bool *PartnersRev = all(bool, MaxImageCount);
+	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
+		{
+		PileData &Pile = Piles[PileIndex];
+		int PartnerCount;
+		if (g_paramSingleHitCoverage)
+			PartnerCount = FindGlobalEdgesPileSingle(Pile, PileIndex, Partners, PartnersRev);
+		else
+			PartnerCount = FindGlobalEdgesPileMulti(Pile, PileIndex, Partners, PartnersRev);
+		AddEdges(Edges, PileIndex, Partners, PartnersRev, PartnerCount);
+		}
+	freemem(Partners);
+	freemem(PartnersRev);
+	}
+
+static void AssignFamsToPiles(FamList &Fams)
+	{
+	int FamIndex = 0;
+	for (PtrFamList p = Fams.begin(); p != Fams.end(); ++p)
+		{
+		FamData *Fam = *p;
+		for (PtrFamData q = Fam->begin(); q != Fam->end(); ++q)
+			{
+			FamMemberData &FamMember = *q;
+			int PileIndex = FamMember.PileIndex;
+			PileData &Pile = Piles[PileIndex];
+			Pile.FamIndex = FamIndex;
+			Pile.Rev = (int) FamMember.Rev;
+			}
+		++FamIndex;
+		}
+	}
+
+static inline unsigned TriangleSubscript(unsigned FamCount, unsigned i, unsigned j)
+	{
+	assert(i >= 0 && j >= 0 && i < FamCount && j < FamCount);
+	unsigned v;
+	if (i >= j)
+		v = j + (i*(i - 1))/2;
+	else
+		v = i + (j*(j - 1))/2;
+	assert(v < (FamCount*(FamCount - 1))/2);
+	return v;
+	}
+
+static void FindSuperFamEdges(FamList &Fams, EdgeList &Edges)
+	{
+	const int FamCount = (int) Fams.size();
+
+// Allocate triangular array Related[i][j], value is true
+// iff families i and j are related (i.e., there is a local
+// alignment between some member of i and some member of j).
+	const int TriangleSize = (FamCount*(FamCount - 1))/2;
+	bool *Related = all(bool, TriangleSize);
+	zero(Related, bool, TriangleSize);
+	for (PtrFamList p = Fams.begin(); p != Fams.end(); ++p)
+		{
+		FamData *Fam = *p;
+		for (PtrFamData q = Fam->begin(); q != Fam->end(); ++q)
+			{
+			FamMemberData &FamMember = *q;
+			int PileIndex = FamMember.PileIndex;
+			const PileData &Pile = Piles[PileIndex];
+			const int FamIndex = Pile.FamIndex;
+			if (-1 == FamIndex)
+				continue;
+			const int ImageCount = Pile.ImageCount;
+			for (int ImageIndex = 0; ImageIndex < ImageCount; ++ImageIndex)
+				{
+				const PileImageData &Image = Pile.Images[ImageIndex];
+				const int PartnerPileIndex = Image.SIPile;
+				if (PartnerPileIndex == PileIndex)
+					continue;
+				const PileData &PartnerPile = Piles[PartnerPileIndex];
+				const int PartnerFamIndex = PartnerPile.FamIndex;
+				if (-1 == PartnerFamIndex || PartnerFamIndex == FamIndex)
+					continue;
+				const int Index = TriangleSubscript(FamCount, FamIndex, PartnerFamIndex);
+				assert(Index >= 0 && Index < TriangleSize);
+				Related[Index] = true;
+				}
+			}
+		}
+
+	Edges.clear();
+	for (int i = 0; i < FamCount; ++i)
+		for (int j = i + 1; j < FamCount; ++j)
+			{
+			const int Index = TriangleSubscript(FamCount, i, j);
+			if (Related[Index])
+				{
+//				Log("R %d %d\n", i, j);
+				EdgeData Edge;
+				Edge.Node1 = i;
+				Edge.Node2 = j;
+				Edge.Rev = false;
+				Edges.push_back(Edge);
+				}
+			}
+	}
+
+static void AssignSuperFamsToPiles(FamList &Fams, FamList &SuperFams)
+	{
+	const int FamCount = (int) Fams.size();
+	FamData **FamVect = all(FamData *, FamCount);
+
+	int FamIndex = 0;
+	for (PtrFamList p = Fams.begin(); p != Fams.end(); ++p)
+		{
+		FamVect[FamIndex] = *p;
+		++FamIndex;
+		}
+
+	int SuperFamIndex = 0;
+	for (PtrFamList pSF = SuperFams.begin(); pSF != SuperFams.end(); ++pSF)
+		{
+		FamData &SFFams = *(*pSF);
+		for (PtrFamData p = SFFams.begin(); p != SFFams.end(); ++p)
+			{
+			FamMemberData &FamMember = *p;
+			int FamIndex = FamMember.PileIndex;
+			assert(FamIndex >= 0 && FamIndex < FamCount);
+			FamData *Fam = FamVect[FamIndex];
+			for (PtrFamData q = Fam->begin(); q != Fam->end(); ++q)
+				{
+				FamMemberData &FamMember = *q;
+				int PileIndex = FamMember.PileIndex;
+				assert(PileIndex >= 0 && PileIndex < PileCount);
+				PileData &Pile = Piles[PileIndex];
+				assert(Pile.FamIndex == FamIndex);
+				Pile.SuperFamIndex = SuperFamIndex;
+				}
+			}
+		++SuperFamIndex;
+		}
+	}
+
+static void FindSingletonSuperFams(FamList &Fams, FamList &SuperFams)
+	{
+	const int FamCount = (int) Fams.size();
+	FamData **FamVect = all(FamData *, FamCount);
+	bool *FamAssigned = all(bool, FamCount);
+
+	int FamIndex = 0;
+	for (PtrFamList p = Fams.begin(); p != Fams.end(); ++p)
+		{
+		FamVect[FamIndex] = *p;
+		FamAssigned[FamIndex] = false;
+		++FamIndex;
+		}
+
+// Flag families that have been assigned to superfamilies
+	for (PtrFamList pSF = SuperFams.begin(); pSF != SuperFams.end(); ++pSF)
+		{
+		FamData &SFFams = *(*pSF);
+		for (PtrFamData p = SFFams.begin(); p != SFFams.end(); ++p)
+			{
+			FamMemberData &FamMember = *p;
+			int FamIndex = FamMember.PileIndex;
+			assert(FamIndex >= 0 && FamIndex < FamCount);
+			FamAssigned[FamIndex] = true;
+			}
+		}
+
+// Create new superfamily for each unassigned family
+	for (int FamIndex = 0; FamIndex < FamCount; ++FamIndex)
+		{
+		if (FamAssigned[FamIndex])
+			continue;
+
+		FamMemberData Fam;
+		Fam.PileIndex = FamIndex;
+		Fam.Rev = false;
+
+		FamData *SuperFam = new FamData;
+		SuperFam->push_back(Fam);
+
+		SuperFams.push_back(SuperFam);
+		}
+
+// Validate
+	int SuperFamIndex = 0;
+	for (PtrFamList pSF = SuperFams.begin(); pSF != SuperFams.end(); ++pSF)
+		{
+		FamData &SFFams = *(*pSF);
+		for (PtrFamData p = SFFams.begin(); p != SFFams.end(); ++p)
+			{
+			FamMemberData &FamMember = *p;
+			int FamIndex = FamMember.PileIndex;
+			assert(FamIndex >= 0 && FamIndex < FamCount);
+			FamData *Fam = FamVect[FamIndex];
+
+			for (PtrFamData q = Fam->begin(); q != Fam->end(); ++q)
+				{
+				FamMemberData &FamMember = *q;
+				int PileIndex = FamMember.PileIndex;
+				if (PileIndex == 5354)
+					Log("");
+				PileData &Pile = Piles[PileIndex];
+
+				assert(Pile.FamIndex == FamIndex);
+				}
+			}
+		++SuperFamIndex;
+		}
+	}
+
+void TRS()
+	{
+	const char *InputFileName = RequiredValueOpt("trs");
+
+	const char *OutputFileName = ValueOpt("out");
+	const char *PilesFileName = ValueOpt("piles");
+	const char *ImagesFileName = ValueOpt("images");
+
+	const char *strMinFamSize = ValueOpt("famsize");
+	const char *strMaxLengthDiffPct = ValueOpt("maxlengthdiffpct");
+	g_paramSingleHitCoverage = !FlagOpt("multihit");
+
+	if (0 == OutputFileName && 0 == PilesFileName && 0 == ImagesFileName)
+		Quit("No output file specified, must be at least one of -out, -piles, -images");
+
+	if (0 != strMinFamSize)
+		g_paramMinFamSize = atoi(strMinFamSize);
+	if (0 != strMaxLengthDiffPct)
+		g_paramMaxLengthDiffPct = atoi(strMaxLengthDiffPct);
+
+	Log("singlehit=%s famsize=%d maxlengthdiffpct=%d\n",
+	  g_paramSingleHitCoverage ? "True" : "False",
+	  g_paramMinFamSize,
+	  g_paramMaxLengthDiffPct);
+
+	ProgressStart("Read hit file");
+	int HitCount;
+	int SeqLength;
+	HitData *Hits = ReadHits(InputFileName, &HitCount, &SeqLength);
+	ProgressDone();
+
+	Progress("%d hits", HitCount);
+
+	SeqLengthChunks = (SeqLength + CHUNK_LENGTH - 1)/CHUNK_LENGTH;
+
+	const int BitVectorLength = (SeqLengthChunks + BITS_PER_INT - 1)/BITS_PER_INT;
+	int *CopyCount = all(int, BitVectorLength);
+	zero(CopyCount, int, BitVectorLength);
+
+	ProgressStart("Compute copy counts");
+	for (int i = 0; i < HitCount; ++i)
+		IncCopyCount(CopyCount, Hits[i]);
+	ProgressDone();
+
+	ProgressStart("Identify piles");
+	PILE_INDEX_TYPE *PileIndexes = IdentifyPiles(CopyCount);
+	ProgressDone();
+
+	Progress("%d stacks", PileCount);
+
+	freemem(CopyCount);
+	CopyCount = 0;
+
+	CreatePiles(Hits, HitCount, PileIndexes);
+
+	if (0 != ImagesFileName)
+		{
+		ProgressStart("Writing images file");
+		WriteImages(ImagesFileName, Hits, HitCount, PileIndexes);
+		ProgressDone();
+		}
+
+	freemem(Hits);
+	Hits = 0;
+
+	if (0 != PilesFileName)
+		{
+		ProgressStart("Writing piles file");
+		WritePiles(PilesFileName, Piles, PileCount);
+		ProgressDone();
+		}
+
+	freemem(PileIndexes);
+	PileIndexes = 0;
+
+	if (0 == OutputFileName)
+		return;
+
+	ProgressStart("Find edges");
+	EdgeList Edges;
+	FindGlobalEdges(Edges, MaxImageCount);
+	ProgressDone();
+
+	Progress("%d edges", (int) Edges.size());
+
+	ProgressStart("Find families");
+	FamList Fams;
+	FindConnectedComponents(Edges, Fams, g_paramMinFamSize);
+	AssignFamsToPiles(Fams);
+	ProgressDone();
+
+	Progress("%d families", (int) Fams.size());
+
+	ProgressStart("Find superfamilies");
+	EdgeList SuperEdges;
+	FindSuperFamEdges(Fams, SuperEdges);
+
+	FamList SuperFams;
+	FindConnectedComponents(SuperEdges, SuperFams, 1);
+	FindSingletonSuperFams(Fams, SuperFams);
+
+	AssignSuperFamsToPiles(Fams, SuperFams);
+	ProgressDone();
+
+	Progress("%d superfamilies", (int) SuperFams.size());
+
+	ProgressStart("Write TRS output file");
+	WriteTRSFile(OutputFileName, Piles, PileCount);
+	ProgressDone();
+	}
diff --git a/trs2fasta.cpp b/trs2fasta.cpp
new file mode 100755
index 0000000..b52b625
--- /dev/null
+++ b/trs2fasta.cpp
@@ -0,0 +1,142 @@
+#include "piler2.h"
+
+static void LogRecords(const TRSData *TRSs, int TRSCount)
+	{
+	Log("              Contig        From          To  Length  +  Family\n");
+	Log("--------------------  ----------  ----------  ------  -  ------\n");
+	for (int TRSIndex = 0; TRSIndex < TRSCount; ++TRSIndex)
+		{
+		const TRSData &TRS = TRSs[TRSIndex];
+		Log("%20.20s  %10d  %10d  %6d  %c  %d.%d\n",
+		  TRS.ContigLabel,
+		  TRS.ContigFrom,
+		  TRS.ContigTo,
+		  TRS.ContigTo - TRS.ContigFrom + 1,
+		  TRS.Rev ? '-' : '+',
+		  TRS.SuperFamIndex,
+		  TRS.FamIndex);
+		}
+	}
+
+static int CmpTRS(const void *s1, const void *s2)
+	{
+	const TRSData *TRS1 = (const TRSData *) s1;
+	const TRSData *TRS2 = (const TRSData *) s2;
+	return TRS1->FamIndex - TRS2->FamIndex;
+	}
+
+static char *FamFileName(const char *Path, int Fam, int SuperFam)
+	{
+	static char FileName[256];
+	char s[128];
+	if (SuperFam == -1)
+		sprintf(s, "/%d", Fam);
+	else
+		sprintf(s, "/%d.%d", SuperFam, Fam);
+	if (strlen(Path) + strlen(s) + 3 >= sizeof(FileName))
+		Quit("Path name too long");
+	strcpy(FileName, Path);
+	strcat(FileName, s);
+	return FileName;
+	}
+
+static char *TRSLabel(const char *Prefix, const TRSData &TRS)
+	{
+	int n = (int) strlen(TRS.ContigLabel) + 128;
+	char *s = all(char, n);
+	if (TRS.SuperFamIndex == -1)
+		{
+		if (0 != Prefix)
+			sprintf(s, "%s.%d %s:%d%c",
+			Prefix,
+			TRS.FamIndex,
+			TRS.ContigLabel,
+			TRS.ContigFrom+1,
+			TRS.Rev ? '-' : '+');
+		else
+			sprintf(s, "%d.%s:%d%c",
+			TRS.FamIndex,
+			TRS.ContigLabel,
+			TRS.ContigFrom+1,
+			TRS.Rev ? '-' : '+');
+		}
+	else
+		{
+		if (0 != Prefix)
+			sprintf(s, "%s.%d.%d %s:%d%c",
+			Prefix,
+			TRS.SuperFamIndex,
+			TRS.FamIndex,
+			TRS.ContigLabel,
+			TRS.ContigFrom+1,
+			TRS.Rev ? '-' : '+');
+		else
+			sprintf(s, "%d.%d %s:%d%c",
+			TRS.SuperFamIndex,
+			TRS.FamIndex,
+			TRS.ContigLabel,
+			TRS.ContigFrom+1,
+			TRS.Rev ? '-' : '+');
+		}
+
+	return s;
+	}
+
+void TRS2Fasta()
+	{
+	const char *TRSFileName = RequiredValueOpt("trs2fasta");
+	const char *SeqFileName = RequiredValueOpt("seq");
+	const char *Path = ValueOpt("path");
+	const char *strMaxFam = ValueOpt("maxfam");
+	const char *Prefix = ValueOpt("prefix");
+
+	int MaxFam = DEFAULT_MAX_FAM;
+	if (strMaxFam != 0)
+		MaxFam = atoi(strMaxFam);
+
+	if (0 == Path)
+		Path = ".";
+
+	ProgressStart("Reading seq file");
+	int SeqLength;
+	const char *Seq = ReadMFA(SeqFileName, &SeqLength);
+	ProgressDone();
+
+	Progress("Seq length %d bases, %.3g Mb", SeqLength, SeqLength/1e6);
+
+	ProgressStart("Read TRS file");
+	int TRSCount;
+	TRSData *TRSs = ReadTRS(TRSFileName, &TRSCount);
+	ProgressDone();
+
+	Progress("%d records", TRSCount);
+
+	ProgressStart("Sorting by family");
+	qsort((void *) TRSs, TRSCount, sizeof(TRSData), CmpTRS);
+	ProgressDone();
+
+	FILE *f = 0;
+	int CurrentFamily = -1;
+	int MemberCount = 0;
+	for (int TRSIndex = 0; TRSIndex < TRSCount; ++TRSIndex)
+		{
+		const TRSData &TRS = TRSs[TRSIndex];
+		if (TRS.FamIndex != CurrentFamily)
+			{
+			if (f != 0)
+				fclose(f);
+			char *FastaFileName = FamFileName(Path, TRS.FamIndex, TRS.SuperFamIndex);
+			f = OpenStdioFile(FastaFileName, FILEIO_MODE_WriteOnly);
+			CurrentFamily = TRS.FamIndex;
+			MemberCount = 0;
+			}
+		++MemberCount;
+		if (MemberCount > MaxFam)
+			continue;
+		const int From = ContigToGlobal(TRS.ContigFrom, TRS.ContigLabel);
+		const int Length = TRS.ContigTo - TRS.ContigFrom + 1;
+		char *Label = TRSLabel(Prefix, TRS);
+		WriteFasta(f, Seq + From, Length, Label, TRS.Rev);
+		freemem(Label);
+		}
+	}
diff --git a/types.h b/types.h
new file mode 100755
index 0000000..9505f14
--- /dev/null
+++ b/types.h
@@ -0,0 +1,152 @@
+#include <list>
+#include <vector>
+using namespace std;
+
+enum FILEIO_MODE
+	{
+	FILEIO_MODE_Undefined = 0,
+	FILEIO_MODE_ReadOnly,
+	FILEIO_MODE_ReadWrite,
+	FILEIO_MODE_WriteOnly
+	};
+
+#if	FILEIO_BINARY_MODE
+#define FILIO_STRMODE_ReadOnly		"rb"
+#define FILIO_STRMODE_WriteOnly		"wb"
+#define FILIO_STRMODE_ReadWrite		"w+b"
+#else
+#define FILIO_STRMODE_ReadOnly		"r"
+#define FILIO_STRMODE_WriteOnly		"w"
+#define FILIO_STRMODE_ReadWrite		"w+"
+#endif
+
+struct HitData
+	{
+	int QueryFrom;
+	int QueryTo;
+	int TargetFrom;
+	int TargetTo;
+	bool Rev;
+	};
+
+typedef int PILE_INDEX_TYPE;
+const PILE_INDEX_TYPE MAX_STACK_INDEX = 0x7ffffff0;
+struct PileImageData
+	{
+// Hack to enforce structure packing
+// in a compiler-independent way
+	PILE_INDEX_TYPE Data[3];
+#define SILength	Data[0]
+#define SIPile		Data[1]
+#define SIRev		Data[2]
+	};
+
+struct EdgeData
+	{
+	int Node1;
+	int Node2;
+	bool Rev;
+	};
+typedef list<EdgeData> EdgeList;
+typedef EdgeList::iterator PtrEdgeList;
+
+struct FamMemberData
+	{
+	int PileIndex;
+	bool Rev;
+	};
+typedef list<FamMemberData> FamData;
+typedef FamData::iterator PtrFamData;
+
+typedef list<FamData *> FamList;
+typedef FamList::iterator PtrFamList;
+
+typedef list<int> IntList;
+typedef IntList::iterator PtrIntList;
+
+typedef list<IntList *> ListOfIntListPtrs;
+typedef ListOfIntListPtrs::iterator PtrListOfIntListPtrs;
+
+struct TRSData
+	{
+	char *ContigLabel;
+	int ContigFrom;
+	int ContigTo;
+	int FamIndex;
+	int SuperFamIndex;
+	bool Rev;
+	};
+
+struct MotifData
+	{
+	char *ContigLabel;
+	int ContigFrom;
+	int ContigTo;
+	int FamIndex;
+	};
+
+struct PileData
+	{
+	int From;
+	int To;
+	int ImageCount;
+	PileImageData *Images;
+	int SuperFamIndex;
+	int FamIndex;
+	int Rev;
+	};
+
+struct ContigData
+	{
+	int From;
+	int Length;
+	char *Label;
+	ContigData *Next;
+	};
+
+struct GFFRecord
+	{
+	char SeqName[MAX_GFF_FEATURE_LENGTH+1];
+	char Source[MAX_GFF_FEATURE_LENGTH+1];
+	char Feature[MAX_GFF_FEATURE_LENGTH+1];
+	int Start;
+	int End;
+	float Score;
+	char Strand;
+	int Frame;
+	char Attrs[MAX_GFF_FEATURE_LENGTH+1];
+	};
+
+struct GFFRecord2
+	{
+	const char *SeqName;
+	const char *Source;
+	const char *Feature;
+	int Start;
+	int End;
+	float Score;
+	char Strand;
+	int Frame;
+	const char *Attrs;
+	};
+
+struct RepData
+	{
+	char *ContigLabel;
+	int ContigFrom;
+	int ContigTo;
+	char *RepeatName;
+	char *RepeatClass;
+	int RepeatFrom;
+	int RepeatTo;
+	int RepeatLeft;
+	bool Rev;
+	};
+
+class GFFSet;
+class GLIX;
+class IIX;
+
+#include "gffset.h"
+#include "glix.h"
+#include "iix.h"
diff --git a/usage.cpp b/usage.cpp
new file mode 100755
index 0000000..d444575
--- /dev/null
+++ b/usage.cpp
@@ -0,0 +1,40 @@
+#include "piler2.h"
+
+void Credits()
+	{
+	static bool Displayed = false;
+	if (Displayed)
+		return;
+
+	fprintf(stderr, "\n" PILER_LONG_VERSION "\n");
+	fprintf(stderr, "http://www.drive5.com/piler\n");
+	fprintf(stderr, "Written by Robert C. Edgar\n");
+	fprintf(stderr, "This software is donated to the public domain.\n");
+	fprintf(stderr, "Please visit web site for requested citation.\n");
+	Displayed = true;
+	}
+
+void Usage()
+	{
+	Credits();
+	fprintf(stderr,
+"\n"
+"Usage:\n"
+"  piler -trs <hitfile> -out <trsfile> [options]\n"
+"  piler -trs2fasta <trsfile> -seq <fastafile> [-path <path>] [-maxfam <maxfam>]\n"
+"  piler -cons <alnfile> -out <fastafile> -label <label>\n"
+"  piler -annot <gff> -rep <repfile> -out <annotfile>\n"
+"  piler -help\n"
+"  piler -version\n"
+"\n"
+"Use -quiet to suppress progress messages\n"
+"\n"
+"-trs options:\n"
+"  -mincover <n>\n"
+"  -maxlengthdiffpct <n>\n"
+"  -piles <pilefile>\n"
+"  -images <imagefile>\n"
+"  -multihit\n"
+"\n"
+"For further information, please see the User Guide.\n");
+	}
diff --git a/utils.cpp b/utils.cpp
new file mode 100755
index 0000000..b03323a
--- /dev/null
+++ b/utils.cpp
@@ -0,0 +1,89 @@
+#include "piler2.h"
+
+char *strsave(const char *s)
+	{
+	if (0 == s)
+		return 0;
+	char *ptrCopy = strdup(s);
+	if (0 == ptrCopy)
+		Quit("Out of memory");
+	return ptrCopy;
+	}
+
+static const char *StdioStrMode(FILEIO_MODE Mode)
+	{
+	switch (Mode)
+		{
+	case FILEIO_MODE_ReadOnly:
+		return FILIO_STRMODE_ReadOnly;
+	case FILEIO_MODE_WriteOnly:
+		return FILIO_STRMODE_WriteOnly;
+	case FILEIO_MODE_ReadWrite:
+		return FILIO_STRMODE_ReadWrite;
+		}
+	Quit("StdioStrMode: Invalid mode");
+	return "r";
+	}
+
+FILE *OpenStdioFile(const char *FileName, FILEIO_MODE Mode)
+	{
+	const char *strMode = StdioStrMode(Mode);
+	FILE *f = fopen(FileName, strMode);
+	if (0 == f)
+		Quit("Cannot open %s, %s [%d]", FileName, strerror(errno), errno);
+	return f;
+	}
+
+void Rewind(FILE *f)
+	{
+	int Ok = fseek(f, 0, SEEK_SET);
+	if (Ok != 0)
+		Quit("Rewind fseek() != 0 errno=%d", errno);
+
+	long CurrPos = ftell(f);
+	if (CurrPos != 0)
+		Quit("FileSize: ftell=%ld errno=%d", CurrPos, errno);
+	}
+
+int GetFileSize(FILE *f)
+	{
+	long CurrPos = ftell(f);
+	if (CurrPos < 0)
+		Quit("FileSize: ftell<0 (CurrPos), errno=%d", errno);
+
+	int Ok = fseek(f, 0, SEEK_END);
+	if (Ok != 0)
+		Quit("FileSize fseek(END) != 0 errno=%d", errno);
+
+	long Size = ftell(f);
+	if (Size < 0)
+		Quit("FileSize: ftell<0 (size), errno=%d", errno);
+
+	Ok = fseek(f, CurrPos, SEEK_SET);
+	if (Ok != 0)
+		Quit("FileSize fseek(restore curr pos) != 0 errno=%d", errno);
+
+	long NewPos = ftell(f);
+	if (CurrPos < 0)
+		Quit("FileSize: ftell=%ld != CurrPos=%ld", CurrPos, NewPos);
+
+	return (int) Size;
+	}
+
+int Overlap(int From1, int To1, int From2, int To2)
+	{
+	return min(To1, To2) - max(From1, From2);
+	}
+
+// Add minimum possible value of j to i such that:
+//	(a) j >= MinInc, and
+//	(b) (i + j)%MultipleOf = 0.
+int RoundUp(int i, int MinInc, int MultipleOf)
+	{
+	int newi = i + MinInc;
+	int k = newi%MultipleOf;
+	if (k > 0)
+		newi += (MultipleOf - k);
+	assert(newi >= i + MinInc && 0 == newi%MultipleOf);
+	return newi;
+	}
diff --git a/utils_linux.cpp b/utils_linux.cpp
new file mode 100755
index 0000000..f6eda84
--- /dev/null
+++ b/utils_linux.cpp
@@ -0,0 +1,76 @@
+#include "piler2.h"
+
+#if	linux || __linux__
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+double GetRAMSize()
+	{
+	const double DEFAULT_RAM = 1e9;
+	static double RAMMB = 0;
+	if (RAMMB != 0)
+		return RAMMB;
+
+	int fd = open("/proc/meminfo", O_RDONLY);
+	if (-1 == fd)
+		return DEFAULT_RAM;
+
+	char Buffer[128];
+	int n = read(fd, Buffer, sizeof(Buffer) - 1);
+	close(fd);
+	fd = -1;
+
+	if (n <= 0)
+		return DEFAULT_RAM;
+
+	Buffer[n] = 0;
+	char *pMem = strstr(Buffer, "Mem: ");
+	if (0 == pMem)
+		return DEFAULT_RAM;
+	int Bytes = atoi(pMem+4);
+	return (double) Bytes;
+	}
+
+static unsigned g_uPeakMemUseBytes;
+
+unsigned GetMaxMemUseBytes()
+	{
+	return g_uPeakMemUseBytes;
+	}
+
+unsigned GetMemUseBytes()
+	{
+	static char statm[64];
+	static int PageSize;
+	if (0 == statm[0])
+		{
+		PageSize = sysconf(_SC_PAGESIZE);
+		pid_t pid = getpid();
+		sprintf(statm, "/proc/%d/statm", (int) pid);
+		}
+
+	int fd = open(statm, O_RDONLY);
+	if (-1 == fd)
+		return 1000000;
+	char Buffer[64];
+	int n = read(fd, Buffer, sizeof(Buffer) - 1);
+	close(fd);
+	fd = -1;
+
+	if (n <= 0)
+		return 1000000;
+
+	Buffer[n] = 0;
+	int Pages = atoi(Buffer);
+
+	unsigned uBytes = Pages*PageSize;
+	if (uBytes > g_uPeakMemUseBytes)
+		g_uPeakMemUseBytes = uBytes;
+	return uBytes;
+	}
+
+#endif
diff --git a/utils_unix.cpp b/utils_unix.cpp
new file mode 100755
index 0000000..0d28c15
--- /dev/null
+++ b/utils_unix.cpp
@@ -0,0 +1,38 @@
+#include "piler2.h"
+
+#if ((unix || __unix) && !(linux || __linux__))
+
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/unistd.h>
+
+double GetRAMSize()
+	{
+	struct rlimit RL;
+	int Ok = getrlimit(RLIMIT_DATA, &RL);
+	if (Ok != 0)
+		return 1e9;
+	return RL.rlim_cur;
+	}
+
+static unsigned g_uPeakMemUseBytes = 1000000;
+
+unsigned GetPeakMemUseBytes()
+	{
+	return g_uPeakMemUseBytes;
+	}
+
+unsigned GetMemUseBytes()
+	{
+	struct rusage RU;
+	int Ok = getrusage(RUSAGE_SELF, &RU);
+	if (Ok != 0)
+		return 1000000;
+
+	unsigned Bytes = RU.ru_maxrss*1000;
+	if (Bytes > g_uPeakMemUseBytes)
+		g_uPeakMemUseBytes = Bytes;
+	return Bytes;
+	}
+
+#endif
diff --git a/utils_win32.cpp b/utils_win32.cpp
new file mode 100755
index 0000000..851dc02
--- /dev/null
+++ b/utils_win32.cpp
@@ -0,0 +1,34 @@
+#include "piler2.h"
+
+#if	WIN32
+#include <windows.h>
+#include <psapi.h>
+
+double GetRAMSize()
+	{
+	MEMORYSTATUS MS;
+	GlobalMemoryStatus(&MS);
+	return (double) MS.dwTotalPhys;
+	}
+
+static unsigned g_uMaxMemUseBytes;
+
+unsigned GetMaxMemUseBytes()
+	{
+	return g_uMaxMemUseBytes;
+	}
+
+unsigned GetMemUseBytes()
+	{
+	HANDLE hProc = GetCurrentProcess();
+	PROCESS_MEMORY_COUNTERS PMC;
+	BOOL bOk = GetProcessMemoryInfo(hProc, &PMC, sizeof(PMC));
+	if (!bOk)
+		return 1000000;
+	unsigned uBytes = (unsigned) PMC.WorkingSetSize;
+	if (uBytes > g_uMaxMemUseBytes)
+		g_uMaxMemUseBytes = uBytes;
+	return uBytes;
+	}
+
+#endif
diff --git a/writecrisp.cpp b/writecrisp.cpp
new file mode 100755
index 0000000..9f33ef0
--- /dev/null
+++ b/writecrisp.cpp
@@ -0,0 +1,61 @@
+#include "piler2.h"
+
+static void WriteCrisp(FILE *f, const PileData &Pile, int PileIndex)
+	{
+	int FamIndex = Pile.FamIndex;
+	assert(FamIndex >= 0);
+
+	int ContigFrom;
+	const char *ContigLabel = GlobalToContig(Pile.From, &ContigFrom);
+	const int Length = Pile.To - Pile.From + 1;
+	const int ContigTo = ContigFrom + Length - 1;
+	const char Strand = Pile.Rev ? '-' : '+';
+
+// GFF fields are:
+// <seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes] [comments]
+//     0         1         2        3      4      5        6       7         8           9
+	fprintf(f, "%s\tpiler\ttrs\t%d\t%d\t0\t%c\t.\tFamily %d ; Pile %d\n",
+	  ContigLabel,
+	  ContigFrom + 1,
+	  ContigTo + 1,
+	  Strand,
+	  FamIndex,
+	  PileIndex);
+	}
+
+void WriteCrispFile(const char *OutputFileName, const PileData *Piles, int PileCount)
+	{
+	FILE *f = OpenStdioFile(OutputFileName, FILEIO_MODE_WriteOnly);
+
+	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
+		{
+		const PileData &Pile = Piles[PileIndex];
+		if (-1 == Pile.FamIndex)
+			continue;
+
+		WriteCrisp(f, Pile, PileIndex);
+		}
+
+	fclose(f);
+	}
+
+void WriteArray(FILE *f, int FamIndex, int Lo, int Hi)
+	{
+	assert(FamIndex >= 0);
+
+	int ContigFrom;
+	const char *ContigLabel = GlobalToContig(Lo, &ContigFrom);
+	const int Length = Hi - Lo + 1;
+	const int ContigTo = ContigFrom + Length - 1;
+	const char Strand = '+';
+
+// GFF fields are:
+// <seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes] [comments]
+//     0         1         2        3      4      5        6       7         8           9
+	fprintf(f, "%s\tpiler\ttrs\t%d\t%d\t0\t%c\t.\tFamily %d\n",
+	  ContigLabel,
+	  ContigFrom + 1,
+	  ContigTo + 1,
+	  Strand,
+	  FamIndex);
+	}
diff --git a/writefasta.cpp b/writefasta.cpp
new file mode 100755
index 0000000..c685ba4
--- /dev/null
+++ b/writefasta.cpp
@@ -0,0 +1,62 @@
+#include "piler2.h"
+
+
+unsigned char CompChar[256];
+
+static void InitCompChar()
+	{
+	for (unsigned i = 0; i < 256; ++i)
+		CompChar[i] = (unsigned char) i;
+
+	CompChar['a'] = 't';
+	CompChar['c'] = 'g';
+	CompChar['g'] = 'c';
+	CompChar['t'] = 'a';
+	CompChar['n'] = 'n';
+	CompChar['A'] = 'T';
+	CompChar['C'] = 'G';
+	CompChar['G'] = 'C';
+	CompChar['T'] = 'A';
+	}
+
+void WriteFasta(FILE *f, const char *Seq, int Length, const char *Label, bool Rev)
+	{
+	static bool CompCharInit = false;
+	if (!CompCharInit)
+		{
+		InitCompChar();
+		CompCharInit = true;
+		}
+
+	fprintf(f, ">%s\n", Label);
+
+	if (Rev)
+		{
+		int Index = Length;
+		for (int i = 0; i < Length; i += FASTA_BLOCK)
+			{
+			int n = FASTA_BLOCK;
+			if (i + n > Length)
+				n = Length - i;
+			for (int j = 0; j < n; ++j)
+				{
+				const unsigned char c = Seq[--Index];
+				const unsigned char cComp = CompChar[c];
+				fputc(cComp, f);
+				}
+			fputc('\n', f);
+			}
+		}
+	else
+		{
+		for (int i = 0; i < Length; i += FASTA_BLOCK)
+			{
+			int n = FASTA_BLOCK;
+			if (i + n > Length)
+				n = Length - i;
+			for (int j = 0; j < n; ++j)
+				fputc(*Seq++, f);
+			fputc('\n', f);
+			}
+		}
+	}
diff --git a/writeimages.cpp b/writeimages.cpp
new file mode 100755
index 0000000..fbfd0b7
--- /dev/null
+++ b/writeimages.cpp
@@ -0,0 +1,50 @@
+#include "piler2.h"
+
+static void WriteHit(FILE *f, const HitData &Hit, const PILE_INDEX_TYPE *PileIndexes)
+	{
+	const int TargetFrom = Hit.TargetFrom;
+	const int QueryFrom = Hit.QueryFrom;
+
+	int TargetContigFrom;
+	const char *TargetLabel = GlobalToContig(TargetFrom, &TargetContigFrom);
+
+	int QueryContigFrom;
+	const char *QueryLabel = GlobalToContig(QueryFrom, &QueryContigFrom);
+
+	const int TargetLength = Hit.TargetTo - TargetFrom + 1;
+	const int QueryLength = Hit.QueryTo - QueryFrom + 1;
+
+	const int QueryPileIndex = PileIndexes[QueryFrom];
+	const int TargetPileIndex = PileIndexes[TargetFrom];
+
+	const int QueryContigTo = QueryContigFrom + QueryLength - 1;
+	const int TargetContigTo = TargetContigFrom + TargetLength - 1;
+
+	const char Strand = Hit.Rev ? '-' : '+';
+
+// GFF fields are:
+// <seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes] [comments]
+//     0         1         2        3      4      5        6       7         8           9
+	fprintf(f, "%s\tpiler\thit\t%d\t%d\t0\t%c\t.\tTarget %s %d %d ; Piles %d %d\n",
+	  QueryLabel,
+	  QueryContigFrom + 1,
+	  QueryContigTo + 1,
+	  Strand,
+	  TargetLabel,
+	  TargetContigFrom + 1,
+	  TargetContigTo + 1,
+	  QueryPileIndex,
+	  TargetPileIndex);
+	}
+
+void WriteImages(const char *FileName, const HitData *Hits, int HitCount,
+  const PILE_INDEX_TYPE *PileIndexes)
+	{
+	FILE *f = OpenStdioFile(FileName, FILEIO_MODE_WriteOnly);
+	for (int HitIndex = 0; HitIndex < HitCount; ++HitIndex)
+		{
+		const HitData &Hit = Hits[HitIndex];
+		WriteHit(f, Hit, PileIndexes);
+		}
+	fclose(f);
+	}
diff --git a/writepiles.cpp b/writepiles.cpp
new file mode 100755
index 0000000..45b57de
--- /dev/null
+++ b/writepiles.cpp
@@ -0,0 +1,31 @@
+#include "piler2.h"
+
+static void WritePile(FILE *f, const PileData &Pile, int PileIndex)
+	{
+	int ContigFrom;
+	const char *ContigLabel = GlobalToContig(Pile.From, &ContigFrom);
+	const int Length = Pile.To - Pile.From + 1;
+	const int ContigTo = ContigFrom + Length - 1;
+	const char Strand = Pile.Rev ? '-' : '+';
+
+// GFF fields are:
+// <seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes] [comments]
+//     0         1         2        3      4      5        6       7         8           9
+	fprintf(f, "%s\tpiler\tpile\t%d\t%d\t0\t%c\t.\tPile %d\n",
+	  ContigLabel,
+	  ContigFrom + 1,
+	  ContigTo + 1,
+	  Strand,
+	  PileIndex);
+	}
+
+void WritePiles(const char *FileName, const PileData *Piles, int PileCount)
+	{
+	FILE *f = OpenStdioFile(FileName, FILEIO_MODE_WriteOnly);
+	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
+		{
+		const PileData &Pile = Piles[PileIndex];
+		WritePile(f, Pile, PileIndex);
+		}
+	fclose(f);
+	}
diff --git a/writetrs.cpp b/writetrs.cpp
new file mode 100755
index 0000000..51db18b
--- /dev/null
+++ b/writetrs.cpp
@@ -0,0 +1,44 @@
+#include "piler2.h"
+
+static void WriteTRS(FILE *f, const PileData &Pile, int PileIndex)
+	{
+	int FamIndex = Pile.FamIndex;
+	assert(FamIndex >= 0);
+
+	int SuperFamIndex = Pile.SuperFamIndex;
+	assert(SuperFamIndex >= 0);
+
+	int ContigFrom;
+	const char *ContigLabel = GlobalToContig(Pile.From, &ContigFrom);
+	const int Length = Pile.To - Pile.From + 1;
+	const int ContigTo = ContigFrom + Length - 1;
+	const char Strand = Pile.Rev ? '-' : '+';
+
+// GFF fields are:
+// <seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes] [comments]
+//     0         1         2        3      4      5        6       7         8           9
+	fprintf(f, "%s\tpiler\ttrs\t%d\t%d\t0\t%c\t.\tFamily %d.%d ; Pile %d\n",
+	  ContigLabel,
+	  ContigFrom + 1,
+	  ContigTo + 1,
+	  Strand,
+	  FamIndex,
+	  SuperFamIndex,
+	  PileIndex);
+	}
+
+void WriteTRSFile(const char *OutputFileName, const PileData *Piles, int PileCount)
+	{
+	FILE *f = OpenStdioFile(OutputFileName, FILEIO_MODE_WriteOnly);
+
+	for (int PileIndex = 0; PileIndex < PileCount; ++PileIndex)
+		{
+		const PileData &Pile = Piles[PileIndex];
+		if (-1 == Pile.FamIndex)
+			continue;
+
+		WriteTRS(f, Pile, PileIndex);
+		}
+
+	fclose(f);
+	}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/piler.git



More information about the debian-med-commit mailing list