[med-svn] [mauvealigner] 01/02: Imported Upstream version 1.2.0+4713

Andreas Tille tille at debian.org
Sun Apr 19 20:14:23 UTC 2015


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository mauvealigner.

commit 541a254bb0b97e5cdfab13284177d260e91ae937
Author: Andreas Tille <tille at debian.org>
Date:   Sun Apr 19 22:06:14 2015 +0200

    Imported Upstream version 1.2.0+4713
---
 AUTHORS                            |    1 +
 COPYING                            |  340 +++++
 ChangeLog                          |    0
 Makefile.am                        |   26 +
 NEWS                               |    0
 README                             |    0
 acinclude.m4                       |  156 +++
 autogen.sh                         |    5 +
 configure.ac                       |   91 ++
 include/getopt.h                   |  133 ++
 src/AlignmentTree.cpp              |  188 +++
 src/AlignmentTree.h                |    0
 src/Makefile.am                    |  225 +++
 src/MatchRecord.h                  |  369 +++++
 src/RepeatHashCat.cpp              |   12 +
 src/RepeatHashCat.h                |   21 +
 src/SeedMatchEnumerator.h          |  144 ++
 src/UniqueMatchFinder.cpp          |   60 +
 src/UniqueMatchFinder.h            |   34 +
 src/addUnalignedIntervals.cpp      |   33 +
 src/alignmentProjector.cpp         |  101 ++
 src/backbone_global_to_local.cpp   |   60 +
 src/bbAnalyze.cpp                  | 1411 +++++++++++++++++++
 src/bbBreakOnGenes.cpp             |  358 +++++
 src/bbFilter.cpp                   |  292 ++++
 src/calculateBackboneCoverage.cpp  |  138 ++
 src/calculateBackboneCoverage2.cpp |  132 ++
 src/calculateCoverage.cpp          |   89 ++
 src/checkForLGT.cpp                |  253 ++++
 src/coordinateTranslate.cpp        |   51 +
 src/countInPlaceInversions.cpp     |   69 +
 src/createBackboneMFA.cpp          |   57 +
 src/evd.cpp                        |  129 ++
 src/extractBCITrees.cpp            |  369 +++++
 src/extractBackbone.cpp            |   83 ++
 src/extractBackbone2.cpp           |   70 +
 src/extractSubalignments.cpp       |   96 ++
 src/gappiness.cpp                  |   53 +
 src/getAlignmentWindows.cpp        |  137 ++
 src/getOrthologList.cpp            |  317 +++++
 src/getopt.c                       | 1279 +++++++++++++++++
 src/getopt.cpp                     |  772 ++++++++++
 src/getopt.h                       |  185 +++
 src/getopt1.c                      |  196 +++
 src/joinAlignmentFiles.cpp         |  108 ++
 src/makeBadgerMatrix.cpp           |  117 ++
 src/makeMc4Matrix.cpp              |  112 ++
 src/mauveAligner.cpp               |  919 ++++++++++++
 src/mauveAligner.h                 |   10 +
 src/mauveToXMFA.cpp                |   35 +
 src/mfa2xmfa.cpp                   |  117 ++
 src/multiEVD.cpp                   |  217 +++
 src/multiToRawSequence.cpp         |   28 +
 src/pairCompare.cpp                |   85 ++
 src/progressiveMauve.cpp           |  768 ++++++++++
 src/projectAndStrip.cpp            |  144 ++
 src/randomGeneSample.cpp           |  165 +++
 src/repeatoire.cpp                 | 2716 ++++++++++++++++++++++++++++++++++++
 src/rootTrees.cpp                  |  128 ++
 src/scoreALU.cpp                   |  729 ++++++++++
 src/scoreAlignment.cpp             |  467 +++++++
 src/scoreProcrastAlignment.cpp     |  458 ++++++
 src/sortContigs.cpp                |  181 +++
 src/stripGapColumns.cpp            |   74 +
 src/stripSubsetLCBs.cpp            |  183 +++
 src/toEvoHighwayFormat.cpp         |  148 ++
 src/toGBKsequence.cpp              |   38 +
 src/toGrimmFormat.cpp              |   84 ++
 src/toMultiFastA.cpp               |   54 +
 src/toRawSequence.cpp              |   27 +
 src/transposeCoordinates.cpp       |   71 +
 src/unalign.cpp                    |   91 ++
 src/uniqueMerCount.cpp             |   41 +
 src/uniquifyTrees.cpp              |  250 ++++
 src/xmfa2maf.cpp                   |   87 ++
 75 files changed, 17187 insertions(+)

diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..1dcdaf3
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1 @@
+Aaron Darling
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..d60c31a
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,340 @@
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+     59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+

+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+

+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+

+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+

+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+

+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year  name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..e69de29
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 0000000..5c3907c
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,26 @@
+EXTRA_DIST = \
+projects/everything.sln \
+projects/mauveAligner.sln \
+projects/calculateBackboneCoverage2.vcproj \
+projects/calculateBackboneCoverage.vcproj \
+projects/checkForLGT.vcproj \
+projects/extractBackbone.vcproj \
+projects/extractBCITrees.vcproj \
+projects/extractSubalignments.vcproj \
+projects/mauveAligner.vcproj \
+projects/repeatoire.vcproj \
+projects/progressiveMauve.vcproj \
+projects/rootTrees.vcproj \
+projects/scoreAlignment.vcproj \
+projects/scoreALU.vcproj \
+projects/sortContigs.vcproj \
+projects/toEvoHighwayFormat.vcproj \
+projects/toGrimmFormat.vcproj \
+projects/transposeCoordinates.vcproj \
+projects/unalign.vcproj \
+projects/uniqueMerCount.vcproj \
+projects/uniquifyTrees.vcproj \
+projects/mauveAligner.dox
+
+SUBDIRS = src 
+
diff --git a/NEWS b/NEWS
new file mode 100644
index 0000000..e69de29
diff --git a/README b/README
new file mode 100644
index 0000000..e69de29
diff --git a/acinclude.m4 b/acinclude.m4
new file mode 100644
index 0000000..cbb46db
--- /dev/null
+++ b/acinclude.m4
@@ -0,0 +1,156 @@
+# pkg.m4 - Macros to locate and utilise pkg-config.            -*- Autoconf -*-
+# 
+# Copyright © 2004 Scott James Remnant <scott at netsplit.com>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# PKG_PROG_PKG_CONFIG([MIN-VERSION])
+# ----------------------------------
+AC_DEFUN([PKG_PROG_PKG_CONFIG],
+[m4_pattern_forbid([^_?PKG_[A-Z_]+$])
+m4_pattern_allow([^PKG_CONFIG(_PATH)?$])
+AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])dnl
+if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
+	AC_PATH_TOOL([PKG_CONFIG], [pkg-config])
+fi
+if test -n "$PKG_CONFIG"; then
+	_pkg_min_version=m4_default([$1], [0.9.0])
+	AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version])
+	if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then
+		AC_MSG_RESULT([yes])
+	else
+		AC_MSG_RESULT([no])
+		PKG_CONFIG=""
+	fi
+		
+fi[]dnl
+])# PKG_PROG_PKG_CONFIG
+
+# PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+#
+# Check to see whether a particular set of modules exists.  Similar
+# to PKG_CHECK_MODULES(), but does not set variables or print errors.
+#
+#
+# As with PKG_CHECK_MODULES, if the first call to this macro or to
+# PKG_CHECK_MODULES might be skipped, make sure to call
+# PKG_PROG_PKG_CONFIG manually
+# --------------------------------------------------------------
+AC_DEFUN([PKG_CHECK_EXISTS],
+[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
+if test -n "$PKG_CONFIG" && \
+    AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then
+  m4_ifval([$2], [$2], [:])
+m4_ifvaln([$3], [else
+  $3])dnl
+fi])
+
+
+# _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES])
+# ---------------------------------------------
+m4_define([_PKG_CONFIG],
+[if test -n "$PKG_CONFIG"; then
+    if test -n "$$1"; then
+        pkg_cv_[]$1="$$1"
+    else
+        PKG_CHECK_EXISTS([$3],
+                         [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`],
+			 [pkg_failed=yes])
+    fi
+else
+	pkg_failed=untried
+fi[]dnl
+])# _PKG_CONFIG
+
+# _PKG_SHORT_ERRORS_SUPPORTED
+# -----------------------------
+AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED],
+[AC_REQUIRE([PKG_PROG_PKG_CONFIG])
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+        _pkg_short_errors_supported=yes
+else
+        _pkg_short_errors_supported=no
+fi[]dnl
+])# _PKG_SHORT_ERRORS_SUPPORTED
+
+
+# PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
+# [ACTION-IF-NOT-FOUND])
+#
+#
+# Note that if there is a possibility the first call to
+# PKG_CHECK_MODULES might not happen, you should be sure to include an
+# explicit call to PKG_PROG_PKG_CONFIG in your configure.ac
+#
+#
+# --------------------------------------------------------------
+AC_DEFUN([PKG_CHECK_MODULES],
+[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
+AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
+AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl
+
+pkg_failed=no
+AC_MSG_CHECKING([for $1])
+
+_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2])
+_PKG_CONFIG([$1][_LIBS], [libs], [$2])
+
+m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS
+and $1[]_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.])
+
+if test $pkg_failed = yes; then
+        _PKG_SHORT_ERRORS_SUPPORTED
+        if test $_pkg_short_errors_supported = yes; then
+	        $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "$2"`
+        else 
+	        $1[]_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "$2"`
+        fi
+	# Put the nasty error message in config.log where it belongs
+	echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD
+
+	ifelse([$4], , [AC_MSG_ERROR(dnl
+[Package requirements ($2) were not met:
+
+$$1_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+_PKG_TEXT
+])],
+		[$4])
+elif test $pkg_failed = untried; then
+	ifelse([$4], , [AC_MSG_FAILURE(dnl
+[The pkg-config script could not be found or is too old.  Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+_PKG_TEXT
+
+To get pkg-config, see <http://www.freedesktop.org/software/pkgconfig>.])],
+		[$4])
+else
+	$1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
+	$1[]_LIBS=$pkg_cv_[]$1[]_LIBS
+        AC_MSG_RESULT([yes])
+	ifelse([$3], , :, [$3])
+fi[]dnl
+])# PKG_CHECK_MODULES
diff --git a/autogen.sh b/autogen.sh
new file mode 100755
index 0000000..65f32e1
--- /dev/null
+++ b/autogen.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+mkdir -p config || exit 1    # directory handed to autoreconf via -I below
+autoreconf --force --install -I config || exit 1    # stop before the hint if regeneration failed
+echo "Now run ./configure --prefix=$HOME ; make install"
+
diff --git a/configure.ac b/configure.ac
new file mode 100644
index 0000000..8f7a923
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,91 @@
+dnl Process this file with autoconf to produce a configure script.
+AC_PREREQ([2.59])
+AC_INIT(mauveAligner, 1.2.0)
+AC_CONFIG_SRCDIR([src/mauveAligner.cpp])
+AC_CONFIG_AUX_DIR(config)
+
+
+dnl Get the target and build system types and add appropriate options
+AC_CANONICAL_TARGET
+
+AM_INIT_AUTOMAKE([no-define])
+AC_CONFIG_HEADERS([src/config.h])dnl replaces AM_CONFIG_HEADER, which Automake 1.13 removed
+
+AC_PREFIX_DEFAULT(/usr/local)
+
+dnl Override default O2
+CFLAGS=${CFLAGS-""}
+CXXFLAGS=${CXXFLAGS-""}
+
+dnl Checks for programs.
+AC_PROG_CC
+AC_PROG_CXX
+AC_PROG_INSTALL
+AC_PROG_LN_S 
+
+dnl Checks for header files.
+AC_HEADER_STDC
+
+dnl Check what compiler we're using; $CXX is quoted because it may hold several words
+AM_CONDITIONAL([ICC], [test "x$CXX" = xicc])
+
+dnl Check for getopt_long
+AC_CHECK_FUNC(getopt_long,
+[GETOPT_LONG_SYSTEM=""],
+[GETOPT_LONG_SYSTEM='getopt.$(OBJEXT) getopt1.$(OBJEXT)']
+)
+AC_SUBST([GETOPT_LONG_SYSTEM])
+
+dnl Check for getopt; GETOPT_UNDEFINED is defined (with an empty value) when it is missing
+AC_CHECK_FUNC([getopt],
+[HAVE_GETOPT="true"],
+[AC_DEFINE([GETOPT_UNDEFINED], [],
+           [Define this if the system does not provide getopt()])]
+)
+
+PKG_CHECK_MODULES(DEPS, libMems-1.6 >= 1.0.0)
+AC_SUBST(DEPS_CFLAGS)
+AC_SUBST(DEPS_LIBS)
+
+dnl Mac OS X won't allow static compilation...
+STATIC_FLAG="-static -Wl,--whole-archive -lpthread -Wl,--no-whole-archive"
+if ( test "x$target_vendor" = "xapple") then
+        STATIC_FLAG=""
+fi
+AC_SUBST(STATIC_FLAG)
+
+dnl Allow debugging compilation
+AC_ARG_ENABLE(debug,
+[  --enable-debug    Turn on debugging],
+[case "${enableval}" in
+  yes) debug=true ;;
+  no)  debug=false ;;
+  *) AC_MSG_ERROR(bad value ${enableval} for --enable-debug) ;;
+esac],[debug=false])
+AM_CONDITIONAL(DEBUG, test x$debug = xtrue)
+
+dnl Checks for typedefs, structures, and compiler characteristics.
+AC_C_CONST
+AC_C_INLINE
+dnl AC_C_BIGENDIAN
+AC_HEADER_TIME
+
+AC_CHECK_FUNCS([memset]) 
+AC_CHECK_HEADERS([libintl.h])
+AC_CHECK_HEADERS([stdlib.h])
+AC_CHECK_HEADERS([string.h])
+AC_CHECK_HEADERS([strings.h])
+AC_CHECK_HEADERS([unistd.h])
+AC_CHECK_HEADERS([wchar.h])
+AC_FUNC_MALLOC
+AC_HEADER_STDBOOL
+AC_TYPE_SIZE_T
+
+dnl Checks for library functions.
+AC_PROG_GCC_TRADITIONAL
+
+dnl SAVE_LIBRARY_VERSION
+AC_SUBST(LIBTOOL_VERSION_INFO)
+
+
+AC_OUTPUT(Makefile src/Makefile )
diff --git a/include/getopt.h b/include/getopt.h
new file mode 100644
index 0000000..1330eea
--- /dev/null
+++ b/include/getopt.h
@@ -0,0 +1,133 @@
+/* Declarations for getopt.
+   Copyright (C) 1989, 90, 91, 92, 93, 94 Free Software Foundation, Inc.
+
+This file is part of the GNU C Library.  Its master source is NOT part of
+the C library, however.  The master source lives in /gd/gnu/lib.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB.  If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA.  */
+
+#ifndef _GETOPT_H
+#define _GETOPT_H 1
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/* For communication from `getopt' to the caller.
+   When `getopt' finds an option that takes an argument,
+   the argument value is returned here.
+   Also, when `ordering' is RETURN_IN_ORDER,
+   each non-option ARGV-element is returned here.  */
+
+extern char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+   This is used for communication to and from the caller
+   and for communication between successive calls to `getopt'.
+
+   On entry to `getopt', zero means this is the first call; initialize.
+
+   When `getopt' returns EOF, this is the index of the first of the
+   non-option elements that the caller should itself scan.
+
+   Otherwise, `optind' communicates from one call to the next
+   how much of ARGV has been scanned so far.  */
+
+extern int optind;
+
+/* Callers store zero here to inhibit the error message `getopt' prints
+   for unrecognized options.  */
+
+extern int opterr;
+
+/* Set to an option character which was unrecognized.  */
+
+extern int optopt;
+
+/* Describe the long-named options requested by the application.
+   The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
+   of `struct option' terminated by an element containing a name which is
+   zero.
+
+   The field `has_arg' is:
+   no_argument		(or 0) if the option does not take an argument,
+   required_argument	(or 1) if the option requires an argument,
+   optional_argument	(or 2) if the option takes an optional argument.
+
+   If the field `flag' is not NULL, it points to a variable that is set
+   to the value given in the field `val' when the option is found, but
+   left unchanged if the option is not found.
+
+   To have a long-named option do something other than set an `int' to
+   a compiled-in constant, such as set a value from `optarg', set the
+   option's `flag' field to zero and its `val' field to a nonzero
+   value (the equivalent single-letter option character, if there is
+   one).  For long options that have a zero `flag' field, `getopt'
+   returns the contents of the `val' field.  */
+
+struct option
+{
+#if defined (__STDC__) && __STDC__
+  const char *name;	/* long option name; a zero name terminates the vector */
+#else
+  char *name;
+#endif
+  /* has_arg can't be an enum because some compilers complain about
+     type mismatches in all the code that assumes it is an int.  */
+  int has_arg;	/* no_argument, required_argument or optional_argument */
+  int *flag;	/* if not NULL, variable that is set to `val' when the option is found */
+  int val;	/* stored through `flag', or returned by getopt when `flag' is zero */
+};
+
+/* Names for the values of the `has_arg' field of `struct option'.  */
+
+#define no_argument		0
+#define required_argument	1
+#define optional_argument	2
+
+#if ( defined (__STDC__) && __STDC__ ) || defined(__cplusplus) || defined(MSDOS)
+#ifdef __GNU_LIBRARY__
+/* Many other libraries have conflicting prototypes for getopt, with
+   differences in the consts, in stdlib.h.  To avoid compilation
+   errors, only prototype getopt for the GNU C library.  */
+extern int getopt (int argc, char *const *argv, const char *shortopts);
+#else /* not __GNU_LIBRARY__ */
+extern int getopt (int argc, char *const *argv, const char *optstring);
+#endif /* __GNU_LIBRARY__ */
+extern int getopt_long (int argc, char *const *argv, const char *shortopts,
+			const struct option *longopts, int *longind);
+extern int getopt_long_only (int argc, char *const *argv,
+			     const char *shortopts,
+			     const struct option *longopts, int *longind);
+
+/* Internal only.  Users should not call this directly.  */
+extern int _getopt_internal (int argc, char *const *argv,
+			     const char *shortopts,
+			     const struct option *longopts, int *longind,
+			     int long_only);
+#else /* not __STDC__ */
+extern int getopt ();
+extern int getopt_long ();
+extern int getopt_long_only ();
+
+extern int _getopt_internal ();
+#endif /* __STDC__ */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _GETOPT_H */
diff --git a/src/AlignmentTree.cpp b/src/AlignmentTree.cpp
new file mode 100644
index 0000000..b55994d
--- /dev/null
+++ b/src/AlignmentTree.cpp
@@ -0,0 +1,188 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "PhyloTree.h"
+#include <sstream>
+#include <stack>
+using namespace std;
+
+typedef unsigned uint;
+
+// Build an empty tree: no nodes, zero weight, root index 0.
+PhyloTree::PhyloTree() :
+vector< TreeNode >(), weight( 0 ), root( 0 )
+{}
+
+PhyloTree::PhyloTree( const PhyloTree& pt ) :	// copy constructor
+vector< TreeNode >( pt ),	// copies every node held by pt's base vector
+weight( pt.weight ),
+root( pt.root )
+{}
+
+PhyloTree& PhyloTree::operator=( const PhyloTree& pt )
+{
+	vector< TreeNode >::operator=( pt );	// replace this tree's nodes with pt's nodes
+	weight = pt.weight;
+	root = pt.root;
+	return *this;	// allows chained assignment
+}
+
+PhyloTree::~PhyloTree()
+{}	// empty body: no explicit cleanup is performed here
+
+void PhyloTree::clear()
+{
+	root = 0;	// back to the same values the default constructor sets
+	weight = 0;
+	vector< TreeNode >::clear();	// drop every node held by the base vector
+}
+
+
+/**
+ *  readTree version 2.0: read in a phylogenetic tree in the Newick file format.
+ *  Reads one line from tree_file, replacing the current contents of this tree.
+ *  An optional leading "[weight]" is parsed into the weight member.  Delimiters
+ *  that appear while no node is open (malformed input) are skipped, not dereferenced.
+ */
+void PhyloTree::readTree( istream& tree_file ){
+	string line;
+	clear();
+	if( !getline( tree_file, line ) )
+		return;
+
+	stringstream line_str( line );
+
+	// look for a weight
+	string::size_type open_bracket_pos = line.find( "[" );
+	string::size_type bracket_pos = line.find( "]" );
+	if( open_bracket_pos != string::npos && bracket_pos != string::npos && 
+		open_bracket_pos < bracket_pos && bracket_pos < line.find( "(" ) ){
+		// read in a weight
+		getline( line_str, line, '[' );
+		getline( line_str, line, ']' );
+		stringstream weight_str( line );
+		weight_str >> weight;
+	}
+	
+	// ready to begin parsing the tree data.
+	string tree_line;
+	getline( line_str, tree_line, ';' );
+	uint read_state = 0;	/**< read_state of 0 indicates nothing has been parsed yet */
+	uint section_start = 0;
+	stack< node_id_t > node_stack;
+	stringstream blen_str;
+	TreeNode new_node;
+	new_node.distance = 0;	// default the distance to 0
+	for( uint charI = 0; charI < tree_line.size(); charI++ ){
+		switch( tree_line[ charI ] ){
+			// if this is an open parens then simply create a new
+			// parent node and push it on the parent stack
+			case '(':
+				new_node.parents.clear();	// a node opened with no parent must not inherit stale parents
+				if( node_stack.size() > 0 ){
+					new_node.parents.push_back( node_stack.top() );
+					(*this)[ node_stack.top() ].children.push_back( (*this).size() );
+				}
+				node_stack.push( (*this).size() );
+				push_back( new_node );
+				read_state = 1;
+				section_start = charI + 1;
+				break;
+			case ')':
+			case ',':
+				// both delimiters end a branch length; without an open node there
+				// is nothing to attach it to, so skip the delimiter
+				if( node_stack.empty() )
+					break;
+				blen_str.clear();
+				blen_str.str( tree_line.substr( section_start, charI - section_start ) );
+				blen_str >> (*this)[ node_stack.top() ].distance;
+				// pop off the top of the node stack after its branch length is read
+				if( read_state == 2 )
+					node_stack.pop();
+				section_start = charI + 1;
+				// ')' : the enclosing node's name/branch length comes next (state 2)
+				// ',' : a new sibling will be created when we hit ':' (state 1)
+				read_state = tree_line[ charI ] == ')' ? 2 : 1;
+				break;
+			case ':':
+				// a name needs a node to belong to; skip it when no node is open
+				if( node_stack.empty() )
+					break;
+				// read off a name, if possible
+				if( read_state == 1 ){
+					new_node.parents.clear();
+					new_node.parents.push_back( node_stack.top() );
+					(*this)[ node_stack.top() ].children.push_back( (*this).size() );
+					node_stack.push( (*this).size() );
+					push_back( new_node );
+					read_state = 2;	// pop this node after reading its branch length
+				}
+				(*this)[ node_stack.top() ].name = tree_line.substr( section_start, charI - section_start );
+				section_start = charI + 1;
+				break;
+			default:
+				break;
+		}
+	}
+}
+
+/** Writes this tree to os as parenthesised Newick-style text, walking down from root with explicit stacks instead of recursion. */
+void PhyloTree::writeTree( ostream& os ) const{
+	stack< node_id_t > node_stack;	// path from the root down to the node currently being written
+	stack< uint > child_stack;	// one entry per open parent: index of its next child to visit
+	node_stack.push( root );	// NOTE(review): root is indexed below without a size check -- confirm callers never write an empty tree
+	child_stack.push( 0 );
+
+	if( (*this).weight != 0 )
+		os << "[" << weight << "]";	// weight prefix is emitted only when nonzero
+	os << "(";
+
+	while( node_stack.size() > 0 ) {
+		if( (*this)[ node_stack.top() ].children.size() != 0 ){
+			// this is a parent node
+			// if we have scanned all its children then pop it
+			if( child_stack.top() == (*this)[ node_stack.top() ].children.size() ){
+				os << ")";
+				if( node_stack.size() > 1 )	// the bottom-most (root) entry gets no branch length
+					os << ":" << (*this)[ node_stack.top() ].distance;
+				node_stack.pop();
+				child_stack.pop();
+				continue;
+			}
+			// try to recurse to its children
+			// if the child is a parent as well spit out a paren
+			node_id_t child = (*this)[ node_stack.top() ].children[ child_stack.top() ];
+			node_stack.push( child );
+			child_stack.top()++;	// still refers to the parent's counter: child_stack is only pushed for parents, below
+			// print a comma to separate multiple children
+			if( child_stack.top() > 1 )
+				os << ",";
+			if( (*this)[ child ].children.size() > 0 ){
+				child_stack.push( 0 );
+				os << "(";
+			}
+			continue;
+		}
+		
+		// this is a leaf node
+		os << (*this)[ node_stack.top() ].name << ":" << (*this)[ node_stack.top() ].distance;
+		
+		// pop the child
+		node_stack.pop();
+	}
+	os << ";" << endl;	// terminating semicolon, newline and flush
+}
+
+/** Height of the whole tree: delegates to getHeight( root ). */
+double PhyloTree::getHeight() const
+{
+	return getHeight( root );
+}
+double PhyloTree::getHeight( node_id_t nodeI ) const
+{
+	// a leaf contributes its own branch length; otherwise add the height found below the first child
+	const TreeNode& node = (*this)[ nodeI ];
+	return node.children.size() == 0 ? node.distance : node.distance + getHeight( node.children[ 0 ] );
+}
diff --git a/src/AlignmentTree.h b/src/AlignmentTree.h
new file mode 100644
index 0000000..e69de29
diff --git a/src/Makefile.am b/src/Makefile.am
new file mode 100644
index 0000000..7d9c94a
--- /dev/null
+++ b/src/Makefile.am
@@ -0,0 +1,225 @@
+# Flags shared by every program; AM_CPPFLAGS replaces the deprecated INCLUDES variable.
+if DEBUG
+D_CXXFLAGS = -Wall -g -DCOMMAND_LINE -D__GNDEBUG__ 
+else
+P_CXXFLAGS = -DCOMMAND_LINE
+endif
+OPTIMIZATION = -O2 -funroll-loops -fomit-frame-pointer -ftree-vectorize 
+AM_CXXFLAGS = $(OPTIMIZATION) $(D_CXXFLAGS) $(P_CXXFLAGS) 
+AM_LDFLAGS = @STATIC_FLAG@
+LIBRARY_CL = $(DEPS_LIBS) 
+#AM_LDADD = $(DEPS_LIBS) 
+AM_CPPFLAGS = @DEPS_CFLAGS@ 
+
+bin_PROGRAMS = mauveAligner mauveStatic scoreAlignment \
+uniqueMerCount toRawSequence \
+mfa2xmfa addUnalignedIntervals \
+toMultiFastA getAlignmentWindows uniquifyTrees \
+toGrimmFormat mauveToXMFA \
+stripGapColumns progressiveMauve progressiveMauveStatic \
+extractBCITrees createBackboneMFA \
+repeatoire alignmentProjector stripSubsetLCBs \
+projectAndStrip makeBadgerMatrix randomGeneSample getOrthologList \
+bbFilter bbAnalyze backbone_global_to_local xmfa2maf coordinateTranslate
+
+EXTRA_PROGRAMS = bbBreakOnGenes mauveMpatrol mauveEfence toGBKsequence \
+multiToRawSequence unalign makeMc4Matrix multiEVD evd scoreALU \
+calculateBackboneCoverage2 sortContigs countInPlaceInversions gappiness \
+joinAlignmentFiles extractBackbone2 pairCompare \
+calculateCoverage calculateBackboneCoverage extractBackbone transposeCoordinates
+
+mauveAligner_SOURCES = mauveAligner.cpp mauveAligner.h
+mauveAligner_LDFLAGS = $(OPTIMIZATION)  
+mauveAligner_LDADD = $(DEPS_LIBS) 
+EXTRA_mauveAligner_SOURCES = getopt.c getopt.h getopt1.c
+mauveAligner_DEPENDENCIES = @GETOPT_LONG_SYSTEM@
+
+mauveMpatrol_SOURCES = mauveAligner.cpp
+mauveMpatrol_LDFLAGS = -lmpatrol -lbfd -liberty $(LIBRARY_CL) `wx-config --libs`
+EXTRA_mauveMpatrol_SOURCES = getopt.c getopt.h getopt1.c
+mauveMpatrol_DEPENDENCIES = @GETOPT_LONG_SYSTEM@
+
+
+
+mauveEfence_SOURCES = mauveAligner.cpp
+mauveEfence_LDADD = -lefence $(DEPS_LIBS)
+EXTRA_mauveEfence_SOURCES = getopt.c getopt.h getopt1.c
+mauveEfence_DEPENDENCIES = @GETOPT_LONG_SYSTEM@
+
+mauveStatic_SOURCES = mauveAligner.cpp
+mauveStatic_LDADD = $(LIBRARY_CL)
+EXTRA_mauveStatic_SOURCES = getopt.c getopt.h getopt1.c
+mauveStatic_DEPENDENCIES = @GETOPT_LONG_SYSTEM@
+
+#mauveAligner4_SOURCES = mauveAligner.cpp
+#mauveAligner4_LDADD = $(LIBRARY_CL)
+#mauveAligner4_CXXFLAGS = MAX_SEQ_COUNT=4
+#EXTRA_mauveAligner4_SOURCES = getopt.c getopt.h getopt1.c
+#mauveAligner4_DEPENDENCIES = @GETOPT_LONG_SYSTEM@
+
+
+calculateCoverage_SOURCES = calculateCoverage.cpp
+calculateCoverage_LDADD = $(LIBRARY_CL)
+EXTRA_calculateCoverage_SOURCES = getopt.c getopt.h getopt1.c
+calculateCoverage_DEPENDENCIES = @GETOPT_LONG_SYSTEM@
+
+scoreAlignment_SOURCES = scoreAlignment.cpp
+scoreAlignment_LDADD = $(LIBRARY_CL)
+EXTRA_scoreAlignment_SOURCES = getopt.c getopt.h getopt1.c
+scoreAlignment_DEPENDENCIES = @GETOPT_LONG_SYSTEM@
+
+extractBackbone_SOURCES = extractBackbone.cpp
+extractBackbone_LDADD = $(LIBRARY_CL)
+EXTRA_extractBackbone_SOURCES = getopt.c getopt.h getopt1.c
+extractBackbone_DEPENDENCIES = @GETOPT_LONG_SYSTEM@
+
+toRawSequence_SOURCES = toRawSequence.cpp
+toRawSequence_LDADD = $(LIBRARY_CL)
+EXTRA_toRawSequence_SOURCES = getopt.c getopt.h getopt1.c
+toRawSequence_DEPENDENCIES = @GETOPT_LONG_SYSTEM@
+
+transposeCoordinates_SOURCES = transposeCoordinates.cpp
+transposeCoordinates_LDADD = $(LIBRARY_CL)
+EXTRA_transposeCoordinates_SOURCES = getopt.c getopt.h getopt1.c
+transposeCoordinates_DEPENDENCIES = @GETOPT_LONG_SYSTEM@
+
+uniqueMerCount_SOURCES = uniqueMerCount.cpp
+uniqueMerCount_LDADD = $(LIBRARY_CL)
+EXTRA_uniqueMerCount_SOURCES = getopt.c getopt.h getopt1.c
+uniqueMerCount_DEPENDENCIES = @GETOPT_LONG_SYSTEM@
+
+calculateBackboneCoverage_SOURCES = calculateBackboneCoverage.cpp
+calculateBackboneCoverage_LDADD = $(LIBRARY_CL)
+EXTRA_calculateBackboneCoverage_SOURCES = getopt.c getopt.h getopt1.c
+calculateBackboneCoverage_DEPENDENCIES = @GETOPT_LONG_SYSTEM@
+
+mfa2xmfa_SOURCES = mfa2xmfa.cpp
+mfa2xmfa_LDADD = $(LIBRARY_CL)
+EXTRA_mfa2xmfa_SOURCES = getopt.c getopt.h getopt1.c
+mfa2xmfa_DEPENDENCIES = @GETOPT_LONG_SYSTEM@
+
+xmfa2maf_SOURCES = xmfa2maf.cpp
+xmfa2maf_LDADD = $(LIBRARY_CL)
+EXTRA_xmfa2maf_SOURCES = getopt.c getopt.h getopt1.c
+xmfa2maf_DEPENDENCIES = @GETOPT_LONG_SYSTEM@
+
+toGBKsequence_SOURCES = toGBKsequence.cpp
+toGBKsequence_LDADD = $(LIBRARY_CL)
+EXTRA_toGBKsequence_SOURCES = getopt.c getopt.h getopt1.c
+toGBKsequence_DEPENDENCIES = @GETOPT_LONG_SYSTEM@
+
+multiToRawSequence_SOURCES = multiToRawSequence.cpp
+multiToRawSequence_LDADD = $(LIBRARY_CL)
+EXTRA_multiToRawSequence_SOURCES = getopt.c getopt.h getopt1.c
+multiToRawSequence_DEPENDENCIES = @GETOPT_LONG_SYSTEM@
+
+unalign_SOURCES = unalign.cpp
+unalign_LDADD = $(LIBRARY_CL)
+EXTRA_unalign_SOURCES = getopt.c getopt.h getopt1.c
+unalign_DEPENDENCIES = @GETOPT_LONG_SYSTEM@
+
+addUnalignedIntervals_SOURCES = addUnalignedIntervals.cpp
+addUnalignedIntervals_LDADD = $(LIBRARY_CL)
+
+toMultiFastA_SOURCES = toMultiFastA.cpp
+toMultiFastA_LDADD = $(LIBRARY_CL)
+
+getAlignmentWindows_SOURCES = getAlignmentWindows.cpp
+getAlignmentWindows_LDADD = $(LIBRARY_CL)
+
+extractBackbone2_SOURCES = extractBackbone2.cpp
+extractBackbone2_LDADD = $(LIBRARY_CL)
+
+uniquifyTrees_SOURCES = uniquifyTrees.cpp
+uniquifyTrees_LDADD = $(LIBRARY_CL)
+
+
+countInPlaceInversions_SOURCES = countInPlaceInversions.cpp
+countInPlaceInversions_LDADD = $(LIBRARY_CL)
+
+toGrimmFormat_SOURCES = toGrimmFormat.cpp
+toGrimmFormat_LDADD = $(LIBRARY_CL)
+
+joinAlignmentFiles_SOURCES = joinAlignmentFiles.cpp
+joinAlignmentFiles_LDADD = $(LIBRARY_CL)
+
+mauveToXMFA_SOURCES = mauveToXMFA.cpp
+mauveToXMFA_LDADD = $(LIBRARY_CL)
+
+stripGapColumns_SOURCES = stripGapColumns.cpp
+stripGapColumns_LDADD = $(LIBRARY_CL)
+
+gappiness_SOURCES = gappiness.cpp
+gappiness_LDADD = $(LIBRARY_CL)
+
+
+progressiveMauve_SOURCES = progressiveMauve.cpp UniqueMatchFinder.h UniqueMatchFinder.cpp 
+progressiveMauve_LDFLAGS = 
+progressiveMauve_LDADD = $(LIBRARY_CL)
+
+progressiveMauveStatic_SOURCES = progressiveMauve.cpp UniqueMatchFinder.h UniqueMatchFinder.cpp 
+progressiveMauveStatic_LDADD = $(LIBRARY_CL)
+
+sortContigs_SOURCES = sortContigs.cpp
+sortContigs_LDADD = $(LIBRARY_CL)
+
+extractBCITrees_SOURCES = extractBCITrees.cpp
+extractBCITrees_LDADD = $(LIBRARY_CL)
+
+calculateBackboneCoverage2_SOURCES = calculateBackboneCoverage2.cpp
+calculateBackboneCoverage2_LDADD = $(LIBRARY_CL)
+
+createBackboneMFA_SOURCES = createBackboneMFA.cpp
+createBackboneMFA_LDADD = $(LIBRARY_CL)
+
+pairCompare_SOURCES = pairCompare.cpp
+pairCompare_LDADD = $(LIBRARY_CL)
+
+repeatoire_SOURCES = repeatoire.cpp MatchRecord.h SeedMatchEnumerator.h
+repeatoire_LDADD = $(LIBRARY_CL) 
+
+scoreALU_SOURCES = scoreALU.cpp
+scoreALU_LDADD = $(LIBRARY_CL) 
+
+evd_SOURCES = evd.cpp
+evd_LDADD = $(LIBRARY_CL)
+
+alignmentProjector_SOURCES = alignmentProjector.cpp
+alignmentProjector_LDADD = $(LIBRARY_CL)
+
+stripSubsetLCBs_SOURCES = stripSubsetLCBs.cpp
+stripSubsetLCBs_LDADD = $(LIBRARY_CL)
+
+projectAndStrip_SOURCES = projectAndStrip.cpp
+projectAndStrip_LDADD = $(LIBRARY_CL)
+
+makeBadgerMatrix_SOURCES = makeBadgerMatrix.cpp
+makeBadgerMatrix_LDADD = $(LIBRARY_CL)
+
+multiEVD_SOURCES = multiEVD.cpp
+multiEVD_LDADD = $(LIBRARY_CL)
+
+randomGeneSample_SOURCES = randomGeneSample.cpp
+randomGeneSample_LDADD = $(LIBRARY_CL)
+
+getOrthologList_SOURCES = getOrthologList.cpp
+getOrthologList_LDADD = $(LIBRARY_CL)
+
+bbFilter_SOURCES = bbFilter.cpp
+bbFilter_LDADD = $(LIBRARY_CL)
+
+bbAnalyze_SOURCES = bbAnalyze.cpp
+bbAnalyze_LDADD = $(LIBRARY_CL)
+
+makeMc4Matrix_SOURCES = makeMc4Matrix.cpp
+makeMc4Matrix_LDADD = $(LIBRARY_CL)
+
+bbBreakOnGenes_SOURCES = bbBreakOnGenes.cpp
+bbBreakOnGenes_LDADD = $(LIBRARY_CL)
+
+backbone_global_to_local_SOURCES = backbone_global_to_local.cpp
+backbone_global_to_local_LDADD = $(LIBRARY_CL)
+
+coordinateTranslate_SOURCES = coordinateTranslate.cpp
+coordinateTranslate_LDADD = $(LIBRARY_CL)
+
diff --git a/src/MatchRecord.h b/src/MatchRecord.h
new file mode 100644
index 0000000..8774ead
--- /dev/null
+++ b/src/MatchRecord.h
@@ -0,0 +1,369 @@
+#ifndef __MatchRecord_h__
+#define __MatchRecord_h__
+
+#include "libMems/MuscleInterface.h"
+#include "libMems/AbstractMatch.h"
+#include "libMems/SparseAbstractMatch.h"
+#include "libMems/AbstractGappedAlignment.h"
+#include "libMems/Interval.h"
+#include "libMems/CompactGappedAlignment.h"
+#include "libMems/MatchProjectionAdapter.h"
+#include <iostream>
+#include <set>
+#include <vector>
+//#include <boost/variant.hpp>
+
+// forward declaration
+class MatchLink;
+class MatchRecord;
+class CompactMatchRecord;
+class GappedMatchRecord;
+class UngappedMatchRecord;
+class CompactUngappedMatchRecord;
+
+/** stores a link between a subset and a superset match; the constructor copies both component containers */
+class MatchLink
+{
+public:
+	MatchLink() : superset(NULL), subset(NULL) {};
+	MatchLink( MatchRecord* super, MatchRecord* sub, const boost::dynamic_bitset<>& comp_list, const std::vector< size_t >& comp_map ) :
+		superset( super ), subset( sub ), super_component_list( comp_list ), sub_to_super_map( comp_map ) {};
+	void clear()	// back to the default-constructed state: no matches, empty component containers
+	{
+		superset = NULL;
+		subset = NULL;
+		super_component_list.clear();
+		sub_to_super_map.clear();
+	}
+	MatchRecord* superset;	/**< The superset match connected by this link */
+	MatchRecord* subset;  /**< The subset match connected by this link */
+	boost::dynamic_bitset<> super_component_list;	/**< this gets sized to be equal to superset->Multiplicity() and tracks which components of the superset are linked */
+	std::vector< size_t > sub_to_super_map;	/**< mapping of subset components to superset components */
+};
+
+class MatchRecord : public mems::SparseAbstractMatch<>
+{
+public:
+	MatchRecord() : mems::SparseAbstractMatch<>() { clear(); }
+	MatchRecord( uint seq_count ): mems::SparseAbstractMatch<>( seq_count ){ clear(); }
+	GappedMatchRecord* subsuming_match;	// NULL after clear(); presumably the match that subsumes this one -- confirm
+	std::vector< size_t > subsumption_component_map;	// NOTE(review): presumably maps components into subsuming_match -- confirm
+	std::vector< MatchLink > left_subset_links;			/**< Links to nearby subset matches on the left side */
+	std::vector< MatchLink > right_subset_links;		/**< Links to nearby subset matches on the right side */
+	MatchLink left_superset;							/**< The left-side superset, if one exists */
+	MatchLink right_superset;							/**< The right-side superset, if one exists */
+	std::vector< MatchLink > extra_left_subsets;		/**< left-side subsets that were further away than the first linked subset on the left side */
+	std::vector< MatchLink > extra_right_subsets;		/**< right-side subsets that were further away than the first linked subset on the right side */
+	std::vector< MatchRecord* > chained_matches;	// read by GappedMatchRecord::finalize(), index-paired with chained_component_maps
+	std::vector< std::vector< size_t > > chained_component_maps;	/**< maps components in this match to those in chained matches */
+	bool tandem;			/**< set to true if components of the match are chainable to each other (tandem repeats)*/
+	bool extended;			/**< set to false prior to extending this match */
+	bool is_novel_subset;
+    bool dont_extend;
+	/** resets the pointer, the two superset links and the four flags; the vector members are left untouched */
+	void clear()
+	{
+		subsuming_match = NULL;
+		left_superset.clear();
+		right_superset.clear();
+		tandem = false;
+		extended = false;
+		dont_extend = false;
+        is_novel_subset = false;
+	}
+};
+
+class CompactMatchRecord : public mems::DenseAbstractMatch<1>
+{
+public:
+	CompactMatchRecord() : mems::DenseAbstractMatch<1>() { clear(); }
+	CompactMatchRecord( uint seq_count ): mems::DenseAbstractMatch<1>( seq_count ){ clear(); }
+	GappedMatchRecord* subsuming_match;	// the only state added to the base class; NULL after clear()
+
+	void clear()	// called by both constructors, so the pointer never starts out uninitialized
+	{
+		subsuming_match = NULL;
+	}
+};
+
+/**
+ * An ungapped alignment that also stores a match record
+ */
+class CompactUngappedMatchRecord : public mems::UngappedLocalAlignment< CompactMatchRecord >
+{
+public:
+
+	CompactUngappedMatchRecord(){};
+
+	/** always set seq_count, don't worry about align_length (the parameter is accepted but never used) */
+	CompactUngappedMatchRecord( uint seq_count, gnSeqI align_length ) : mems::UngappedLocalAlignment< CompactMatchRecord >( seq_count )
+	{
+		subsuming_match = NULL;
+	}
+
+	CompactUngappedMatchRecord* Clone() const { return new CompactUngappedMatchRecord( *this ); }	// copy made with operator new
+	CompactUngappedMatchRecord* Copy() const;	// copy made with m_allocateAndCopy, see the inline definition
+	virtual void Free();
+};
+
+inline
+CompactUngappedMatchRecord* CompactUngappedMatchRecord::Copy() const
+{
+	return m_allocateAndCopy( *this );	// NOTE(review): presumably to be released with Free() rather than delete -- confirm
+}
+inline
+void CompactUngappedMatchRecord::Free()
+{
+	m_free(this);	// NOTE(review): presumably the counterpart of m_allocateAndCopy used by Copy() -- confirm
+}
+
+
+/**
+ * An ungapped alignment that also stores a match record
+ */
+class UngappedMatchRecord : public mems::UngappedLocalAlignment< MatchRecord >
+{
+public:
+
+	UngappedMatchRecord(){};
+
+	/** always set seq_count, don't worry about align_length (the parameter is accepted but never used) */
+	UngappedMatchRecord( uint seq_count, gnSeqI align_length ) : mems::UngappedLocalAlignment< MatchRecord >( seq_count )
+	{
+		subsuming_match = NULL;
+	}
+
+	UngappedMatchRecord* Clone() const { return new UngappedMatchRecord( *this ); }	// copy made with operator new
+	UngappedMatchRecord* Copy() const;	// copy made with m_allocateAndCopy, see the inline definition
+	virtual void Free();
+
+	friend std::ostream& operator<<(std::ostream& os, const UngappedMatchRecord& mr); //write to source.
+};
+
+inline
+UngappedMatchRecord* UngappedMatchRecord::Copy() const
+{
+	return m_allocateAndCopy( *this );	// NOTE(review): presumably to be released with Free() rather than delete -- confirm
+}
+inline
+void UngappedMatchRecord::Free()
+{
+	m_free(this);	// NOTE(review): presumably the counterpart of m_allocateAndCopy used by Copy() -- confirm
+}
+
+
+/**
+ * The gapped match record class.  Abuses the Interval class to store a chain of other matches
+ */
+class GappedMatchRecord : public mems::GenericInterval< mems::AbstractGappedAlignment< MatchRecord > >
+{
+public:
+
+	/** default constructor: default-constructed interval with spscore set to 0 */
+	GappedMatchRecord() : 
+	  mems::GenericInterval< mems::AbstractGappedAlignment< MatchRecord > >()
+	{
+        //tjt: initialize spscore to 0
+        spscore = 0;  
+    }
+	/** builds an interval holding only umr, then copies umr's MatchRecord state into this object */
+	GappedMatchRecord( UngappedMatchRecord& umr )
+	{
+        //tjt: initialize spscore to 0
+        spscore = 0;
+		std::vector<UngappedMatchRecord*> asdf(1, &umr);	// one-element range handed to the interval constructor
+		mems::GenericInterval< mems::AbstractGappedAlignment< MatchRecord > > iv( asdf.begin(), asdf.end() );
+		mems::GenericInterval< mems::AbstractGappedAlignment< MatchRecord > >::operator=( iv );
+		MatchRecord::operator=( umr );
+	}
+
+	/** 
+	 * Call to indicate that all matches have been placed in the chained_matches list and can be 
+	 * converted to a gapped alignment
+	 */
+	void finalize(std::vector<genome::gnSequence *> seq_table);
+    //tjt: should this go somewhere else?
+    mems::score_t spscore;
+// methods inherited from AbstractGappedAlignment
+public:
+	GappedMatchRecord* Clone() const { return new GappedMatchRecord( *this ); }	// copy made with operator new
+	GappedMatchRecord* Copy() const;	// copy made with m_allocateAndCopy, see the inline definition
+	virtual void Free();
+
+	friend std::ostream& operator<<(std::ostream& os, const GappedMatchRecord& mr); //write to source.
+};
+
+inline
+GappedMatchRecord* GappedMatchRecord::Copy() const
+{
+	return m_allocateAndCopy( *this );	// NOTE(review): presumably to be released with Free() rather than delete -- confirm
+}
+inline
+void GappedMatchRecord::Free()
+{
+	m_free(this);	// NOTE(review): presumably the counterpart of m_allocateAndCopy used by Copy() -- confirm
+}
+
+
+/** orders on increasing multiplicity; operator() is const so the comparator also works through const objects */
+typedef std::pair< MatchRecord*, std::vector< size_t >* > MatchSortEntry;
+class MatchSortEntryCompare
+{
+public:
+	bool operator()( const MatchSortEntry& a, const MatchSortEntry& b ) const
+	{
+		return a.first->Multiplicity() < b.first->Multiplicity();
+	}
+};
+
+template< typename T >
+class IsNullPtr
+{
+public:	// unary predicate over const T*
+	bool operator()( const T* ptr ){ return !ptr; }	// true exactly for a null pointer
+};
+/** Resolves overlaps among chained_matches, tries a MUSCLE alignment between neighbouring matches, and stores the result as one CompactGappedAlignment. */
+inline void GappedMatchRecord::finalize( std::vector<genome::gnSequence *> seq_table)
+{
+	// declared inline: this member is defined in a header, so every including file sees the definition
+	MatchSortEntryCompare msec;
+	std::vector< MatchSortEntry > mse_list( chained_matches.size() );
+
+	for( size_t cI = 0; cI < chained_matches.size(); ++cI )
+	{
+		mse_list[cI].first = chained_matches[cI];
+		mse_list[cI].second = &chained_component_maps[cI];
+	}
+	std::sort( mse_list.begin(), mse_list.end(), msec );
+	// add lowest multiplicity matches first, progressively add higher mult. matches
+	std::vector< mems::AbstractMatch* > chain;
+	for( size_t cI = 0; cI < mse_list.size(); ++cI )
+	{
+		mems::MatchProjectionAdapter mpaa( mse_list[cI].first, *(mse_list[cI].second) );
+		// clobber any region that overlaps with this mpaa
+		for( size_t seqI = 0; seqI < mpaa.SeqCount(); seqI++ )
+		{
+			size_t csize = chain.size();
+			for( size_t mI = 0; mI < csize; mI++ )
+			{
+				mems::AbstractMatch* m = chain[mI];
+				if( m == NULL )
+					continue;
+                if (m->LeftEnd(seqI) == 0 && m->Length( seqI ) == 0)
+                    continue; //should we throw error here?
+				if( m->RightEnd(seqI) < mpaa.LeftEnd(seqI) )
+					continue;	// no overlap here!
+				if( m->LeftEnd(seqI) > mpaa.RightEnd(seqI) )
+					continue;	// no overlap, woohoo!
+				if( m->LeftEnd(seqI) < mpaa.LeftEnd(seqI) &&
+					m->RightEnd(seqI) >= mpaa.LeftEnd(seqI) )
+				{
+	                // take the part of m to the left of mpaa and put it at the end of our chain
+					mems::AbstractMatch* m_left = m->Copy();
+					m_left->CropRight( m_left->RightEnd(seqI) - mpaa.LeftEnd(seqI) + 1, seqI );
+					m->CropLeft( m_left->Length(seqI), seqI );
+                    chain.push_back(m_left);
+				}
+				// now m is guaranteed to have left-end >= mpaa
+				if( m->RightEnd(seqI) <= mpaa.RightEnd(seqI) )
+				{
+					// m is completely contained inside mpaa, so get rid of it
+					m->Free();
+					chain[mI] = NULL;
+					continue;
+				}
+
+    			m->CropLeft( mpaa.RightEnd(seqI) - m->LeftEnd(seqI) + 1, seqI );		
+			}
+		}
+		// get rid of any null entries in the chain
+		std::vector< mems::AbstractMatch* >::iterator end_iter = std::remove( chain.begin(), chain.end(), static_cast< mems::AbstractMatch* >( NULL ) );
+		chain.erase( end_iter, chain.end() );
+		chain.push_back( mpaa.Copy() );
+		if( chain.back()->Orientation(0) == mems::AbstractMatch::reverse )
+			chain.back()->Invert();
+	}
+	
+	if( chain.size() == 0 )
+	{
+		*this = GappedMatchRecord();
+		return;
+	}
+	mems::MatchStartComparator< mems::AbstractMatch > asc(0);
+	std::sort( chain.begin(), chain.end(), asc );
+	// aed: At this point the matches in chain are in sorted order, so the region betweeen each of them is what should get fed to muscle
+	//      will need to feed AbstractMatch instead of Match to MuscleInterface::Align though
+	// chain is non-empty here, so size()-1 cannot wrap around
+	uint chainsize = chain.size()-1;
+
+	if (1)
+	{
+	try{
+	for( uint i = 0; i < chainsize; i++ )
+	{
+		mems::GappedAlignment* cr = NULL;
+		boolean align_success = false;
+		// attempt a muscle alignment
+		cr = new mems::GappedAlignment();
+		mems::AbstractMatch* m1 = chain.at(i);
+		mems::AbstractMatch* m2 = chain.at(i+1);
+	
+		align_success = mems::MuscleInterface::getMuscleInterface().Align( *cr,  m1 , m2,  seq_table );
+		if( align_success )
+		{
+            //cerr << "muscle alignment success!!" << endl;
+			// on success the chain becomes the only holder of cr
+			// aed: just insert the resulting GappedAlignment objects into chain
+			chain.insert(chain.begin()+(i+1), cr);
+			chainsize++;
+			// tjt: skip over newly inserted item
+			i++;		
+		}
+		else
+			delete cr;	// alignment failed: nothing else refers to cr, so release it instead of leaking it
+		
+	}
+	
+	}catch( genome::gnException& gne ){
+		std::cerr << gne << std::endl;
+	}catch(std::exception& e){
+		std::cerr << e.what() << std::endl;
+		std::cerr << chain.size() << std::endl;
+	}catch(...){
+		std::cerr << "matrix exception?\n";
+	}
+	}
+
+	MatchRecord* mr = this->Copy();
+	SetMatches( chain );
+	//tjt: now chain should be empty
+	// don't keep a potentially huge tree of GappedMatchRecords.  instead, flatten to a single cga
+	mems::CompactGappedAlignment<> tmpcga(*this);
+	chain.push_back(tmpcga.Copy());
+	SetMatches( chain );
+	//tjt: assign this to slot allocated & copied MatchRecord
+	MatchRecord::operator=(*mr);
+	mr->Free();
+}
+
+inline std::ostream& operator<<(std::ostream& os, const UngappedMatchRecord& ula);	// inline because the definition below lives in a header
+inline std::ostream& operator<<(std::ostream& os, const UngappedMatchRecord& ula){ //write to stream.
+	os << ula.AlignmentLength();	// alignment length first
+	for(uint i=0; i < ula.SeqCount(); i++)
+		os << '\t' << ula.Start(i);	// then one tab-separated start per sequence
+	return os;
+}
+
+inline std::ostream& operator<<(std::ostream& os, const GappedMatchRecord& ula);	// inline because the definition below lives in a header
+inline std::ostream& operator<<(std::ostream& os, const GappedMatchRecord& ula){ //write to stream.
+	os << ula.AlignmentLength();	// first line: alignment length, then one tab-separated start per sequence
+	for(uint i=0; i < ula.SeqCount(); i++)
+		os << '\t' << ula.Start(i);
+	os << "\nlens:";	// second line: per-sequence lengths
+	for(uint i=0; i < ula.SeqCount(); i++)
+		os << '\t' << ula.Length(i);
+
+	return os;
+}
+
+#endif // __MatchRecord_h__
diff --git a/src/RepeatHashCat.cpp b/src/RepeatHashCat.cpp
new file mode 100644
index 0000000..aef9197
--- /dev/null
+++ b/src/RepeatHashCat.cpp
@@ -0,0 +1,12 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/RepeatHashCat.h"
+
+uint32 gnSequence::concatContigStart( void ) const{	// NOTE(review): the included header is libMems/RepeatHashCat.h -- confirm gnSequence declares this member
+	STACK_TRACE_START
+		int32 ccs = this->concat_contig_start;	// NOTE(review): held as signed int32 but returned as uint32 -- confirm the member's real type
+		return ccs;
+	STACK_TRACE_END
+}
diff --git a/src/RepeatHashCat.h b/src/RepeatHashCat.h
new file mode 100644
index 0000000..7ab00e4
--- /dev/null
+++ b/src/RepeatHashCat.h
@@ -0,0 +1,21 @@
+#ifndef _RepeatHashThread_h_
+#define _RepeatHashThread_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/RepeatHash.h"
+// NOTE(review): this class cannot compile as written -- see the notes on the constructor, destructor and closing brace below.
+class TheRealMemHash : public RepeatHash
+{
+public:
+	RepeatHashCat();	// NOTE(review): constructor/destructor names do not match the class name TheRealMemHash
+	~RepeatHashCat();
+	RepeatHashThread(const RepeatHashThread& mh);	// NOTE(review): a third name; presumably meant as the copy constructor -- confirm
+	virtual RepeatHashThread* Clone() const;
+protected:
+	
+	//punt tjt: needed to add this to track where concatenated sequence starts
+	vector<uint32> concat_contig_start; // number of contigs in each sequence
+}	// NOTE(review): missing ';' after the class body, and the include guard opened at the top of this header has no #endif
\ No newline at end of file
diff --git a/src/SeedMatchEnumerator.h b/src/SeedMatchEnumerator.h
new file mode 100644
index 0000000..08544c4
--- /dev/null
+++ b/src/SeedMatchEnumerator.h
@@ -0,0 +1,144 @@
+#ifndef __SeedMatchEnumerator_h__
+#define __SeedMatchEnumerator_h__
+
+#include "libMems/MatchFinder.h"
+#include "libMems/RepeatHash.h"
+#include "libMems/MemHash.h"
+#include "libMems/MatchList.h"
+#include "libMems/SortedMerList.h"
+#include "libMems/Match.h"
+
+/**
+ * Turns every seed match into a full match without extension.
+ */
+class SeedMatchEnumerator : public mems::MatchFinder 
+{
+public:
+	virtual SeedMatchEnumerator* Clone() const;	// returns a copy of this object allocated with new
+
+	void FindMatches( mems::MatchList& match_list, size_t min_multi = 2, size_t max_multi = 1000, bool direct_repeats_only = false )	// replaces the contents of match_list with the seed matches found in its sequences
+	{
+        this->max_multiplicity = max_multi;	// HashMatch drops seeds whose multiplicity is above this value
+        this->min_multiplicity = min_multi;	// ... or below this value
+        this->only_direct = direct_repeats_only;	// when true, HashMatch keeps only components with Orientation() == 0
+		for( size_t seqI = 0; seqI < match_list.seq_table.size(); ++seqI ){
+			if( !AddSequence( match_list.sml_table[ seqI ], match_list.seq_table[ seqI ] ) ){
+				genome::ErrorMsg( "Error adding " + match_list.seq_filename[seqI] + "\n");
+				return;	// match_list has not been modified at this point
+			}
+		}
+		CreateMatches();	// return value is ignored; it is false unless seq_count == 1
+		match_list.clear();
+		match_list.insert( match_list.end(), mlist.begin(), mlist.end() );	// NOTE(review): mlist is not cleared here, so a second call presumably appends to earlier results -- confirm
+	}
+
+	virtual boolean CreateMatches();	// runs MatchFinder::FindMatchSeeds() when exactly one sequence was added
+protected:
+
+	virtual boolean EnumerateMatches( mems::IdmerList& match_list );	// forwards to HashMatch
+	virtual boolean HashMatch(mems::IdmerList& match_list);	// turns one list of seed occurrences into at most one Match appended to mlist
+	virtual mems::SortedMerList* GetSar(uint32 sarI) const;	// always returns sar_table[0]
+	mems::MatchList mlist;	// matches accumulated by HashMatch
+	void SetDirection(mems::Match& mhe);	// negates starts whose strand differs from the first defined component
+private:
+    //used to store rmin, rmax values
+    size_t max_multiplicity;	// assigned only in FindMatches
+    size_t min_multiplicity;	// assigned only in FindMatches
+	bool only_direct;	// assigned only in FindMatches
+};
+
+inline SeedMatchEnumerator* SeedMatchEnumerator::Clone() const{	// inline: defined in a header, so a non-inline definition is repeated in every including translation unit
+	return new SeedMatchEnumerator(*this);	// copy-constructs on the heap; nothing in this function releases it
+}
+
+inline mems::SortedMerList* SeedMatchEnumerator::GetSar(uint32 sarI) const
+{
+	return sar_table[0];	// every index resolves to the first table entry; sarI is never read
+}
+
+inline boolean SeedMatchEnumerator::CreateMatches(){	// inline: header-defined member, avoids duplicate definitions across translation units
+	if(seq_count == 1){	// seed enumeration is only run when exactly one sequence is loaded
+		MatchFinder::FindMatchSeeds();
+		return true;
+	}
+	return false;	// any other sequence count: nothing is done
+}
+
+inline boolean SeedMatchEnumerator::EnumerateMatches( mems::IdmerList& match_list ){	// inline: header-defined member, avoids duplicate definitions across translation units
+	return HashMatch(match_list);	// the whole list is handed on unchanged as a single candidate match
+}
+
+inline boolean SeedMatchEnumerator::HashMatch(mems::IdmerList& match_list){	// inline: header-defined member; always returns true
+	// order the seed occurrences with idmer_position_lessthan
+	match_list.sort(&mems::idmer_position_lessthan);
+	// initialize the hash entry: one component per seed occurrence, seed-length long
+	mems::Match mhe = mems::Match( match_list.size() );
+	mhe.SetLength( GetSar(0)->SeedLength() );
+	
+	//Fill in the new Match and set direction parity if needed.
+	mems::IdmerList::iterator iter = match_list.begin();
+	
+	uint32 repeatI = 0;
+	for(; iter != match_list.end(); iter++)
+		mhe.SetStart(repeatI++, iter->position + 1);	// stored starts are position + 1
+
+	SetDirection( mhe );
+	bool found_reverse = false;
+	std::vector< size_t > component_map;	// qualified: this header contains no using-directive for std
+	if(this->only_direct)
+	{
+		for( uint seqI = 0; seqI < mhe.Multiplicity(); seqI++)
+		{
+			if (mhe.Orientation(seqI) == 0)
+				component_map.push_back(seqI);	// components kept for the direct-only projection
+			else
+				found_reverse = true;
+		}
+	}
+	mems::MatchProjectionAdapter mpaa(mhe.Copy(),  component_map);	// NOTE(review): whether the adapter releases the Copy() it is given is not visible here -- confirm
+	if(mhe.Multiplicity() < 2){
+		std::cerr << "red flag " << mhe << "\n";	// reported and dropped
+	}
+	//use rmin & rmax to discard irrelevant seed matches
+	else if(mhe.Multiplicity() > this->max_multiplicity || mhe.Multiplicity() < this->min_multiplicity )
+	{
+		;	// outside [min_multiplicity, max_multiplicity]: dropped silently
+	}
+	else if(this->only_direct && found_reverse)
+	{
+		if ( mpaa.Multiplicity() > 1)	// at least two kept components are needed
+		{
+			mems::Match new_mhe = mems::Match( mpaa.Multiplicity() );
+			new_mhe.SetLength( GetSar(0)->SeedLength() );
+			for(uint mult = 0; mult < mpaa.Multiplicity(); mult++)
+				new_mhe.SetStart(mult, mpaa.Start(mult));
+			mlist.push_back(new_mhe.Copy());
+		}
+	}
+	else{
+		mlist.push_back(mhe.Copy());	// no filtering needed: keep every component
+		
+	}
+	return true;
+}
+
+// evil, evil code duplication.
+
+inline void SeedMatchEnumerator::SetDirection(mems::Match& mhe){	// inline: header-defined member, avoids duplicate definitions across translation units
+	//get the reference direction from the first component that is not NO_MATCH
+	boolean ref_forward = false;
+	uint32 seqI=0;
+	for(; seqI < mhe.SeqCount(); ++seqI)
+		if(mhe[seqI] != mems::NO_MATCH){
+			ref_forward = !(GetSar(seqI)->GetMer(mhe[seqI] - 1) & 0x1);	// low bit of the mer clear => reference counted as forward
+			break;
+		}
+	//set directional parity for the rest
+	for(++seqI; seqI < mhe.SeqCount(); ++seqI)
+		if(mhe[seqI] != mems::NO_MATCH)
+			if(ref_forward == (GetSar(seqI)->GetMer(mhe[seqI] - 1) & 0x1))	// low bit disagrees with the reference strand
+				mhe.SetStart(seqI, -mhe[seqI]);	// negate the start of that component
+}
+
+
+#endif	// __SeedMatchEnumerator_h__
diff --git a/src/UniqueMatchFinder.cpp b/src/UniqueMatchFinder.cpp
new file mode 100644
index 0000000..b47554b
--- /dev/null
+++ b/src/UniqueMatchFinder.cpp
@@ -0,0 +1,60 @@
+/*******************************************************************************
+ * $Id: UniqueMatchFinder.cpp,v 1.13 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2004 Aaron Darling.  All rights reserved.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * rights.  Redistribution of this file, in whole or in part is prohibited
+ * without express permission.
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "UniqueMatchFinder.h"
+#include <list>
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+UniqueMatchFinder::UniqueMatchFinder(){	// default constructor: no initializer list and no body statements
+}
+
+UniqueMatchFinder::~UniqueMatchFinder(){	// destructor: empty body, this class adds nothing to release
+}
+
+UniqueMatchFinder::UniqueMatchFinder(const UniqueMatchFinder& mh) : MemHash(mh){	// copy constructor: forwards mh to the MemHash copy constructor
+
+}
+
+UniqueMatchFinder* UniqueMatchFinder::Clone() const{	// returns a copy of this object created with the copy constructor
+	return new UniqueMatchFinder(*this);	// allocated with new; nothing in this function releases it
+}
+
+
+// hash only the seeds whose id occurs exactly once in match_list
+boolean UniqueMatchFinder::EnumerateMatches( IdmerList& match_list ){
+	// sorting by id makes every run of identical ids contiguous
+	match_list.sort(&idmer_id_lessthan);
+	IdmerList singletons;	// entries whose id appears exactly once
+	uint run_length = 1;	// number of equal ids seen in the run that ends at 'cur'
+	IdmerList::iterator cur = match_list.begin();
+	IdmerList::iterator next = match_list.begin();
+	// inside the loop body 'next' is always one element ahead of 'cur'
+	for(; next != match_list.end(); ++cur){
+		++next;
+		const bool same_run = next != match_list.end() && cur->id == next->id;
+		if( same_run ){
+			run_length++;
+			continue;
+		}
+		// 'cur' closes a run: keep it only when the run had a single member
+		if( run_length == 1 )
+			singletons.push_back( *cur );
+		run_length = 1;
+	}
+	// with fewer than two unique seeds nothing is hashed and success is reported
+	if( singletons.size() < 2 )
+		return true;
+	return HashMatch(singletons);
+}
diff --git a/src/UniqueMatchFinder.h b/src/UniqueMatchFinder.h
new file mode 100644
index 0000000..f9d08c0
--- /dev/null
+++ b/src/UniqueMatchFinder.h
@@ -0,0 +1,34 @@
+/*******************************************************************************
+ * $Id: UniqueMatchFinder.h,v 1.8 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2004 Aaron Darling.  All rights reserved.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * rights.  Redistribution of this file, in whole or in part is prohibited
+ * without express permission.
+ ******************************************************************************/
+
+#ifndef _UniqueMatchFinder_h_
+#define _UniqueMatchFinder_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/MemHash.h"
+
+/**
+ * Finds all pairwise matches with unique seeds among a group of sequences
+ */
+class UniqueMatchFinder : public mems::MemHash
+{
+public:
+	UniqueMatchFinder();	// default constructor
+	~UniqueMatchFinder();
+
+	UniqueMatchFinder(const UniqueMatchFinder& mh);	// copy constructor; copies the MemHash base from mh
+	virtual UniqueMatchFinder* Clone() const;	// returns a new copy of this object
+protected:
+
+	virtual boolean EnumerateMatches( mems::IdmerList& match_list );	// override defined in UniqueMatchFinder.cpp
+};
+
+#endif //_UniqueMatchFinder_h_
diff --git a/src/addUnalignedIntervals.cpp b/src/addUnalignedIntervals.cpp
new file mode 100644
index 0000000..3c2619a
--- /dev/null
+++ b/src/addUnalignedIntervals.cpp
@@ -0,0 +1,33 @@
+#include "libMems/Interval.h"
+#include "libMems/Islands.h"
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+int main( int argc, char* argv[] )
+{	// reads an interval file, loads its sequences, adds the unaligned intervals and writes the result
+	IntervalList iv_list;
+	if( argc != 3 )
+	{
+		cerr << "Usage: <input interval file> <output interval file>\n";
+		return -1;
+	}
+	ifstream in_file( argv[1] );
+	if( !in_file.is_open() )
+	{
+		cerr << "Error opening \"" << argv[1] << "\"\n";	// print the actual file name, not the text "argv[1]"
+		return -1;
+	}
+	iv_list.ReadStandardAlignment( in_file );
+	LoadSequences(iv_list, NULL);	// NULL: no stream is passed for progress output
+	addUnalignedIntervals( iv_list );
+	ofstream out_file( argv[2] );	// opened only after the input was processed
+	if( !out_file.is_open() )
+	{
+		cerr << "Error opening \"" << argv[2] << "\"\n";	// print the actual file name, not the text "argv[2]"
+		return -2;
+	}
+	iv_list.WriteStandardAlignment( out_file );
+	return 0;
+}
diff --git a/src/alignmentProjector.cpp b/src/alignmentProjector.cpp
new file mode 100644
index 0000000..2e3df11
--- /dev/null
+++ b/src/alignmentProjector.cpp
@@ -0,0 +1,101 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string>
+#include <fstream>
+#include <vector>
+#include <algorithm>
+#include <iostream>
+#include "libGenome/gnFilter.h"
+#include "libMems/IntervalList.h"
+#include "libMems/MatchList.h"
+#include "libMems/GappedAlignment.h"
+#include "libMems/Matrix.h"
+#include "libMems/MatchProjectionAdapter.h"
+#include "libMems/Aligner.h"
+#include "libGenome/gnFASSource.h"
+#include "libMems/ProgressiveAligner.h"
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+
+
+
+int main( int argc, char* argv[] )
+{	// projects an XMFA alignment and its multi-FastA sequences onto the sequence indices given on the command line
+	if( argc < 6 )
+	{
+		cerr << "Usage: alignmentProjector <input xmfa> <output xmfa> <mfa seq input> <mfa seq output> <list of seqs to include, starting at 0>\n";
+		return -1;
+	}
+	ifstream aln_in;
+	aln_in.open( argv[1] );
+	if( !aln_in.is_open() ){
+		cerr << "Error opening " << argv[1] << endl;
+		return -1;
+	}
+	ofstream aln_out;
+	aln_out.open( argv[2] );
+	if( !aln_out.is_open() ){
+		cerr << "Error writing to " << argv[2] << endl;
+		return -1;
+	}
+	string mfa_seqs = argv[3];
+	string mfa_output = argv[4];
+	try{
+		IntervalList input_ivs;
+		input_ivs.ReadStandardAlignment( aln_in );
+		aln_in.close();
+
+		MatchList ml;
+		ml.seq_filename = input_ivs.seq_filename;
+		LoadMFASequences( ml, mfa_seqs, NULL );
+		input_ivs.seq_table = ml.seq_table;
+
+		// create a projection list, rejecting indices that do not name a loaded sequence
+		vector< uint > projection;
+		IntervalList proj_ivs;
+		for( int i = 5; i < argc; ++i )
+		{
+			const int seq_index = atoi( argv[i] );	// atoi yields 0 for non-numeric text
+			if( seq_index < 0 || (size_t)seq_index >= input_ivs.seq_table.size() )
+				throw "Sequence index out of range";	// handled by the char const* catch clause below (exit code -3)
+			projection.push_back( seq_index );
+			proj_ivs.seq_filename.push_back( mfa_seqs );
+			proj_ivs.seq_table.push_back( input_ivs.seq_table[seq_index] );
+		}
+
+		vector< vector< MatchProjectionAdapter* > > LCB_list;
+		vector< LCB > projected_adjs;
+		projectIntervalList( input_ivs, projection, LCB_list, projected_adjs );
+
+		cout << "projection has " << LCB_list.size() << " LCBs\n";
+		proj_ivs.resize( LCB_list.size() );
+		for( size_t lcbI = 0; lcbI < LCB_list.size(); ++lcbI )
+			proj_ivs[lcbI].SetMatches( LCB_list[lcbI] );
+
+		proj_ivs.WriteStandardAlignment( aln_out );
+		gnSequence seq;	// second pass: write the selected contigs as one FastA sequence
+		seq.LoadSource( mfa_seqs );
+		ofstream seq_out( mfa_output.c_str() );
+		gnSequence proj_seq;
+		for( size_t projI = 0; projI < projection.size(); ++projI )
+			proj_seq += seq.contig(projection[projI]);
+		gnFASSource::Write(proj_seq,seq_out,false,false);
+	}catch( gnException& gne ){
+		cerr << gne << endl;
+		return -1;
+	}catch( exception& e ){
+		cerr << e.what() << endl;
+		return -2;
+	}catch( char const* c ){
+		cerr << c << endl;
+		return -3;
+	}catch(...){
+		cerr << "Unhandled exception" << endl;
+		return -4;
+	}
+}
+
diff --git a/src/backbone_global_to_local.cpp b/src/backbone_global_to_local.cpp
new file mode 100644
index 0000000..d099ae0
--- /dev/null
+++ b/src/backbone_global_to_local.cpp
@@ -0,0 +1,60 @@
+#include "libMems/IntervalList.h"
+#include "libMems/Backbone.h"
+
+using namespace mems;
+using namespace genome;
+using namespace std;
+
+int main (int ARGC,char ** ARGV) {
+  // Rewrites a backbone file as "contig:start<TAB>contig:end" columns, using the sequences named by the xmfa file.
+  IntervalList input_alignment;
+  ifstream align_file;
+  if(ARGC != 4){
+    cout <<"Usage:\nbackbone_global_to_local <xmfa file> <backbone file> <output file>\n";
+    return 0;
+  }
+  align_file.open(ARGV[1]);
+  if(!align_file.is_open()){
+    cerr <<"Couldn't read xmfa file: "<<ARGV[1]<<"\n";
+    return -1;	// stop instead of parsing an unopened stream
+  }
+  input_alignment.ReadStandardAlignment(align_file);
+  LoadSequences(input_alignment,&cout);
+  ifstream backbone_file;
+  backbone_file.open(ARGV[2]);
+  if(!backbone_file.is_open()){
+    cerr <<"Couldn't read backbone file: "<<ARGV[2]<<"\n";
+    return -1;
+  }
+  ofstream new_backbone(ARGV[3]);
+  if(!new_backbone.is_open()){	// check the output stream itself, not the already-verified align_file
+    cerr <<"Couldn't write to output file: "<<ARGV[3]<<"\n";
+    return -1;
+  }
+  vector< bb_seqentry_t > backbone_struct;
+  readBackboneSeqFile(backbone_file, backbone_struct);
+  for(size_t i=0; i < backbone_struct.size(); i++){	// one output line per backbone entry
+    for(size_t j=0; j < backbone_struct[i].size(); j++){	// j is also used as the index into seq_table
+      uint64 start = absolut(backbone_struct[i][j].first);
+      uint64 end = absolut(backbone_struct[i][j].second);
+      uint32 contig_num1;
+      uint32 contig_num2;
+      if(start == 0){
+        // a start of 0 is written as contig 0 with the coordinates left unchanged
+        contig_num1=0;
+        contig_num2=0;
+      }else{
+        input_alignment.seq_table[j]->globalToLocal(contig_num1,start);	// NOTE(review): presumably rewrites start in place -- confirm
+        input_alignment.seq_table[j]->globalToLocal(contig_num2,end);
+      }
+
+      if(contig_num1 != contig_num2){
+        //cerr <<"Not the same contig!" <<contig_num1 <<" "<<contig_num2;
+      }
+      if(j>0){
+        new_backbone<<"\t";
+      }
+      new_backbone<<contig_num1 <<":"<<start<<"\t"<<contig_num2 <<":"<<end;
+    }
+    new_backbone <<"\n";
+  }
+}
diff --git a/src/bbAnalyze.cpp b/src/bbAnalyze.cpp
new file mode 100644
index 0000000..c2a0a8b
--- /dev/null
+++ b/src/bbAnalyze.cpp
@@ -0,0 +1,1411 @@
+#include <fstream>
+#include <sstream>
+#include <iomanip>
+#include "libMems/Backbone.h"
+#include "libGenome/gnFeature.h"
+#include "libGenome/gnBaseQualifier.h"
+#include "libMems/IntervalList.h"
+#include "libMems/AbstractMatch.h"
+#include "libMems/MatchList.h"
+#include "libMems/PhyloTree.h"
+#include "libMems/ProgressiveAligner.h"
+#include <boost/algorithm/string/erase.hpp>
+#include <boost/tuple/tuple.hpp>
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+// important constants that affect inference
+const uint SHORT_SEGMENT = 5;	// when considering overlaps to genes, ignore overlaps less than this amount
+const uint DISCARD_SEGMENT = 20;	// do not consider segments shorter than this amount
+const double ALTERNALOG_MIN_SIZE = 15.0;
+
+
+class BbSeqComp	// orders bb_seqentry_t pointers by the absolute value of their .first coordinate in one chosen sequence
+{
+public:
+	BbSeqComp( uint seq ) : m_seq( seq ) {}	// seq: index of the sequence whose coordinates are compared
+	bool operator()( const bb_seqentry_t* a, const bb_seqentry_t* b ) const	// const, matching LocComp::operator() in this file: comparing never modifies the functor
+	{
+		return genome::absolut( (*a)[m_seq].first ) < genome::absolut( (*b)[m_seq].first );
+	}
+private:
+	uint m_seq;	// sequence index used by operator()
+};
+
+template< typename PtrVector >
+void createMap( const PtrVector& mv_from, const PtrVector& mv_to, vector< size_t >& map )
+{	// map[i] becomes the index in mv_to of the element that has the same sorted rank as mv_from[i]
+	typedef pair< typename PtrVector::value_type, size_t > tagged_t;	// (element, original index)
+	vector< tagged_t > from_sorted, to_sorted;
+	for( size_t pos = 0; pos < mv_from.size(); ++pos )
+		from_sorted.push_back( tagged_t( mv_from[pos], pos ) );
+	for( size_t pos = 0; pos < mv_to.size(); ++pos )
+		to_sorted.push_back( tagged_t( mv_to[pos], pos ) );
+	std::sort( from_sorted.begin(), from_sorted.end() );
+	std::sort( to_sorted.begin(), to_sorted.end() );
+	map.resize( from_sorted.size() );
+	// pair up entries of equal rank; with identical element sets this links each element to itself
+	for( size_t rank = 0; rank < from_sorted.size(); ++rank )
+		map[ from_sorted[rank].second ] = to_sorted[rank].second;
+}
+
+size_t getCDScount( gnSequence* anno_seq )
+{	// counts the features of anno_seq whose name is exactly "CDS"
+	size_t cds_total = 0;
+	for( size_t idx = 0; idx < anno_seq->getFeatureListLength(); ++idx )
+	{
+		gnBaseFeature* cur_feature = anno_seq->getFeature( idx );
+		const string kind = cur_feature->GetName();	// copied out so the feature can be released first
+		delete cur_feature;
+		if( kind == "CDS" )
+			++cds_total;
+	}
+	return cds_total;
+}
+
+void featureIntersect( vector< bb_seqentry_t >& bb_list, uint seqI, vector< vector< size_t > >& intersecting, gnSequence* anno_seq )
+{	// for every CDS/tRNA/rRNA/misc_rna feature of anno_seq, lists the bb_list entries overlapping it in sequence seqI by more than SHORT_SEGMENT
+	// intersecting[f] receives the bb_list indices of the bb segs that overlap feature f
+	intersecting.resize( anno_seq->getFeatureListLength() );
+
+	uint seq_count = bb_list.front().size();
+
+	vector< bb_seqentry_t* > bb_ptrs( bb_list.size() );
+	for( size_t i = 0; i < bb_list.size(); ++i )
+		bb_ptrs[i] = &bb_list[i];
+	vector< bb_seqentry_t* > orig_ptrs( bb_ptrs );
+	BbSeqComp bsc( seqI );
+	std::sort( bb_ptrs.begin(), bb_ptrs.end(), bsc );	// sorted by absolute start in seqI so lower_bound can be used
+	vector< size_t > ptr_map;
+	createMap( bb_ptrs, orig_ptrs, ptr_map );	// ptr_map[sorted position] = index in bb_list
+
+	for( size_t featureI = 0; featureI < anno_seq->getFeatureListLength(); ++featureI )
+	{
+		gnBaseFeature* feat = anno_seq->getFeature( featureI );
+		string feat_name = feat->GetName();
+		if( feat_name != "CDS" &&
+			feat_name != "tRNA" &&
+			feat_name != "rRNA" &&
+			feat_name != "misc_rna" )	// NOTE(review): other functions in this file compare against "misc_RNA" -- confirm the intended spelling
+			{ delete feat; continue; }	// other feature types are skipped; release the feature first so it is not leaked
+		gnLocation loc = feat->GetLocation(0);
+		if( loc.GetFirst() > loc.GetLast() || loc.GetFirst() == 0 || loc.GetLast() == 0 )
+			{ delete feat; continue; }	// a problem parsing annotation? release the feature before skipping it
+		// find where feature lands in our list
+		bb_seqentry_t tmp_bb( seq_count );
+		tmp_bb[seqI].first = loc.GetFirst();
+		tmp_bb[seqI].second = loc.GetFirst();
+		vector< bb_seqentry_t* >::iterator liter = std::lower_bound( bb_ptrs.begin(), bb_ptrs.end(), &tmp_bb, bsc );
+		tmp_bb[seqI].first = loc.GetLast();
+		tmp_bb[seqI].second = loc.GetLast();
+		vector< bb_seqentry_t* >::iterator uiter = std::lower_bound( bb_ptrs.begin(), bb_ptrs.end(), &tmp_bb, bsc );
+		if( liter == bb_ptrs.end() &&
+			bb_ptrs.size() > 0 &&
+			genome::absolut( (*bb_ptrs.back())[seqI].second ) >= loc.GetFirst() )
+			liter--;	// the last segment may still reach into the feature
+		while( liter != bb_ptrs.end() &&
+			liter != bb_ptrs.begin() &&
+			genome::absolut( (**liter)[seqI].second ) >= loc.GetFirst() )
+			--liter;	// walk left past every segment that ends at or after the feature start
+		if( liter != bb_ptrs.end() &&
+			genome::absolut( (**liter)[seqI].second ) < loc.GetFirst() )
+			++liter;	// stepped one too far: this segment ends before the feature starts
+		for( ; liter != uiter; ++liter )
+		{
+			if( (**liter)[seqI].first == 0 )
+				continue;	// segment is not defined in seqI
+			// only add the bbseg if the intersection is larger than SHORT_SEGMENT
+			gnLocation bb_loc;
+			if( (**liter)[seqI].first > 0 )
+				bb_loc = gnLocation((**liter)[seqI].first, (**liter)[seqI].second);
+			else
+				bb_loc = gnLocation(-(**liter)[seqI].first, -(**liter)[seqI].second);
+
+			gnLocation intersect = loc.GetIntersection( bb_loc, gnLocation::determinedRegions );
+			if( intersect.GetLast() - intersect.GetFirst() <= SHORT_SEGMENT )
+				continue;
+
+			intersecting[ featureI ].push_back( ptr_map[ liter - bb_ptrs.begin() ] );
+		}
+		delete feat;
+	}
+}
+
+void getFeatureHits( const vector< vector< size_t > >& intersecting, const bitset_t& segs, bitset_t& features_hit )
+{	// sets bit f of features_hit when any segment index listed in intersecting[f] is set in segs
+	const size_t feature_total = intersecting.size();
+	features_hit.resize( feature_total );
+	features_hit.reset();
+	for( size_t f = 0; f < feature_total; ++f )
+	{
+		const vector< size_t >& seg_ids = intersecting[f];
+		for( vector< size_t >::const_iterator seg = seg_ids.begin(); seg != seg_ids.end(); ++seg )
+			if( segs.test( *seg ) )
+				features_hit.set( f );
+	}
+}
+
+typedef map< string, map< string, double > > multifun_map_t;
+typedef map< string, map< string, string > > multifun_names_t;
+
+void makeMultiFunCount( gnSequence* anno_seq, multifun_map_t& mf_count, multifun_names_t& mf_names, bitset_t& feature_mask )
+{	// tallies, per two-level MultiFun category, the masked-in CDS features of anno_seq and records each category's name
+	for( size_t featureI = 0; featureI < anno_seq->getFeatureListLength(); ++featureI )
+	{
+		if( !feature_mask.test( featureI ) )
+			continue;	// skip this feature if we're not supposed to include it
+		gnBaseFeature* feat = anno_seq->getFeature( featureI );
+		string feat_name = feat->GetName();
+		if( feat_name != "CDS"  )
+		{
+			delete feat;
+			continue;	
+		}
+		bool found_multifun = false;
+		for( size_t qualI = 0; qualI < feat->GetQualifierListLength(); ++qualI )
+		{
+			gnBaseQualifier* gnq = feat->GetQualifier(qualI);
+			string qual_name = gnq->GetName();
+			if( qual_name != "function" )
+			{
+				delete gnq;
+				continue;
+			}
+			string qual_value = gnq->GetValue();
+			if( qual_value[0] == '"' )
+				qual_value = qual_value.substr(1);	// drop a leading double quote
+			stringstream qv_str( qual_value );
+			string mf_level1;
+			getline( qv_str, mf_level1, '.' );	// text before the first '.' is the level-1 id
+			if( mf_level1.size() > 1 )
+			{
+				// not a multifun tag
+				delete gnq;
+				continue;
+			}
+			string mf_level2;
+			mf_level2 += qv_str.get();	// the single character after the '.' is the level-2 id
+			mf_count[mf_level1][mf_level2]++;
+
+			// get the name
+			string l1_name;
+			getline( qv_str, l1_name, ' ' );	// skipped: overwritten by the next read
+			getline( qv_str, l1_name, ';' );
+			string l2_name;
+			getline( qv_str, l2_name, ';' );
+			string cur_name = l1_name + ';' + l2_name;
+			cur_name.erase( std::remove( cur_name.begin(), cur_name.end(), '\r' ), cur_name.end() );	// std::remove only shifts elements; erase drops the leftover tail
+			cur_name.erase( std::remove( cur_name.begin(), cur_name.end(), '\n' ), cur_name.end() );
+			string space_str = "  ";
+			boost::algorithm::erase_all( cur_name, space_str );	// removes every run of two spaces
+			mf_names[mf_level1][mf_level2] = cur_name;
+			delete gnq;
+
+			found_multifun = true;
+		}
+		// if we didn't find multifun, call it an "Unknown"
+		if( !found_multifun )
+		{
+			string q = "?";
+			mf_names[q][q] = "Unknown; No MultiFun Tag";
+			mf_count[q][q]++;
+		}
+
+		delete feat;
+	}
+}
+
+typedef boost::tuple< size_t, size_t, double, double, string > anal_row_t;
+class AnalRowComp	// orders anal_row_t rows by their third field, smallest first
+{
+public:
+	bool operator()( const anal_row_t& a, const anal_row_t& b ) const	// const, matching LocComp::operator() in this file: comparing never modifies the functor
+	{
+		return a.get<2>() < b.get<2>();
+	}
+};
+
+double chi_square_threshold = 5;
+double min_expected_threshold = 5;
+void mfAnalyze( ofstream& anal_output, multifun_map_t& all_mf, multifun_map_t& subset_mf, multifun_names_t& mf_names, double expect_freq )	// writes a " & "-delimited table of the subset_mf categories that pass chi_square_threshold and min_expected_threshold
+{
+	vector< anal_row_t > rows;	// (subset count, total count, percent, chi-square, category name)
+	multifun_map_t::iterator l1_iter = subset_mf.begin();
+	for( ; l1_iter != subset_mf.end(); ++l1_iter )
+	{
+		multifun_map_t::iterator all_l1_iter = all_mf.find(l1_iter->first);	// NOTE(review): dereferenced below without an end() check -- assumes every key of subset_mf also exists in all_mf; confirm
+		multifun_names_t::iterator names_l1_iter = mf_names.find(l1_iter->first);	// NOTE(review): same assumption for mf_names
+		map< string, double >::iterator l2_iter = l1_iter->second.begin();
+		for( ; l2_iter != l1_iter->second.end(); ++l2_iter )
+		{
+			map< string, double >::iterator all_l2_iter = all_l1_iter->second.find(l2_iter->first);
+			map< string, string >::iterator names_l2_iter = names_l1_iter->second.find(l2_iter->first);
+
+			// percent in this category:
+			double pct = (l2_iter->second / all_l2_iter->second) * 100;
+			// category number:
+			string cat_num = l1_iter->first + "." + l2_iter->first;	// built but not used further in this function
+			// chi-square: (observed - expected)^2 / expected, with expected = total count * expect_freq
+			double chi_square = (l2_iter->second - (all_l2_iter->second*expect_freq));
+			chi_square *= chi_square;
+			chi_square /= (all_l2_iter->second*expect_freq);
+			// total category gene count
+			// category name
+			if( chi_square < chi_square_threshold )
+				continue;	// not significantly different
+			if( (all_l2_iter->second*expect_freq) < min_expected_threshold )
+				continue;	// don't have enough elements to make reliable estimation
+			rows.push_back( boost::make_tuple( l2_iter->second, all_l2_iter->second, pct, chi_square, names_l2_iter->second ) );
+		}
+	}
+	AnalRowComp arc;
+	string col_delim = " & ";
+	string new_row = "\\\\\n\\hline\n";	// emitted text is two backslashes, a newline, "\hline" and a newline
+	std::sort( rows.begin(), rows.end(), arc );	// AnalRowComp compares the percent field
+	anal_output << "NumGenes" << col_delim << "GenesInCat" << col_delim << "Percent" << col_delim;
+	anal_output << "Chi_square" << col_delim << "Mf_Level_2_name" << new_row;
+	for( size_t rI = 0; rI < rows.size(); ++rI )
+	{
+		// if we transition from under to over-represented, output an empty row
+		if( rI > 0 && rows[rI-1].get<2>() < expect_freq * 100 && rows[rI].get<2>() > expect_freq * 100 )
+			anal_output << new_row;
+		anal_output << rows[rI].get<0>() << col_delim;
+		anal_output << rows[rI].get<1>() << col_delim;
+		anal_output << setprecision(3) << rows[rI].get<2>() << col_delim;
+		anal_output << setprecision(3) << rows[rI].get<3>() << col_delim;
+		anal_output << rows[rI].get<4>() << new_row;
+	}
+}
+
+
+void featureNearestNeighbors( const vector< bb_seqentry_t >& bb_list, const bitset_t& filter, uint seqI, vector< pair< size_t, size_t > >& neighbors, gnSequence* anno_seq, const vector< string >& feature_types )
+{	// for each bb_list entry selected by filter, records the indices of the closest listed-type feature on its left and on its right in sequence seqI
+	// neighbors[bbI] = (left feature index, right feature index); numeric_limits<size_t>::max() means none was found
+	neighbors.resize( bb_list.size() );
+
+	uint seq_count = bb_list.front().size();	// not used further in this function
+
+	vector< gnBaseFeature* > feats( anno_seq->getFeatureListLength() );
+	vector< gnLocation > locs( anno_seq->getFeatureListLength() );
+	vector< string > names( anno_seq->getFeatureListLength() );
+	for( size_t featureI = 0; featureI < anno_seq->getFeatureListLength(); ++featureI )
+	{
+		feats[featureI] = anno_seq->getFeature( featureI );	// released in the clean-up loop at the end
+		locs[featureI] = feats[featureI]->GetLocation(0);
+		names[featureI] = feats[featureI]->GetName();
+	}
+	for( size_t bbI = 0; bbI < bb_list.size(); ++bbI )
+	{
+		// find the nearest feature
+		size_t best_left = (std::numeric_limits<size_t>::max)();
+		size_t best_right = (std::numeric_limits<size_t>::max)();
+		size_t best_left_dist = (std::numeric_limits<size_t>::max)();
+		size_t best_right_dist = (std::numeric_limits<size_t>::max)();
+		if( !filter.test(bbI) )
+		{
+			neighbors[bbI].first = best_left;	// entries outside the filter get the "none" value on both sides
+			neighbors[bbI].second = best_right;
+			continue;
+		}
+		for( size_t featI = 0; featI < feats.size(); ++featI )
+		{
+			size_t ntype = 0;
+			for( ; ntype < feature_types.size(); ++ntype )
+				if( names[featI] == feature_types[ntype] )
+					break;
+			if( ntype == feature_types.size() )
+				continue;	// apparently this type of feature isn't interesting...
+			if( locs[featI].GetFirst() > locs[featI].GetLast() || locs[featI].GetFirst() == 0 || locs[featI].GetLast() == 0 )
+				continue;	// a problem parsing annotation?
+			if( genome::absolut(bb_list[bbI][seqI].first) > locs[featI].GetLast() - ALTERNALOG_MIN_SIZE &&
+				(int64)genome::absolut(bb_list[bbI][seqI].first) - (int64)locs[featI].GetLast() < best_left_dist )	// NOTE(review): signed difference compared with a size_t -- a negative distance presumably converts to a very large value; confirm intent
+			{
+				best_left_dist = (int64)genome::absolut(bb_list[bbI][seqI].first) - (int64)locs[featI].GetLast();
+				best_left = featI;
+			}
+			if( genome::absolut(bb_list[bbI][seqI].second) < locs[featI].GetFirst() + ALTERNALOG_MIN_SIZE &&
+				(int64)locs[featI].GetFirst() - (int64)genome::absolut(bb_list[bbI][seqI].second) < best_right_dist )	// NOTE(review): same mixed signed/unsigned comparison as for the left side
+			{
+				best_right_dist = (int64)locs[featI].GetFirst() - (int64)genome::absolut(bb_list[bbI][seqI].second);
+				best_right = featI;
+			}
+		}
+		neighbors[bbI].first = best_left;
+		neighbors[bbI].second = best_right;
+	}
+	// clean up
+	for( size_t featureI = 0; featureI < feats.size(); ++featureI )
+		delete feats[featureI];
+}
+
+void printFilteredBbSeqList( ostream& os, const vector< bb_seqentry_t >& bb_seq_list, const bitset_t& filter )
+{
+	// emits, one per line, each entry of bb_seq_list whose bit is set in filter
+	for( size_t entryI = 0; entryI < bb_seq_list.size(); ++entryI )
+	{
+		if( !filter.test(entryI) )
+			continue;	// entry not selected
+		printBbSeq( os, bb_seq_list[entryI] );
+		os << endl;
+	}
+}
+
+void classifyIntergenic( ostream& os, const vector< bb_seqentry_t >& bbseq_list, const bitset_t& intergenic, 
+						uint anno_seqI, gnSequence* anno_seq, bitset_t& trna_neighbor, bitset_t& miscrna_neighbor, 
+						bitset_t& converging_cds, bitset_t& diverging_cds, bitset_t& inline_cds, 
+						bitset_t& variable_miscrna, bitset_t& variable_trna )
+{	// classifies each segment selected by 'intergenic' by the type and strand of its nearest neighbouring features; 'os' is not written to here
+	vector< pair< size_t, size_t > > all_neighbors;
+	vector< string > all_types;
+	all_types.push_back( "CDS" );
+	all_types.push_back( "rRNA" );
+	all_types.push_back( "tRNA" );
+	all_types.push_back( "misc_RNA" );
+	trna_neighbor.resize( bbseq_list.size() );	// indexed by segment
+	miscrna_neighbor.resize( bbseq_list.size() );	// indexed by segment
+	variable_miscrna.resize(anno_seq->getFeatureListLength());	// indexed by feature
+	variable_trna.resize(anno_seq->getFeatureListLength());	// indexed by feature
+	featureNearestNeighbors( bbseq_list, intergenic, anno_seqI, all_neighbors, anno_seq, all_types );
+	for( size_t bbI = 0; bbI < bbseq_list.size(); ++bbI )
+	{
+		if( !intergenic.test(bbI) )
+			continue;
+		if( all_neighbors[bbI].first >= anno_seq->getFeatureListLength() ||
+			all_neighbors[bbI].second >= anno_seq->getFeatureListLength() )
+			continue;	// a neighbour is missing on at least one side
+		gnBaseFeature* lfeat = anno_seq->getFeature(all_neighbors[bbI].first);
+		gnBaseFeature* rfeat = anno_seq->getFeature(all_neighbors[bbI].second);
+		if( lfeat->GetName() == "tRNA" || rfeat->GetName() == "tRNA" )
+			trna_neighbor.set(bbI);	// segment borders a tRNA on either side
+		if( lfeat->GetName() == "tRNA" )
+			variable_trna.set(all_neighbors[bbI].first);	// the tRNA feature itself is marked
+		if( rfeat->GetName() == "tRNA" )
+			variable_trna.set(all_neighbors[bbI].second);
+		if( lfeat->GetName() == "misc_RNA" || rfeat->GetName() == "misc_RNA" )
+			miscrna_neighbor.set(bbI);	// segment borders a misc_RNA on either side
+		if( lfeat->GetName() == "misc_RNA" )
+			variable_miscrna.set(all_neighbors[bbI].first);
+		if( rfeat->GetName() == "misc_RNA" )
+			variable_miscrna.set(all_neighbors[bbI].second);
+		delete lfeat;
+		delete rfeat;
+	}
+	
+	vector< pair< size_t, size_t > > cds_neighbors;	// second pass: nearest neighbours restricted to CDS features
+	vector< string > cds_types;
+	cds_types.push_back( "CDS" );
+	featureNearestNeighbors( bbseq_list, intergenic, anno_seqI, cds_neighbors, anno_seq, cds_types );
+	
+	converging_cds.resize( bbseq_list.size() );
+	diverging_cds.resize( bbseq_list.size() );
+	inline_cds.resize( bbseq_list.size() );
+	for( size_t bbI = 0; bbI < bbseq_list.size(); ++bbI )
+	{
+		if( !intergenic.test(bbI) )
+			continue;
+		if( cds_neighbors[bbI].first >= anno_seq->getFeatureListLength() ||
+			cds_neighbors[bbI].second >= anno_seq->getFeatureListLength() )
+			continue;	// no CDS on at least one side
+		gnBaseFeature* lfeat = anno_seq->getFeature(cds_neighbors[bbI].first);
+		gnBaseFeature* rfeat = anno_seq->getFeature(cds_neighbors[bbI].second);
+		if( lfeat->GetLocationType() == gnLocation::LT_Complement &&
+			rfeat->GetLocationType() != gnLocation::LT_Complement )
+			diverging_cds.set(bbI);	// left CDS is complement, right CDS is not
+		else if( lfeat->GetLocationType() != gnLocation::LT_Complement &&
+			rfeat->GetLocationType() == gnLocation::LT_Complement )
+			converging_cds.set(bbI);	// left CDS is not complement, right CDS is
+		else
+			inline_cds.set(bbI);	// both neighbours have the same complement status
+		delete lfeat;
+		delete rfeat;
+	}
+}
+
+void findVariableSegmentsWithFlankingBB( const vector< bb_entry_t >& bb_list, const vector< double >& avg_lens, vector< pair< size_t, size_t > >& variable_segs, size_t min_bb_size = ALTERNALOG_MIN_SIZE, size_t min_variable_size = ALTERNALOG_MIN_SIZE, bool alternalogs = false )
+{	// appends to variable_segs the index pairs of consecutive all-sequence backbone segments (same iv) that flank enough variable sequence
+	// find alternalogs (only at root node)
+	const size_t NO_PREVIOUS = (std::numeric_limits<size_t>::max)();
+	size_t prev_bb_seg = NO_PREVIOUS;	// index of the last qualifying all-sequence segment
+	uint seq_count = bb_list.front().bb_seq.size();
+	for( size_t bbI = 0; bbI < bb_list.size(); ++bbI )
+	{
+		if( bb_list[bbI].bb_cols.Multiplicity() != seq_count ||
+			avg_lens[bbI] < min_bb_size )
+			continue;	// don't count this as n-way backbone
+		if( prev_bb_seg == NO_PREVIOUS ||
+			(bb_list[prev_bb_seg].iv != bb_list[bbI].iv)
+			)
+		{
+			// no intervening alternalog...
+			prev_bb_seg = bbI;
+			continue;
+		}
+		// was there an alternalog?
+		uint subset_count = 0;	// count the subset backbone of substantial size 
+		bitset_t in_bb( seq_count );	// sequences that appear in some sufficiently long segment between the two flanks
+		for( size_t segI = prev_bb_seg + 1; segI < bbI; ++segI )
+		{
+			if( avg_lens[segI] < min_variable_size )
+				continue;
+			bool found_new = false;
+			for( size_t seqI = 0; seqI < seq_count; ++seqI )
+			{
+				if( bb_list[segI].bb_seq[seqI].first != 0 )
+				{
+					if( !in_bb.test(seqI) )
+						found_new = true;
+					in_bb.set(seqI);
+				}
+			}
+			if( found_new )
+				subset_count++;	// counted once per segment that introduces a not-yet-seen sequence
+		}
+		for( size_t seqI = 0; seqI < seq_count; ++seqI )
+		{
+			// debug:
+			if( (bb_list[bbI].bb_seq[seqI].first < 0 && bb_list[bbI].bb_seq[seqI].second > 0) ||
+				(bb_list[bbI].bb_seq[seqI].first > 0 && bb_list[bbI].bb_seq[seqI].second < 0) ||
+				(bb_list[bbI].bb_seq[seqI].first < 0 && bb_list[prev_bb_seg].bb_seq[seqI].first > 0) ||
+				(bb_list[bbI].bb_seq[seqI].first > 0 && bb_list[prev_bb_seg].bb_seq[seqI].first < 0) ||
+				(bb_list[bbI].bb_seq[seqI].first < 0 && bb_list[prev_bb_seg].bb_seq[seqI].second > 0) ||
+				(bb_list[bbI].bb_seq[seqI].first > 0 && bb_list[prev_bb_seg].bb_seq[seqI].second < 0) )
+			{
+				cerr << "mismatch parity!!\n";	// the coordinate signs of the two flanks disagree
+				genome::breakHere();
+			}
+			// normal:
+			if( in_bb.test(seqI) )
+				continue;	// this sequence was already accounted for by an intermediate segment
+			int64 diff = 0;	// distance in seqI between the two flanking segments
+			if( bb_list[bbI].bb_seq[seqI].first < 0 )
+				diff = genome::absolut( bb_list[prev_bb_seg].bb_seq[seqI].first - bb_list[bbI].bb_seq[seqI].second );
+			else
+				diff = bb_list[bbI].bb_seq[seqI].first - bb_list[prev_bb_seg].bb_seq[seqI].second;
+			if( diff >= min_variable_size )
+				subset_count++;
+		}
+		if( alternalogs && subset_count > 1 )
+			variable_segs.push_back( make_pair( prev_bb_seg, bbI ) );	// alternalog mode needs at least two counted variants
+		else if( !alternalogs && subset_count > 0 )
+			variable_segs.push_back( make_pair( prev_bb_seg, bbI ) );	// otherwise one counted variant is enough
+		prev_bb_seg = bbI;
+	}
+}
+
+void makeVariableSegmentsCoordinateList( const vector< bb_entry_t >& bb_list, const vector< pair< size_t, size_t > >& alternalogs, vector< bb_seqentry_t >& alternabb_list )
+{	// for each (left, right) pair of bb_list indices, stores the coordinates lying between the two segments in every sequence
+	const uint seq_total = bb_list.front().bb_seq.size();
+	alternabb_list.resize( alternalogs.size() );
+	for( size_t pairI = 0; pairI < alternalogs.size(); ++pairI )
+	{
+		const bb_seqentry_t& left_bb = bb_list[ alternalogs[pairI].first ].bb_seq;
+		const bb_seqentry_t& right_bb = bb_list[ alternalogs[pairI].second ].bb_seq;
+		bb_seqentry_t gap = left_bb;	// copy of the left entry; the first seq_total elements are overwritten below
+		for( size_t s = 0; s < seq_total; ++s )
+		{
+			if( !( left_bb[s].first < 0 ) )
+			{
+				// non-negative start: the gap runs from the left segment's end to the right segment's start
+				gap[s].first = left_bb[s].second;
+				gap[s].second = right_bb[s].first;
+				continue;
+			}
+			// negative start: the gap runs from the right segment's end to the left segment's start
+			gap[s].first = right_bb[s].second;
+			gap[s].second = left_bb[s].first;
+		}
+		alternabb_list[pairI] = gap;
+	}
+}
+
+/** Comparison functor that orders gnLocation objects by ascending GetFirst() value. */
+class LocComp {
+public:
+	bool operator()( const gnLocation& lhs, const gnLocation& rhs ) const
+	{
+		// lhs sorts before rhs exactly when its first coordinate is the smaller one
+		return rhs.GetFirst() > lhs.GetFirst();
+	}
+};
+
+/**
+ * Collects, for every sequence, the gaps between neighbouring CDS features.
+ *
+ * The first location of each feature named "CDS" is gathered and sorted by its first
+ * coordinate.  Adjacent locations are then walked pairwise; whenever the earlier one
+ * ends before the later one starts, the pair (last coordinate of the earlier CDS,
+ * first coordinate of the later CDS) is appended to ranges[seqI].  The stored values
+ * are the flanking CDS boundary coordinates themselves, not the interior of the gap.
+ *
+ * @param seq_table  sequences whose feature lists are scanned
+ * @param ranges     output; resized to seq_table.size(), new pairs are appended per sequence
+ */
+void identifyIntergenicRanges( 	vector< gnSequence* >& seq_table, vector< vector< pair< size_t, size_t > > >& ranges )
+{
+	ranges.resize(seq_table.size());
+	for( size_t seqI = 0; seqI < seq_table.size(); seqI++ )
+	{
+		vector< gnLocation > loc_list;
+		for( size_t featI = 0; featI < seq_table[seqI]->getFeatureListLength(); featI++ )
+		{
+			gnBaseFeature* feat = seq_table[seqI]->getFeature(featI);
+			string feat_name = feat->GetName();
+			// don't deal with other feature types (source, etc)
+			if( feat_name == "CDS" )
+				loc_list.push_back( feat->GetLocation(0) );
+			// release the feature on every path; skipping a non-CDS feature used to leak it
+			delete feat;
+		}
+
+		LocComp lc;
+		std::sort( loc_list.begin(), loc_list.end(), lc );
+		// fI and lI always advance together, so lI stays one position ahead of fI
+		size_t fI = 0; 
+		size_t lI = 1; 
+		while( fI < loc_list.size() && lI < loc_list.size() )
+		{
+			if( loc_list[fI].GetLast() < loc_list[lI].GetFirst() )
+				ranges[seqI].push_back( make_pair( loc_list[fI].GetLast(), loc_list[lI].GetFirst() ) );
+			fI++; lI++;
+			// step over neighbouring locations that overlap each other
+			while( fI < loc_list.size() && lI < loc_list.size() &&
+				loc_list[fI].GetLast() >= loc_list[lI].GetFirst() )
+			{
+				if( loc_list[fI].GetLast() >= loc_list[lI].GetLast() )
+				{
+					// the later location ends inside the earlier one: advance an extra step and warn
+					fI++; lI++;
+					cerr << "danger, complete containment in seq " << seqI << endl;
+				}
+				fI++; lI++;
+			}
+		}
+	}
+}
+
+//big_coli_sam_fixed_goh0001_gou000001.xmfa guide.tre big_coli_sam_fixed_goh0001_gou000001.xmfa.backbone big_coli_sam_fixed_goh0001_gou000001.xmfa.bbcols 5 bb.out
+
+/**
+ * Classifies each coordinate range in alternabb_list, separately for every sequence,
+ * by how it relates to the annotated CDS, tRNA, rRNA and misc_RNA features.
+ *
+ * If alternabb_list is empty the function returns immediately and leaves every output
+ * vector untouched (in particular, unsized).  Otherwise each per-range output vector is
+ * resized to one bitset per sequence, with one bit per entry of alternabb_list, indexed
+ * by the entry's position in alternabb_list.
+ *
+ * @param alternabb_list   coordinate ranges to classify
+ * @param annotated_seq    not used by this function
+ * @param seq_table        sequences whose feature lists are examined
+ * @param genic            range lies entirely inside a CDS and passes the size / distance-from-edge test
+ * @param genic_fudge      range lies entirely inside a CDS but is small and close to the CDS edge
+ * @param overlaps_cds_upstream, overlaps_cds_downstream
+ *                         range ends inside a CDS without being contained by it, overlapping by more
+ *                         than ALTERNALOG_MIN_SIZE; the _fudge variants receive the smaller overlaps
+ * @param intergenic       complement of (genic | overlaps upstream | overlaps downstream | rrna | trna)
+ * @param spanner          range overlaps a CDS by more than ALTERNALOG_MIN_SIZE with neither end inside it
+ * @param trna, rrna, miscrna
+ *                         range overlaps a feature of that type by more than ALTERNALOG_MIN_SIZE
+ * @param pseudogenized    intergenic in this sequence, genic or CDS-overlapping in some other sequence,
+ *                         and not in any of this sequence's _fudge sets
+ * @param variable_miscrna, variable_trna
+ *                         one bit per feature of the sequence, set for misc_RNA / tRNA features hit by a range
+ * @param intergenic_segs  one bit per range produced by identifyIntergenicRanges(), set when a
+ *                         coordinate range overlaps it
+ */
+void classifyCoordinateRanges( 
+			const vector< bb_seqentry_t >& alternabb_list,			
+			gnSequence* annotated_seq,
+			vector< gnSequence* >& seq_table,
+			vector< bitset_t >& genic, 
+			vector< bitset_t >& genic_fudge, 
+			vector< bitset_t >& overlaps_cds_upstream, 
+			vector< bitset_t >& overlaps_cds_upstream_fudge, 
+			vector< bitset_t >& overlaps_cds_downstream, 
+			vector< bitset_t >& overlaps_cds_downstream_fudge, 
+			vector< bitset_t >& intergenic, 
+			vector< bitset_t >& spanner,
+			vector< bitset_t >& trna, 
+			vector< bitset_t >& rrna,
+			vector< bitset_t >& miscrna,
+			vector< bitset_t >& pseudogenized,
+			vector< bitset_t >& variable_miscrna,
+			vector< bitset_t >& variable_trna,
+			vector< bitset_t >& intergenic_segs
+			)
+{
+	if( alternabb_list.size() == 0 )
+		return;
+	uint seq_count = seq_table.size();
+	// count genic vs. intergenic alternalogs
+	// classify alternalogs as genic, intergenic, multigenic
+	// and pseudogenizing
+	bitset_t bbclass_tmp( alternabb_list.size() );
+	// all of these classifications should be mutually exclusive
+	genic.resize( seq_count, bbclass_tmp );
+	genic_fudge.resize( seq_count, bbclass_tmp );
+
+	// set to true if a variable segment ends in a CDS, but isn't contained by the CDS
+	overlaps_cds_upstream.resize( seq_count, bbclass_tmp );
+	overlaps_cds_upstream_fudge.resize( seq_count, bbclass_tmp );
+	overlaps_cds_downstream.resize( seq_count, bbclass_tmp );
+	overlaps_cds_downstream_fudge.resize( seq_count, bbclass_tmp );
+	intergenic.resize( seq_count, bbclass_tmp );
+	spanner.resize( seq_count, bbclass_tmp );
+//	vector< bitset_t > multigenic( seq_count, bbclass_tmp );
+	// these are true if trna or rrna are intersected
+	trna.resize( seq_count, bbclass_tmp );
+	rrna.resize( seq_count, bbclass_tmp );
+	miscrna.resize( seq_count, bbclass_tmp );
+	variable_miscrna.resize( seq_count );
+	variable_trna.resize( seq_count );
+	// an alternalog is pseudogenizing if it's genic in other sequences but not in the subject
+	pseudogenized.resize( seq_count, bbclass_tmp );
+
+	vector< vector< pair< size_t, size_t > > > ranges;
+	identifyIntergenicRanges( seq_table, ranges );
+	intergenic_segs.resize(seq_table.size());
+
+	// alterna_ptrs is re-sorted for every sequence; orig_ptrs keeps the original order
+	vector< const bb_seqentry_t* > alterna_ptrs( alternabb_list.size() );
+	for( size_t i = 0; i < alternabb_list.size(); ++i )
+		alterna_ptrs[i] = &alternabb_list[i];
+	vector< const bb_seqentry_t* > orig_ptrs( alterna_ptrs );
+	for( size_t seqI = 0; seqI < seq_count; ++seqI )
+	{
+		// NOTE(review): BbSeqComp and createMap are defined outside this view; presumably the
+		// comparator orders entries by their coordinate in sequence seqI and ptr_map translates a
+		// position in the sorted list back to the entry's index in alternabb_list -- confirm.
+		BbSeqComp bsc( seqI );
+		std::sort( alterna_ptrs.begin(), alterna_ptrs.end(), bsc );
+		vector< size_t > ptr_map;
+		createMap( alterna_ptrs, orig_ptrs, ptr_map );
+
+		// indexed by position in the sorted alterna_ptrs list
+		vector< vector< unsigned > > bb_features( alternabb_list.size() );	// stores feature IDs of overlapping features
+		variable_miscrna[seqI].resize(seq_table[seqI]->getFeatureListLength());
+		variable_trna[seqI].resize(seq_table[seqI]->getFeatureListLength());
+		for( size_t featureI = 0; featureI < seq_table[seqI]->getFeatureListLength(); ++featureI )
+		{
+			gnBaseFeature* feat = seq_table[seqI]->getFeature( featureI );
+			string feat_name = feat->GetName();
+			if( feat_name != "CDS" && 
+				feat_name != "tRNA" &&
+				feat_name != "rRNA" &&
+				feat_name != "misc_RNA" )
+			{
+				delete feat;	// release the feature before skipping it; this path used to leak
+				continue;	// don't deal with other feature types (source, etc)
+			}
+			if( feat->GetLocationListLength() > 1 )
+			{
+				delete feat;	// release the feature before skipping it; this path used to leak
+				continue;	// any multi-part CDS features are likely to be pseudogene annotations
+							// which we don't want to bias our results.  There are only a couple true multi-part
+							// CDS in enteric bacteria
+			}
+
+			gnLocation loc = feat->GetLocation(0);
+			// loc is a copy, and the feature object is not needed again in this iteration
+			delete feat;
+			if( loc.GetFirst() > loc.GetLast() || loc.GetFirst() == 0 || loc.GetLast() == 0 )
+				continue;	// a problem parsing annotation?
+			// find where feature lands in our list
+			bb_seqentry_t tmp_bb( seq_count );
+			tmp_bb[seqI].first = loc.GetFirst();
+			tmp_bb[seqI].second = loc.GetFirst();
+			vector< const bb_seqentry_t* >::iterator liter = std::lower_bound( alterna_ptrs.begin(), alterna_ptrs.end(), &tmp_bb, bsc );
+			tmp_bb[seqI].first = loc.GetLast();
+			tmp_bb[seqI].second = loc.GetLast();
+			vector< const bb_seqentry_t* >::iterator uiter = std::lower_bound( alterna_ptrs.begin(), alterna_ptrs.end(), &tmp_bb, bsc );
+			// if the search ran off the end but the last entry still reaches the feature, start from it
+			if( liter == alterna_ptrs.end() &&
+				alterna_ptrs.size() > 0 &&
+				genome::absolut( (*alterna_ptrs.back())[seqI].second ) >= loc.GetFirst() )
+				liter--;
+			// walk back over earlier entries whose end reaches the start of the feature
+			while( liter != alterna_ptrs.end() &&
+				liter != alterna_ptrs.begin() &&
+				genome::absolut( (**liter)[seqI].second ) >= loc.GetFirst() )
+				--liter;
+			// the walk stops one entry too far when that entry ends before the feature starts
+			if( liter != alterna_ptrs.end() &&
+				genome::absolut( (**liter)[seqI].second ) < loc.GetFirst() )
+				++liter;
+			for( ; liter != uiter; ++liter )
+			{
+				bb_features[ liter - alterna_ptrs.begin() ].push_back( featureI );
+			}
+		}
+
+		intergenic_segs[seqI].resize(ranges[seqI].size());
+		for( size_t bbI = 0; bbI < alterna_ptrs.size(); ++bbI )
+		{
+			// NOTE(review): the coordinates can be negative (the next loop tests the sign and
+			// negates them); converting a negative value to size_t here wraps -- confirm intended.
+			size_t l = (*alterna_ptrs[bbI])[seqI].first;
+			size_t r = (*alterna_ptrs[bbI])[seqI].second;
+			for( size_t rI = 0; rI < ranges[seqI].size(); ++rI )
+			{
+				if( (l < ranges[seqI][rI].first + 1 && ranges[seqI][rI].first + 1 <= r) ||		// left overlap and complete contains
+					(l <= ranges[seqI][rI].second - 1 && ranges[seqI][rI].first + 1 <= r) )		// right overlap and inside
+				{
+					// only the first matching intergenic range is marked for each coordinate range
+					intergenic_segs[seqI].set(rI);
+					break;
+				}
+			}
+		}
+
+		for( size_t bbI = 0; bbI < alterna_ptrs.size(); ++bbI )
+		{
+			// build a location with positive coordinates for this range
+			gnLocation bb_loc;
+			if( (*alterna_ptrs[bbI])[seqI].first > 0 )
+				bb_loc = gnLocation((*alterna_ptrs[bbI])[seqI].first, (*alterna_ptrs[bbI])[seqI].second);
+			else
+				bb_loc = gnLocation(-(*alterna_ptrs[bbI])[seqI].first, -(*alterna_ptrs[bbI])[seqI].second);
+			for( size_t featI = 0; featI < bb_features[bbI].size(); ++featI )
+			{
+				gnBaseFeature* feat = seq_table[seqI]->getFeature( bb_features[bbI][featI] );
+				gnLocation feat_loc = feat->GetLocation(0);
+				gnLocation intersect = feat_loc.GetIntersection( bb_loc, gnLocation::determinedRegions );
+				string name = feat->GetName();
+				if( intersect.GetFirst() == bb_loc.GetFirst() &&
+					intersect.GetLast() == bb_loc.GetLast() && 
+					name == "CDS" )
+				{
+					if( intersect.GetLast() - intersect.GetFirst() > ALTERNALOG_MIN_SIZE ||
+						intersect.GetFirst() - ALTERNALOG_MIN_SIZE > feat_loc.GetFirst() ||
+						intersect.GetLast() + ALTERNALOG_MIN_SIZE < feat_loc.GetLast() )
+					{
+						// alternalog completely contained by CDS, at least ALTERNALOG_MIN_SIZE inside the CDS
+						genic[seqI].set( ptr_map[bbI] );
+					}else{
+						genic_fudge[seqI].set( ptr_map[bbI] );	// small and close to the edge
+					}
+				}
+				else if( (intersect.GetFirst() == bb_loc.GetFirst() ||
+					intersect.GetLast() == bb_loc.GetLast()) && 
+					name == "CDS" )
+				{
+					bool up = false;
+					// overlaps a CDS by at least ALTERNALOG_MIN_SIZE nucleotides,
+					// but does not contain the CDS, nor is it contained by the CDS
+					if( intersect.GetFirst() == bb_loc.GetFirst() )
+					{
+						if( feat->GetLocationType() != gnLocation::LT_Standard )
+							up = true;
+					}else if( feat->GetLocationType() == gnLocation::LT_Standard )
+						up = true;
+
+					if( !up && intersect.GetLast() - intersect.GetFirst() > ALTERNALOG_MIN_SIZE )
+						overlaps_cds_downstream[seqI].set( ptr_map[bbI] );
+					else if( !up )
+						overlaps_cds_downstream_fudge[seqI].set( ptr_map[bbI] );
+					else if( up && intersect.GetLast() - intersect.GetFirst() > ALTERNALOG_MIN_SIZE )
+						overlaps_cds_upstream[seqI].set( ptr_map[bbI] );
+					else
+						overlaps_cds_upstream_fudge[seqI].set( ptr_map[bbI] );
+
+				}else if( intersect.GetLast() - intersect.GetFirst() > ALTERNALOG_MIN_SIZE &&
+					name == "CDS" )
+				{
+					// spans CDS
+					spanner[seqI].set( ptr_map[bbI] );
+				}
+				if( intersect.GetLast() - intersect.GetFirst() > ALTERNALOG_MIN_SIZE &&
+					name == "rRNA" )
+				{
+					// overlaps a rRNA by at least ALTERNALOG_MIN_SIZE nucleotides
+					rrna[seqI].set( ptr_map[bbI] );
+				}
+				if( intersect.GetLast() - intersect.GetFirst() > ALTERNALOG_MIN_SIZE &&
+					name == "tRNA" )
+				{
+					// overlaps a tRNA by at least ALTERNALOG_MIN_SIZE nucleotides
+					trna[seqI].set( ptr_map[bbI] );
+					variable_trna[seqI].set(bb_features[bbI][featI]);
+				}
+				if( intersect.GetLast() - intersect.GetFirst() > ALTERNALOG_MIN_SIZE &&
+					name == "misc_RNA" )
+				{
+					// overlaps a misc_RNA by at least ALTERNALOG_MIN_SIZE nucleotides
+					miscrna[seqI].set( ptr_map[bbI] );
+					variable_miscrna[seqI].set(bb_features[bbI][featI]);
+				}
+				delete feat;
+			}
+		}
+		// whatever was not classified as genic, CDS-overlapping, rRNA or tRNA counts as intergenic
+		intergenic[seqI] = genic[seqI] | overlaps_cds_upstream[seqI] | overlaps_cds_downstream[seqI] | rrna[seqI] | trna[seqI];
+		intergenic[seqI].flip();
+	}
+
+	// identify pseudogenizing segments as intergenic segments in one genome that
+	// are genic in other genomes
+	for( size_t seqI = 0; seqI < seq_count; ++seqI )
+	{
+		// accumulate everything that is genic or CDS-overlapping in any other sequence
+		bitset_t pseudo = bbclass_tmp;
+		for( size_t seqJ = 0; seqJ < seq_count; ++seqJ )
+		{
+			if( seqJ == seqI )
+				continue;
+			pseudo |= genic[seqJ] | overlaps_cds_upstream[seqJ] | overlaps_cds_downstream[seqJ];
+		}
+		bitset_t fudge = genic_fudge[seqI] | overlaps_cds_upstream_fudge[seqI] | overlaps_cds_downstream_fudge[seqI];
+		fudge.flip();	// if it's questionably within a gene then don't let it be a pseudogene.  we want to be sure
+						// about these
+		pseudogenized[seqI] = intergenic[seqI] & pseudo & fudge;
+	}
+}
+
+/**
+ * Finds the variable segments flanked by backbone, classifies them against the genome
+ * annotation and writes a textual report to os.
+ *
+ * The report starts with the number of sites found.  When no site is found nothing else
+ * is written: classifyCoordinateRanges() returns without sizing its outputs for an empty
+ * list, so the per-sequence summaries below could not be produced.
+ *
+ * @param os                   stream receiving the report
+ * @param bb_list              backbone entries to analyze
+ * @param avg_lens             passed through to findVariableSegmentsWithFlankingBB()
+ * @param anno_seqI            index into seq_table of the annotated sequence
+ * @param seq_table            sequences carrying the feature annotation
+ * @param site_class_name      label used for the sites in the report text
+ * @param analyze_alternalogs  passed through to findVariableSegmentsWithFlankingBB()
+ */
+void analyzeVariableSegments( ostream& os, const vector< bb_entry_t >& bb_list, const vector< double >& avg_lens, uint anno_seqI, vector< gnSequence* >& seq_table, string site_class_name = "alternalog", bool analyze_alternalogs = true )
+{
+	gnSequence* annotated_seq = seq_table[anno_seqI];
+	vector< pair< size_t, size_t > > alternalogs;
+	vector< bb_seqentry_t > alternabb_list;
+	findVariableSegmentsWithFlankingBB( bb_list, avg_lens, alternalogs, ALTERNALOG_MIN_SIZE, ALTERNALOG_MIN_SIZE, analyze_alternalogs );
+	makeVariableSegmentsCoordinateList( bb_list, alternalogs, alternabb_list );
+
+	os << "There are " << alternalogs.size() << " " << site_class_name << " sites\n";
+
+	// With no sites, classifyCoordinateRanges() leaves every output vector empty, and the
+	// indexing by sequence further down (alt_genic[seqI], v_miscrna[anno_seqI], ...) would
+	// run out of bounds.  There is nothing more to report in that case.
+	if( alternabb_list.empty() )
+		return;
+
+	// count genic vs. intergenic alternalogs
+	// classify alternalogs as genic, intergenic, etc.
+	vector< bitset_t > alt_genic, alt_overlaps_cds_upstream, alt_overlaps_cds_downstream;
+	vector< bitset_t > alt_intergenic, alt_spanner, alt_trna, alt_rrna, alt_pseudogenized;
+	vector< bitset_t > alt_genic_fudge, alt_overlaps_cds_upstream_fudge, alt_overlaps_cds_downstream_fudge;
+	vector< bitset_t > alt_miscrna, v_miscrna, v_trna;
+	vector< bitset_t > intergenic_segs;
+
+	classifyCoordinateRanges( 
+		alternabb_list, annotated_seq, seq_table, alt_genic, alt_genic_fudge, alt_overlaps_cds_upstream,
+		alt_overlaps_cds_upstream_fudge, alt_overlaps_cds_downstream, alt_overlaps_cds_downstream_fudge,
+		alt_intergenic, alt_spanner, alt_trna, alt_rrna, alt_miscrna, alt_pseudogenized, v_miscrna, v_trna, 
+		intergenic_segs 
+		);
+
+	// find alternalogs that are always inside annotated genes
+	// (each accumulator starts with every bit set and is and-ed with every sequence's classification)
+	bitset_t bbclass_tmp( alternabb_list.size() );
+	bitset_t alt_multi_allelic_genes( bbclass_tmp );
+	alt_multi_allelic_genes.flip();
+	// alternalogs that are always outside genes
+	bitset_t alt_multi_allelic_intergenic( bbclass_tmp );
+	bitset_t alt_multi_allelic_entirely_intergenic( bbclass_tmp );
+	alt_multi_allelic_intergenic.flip();
+	alt_multi_allelic_entirely_intergenic.flip();
+	uint seq_count = bb_list.front().bb_seq.size();
+	for( uint seqI = 0; seqI < seq_count; ++seqI )
+	{
+		alt_multi_allelic_genes &= alt_genic[seqI];
+		alt_multi_allelic_intergenic &= alt_intergenic[seqI];
+		bitset_t spanner_flip = alt_spanner[seqI];
+		spanner_flip.flip();
+		alt_multi_allelic_entirely_intergenic &= alt_intergenic[seqI] & spanner_flip;
+	}
+
+
+	os << " There are " << alt_multi_allelic_genes.count() << " apparently multi-allelic genes (" << site_class_name << ")\n";
+	os << " There are " << alt_multi_allelic_intergenic.count() << " apparently multi-allelic regions with intergenic endpoints (" << site_class_name << ")\n";
+	os << " Of those, " << alt_multi_allelic_entirely_intergenic.count() << " contain no annotated CDS (" << site_class_name << ")\n";
+	os << " The remaining segments span gene boundaries, but are not entirely contained in annotated genes\n";
+
+	// NOTE(review): classifyIntergenic() is defined outside this view; presumably it sizes and
+	// fills the bitsets below for the annotated sequence -- confirm.
+	bitset_t trna_neighbor;
+	bitset_t miscrna_neighbor;
+	bitset_t converging_cds;
+	bitset_t diverging_cds;
+	bitset_t inline_cds;
+	bitset_t vv_miscrna;
+	bitset_t vv_trna;
+	classifyIntergenic( os, alternabb_list, alt_multi_allelic_intergenic, anno_seqI, 
+		annotated_seq, trna_neighbor, miscrna_neighbor, converging_cds, diverging_cds, inline_cds, vv_miscrna, vv_trna );
+
+
+	os << "There are " << trna_neighbor.count() << " intergenic segments with a tRNA nearest neighbor\n";
+	os << "There are " << miscrna_neighbor.count() << " intergenic segments with a miscRNA nearest neighbor\n";
+	os << "There are " << converging_cds.count() << " intergenic segments surrounded by converging CDS\n";
+	os << "There are " << diverging_cds.count() << " intergenic segments surrounded by diverging CDS\n";
+	os << "There are " << inline_cds.count() << " intergenic segments surrounded by inline CDS\n";
+	bitset_t miscrna_inter = v_miscrna[anno_seqI] | vv_miscrna;
+	os << "There are " << miscrna_inter.count() << " annotated misc_RNA associated with variable segments\n";
+	os << "There are " << intergenic_segs[anno_seqI].size() << " intergenic sites in the ref genome, of which " << intergenic_segs[anno_seqI].count() << " exhibit variability\n";
+	bitset_t trna_inter = v_trna[anno_seqI] | vv_trna;
+	os << "There are " << trna_inter.count() << " annotated tRNA associated with variable segments\n";
+
+	if( miscrna_neighbor.count() > 0 )
+	{
+		os << "coordinates of variable segs with misc_RNA neighboring:\n";
+		printFilteredBbSeqList( os, alternabb_list, miscrna_neighbor );
+	}
+	if( diverging_cds.count() > 0 )
+	{
+		os << "coordinates of variable segs with diverging_cds neighboring:\n";
+		printFilteredBbSeqList( os, alternabb_list, diverging_cds );
+	}
+	bitset_t total_miscrna = alt_miscrna[anno_seqI] | miscrna_neighbor;
+	os << "Total variable intergenic segs that neighbor or contain miscRNA: " << total_miscrna.count() << endl;
+
+	os << "coordinates of multi-allelic genes:\n";
+	printFilteredBbSeqList( os, alternabb_list, alt_multi_allelic_genes );
+
+	os << "coordinates of multi-allelic intergenic regions without CDS:\n";
+	printFilteredBbSeqList( os, alternabb_list, alt_multi_allelic_entirely_intergenic );
+
+	// per-genome summary followed by the coordinates of the non-empty classes
+	for( size_t seqI = 0; seqI < seq_count; ++seqI )
+	{
+		os << "genome " << seqI << " has " << alt_genic[seqI].count() << " " << site_class_name << " within CDS\n";
+		os << "genome " << seqI << " has " << alt_spanner[seqI].count() << " " << site_class_name << " that span CDS boundaries\n";
+		os << "genome " << seqI << " has " << alt_intergenic[seqI].count() << " " << site_class_name << " that lie entirely in intergenic regions\n";
+		os << "genome " << seqI << " has " << alt_rrna[seqI].count() << " " << site_class_name << " that contain rRNA\n";
+		os << "genome " << seqI << " has " << alt_trna[seqI].count() << " " << site_class_name << " that contain tRNA\n";
+		os << "genome " << seqI << " has " << alt_miscrna[seqI].count() << " " << site_class_name << " that contain misc_RNA\n";
+		os << "genome " << seqI << " has " << alt_pseudogenized[seqI].count() << " apparent recent pseudogenes in " << site_class_name << "\n";
+		os.flush();
+
+/*
+		os << "coordinates of genic alternalogs:\n";
+		printFilteredBbSeqList( os, alternabb_list, genic[seqI] );
+*/
+
+		if( alt_trna[seqI].count() > 0 )
+		{
+			os << "coordinates of tRNA " << site_class_name << ":\n";
+			printFilteredBbSeqList( os, alternabb_list, alt_trna[seqI] );
+		}
+
+		if( alt_rrna[seqI].count() > 0 )
+		{
+			os << "coordinates of rRNA " << site_class_name << ":\n";
+			printFilteredBbSeqList( os, alternabb_list, alt_rrna[seqI] );
+		}
+
+		if( alt_miscrna[seqI].count() > 0 )
+		{
+			os << "coordinates of misc_RNA " << site_class_name << ":\n";
+			printFilteredBbSeqList( os, alternabb_list, alt_miscrna[seqI] );
+		}
+
+		os << "coordinates of possible pseudogenes:\n";
+		printFilteredBbSeqList( os, alternabb_list, alt_pseudogenized[seqI] );
+		os.flush();
+	}
+}
+
+// Sentinel values; both are the largest value representable by uint.
+// NOTE(review): INTERNAL_NODE is not referenced in the visible part of this file --
+// presumably it marks tree nodes that have no sequence of their own; confirm.
+const uint INTERNAL_NODE = (std::numeric_limits<uint>::max)();
+// Interval index that main() stores in the faux single-genome backbone entries it creates.
+const uint INTERVAL_UNKNOWN = (std::numeric_limits<uint>::max)();
+
+int main( int argc, char* argv[] )
+{
+#if	WIN32
+	SetPriorityClass(GetCurrentProcess(), BELOW_NORMAL_PRIORITY_CLASS);
+#endif
+
+	if( argc < 7 )
+	{
+		cerr << "bbAnalyze <xmfa file> <guide tree> <backbone seqpos file> <backbone col file> <annotated seq index> <output file>\n";
+		cerr << "annotated seq index starts at 0.\n";
+		return -1;
+	}
+	string aln_fname( argv[1] );
+	string guide_tree_fname( argv[2] );
+	string bbseq_fname( argv[3] );
+	string bbcol_fname( argv[4] );
+	int gff_seq_index = atoi( argv[5] );
+	string output_fname( argv[6] );
+
+	ifstream aln_input( aln_fname.c_str() );
+	if( !aln_input.is_open() ){
+		cerr << "Error opening \"" << aln_fname << "\"" << endl;
+		return -2;
+	}
+	ifstream tree_input( guide_tree_fname.c_str() );
+	if( !tree_input.is_open() ){
+		cerr << "Error opening \"" << guide_tree_fname << "\"" << endl;
+		return -3;
+	}
+	ifstream bbseq_input( bbseq_fname.c_str() );
+	if( !bbseq_input.is_open() ){
+		cerr << "Error opening \"" << bbseq_fname << "\"" << endl;
+		return -4;
+	}
+	ifstream bbcol_input( bbcol_fname.c_str() );
+	if( !bbcol_input.is_open() ){
+		cerr << "Error opening \"" << bbcol_fname << "\"" << endl;
+		return -4;
+	}
+	ofstream anal_output( output_fname.c_str() );
+	if( !anal_output.is_open() ){
+		cerr << "Error opening \"" << output_fname << "\" for writing" << endl;
+		return -6;
+	}
+	
+	// read the guide tree
+	PhyloTree< TreeNode > tree;
+	tree.readTree( tree_input );
+
+	// read the backbone column file	
+	vector< bb_seqentry_t > bb_seq_list;
+	vector< pair< size_t, ULA > > bb_col_list;
+	readBackboneSeqFile( bbseq_input, bb_seq_list );
+	readBackboneColsFile( bbcol_input, bb_col_list );
+
+	// read the alignment
+	IntervalList iv_list;
+	iv_list.ReadStandardAlignment( aln_input );
+
+	LoadSequences(iv_list, &cout);
+
+
+
+	const size_t seq_count = iv_list.seq_table.size();
+
+	vector< bb_entry_t > bb_list( bb_seq_list.size() );
+	for( size_t i = 0; i < bb_seq_list.size(); ++i )
+	{
+		bb_list[i].bb_seq = bb_seq_list[i];
+		bb_list[i].bb_cols = bb_col_list[i].second;
+		bb_list[i].iv = bb_col_list[i].first;
+		// awful hack: homogenize the parity inside intervals.  this is a bug in progressiveMauve
+		for( size_t seqI = 0; seqI < seq_count; ++seqI )
+		{
+			AbstractMatch::orientation o = iv_list[bb_list[i].iv].Orientation(seqI);
+			if( o == AbstractMatch::undefined )
+				continue;
+			if( bb_list[i].bb_cols.LeftEnd(seqI) != NO_MATCH )
+				bb_list[i].bb_cols.SetOrientation( seqI, o );
+			if( (bb_list[i].bb_seq[seqI].first < 0 && o == AbstractMatch::forward) ||
+				(bb_list[i].bb_seq[seqI].first > 0 && o == AbstractMatch::reverse) )
+				bb_list[i].bb_seq[seqI].first *= -1;
+			if( (bb_list[i].bb_seq[seqI].second < 0 && o == AbstractMatch::forward) ||
+				(bb_list[i].bb_seq[seqI].second > 0 && o == AbstractMatch::reverse) )
+				bb_list[i].bb_seq[seqI].second *= -1;
+			if( genome::absolut( bb_list[i].bb_seq[seqI].first ) > genome::absolut( bb_list[i].bb_seq[seqI].second ) )
+				swap( bb_list[i].bb_seq[seqI].first, bb_list[i].bb_seq[seqI].second );
+		}
+	}
+
+
+	// make faux single-genome bb segments for anything not contained in
+	// real backbone
+	for( uint seqI = 0; seqI < seq_count; seqI++ )
+	{
+		vector< AbstractMatch* > seq_beeb;
+		ULA single_ula(1);
+		for( size_t i = 0; i < bb_seq_list.size(); ++i )
+		{
+			if( bb_seq_list[i][seqI].first == 0 )
+				continue;
+			single_ula.SetStart( 0, genome::absolut(bb_seq_list[i][seqI].first) );
+			single_ula.SetLength( genome::absolut(bb_seq_list[i][seqI].second - bb_seq_list[i][seqI].first) + 1 );
+			seq_beeb.push_back( single_ula.Copy() );
+		}
+		SingleStartComparator<AbstractMatch> ssc(0);
+		sort( seq_beeb.begin(), seq_beeb.end(), ssc );
+		// HACK!!
+		// trim single base pair overlaps in seq_beeb that arise due to an off-by-one bug in the backbone output...
+		EliminateOverlaps_v2( seq_beeb );
+		sort( seq_beeb.begin(), seq_beeb.end(), ssc );
+		list< AbstractMatch* > seq_beeb_list( seq_beeb.begin(), seq_beeb.end() );
+		AddGapMatches( seq_beeb_list, seq_beeb_list.begin(), seq_beeb_list.end(), 
+			   0, 1, iv_list.seq_table[seqI]->length()+1, AbstractMatch::forward, 1 );
+		sort( seq_beeb.begin(), seq_beeb.end() );
+		vector< AbstractMatch* > tmp_list( seq_beeb_list.begin(), seq_beeb_list.end() );
+		sort( tmp_list.begin(), tmp_list.end() );
+		vector< AbstractMatch* > new_beeb( seq_beeb_list.size() - seq_beeb.size() );
+		std::set_difference(  tmp_list.begin(), tmp_list.end(),
+				seq_beeb.begin(), seq_beeb.end(), new_beeb.begin() );
+
+		// add each new_beeb to the backbone list
+		size_t bbI = bb_list.size();
+		bb_list.resize( bbI + new_beeb.size() );
+		for( size_t i = 0; i < new_beeb.size(); ++i )
+		{
+			bb_list[bbI].bb_seq.resize( seq_count );
+			bb_list[bbI].bb_seq[seqI] = make_pair( new_beeb[i]->LeftEnd(0), new_beeb[i]->RightEnd(0) );
+			bb_list[bbI].iv = INTERVAL_UNKNOWN;
+			ULA cols(seq_count);
+			cols.SetLeftEnd(seqI, 1);
+			cols.SetLength(new_beeb[i]->Length(0));
+			bb_list[bbI].bb_cols = cols;
+			bbI++;
+		}
+		for( size_t i = 0; i < tmp_list.size(); ++i )
+			tmp_list[i]->Free();
+	}
+
+	// create a map between tree nodes and sequences
+	vector< uint > node_sequence_map( tree.size(), -1 );
+	for( uint seqI = 0; seqI < seq_count; seqI++ )
+	{
+		stringstream seq_name;
+		seq_name << "seq" << seqI + 1;
+		node_id_t nodeI = 0;
+		for( ; nodeI < tree.size(); nodeI++ )
+		{
+			if( seq_name.str() == tree[nodeI].name )
+			{
+				node_sequence_map[nodeI] = seqI;
+				break;
+			}
+		}
+		if( nodeI == tree.size() )
+			throw "Phylogenetic tree names unrecognized.  Should follow seqN naming format\n";
+	}
+
+	// mark small backbone segments
+	bitset_t too_small( bb_list.size(), false );
+	for( size_t bbI = 0; bbI < bb_list.size(); ++bbI )
+		if( bb_list[bbI].bb_cols.Length() < DISCARD_SEGMENT )
+			too_small.set(bbI, true);
+	bitset_t not_small = too_small;
+	not_small.flip();
+
+	vector< double > avg_lens( bb_list.size(), 0 );
+	for( size_t bbI = 0; bbI < bb_list.size(); ++bbI )
+	{
+		double ct = 0;
+		for( size_t seqI = 0; seqI < bb_list[bbI].bb_seq.size(); ++seqI )
+		{
+			if( bb_list[bbI].bb_seq[seqI].first != 0 )
+			{
+				ct++;
+				avg_lens[bbI] += genome::absolut( bb_list[bbI].bb_seq[seqI].second - bb_list[bbI].bb_seq[seqI].first ) + 1;
+			}
+		}
+		avg_lens[bbI] /= ct;
+	}
+
+
+	// got the backbone.  now do something with it.
+	// at each node of the tree, count the total amount backbone contained in nodes
+	// below that tree, both inside genes and outside genes
+
+
+	vector< node_id_t > all_leaves;
+	getLeaves( tree, tree.root, all_leaves );
+	sort( all_leaves.begin(), all_leaves.end() );
+
+	bitset_t true_temper( bb_list.size() );
+	true_temper.reset();
+	true_temper.flip();
+	bitset_t false_temper( bb_list.size() );
+	false_temper.reset();
+
+	vector< bitset_t > unique( tree.size(), true_temper );
+	// partial contains bb segs that have representation among two or more genomes below a given node
+	vector< bitset_t > partial( tree.size(), true_temper );
+	// conserved have representation in all genomes below a node, and possibly others
+	vector< bitset_t > conserved( tree.size(), true_temper );
+	// child partial have representation in one or more genomes below a node
+	vector< bitset_t > c1_partial( tree.size(), false_temper );
+	vector< bitset_t > c2_partial( tree.size(), false_temper );
+	vector< bitset_t > c1_complete( tree.size(), false_temper );
+	vector< bitset_t > c2_complete( tree.size(), false_temper );
+
+	// calculate which segments have heterogenous occurrence at each node
+	vector< bitset_t > hop_one( tree.size(), false_temper );
+	vector< bitset_t > hop_two( tree.size(), false_temper );
+	vector< double > pan_genome_size( tree.size(), 0 );
+	// hop_two if(c1_partial && c2_partial) && !c1_complete && !c2_complete
+	// hop_one if !hop_two && (!c1_complete || !c2_complete) && (c1_partial && c2_partial) && !(hop_one at incomplete child)
+
+	stack< node_id_t > node_stack;
+	node_stack.push( tree.root );
+	bitset_t visited( tree.size(), false );
+	while( node_stack.size() > 0 )
+	{
+		node_id_t nI = node_stack.top();
+		if( !visited[nI] && tree[nI].children.size() > 0 )
+		{
+			node_stack.push( tree[nI].children[0] );
+			node_stack.push( tree[nI].children[1] );
+			visited.set(nI,true);
+			continue;	// visit post-order
+		}
+		node_stack.pop();
+
+		vector< node_id_t > leaves;
+		getLeaves( tree, nI, leaves );
+		sort( leaves.begin(), leaves.end() );
+
+		vector< node_id_t > not_leaves( all_leaves.size() - leaves.size() );
+		std::set_difference( all_leaves.begin(), all_leaves.end(), 
+			leaves.begin(), leaves.end(), 
+			not_leaves.begin() );
+
+
+		vector< node_id_t > c1_leaves;
+		vector< node_id_t > c2_leaves;
+		if( tree[nI].children.size() > 0 )
+		{
+			getLeaves( tree, tree[nI].children[0], c1_leaves );
+			getLeaves( tree, tree[nI].children[1], c2_leaves );
+		}
+
+		for( size_t bbI = 0; bbI < bb_list.size(); ++bbI )
+		{
+			// do all the leaves have this segment?
+			size_t lI = 0;
+			size_t ct = 0;
+			for( lI = 0; lI < leaves.size(); ++lI )
+			{
+				if( bb_list[bbI].bb_seq[ node_sequence_map[ leaves[lI] ] ].first != 0 )
+					ct++;
+			}
+			unique[nI].set(bbI, ct == leaves.size());
+
+			// was this conserved in more than one?
+			partial[nI].set(bbI, ct > 1);
+			conserved[nI].set(bbI, ct == leaves.size());
+
+			// if this one was represented at all then it's part of the pan-genome
+			if( ct > 0 )
+				pan_genome_size[nI] += avg_lens[bbI];
+
+			// do only the leaves below this node have this segment?
+			for( lI = 0; lI < not_leaves.size(); ++lI )
+			{
+				if( bb_list[bbI].bb_seq[ node_sequence_map[ not_leaves[lI] ] ].first != 0 )
+					unique[nI].set(bbI, false);
+			}
+
+			// is the segment present in both children?
+			bool c1 = false;
+			bool c2 = false;
+			uint c1_ct = 0;
+			uint c2_ct = 0;
+			for( lI = 0; lI < c1_leaves.size(); ++lI )
+			{
+				if( bb_list[bbI].bb_seq[ node_sequence_map[ c1_leaves[lI] ] ].first != 0 )
+					c1_ct++;
+			}
+			for( lI = 0; lI < c2_leaves.size(); ++lI )
+			{
+				if( bb_list[bbI].bb_seq[ node_sequence_map[ c2_leaves[lI] ] ].first != 0 )
+					c2_ct++;
+			}
+			c1_partial[nI].set(bbI, c1_ct > 0);
+			c2_partial[nI].set(bbI, c2_ct > 0);
+			c1_complete[nI].set(bbI, c1_ct == c1_leaves.size());
+			c2_complete[nI].set(bbI, c2_ct == c2_leaves.size());
+		}
+	}
+
+	node_stack.push( tree.root );
+	visited = bitset_t( tree.size(), false );
+	vector< bitset_t > all_unique( tree.size(), false_temper );
+	while( node_stack.size() > 0 )
+	{
+		node_id_t nI = node_stack.top();
+		if( !visited[nI] && tree[nI].children.size() > 0 )
+		{
+			node_stack.push( tree[nI].children[0] );
+			node_stack.push( tree[nI].children[1] );
+			visited.set(nI,true);
+			continue;	// visit post-order
+		}
+		node_stack.pop();
+
+		all_unique[nI] = unique[nI];
+
+		if( tree[nI].children.size() == 0 )
+			continue;	// hop concept doesn't apply to leaf nodes
+		bitset_t not_c1_comp = c1_complete[nI];
+		not_c1_comp.flip();
+		bitset_t not_c2_comp = c2_complete[nI];
+		not_c2_comp.flip();
+		hop_two[nI] = c1_partial[nI] & c2_partial[nI] & not_c1_comp & not_c2_comp;
+		bitset_t not_hop_two_nI = hop_two[nI];
+		not_hop_two_nI.flip();
+		bitset_t not_child_hop = hop_one[ tree[nI].children[0] ] | hop_one[ tree[nI].children[1] ];
+		not_child_hop.flip();
+		hop_one[nI] = not_hop_two_nI & (not_c1_comp | not_c2_comp) & c1_partial[nI] & c2_partial[nI] & not_child_hop;
+
+		// don't count small segments in anything
+		hop_two[nI] &= not_small;
+		hop_one[nI] &= not_small;
+		unique[nI] &= not_small;
+		conserved[nI] &= not_small;
+		partial[nI] &= not_small;
+		all_unique[nI] = unique[nI] | all_unique[ tree[nI].children[0] ] | all_unique[ tree[nI].children[1] ];
+	}
+
+	// compute length statistics for various types of backbone
+	vector< double > conserved_len( tree.size(), 0 );
+	vector< double > unique_len( tree.size(), 0 );
+	vector< double > hop_one_len( tree.size(), 0 );
+	vector< double > hop_two_len( tree.size(), 0 );
+
+	for( size_t nI = 0; nI < tree.size(); nI++ )
+	{
+		// count up avg lengths
+		for( size_t bbI = 0; bbI < bb_list.size(); ++bbI )
+		{
+			if( conserved[nI].test(bbI) )
+				conserved_len[nI] += avg_lens[bbI];
+			if( unique[nI].test(bbI) )
+				unique_len[nI] += avg_lens[bbI];
+			if( hop_one[nI].test(bbI) )
+				hop_one_len[nI] += avg_lens[bbI];
+			if( hop_two[nI].test(bbI) )
+				hop_two_len[nI] += avg_lens[bbI];
+		}
+	}
+
+	// print a general summary of how clustered variable segments are...
+	bitset_t uni_root = unique[0] & not_small;
+	anal_output << "There are " << uni_root.count() << " segments conserved among all genomes\n";
+	anal_output << "and " << not_small.count()-uni_root.count() << " variable segments fall in between these\n";
+
+
+	// prepare to analyze distribution of gene functions in backbone
+	vector< bb_seqentry_t > m_bbseq_list( bb_list.size() );
+	for( size_t bbI = 0; bbI < bb_list.size(); ++bbI )
+		m_bbseq_list[bbI] = bb_list[bbI].bb_seq;
+	multifun_map_t all_mf_count;
+	multifun_names_t all_mf_names;
+	size_t cds_count = getCDScount( iv_list.seq_table[gff_seq_index] );
+	bitset_t all_features( iv_list.seq_table[gff_seq_index]->getFeatureListLength() );
+	all_features.flip();
+	makeMultiFunCount( iv_list.seq_table[gff_seq_index], all_mf_count, all_mf_names, all_features );
+
+	// print summaries for each node
+	anal_output << "#\n";
+	anal_output << "# Alignment tree summary\n";
+	anal_output << "#\n";
+	for( size_t nI = 0; nI < tree.size(); nI++ )
+	{
+		anal_output << "Node " << nI << endl;
+		vector< node_id_t > leaves;
+		getLeaves( tree, nI, leaves );
+		anal_output << "Genomes at or below this node:\n";
+		for( size_t lI = 0; lI < leaves.size(); ++lI )
+			anal_output << '\t' << iv_list.seq_filename[ node_sequence_map[ leaves[ lI ] ] ] << endl;
+
+		anal_output << "\tNumber of unique segments at this node: " << unique[nI].count() << endl; 
+		anal_output << "\tNumber of hop one (single deletion) segments at this node: " << hop_one[nI].count() << endl; 
+		anal_output << "\tNumber of hop two (multiple deletion or lgt) segments at this node: " << hop_two[nI].count() << endl; 
+
+		anal_output << "total avg. \"core-genome\" size at this node: " << conserved_len[nI] << endl;
+		anal_output << "total avg. unique length at this node: " << unique_len[nI] << endl;
+		anal_output << "total avg. hop one length at this node: " << hop_one_len[nI] << endl;
+		anal_output << "total avg. hop two length at this node: " << hop_two_len[nI] << endl;
+		anal_output << "total \"pan-genome\" size at this node: " << pan_genome_size[nI] << endl;
+
+		// if this node has the annotated genome below it then analyze the distribution of
+		// backbone content
+		vector< uint > leaf_seqids( leaves.size() );
+		for( size_t i = 0; i < leaves.size(); ++i )
+			leaf_seqids[i] = node_sequence_map[leaves[i]];
+		vector< uint >::iterator id_iter =std::find( leaf_seqids.begin(), leaf_seqids.end(), gff_seq_index );
+		if( id_iter != leaf_seqids.end() )
+		{
+			vector< vector< size_t > > intersecting;
+			featureIntersect( m_bbseq_list, gff_seq_index, intersecting, iv_list.seq_table[gff_seq_index] );
+			bitset_t features_hit;
+			getFeatureHits( intersecting, conserved[nI], features_hit );
+			multifun_map_t bb_mf_count;
+			multifun_names_t bb_mf_names;
+			double expect_freq = (double)features_hit.count() / (double)cds_count;
+			makeMultiFunCount( iv_list.seq_table[gff_seq_index], bb_mf_count, bb_mf_names, features_hit );
+			anal_output << "#\n#Conserved gene content distribution\n#\n";
+			anal_output << "Avg percent conserved " << setprecision(3) << expect_freq * 100 << endl;
+			mfAnalyze( anal_output, all_mf_count, bb_mf_count, all_mf_names, expect_freq );	
+
+			// analyze hop_one distributions
+			intersecting.clear();
+			featureIntersect( m_bbseq_list, gff_seq_index, intersecting, iv_list.seq_table[gff_seq_index] );
+			features_hit.clear();
+			getFeatureHits( intersecting, hop_one[nI], features_hit );
+			bb_mf_count.clear();
+			bb_mf_names.clear();
+			expect_freq = (double)features_hit.count() / (double)cds_count;
+			makeMultiFunCount( iv_list.seq_table[gff_seq_index], bb_mf_count, bb_mf_names, features_hit );
+			anal_output << "#\n#Hop one gene content distribution\n#\n";
+			anal_output << "Avg percent in hop_one " << setprecision(3) << expect_freq * 100 << endl;
+			mfAnalyze( anal_output, all_mf_count, bb_mf_count, all_mf_names, expect_freq );
+
+
+			// analyze hop_two distributions
+			intersecting.clear();
+			featureIntersect( m_bbseq_list, gff_seq_index, intersecting, iv_list.seq_table[gff_seq_index] );
+			features_hit.clear();
+			getFeatureHits( intersecting, hop_two[nI], features_hit );
+			bb_mf_count.clear();
+			bb_mf_names.clear();
+			expect_freq = (double)features_hit.count() / (double)cds_count;
+			makeMultiFunCount( iv_list.seq_table[gff_seq_index], bb_mf_count, bb_mf_names, features_hit );
+			anal_output << "#\n#Hop two gene content distribution\n#\n";
+			anal_output << "Avg percent in hop_two " << setprecision(3) << expect_freq * 100 << endl;
+			mfAnalyze( anal_output, all_mf_count, bb_mf_count, all_mf_names, expect_freq );
+
+
+			// analyze distributions of segments unique to this clade
+			intersecting.clear();
+			featureIntersect( m_bbseq_list, gff_seq_index, intersecting, iv_list.seq_table[gff_seq_index] );
+			features_hit.clear();
+			getFeatureHits( intersecting, all_unique[nI], features_hit );
+			bb_mf_count.clear();
+			bb_mf_names.clear();
+			expect_freq = (double)features_hit.count() / (double)cds_count;
+			makeMultiFunCount( iv_list.seq_table[gff_seq_index], bb_mf_count, bb_mf_names, features_hit );
+			anal_output << "#\n#Unique to this clade gene content distribution\n#\n";
+			anal_output << "Avg percent in unique_to_clade " << setprecision(3) << expect_freq * 100 << endl;
+			mfAnalyze( anal_output, all_mf_count, bb_mf_count, all_mf_names, expect_freq );
+
+		}
+	}
+
+	// first analyze all variable segments
+	analyzeVariableSegments( anal_output, bb_list, avg_lens, gff_seq_index, iv_list.seq_table, "variable segments", false );
+
+	// then analyze "alternalogs": variable segments with at least two non-null alleles
+	analyzeVariableSegments( anal_output, bb_list, avg_lens, gff_seq_index, iv_list.seq_table, "alternalogs", true );
+	anal_output.flush();
+}
+
diff --git a/src/bbBreakOnGenes.cpp b/src/bbBreakOnGenes.cpp
new file mode 100644
index 0000000..04bf652
--- /dev/null
+++ b/src/bbBreakOnGenes.cpp
@@ -0,0 +1,358 @@
+#include "libMems/Backbone.h"
+#include "libMems/ProgressiveAligner.h"
+#include <sstream>
+using namespace mems;
+using namespace std;
+using namespace genome;
+
+
+template< typename MatchVector >
+void getBpList( MatchVector& mvect, uint seq, vector< gnSeqI >& bp_list )
+{
+	// Rebuild bp_list as the sorted list of left ends and (right end + 1) positions in seq of all matches.
+	bp_list.clear();
+	for( size_t matchI = 0; matchI != mvect.size(); ++matchI )
+	{
+		if( mvect[matchI]->LeftEnd(seq) != NO_MATCH )	// a NO_MATCH left end contributes no breakpoints
+		{	bp_list.push_back( mvect[matchI]->LeftEnd(seq) );
+			bp_list.push_back( mvect[matchI]->RightEnd(seq)+1 );	}
+	}
+	std::sort( bp_list.begin(), bp_list.end() );
+}
+
+template< typename MatchVector >
+void createMap( const MatchVector& mv_from, const MatchVector& mv_to, vector< size_t >& map )
+{
+	// Sort (element, position) pairs of both lists and pair them up rank by rank: map[i] receives the position in mv_to of the element with the same rank as mv_from[i].
+	typedef typename MatchVector::value_type MatchPtr;
+	vector< pair< MatchPtr, size_t > > from_ranked, to_ranked;
+	for( size_t fromI = 0; fromI != mv_from.size(); ++fromI )
+		from_ranked.push_back( make_pair( mv_from[fromI], fromI ) );
+	for( size_t toI = 0; toI != mv_to.size(); ++toI )
+		to_ranked.push_back( make_pair( mv_to[toI], toI ) );
+	std::sort( from_ranked.begin(), from_ranked.end() );
+	std::sort( to_ranked.begin(), to_ranked.end() );
+	map.resize( from_ranked.size() );
+	for( size_t rankI = 0; rankI != from_ranked.size(); ++rankI )
+		map[ from_ranked[rankI].second ] = to_ranked[rankI].second;
+}
+
+
+void makeAllPairwiseGenomeHSSBreakOnGenes( IntervalList& iv_list, vector< CompactGappedAlignment<>* >& iv_ptrs, vector< CompactGappedAlignment<>* >& iv_orig_ptrs, pairwise_genome_hss_t& hss_cols, const HssDetector* detector, vector< vector< gnSeqI > >& gene_bounds )
+{	// For each genome pair: detect hss, split them at interval and gene boundaries, and append their column ranges to hss_cols[seqI][seqJ][interval].
+	uint seq_count = iv_list.seq_table.size();
+	// make pairwise projections of intervals and find LCBs...
+	for( size_t seqI = 0; seqI < seq_count; ++seqI )
+	{
+		for( size_t seqJ = seqI+1; seqJ < seq_count; ++seqJ )
+		{
+			vector< uint > projection;
+			projection.push_back( seqI );
+			projection.push_back( seqJ );
+			vector< vector< MatchProjectionAdapter* > > LCB_list;
+			vector< LCB > projected_adjs;
+			projectIntervalList( iv_list, projection, LCB_list, projected_adjs );
+			// make intervals
+			IntervalList pair_ivs;
+			pair_ivs.seq_table.push_back( iv_list.seq_table[seqI] );
+			pair_ivs.seq_table.push_back( iv_list.seq_table[seqJ] );
+			pair_ivs.resize( LCB_list.size() );
+			for( size_t lcbI = 0; lcbI < LCB_list.size(); ++lcbI )
+				pair_ivs[lcbI].SetMatches( LCB_list[lcbI] );
+			LCB_list.clear();
+			// convert every pairwise interval into a CompactGappedAlignment, the type the detector is called with
+			vector< CompactGappedAlignment<>* > pair_cgas( pair_ivs.size() );
+			for( size_t lcbI = 0; lcbI < pair_ivs.size(); ++lcbI )
+			{
+				CompactGappedAlignment<> tmp_cga;
+				pair_cgas[lcbI] = tmp_cga.Copy();
+				new (pair_cgas[lcbI])CompactGappedAlignment<>( pair_ivs[lcbI] );
+			}
+
+			vector< CompactGappedAlignment<>* > hss_list;
+			// now find islands
+			hss_array_t hss_array;
+			(*detector)( pair_cgas, pair_ivs.seq_table, hss_array );
+			HssArrayToCga(pair_cgas, pair_ivs.seq_table, hss_array, hss_list);
+			// release the pairwise alignments; only hss_list is used from here on
+			for( size_t cgaI = 0; cgaI < pair_cgas.size(); ++cgaI )
+				pair_cgas[cgaI]->Free();
+			pair_cgas.clear();
+
+			// now split up on iv boundaries
+			vector< gnSeqI > bp_list;
+			getBpList( iv_ptrs, seqI, bp_list );
+			GenericMatchSeqManipulator< CompactGappedAlignment<> > gmsm(0);
+			SingleStartComparator< CompactGappedAlignment<> > ssc(0);
+			std::sort(hss_list.begin(), hss_list.end(), ssc );
+			applyBreakpoints( bp_list, hss_list, gmsm );
+			// break on gene bounds in seqI
+			std::sort(hss_list.begin(), hss_list.end(), ssc );
+//			if( !(seqI == 1 && seqJ == 15 ) )
+			applyBreakpoints( gene_bounds[seqI], hss_list, gmsm );
+			// and again on seqJ
+			getBpList( iv_ptrs, seqJ, bp_list );
+			GenericMatchSeqManipulator< CompactGappedAlignment<> > gmsm1(1);
+			SingleStartComparator< CompactGappedAlignment<> > ssc1(1);
+			std::sort(hss_list.begin(), hss_list.end(), ssc1 );
+			applyBreakpoints( bp_list, hss_list, gmsm1 );
+			// break on gene bounds in seqJ
+			std::sort(hss_list.begin(), hss_list.end(), ssc1 );
+//			if( !(seqI == 1 && seqJ == 15 ) )
+			applyBreakpoints( gene_bounds[seqJ], hss_list, gmsm1 );
+
+			// now transform into interval-specific columns
+			std::sort(hss_list.begin(), hss_list.end(), ssc );
+			// sort the n-way intervals on seqI; iv_map translates a sorted position back to the index in iv_orig_ptrs
+			SingleStartComparator< CompactGappedAlignment<> > ivcomp(seqI);
+			std::sort( iv_ptrs.begin(), iv_ptrs.end(), ivcomp );
+			vector< size_t > iv_map;
+			createMap( iv_ptrs, iv_orig_ptrs, iv_map );
+			size_t ivI = 0;
+			while( ivI < iv_ptrs.size() && iv_ptrs[ivI]->LeftEnd(seqI) == NO_MATCH )
+				++ivI;	// iv_ptrs are n-way, so the sequence to test is seqI (index 0 only applies to the pairwise hss)
+			for( size_t hssI = 0; hssI < hss_list.size(); ++hssI )
+			{
+				if( hss_list[hssI]->LeftEnd(0) == NO_MATCH || hss_list[hssI]->Length(0) == 0 )
+					continue;
+				if( ivI == iv_ptrs.size() )
+				{
+					cerr << "huh?\n";
+					cerr << hss_list[hssI]->LeftEnd(0) << endl;
+					cerr << hss_list[hssI]->RightEnd(0) << endl;
+					cerr << iv_ptrs.back()->LeftEnd(seqI) << endl;
+					cerr << iv_ptrs.back()->RightEnd(seqI) << endl;
+				}
+				while( ivI < iv_ptrs.size() &&
+					(iv_ptrs[ivI]->LeftEnd(seqI) == NO_MATCH ||
+					hss_list[hssI]->LeftEnd(0) > iv_ptrs[ivI]->RightEnd(seqI) ) )
+					++ivI;
+				if( ivI == iv_ptrs.size() )
+				{
+					cerr << "hssI fit!!\n";
+					genome::breakHere();
+					break;	// ivI only moves forward, so no later hss can be placed either; never index iv_ptrs[ivI] here
+				}
+				// check for containment in seqJ
+				if( iv_ptrs[ivI]->LeftEnd(seqJ) == NO_MATCH ||
+					iv_ptrs[ivI]->RightEnd(seqJ) < hss_list[hssI]->LeftEnd(1) ||
+					hss_list[hssI]->RightEnd(1) < iv_ptrs[ivI]->LeftEnd(seqJ) )
+					continue;	// this hss falls to an invalid range in seqJ
+
+				if( hss_list[hssI]->RightEnd(0) < iv_ptrs[ivI]->LeftEnd(seqI) )
+				{
+					cerr << "huh 2?\n";
+					cerr << hss_list[hssI]->LeftEnd(0) << endl;
+					cerr << hss_list[hssI]->RightEnd(0) << endl;
+					cerr << iv_ptrs[ivI]->LeftEnd(seqI) << endl;
+					cerr << iv_ptrs[ivI]->RightEnd(seqI) << endl;
+					continue;	// the loop header advances hssI; an extra increment here would skip the next hss unexamined
+				}
+
+				vector< pair< size_t, size_t > >& cur_hss_cols = hss_cols[seqI][seqJ][iv_map[ivI]];
+
+				gnSeqI left_col = iv_ptrs[ivI]->SeqPosToColumn( seqI, hss_list[hssI]->LeftEnd(0) );
+				gnSeqI right_col = iv_ptrs[ivI]->SeqPosToColumn( seqI, hss_list[hssI]->RightEnd(0) );
+				if(left_col > right_col && iv_ptrs[ivI]->Orientation(seqI) == AbstractMatch::reverse )
+				{
+					swap(left_col, right_col);	// must have been a revcomp seq
+				}
+				else if(left_col > right_col)
+				{
+					cerr << "bad cols\n";
+					cerr << hss_list[hssI]->LeftEnd(0) << endl;
+					cerr << hss_list[hssI]->RightEnd(0) << endl;
+					cerr << iv_ptrs[ivI]->LeftEnd(seqI) << endl;
+					cerr << iv_ptrs[ivI]->RightEnd(seqI) << endl;
+					genome::breakHere();
+				}
+
+				if( left_col > 2000000000 || right_col > 2000000000 )
+				{
+					cerr << "huh 2?\n";
+					cerr << hss_list[hssI]->LeftEnd(0) << endl;
+					cerr << hss_list[hssI]->RightEnd(0) << endl;
+					cerr << iv_ptrs[ivI]->LeftEnd(seqI) << endl;
+					cerr << iv_ptrs[ivI]->RightEnd(seqI) << endl;
+					genome::breakHere();
+				}
+				cur_hss_cols.push_back( make_pair( left_col, right_col ) );
+			}
+			for( size_t hssI = 0; hssI < hss_list.size(); ++hssI )
+				hss_list[hssI]->Free();
+		}
+	}
+}
+
+
+class IntervalSeqManipulator
+{	// Forwards Interval coordinate and cropping operations to one fixed sequence index.
+	uint m_seq;	// sequence index handed to every Interval call below
+public:
+	IntervalSeqManipulator( uint seq ){ m_seq = seq; }
+	gnSeqI LeftEnd(Interval& iv) const{ return iv.LeftEnd(m_seq); }
+	gnSeqI Length(Interval& iv) const{ return iv.Length(m_seq); }
+	void CropLeft(Interval& iv, gnSeqI amount ) const{ iv.CropLeft(amount, m_seq); }
+	void CropRight(Interval& iv, gnSeqI amount ) const{ iv.CropRight(amount, m_seq); }
+	// appends a copy of iv to the end of container c
+	template< typename ContainerType >
+	void AddCopy(ContainerType& c, Interval& iv) const{ c.push_back( iv ); }
+};
+
+
+void detectBackboneBreakOnGenes( IntervalList& iv_list, backbone_list_t& bb_list, const HssDetector* detector, vector< CompactGappedAlignment<>* >& iv_orig_ptrs, vector< vector< gnSeqI > >& gene_bounds )
+{	// Computes bb_list from iv_list: pairwise hss columns (broken on gene_bounds) are merged into n-way predictions.
+	const uint seq_count = iv_list.seq_table.size();
+	const size_t iv_count = iv_list.size();
+	// indexed by seqI, seqJ, ivI, hssI (left col, right col)
+	pairwise_genome_hss_t hss_cols(boost::extents[seq_count][seq_count][iv_count]);
+
+	// SeqPosToColumn is needed later on, so every interval is converted to a CompactGappedAlignment
+	vector< CompactGappedAlignment<>* > iv_ptrs(iv_count);
+	for( size_t ivI = 0; ivI != iv_count; ++ivI )
+	{
+		CompactGappedAlignment<> blank_cga;
+		iv_ptrs[ivI] = blank_cga.Copy();
+		new (iv_ptrs[ivI])CompactGappedAlignment<>( iv_list[ivI] );
+	}
+	// iv_orig_ptrs receives the alignments in iv_list order before the pairwise pass gets to work on iv_ptrs
+	iv_orig_ptrs = iv_ptrs;
+	makeAllPairwiseGenomeHSSBreakOnGenes( iv_list, iv_ptrs, iv_orig_ptrs, hss_cols, detector, gene_bounds );
+
+	// merge overlapping pairwise homology predictions into n-way predictions
+	mergePairwiseHomologyPredictions( iv_orig_ptrs, hss_cols, bb_list );
+}
+
+int main( int argc, char* argv[] )
+{
+#if	WIN32
+	SetPriorityClass(GetCurrentProcess(), BELOW_NORMAL_PRIORITY_CLASS);
+#endif
+	// Computes backbone for an XMFA alignment with segments broken at gene boundaries, writes it, then checks the result.
+	if( argc < 4 )
+	{
+		cerr << "bbBreakOnGenes <xmfa file> <min bb gap size> <bb output>\n";
+		return -1;
+	}
+	string xmfa_fname( argv[1] );
+	int min_bb_gap = atoi( argv[2] );
+	string output_fname( argv[3] );
+
+	ifstream xmfa_input( xmfa_fname.c_str() );
+	if( !xmfa_input.is_open() ){
+		cerr << "Error opening \"" << xmfa_fname << "\"" << endl;
+		return -4;
+	}
+	ofstream bb_output( output_fname.c_str() );
+	if( !bb_output.is_open() ){
+		cerr << "Error opening \"" << output_fname << "\" for writing" << endl;
+		return -6;
+	}
+	// read the alignment
+	IntervalList iv_list;
+	iv_list.ReadStandardAlignment( xmfa_input );
+	LoadSequences(iv_list, &cout);
+	vector< vector< gnSeqI > > gene_bounds( iv_list.seq_table.size() );
+
+	if( argc - 4 == iv_list.seq_filename.size() )
+	{
+		cerr << "Reading gene coordinates from .ptt files\n";
+		// exactly one extra argument per genome was given: read gene coordinates from those files instead
+		for( size_t aI = 0; aI < iv_list.seq_filename.size(); aI++ )
+		{
+			ifstream ptt_in( argv[aI+4] );
+			if( !ptt_in.is_open() ){
+				cerr << "Error opening \"" << argv[aI+4] << "\"" << endl;
+				return -5;
+			}
+			string bub;
+			for( int headerI = 0; headerI < 3; ++headerI )
+				getline( ptt_in, bub );	// the first three lines are read and discarded
+			while( getline( ptt_in, bub ) )
+			{
+				stringstream line_str(bub);
+				string buf;
+				getline( line_str, buf, '.' );
+				int64 lend = atoi(buf.c_str());
+				getline( line_str, buf, '.' );
+				getline( line_str, buf );
+				int64 rend = atoi(buf.c_str());
+				if( lend <= 0 || rend < lend )
+					continue;	// coordinates did not parse to a positive, ordered range (same sanity check as the feature branch)
+				gene_bounds[aI].push_back( lend -1);
+				gene_bounds[aI].push_back( lend );
+				gene_bounds[aI].push_back( rend );
+				gene_bounds[aI].push_back( rend+1 );
+			}
+			std::sort( gene_bounds[aI].begin(), gene_bounds[aI].end() );	// keep sorted, like the feature branch and getBpList
+		}
+	}else{
+		// get gene boundary coordinates, break bb segs on genes...
+		for( size_t genomeI = 0; genomeI < iv_list.seq_table.size(); genomeI++ )
+		{
+			for( size_t featureI = 0; featureI < iv_list.seq_table[genomeI]->getFeatureListLength(); ++featureI )
+			{
+				gnBaseFeature* feat = iv_list.seq_table[genomeI]->getFeature( featureI );
+				string feat_name = feat->GetName();
+				if( feat_name != "CDS" )
+					continue;	// don't deal with other feature types (source, misc_RNA, etc)
+				gnLocation loc = feat->GetLocation(0);
+				if( loc.GetFirst() > loc.GetLast() || loc.GetFirst() == 0 || loc.GetLast() == 0 )
+					continue;	// a problem parsing annotation?
+				gene_bounds[genomeI].push_back( loc.GetFirst() );
+				gene_bounds[genomeI].push_back( loc.GetLast() +1 );
+			}
+			std::sort( gene_bounds[genomeI].begin(), gene_bounds[genomeI].end() );
+			cerr << "Found " << gene_bounds[genomeI].size() / 2 << " genes for " << iv_list.seq_filename[genomeI] << endl;
+		}
+	}
+
+	// detect big gaps
+	backbone_list_t bb_list;
+	vector< CompactGappedAlignment<>* > iv_orig_ptrs;
+	BigGapsDetector bgd( min_bb_gap );
+	detectBackboneBreakOnGenes( iv_list, bb_list, &bgd, iv_orig_ptrs, gene_bounds );
+
+	writeBackboneSeqCoordinates( bb_list, iv_list, bb_output );
+	std::vector< bb_seqentry_t > bb_seq_list;
+	bb_output.close();
+	std::ifstream bbseq_input( output_fname.c_str() );
+	readBackboneSeqFile( bbseq_input, bb_seq_list );
+
+	// testing:  check whether any gene boundaries are violated (NOTE(review): gene_bounds[0] assumes at least one sequence)
+	gene_bounds[0].push_back(31337);	// test the test:
+	gene_bounds[0].push_back(31333);	// insert some bogus gene bounds to make sure
+	gene_bounds[0].push_back(31341);	// they get found and reported
+	gene_bounds[0].push_back(31345);
+	for( uint seqI = 0; seqI < iv_list.seq_table.size(); seqI++ )
+	{
+		cerr << "Checking seq " << seqI << " for errors\n";
+		std::sort( gene_bounds[seqI].begin(), gene_bounds[seqI].end() );
+		BbSeqEntrySorter bs(seqI);
+		std::sort( bb_seq_list.begin(), bb_seq_list.end(), bs );
+		size_t gI = 0;
+		size_t bI = 0;
+		cerr << gene_bounds[seqI].size() << " gene boundaries and " << bb_seq_list.size() << " bb segs\n";
+		for( ; gI < gene_bounds[seqI].size() && bI < bb_seq_list.size(); gI++ )
+		{
+			cout << "checking " << bb_seq_list[bI][seqI].first << ", " <<bb_seq_list[bI][seqI].second << endl;
+			while( bI < bb_seq_list.size() && gene_bounds[seqI][gI] > abs(bb_seq_list[bI][seqI].second) )
+				bI++;
+			if( bI == bb_seq_list.size() )
+				break;
+			if(abs(bb_seq_list[bI][seqI].first) + 1 < gene_bounds[seqI][gI] && gene_bounds[seqI][gI] < abs(bb_seq_list[bI][seqI].second) - 1)
+			{
+				cerr << "segment " <<bb_seq_list[bI][seqI].first << ", " <<bb_seq_list[bI][seqI].second << " violates gene boundary " << gene_bounds[seqI][gI] << " in seq " << seqI << endl;
+			}else
+				cout << "segment " <<bb_seq_list[bI][seqI].first << ", " <<bb_seq_list[bI][seqI].second << " is okay for " << gene_bounds[seqI][gI] << " in seq " << seqI << endl;
+		}
+	}
+//	mergeAdjacentSegments( bb_seq_list );
+//	addUniqueSegments( bb_seq_list );
+	bbseq_input.close();
+	bb_output.open(output_fname.c_str());
+	writeBackboneSeqFile( bb_output, bb_seq_list );	// written in the order left by the last sort in the check loop above
+
+	return 0;
+}
+
diff --git a/src/bbFilter.cpp b/src/bbFilter.cpp
new file mode 100644
index 0000000..9d62587
--- /dev/null
+++ b/src/bbFilter.cpp
@@ -0,0 +1,292 @@
+#include "libMems/Backbone.h"
+using namespace mems;
+using namespace std;
+using namespace genome;
+
+typedef pair< bb_seqentry_t, size_t > labeled_bb_t;	// a backbone segment paired with the tracking ID assigned to it in main
+
+class BbSorter
+{	// Orders labeled segments by the absolute value of the left-end coordinate they have in sequence m_seq.
+public:
+	size_t m_seq;
+	BbSorter( size_t seqI ) : m_seq( seqI ) {}
+	bool operator()( const labeled_bb_t& a, const labeled_bb_t& b )
+	{
+		return genome::absolut(b.first[m_seq].first) > genome::absolut(a.first[m_seq].first);
+	}
+};
+
+
+
+class ShorterThan {
+public:
+	// True when no entry of a has a non-zero left end, or when the (integer) mean length of the entries that do is below 20.
+	bool operator()( const bb_seqentry_t& a )
+	{
+		size_t present = 0;
+		size_t total_len = 0;
+		for( size_t seqI = 0; seqI != a.size(); ++seqI )
+		{
+			if( a[seqI].first == 0 )
+				continue;	// entries with a zero left end are not counted
+			total_len += genome::absolut(a[seqI].second - a[seqI].first) + 1;
+			++present;
+		}
+		return total_len == 0 || (total_len / present) < 20;
+	}
+};
+
+int main( int argc, char* argv[] )
+{
+#if	WIN32
+	SetPriorityClass(GetCurrentProcess(), BELOW_NORMAL_PRIORITY_CLASS);
+#endif
+	// Extracts binary presence/absence features from a backbone file and writes them in beast or genoplast format.
+	if( argc < 5 )	// argv[4] (the target format) is read unconditionally below, so four arguments are required
+	{
+		cerr << "bbFilter <backbone file> <independent dist> <output file> <beast|gp> [<seq1> <seq2>...<seqN>]\n";
+		cerr << "seq index starts at 0.\n";
+		cerr << "\nExample:\n";
+		cerr << "bbFilter my_alignment.backbone 50 my_feats.bin gp\n";
+		cerr << "the above command extracts binary features from \"my_alignment.backbone\" which are separated by a minimum of 50nt sequence conserved among all taxa in the alignment.  The output is written to my_feats.bin in genoplast format\n";
+		cerr << "\n\nExample 2:\nbbFilter aln.backbone 100 feats.xml beast 0 1 2 5 6\n";
+		cerr << "the above command extracts binary features from \"aln.backbone\" which are separated by a minimum of 100nt sequence conserved among genomes 0,1,2,5, and 6 from the alignment.  The output is written to feats.xml in beast format\n";
+		return -1;
+	}
+	string bbseq_fname( argv[1] );
+	int indie_dist = atoi( argv[2] );
+	string output_fname( argv[3] );
+	string target_format( argv[4] );
+	bool allow_alternalogs = true;	// both switches are hard-coded; with check_independence false
+	bool check_independence = false;	// the independence filter further down never runs
+
+	ifstream bbseq_input( bbseq_fname.c_str() );
+	if( !bbseq_input.is_open() ){
+		cerr << "Error opening \"" << bbseq_fname << "\"" << endl;
+		return -4;
+	}
+	ofstream anal_output( output_fname.c_str() );
+	if( !anal_output.is_open() ){
+		cerr << "Error opening \"" << output_fname << "\" for writing" << endl;
+		return -6;
+	}
+
+	// read the backbone column file
+	vector< bb_seqentry_t > bb_seq_list;
+	readBackboneSeqFile( bbseq_input, bb_seq_list );
+
+	// read the list of seqs of interest
+	vector< int > seqs;
+	for( int i = 5; i < argc; i++ )
+		seqs.push_back(atoi(argv[i]));
+
+	// assume all seqs are of interest
+	if( seqs.size() == 0 && bb_seq_list.size() > 0 )
+	{
+		for( int i = 0; i < bb_seq_list[0].size(); i++ )
+			seqs.push_back(i);
+	}
+	// add any genome-specific segments
+	addUniqueSegments( bb_seq_list );
+
+	// remove short segments
+	ShorterThan st;
+	vector< bb_seqentry_t >::iterator new_end = std::remove_if( bb_seq_list.begin(), bb_seq_list.end(), st );
+	cout << "Removing " << bb_seq_list.end() - new_end << " features shorter than 20 nt\n";
+	bb_seq_list.erase( new_end, bb_seq_list.end() );
+
+	// now assign tracking IDs to the backbone segments
+	vector< labeled_bb_t > bb_segs;
+	for( size_t i = 0; i < bb_seq_list.size(); i++ )
+	{
+		bb_segs.push_back( make_pair( bb_seq_list[i], i ) );
+	}
+
+	// create a sorted list for each genome and a map to the segment ID
+	vector< vector< labeled_bb_t > > sorted_segs( seqs.size(), bb_segs );
+	vector< vector< size_t > > seg_id_maps( seqs.size(), vector< size_t >( bb_segs.size() ) );
+	for( size_t seqI = 0; seqI < seqs.size(); seqI++ )
+	{
+		BbSorter bbs(seqs[seqI]);
+		std::sort( sorted_segs[seqI].begin(), sorted_segs[seqI].end(), bbs );
+		for( size_t bbI = 0; bbI < sorted_segs[seqI].size(); bbI++ )
+			seg_id_maps[ seqI ][ sorted_segs[seqI][bbI].second ] = bbI;
+	}
+
+	// good_bb: segments kept as features; nway: present in every selected seq; nunya: present in none of them
+	bitset_t good_bb( bb_seq_list.size() );
+	bitset_t nway( bb_seq_list.size() );
+	bitset_t nunya( bb_seq_list.size() );
+
+	// mark anything that has all of the seqs or none of seqs as not useful
+	for( size_t bbI = 0; bbI < bb_seq_list.size(); bbI++ )
+	{
+		bool all = true;
+		bool none = true;
+		for( size_t sI = 0; sI < seqs.size(); sI++ )
+		{
+			if( bb_seq_list[bbI][seqs[sI]].first == 0 )
+				all = false;
+			else
+				none = false;
+		}
+		if(all)
+			nway.set(bbI);
+		if(none)
+			nunya.set(bbI);
+	}
+	good_bb = nway | nunya;
+	good_bb.flip();
+
+	// now mark segs that are too close to each other to be considered independent
+	for( size_t sI = 0; check_independence && sI < seqs.size(); sI++ )
+	{
+		BbSorter bbs(seqs[sI]);
+		std::sort( bb_segs.begin(), bb_segs.end(), bbs );
+		for( size_t bbI = 1; bbI + 1 < bb_segs.size(); bbI++ )	// "bbI + 1 <" cannot wrap around when bb_segs is empty
+		{
+			if( nway[bb_segs[bbI].second] )
+				continue;
+			if( bb_segs[bbI].first[seqs[sI]].first == 0 )
+				continue;
+			// ensure that it has n-way on both sides and that they are at least "indie_dist" long
+			if( nway.test(bb_segs[bbI-1].second) &&
+				nway.test(bb_segs[bbI+1].second) &&
+				absolut(bb_segs[bbI-1].first[seqs[sI]].second - bb_segs[bbI-1].first[seqs[sI]].first) >= indie_dist &&
+				absolut(bb_segs[bbI+1].first[seqs[sI]].second - bb_segs[bbI+1].first[seqs[sI]].first) >= indie_dist )
+			{
+				if( !allow_alternalogs ){
+					// ensure that there is no other feature in the other genomes
+					for( size_t k = 0; k < seqs.size(); k++ )
+					{
+						if( k == sI )
+							continue;
+						size_t oid = seg_id_maps[k][ bb_segs[bbI-1].second ];
+						int parity = ((bb_segs[bbI-1].first[seqs[sI]].first > 0 && bb_segs[bbI-1].first[seqs[k]].first > 0) ||
+							(bb_segs[bbI-1].first[seqs[sI]].first < 0 && bb_segs[bbI-1].first[seqs[k]].first < 0)) ? 1 : -1;
+						size_t prev_in_sI = bb_segs[bbI-1].second;
+						size_t cur_in_sI = bb_segs[bbI].second;
+						size_t next_in_sI = bb_segs[bbI+1].second;
+						size_t prev_in_k = sorted_segs[k][oid].second;
+						size_t cur_in_k = sorted_segs[k][oid+parity].second;
+						size_t next_in_k = sorted_segs[k][oid+parity*2].second;
+						if( (cur_in_sI == cur_in_k && next_in_sI == next_in_k) ||
+							(next_in_sI == cur_in_k))
+							; // it's good because no other segs intervene
+						else
+						{
+							good_bb.set( bb_segs[bbI].second, false );
+							break;	// it's an alternalog or overlapping, no sense in checking other seqs
+						}
+					}
+				}
+			}else
+				good_bb.set(bb_segs[bbI].second, false);
+		}
+	}
+
+	// create site patterns, then write out the good ones
+	bitset_t empty( bb_seq_list.size() );
+	vector< bitset_t > spa_seqs( seqs.size(), empty );
+	for( size_t bbI = 0; bbI < bb_seq_list.size(); bbI++ )
+		for( size_t seqI = 0; seqI < seqs.size(); seqI++ )
+			spa_seqs[seqI].set(bbI, bb_seq_list[bbI][seqs[seqI]].first != 0);
+
+	vector< string > binseqs( seqs.size(), string( good_bb.count(), '0' ) );
+	for( size_t seqI = 0; seqI < seqs.size(); seqI++ )
+	{
+		size_t goodI = 0;
+		for( size_t bbI = 0; bbI < good_bb.size(); bbI++ )
+			if(good_bb.test(bbI))
+			{
+				if(spa_seqs[seqI].test(bbI))
+					binseqs[seqI][goodI] = '1';
+				goodI++;
+			}
+	}
+	map< string, int > sitepattern_count;
+	// count how many segments of each site pattern
+	for( size_t bbI = 0; bbI < good_bb.size(); bbI++ )
+	{
+		if(!good_bb.test(bbI))	continue;
+		size_t length=0;
+		size_t sc=0;
+		string sitepat( seqs.size(), '0' );
+		for( int seqI = 0; seqI < seqs.size(); seqI++ )
+		{
+			sitepat[seqI] = spa_seqs[seqI][bbI] ? '1' : '0';
+			if(spa_seqs[seqI][bbI]){	// spa_seqs[seqI] describes genome seqs[seqI], so the length must be read from that column
+				length += genome::absolut(bb_seq_list[bbI][seqs[seqI]].second - bb_seq_list[bbI][seqs[seqI]].first);
+				sc++;
+			}
+		}
+		length /= sc;	// sc >= 1 here: good_bb excludes segments that are absent from every selected seq
+		map< string, int >::iterator iter = sitepattern_count.find(sitepat);
+		if(iter == sitepattern_count.end())
+			sitepattern_count.insert( make_pair( sitepat, length ) );
+		else
+			iter->second+= length;
+	}
+
+	// write out the seqs!!
+	if( target_format == "beast" )
+	{
+		anal_output << "\t<taxa id=\"taxa\">\n";
+		for( size_t seqI = 0; seqI < seqs.size(); seqI++ )
+		{
+			anal_output << "\t\t<taxon id=\"seq" << seqI << "\"/>\n";
+
+		}
+		anal_output << "\t</taxa>\n";
+		anal_output << "\t<alignment id=\"alignment\" dataType=\"binary\">\n";
+
+		for( size_t seqI = 0; seqI < seqs.size(); seqI++ )
+		{
+			anal_output << "\t\t<sequence>\n";
+			anal_output << "\t\t\t<taxon idref=\"seq" << seqI << "\"/>\n";
+			anal_output << "\t\t\t" << binseqs[seqI] << endl;
+			anal_output << "\t\t</sequence>\n";
+//			anal_output << "> seq" << seqI << endl;
+//			for( size_t i = 0; i < binseqs[seqI].size(); i+=80 )
+//				anal_output << binseqs[seqI].substr(i, 80) << endl;
+		}
+		anal_output << "\t</alignment>\n";
+	}else{
+		// write out a header line with the number of times each site pattern is used.
+		map<string,int>::iterator f = sitepattern_count.begin();
+		for(; f!= sitepattern_count.end(); f++){
+			if(f!=sitepattern_count.begin())	anal_output << ' ';
+			anal_output << (f->second / 20);
+		}
+		anal_output << endl;
+		// write genoplast format
+		for( size_t seqI = 0; seqI < seqs.size(); seqI++ )
+		{
+			f = sitepattern_count.begin();
+			for(; f!= sitepattern_count.end(); f++){
+				if(f!=sitepattern_count.begin())	anal_output << ' ';
+				anal_output << f->first[seqI];
+			}
+			anal_output << endl;
+		}
+	}
+
+	anal_output.close();
+	// the coordinates of every kept feature go to "<output file>.locs", one feature per line
+	string loc_fname = output_fname + ".locs";
+	ofstream location_output( loc_fname.c_str() );
+	for( size_t bbI = 0; bbI < good_bb.size(); bbI++ )
+	{
+		if( good_bb.test(bbI) )
+		{
+			for( size_t seqI = 0; seqI < seqs.size(); seqI++ )
+			{
+				if( seqI > 0 )
+					location_output << '\t';
+				location_output << bb_seq_list[bbI][seqs[seqI]].first << '\t' << bb_seq_list[bbI][seqs[seqI]].second;	// columns follow the selected genomes
+			}
+			location_output << std::endl;
+		}
+	}
+}
+
diff --git a/src/calculateBackboneCoverage.cpp b/src/calculateBackboneCoverage.cpp
new file mode 100644
index 0000000..8a57db4
--- /dev/null
+++ b/src/calculateBackboneCoverage.cpp
@@ -0,0 +1,138 @@
+/*******************************************************************************
+ * $Id: calculateBackboneCoverage.cpp,v 1.5 2004/02/28 00:01:31 darling Exp $
+ * This file is copyright 2002-2004 Aaron Darling.  All rights reserved.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * rights.  Redistribution of this file, in whole or in part is prohibited
+ * without express permission.
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/IntervalList.h"
+#include "libMems/Islands.h"
+#include "libMems/DistanceMatrix.h"
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+void print_usage( const char* pname )	// reports the expected command line on cerr, naming the program pname
+{	cerr << "Usage: " << pname;
+	cerr << " <source alignment> <min bb sequence length> <max bb gap size> <sequence 1>...<sequence N>\n"; }
+
+#define NELEMS(a) ( sizeof( a ) / sizeof( *a ) )	// element count of a true array; meaningless when a is a pointer
+
+int main( int argc, const char* argv[] ){
+// Loads the sequences and the alignment, extracts backbone, and prints per-genome backbone length plus an identity matrix.
+// debugging command line
+#if defined(__MWERKS__) && defined(__GNDEBUG__)
+	const char* m_argv[] = {
+		"calculateBackboneCoverage",
+		"9coli.dat",
+		"50",
+		"50",
+		"\\\\Ramesses\\Workspace\\aaron\\diarrhea\\ecolim52.fas",
+		"\\\\Ramesses\\Workspace\\aaron\\diarrhea\\EDL933.fas",
+		"\\\\Ramesses\\Workspace\\aaron\\diarrhea\\o157sakai.fas",
+		"\\\\Ramesses\\Workspace\\aaron\\diarrhea\\CFTv17.fas",
+		"\\\\Ramesses\\Workspace\\aaron\\diarrhea\\Sflex57_v3.gbk",
+		"\\\\Ramesses\\Workspace\\aaron\\diarrhea\\shigella_flexnerii_2a.fas",
+		"\\\\Ramesses\\Workspace\\aaron\\diarrhea\\typhimurium.fas",
+		"\\\\Ramesses\\Workspace\\aaron\\diarrhea\\styphi.fas",
+		"\\\\Ramesses\\Workspace\\aaron\\diarrhea\\STY2.fas",
+	};
+	int m_argc = NELEMS( m_argv );
+
+	argv = m_argv;
+	argc = m_argc;
+#endif
+
+try{
+	if( argc <= 0 ){
+		print_usage( "extractBackbone" );
+		return -1;
+	}
+	if( argc < 6 ){
+		print_usage( argv[0] );
+		return -1;
+	}
+
+	string alignment_fname = argv[1];
+	int64 min_bb_length = atol( argv[2] );
+	int64 max_gap_length = atol( argv[3] );
+	vector< string > sequence_fname;
+	vector< gnSequence* > source_seqs;
+	for( uint argI = 4; argI < argc; argI++ ){
+		sequence_fname.push_back( argv[ argI ] );
+		cout << "Loading " << sequence_fname[ argI - 4 ];
+		try{
+			source_seqs.push_back( new gnSequence() );
+			source_seqs[ argI - 4 ]->LoadSource( sequence_fname[ argI - 4 ] );
+		}catch( gnException& gne ){
+			cerr << gne << endl;
+			return -1;
+		}
+		cout << "   " << source_seqs[ argI - 4 ]->length() << " bp\n";
+	}
+
+	ifstream alignment_in;
+	alignment_in.open( alignment_fname.c_str() );
+	if( !alignment_in.is_open() ){
+		cerr << "Error opening " << alignment_fname << endl;
+		return -1;
+	}
+
+	cout << "Loading alignment...\n";
+	IntervalList aligned_ivs;
+	aligned_ivs.ReadStandardAlignment( alignment_in );
+
+	// add the sequence data to the interval list
+	aligned_ivs.seq_table = source_seqs;
+	uint seq_count = source_seqs.size();
+	cout << "Extracting backbone..." << endl;
+	vector< GappedAlignment > backbone_data;
+	simpleFindBackbone( aligned_ivs, min_bb_length, max_gap_length, backbone_data );
+
+	IntervalList backbone_ivs;
+	backbone_ivs.seq_table = aligned_ivs.seq_table;
+
+	cout << "There are " << backbone_data.size() << " backbone segments\n";
+
+	// count up the total length of backbone in each genome
+	cout << "Averaging backbone lengths..." << endl;
+	vector< gnSeqI > total_bb( seq_count, 0 );
+	NumericMatrix< double > overall_identity;
+	for( uint bbI = 0; bbI < backbone_data.size(); bbI++ ){
+		vector<AbstractMatch*> tmp(1, &backbone_data[ bbI ]);
+		backbone_ivs.push_back( Interval(tmp.begin(), tmp.end()) );
+		for( uint seqI = 0; seqI < seq_count; seqI++ ){
+			total_bb[ seqI ] += backbone_data[ bbI ].Length( seqI );
+		}
+	}
+
+	IdentityMatrix( backbone_ivs, overall_identity );
+
+	gnSeqI avg_bb = 0;
+	for( uint seqI = 0; seqI < aligned_ivs.seq_table.size(); seqI++ ){
+		cout << "seq " << seqI << " backbone: " << total_bb[ seqI ] << endl;
+		avg_bb += total_bb[ seqI ];
+	}
+	avg_bb /= aligned_ivs.seq_table.size();	// non-zero: argc >= 6 guarantees at least two sequence arguments
+	cout << "Average: " << avg_bb << endl;
+
+	// output the identity matrix
+	cout << "Identity matrix: " << endl;
+	overall_identity.print( cout );
+	cout << endl;
+	return 0;	// normal completion; every handler below falls through to the failure return
+}catch( gnException& gne ){
+	cerr << gne << endl;
+}catch( exception& e ){
+	cerr << e.what() << endl;
+}catch(...){
+	cerr << "Unhandled exception of unknown type" << endl;
+}
+	return -1;	// only reached after an exception was reported, so do not signal success
+}
diff --git a/src/calculateBackboneCoverage2.cpp b/src/calculateBackboneCoverage2.cpp
new file mode 100644
index 0000000..5a898ec
--- /dev/null
+++ b/src/calculateBackboneCoverage2.cpp
@@ -0,0 +1,132 @@
+/*******************************************************************************
+ * $Id: calculateBackboneCoverage.cpp,v 1.5 2004/02/28 00:01:31 darling Exp $
+ * This file is copyright 2002-2004 Aaron Darling.  All rights reserved.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * rights.  Redistribution of this file, in whole or in part is prohibited
+ * without express permission.
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/IntervalList.h"
+#include "libMems/Islands.h"
+#include "libMems/DistanceMatrix.h"
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+/** Write the command-line synopsis to stderr, using pname as the program name. */
+void print_usage( const char* pname )
+{
+	cerr << "Usage: ";
+	cerr << pname;
+	cerr << " <XMFA alignment> <min bb sequence length> <max bb gap size> \n";
+}
+
+
+/**
+ * Report coverage and backbone statistics for an XMFA alignment.
+ *
+ * Arguments: <XMFA alignment> <min bb sequence length> <max bb gap size>.
+ * Prints, per genome, the fraction of the sequence covered by alignment
+ * intervals, the mean/variance/std-dev of interval lengths, the amount of
+ * backbone found by simpleFindBackbone() and the backbone identity matrix.
+ *
+ * @return 0 on success, -1 on bad usage, unreadable input, an alignment
+ *         without sequences or intervals, or any exception.
+ */
+int main( int argc, const char* argv[] ){
+
+try{
+	if( argc <= 0 ){
+		print_usage( "extractBackbone" );
+		return -1;
+	}
+	if( argc < 4 ){
+		print_usage( argv[0] );
+		return -1;
+	}
+	
+	string alignment_fname = argv[1];
+	int64 min_bb_length = atol( argv[2] );
+	int64 max_gap_length = atol( argv[3] );
+	vector< gnSequence* > source_seqs;
+	
+	ifstream alignment_in;
+	alignment_in.open( alignment_fname.c_str() );
+	if( !alignment_in.is_open() ){
+		cerr << "Error opening " << alignment_fname << endl;
+		return -1;
+	}
+	
+	cout << "Loading alignment...\n";
+	IntervalList aligned_ivs;
+	aligned_ivs.ReadStandardAlignment( alignment_in );	
+	LoadSequences(aligned_ivs, &cout);
+	source_seqs = aligned_ivs.seq_table;
+	uint seq_count = source_seqs.size();
+	// every average below divides by these counts, so refuse empty input
+	// instead of dividing by zero
+	if( seq_count == 0 || aligned_ivs.size() == 0 ){
+		cerr << "Error: " << alignment_fname << " contains no sequences or no aligned intervals" << endl;
+		return -1;
+	}
+
+	// calculate total lengths covered
+	double avg_coverage = 0;
+	double total_lcb_len = 0;
+	for( uint seqI = 0; seqI < seq_count; ++seqI )
+	{
+		double cur_size = 0;
+		for( uint ivI = 0; ivI < aligned_ivs.size(); ++ivI )
+			cur_size += aligned_ivs[ivI].Length(seqI);
+		total_lcb_len += cur_size;
+		cout << "Genome " << seqI << " coverage is: " << cur_size << " / " << source_seqs[seqI]->length() << " = ";
+		cur_size /= (double)source_seqs[seqI]->length();
+		cout << cur_size << endl;
+		avg_coverage += cur_size; 
+	}
+	avg_coverage /= (double)seq_count;
+	cout << "Average coverage = " << avg_coverage << endl;
+
+	// one sample per (genome, interval) pair
+	const double sample_count = (double)seq_count * (double)aligned_ivs.size();
+	double avg_lcb_len = total_lcb_len / sample_count;
+	double lcb_len_variance = 0;
+
+	for( uint seqI = 0; seqI < seq_count; ++seqI )
+	{
+		for( uint ivI = 0; ivI < aligned_ivs.size(); ++ivI ){
+			const double deviation = aligned_ivs[ivI].Length(seqI) - avg_lcb_len;
+			lcb_len_variance += deviation * deviation;
+		}
+	}
+	// sample variance (n - 1); a single sample has no spread, so leave it at 0
+	// rather than computing 0 / 0
+	if( sample_count > 1 )
+		lcb_len_variance /= sample_count - 1.0;
+	cout << "Avg lcb len: " << avg_lcb_len << endl;
+	cout << "variance: " << lcb_len_variance << endl;
+	cout << "std dev: " << sqrt( lcb_len_variance ) << endl;
+
+	cout << "Extracting backbone..." << endl;
+	vector< GappedAlignment > backbone_data;
+	simpleFindBackbone( aligned_ivs, min_bb_length, max_gap_length, backbone_data );
+
+	IntervalList backbone_ivs;
+	backbone_ivs.seq_table = aligned_ivs.seq_table;
+	
+	cout << "There are " << backbone_data.size() << " backbone segments\n";
+
+	// count up the total length of backbone in each genome
+	cout << "Averaging backbone lengths..." << endl;
+	vector< gnSeqI > total_bb( seq_count, 0 );
+	NumericMatrix< double > overall_identity;
+	for( uint bbI = 0; bbI < backbone_data.size(); bbI++ ){
+		vector<AbstractMatch*> tmp(1, &backbone_data[ bbI ]);
+		backbone_ivs.push_back( Interval(tmp.begin(), tmp.end()) );
+		for( uint seqI = 0; seqI < seq_count; seqI++ ){
+			total_bb[ seqI ] += backbone_data[ bbI ].Length( seqI );
+		}
+	}
+	// collect the pointers only after backbone_ivs has stopped growing
+	vector< AbstractMatch* > bbivs;
+	for( uint bbI = 0; bbI < backbone_ivs.size(); bbI++ )
+		bbivs.push_back( &backbone_ivs[bbI] );
+	BackboneIdentityMatrix( bbivs, aligned_ivs.seq_table, overall_identity );
+		
+	gnSeqI avg_bb = 0;
+	for( uint seqI = 0; seqI < seq_count; seqI++ ){
+		cout << "seq " << seqI << " backbone: " << total_bb[ seqI ] << endl;
+		avg_bb += total_bb[ seqI ];
+	}
+	avg_bb /= seq_count;
+	cout << "Average: " << avg_bb << endl;
+	
+	// output the identity matrix
+	cout << "Identity matrix: " << endl;
+	overall_identity.print( cout );
+	cout << endl;
+	
+}catch( gnException& gne ){
+	cerr << gne << endl;
+	return -1;
+}catch( exception& e ){
+	cerr << e.what() << endl;
+	return -1;
+}catch(...){
+	// report failure to the caller instead of exiting with status 0
+	cerr << "Unknown exception" << endl;
+	return -1;
+}
+	return 0;
+}
diff --git a/src/calculateCoverage.cpp b/src/calculateCoverage.cpp
new file mode 100644
index 0000000..907616c
--- /dev/null
+++ b/src/calculateCoverage.cpp
@@ -0,0 +1,89 @@
+/*******************************************************************************
+ * $Id: calculateCoverage.cpp,v 1.5 2004/02/28 00:01:31 darling Exp $
+ * This file is copyright 2002-2004 Aaron Darling.  All rights reserved.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * rights.  Redistribution of this file, in whole or in part is prohibited
+ * without express permission.
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/IntervalList.h"
+#include "libMems/Islands.h"
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+/** Write the command-line synopsis to stderr, using pname as the program name. */
+void print_usage( const char* pname )
+{
+	cerr << "Usage: ";
+	cerr << pname;
+	cerr << " <source mauve alignment>  <sequence 1>...<sequence N>\n";
+}
+
+#define NELEMS(a) ( sizeof( a ) / sizeof( *a ) )
+
+/**
+ * Print the per-sequence length of every interval in a Mauve alignment.
+ *
+ * Arguments: <source mauve alignment> <sequence 1>...<sequence N>.
+ * Each sequence file is loaded, then the alignment is read with ReadList()
+ * and one tab-separated row of lengths is written per interval, with one
+ * column per sequence named on the command line.
+ *
+ * @return 0 on success, -1 on bad usage, unreadable input or any exception.
+ */
+int main( int argc, const char* argv[] ){
+
+try{
+	if( argc <= 0 ){
+		print_usage( "calculateCoverage" );
+		return -1;
+	}
+	if( argc < 2 ){
+		print_usage( argv[0] );
+		return -1;
+	}
+	
+	//
+	// Load sequences
+	//
+	string alignment_fname = argv[1];
+	vector< gnSequence* > source_seqs;
+	for( int argI = 2; argI < argc; argI++ ){
+		const string sequence_fname = argv[ argI ];
+		cout << "Loading " << sequence_fname;
+		try{
+			source_seqs.push_back( new gnSequence() );
+			source_seqs.back()->LoadSource( sequence_fname );
+		}catch( gnException& gne ){
+			cerr << gne << endl;
+			return -1;
+		}
+		cout << "   " << source_seqs.back()->length() << " bp\n";
+	}
+	
+	//
+	// Load IntervalList matches
+	//
+	ifstream alignment_in;
+	alignment_in.open( alignment_fname.c_str() );
+	if( !alignment_in.is_open() ){
+		cerr << "Error opening " << alignment_fname << endl;
+		return -1;
+	}
+	
+	cout << "Loading alignment...\n";
+	IntervalList aligned_ivs;
+	aligned_ivs.ReadList( alignment_in );
+	
+	for( uint ivI = 0; ivI < aligned_ivs.size(); ivI++ ){
+		cout << "Interval " << ivI;
+		Interval& iv = aligned_ivs[ ivI ];
+		// NOTE(review): assumes the alignment has at least as many sequences
+		// as were given on the command line -- confirm against callers
+		for( uint seqI = 0; seqI < source_seqs.size(); seqI++ ){
+			cout << '\t' << iv.Length( seqI );
+		}
+		cout << endl;
+	}
+
+	
+}catch( gnException& gne ){
+	cerr << gne << endl;
+	return -1;
+}catch( exception& e ){
+	cerr << e.what() << endl;
+	return -1;
+}catch(...){
+	// report failure to the caller instead of exiting with status 0
+	cerr << "Unknown exception" << endl;
+	return -1;
+}
+	return 0;
+}
diff --git a/src/checkForLGT.cpp b/src/checkForLGT.cpp
new file mode 100644
index 0000000..ab993cd
--- /dev/null
+++ b/src/checkForLGT.cpp
@@ -0,0 +1,253 @@
+#include "libMems/PhyloTree.h"
+#include <vector>
+#include <sstream>
+#include <algorithm>
+#include <utility>
+#include <fstream>
+#include <set>
+
+using namespace std;
+
+typedef unsigned int uint;
+
+/**
+ * Report whether query_nodeI lies in the subtree rooted at subtree_nodeI.
+ * The subtree root itself counts as part of the subtree.  The walk uses an
+ * explicit stack and follows only the children lists of the nodes.
+ */
+bool containsNode( PhyloTree< TreeNode >& t, node_id_t subtree_nodeI, node_id_t query_nodeI )
+{
+	stack< node_id_t > pending;
+	pending.push( subtree_nodeI );
+	while( !pending.empty() )
+	{
+		const node_id_t visiting = pending.top();
+		pending.pop();
+		if( visiting == query_nodeI )
+			return true;
+		// queue every child; a leaf simply contributes nothing
+		for( size_t kidI = 0; kidI < t[visiting].children.size(); kidI++ )
+			pending.push( t[visiting].children[kidI] );
+	}
+	return false;
+}
+
+/**
+ * Replace numeric node labels by the taxon names they index.
+ *
+ * Every non-empty node name is parsed as an unsigned number and replaced by
+ * taxon_names[number].  Nodes whose name is empty or does not start with a
+ * number are left untouched.
+ *
+ * @param t            tree whose node names are rewritten in place
+ * @param taxon_names  array of names; no length is passed, so the caller
+ *                     must guarantee that every label is a valid index
+ */
+void setTaxonNames( PhyloTree< TreeNode >& t, char** taxon_names )
+{
+	for( node_id_t nI = 0; nI < t.size(); nI++ )
+	{
+		if( t[nI].name.size() == 0 )
+			continue;
+		stringstream ss( t[nI].name );
+		uint num = 0;
+		// a label that is not a number cannot serve as an index; skip it
+		// rather than renaming the node after taxon 0 (or after garbage)
+		if( !( ss >> num ) )
+			continue;
+		t[nI].name = taxon_names[num];
+	}
+}
+
+/**
+ * Look up the node id of every label in names.
+ * @param ids  receives one node id per label, in the order of names
+ * @return false (after printing a message that mentions treeI) as soon as a
+ *         label has no matching node
+ */
+static bool findTaxonNodes( PhyloTree< TreeNode >& t, const vector< string >& names, size_t treeI, vector< node_id_t >& ids )
+{
+	for( size_t gI = 0; gI < names.size(); gI++ )
+	{
+		node_id_t nI = 0;
+		for( ; nI < t.size(); nI++ )
+			if( t[nI].name == names[gI] )
+				break;
+		if( nI == t.size() )
+		{
+			cerr << "Couldn't find node " << names[gI] << " in tree " << treeI << endl;
+			return false;
+		}
+		ids.push_back( nI );
+	}
+	return true;
+}
+
+/**
+ * Climb from the first group member towards the root until the subtree
+ * below the current node contains every member of group_ids, and return
+ * that node.  The climb always takes at least one step, so the result is an
+ * ancestor of group_ids[0].  group_ids must not be empty.
+ */
+static node_id_t findGroupAncestor( PhyloTree< TreeNode >& t, const vector< node_id_t >& group_ids )
+{
+	node_id_t cur_parent = group_ids[0];
+	set< node_id_t > remaining( group_ids.begin(), group_ids.end() );
+	while( !remaining.empty() )
+	{
+		// a node without parents is as high as we can go; stop instead of
+		// reading element 0 of an empty list
+		if( t[cur_parent].parents.empty() )
+			break;
+		cur_parent = t[cur_parent].parents[0];
+		set< node_id_t >::iterator iter = remaining.begin();
+		while( iter != remaining.end() )
+		{
+			// advance before erasing so the loop iterator stays valid
+			set< node_id_t >::iterator cur_iter = iter++;
+			if( containsNode( t, cur_parent, *cur_iter ) )
+				remaining.erase( cur_iter );
+		}
+	}
+	return cur_parent;
+}
+
+/** True when the subtree below subtree_nodeI contains any node of query_ids. */
+static bool containsAnyNode( PhyloTree< TreeNode >& t, node_id_t subtree_nodeI, const vector< node_id_t >& query_ids )
+{
+	for( size_t qI = 0; qI < query_ids.size(); qI++ )
+		if( containsNode( t, subtree_nodeI, query_ids[qI] ) )
+			return true;
+	return false;
+}
+
+/**
+ * Check every tree of a newick file for monophyly of two hard-coded taxon
+ * groups (labels 0-15 and 16-20) and write the trees back out with the
+ * numeric labels replaced by taxon names.
+ *
+ * One line "<tree index>\t<code>" is printed per tree:
+ *   0 both groups monophyletic, 1 root does not have exactly two children,
+ *   2 neither group monophyletic, 3 only group 1 broken, 4 only group 2 broken.
+ *
+ * @return 0 on success, -1 on bad usage, unreadable files or a missing taxon.
+ */
+int main( int argc, char* argv[] )
+{
+	if( argc != 3 )
+	{
+		cerr << "Usage: checkForLGT <newick input file> <newick output file>\n";
+		return -1;
+	}
+	string input_filename = argv[1];
+	string tree_outfname = argv[2];
+	vector< string > group_1;
+	vector< string > group_2;
+	for( uint taxonI = 0; taxonI < 16; taxonI++ )
+	{
+		stringstream ss;
+		ss << taxonI;
+		group_1.push_back( ss.str() );
+	}
+	for( uint taxonI = 16; taxonI < 21; taxonI++ )
+	{
+		stringstream ss;
+		ss << taxonI;
+		group_2.push_back( ss.str() );
+	}
+	// string literals are const; the cast further down only adapts the
+	// array to the signature of setTaxonNames
+	const char* taxon_names[] = {
+		"E. coli 53638",
+		"E. coli b171",
+		"E. coli b7a",
+		"E. coli e110019",
+		"E. coli e22",
+		"E. coli e24377a",
+		"E. coli f11",
+		"E. coli HS",
+		"S. boydii BS512",
+		"S. sonnei Ss046",
+		"S. flexneri 2457T",
+		"S. flexneri 301",
+		"E. coli CFT073",
+		"E. coli O157_H7 RIMD",
+		"E. coli O157_H7 EDL933",
+		"E. coli K-12 MG1655",
+		"S. enterica B67",
+		"S. enterica CT18",
+		"S. enterica LT2",
+		"S. enterica PA9150",
+		"S. enterica Ty2",
+	};
+
+	ifstream input_file( input_filename.c_str() );
+	if( !input_file.is_open() )
+	{
+		cerr << "Error opening \"" << input_filename << "\"\n";
+		return -1;
+	}
+	
+	// read trees until an empty one signals the end of the input
+	vector< PhyloTree< TreeNode > > tree_list;
+	while( true )
+	{
+		PhyloTree< TreeNode > new_t;
+		tree_list.push_back( new_t );
+		PhyloTree< TreeNode >& t = tree_list[tree_list.size() - 1];
+		t.readTree( input_file );
+		if( t.size() == 0 )
+			break;
+	}
+	// drop the empty tree that terminated the loop
+	tree_list.erase( tree_list.end() - 1 );
+
+	for( size_t treeI = 0; treeI < tree_list.size(); treeI++ )
+	{
+		PhyloTree< TreeNode >& t = tree_list[treeI];
+
+		if( t[t.root].children.size() != 2 )
+		{
+			cout << treeI << "\t1\n";
+			continue;
+		}
+
+		vector< node_id_t > group1_id;
+		vector< node_id_t > group2_id;
+		if( !findTaxonNodes( t, group_1, treeI, group1_id ) )
+			return -1;
+		if( !findTaxonNodes( t, group_2, treeI, group2_id ) )
+			return -1;
+
+		// a group is monophyletic when no member of the other group sits
+		// below the common ancestor of its own members
+		const bool g1_monophyletic = !containsAnyNode( t, findGroupAncestor( t, group1_id ), group2_id );
+		const bool g2_monophyletic = !containsAnyNode( t, findGroupAncestor( t, group2_id ), group1_id );
+
+		if( !g1_monophyletic && !g2_monophyletic )
+			cout << treeI << "\t2\n"; // found something interesting?
+		else if( !g1_monophyletic )
+			cout << treeI << "\t3\n";
+		else if( !g2_monophyletic )
+			cout << treeI << "\t4\n";
+		else
+			cout << treeI << "\t0\n"; // nothing to see here
+	}
+
+	ofstream tree_out( tree_outfname.c_str() );
+	if( !tree_out.is_open() )
+	{
+		cerr << "Error opening \"" << tree_outfname << "\"\n";
+		return -1;
+	}
+	for( size_t treeI = 0; treeI < tree_list.size(); treeI++ )
+	{
+		setTaxonNames( tree_list[treeI], const_cast< char** >( taxon_names ) );
+		tree_list[treeI].writeTree(tree_out);
+	}
+	return 0;
+}
\ No newline at end of file
diff --git a/src/coordinateTranslate.cpp b/src/coordinateTranslate.cpp
new file mode 100644
index 0000000..ad6a4fc
--- /dev/null
+++ b/src/coordinateTranslate.cpp
@@ -0,0 +1,51 @@
+// coordinateTranslate
+// (c) Aaron Darling 2011
+// Licensed under the GPL
+
+#include <libMems/IntervalList.h>
+#include <fstream>
+#include <libMems/CompactGappedAlignment.h>
+#include <libMems/MatchList.h>
+
+using namespace mems;
+using namespace std;
+using namespace genome;
+
+/**
+ * Translate alignment coordinates (block, column) into genome positions.
+ *
+ * Arguments: <XMFA alignment> <alignment coordinate file>.  For every
+ * "<block ID> <column>" pair read from the coordinate file one line is
+ * printed holding, per genome, the position returned by GetColumn(), or 0
+ * where GetColumn() flags the genome as absent from that column.
+ *
+ * @return 0 on success, -1 on bad usage, -2 when a file cannot be opened,
+ *         -3 when the coordinate file holds a malformed or out-of-range entry.
+ */
+int main( int argc, char* argv[] ){
+	if(argc != 3){
+		cerr << "Usage: coordinateTranslate <XMFA alignment> <alignment coordinate file>\n";
+		cerr << "Alignment coordinate file should be structured into 2 tab-delimited columns: <block ID> <column>\n";
+		cerr << "Output will be the nearest aligned position for each genome in the block, with 0 entries for genomes undefined in the block\n";
+		return -1;
+	}
+	ifstream in_aln( argv[1] );
+	if(!in_aln.is_open() ){
+		cerr << "Error opening alignment file \"" << argv[1] << "\"\n";
+		return -2;
+	}
+	IntervalList iv_list;
+	iv_list.ReadStandardAlignment(in_aln);
+	LoadSequences( iv_list, NULL );
+
+	ifstream in_coords( argv[2] );
+	if(!in_coords.is_open() ){
+		cerr << "Error opening coordinate file \"" << argv[2] << "\"\n";
+		return -2;
+	}
+	int block_id;
+	while( in_coords >> block_id ){
+		int block_col;
+		if( !( in_coords >> block_col ) ){
+			cerr << "Error: missing or malformed column for block " << block_id << "\n";
+			return -3;
+		}
+		// the coordinate file is external input: never index the interval
+		// list with a block id it does not contain
+		if( block_id < 0 || static_cast< size_t >( block_id ) >= iv_list.size() || block_col < 0 ){
+			cerr << "Error: block " << block_id << " column " << block_col << " is outside the alignment\n";
+			return -3;
+		}
+		std::vector<gnSeqI> pos;
+		std::vector<bool> column;
+		iv_list[block_id].GetColumn( block_col, pos, column );
+		for( size_t i = 0; i < pos.size(); i++ ){
+			if(i>0) cout << "\t";
+			cout << (column[i] ? pos[i] : 0); 
+		}
+		cout << "\n";
+	}
+	return 0;
+}
+
+
diff --git a/src/countInPlaceInversions.cpp b/src/countInPlaceInversions.cpp
new file mode 100644
index 0000000..025a0d1
--- /dev/null
+++ b/src/countInPlaceInversions.cpp
@@ -0,0 +1,69 @@
+#include "libMems/IntervalList.h"
+#include "libMems/Aligner.h"
+#include <fstream>
+#include <string>
+#include <vector>
+#include <utility>
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+/**
+ * List in-place inversions found in a Mauve alignment.
+ *
+ * An LCB whose left and right adjacencies are the same in every sequence as
+ * in sequence 0 is examined for orientation: the sequences whose orientation
+ * (sign of left_end) disagrees with the majority are reported as inverted.
+ *
+ * @return 0 on success, -1 on bad usage or an unreadable alignment file.
+ */
+int main( int argc, char* argv[] )
+{
+	if( argc != 2 )
+	{
+		cerr << "Usage: countInPlaceInversions <Mauve Alignment>\n";
+		return -1;
+	}
+	ifstream aln_file( argv[1] );
+	if( !aln_file.is_open() )
+	{
+		cerr << "Error opening \"" << argv[1] << "\"\n";
+		return -1;
+	}
+
+	IntervalList iv_list;
+	iv_list.ReadList( aln_file );
+	vector< int64 > weights = vector< int64 >( iv_list.size(), 1 );
+	vector< LCB > adjacencies;
+	computeLCBAdjacencies_v2( iv_list, weights, adjacencies );
+	uint seq_count = iv_list.seq_filename.size();
+	vector< pair< uint, uint > > inv_seqs;
+
+	for( uint adjI = 0; adjI < adjacencies.size(); adjI++ )
+	{
+		// find in place inversions
+		uint seqI = 1;
+		for( ; seqI < seq_count; seqI++ )
+		{
+			if( adjacencies[adjI].left_adjacency[0] != adjacencies[adjI].left_adjacency[seqI] ||
+				adjacencies[adjI].right_adjacency[0] != adjacencies[adjI].right_adjacency[seqI] )
+				break;
+		}
+		if( seqI != seq_count )
+			continue;	// neighbourhood differs somewhere: not in place
+
+		// in place inversion
+		// count forward
+		uint forward_count = 0;
+		for( seqI = 0; seqI < seq_count; seqI++ )
+		{
+			if( adjacencies[adjI].left_end[seqI] > 0 )
+				forward_count++;
+		}
+		// Majority vote over all sequences.  The vote has to be compared
+		// with the number of sequences; comparing it with the loop index
+		// made the verdict depend on the position of the sequence.
+		const bool mostly_forward = forward_count * 2 > seq_count;
+		const bool mostly_reverse = forward_count * 2 < seq_count;
+		for( seqI = 0; seqI < seq_count; seqI++ )
+		{
+			if( mostly_forward && adjacencies[adjI].left_end[seqI] < 0 )
+				inv_seqs.push_back( make_pair( adjI, seqI ) );
+			if( mostly_reverse && adjacencies[adjI].left_end[seqI] > 0 )
+				inv_seqs.push_back( make_pair( adjI, seqI ) );
+		}
+	}
+	for( uint invI = 0; invI < inv_seqs.size(); invI++ )
+	{
+		const LCB& lcb = adjacencies[inv_seqs[invI].first];
+		const uint seqI = inv_seqs[invI].second;
+		cout << "In-place inversion in seq " << seqI;
+		cout << "\tlend: " << lcb.left_end[seqI];
+		cout << "\trend: " << lcb.right_end[seqI] << endl;
+	}
+	return 0;
+}
diff --git a/src/createBackboneMFA.cpp b/src/createBackboneMFA.cpp
new file mode 100644
index 0000000..ce5533b
--- /dev/null
+++ b/src/createBackboneMFA.cpp
@@ -0,0 +1,57 @@
+#include "libMems/Interval.h"
+#include "libMems/Islands.h"
+#include "libGenome/gnFASSource.h"
+
+using namespace std;
+using namespace mems;
+using namespace genome;
+
+/**
+ * Concatenate the aligned sequences of every 30th interval of an interval
+ * file into one super-alignment and write it as multi-FastA.
+ *
+ * Arguments: <input interval file> <output MFA name>.  Each output entry is
+ * named after the index of its sequence.
+ *
+ * @return 0 on success, -1 on bad usage or unreadable input, -2 when the
+ *         output file cannot be opened.
+ */
+int main( int argc, char* argv[] )
+{
+	IntervalList iv_list;
+	if( argc != 3 )
+	{
+		cerr << "Usage: <input interval file> <output MFA name>\n";
+		return -1;
+	}
+	ifstream in_file( argv[1] );
+	if( !in_file.is_open() )
+	{
+		cerr << "Error opening \"" << argv[1] << "\"\n";
+		return -1;
+	}
+	iv_list.ReadList( in_file );
+	LoadSequences(iv_list, NULL);
+	string base_name = argv[2];
+	cout << "Input alignment has " << iv_list.size() << " intervals\n";
+
+	// one growing alignment row per input sequence
+	vector< string > superaln( iv_list.seq_table.size() );
+	for( uint lcbI = 0; lcbI < iv_list.size(); lcbI++ )
+	{
+		// only use 1/30 LCBs
+		if( lcbI % 30 == 0 )
+		{
+			gnAlignedSequences gnas;
+			iv_list[lcbI].GetAlignedSequences( gnas, iv_list.seq_table );
+			for( uint rowI = 0; rowI < gnas.sequences.size(); rowI++ )
+				superaln[rowI] += gnas.sequences[rowI];
+		}
+	}
+
+	ofstream out_file( base_name.c_str() );
+	if( !out_file.is_open() )
+	{
+		cerr << "Error opening \"" << base_name << "\"\n";
+		return -2;
+	}
+	gnSequence gns;
+	for( uint rowI = 0; rowI < superaln.size(); rowI++ )
+	{
+		// append the row as a new contig and label it with its index
+		stringstream contig_label;
+		contig_label << rowI;
+		gns += superaln[rowI];
+		gns.setContigName( gns.contigListSize()-1, contig_label.str() );
+	}
+	gnFASSource::Write( gns, out_file, false, false );
+	return 0;
+}
+
diff --git a/src/evd.cpp b/src/evd.cpp
new file mode 100644
index 0000000..932d1bc
--- /dev/null
+++ b/src/evd.cpp
@@ -0,0 +1,129 @@
+#include "libMems/Islands.h"
+#include "libMems/IntervalList.h"
+#include "libMems/MatchList.h"
+#include "libGenome/gnSequence.h"
+
+#include <sstream>
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+/**
+ * Collect the local record heights of all score excursions in a set of
+ * alignments.
+ *
+ * For every alignment in iv_list and every pair of sequences the column
+ * scores (match scores plus gap scores) are negated and scanned from left to
+ * right.  An excursion starts at a positive score while the running sum is
+ * zero and ends when the next score would push the running sum below zero;
+ * the largest running sum seen during the excursion is appended to lrh.
+ * An excursion that is still open at the last column is not recorded.
+ *
+ * @param iv_list    container of pointers to alignments
+ * @param seq_table  sequences the alignments refer to
+ * @param lrh        receives one entry per completed excursion
+ */
+template< typename MatchVector >
+void getLocalRecordHeights( const MatchVector& iv_list, std::vector< genome::gnSequence* >& seq_table, vector< score_t >& lrh )
+{
+	typedef typename MatchVector::value_type MatchType;
+	if( iv_list.size() == 0 )
+		return;
+	const uint seq_count = seq_table.size();
+	for( uint alnI = 0; alnI < iv_list.size(); alnI++ )
+	{
+		const MatchType& cur_match = iv_list[ alnI ];
+		std::vector< std::string > aln_table;
+		GetAlignment( *cur_match, seq_table, aln_table );
+
+		for( uint seqI = 0; seqI < seq_count; seqI++ )
+		{
+			for( uint seqJ = seqI + 1; seqJ < seq_count; seqJ++ )
+			{
+				std::vector< score_t > scores;
+				PairwiseScoringScheme pss;
+				computeMatchScores( aln_table[seqI], aln_table[seqJ], pss, scores );
+				computeGapScores( aln_table[seqI], aln_table[seqJ], pss, scores );
+
+				// Invert the scores since we're trying to detect rare bouts of non-homologous sequence
+				for( size_t sI = 0; sI < scores.size(); ++sI )
+				{
+					if( scores[sI] == INVALID_SCORE )
+						continue;
+					scores[sI] = -scores[sI];
+				}
+
+				score_t running = 0;	// start in an hss
+				score_t record = 0;
+				for( size_t colI = 0; colI < scores.size(); ++colI )
+				{
+					if( scores[colI] == INVALID_SCORE )
+						continue;
+
+					if( running > 0 && running + scores[colI] < 0 )
+					{
+						// end of an excursion
+						running = 0;
+						lrh.push_back( record );
+						record = 0;
+					}
+					else if( ( running == 0 && scores[colI] > 0 ) || running > 0 )
+					{
+						// start a new excursion or extend the current one
+						running += scores[colI];
+						if( running > record )
+							record = running;
+					}
+				}
+			}
+		}
+	}
+}
+
+
+/**
+ * Read the alignment of every simulation run found below the current
+ * directory (alignjob.<run>/evolved.dat with alignjob.<run>/evolved_seqs.fas)
+ * and write summary statistics about the excursion score distribution to
+ * stdout.  Runs whose alignment cannot be opened are reported and skipped.
+ *
+ * @return 0 on success, -1 on bad usage or when no excursion was found at
+ *         all (no threshold can be reported in that case).
+ */
+int main( int argc, char* argv[] )
+{
+	vector< score_t > lrh_all;
+	if( argc != 2 )
+	{
+		cerr << "Usage: evd <simulation run count>\n";
+		cerr << "This program must be run from a directory which contains alignjob directories\n";
+		return -1;
+	}
+	int run_count = atoi( argv[1] );
+	int simu_count = 0;
+	for( int runI = 0; runI < run_count; ++runI )
+	{
+		IntervalList iv_list;
+		stringstream aln_fname;
+		aln_fname << "alignjob." << runI << "/evolved.dat";
+		ifstream in_file( aln_fname.str().c_str() );
+		if( !in_file.is_open() )
+		{
+			cerr << "Error opening " << aln_fname.str() << endl;
+			continue;
+		}
+		simu_count++;
+		iv_list.ReadStandardAlignment(in_file);
+		stringstream seq_fname;
+		seq_fname << "alignjob." << runI << "/evolved_seqs.fas";
+		MatchList ml;
+		LoadMFASequences(ml, seq_fname.str(), &cout);
+		iv_list.seq_table = ml.seq_table;
+
+		vector< Interval* > iv_ptrs( iv_list.size() );
+		for( size_t ivI = 0; ivI < iv_list.size(); ++ivI )
+			iv_ptrs[ivI] = &iv_list[ivI];
+
+		vector< score_t > lrh;
+		getLocalRecordHeights( iv_ptrs, iv_list.seq_table, lrh );
+		lrh_all.insert( lrh_all.end(), lrh.begin(), lrh.end() );
+	}
+	std::sort( lrh_all.begin(), lrh_all.end() );
+	cout << "Total number of simulations: " << simu_count << endl;
+	cout << "Total number of excursions: " << lrh_all.size() << endl;
+	// without any excursion there is no element to report as a threshold;
+	// indexing the empty vector would be undefined behaviour
+	if( lrh_all.empty() )
+	{
+		cerr << "No excursions were found, so no score thresholds can be reported\n";
+		return -1;
+	}
+
+	const double quantiles[] = { .95, .99, .999, .9999 };
+	const char* const labels[] = { "95%", "99%", "99.9%", "99.99%" };
+	for( size_t qI = 0; qI < sizeof( quantiles ) / sizeof( quantiles[0] ); ++qI )
+	{
+		// truncate to an index and clamp it to the last element
+		size_t index = lrh_all.size() * quantiles[qI];
+		index = std::min( index, lrh_all.size()-1 );
+		cout << labels[qI] << " score threshold: " << lrh_all[index] << endl;
+		cout << "Number excursions above " << labels[qI] << ": " << lrh_all.size() - index << endl;
+	}
+	return 0;
+}
+
+
diff --git a/src/extractBCITrees.cpp b/src/extractBCITrees.cpp
new file mode 100644
index 0000000..7e9d6a5
--- /dev/null
+++ b/src/extractBCITrees.cpp
@@ -0,0 +1,369 @@
+#include "libMems/PhyloTree.h"
+#include <vector>
+#include <sstream>
+#include <algorithm>
+#include <utility>
+#include <fstream>
+#include <boost/random/uniform_real.hpp>
+#include <boost/random/lagged_fibonacci.hpp>
+
+using namespace std;
+
+typedef unsigned int uint;
+
+/**
+ * Order two taxon labels by the integer each one starts with.
+ *
+ * A label that does not start with an integer counts as 0, so the result is
+ * deterministic for every input (the numbers used to be left uninitialised
+ * when parsing failed under pre-C++11 rules).
+ *
+ * @return true when the number parsed from name1 is smaller than the number
+ *         parsed from name2
+ */
+bool taxonNameLessThan( const string& name1, const string& name2 )
+{
+	stringstream n1_str( name1 );
+	stringstream n2_str( name2 );
+	int n1 = 0;
+	int n2 = 0;
+	n1_str >> n1;
+	n2_str >> n2;
+	return n1 < n2;
+}
+
+/**
+ * Remove every element equal to item from container.
+ *
+ * The surviving elements are collected in a second container which is
+ * assigned back at the very end, so item may safely refer to an element of
+ * container itself: it is never modified while the comparison is running.
+ */
+template<class T, class S>
+void findAndErase( T& container, S& item )
+{
+	T survivors;
+	typename T::iterator cursor = container.begin();
+	while( cursor != container.end() )
+	{
+		if( *cursor != item )
+			survivors.push_back( *cursor );
+		cursor++;
+	}
+	container = survivors;
+};
+
+/**
+ * Test whether query_nodeI can be reached from subtree_nodeI by following
+ * children lists only; subtree_nodeI itself counts as reachable.  The tree
+ * is explored with an explicit stack rather than by recursion.
+ */
+bool containsNode( PhyloTree< TreeNode >& t, node_id_t subtree_nodeI, node_id_t query_nodeI )
+{
+	stack< node_id_t > to_visit;
+	to_visit.push( subtree_nodeI );
+	while( !to_visit.empty() )
+	{
+		const node_id_t here = to_visit.top();
+		to_visit.pop();
+		if( here == query_nodeI )
+			return true;
+		const size_t child_count = t[here].children.size();
+		for( size_t cI = 0; cI < child_count; cI++ )
+			to_visit.push( t[here].children[cI] );
+	}
+	return false;
+}
+
+
+/**
+ * Re-root the tree at the internal node new_root.
+ *
+ * Throws a string literal (const char*) when new_root has no children and
+ * returns without touching the tree when new_root already is the root.
+ *
+ * Otherwise the function works in two steps:
+ *  1. The child of the old root whose subtree contains new_root is moved
+ *     from the old root's children list to its parents list.
+ *  2. t.root is set to new_root, the parents of new_root are appended to its
+ *     children, and the tree is then walked downwards from new_root with an
+ *     explicit stack.  For every child visited, the node it was reached from
+ *     is removed from the child's children and parents lists, the child's
+ *     remaining parents are appended to its children, and the node it was
+ *     reached from becomes its only parent.
+ *
+ * NOTE(review): nothing in this function clears the parents list of new_root
+ * itself, so the new root appears to keep its old parent entry -- confirm
+ * whether callers and writeTree() tolerate that.
+ */
+void rerootTree( PhyloTree< TreeNode >& t, node_id_t new_root )
+{
+	// new root must be an internal node
+	if( t[new_root].children.size() == 0 )
+		throw "Can't root on a leaf node";
+	if( new_root == t.root )
+		return;	// the tree is already rooted here, nothing to do
+
+	// change the old root node to an internal node
+	uint childI = 0;
+	for( ; childI < t[t.root].children.size(); childI++ ){
+		if( containsNode( t, t[t.root].children[childI], new_root ) )
+		{
+			t[t.root].parents.push_back( t[t.root].children[childI] );
+			findAndErase( t[t.root].children, t[t.root].children[childI] ); // second argument refers to an element of the first
+			break;
+		}
+	}
+	// shake the tree out on the new root node
+	t.root = new_root;
+	t[t.root].children.insert( t[t.root].children.end(), t[t.root].parents.begin(), t[t.root].parents.end() );
+
+	stack<node_id_t> node_stack;
+	node_stack.push(t.root);
+	while( node_stack.size() > 0 )
+	{
+		// remove the current node from the children and parents lists of
+		// each of its children, turn whatever parents such a child has left
+		// into children of that child, make the current node its only
+		// parent, then continue with the child itself
+		node_id_t cur_node = node_stack.top();
+		node_stack.pop();
+		for( uint childI = 0; childI < t[cur_node].children.size(); childI++ )
+		{
+			TreeNode& child_n = t[t[cur_node].children[childI]]; // the child being re-parented
+			findAndErase( child_n.children, cur_node );
+			findAndErase( child_n.parents, cur_node );
+			child_n.children.insert( child_n.children.end(), child_n.parents.begin(), child_n.parents.end() );
+			child_n.parents.clear();
+			child_n.parents.push_back(cur_node);
+			node_stack.push(t[cur_node].children[childI]);
+		}
+	}
+}
+
+/**
+ * Find the leaf below nodeI whose taxon name is the least according to
+ * taxonNameLessThan(), i.e. compared by the number the name starts with.
+ * Returns nodeI itself when no leaf name was recorded.
+ */
+node_id_t getRepresentativeTaxon( PhyloTree< TreeNode >& t, node_id_t nodeI )
+{
+	string best_name = "";
+	node_id_t best_node = nodeI;
+	stack< node_id_t > pending;
+	pending.push( nodeI );
+	while( !pending.empty() )
+	{
+		const node_id_t visiting = pending.top();
+		pending.pop();
+		const size_t kid_count = t[visiting].children.size();
+		if( kid_count > 0 )
+		{
+			// internal node: keep descending
+			for( size_t kidI = 0; kidI < kid_count; kidI++ )
+				pending.push( t[visiting].children[kidI] );
+			continue;
+		}
+		// leaf: take it when nothing has been recorded yet or when it
+		// sorts before the best name seen so far
+		if( best_name == "" || taxonNameLessThan( t[visiting].name, best_name ) )
+		{
+			best_name = t[visiting].name;
+			best_node = visiting;
+		}
+	}
+	return best_node;
+}
+
+/**
+ * Strict-weak-ordering functor for (taxon name, index) pairs.
+ * Only the names take part in the comparison; they are ordered with
+ * taxonNameLessThan().  The call operator is const and both parameters have
+ * the same pair type, so the functor can be used through const references
+ * and no argument has to be converted before the comparison.
+ */
+class TaxonNamePairComparator
+{
+public:
+	bool operator()( const pair<string, node_id_t>& p1, const pair<string, node_id_t>& p2 ) const
+	{
+		return taxonNameLessThan( p1.first, p2.first );
+	}
+};
+
+/**
+ * Put the children of every internal node into a canonical order.
+ * Each child subtree is represented by the leaf that
+ * getRepresentativeTaxon() picks for it, and the children are rearranged in
+ * the order TaxonNamePairComparator gives to those representative names.
+ */
+void sortTaxa( PhyloTree< TreeNode >& t )
+{
+	for( node_id_t nodeI = 0; nodeI < t.size(); nodeI++ )
+	{
+		const size_t child_count = t[nodeI].children.size();
+		if( child_count == 0 )
+			continue;
+
+		// pair the representative name of each child subtree with the
+		// child's current position
+		vector< pair<string, node_id_t> > keyed_children( child_count );
+		for( size_t posI = 0; posI < child_count; posI++ )
+		{
+			const node_id_t rep_node = getRepresentativeTaxon( t, t[nodeI].children[ posI ] );
+			keyed_children[ posI ] = make_pair( t[rep_node].name, posI );
+		}
+
+		// order the positions by representative name
+		sort( keyed_children.begin(), keyed_children.end(), TaxonNamePairComparator() );
+
+		// rebuild the children list in that order
+		vector< node_id_t > reordered;
+		for( size_t posI = 0; posI < child_count; posI++ )
+			reordered.push_back( t[nodeI].children[ keyed_children[posI].second ] );
+		t[nodeI].children = reordered;
+	}
+}
+
+/**
+ * Shift numeric taxon labels from 1-based to 0-based.
+ *
+ * Every node name that starts with a positive number is replaced by that
+ * number minus one.  Names that are empty, do not start with a number, or
+ * are already 0 are left untouched; decrementing them would wrap around to
+ * a huge unsigned value.
+ */
+void relabelTaxaToStartWithZero( PhyloTree< TreeNode >& t )
+{
+	for( node_id_t nodeI = 0; nodeI < t.size(); nodeI++ )
+	{
+		if( t[nodeI].name == "" )
+			continue;
+		stringstream name_str( t[nodeI].name );
+		uint number = 0;
+		// nothing sensible to subtract from: keep the label as it is
+		if( !( name_str >> number ) || number == 0 )
+			continue;
+		number--;
+		stringstream new_name_str;
+		new_name_str << number;
+		t[nodeI].name = new_name_str.str();
+	}
+}
+
+/**
+ * Collect the trees of one or more MrBayes .trprobs files, merge identical
+ * topologies, and write at most <max output trees> of them.
+ *
+ * Arguments: <random seed> <BCI threshold> <max output trees>
+ *            <.trprobs input file 1 .. N> <nexus output file>.
+ *
+ * Each "tree" line is expected to carry the cumulative posterior as its 8th
+ * whitespace-separated token and the weight as its 11th, followed by the
+ * tree.  Reading of a file stops at the first tree whose cumulative
+ * posterior exceeds the threshold.  Trees are canonicalised with sortTaxa()
+ * and relabelTaxaToStartWithZero() so that equal topologies compare equal
+ * as text, and the weights of equal trees are summed.  When there are fewer
+ * unique trees than the maximum they are all written; otherwise trees are
+ * drawn without replacement with probability proportional to weight.
+ *
+ * @return 0 on success, -1 on bad usage or a file that cannot be opened,
+ *         -2 when the trees do not all have the same size.
+ */
+int main( int argc, char* argv[] )
+{
+	if( argc < 5 )
+	{
+		cerr << "Usage: extractBCITrees <random seed> <BCI threshold> <max output trees> <MrBayes .trprobs input file 1 .. N> <nexus output file>\n";
+		cerr << "This program reads all trees and their posterior from a set of MrBayes .trprobs files\n";
+		cerr << "and sums and normalizes posteriors for each topology.  All trees that meet a Bayes Credible\n";
+		cerr << "Interval threshold will be saved, up to some maximum number of trees.\n";
+		cerr << "<BCI Threshold>\tA number between 0 and 1 giving the BCI threshold.  0.9 is a good choice.\n";
+		cerr << "<max output trees>\tLimit the output to this many trees.\n";
+		cerr << "All trees in the input file must have the same number of taxa and the same taxon labels\n";
+		return -1;
+	}
+	boost::uint32_t prng_seed = atoi( argv[1] );
+	double bci_threshold = atof( argv[2] );
+	uint max_output_trees = atoi( argv[3] );
+	vector< string > trprobs_fnames;
+	for( int argI = 4; argI < argc - 1; argI++ )
+		trprobs_fnames.push_back( argv[argI] );
+	if( trprobs_fnames.size() == 0 )
+	{
+		cerr << "At least one .trprobs file must be given\n";
+		return -1;
+	}
+	string output_filename = argv[argc-1];
+
+
+	ofstream output_file( output_filename.c_str() );
+	if( !output_file.is_open() )
+	{
+		cerr << "Error opening \"" << output_filename << "\"\n";
+		return -1;
+	}
+	
+	size_t tree_sizes = 0;
+	vector< pair< string, double > > tree_and_pp_list;
+	for( size_t fileI = 0; fileI < trprobs_fnames.size(); fileI++ )
+	{
+		ifstream input_file( trprobs_fnames[fileI].c_str() );
+		if( !input_file.is_open() )
+		{
+			cerr << "Error opening \"" << trprobs_fnames[fileI] << "\"\n";
+			return -1;
+		}
+		// scan ahead to start of trees
+		string cur_line;
+		while( getline( input_file, cur_line ) )
+		{
+			stringstream line_str( cur_line );
+			string first_token;
+			line_str >> first_token;
+			if( first_token == "tree" )
+				break;
+		}
+		do
+		{
+			stringstream line_str( cur_line );
+			string token;
+			line_str >> token;
+			if( token != "tree" )
+				break;
+			for( int i = 0; i < 6; i++ )
+				line_str >> token;
+
+			line_str >> token;
+			// read the cumulative posterior
+			stringstream cum_str( token );
+			string cum;
+			getline( cum_str, cum, ']' );
+			double cumulative = 0;
+			stringstream cc_str(cum);
+			// parse the text in front of ']'; cum_str is already exhausted
+			// here, and reading from it left cumulative at 0 so that the
+			// threshold below never took effect
+			cc_str >> cumulative;
+			// NOTE(review): the tree that crosses the threshold is itself
+			// left out -- confirm this is the intended credible set
+			if( cumulative > bci_threshold )
+				break;
+
+			for( int i = 0; i < 3; i++ )
+				line_str >> token;
+
+			// read the weight
+			stringstream w_str( token );
+			string w;
+			getline( w_str, w, ']' );
+			double weight = 0;
+			stringstream ww_str(w);
+			ww_str >> weight;
+
+			// read the tree
+			line_str >> token;
+			stringstream tree_str( token );
+			PhyloTree< TreeNode > t;
+			t.readTree( tree_str );
+			if( t.size() == 0 )
+				break;
+			if( tree_sizes == 0 )
+				tree_sizes = t.size();
+			if( t.size() != tree_sizes )
+			{
+				// number the offending tree by its position among all trees read
+				cerr << "Error: tree " << tree_and_pp_list.size() + 1 << " has a different number of taxa\n";
+				return -2;
+			}
+			sortTaxa( t );
+			relabelTaxaToStartWithZero( t );
+			stringstream ss;
+			t.writeTree(ss);
+			tree_and_pp_list.push_back(make_pair(ss.str(),weight));
+			cout << "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b";
+			cout << "Read " << tree_and_pp_list.size() << " trees";
+		}while( getline( input_file, cur_line ) );
+
+	}
+
+	// equal trees become neighbours, which is what the merge below relies on
+	sort( tree_and_pp_list.begin(), tree_and_pp_list.end() );
+
+	long unique_count = 0;
+
+	// identify unique trees
+	vector< pair< string, double > > unique_tree_and_pp_list;
+	for( size_t treeI = 0; treeI < tree_and_pp_list.size(); treeI++ )
+	{
+		if( treeI > 0 && tree_and_pp_list[treeI].first == tree_and_pp_list[treeI - 1].first )
+		{
+			unique_tree_and_pp_list.back().second += tree_and_pp_list[treeI].second;
+			continue;
+		}
+		unique_tree_and_pp_list.push_back( tree_and_pp_list[treeI] );
+		unique_count++;
+	}
+
+	// if the number of unique trees is less than the max, just write them out
+	// otherwise we need to subsample	
+	if( unique_tree_and_pp_list.size() < max_output_trees )
+	{
+		cout << endl;
+		cout << "Writing unique trees to \"" << output_filename << "\"\n";
+		for( size_t treeI = 0; treeI < unique_tree_and_pp_list.size(); treeI++ )
+			output_file << unique_tree_and_pp_list[treeI].first;
+		cerr << "There are " << unique_count << " unique trees\n";
+		return 0;
+	}
+
+	// sample trees without replacement, proportionally to their weight
+	vector< string > subsample;
+	boost::lagged_fibonacci44497 rng;
+	rng.seed(prng_seed);
+	for( size_t treeI = 0; treeI < max_output_trees; treeI++ )
+	{
+		// Posterior mass of the trees that are still available.  It has to
+		// shrink with every draw: throwing the dart over the original total
+		// made it land beyond the remaining mass and pick the last tree
+		// over and over again.
+		double remaining = 0;
+		for( size_t i = 0; i < unique_tree_and_pp_list.size(); i++ )
+			remaining += unique_tree_and_pp_list[i].second;
+		if( remaining <= 0 )
+			break;	// every tree with a positive weight has been drawn
+
+		// get a random number
+		boost::uniform_real<> url( 0, remaining );
+		double dart = url(rng);
+		double cursum = 0;
+		size_t i = 0;
+		for( ; i < unique_tree_and_pp_list.size(); i++ )
+		{
+			cursum += unique_tree_and_pp_list[i].second;
+			if( cursum > dart )
+				break;
+		}
+		if( i == unique_tree_and_pp_list.size() )
+		{
+			// defensive fallback: take the last tree that still has weight
+			do{
+				i--;
+			}while( i > 0 && unique_tree_and_pp_list[i].second <= 0 );
+		}
+		// a weight of zero takes the tree out of all later draws
+		unique_tree_and_pp_list[i].second = 0;
+		subsample.push_back( unique_tree_and_pp_list[i].first );
+	}
+
+
+	cout << endl;
+	cout << "Writing unique trees to \"" << output_filename << "\"\n";
+	for( size_t treeI = 0; treeI < subsample.size(); treeI++ )
+		output_file << subsample[treeI];
+	cerr << "There are " << unique_count << " unique trees\n";
+	cerr << "The subsample contains " << subsample.size() << " trees\n";
+	return 0;
+}
diff --git a/src/extractBackbone.cpp b/src/extractBackbone.cpp
new file mode 100644
index 0000000..cc432ce
--- /dev/null
+++ b/src/extractBackbone.cpp
@@ -0,0 +1,83 @@
+/*******************************************************************************
+ * $Id: extractBackbone.cpp,v 1.2 2004/02/28 00:01:31 darling Exp $
+ * This file is copyright 2002-2004 Aaron Darling.  All rights reserved.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * rights.  Redistribution of this file, in whole or in part is prohibited
+ * without express permission.
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/IntervalList.h"
+#include "libMems/Islands.h"
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+void print_usage( const char* pname ){	// pname: program name to show; the synopsis is written to stderr
+	cerr << ( string( "Usage: " ) + pname + " <source sequences> <source alignment> <min bb sequence length> <max bb gap size> <backbone output>\n" );
+}
+
+int main( int argc, const char* argv[] ){
+	// Extracts backbone regions from a standard alignment and writes them
+	// out as a new standard alignment.  Exit status is 0 on success, -1 on
+	// a usage, sequence loading or file opening error.
+	if( argc != 6 ){
+		// argv[0] cannot be relied on when argc <= 0, so fall back to a fixed name
+		print_usage( argc <= 0 ? "extractBackbone" : argv[0] );
+		return -1;
+	}
+
+	string sequence_fname( argv[1] );
+	string alignment_fname( argv[2] );
+	int64 min_bb_length = atol( argv[3] );
+	int64 max_gap_length = atol( argv[4] );
+	string output_fname( argv[5] );
+
+	// load the source sequences first; a gnException is printed and is fatal
+	gnSequence source_seqs;
+	try{
+		source_seqs.LoadSource( sequence_fname );
+	}catch( gnException& gne ){
+		cerr << gne << endl;
+		return -1;
+	}
+
+	ifstream alignment_in( alignment_fname.c_str() );
+	if( !alignment_in.is_open() ){
+		cerr << "Error opening " << alignment_fname << endl;
+		return -1;
+	}
+
+	IntervalList aligned_ivs;
+	aligned_ivs.ReadStandardAlignment( alignment_in );
+
+	// give the interval list a heap-allocated copy of every contig
+	for( uint contigI = 0; contigI < source_seqs.contigListSize(); contigI++ ){
+		gnSequence* contig_copy = new gnSequence( source_seqs.contig( contigI ) );
+		aligned_ivs.seq_table.push_back( contig_copy );
+	}
+
+	vector< GappedAlignment > backbone_data;
+	simpleFindBackbone( aligned_ivs, min_bb_length, max_gap_length, backbone_data );
+	// wrap each backbone segment in its own Interval of a fresh list whose
+	// sequence table holds the same pointers as the input alignment's
+	IntervalList backbone_ivs;
+	backbone_ivs.seq_table = aligned_ivs.seq_table;
+	vector<AbstractMatch*> single_match( 1 );
+	for( uint bbI = 0; bbI < backbone_data.size(); bbI++ ){
+		single_match[0] = &backbone_data[ bbI ];
+		backbone_ivs.push_back( Interval( single_match.begin(), single_match.end() ) );
+	}
+
+	ofstream output( output_fname.c_str() );
+	if( !output.is_open() ){
+		cerr << "Error opening " << output_fname << endl;
+		return -1;
+	}
+	backbone_ivs.WriteStandardAlignment( output );
+	return 0;
+}
diff --git a/src/extractBackbone2.cpp b/src/extractBackbone2.cpp
new file mode 100644
index 0000000..4bfe004
--- /dev/null
+++ b/src/extractBackbone2.cpp
@@ -0,0 +1,70 @@
+/*******************************************************************************
+ * $Id: extractBackbone.cpp,v 1.2 2004/02/28 00:01:31 darling Exp $
+ * This file is copyright 2002-2004 Aaron Darling.  All rights reserved.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * rights.  Redistribution of this file, in whole or in part is prohibited
+ * without express permission.
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/IntervalList.h"
+#include "libMems/Islands.h"
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+void print_usage( const char* pname ){	// pname: program name to show; the synopsis is written to stderr
+	cerr << ( string( "Usage: " ) + pname + " <mauve alignment> <min bb sequence length> <max bb gap size> <backbone output>\n" );
+}
+
+int main( int argc, const char* argv[] ){
+	// Extracts backbone regions from a Mauve alignment (interval list format)
+	// and writes them as a new interval list.  Returns 0 on success and -1
+	// on a usage or file opening error.
+	if( argc != 5 ){
+		// argv[0] cannot be relied on when argc <= 0; fall back to this
+		// tool's own name rather than that of the sibling extractBackbone
+		print_usage( argc <= 0 ? "extractBackbone2" : argv[0] );
+		return -1;
+	}
+
+	string alignment_fname = argv[1];
+	int64 min_bb_length = atol( argv[2] );
+	int64 max_gap_length = atol( argv[3] );
+	string output_fname = argv[4];
+
+	ifstream alignment_in( alignment_fname.c_str() );
+	if( !alignment_in.is_open() ){
+		cerr << "Error opening " << alignment_fname << endl;
+		return -1;
+	}
+
+	// read the intervals, then load the sequences; LoadSequences is handed cout as its log stream
+	IntervalList aligned_ivs;
+	aligned_ivs.ReadList( alignment_in );
+	LoadSequences(aligned_ivs, &cout);
+
+	vector< GappedAlignment > backbone_data;
+	simpleFindBackbone( aligned_ivs, min_bb_length, max_gap_length, backbone_data );
+	// construct a new IntervalList containing only backbone regions; it
+	// copies the input's sequence table and sequence file names
+	IntervalList backbone_ivs;
+	backbone_ivs.seq_table = aligned_ivs.seq_table;
+	backbone_ivs.seq_filename = aligned_ivs.seq_filename;
+	for( uint bbI = 0; bbI < backbone_data.size(); bbI++ ){
+		vector< AbstractMatch* > tmp( 1, &backbone_data[ bbI ] );
+		backbone_ivs.push_back( Interval( tmp.begin(), tmp.end() ) );
+	}
+
+	ofstream output( output_fname.c_str() );
+	if( !output.is_open() ){
+		cerr << "Error opening " << output_fname << endl;
+		return -1;
+	}
+	backbone_ivs.WriteList( output );
+	return 0;
+}
diff --git a/src/extractSubalignments.cpp b/src/extractSubalignments.cpp
new file mode 100644
index 0000000..761cc7e
--- /dev/null
+++ b/src/extractSubalignments.cpp
@@ -0,0 +1,96 @@
+#include "libMems/IntervalList.h"
+#include "libGenome/gnFASSource.h"
+#include <sstream>
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+void extractSubAlignment( IntervalList& iv_list, IntervalList& sub_list, uint seqI, gnSeqI lend, gnSeqI length )
+{
+	// Appends to sub_list a cropped copy of the iv_list interval that contains position lend of sequence seqI; appends nothing if none does.
+	uint ivI = 0;
+	for( ; ivI < iv_list.size(); ivI++ )
+		if( iv_list[ivI].LeftEnd(seqI) <= lend &&
+			lend < iv_list[ivI].LeftEnd(seqI) + iv_list[ivI].Length(seqI) )
+			break;
+	if( ivI == iv_list.size() ){	// no interval matched: iv_list[ivI] would index past the end
+		cerr << "Error: no interval contains position " << lend << " of sequence " << seqI << endl;
+		return;
+	}
+	Interval iv(iv_list[ivI]);
+	iv.CropLeft(lend - iv_list[ivI].LeftEnd(seqI), seqI);	// presumably drops what lies left of lend in seqI
+	iv.CropRight(length < iv.Length(seqI) ? iv.Length(seqI) - length : 0, seqI);	// trim only when more than length remains
+	iv.CalculateOffset();
+	sub_list.push_back(iv);
+}
+
+int main( int argc, char* argv[] )
+{
+	// For each line of the spec file, extracts that sub-alignment from the XMFA input and writes it as Multi-FastA to "<base name>.interval_<n>".
+	if( argc != 4 )
+	{
+		cerr << "Usage: extractSubAlignment <XMFA alignment input> <Multi-FastA base name> <sub-alignment spec file>\n";
+		cerr << "where subalignment spec file is tab delimited text of the form:\n";
+		cerr << "<genome id>\t<left end>\t<length>\n";
+		return -1;	// the argv reads below are only valid with exactly three arguments
+	}
+	string alignment_infilename = argv[1];
+	string alignment_outfilename = argv[2];
+	string spec_filename = argv[3];
+
+	ifstream alignment_infile( alignment_infilename.c_str() );
+	if( !alignment_infile.is_open() )
+	{
+		cerr << "Error opening \"" << alignment_infilename << "\"\n";
+		return -1;
+	}
+	IntervalList iv_list, iv_sublist;
+	iv_list.ReadStandardAlignment( alignment_infile );
+
+	ifstream spec_infile( spec_filename.c_str() );
+	if( !spec_infile.is_open() )
+	{
+		cerr << "Error opening \"" << spec_filename << "\"\n";
+		return -1;
+	}
+	vector< gnSequence* > seq_table( iv_list.seq_filename.size(), new gnSequence() );	// every slot holds the same empty gnSequence
+	size_t ivI = 0;	// counts the spec lines handled so far and numbers the output files
+	string cur_line;
+	while( getline( spec_infile, cur_line ) )
+	{
+		stringstream line_str( cur_line );
+		uint seqI;
+		int64 lend;
+		gnSeqI length;
+		if( !(line_str >> seqI >> lend >> length) )	// stop at the first line lacking any of the three fields
+			break;
+		extractSubAlignment( iv_list, iv_sublist, seqI, lend, length );
+		if( iv_sublist.size() == 0 )
+		{
+			cerr << "Error: no sub-alignment could be extracted for spec line " << ivI << "\n";
+			return -1;	// there is no iv_sublist[0] to write
+		}
+		gnAlignedSequences gnas;
+		iv_sublist[0].GetAlignedSequences( gnas, seq_table );
+		stringstream ss;
+		ss << alignment_outfilename << ".interval_" << ivI;
+		ofstream out_file( ss.str().c_str() );
+		if( !out_file.is_open() )
+		{
+			cerr << "Error opening \"" << ss.str() << "\"\n";
+			return -1;
+		}
+		gnSequence mfa;
+		for( uint seqJ = 0; seqJ < seq_table.size(); seqJ++ )	// one contig per sequence, named "<index>(<start>:<start+length>)"
+		{
+			mfa += gnas.sequences[seqJ];
+			stringstream cname;
+			cname << seqJ << "(" << iv_sublist[0].Start(seqJ) << ":" << iv_sublist[0].Start(seqJ) + iv_sublist[0].Length(seqJ) << ")";
+			mfa.setContigName( mfa.contigListLength() - 1, cname.str() );
+		}
+		gnFASSource::Write( mfa, out_file, false, false );
+		iv_sublist.clear();
+		ivI++;
+	}
+}
\ No newline at end of file
diff --git a/src/gappiness.cpp b/src/gappiness.cpp
new file mode 100644
index 0000000..4fe2a35
--- /dev/null
+++ b/src/gappiness.cpp
@@ -0,0 +1,53 @@
+#include "libGenome/gnFASSource.h"
+
+using namespace std;
+using namespace genome;
+
+int main( int argc, char* argv[] )
+{
+	// Prints alignment length, per-sequence ungapped lengths, a gappiness ratio and pairwise identity for a Multi-FastA alignment.
+	if( argc != 2 )
+	{
+		cerr << "Usage: gappiness <MFA file>\n";
+		return -1;	// argv[1] is only valid with exactly one argument
+	}
+	string aln_fname = argv[1];
+	gnSequence gns;
+	gns.LoadSource( aln_fname );
+	cout << "aln_length\t" << gns.contig(0).length() << endl;
+	gnSeqI total_len = 0;
+	for( uint seqI = 0; seqI < gns.contigListSize(); seqI++ )
+	{
+		string cur_seq = gns.contig(seqI).ToString();
+		gnSeqI len = 0;	// number of non-gap characters in this row
+		for( size_t charI = 0; charI < cur_seq.size(); charI++ )
+			if( cur_seq[charI] != '-' )
+				len++;
+		cout << "seq" << seqI << "_len\t" << len << endl;
+		total_len += len;
+	}
+	double avg_seq_len = (double)total_len / (double)gns.contigListSize();
+	cout << "avg_seq_len\t" << avg_seq_len << endl;
+	cout << "gappiness\t" << (double)(gns.contig(0).length()) / avg_seq_len << endl;
+
+	// compute average pairwise identity over columns where neither row has a gap
+	gnSeqI total_id = 0;
+	gnSeqI total_possible = 0;
+	for( uint seqI = 0; seqI < gns.contigListSize(); seqI++ )
+		for( uint seqJ = seqI + 1; seqJ < gns.contigListSize(); seqJ++ )
+		{
+			string cur_seqI = gns.contig(seqI).ToString();
+			string cur_seqJ = gns.contig(seqJ).ToString();
+			for( size_t colI = 0; colI < cur_seqI.size() && colI < cur_seqJ.size(); colI++ )	// stay inside both rows
+			{
+				if( cur_seqI[colI] == '-' || cur_seqJ[colI] == '-' )
+					continue;
+				total_possible++;
+				if( toupper(cur_seqI[colI]) == toupper(cur_seqJ[colI]) )
+					total_id++;
+			}
+		}
+	cout << "percent_id\t" << (double)total_id / (double)total_possible << endl;
+	return 0;
+}
+
diff --git a/src/getAlignmentWindows.cpp b/src/getAlignmentWindows.cpp
new file mode 100644
index 0000000..6ce86cb
--- /dev/null
+++ b/src/getAlignmentWindows.cpp
@@ -0,0 +1,137 @@
+/*******************************************************************************
+ * $Id: getAlignmentWindows.cpp,v 1.2 2004/02/28 00:01:31 darling Exp $
+ * This file is copyright 2002-2004 Aaron Darling.  All rights reserved.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * rights.  Redistribution of this file, in whole or in part is prohibited
+ * without express permission.
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/IntervalList.h"
+#include "libMems/Islands.h"
+#include "libGenome/gnFASSource.h"
+#include "libMems/GappedAlignment.h"
+#include "libMems/Interval.h"
+#include <boost/filesystem/operations.hpp>
+#include <boost/algorithm/string/erase.hpp>
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+void print_usage( const char* pname ){	// pname: program name to show; the synopsis is written to stderr
+	cerr << ( string( "Usage: " ) + pname + " <XMFA alignment> <window length> <window shift amount> <base output filename>\n" );
+}
+
+int main( int argc, const char* argv[] ){
+	// Cuts each aligned interval of an XMFA file into sliding windows and writes every window, plus the whole interval, as Multi-FastA.
+	if( argc != 5 ){	// also covers argc <= 0, where argv[0] cannot be relied on
+		print_usage( argc <= 0 ? "getAlignmentWindows" : argv[0] );
+		return -1;
+	}
+	
+	string alignment_fname = argv[1];
+	int64 window_length = atol( argv[2] );
+	int64 shift_length = atol( argv[3] );
+	string output_basename = argv[4];
+	if( window_length <= 0 || shift_length <= 0 ){	// a shift of 0 would never advance the window loop below
+		cerr << "Error: window length and window shift amount must be positive integers\n";
+		return -1;
+	}
+	ifstream alignment_in( alignment_fname.c_str() );
+	if( !alignment_in.is_open() ){
+		cerr << "Error opening " << alignment_fname << endl;
+		return -1;
+	}
+	IntervalList aligned_ivs;
+	aligned_ivs.ReadStandardAlignment( alignment_in );
+	if( aligned_ivs.size() == 0 ){	// aligned_ivs[0] is read just below
+		cerr << "Error: no aligned intervals found in " << alignment_fname << endl;
+		return -1;
+	}
+	cout << "Read " << aligned_ivs[0].SeqCount() << " sequences with " << aligned_ivs.size() << " aligned intervals from " << alignment_fname << endl;
+	cout.flush();
+	MatchList mlist;
+	mlist.seq_filename = aligned_ivs.seq_filename;
+	if( mlist.seq_filename.size() > 0 )
+		LoadSequences(mlist, &cout);
+	else if( aligned_ivs.size() == 1 )
+	{
+		// no source files are named: rebuild each sequence from the single interval's own alignment text
+		mlist.seq_filename.resize( aligned_ivs[0].SeqCount() );	
+		mlist.seq_table.resize( aligned_ivs[0].SeqCount() );
+		std::vector< mems::AbstractMatch* > matches;
+		aligned_ivs[0].StealMatches(matches);
+		std::vector< string > seqs = mems::GetAlignment( *((mems::GappedAlignment*)matches[0]), mlist.seq_table );
+		for( size_t seqI = 0; seqI < mlist.seq_table.size(); ++seqI )
+		{
+			boost::algorithm::erase_all( seqs[seqI], std::string("-") );	// strip gap characters
+			mlist.seq_table[seqI] = new gnSequence( seqs[seqI] );
+		}
+		aligned_ivs[0].SetMatches( matches );	// hand the matches back to the interval
+	}else{
+		cerr << "Error, source sequence file references not given\n";
+	}
+	// for each interval, extract sliding windows and write them to Multi-FastA files
+	for( uint ivI = 0; ivI < aligned_ivs.size(); ivI++ )
+	{
+		vector< string > alignment;
+		GetAlignment( aligned_ivs[ivI], mlist.seq_table, alignment );
+		Interval& iv = aligned_ivs[ivI];
+		stringstream ivnum;
+		ivnum << ivI;
+		boost::filesystem::path base_path = output_basename;
+		boost::filesystem::create_directory( base_path );
+		boost::filesystem::path iv_path = output_basename;
+		iv_path /= "interval_" + ivnum.str();	// output goes to <base>/interval_<ivI>/
+		boost::filesystem::create_directory( iv_path );
+		for( gnSeqI window_leftend = 0; window_leftend < iv.AlignmentLength(); window_leftend += shift_length )
+		{
+			// the last window is shortened to end at the interval's final column
+			gnSeqI cur_window_size = window_leftend + window_length < iv.AlignmentLength() ? window_length : iv.AlignmentLength() - window_leftend;
+			stringstream window_filename;
+			window_filename << "window_" << window_leftend << "_to_" << window_leftend + cur_window_size - 1 << ".mfa";
+			boost::filesystem::path window_path = iv_path;
+			window_path /= window_filename.str();
+			ofstream out_file( window_path.string().c_str() );
+			if( !out_file.is_open() )
+			{
+				cerr << "Error opening \"" << window_filename.str() << "\"\n";
+				return -2;
+			}
+			// write a multi-FastA with one contig per sequence, named by its index
+			gnSequence gns;
+			for( uint seqI = 0; seqI < iv.SeqCount(); seqI++ )
+			{
+				stringstream seq_name;
+				seq_name << seqI;
+				gns += alignment[seqI].substr(window_leftend, cur_window_size);
+				gns.setContigName( gns.contigListSize()-1, seq_name.str() );
+			}
+			gnFASSource::Write( gns, out_file, false, false );
+			if( cur_window_size < window_length )	// the shortened final window has been written
+				break;
+		}
+		// now write the whole interval as a single MFA
+		boost::filesystem::path lcb_path = iv_path;
+		lcb_path /= "lcb.mfa";
+		ofstream lcb_out( lcb_path.string().c_str() );
+		if( !lcb_out.is_open() )
+		{
+			cerr << "Error opening " << lcb_path.string() << endl;
+			return -3;
+		}
+		gnSequence fns;
+		for( uint seqI = 0; seqI < iv.SeqCount(); seqI++ )
+		{
+			stringstream seq_name;
+			seq_name << seqI;
+			fns += alignment[seqI];
+			fns.setContigName( fns.contigListSize()-1, seq_name.str() );
+		}
+		gnFASSource::Write( fns, lcb_out, false, false );
+	}
+	return 0;
+}
+
diff --git a/src/getOrthologList.cpp b/src/getOrthologList.cpp
new file mode 100644
index 0000000..de8d790
--- /dev/null
+++ b/src/getOrthologList.cpp
@@ -0,0 +1,317 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string>
+#include <fstream>
+#include <vector>
+#include <algorithm>
+#include <iostream>
+#include <sstream>
+#include "libGenome/gnFilter.h"
+#include "libMems/IntervalList.h"
+#include "libMems/MatchList.h"
+#include "libMems/GappedAlignment.h"
+#include "libMems/Matrix.h"
+#include "libMems/MatchProjectionAdapter.h"
+#include "libMems/Aligner.h"
+#include "libMems/Islands.h"
+#include "libGenome/gnFASSource.h"
+#include <boost/tuple/tuple.hpp>
+#include "libMems/ProgressiveAligner.h"
+#include "libMems/Backbone.h"
+#include "libGenome/gnFeature.h"
+#include "libGenome/gnFASSource.h"
+#include "libMems/DistanceMatrix.h"
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+typedef boost::tuple< uint, gnSeqI, gnSeqI, vector< uint > > bbcol_t;
+
+void printGI( ostream& out, gnBaseFeature* f )
+{
+	// Scans the qualifiers of f and writes to out the value of each
+	// "db_xref" qualifier whose first four characters are "GI: (opening
+	// double quote included).  Values are written as stored, with no
+	// separator or newline added between or after them.
+	for( size_t qualI = 0; qualI < f->GetQualifierListLength(); qualI++ )
+	{
+		if( !( f->GetQualifierName(qualI) == "db_xref" ) )
+			continue;
+		const string xref = f->GetQualifierValue(qualI);
+		// compare() clamps the range to the string's length, so short values are safe
+		if( xref.compare( 0, 4, "\"GI:" ) == 0 )
+			out << xref;
+	}
+}
+
+double computeAvgCoverage( vector< bb_seqentry_t >& backbone, vector< size_t >& nway_bb, vector< gnBaseFeature* >& ortho_cds )
+{
+	// Returns the mean, over the features in ortho_cds, of the fraction of each feature's first location covered by
+	// the backbone rows listed in nway_bb (feature oI is compared with column oI of each row); 0 if ortho_cds is empty.
+	double cov_sum = 0;
+	for( size_t oI = 0; oI < ortho_cds.size(); oI++ )
+	{
+		gnLocation floc = ortho_cds[oI]->GetLocation(0);
+		double intlen = 0;	// summed length of this feature's intersections with the backbone rows
+		for( size_t bbI = 0; bbI < nway_bb.size(); bbI++ )
+		{
+			gnLocation loc;
+			loc.SetStart(absolut(backbone[nway_bb[bbI]][oI].first));
+			loc.SetEnd(absolut(backbone[nway_bb[bbI]][oI].second));
+			gnLocation intloc = floc.GetIntersection(loc,gnLocation::determinedRegions);
+			intlen += intloc.GetEnd()-intloc.GetStart();
+		}
+		cov_sum += intlen / (double)(floc.GetEnd()-floc.GetStart());
+	}
+	return ortho_cds.empty() ? 0.0 : cov_sum / ((double)ortho_cds.size());	// avoid 0/0 on an empty list
+}
+
+
+int main( int argc, char* argv[] )
+{
+	// For each CDS of the reference genome that overlaps backbone present in every genome:
+	// writes its alignment columns to "<CDS alignment base name>_<n>.fas" and, when every
+	// genome has a CDS overlapping that region, adds a row to the ortholog table.
+	if( argc < 6 )
+	{
+		cerr << "Usage: getOrthologList <input xmfa> <backbone seq file> <reference genome> <CDS ortholog filename> <CDS alignment base name>\n";
+		return -1;
+	}
+	ifstream aln_in;
+	aln_in.open( argv[1] );
+	if( !aln_in.is_open() ){
+		cerr << "Error opening " << argv[1] << endl;
+		return -1;
+	}
+	uint sgI = atoi( argv[3] );	// index of the reference genome
+	string ortho_fname = argv[4];
+	string output_base = argv[5];
+
+	IntervalList input_ivs;
+	input_ivs.ReadStandardAlignment( aln_in );
+	aln_in.close();
+	LoadSequences( input_ivs, &cout );
+
+	size_t seq_count = input_ivs.seq_table.size();
+	if( sgI >= seq_count ){	// sgI indexes seq_table and every backbone row below
+		cerr << "Error: reference genome " << sgI << " is out of range, the alignment has " << seq_count << " genomes\n";
+		return -4;
+	}
+	
+	vector< bb_seqentry_t > backbone;
+	ifstream bb_in;
+	bb_in.open( argv[2] );
+	if( !bb_in.is_open() ){
+		cerr << "Error opening \"" << argv[2] << "\"" << endl;
+		return -2;
+	}
+	readBackboneSeqFile( bb_in, backbone );
+	bb_in.close();
+
+	ofstream ortho_out( ortho_fname.c_str() );
+	if( !ortho_out.is_open() )
+	{
+		cerr << "Error opening \"" << ortho_fname << "\"\n";
+		return -3;
+	}
+
+	// collect the CDS features of the reference genome; other features are freed at once
+	gnSequence* gen0 = input_ivs.seq_table[sgI];
+	vector< gnBaseFeature* > genes;
+	for( size_t featI = 0; featI < gen0->getFeatureListLength(); featI++ )
+	{
+		gnBaseFeature* feat = gen0->getFeature(featI);
+		if( feat->GetName() == "CDS" )
+			genes.push_back( feat );
+		else
+			delete feat;
+	}
+
+	cout << genes.size() << " of the " << gen0->getFeatureListLength() << " annotated features are CDS\n";
+
+	size_t ortho_count = 0;	// rows written to the ortholog table
+	size_t rr_count = 0;	// genes overlapping more than one aligned interval
+	size_t partial_rr_annotated = 0;	// of those, the ones that still produced a table row
+
+	ortho_out << "OrthoID";
+	for( size_t seqI = 0; seqI < seq_count; seqI++ )
+		ortho_out << "\tGI_in_Genome_" << seqI;
+	ortho_out << "\tCoverage\tIdentity\tRearranged\n";
+
+	// for every CDS of the reference genome: extract its alignment, write it to a file, and look for orthologous CDS
+	for( size_t geneI = 0; geneI < genes.size(); geneI++ )
+	{
+		// is this gene part of N-way backbone?
+		gnLocation loc = genes[geneI]->GetLocation(0);
+		int64 lend = loc.GetFirst();
+		int64 rend = loc.GetLast();
+		vector< size_t > intersecting_bb;	// backbone rows overlapping the gene in the reference genome
+		for( size_t bbI = 0; bbI < backbone.size(); bbI++ )
+		{
+			if( (absolut(backbone[bbI][sgI].first) <= lend && lend <= absolut(backbone[bbI][sgI].second)) ||
+			    (absolut(backbone[bbI][sgI].first) <= rend && rend <= absolut(backbone[bbI][sgI].second)) ||
+			    (lend <= absolut(backbone[bbI][sgI].first) && absolut(backbone[bbI][sgI].first) <= rend) )
+				intersecting_bb.push_back(bbI);
+		}
+		vector< size_t > nway_bb;	// the subset whose bounds are nonzero in every genome
+		for( size_t bbI = 0; bbI < intersecting_bb.size(); bbI++ )
+		{
+			size_t seqI = 0;
+			for( ; seqI < input_ivs.seq_table.size(); ++seqI )
+			{
+				if( backbone[intersecting_bb[bbI]][seqI].first == 0 || backbone[intersecting_bb[bbI]][seqI].second == 0 )
+					break;
+			}
+			if( seqI == input_ivs.seq_table.size() )
+				nway_bb.push_back(intersecting_bb[bbI]);
+		}
+
+		// skip to the next CDS if this one wasn't part of some n-way backbone
+		if( nway_bb.size() == 0 )
+			continue;
+
+		// use the alignment to find CDS that overlap in this region
+		// extract the alignment
+		size_t ivI = 0;
+		// identify the interval that has the biggest intersection
+		vector< pair< size_t, size_t > > iv_overlap;	// (intersection size, interval index)
+		for( ivI = 0; ivI < input_ivs.size(); ivI++ )
+		{
+			if( input_ivs[ivI].Start(sgI) != NO_MATCH )
+			{
+				size_t inter_size = 0;
+				for( size_t bbI = 0; bbI < nway_bb.size(); bbI++ )
+				{
+					gnLocation loc1;
+					loc1.SetStart( input_ivs[ivI].LeftEnd(sgI) );
+					loc1.SetEnd( input_ivs[ivI].RightEnd(sgI) );
+					gnLocation loc2;
+					loc2.SetStart( absolut(backbone[nway_bb[bbI]][sgI].first) );
+					loc2.SetEnd( absolut(backbone[nway_bb[bbI]][sgI].second) );
+					gnLocation intloc = loc1.GetIntersection( loc2, gnLocation::determinedRegions );
+					gnLocation intloc2 = intloc.GetIntersection( loc, gnLocation::determinedRegions );
+					inter_size += intloc2.GetEnd() - intloc2.GetStart();
+				}
+				if( inter_size > 0 )
+					iv_overlap.push_back( make_pair( inter_size, ivI ) );
+			}
+		}
+		bool partial_rr = false;
+		std::sort( iv_overlap.begin(), iv_overlap.end() );	// ascending, so back() has the largest intersection
+		if( iv_overlap.size() == 0 )
+		{
+			cerr << "Warning: unable to assign gene to an interval!\n" << "coordinates: " << lend << '\t' << rend << endl;
+			continue;
+		}else{
+			ivI = iv_overlap.back().second;
+			if( iv_overlap.size() > 1 )
+			{
+				partial_rr = true;
+				rr_count++;
+			}
+		}
+		CompactGappedAlignment<> iv_cga(input_ivs[ivI]);
+		CompactGappedAlignment<> col_cga;	// receives the alignment columns spanned by the gene
+		gnLocation loc1;
+		loc1.SetStart( input_ivs[ivI].LeftEnd(sgI) );
+		loc1.SetEnd( input_ivs[ivI].RightEnd(sgI) );
+		gnLocation intloc = loc1.GetIntersection( loc, gnLocation::determinedRegions );
+		gnSeqI lcol = iv_cga.SeqPosToColumn( sgI, intloc.GetStart() );
+		gnSeqI rcol = iv_cga.SeqPosToColumn( sgI, intloc.GetEnd() );
+		if( rcol < lcol )
+			swap( rcol, lcol );	// handle reverse complement
+		iv_cga.copyRange(col_cga, lcol, rcol-lcol + 1);
+		vector< string > aln;
+		GetAlignment( col_cga, input_ivs.seq_table, aln );
+		gnSequence gene_aln;
+		for( size_t i = 0; i < aln.size(); i++ )
+		{
+			gene_aln += aln[i];
+			stringstream ss;
+			ss << "seq" << i;
+			gene_aln.setContigName(i, ss.str());
+		}
+
+		// NOTE(review): the file is named with the current ortho_count, which only advances when a table row is written below
+		stringstream of_name;
+		of_name << output_base << "_" << ortho_count << ".fas";
+		gnFASSource::Write( gene_aln, of_name.str() );
+
+		// find orthologous CDS features...
+		vector< gnBaseFeature* > ortho_cds( seq_count, NULL );
+		size_t ocds_count = 0;
+		for( size_t seqI = 0; seqI < input_ivs.seq_table.size(); seqI++ )
+		{
+			gnLocation seqloc;
+			seqloc.SetStart(col_cga.LeftEnd(seqI));
+			seqloc.SetEnd(col_cga.RightEnd(seqI));
+			vector< gnBaseFeature* > int_feats;
+			vector< uint32 > indie;
+			input_ivs.seq_table[seqI]->getIntersectingFeatures( seqloc, int_feats, indie );
+			vector< pair< gnSeqI, size_t > > overlap_frac;	// (largest single-row backbone overlap, index into int_feats)
+			for( size_t featI = 0; featI < int_feats.size(); featI++ )
+			{
+				if( int_feats[featI]->GetName() == "CDS" )
+				{
+					gnLocation l = seqloc.GetIntersection( int_feats[featI]->GetLocation(0), gnLocation::determinedRegions );
+					size_t max_bb = 0;
+					for( size_t bbI = 0; bbI < nway_bb.size(); bbI++ )
+					{
+						gnLocation bbloc;
+						bbloc.SetBounds( absolut(backbone[nway_bb[bbI]][seqI].first), absolut(backbone[nway_bb[bbI]][seqI].second) );
+						gnLocation l2 = bbloc.GetIntersection( l, gnLocation::determinedRegions );
+						if( l2.GetEnd() - l2.GetStart() > max_bb )
+							max_bb = l2.GetEnd() - l2.GetStart();
+					}
+					overlap_frac.push_back( make_pair( max_bb, featI ) );
+				}else
+					delete int_feats[featI];
+			}
+			std::sort( overlap_frac.begin(), overlap_frac.end() );
+			if( overlap_frac.size() > 0 )
+			{
+				ortho_cds[seqI] = int_feats[ overlap_frac.back().second ];	// keep the CDS with the largest overlap
+				ocds_count++;
+			}
+		}
+
+		if( ocds_count == seq_count )	// a CDS was found in every genome
+		{
+			ortho_out << ortho_count;
+			for( size_t i = 0; i < seq_count; i++ )
+			{
+				ortho_out << '\t';
+				printGI( ortho_out, ortho_cds[i] );
+			}
+
+			double cov = computeAvgCoverage( backbone, nway_bb, ortho_cds );
+			ortho_out << '\t' << cov;
+			NumericMatrix<double> identity;
+			vector< AbstractMatch* > amvec( 1, &col_cga );
+			BackboneIdentityMatrix( amvec, input_ivs.seq_table, identity );
+			double id = 0;
+			for( size_t i = 0; i < seq_count; i++ )
+				for( size_t j = i+1; j < seq_count; j++ )
+					id += identity(i,j);
+			id /= (double)(seq_count * (seq_count-1)) / 2.0;	// average over all genome pairs
+
+			ortho_out << '\t' << id;
+			ortho_out << '\t';
+			if( partial_rr )
+			{
+				partial_rr_annotated++;
+				ortho_out << "*";
+			}
+			ortho_out << endl;
+			ortho_count++;
+		}
+		for( size_t oI = 0; oI < ortho_cds.size(); oI++ )
+			if( ortho_cds[oI] != NULL )
+				delete ortho_cds[oI];
+
+	}
+	cout << ortho_count << " out of " << genes.size() << " genes were at least partially conserved\n";
+	cout << rr_count << " CDS appear to be broken by rearrangement, of which " << partial_rr_annotated << " are still annotated as CDS in all genomes\n";
+}
+
diff --git a/src/getopt.c b/src/getopt.c
new file mode 100644
index 0000000..3a85480
--- /dev/null
+++ b/src/getopt.c
@@ -0,0 +1,1279 @@
+/* Getopt for GNU.
+   NOTE: getopt is now part of the C library, so if you don't know what
+   "Keep this file name-space clean" means, talk to drepper at gnu.org
+   before changing it!
+   Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001,2002
+   	Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+

+/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
+   Ditto for AIX 3.2 and <stdlib.h>.  */
+#ifndef _NO_PROTO
+# define _NO_PROTO
+#endif
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#if !defined __STDC__ || !__STDC__
+/* This is a separate conditional since some stdc systems
+   reject `defined (const)'.  */
+# ifndef const
+#  define const
+# endif
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+   actually compiling the library itself.  This code is part of the GNU C
+   Library, but also included in many other GNU distributions.  Compiling
+   and linking in this code is a waste when using the GNU C library
+   (especially if it is a shared library).  Rather than having every GNU
+   program understand `configure --with-gnu-libc' and omit the object files,
+   it is simpler to just do this in the source for each such file.  */
+
+#define GETOPT_INTERFACE_VERSION 2
+#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
+# include <gnu-versions.h>
+# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
+#  define ELIDE_CODE
+# endif
+#endif
+
+#ifndef ELIDE_CODE
+
+
+/* This needs to come after some library #include
+   to get __GNU_LIBRARY__ defined.  */
+#ifdef	__GNU_LIBRARY__
+/* Don't include stdlib.h for non-GNU C libraries because some of them
+   contain conflicting prototypes for getopt.  */
+# include <stdlib.h>
+# include <unistd.h>
+#endif	/* GNU C library.  */
+
+#ifdef VMS
+# include <unixlib.h>
+# if HAVE_STRING_H - 0
+#  include <string.h>
+# endif
+#endif
+
+#ifndef _
+/* This is for other GNU distributions with internationalized messages.  */
+# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
+#  include <libintl.h>
+#  ifndef _
+#   define _(msgid)	gettext (msgid)
+#  endif
+# else
+#  define _(msgid)	(msgid)
+# endif
+# if defined _LIBC && defined USE_IN_LIBIO
+#  include <wchar.h>
+# endif
+#endif
+
+#ifndef attribute_hidden
+# define attribute_hidden
+#endif
+
+/* This version of `getopt' appears to the caller like standard Unix `getopt'
+   but it behaves differently for the user, since it allows the user
+   to intersperse the options with the other arguments.
+
+   As `getopt' works, it permutes the elements of ARGV so that,
+   when it is done, all the options precede everything else.  Thus
+   all application programs are extended to handle flexible argument order.
+
+   Setting the environment variable POSIXLY_CORRECT disables permutation.
+   Then the behavior is completely standard.
+
+   GNU application programs can use a third alternative mode in which
+   they can distinguish the relative order of options and other arguments.  */
+
+#include "getopt.h"
+
+/* For communication from `getopt' to the caller.
+   When `getopt' finds an option that takes an argument,
+   the argument value is returned here.
+   Also, when `ordering' is RETURN_IN_ORDER,
+   each non-option ARGV-element is returned here.  */
+
+char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+   This is used for communication to and from the caller
+   and for communication between successive calls to `getopt'.
+
+   On entry to `getopt', zero means this is the first call; initialize.
+
+   When `getopt' returns -1, this is the index of the first of the
+   non-option elements that the caller should itself scan.
+
+   Otherwise, `optind' communicates from one call to the next
+   how much of ARGV has been scanned so far.  */
+
+/* 1003.2 says this must be 1 before any call.  */
+int optind = 1;
+
+/* Formerly, initialization of getopt depended on optind==0, which
+   causes problems with re-calling getopt as programs generally don't
+   know that. */
+
+int __getopt_initialized attribute_hidden;
+
+/* The next char to be scanned in the option-element
+   in which the last option character we returned was found.
+   This allows us to pick up the scan where we left off.
+
+   If this is zero, or a null string, it means resume the scan
+   by advancing to the next ARGV-element.  */
+
+static char *nextchar;
+
+/* Callers store zero here to inhibit the error message
+   for unrecognized options.  */
+
+int opterr = 1;
+
+/* Set to an option character which was unrecognized.
+   This must be initialized on some systems to avoid linking in the
+   system's own getopt implementation.  */
+
+int optopt = '?';
+
+/* Describe how to deal with options that follow non-option ARGV-elements.
+
+   If the caller did not specify anything,
+   the default is REQUIRE_ORDER if the environment variable
+   POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+   REQUIRE_ORDER means don't recognize them as options;
+   stop option processing when the first non-option is seen.
+   This is what Unix does.
+   This mode of operation is selected by either setting the environment
+   variable POSIXLY_CORRECT, or using `+' as the first character
+   of the list of option characters.
+
+   PERMUTE is the default.  We permute the contents of ARGV as we scan,
+   so that eventually all the non-options are at the end.  This allows options
+   to be given in any order, even with programs that were not written to
+   expect this.
+
+   RETURN_IN_ORDER is an option available to programs that were written
+   to expect options and other ARGV-elements in any order and that care about
+   the ordering of the two.  We describe each non-option ARGV-element
+   as if it were the argument of an option with character code 1.
+   Using `-' as the first character of the list of option characters
+   selects this mode of operation.
+
+   The special argument `--' forces an end of option-scanning regardless
+   of the value of `ordering'.  In the case of RETURN_IN_ORDER, only
+   `--' can cause `getopt' to return -1 with `optind' != ARGC.  */
+
+static enum
+{
+  REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+} ordering;
+
+/* Value of POSIXLY_CORRECT environment variable.  */
+static char *posixly_correct;
+

+#ifdef	__GNU_LIBRARY__
+/* We want to avoid inclusion of string.h with non-GNU libraries
+   because there are many ways it can cause trouble.
+   On some systems, it contains special magic macros that don't work
+   in GCC.  */
+# include <string.h>
+# define my_index	strchr
+#else
+
+# if HAVE_STRING_H
+#  include <string.h>
+# else
+#  ifndef WIN32
+#   include <strings.h>
+#  endif
+# endif
+
+/* Avoid depending on library functions or files
+   whose names are inconsistent.  */
+
+#ifndef getenv
+extern char *getenv ();
+#endif
+
+/* Return a pointer to the first CHR in STR, or 0 if there is none.  */
+static char *
+my_index (str, chr)
+     const char *str;
+     int chr;
+{
+  const char *scan;
+
+  for (scan = str; *scan != '\0'; scan++)
+    if (*scan == chr)
+      return (char *) scan;
+  return 0;
+}
+
+/* If using GCC, we can safely declare strlen this way.
+   If not using GCC, it is ok not to declare it.  */
+#ifdef __GNUC__
+/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h.
+   That was relevant to code that was here before.  */
+# if (!defined __STDC__ || !__STDC__) && !defined strlen
+/* gcc with -traditional declares the built-in strlen to return int,
+   and has done so at least since version 2.4.5. -- rms.  */
+extern int strlen (const char *);
+# endif /* not __STDC__ */
+#endif /* __GNUC__ */
+
+#endif /* not __GNU_LIBRARY__ */
+

+/* Handle permutation of arguments.  */
+
+/* Describe the part of ARGV that contains non-options that have
+   been skipped.  `first_nonopt' is the index in ARGV of the first of them;
+   `last_nonopt' is the index after the last of them.  */
+
+static int first_nonopt;
+static int last_nonopt;
+
+#ifdef _LIBC
+/* Stored original parameters.
+   XXX This is not a good solution.  We should rather copy the args so
+   that we can compare them later.  But we must not use malloc(3).  */
+extern int __libc_argc;
+extern char **__libc_argv;
+
+/* Bash 2.0 gives us an environment variable containing flags
+   indicating ARGV elements that should not be considered arguments.  */
+
+# ifdef USE_NONOPTION_FLAGS
+/* Defined in getopt_init.c  */
+extern char *__getopt_nonoption_flags;
+
+static int nonoption_flags_max_len;
+static int nonoption_flags_len;
+# endif
+
+# ifdef USE_NONOPTION_FLAGS
+#  define SWAP_FLAGS(ch1, ch2) \
+  if (nonoption_flags_len > 0)						      \
+    {									      \
+      char __tmp = __getopt_nonoption_flags[ch1];			      \
+      __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2];	      \
+      __getopt_nonoption_flags[ch2] = __tmp;				      \
+    }
+# else
+#  define SWAP_FLAGS(ch1, ch2)
+# endif
+#else	/* !_LIBC */
+# define SWAP_FLAGS(ch1, ch2)
+#endif	/* _LIBC */
+
+/* Exchange two adjacent subsequences of ARGV.
+   One subsequence is elements [first_nonopt,last_nonopt)
+   which contains all the non-options that have been skipped so far.
+   The other is elements [last_nonopt,optind), which contains all
+   the options processed since those non-options were skipped.
+
+   `first_nonopt' and `last_nonopt' are relocated so that they describe
+   the new indices of the non-options in ARGV after they are moved.  */
+
+#if defined __STDC__ && __STDC__
+static void exchange (char **);
+#endif
+
+static void
+exchange (argv)
+     char **argv;
+{
+  /* [lo,mid) is the run of skipped non-options and [mid,hi) the run of
+     options scanned after them.  The loop below swaps the two runs in
+     place so that the options end up first.  */
+  int lo = first_nonopt;
+  int mid = last_nonopt;
+  int hi = optind;
+  char *held;
+
+#if defined _LIBC && defined USE_NONOPTION_FLAGS
+  /* SWAP_FLAGS mirrors every swap in `__getopt_nonoption_flags', so make
+     sure index HI lies inside that string before any swapping starts.  */
+  if (nonoption_flags_len > 0 && hi >= nonoption_flags_max_len)
+    {
+      /* The caller presented more arguments than the flag string
+	 covers: switch to a longer copy whose new tail is zero-filled.  */
+      char *grown = malloc (hi + 1);
+      if (grown == NULL)
+	nonoption_flags_len = nonoption_flags_max_len = 0;
+      else
+	{
+	  memset (__mempcpy (grown, __getopt_nonoption_flags,
+			     nonoption_flags_max_len),
+		  '\0', hi + 1 - nonoption_flags_max_len);
+	  nonoption_flags_max_len = hi + 1;
+	  __getopt_nonoption_flags = grown;
+	}
+    }
+#endif
+
+  /* Each pass exchanges the shorter run with the far end of the longer
+     one.  That settles the shorter run; the longer run then holds the
+     right elements but as two pieces, which the next pass exchanges.  */
+  while (hi > mid && mid > lo)
+    {
+      int head_len = mid - lo;
+      int tail_len = hi - mid;
+      register int k;
+
+      if (tail_len > head_len)
+	{
+	  /* The non-option run is the short one: trade it with the last
+	     HEAD_LEN slots of the option run.  */
+	  int dest = hi - head_len;
+
+	  for (k = 0; k < head_len; k++)
+	    {
+	      held = argv[lo + k];
+	      argv[lo + k] = argv[dest + k];
+	      argv[dest + k] = held;
+	      SWAP_FLAGS (lo + k, dest + k);
+	    }
+	  /* Those slots are final; leave them out of later passes.  */
+	  hi -= head_len;
+	}
+      else
+	{
+	  /* The option run is no longer than the other: trade it with
+	     the first TAIL_LEN slots of the non-option run.  */
+	  for (k = 0; k < tail_len; k++)
+	    {
+	      held = argv[lo + k];
+	      argv[lo + k] = argv[mid + k];
+	      argv[mid + k] = held;
+	      SWAP_FLAGS (lo + k, mid + k);
+	    }
+	  /* Those slots are final; leave them out of later passes.  */
+	  lo += tail_len;
+	}
+    }
+
+  /* Record where the non-options ended up: they now occupy the slots
+     just below OPTIND.  */
+  first_nonopt += (optind - last_nonopt);
+  last_nonopt = optind;
+}
+
+/* Initialize the internal data when the first call is made.  */
+
+#if defined __STDC__ && __STDC__
+static const char *_getopt_initialize (int, char *const *, const char *);
+#endif
+static const char *
+_getopt_initialize (argc, argv, optstring)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+{
+  /* Reset the scanner state: nothing has been skipped yet, so the
+     range of passed-over non-options is empty and begins at OPTIND,
+     and there is no partially scanned option element.  */
+  nextchar = NULL;
+  first_nonopt = optind;
+  last_nonopt = optind;
+
+  posixly_correct = getenv ("POSIXLY_CORRECT");
+
+  /* Pick the ordering mode.  A leading `-' or `+' in OPTSTRING selects
+     it explicitly and is stripped from the string handed back to the
+     caller; otherwise POSIXLY_CORRECT decides.  */
+  switch (optstring[0])
+    {
+    case '-':
+      ordering = RETURN_IN_ORDER;
+      ++optstring;
+      break;
+    case '+':
+      ordering = REQUIRE_ORDER;
+      ++optstring;
+      break;
+    default:
+      ordering = posixly_correct != NULL ? REQUIRE_ORDER : PERMUTE;
+      break;
+    }
+
+#if defined _LIBC && defined USE_NONOPTION_FLAGS
+  if (posixly_correct != NULL
+      || argc != __libc_argc || argv != __libc_argv)
+    nonoption_flags_len = 0;
+  else
+    {
+      if (nonoption_flags_max_len == 0)
+	{
+	  const char *shell_flags = __getopt_nonoption_flags;
+
+	  if (shell_flags == NULL || shell_flags[0] == '\0')
+	    nonoption_flags_max_len = -1;
+	  else
+	    {
+	      int used = nonoption_flags_max_len = strlen (shell_flags);
+	      if (nonoption_flags_max_len < argc)
+		nonoption_flags_max_len = argc;
+	      __getopt_nonoption_flags =
+		(char *) malloc (nonoption_flags_max_len);
+	      if (__getopt_nonoption_flags == NULL)
+		nonoption_flags_max_len = -1;
+	      else
+		memset (__mempcpy (__getopt_nonoption_flags, shell_flags, used),
+			'\0', nonoption_flags_max_len - used);
+	    }
+	}
+      nonoption_flags_len = nonoption_flags_max_len;
+    }
+#endif
+
+  return optstring;
+}
+

+/* Scan elements of ARGV (whose length is ARGC) for option characters
+   given in OPTSTRING.
+
+   If an element of ARGV starts with '-', and is not exactly "-" or "--",
+   then it is an option element.  The characters of this element
+   (aside from the initial '-') are option characters.  If `getopt'
+   is called repeatedly, it returns successively each of the option characters
+   from each of the option elements.
+
+   If `getopt' finds another option character, it returns that character,
+   updating `optind' and `nextchar' so that the next call to `getopt' can
+   resume the scan with the following option character or ARGV-element.
+
+   If there are no more option characters, `getopt' returns -1.
+   Then `optind' is the index in ARGV of the first ARGV-element
+   that is not an option.  (The ARGV-elements have been permuted
+   so that those that are not options now come last.)
+
+   OPTSTRING is a string containing the legitimate option characters.
+   If an option character is seen that is not listed in OPTSTRING,
+   return '?' after printing an error message.  If you set `opterr' to
+   zero, the error message is suppressed but we still return '?'.
+
+   If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+   so the following text in the same ARGV-element, or the text of the following
+   ARGV-element, is returned in `optarg'.  Two colons mean an option that
+   wants an optional arg; if there is text in the current ARGV-element,
+   it is returned in `optarg', otherwise `optarg' is set to zero.
+
+   If OPTSTRING starts with `-' or `+', it requests different methods of
+   handling the non-option ARGV-elements.
+   See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+   Long-named options begin with `--' instead of `-'.
+   Their names may be abbreviated as long as the abbreviation is unique
+   or is an exact match for some defined option.  If they have an
+   argument, it follows the option name in the same ARGV-element, separated
+   from the option name by a `=', or else in the next ARGV-element.
+   When `getopt' finds a long-named option, it returns 0 if that option's
+   `flag' field is nonzero, the value of the option's `val' field
+   if the `flag' field is zero.
+
+   The elements of ARGV aren't really const, because we permute them.
+   But we pretend they're const in the prototype to be compatible
+   with other systems.
+
+   LONGOPTS is a vector of `struct option' terminated by an
+   element containing a name which is zero.
+
+   LONGIND returns the index in LONGOPT of the long-named option found.
+   It is only valid when a long-named option has been found by the most
+   recent call.
+
+   If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+   long-named options.  */
+
+int
+_getopt_internal (argc, argv, optstring, longopts, longind, long_only)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+     const struct option *longopts;
+     int *longind;
+     int long_only;
+{
+  int print_errors = opterr;
+  if (optstring[0] == ':')
+    print_errors = 0;
+
+  if (argc < 1)
+    return -1;
+
+  optarg = NULL;
+
+  if (optind == 0 || !__getopt_initialized)
+    {
+      if (optind == 0)
+	optind = 1;	/* Don't scan ARGV[0], the program name.  */
+      optstring = _getopt_initialize (argc, argv, optstring);
+      __getopt_initialized = 1;
+    }
+
+  /* Test whether ARGV[optind] points to a non-option argument.
+     Either it does not have option syntax, or there is an environment flag
+     from the shell indicating it is not an option.  The latter information
+     is only used when this code is used in the GNU libc.  */
+#if defined _LIBC && defined USE_NONOPTION_FLAGS
+# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0'	      \
+		      || (optind < nonoption_flags_len			      \
+			  && __getopt_nonoption_flags[optind] == '1'))
+#else
+# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0')
+#endif
+
+  if (nextchar == NULL || *nextchar == '\0')
+    {
+      /* Advance to the next ARGV-element.  */
+
+      /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
+	 moved back by the user (who may also have changed the arguments).  */
+      if (last_nonopt > optind)
+	last_nonopt = optind;
+      if (first_nonopt > optind)
+	first_nonopt = optind;
+
+      if (ordering == PERMUTE)
+	{
+	  /* If we have just processed some options following some non-options,
+	     exchange them so that the options come first.  */
+
+	  if (first_nonopt != last_nonopt && last_nonopt != optind)
+	    exchange ((char **) argv);
+	  else if (last_nonopt != optind)
+	    first_nonopt = optind;
+
+	  /* Skip any additional non-options
+	     and extend the range of non-options previously skipped.  */
+
+	  while (optind < argc && NONOPTION_P)
+	    optind++;
+	  last_nonopt = optind;
+	}
+
+      /* The special ARGV-element `--' means premature end of options.
+	 Skip it like a null option,
+	 then exchange with previous non-options as if it were an option,
+	 then skip everything else like a non-option.  */
+
+      if (optind != argc && !strcmp (argv[optind], "--"))
+	{
+	  optind++;
+
+	  if (first_nonopt != last_nonopt && last_nonopt != optind)
+	    exchange ((char **) argv);
+	  else if (first_nonopt == last_nonopt)
+	    first_nonopt = optind;
+	  last_nonopt = argc;
+
+	  optind = argc;
+	}
+
+      /* If we have done all the ARGV-elements, stop the scan
+	 and back over any non-options that we skipped and permuted.  */
+
+      if (optind == argc)
+	{
+	  /* Set the next-arg-index to point at the non-options
+	     that we previously skipped, so the caller will digest them.  */
+	  if (first_nonopt != last_nonopt)
+	    optind = first_nonopt;
+	  return -1;
+	}
+
+      /* If we have come to a non-option and did not permute it,
+	 either stop the scan or describe it to the caller and pass it by.  */
+
+      if (NONOPTION_P)
+	{
+	  if (ordering == REQUIRE_ORDER)
+	    return -1;
+	  optarg = argv[optind++];
+	  return 1;
+	}
+
+      /* We have found another option-ARGV-element.
+	 Skip the initial punctuation.  */
+
+      nextchar = (argv[optind] + 1
+		  + (longopts != NULL && argv[optind][1] == '-'));
+    }
+
+  /* Decode the current option-ARGV-element.  */
+
+  /* Check whether the ARGV-element is a long option.
+
+     If long_only and the ARGV-element has the form "-f", where f is
+     a valid short option, don't consider it an abbreviated form of
+     a long option that starts with f.  Otherwise there would be no
+     way to give the -f short option.
+
+     On the other hand, if there's a long option "fubar" and
+     the ARGV-element is "-fu", do consider that an abbreviation of
+     the long option, just like "--fu", and not "-f" with arg "u".
+
+     This distinction seems to be the most useful approach.  */
+
+  if (longopts != NULL
+      && (argv[optind][1] == '-'
+	  || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1])))))
+    {
+      char *nameend;
+      const struct option *p;
+      const struct option *pfound = NULL;
+      int exact = 0;
+      int ambig = 0;
+      int indfound = -1;
+      int option_index;
+
+      for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
+	/* Do nothing.  */ ;
+
+      /* Test all long options for either exact match
+	 or abbreviated matches.  */
+      for (p = longopts, option_index = 0; p->name; p++, option_index++)
+	if (!strncmp (p->name, nextchar, nameend - nextchar))
+	  {
+	    if ((unsigned int) (nameend - nextchar)
+		== (unsigned int) strlen (p->name))
+	      {
+		/* Exact match found.  */
+		pfound = p;
+		indfound = option_index;
+		exact = 1;
+		break;
+	      }
+	    else if (pfound == NULL)
+	      {
+		/* First nonexact match found.  */
+		pfound = p;
+		indfound = option_index;
+	      }
+	    else if (long_only
+		     || pfound->has_arg != p->has_arg
+		     || pfound->flag != p->flag
+		     || pfound->val != p->val)
+	      /* Second or later nonexact match found.  */
+	      ambig = 1;
+	  }
+
+      if (ambig && !exact)
+	{
+	  if (print_errors)
+	    {
+#if defined _LIBC && defined USE_IN_LIBIO
+	      char *buf;
+
+	      if (__asprintf (&buf, _("%s: option `%s' is ambiguous\n"),
+			      argv[0], argv[optind]) >= 0)
+		{
+
+		  if (_IO_fwide (stderr, 0) > 0)
+		    __fwprintf (stderr, L"%s", buf);
+		  else
+		    fputs (buf, stderr);
+
+		  free (buf);
+		}
+#else
+	      fprintf (stderr, _("%s: option `%s' is ambiguous\n"),
+		       argv[0], argv[optind]);
+#endif
+	    }
+	  nextchar += strlen (nextchar);
+	  optind++;
+	  optopt = 0;
+	  return '?';
+	}
+
+      if (pfound != NULL)
+	{
+	  option_index = indfound;
+	  optind++;
+	  if (*nameend)
+	    {
+	      /* Don't test has_arg with >, because some C compilers don't
+		 allow it to be used on enums.  */
+	      if (pfound->has_arg)
+		optarg = nameend + 1;
+	      else
+		{
+		  if (print_errors)
+		    {
+#if defined _LIBC && defined USE_IN_LIBIO
+		      char *buf;
+		      int n;
+#endif
+
+		      if (argv[optind - 1][1] == '-')
+			{
+			  /* --option */
+#if defined _LIBC && defined USE_IN_LIBIO
+			  n = __asprintf (&buf, _("\
+%s: option `--%s' doesn't allow an argument\n"),
+					  argv[0], pfound->name);
+#else
+			  fprintf (stderr, _("\
+%s: option `--%s' doesn't allow an argument\n"),
+				   argv[0], pfound->name);
+#endif
+			}
+		      else
+			{
+			  /* +option or -option */
+#if defined _LIBC && defined USE_IN_LIBIO
+			  n = __asprintf (&buf, _("\
+%s: option `%c%s' doesn't allow an argument\n"),
+					  argv[0], argv[optind - 1][0],
+					  pfound->name);
+#else
+			  fprintf (stderr, _("\
+%s: option `%c%s' doesn't allow an argument\n"),
+				   argv[0], argv[optind - 1][0], pfound->name);
+#endif
+			}
+
+#if defined _LIBC && defined USE_IN_LIBIO
+		      if (n >= 0)
+			{
+			  if (_IO_fwide (stderr, 0) > 0)
+			    __fwprintf (stderr, L"%s", buf);
+			  else
+			    fputs (buf, stderr);
+
+			  free (buf);
+			}
+#endif
+		    }
+
+		  nextchar += strlen (nextchar);
+
+		  optopt = pfound->val;
+		  return '?';
+		}
+	    }
+	  else if (pfound->has_arg == 1)
+	    {
+	      if (optind < argc)
+		optarg = argv[optind++];
+	      else
+		{
+		  if (print_errors)
+		    {
+#if defined _LIBC && defined USE_IN_LIBIO
+		      char *buf;
+
+		      if (__asprintf (&buf, _("\
+%s: option `%s' requires an argument\n"),
+				      argv[0], argv[optind - 1]) >= 0)
+			{
+			  if (_IO_fwide (stderr, 0) > 0)
+			    __fwprintf (stderr, L"%s", buf);
+			  else
+			    fputs (buf, stderr);
+
+			  free (buf);
+			}
+#else
+		      fprintf (stderr,
+			       _("%s: option `%s' requires an argument\n"),
+			       argv[0], argv[optind - 1]);
+#endif
+		    }
+		  nextchar += strlen (nextchar);
+		  optopt = pfound->val;
+		  return optstring[0] == ':' ? ':' : '?';
+		}
+	    }
+	  nextchar += strlen (nextchar);
+	  if (longind != NULL)
+	    *longind = option_index;
+	  if (pfound->flag)
+	    {
+	      *(pfound->flag) = pfound->val;
+	      return 0;
+	    }
+	  return pfound->val;
+	}
+
+      /* Can't find it as a long option.  If this is not getopt_long_only,
+	 or the option starts with '--' or is not a valid short
+	 option, then it's an error.
+	 Otherwise interpret it as a short option.  */
+      if (!long_only || argv[optind][1] == '-'
+	  || my_index (optstring, *nextchar) == NULL)
+	{
+	  if (print_errors)
+	    {
+#if defined _LIBC && defined USE_IN_LIBIO
+	      char *buf;
+	      int n;
+#endif
+
+	      if (argv[optind][1] == '-')
+		{
+		  /* --option */
+#if defined _LIBC && defined USE_IN_LIBIO
+		  n = __asprintf (&buf, _("%s: unrecognized option `--%s'\n"),
+				  argv[0], nextchar);
+#else
+		  fprintf (stderr, _("%s: unrecognized option `--%s'\n"),
+			   argv[0], nextchar);
+#endif
+		}
+	      else
+		{
+		  /* +option or -option */
+#if defined _LIBC && defined USE_IN_LIBIO
+		  n = __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"),
+				  argv[0], argv[optind][0], nextchar);
+#else
+		  fprintf (stderr, _("%s: unrecognized option `%c%s'\n"),
+			   argv[0], argv[optind][0], nextchar);
+#endif
+		}
+
+#if defined _LIBC && defined USE_IN_LIBIO
+	      if (n >= 0)
+		{
+		  if (_IO_fwide (stderr, 0) > 0)
+		    __fwprintf (stderr, L"%s", buf);
+		  else
+		    fputs (buf, stderr);
+
+		  free (buf);
+		}
+#endif
+	    }
+	  nextchar = (char *) "";
+	  optind++;
+	  optopt = 0;
+	  return '?';
+	}
+    }
+
+  /* Look at and handle the next short option-character.  */
+
+  {
+    char c = *nextchar++;
+    char *temp = my_index (optstring, c);
+
+    /* Increment `optind' when we start to process its last character.  */
+    if (*nextchar == '\0')
+      ++optind;
+
+    if (temp == NULL || c == ':')
+      {
+	if (print_errors)
+	  {
+#if defined _LIBC && defined USE_IN_LIBIO
+	      char *buf;
+	      int n;
+#endif
+
+	    if (posixly_correct)
+	      {
+		/* 1003.2 specifies the format of this message.  */
+#if defined _LIBC && defined USE_IN_LIBIO
+		n = __asprintf (&buf, _("%s: illegal option -- %c\n"),
+				argv[0], c);
+#else
+		fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c);
+#endif
+	      }
+	    else
+	      {
+#if defined _LIBC && defined USE_IN_LIBIO
+		n = __asprintf (&buf, _("%s: invalid option -- %c\n"),
+				argv[0], c);
+#else
+		fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c);
+#endif
+	      }
+
+#if defined _LIBC && defined USE_IN_LIBIO
+	    if (n >= 0)
+	      {
+		if (_IO_fwide (stderr, 0) > 0)
+		  __fwprintf (stderr, L"%s", buf);
+		else
+		  fputs (buf, stderr);
+
+		free (buf);
+	      }
+#endif
+	  }
+	optopt = c;
+	return '?';
+      }
+    /* Convenience. Treat POSIX -W foo same as long option --foo */
+    if (temp[0] == 'W' && temp[1] == ';')
+      {
+	char *nameend;
+	const struct option *p;
+	const struct option *pfound = NULL;
+	int exact = 0;
+	int ambig = 0;
+	int indfound = 0;
+	int option_index;
+
+	/* This is an option that requires an argument.  */
+	if (*nextchar != '\0')
+	  {
+	    optarg = nextchar;
+	    /* If we end this ARGV-element by taking the rest as an arg,
+	       we must advance to the next element now.  */
+	    optind++;
+	  }
+	else if (optind == argc)
+	  {
+	    if (print_errors)
+	      {
+		/* 1003.2 specifies the format of this message.  */
+#if defined _LIBC && defined USE_IN_LIBIO
+		char *buf;
+
+		if (__asprintf (&buf,
+				_("%s: option requires an argument -- %c\n"),
+				argv[0], c) >= 0)
+		  {
+		    if (_IO_fwide (stderr, 0) > 0)
+		      __fwprintf (stderr, L"%s", buf);
+		    else
+		      fputs (buf, stderr);
+
+		    free (buf);
+		  }
+#else
+		fprintf (stderr, _("%s: option requires an argument -- %c\n"),
+			 argv[0], c);
+#endif
+	      }
+	    optopt = c;
+	    if (optstring[0] == ':')
+	      c = ':';
+	    else
+	      c = '?';
+	    return c;
+	  }
+	else
+	  /* We already incremented `optind' once;
+	     increment it again when taking next ARGV-elt as argument.  */
+	  optarg = argv[optind++];
+
+	/* optarg is now the argument, see if it's in the
+	   table of longopts.  */
+
+	for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++)
+	  /* Do nothing.  */ ;
+
+	/* Test all long options for either exact match
+	   or abbreviated matches.  */
+	for (p = longopts, option_index = 0; p->name; p++, option_index++)
+	  if (!strncmp (p->name, nextchar, nameend - nextchar))
+	    {
+	      if ((unsigned int) (nameend - nextchar) == strlen (p->name))
+		{
+		  /* Exact match found.  */
+		  pfound = p;
+		  indfound = option_index;
+		  exact = 1;
+		  break;
+		}
+	      else if (pfound == NULL)
+		{
+		  /* First nonexact match found.  */
+		  pfound = p;
+		  indfound = option_index;
+		}
+	      else
+		/* Second or later nonexact match found.  */
+		ambig = 1;
+	    }
+	if (ambig && !exact)
+	  {
+	    if (print_errors)
+	      {
+#if defined _LIBC && defined USE_IN_LIBIO
+		char *buf;
+
+		if (__asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"),
+				argv[0], argv[optind]) >= 0)
+		  {
+		    if (_IO_fwide (stderr, 0) > 0)
+		      __fwprintf (stderr, L"%s", buf);
+		    else
+		      fputs (buf, stderr);
+
+		    free (buf);
+		  }
+#else
+		fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"),
+			 argv[0], argv[optind]);
+#endif
+	      }
+	    nextchar += strlen (nextchar);
+	    optind++;
+	    return '?';
+	  }
+	if (pfound != NULL)
+	  {
+	    option_index = indfound;
+	    if (*nameend)
+	      {
+		/* Don't test has_arg with >, because some C compilers don't
+		   allow it to be used on enums.  */
+		if (pfound->has_arg)
+		  optarg = nameend + 1;
+		else
+		  {
+		    if (print_errors)
+		      {
+#if defined _LIBC && defined USE_IN_LIBIO
+			char *buf;
+
+			if (__asprintf (&buf, _("\
+%s: option `-W %s' doesn't allow an argument\n"),
+					argv[0], pfound->name) >= 0)
+			  {
+			    if (_IO_fwide (stderr, 0) > 0)
+			      __fwprintf (stderr, L"%s", buf);
+			    else
+			      fputs (buf, stderr);
+
+			    free (buf);
+			  }
+#else
+			fprintf (stderr, _("\
+%s: option `-W %s' doesn't allow an argument\n"),
+				 argv[0], pfound->name);
+#endif
+		      }
+
+		    nextchar += strlen (nextchar);
+		    return '?';
+		  }
+	      }
+	    else if (pfound->has_arg == 1)
+	      {
+		if (optind < argc)
+		  optarg = argv[optind++];
+		else
+		  {
+		    if (print_errors)
+		      {
+#if defined _LIBC && defined USE_IN_LIBIO
+			char *buf;
+
+			if (__asprintf (&buf, _("\
+%s: option `%s' requires an argument\n"),
+					argv[0], argv[optind - 1]) >= 0)
+			  {
+			    if (_IO_fwide (stderr, 0) > 0)
+			      __fwprintf (stderr, L"%s", buf);
+			    else
+			      fputs (buf, stderr);
+
+			    free (buf);
+			  }
+#else
+			fprintf (stderr,
+				 _("%s: option `%s' requires an argument\n"),
+				 argv[0], argv[optind - 1]);
+#endif
+		      }
+		    nextchar += strlen (nextchar);
+		    return optstring[0] == ':' ? ':' : '?';
+		  }
+	      }
+	    nextchar += strlen (nextchar);
+	    if (longind != NULL)
+	      *longind = option_index;
+	    if (pfound->flag)
+	      {
+		*(pfound->flag) = pfound->val;
+		return 0;
+	      }
+	    return pfound->val;
+	  }
+	  nextchar = NULL;
+	  return 'W';	/* Let the application handle it.   */
+      }
+    if (temp[1] == ':')
+      {
+	if (temp[2] == ':')
+	  {
+	    /* This is an option that accepts an argument optionally.  */
+	    if (*nextchar != '\0')
+	      {
+		optarg = nextchar;
+		optind++;
+	      }
+	    else
+	      optarg = NULL;
+	    nextchar = NULL;
+	  }
+	else
+	  {
+	    /* This is an option that requires an argument.  */
+	    if (*nextchar != '\0')
+	      {
+		optarg = nextchar;
+		/* If we end this ARGV-element by taking the rest as an arg,
+		   we must advance to the next element now.  */
+		optind++;
+	      }
+	    else if (optind == argc)
+	      {
+		if (print_errors)
+		  {
+		    /* 1003.2 specifies the format of this message.  */
+#if defined _LIBC && defined USE_IN_LIBIO
+		    char *buf;
+
+		    if (__asprintf (&buf, _("\
+%s: option requires an argument -- %c\n"),
+				    argv[0], c) >= 0)
+		      {
+			if (_IO_fwide (stderr, 0) > 0)
+			  __fwprintf (stderr, L"%s", buf);
+			else
+			  fputs (buf, stderr);
+
+			free (buf);
+		      }
+#else
+		    fprintf (stderr,
+			     _("%s: option requires an argument -- %c\n"),
+			     argv[0], c);
+#endif
+		  }
+		optopt = c;
+		if (optstring[0] == ':')
+		  c = ':';
+		else
+		  c = '?';
+	      }
+	    else
+	      /* We already incremented `optind' once;
+		 increment it again when taking next ARGV-elt as argument.  */
+	      optarg = argv[optind++];
+	    nextchar = NULL;
+	  }
+      }
+    return c;
+  }
+}
+
+int
+getopt (argc, argv, optstring)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+{
+  /* Short options only: no long-option table, no index slot, and
+     long_only switched off.  */
+  const struct option *no_longopts = (const struct option *) 0;
+  return _getopt_internal (argc, argv, optstring, no_longopts, (int *) 0, 0);
+}
+
+#endif	/* Not ELIDE_CODE.  */
+

+#ifdef TEST
+
+/* Compile with -DTEST to make an executable for use in testing
+   the above definition of `getopt'.  */
+
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  int digit_optind = 0;
+
+  /* Report each option getopt returns until it signals the end with -1.  */
+  while (1)
+    {
+      int this_option_optind = optind ? optind : 1;
+
+      c = getopt (argc, argv, "abc:d:0123456789");
+      if (c == -1)
+	break;
+
+      switch (c)
+	{
+	case '0': case '1': case '2': case '3': case '4':
+	case '5': case '6': case '7': case '8': case '9':
+	  /* Warn when digit options are spread over several ARGV-elements.  */
+	  if (digit_optind != 0 && digit_optind != this_option_optind)
+	    printf ("digits occur in two different argv-elements.\n");
+	  digit_optind = this_option_optind;
+	  printf ("option %c\n", c);
+	  break;
+
+	case 'a':
+	  printf ("option a\n");
+	  break;
+
+	case 'b':
+	  printf ("option b\n");
+	  break;
+
+	case 'c':
+	  printf ("option c with value `%s'\n", optarg);
+	  break;
+
+	case 'd':
+	  /* "d:" is in the option string, so report it the same way as `c'.  */
+	  printf ("option d with value `%s'\n", optarg);
+	  break;
+
+	case '?':
+	  break;
+
+	default:
+	  printf ("?? getopt returned character code 0%o ??\n", c);
+	}
+    }
+
+  /* Whatever getopt left unconsumed is a non-option argument.  */
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      while (optind < argc)
+	printf ("%s ", argv[optind++]);
+      printf ("\n");
+    }
+
+  exit (0);
+}
+
+#endif /* TEST */
diff --git a/src/getopt.cpp b/src/getopt.cpp
new file mode 100644
index 0000000..29a76bd
--- /dev/null
+++ b/src/getopt.cpp
@@ -0,0 +1,772 @@
+/* Getopt for GNU.
+   NOTE: getopt is now part of the C library, so if you don't know what
+   "Keep this file name-space clean" means, talk to roland at gnu.ai.mit.edu
+   before changing it!
+
+   Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94
+	Free Software Foundation, Inc.
+
+Changes by monty:
+- Added include of string.h when necessary.
+- Removed two warnings from gcc.
+
+This file is part of the GNU C Library.  Its master source is NOT part of
+the C library, however.  The master source lives in /gd/gnu/lib.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB.  If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA.  */
+

+#ifdef	__cplusplus
+extern "C" {
+/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
+   Ditto for AIX 3.2 and <stdlib.h>.  */
+#ifndef _NO_PROTO
+#define _NO_PROTO
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#if (!defined (__STDC__) || !__STDC__) && !defined(MSDOS)
+/* This is a separate conditional since some stdc systems
+   reject `defined (const)'.  */
+#ifndef const
+#define const
+#endif
+#endif
+
+#ifndef WIN32
+#include <global.h>				/* Changes for mysys */
+#include <m_string.h>
+#else
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "getopt.h"
+
+int getopt_long (int argc, char *const *argv, const char *options, const struct option *long_options, int *opt_index)
+{
+  const int long_only = 0;	/* only "--" introduces a long option */
+  return _getopt_internal (argc, argv, options, long_options, opt_index, long_only);
+}
+
+/* Like getopt_long, but '-' as well as '--' can indicate a long option.
+   If an option that starts with '-' (not '--') doesn't match a long option,
+   but does match a short option, it is parsed as a short option
+   instead.  */
+
+int getopt_long_only (int argc, char *const *argv, const char *options, const struct option *long_options, int *opt_index)
+{
+  const int long_only = 1;	/* a single "-" may introduce a long option too */
+  return _getopt_internal (argc, argv, options, long_options, opt_index, long_only);
+}
+
+#endif
+/* Comment out all this code if we are using the GNU C Library, and are not
+   actually compiling the library itself.  This code is part of the GNU C
+   Library, but also included in many other GNU distributions.	Compiling
+   and linking in this code is a waste when using the GNU C library
+   (especially if it is a shared library).  Rather than having every GNU
+   program understand `configure --with-gnu-libc' and omit the object files,
+   it is simpler to just do this in the source for each such file.  */
+
+#if defined (_LIBC) || !defined (__GNU_LIBRARY__)
+
+
+/* This needs to come after some library #include
+   to get __GNU_LIBRARY__ defined.  */
+#ifdef	__GNU_LIBRARY__
+/* Don't include stdlib.h for non-GNU C libraries because some of them
+   contain conflicting prototypes for getopt.  */
+#include <stdlib.h>
+#endif	/* GNU C library.  */
+
+/* This version of `getopt' appears to the caller like standard Unix `getopt'
+   but it behaves differently for the user, since it allows the user
+   to intersperse the options with the other arguments.
+
+   As `getopt' works, it permutes the elements of ARGV so that,
+   when it is done, all the options precede everything else.  Thus
+   all application programs are extended to handle flexible argument order.
+
+   Setting the environment variable POSIXLY_CORRECT disables permutation.
+   Then the behavior is completely standard.
+
+   GNU application programs can use a third alternative mode in which
+   they can distinguish the relative order of options and other arguments.  */
+
+#include "getopt.h"
+
+/* For communication from `getopt' to the caller.
+   When `getopt' finds an option that takes an argument,
+   the argument value is returned here.
+   Also, when `ordering' is RETURN_IN_ORDER,
+   each non-option ARGV-element is returned here.  */
+
+char *optarg = NULL;
+
+/* Index in ARGV of the next element to be scanned.
+   This is used for communication to and from the caller
+   and for communication between successive calls to `getopt'.
+
+   On entry to `getopt', zero means this is the first call; initialize.
+
+   When `getopt' returns EOF, this is the index of the first of the
+   non-option elements that the caller should itself scan.
+
+   Otherwise, `optind' communicates from one call to the next
+   how much of ARGV has been scanned so far.  */
+
+/* XXX 1003.2 says this must be 1 before any call.  */
+int optind = 1;
+
+/* The next char to be scanned in the option-element
+   in which the last option character we returned was found.
+   This allows us to pick up the scan where we left off.
+
+   If this is zero, or a null string, it means resume the scan
+   by advancing to the next ARGV-element.  */
+
+static char *nextchar;
+
+/* Callers store zero here to inhibit the error message
+   for unrecognized options.  */
+
+int opterr = 1;
+
+/* Set to an option character which was unrecognized.
+   This must be initialized on some systems to avoid linking in the
+   system's own getopt implementation.	*/
+
+int optopt = '?';
+
+/* Describe how to deal with options that follow non-option ARGV-elements.
+
+   If the caller did not specify anything,
+   the default is REQUIRE_ORDER if the environment variable
+   POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+   REQUIRE_ORDER means don't recognize them as options;
+   stop option processing when the first non-option is seen.
+   This is what Unix does.
+   This mode of operation is selected by either setting the environment
+   variable POSIXLY_CORRECT, or using `+' as the first character
+   of the list of option characters.
+
+   PERMUTE is the default.  We permute the contents of ARGV as we scan,
+   so that eventually all the non-options are at the end.  This allows options
+   to be given in any order, even with programs that were not written to
+   expect this.
+
+   RETURN_IN_ORDER is an option available to programs that were written
+   to expect options and other ARGV-elements in any order and that care about
+   the ordering of the two.  We describe each non-option ARGV-element
+   as if it were the argument of an option with character code 1.
+   Using `-' as the first character of the list of option characters
+   selects this mode of operation.
+
+   The special argument `--' forces an end of option-scanning regardless
+   of the value of `ordering'.	In the case of RETURN_IN_ORDER, only
+   `--' can cause `getopt' to return EOF with `optind' != ARGC.  */
+
+static enum
+{
+  REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+} ordering;	/* Assigned by _getopt_initialize.  */
+
+/* Value of POSIXLY_CORRECT environment variable.  */
+static char *posixly_correct;
+

+#ifdef	__GNU_LIBRARY__
+/* We want to avoid inclusion of string.h with non-GNU libraries
+   because there are many ways it can cause trouble.
+   On some systems, it contains special magic macros that don't work
+   in GCC.  */
+#include <string.h>
+#define my_index	strchr
+#else
+
+/* Avoid depending on library functions or files
+   whose names are inconsistent.  */
+
+char *getenv (const char *);
+
+static char *
+my_index (const char *str, int chr)
+{
+  const char *cursor;
+  /* Walk up to, but not including, the terminating NUL; unlike strchr,
+     searching for '\0' therefore yields a null pointer.  */
+  for (cursor = str; *cursor != '\0'; cursor++)
+    if (*cursor == chr)
+      return (char *) cursor;
+  return 0;
+}
+
+/* If using GCC, we can safely declare strlen this way.
+   If not using GCC, it is ok not to declare it.  */
+#ifdef __GNUC__
+/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h.
+   That was relevant to code that was here before.  */
+#if !defined (__STDC__) || !__STDC__
+/* gcc with -traditional declares the built-in strlen to return int,
+   and has done so at least since version 2.4.5. -- rms.  */
+extern int strlen (const char *);
+#endif /* not __STDC__ */
+#endif /* __GNUC__ */
+
+#endif /* not __GNU_LIBRARY__ */
+

+/* Handle permutation of arguments.  */
+
+/* Describe the part of ARGV that contains non-options that have
+   been skipped.  `first_nonopt' is the index in ARGV of the first of them;
+   `last_nonopt' is the index after the last of them.  */
+
+static int first_nonopt;
+static int last_nonopt;
+
+/* Exchange two adjacent subsequences of ARGV.
+   One subsequence is elements [first_nonopt,last_nonopt)
+   which contains all the non-options that have been skipped so far.
+   The other is elements [last_nonopt,optind), which contains all
+   the options processed since those non-options were skipped.
+
+   `first_nonopt' and `last_nonopt' are relocated so that they describe
+   the new indices of the non-options in ARGV after they are moved.  */
+
+static void
+exchange (char **argv)
+{
+  int lo = first_nonopt;	/* first skipped non-option */
+  int mid = last_nonopt;	/* one past the non-options; first option */
+  int hi = optind;		/* one past the options scanned since */
+
+  /* Rotate the non-options [lo,mid) past the options [mid,hi).
+     Each pass swaps the shorter run, element by element, into its
+     final position.  The longer run is then correct as a whole but
+     is left as two pieces that still have to trade places, which is
+     what the next pass does on the narrowed range.  */
+
+  while (hi > mid && mid > lo)
+    {
+      int nonopts = mid - lo;
+      int opts = hi - mid;
+      int bottom_is_short = opts > nonopts;
+      int count;
+      int partner;
+      int k;
+
+      if (bottom_is_short)
+	{
+	  /* Pair the non-options with the last NONOPTS options.  */
+	  count = nonopts;
+	  partner = hi - nonopts;
+	}
+      else
+	{
+	  /* Pair the options with the first OPTS non-options.  */
+	  count = opts;
+	  partner = mid;
+	}
+
+      for (k = 0; k < count; k++)
+	{
+	  char *held = argv[lo + k];
+	  argv[lo + k] = argv[partner + k];
+	  argv[partner + k] = held;
+	}
+
+      /* Drop the run that has reached its final place from the range.  */
+      if (bottom_is_short)
+	hi -= count;
+      else
+	lo += count;
+    }
+
+  /* The non-options now end at OPTIND; record their new position.  */
+
+  first_nonopt += (optind - last_nonopt);
+  last_nonopt = optind;
+}
+
+/* Initialize the internal data when the first call is made.  */
+
+static const char *
+_getopt_initialize (const char *optstring)
+{
+  /* ARGV-element 0 is the program name, so scanning begins at element 1,
+     with an empty range of skipped non-options and no option element
+     partly scanned.  */
+
+  optind = 1;
+  first_nonopt = 1;
+  last_nonopt = 1;
+  nextchar = NULL;
+
+  posixly_correct = getenv ("POSIXLY_CORRECT");
+
+  /* A leading `-' or `+' in OPTSTRING selects the ordering explicitly and
+     is stepped over; otherwise the ordering depends on whether
+     POSIXLY_CORRECT is set.  */
+  switch (optstring[0])
+    {
+    case '-':
+      ordering = RETURN_IN_ORDER;
+      return optstring + 1;
+
+    case '+':
+      ordering = REQUIRE_ORDER;
+      return optstring + 1;
+
+    default:
+      ordering = posixly_correct != NULL ? REQUIRE_ORDER : PERMUTE;
+      return optstring;
+    }
+}
+

+/* Scan elements of ARGV (whose length is ARGC) for option characters
+   given in OPTSTRING.
+
+   If an element of ARGV starts with '-', and is not exactly "-" or "--",
+   then it is an option element.  The characters of this element
+   (aside from the initial '-') are option characters.	If `getopt'
+   is called repeatedly, it returns successively each of the option characters
+   from each of the option elements.
+
+   If `getopt' finds another option character, it returns that character,
+   updating `optind' and `nextchar' so that the next call to `getopt' can
+   resume the scan with the following option character or ARGV-element.
+
+   If there are no more option characters, `getopt' returns `EOF'.
+   Then `optind' is the index in ARGV of the first ARGV-element
+   that is not an option.  (The ARGV-elements have been permuted
+   so that those that are not options now come last.)
+
+   OPTSTRING is a string containing the legitimate option characters.
+   If an option character is seen that is not listed in OPTSTRING,
+   return '?' after printing an error message.	If you set `opterr' to
+   zero, the error message is suppressed but we still return '?'.
+
+   If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+   so the following text in the same ARGV-element, or the text of the following
+   ARGV-element, is returned in `optarg'.  Two colons mean an option that
+   wants an optional arg; if there is text in the current ARGV-element,
+   it is returned in `optarg', otherwise `optarg' is set to zero.
+
+   If OPTSTRING starts with `-' or `+', it requests different methods of
+   handling the non-option ARGV-elements.
+   See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+   Long-named options begin with `--' instead of `-'.
+   Their names may be abbreviated as long as the abbreviation is unique
+   or is an exact match for some defined option.  If they have an
+   argument, it follows the option name in the same ARGV-element, separated
+   from the option name by a `=', or else in the next ARGV-element.
+   When `getopt' finds a long-named option, it returns 0 if that option's
+   `flag' field is nonzero, the value of the option's `val' field
+   if the `flag' field is zero.
+
+   The elements of ARGV aren't really const, because we permute them.
+   But we pretend they're const in the prototype to be compatible
+   with other systems.
+
+   LONGOPTS is a vector of `struct option' terminated by an
+   element containing a name which is zero.
+
+   LONGIND returns the index in LONGOPTS of the long-named option found.
+   It is only valid when a long-named option has been found by the most
+   recent call.
+
+   If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+   long-named options.	*/
+
+int
+_getopt_internal (int argc, char *const *argv, const char *optstring, const struct option *longopts, int *longind, int long_only)
+{
+  optarg = NULL;
+  /* NOTE(review): this set-up runs only when optind is 0 on entry.  */
+  if (optind == 0)
+    optstring = _getopt_initialize (optstring);
+
+  if (nextchar == NULL || *nextchar == '\0')
+    {
+      /* Advance to the next ARGV-element.  */
+
+      if (ordering == PERMUTE)
+	{
+	  /* If we have just processed some options following some non-options,
+	     exchange them so that the options come first.  */
+
+	  if (first_nonopt != last_nonopt && last_nonopt != optind)
+	    exchange ((char **) argv);
+	  else if (last_nonopt != optind)
+	    first_nonopt = optind;
+
+	  /* Skip any additional non-options
+	     and extend the range of non-options previously skipped.  */
+
+	  while (optind < argc
+		 && (argv[optind][0] != '-' || argv[optind][1] == '\0'))
+	    optind++;
+	  last_nonopt = optind;
+	}
+
+      /* The special ARGV-element `--' means premature end of options.
+	 Skip it like a null option,
+	 then exchange with previous non-options as if it were an option,
+	 then skip everything else like a non-option.  */
+
+      if (optind != argc && !strcmp (argv[optind], "--"))
+	{
+	  optind++;
+
+	  if (first_nonopt != last_nonopt && last_nonopt != optind)
+	    exchange ((char **) argv);
+	  else if (first_nonopt == last_nonopt)
+	    first_nonopt = optind;
+	  last_nonopt = argc;
+
+	  optind = argc;
+	}
+
+      /* If we have done all the ARGV-elements, stop the scan
+	 and back over any non-options that we skipped and permuted.  */
+
+      if (optind == argc)
+	{
+	  /* Set the next-arg-index to point at the non-options
+	     that we previously skipped, so the caller will digest them.  */
+	  if (first_nonopt != last_nonopt)
+	    optind = first_nonopt;
+	  return EOF;
+	}
+
+      /* If we have come to a non-option and did not permute it,
+	 either stop the scan or describe it to the caller and pass it by.  */
+
+      if ((argv[optind][0] != '-' || argv[optind][1] == '\0'))
+	{
+	  if (ordering == REQUIRE_ORDER)
+	    return EOF;
+	  optarg = argv[optind++];
+	  return 1;
+	}
+
+      /* We have found another option-ARGV-element.
+	 Skip the initial punctuation.	*/
+
+      nextchar = (argv[optind] + 1
+		  + (longopts != NULL && argv[optind][1] == '-'));
+    }
+
+  /* Decode the current option-ARGV-element.  */
+
+  /* Check whether the ARGV-element is a long option.
+
+     If long_only and the ARGV-element has the form "-f", where f is
+     a valid short option, don't consider it an abbreviated form of
+     a long option that starts with f.	Otherwise there would be no
+     way to give the -f short option.
+
+     On the other hand, if there's a long option "fubar" and
+     the ARGV-element is "-fu", do consider that an abbreviation of
+     the long option, just like "--fu", and not "-f" with arg "u".
+
+     This distinction seems to be the most useful approach.  */
+
+  if (longopts != NULL
+      && (argv[optind][1] == '-'
+	  || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1])))))
+    {
+      char *nameend;
+      const struct option *p;
+      const struct option *pfound = NULL;
+      int exact = 0;
+      int ambig = 0;
+      int indfound=0;				/* Keep gcc happy */
+      int option_index;
+      /* The name ends at an `=' or at the end of the ARGV-element.  */
+      for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
+	/* Do nothing.	*/ ;
+
+      /* Test all long options for either exact match
+	 or abbreviated matches.  */
+      for (p = longopts, option_index = 0; p->name; p++, option_index++)
+	if (!strncmp (p->name, nextchar, nameend - nextchar))
+	  {
+	    if ((size_t) (nameend - nextchar) == (size_t) strlen (p->name))
+	      {
+		/* Exact match found.  */
+		pfound = p;
+		indfound = option_index;
+		exact = 1;
+		break;
+	      }
+	    else if (pfound == NULL)
+	      {
+		/* First nonexact match found.	*/
+		pfound = p;
+		indfound = option_index;
+	      }
+	    else
+	      /* Second or later nonexact match found.	*/
+	      ambig = 1;
+	  }
+      /* Several abbreviations matched and none was exact: report it.  */
+      if (ambig && !exact)
+	{
+	  if (opterr)
+	    fprintf (stderr, "%s: option `%s' is ambiguous\n",
+		     argv[0], argv[optind]);
+	  nextchar += strlen (nextchar);
+	  optind++;
+	  return '?';
+	}
+      /* A usable match: consume the element and pick up its argument.  */
+      if (pfound != NULL)
+	{
+	  option_index = indfound;
+	  optind++;
+	  if (*nameend)
+	    {
+	      /* Don't test has_arg with >, because some C compilers don't
+		 allow it to be used on enums.	*/
+	      if (pfound->has_arg)
+		optarg = nameend + 1;
+	      else
+		{
+		  if (opterr)
+		    {
+		      if (argv[optind - 1][1] == '-')
+			/* --option */
+			fprintf (stderr,
+				 "%s: option `--%s' doesn't allow an argument\n",
+				 argv[0], pfound->name);
+		      else
+			/* +option or -option */
+			fprintf (stderr,
+			     "%s: option `%c%s' doesn't allow an argument\n",
+			     argv[0], argv[optind - 1][0], pfound->name);
+		    }
+		  nextchar += strlen (nextchar);
+		  return '?';
+		}
+	    }
+	  else if (pfound->has_arg == 1)
+	    {
+	      if (optind < argc)
+		optarg = argv[optind++];
+	      else
+		{
+		  if (opterr)
+		    fprintf (stderr, "%s: option `%s' requires an argument\n",
+			     argv[0], argv[optind - 1]);
+		  nextchar += strlen (nextchar);
+		  return optstring[0] == ':' ? ':' : '?';
+		}
+	    }
+	  nextchar += strlen (nextchar);
+	  if (longind != NULL)
+	    *longind = option_index;
+	  if (pfound->flag)
+	    {
+	      *(pfound->flag) = pfound->val;
+	      return 0;
+	    }
+	  return pfound->val;
+	}
+
+      /* Can't find it as a long option.  If this is not getopt_long_only,
+	 or the option starts with '--' or is not a valid short
+	 option, then it's an error.
+	 Otherwise interpret it as a short option.  */
+      if (!long_only || argv[optind][1] == '-'
+	  || my_index (optstring, *nextchar) == NULL)
+	{
+	  if (opterr)
+	    {
+	      if (argv[optind][1] == '-')
+		/* --option */
+		fprintf (stderr, "%s: unrecognized option `--%s'\n",
+			 argv[0], nextchar);
+	      else
+		/* +option or -option */
+		fprintf (stderr, "%s: unrecognized option `%c%s'\n",
+			 argv[0], argv[optind][0], nextchar);
+	    }
+	  nextchar = (char *) "";
+	  optind++;
+	  return '?';
+	}
+    }
+
+  /* Look at and handle the next short option-character.  */
+
+  {
+    char c = *nextchar++;
+    char *temp = my_index (optstring, c);
+
+    /* Increment `optind' when we start to process its last character.	*/
+    if (*nextchar == '\0')
+      ++optind;
+    /* Reject characters missing from OPTSTRING, and a literal `:'.  */
+    if (temp == NULL || c == ':')
+      {
+	if (opterr)
+	  {
+	    if (posixly_correct)
+	      /* 1003.2 specifies the format of this message.  */
+	      fprintf (stderr, "%s: illegal option -- %c\n", argv[0], c);
+	    else
+	      fprintf (stderr, "%s: invalid option -- %c\n", argv[0], c);
+	  }
+	optopt = c;
+	return '?';
+      }
+    if (temp[1] == ':')
+      {
+	if (temp[2] == ':')
+	  {
+	    /* This is an option that accepts an argument optionally.  */
+	    if (*nextchar != '\0')
+	      {
+		optarg = nextchar;
+		optind++;
+	      }
+	    else
+	      optarg = NULL;
+	    nextchar = NULL;
+	  }
+	else
+	  {
+	    /* This is an option that requires an argument.  */
+	    if (*nextchar != '\0')
+	      {
+		optarg = nextchar;
+		/* If we end this ARGV-element by taking the rest as an arg,
+		   we must advance to the next element now.  */
+		optind++;
+	      }
+	    else if (optind == argc)
+	      {
+		if (opterr)
+		  {
+		    /* 1003.2 specifies the format of this message.  */
+		    fprintf (stderr, "%s: option requires an argument -- %c\n",
+			     argv[0], c);
+		  }
+		optopt = c;
+		if (optstring[0] == ':')
+		  c = ':';
+		else
+		  c = '?';
+	      }
+	    else
+	      /* We already incremented `optind' once;
+		 increment it again when taking next ARGV-elt as argument.  */
+	      optarg = argv[optind++];
+	    nextchar = NULL;
+	  }
+      }
+    return c;
+  }
+}
+
+int getopt (int argc, char *const *argv, const char *optstring)
+{
+  /* The classic interface is the general scanner with long options
+     switched off: no option table, nowhere to store an index, and
+     long_only clear.  */
+  const struct option *const no_longopts = 0;
+  return _getopt_internal (argc, argv, optstring, no_longopts, (int *) 0, 0);
+}
+
+#endif	/* _LIBC or not __GNU_LIBRARY__.  */
+

+#ifdef TEST
+
+/* Compile with -DTEST to make an executable for use in testing
+   the above definition of `getopt'.  */
+
+/* ANSI-style definition: this file is only compiled as C++ (it is wrapped
+   in #ifdef __cplusplus), where K&R parameter lists are not accepted.  */
+int
+main (int argc, char **argv)
+{
+  int c;
+  int digit_optind = 0;
+
+  /* Report each option getopt returns until it signals the end with EOF.  */
+  while (1)
+    {
+      int this_option_optind = optind ? optind : 1;
+
+      c = getopt (argc, argv, "abc:d:0123456789");
+      if (c == EOF)
+	break;
+
+      switch (c)
+	{
+	case '0': case '1': case '2': case '3': case '4':
+	case '5': case '6': case '7': case '8': case '9':
+	  /* Warn when digit options are spread over several ARGV-elements.  */
+	  if (digit_optind != 0 && digit_optind != this_option_optind)
+	    printf ("digits occur in two different argv-elements.\n");
+	  digit_optind = this_option_optind;
+	  printf ("option %c\n", c);
+	  break;
+
+	case 'a':
+	  printf ("option a\n");
+	  break;
+
+	case 'b':
+	  printf ("option b\n");
+	  break;
+
+	case 'c':
+	  printf ("option c with value `%s'\n", optarg);
+	  break;
+
+	case 'd':
+	  /* "d:" is in the option string, so report it the same way as `c'.  */
+	  printf ("option d with value `%s'\n", optarg);
+	  break;
+
+	case '?':
+	  break;
+
+	default:
+	  printf ("?? getopt returned character code 0%o ??\n", c);
+	}
+    }
+
+  /* Whatever getopt left unconsumed is a non-option argument.  */
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      while (optind < argc)
+	printf ("%s ", argv[optind++]);
+      printf ("\n");
+    }
+
+  exit (0);
+}
+
+#endif /* TEST */
+}
+#endif
\ No newline at end of file
diff --git a/src/getopt.h b/src/getopt.h
new file mode 100644
index 0000000..36fcf74
--- /dev/null
+++ b/src/getopt.h
@@ -0,0 +1,185 @@
+/* Declarations for getopt.
+   Copyright (C) 1989-1994, 1996-1999, 2001 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _GETOPT_H
+
+#ifndef __need_getopt
+# define _GETOPT_H 1
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+/* If __GNU_LIBRARY__ is not already defined, either we are being used
+   standalone, or this is the first header included in the source file.
+   If we are being used with glibc, we need to include <features.h>, but
+   that does not exist if we are standalone.  So: if __GNU_LIBRARY__ is
+   not defined, include <ctype.h>, which will pull in <features.h> for us
+   if it's from glibc.  (Why ctype.h?  It's guaranteed to exist and it
+   doesn't flood the namespace with stuff the way some other headers do.)  */
+#if !defined __GNU_LIBRARY__
+# include <ctype.h>
+#endif
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/* For communication from `getopt' to the caller.
+   When `getopt' finds an option that takes an argument,
+   the argument value is returned here.
+   Also, when `ordering' is RETURN_IN_ORDER,
+   each non-option ARGV-element is returned here.  */
+
+extern char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+   This is used for communication to and from the caller
+   and for communication between successive calls to `getopt'.
+
+   On entry to `getopt', zero means this is the first call; initialize.
+
+   When `getopt' returns -1, this is the index of the first of the
+   non-option elements that the caller should itself scan.
+
+   Otherwise, `optind' communicates from one call to the next
+   how much of ARGV has been scanned so far.  */
+
+extern int optind;
+
+/* Callers store zero here to inhibit the error message `getopt' prints
+   for unrecognized options.  */
+
+extern int opterr;
+
+/* Set to an option character which was unrecognized.  */
+
+extern int optopt;
+
+#ifndef __need_getopt
+/* Describe the long-named options requested by the application.
+   The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
+   of `struct option' terminated by an element containing a name which is
+   zero.
+
+   The field `has_arg' is:
+   no_argument		(or 0) if the option does not take an argument,
+   required_argument	(or 1) if the option requires an argument,
+   optional_argument 	(or 2) if the option takes an optional argument.
+
+   If the field `flag' is not NULL, it points to a variable that is set
+   to the value given in the field `val' when the option is found, but
+   left unchanged if the option is not found.
+
+   To have a long-named option do something other than set an `int' to
+   a compiled-in constant, such as set a value from `optarg', set the
+   option's `flag' field to zero and its `val' field to a nonzero
+   value (the equivalent single-letter option character, if there is
+   one).  For long options that have a zero `flag' field, `getopt'
+   returns the contents of the `val' field.  */
+
+struct option
+{
+# if (defined __STDC__ && __STDC__) || defined __cplusplus
+  const char *name;	/* Long option name; zero in the terminating element.  */
+# else
+  char *name;
+# endif
+  /* has_arg can't be an enum because some compilers complain about
+     type mismatches in all the code that assumes it is an int.  */
+  int has_arg;		/* no_argument, required_argument or optional_argument.  */
+  int *flag;		/* If not NULL, *flag is set to VAL when the option is found.  */
+  int val;		/* Returned by `getopt' when FLAG is zero.  */
+};
+
+/* Names for the values of the `has_arg' field of `struct option'.  */
+
+# define no_argument		0
+# define required_argument	1
+# define optional_argument	2
+#endif	/* need getopt */
+
+
+/* Get definitions and prototypes for functions to process the
+   arguments in ARGV (ARGC of them, minus the program name) for
+   options given in OPTS.
+
+   Return the option character from OPTS just read.  Return -1 when
+   there are no more options.  For unrecognized options, or options
+   missing arguments, `optopt' is set to the option letter, and '?' is
+   returned.
+
+   The OPTS string is a list of characters which are recognized option
+   letters, optionally followed by colons, specifying that that letter
+   takes an argument, to be placed in `optarg'.
+
+   If a letter in OPTS is followed by two colons, its argument is
+   optional.  This behavior is specific to the GNU `getopt'.
+
+   The argument `--' causes premature termination of argument
+   scanning, explicitly telling `getopt' that there are no more
+   options.
+
+   If OPTS begins with `-', then non-option arguments are treated as
+   arguments to the option '\1'.  This behavior is specific to the GNU
+   `getopt'.  */
+
+#if (defined __STDC__ && __STDC__) || defined __cplusplus
+# ifdef __GNU_LIBRARY__
+/* Many other libraries have conflicting prototypes for getopt, with
+   differences in the consts, in stdlib.h.  To avoid compilation
+   errors, only prototype getopt for the GNU C library.  */
+extern int getopt (int ___argc, char *const *___argv, const char *__shortopts);
+# elif GETOPT_UNDEFINED /* not __GNU_LIBRARY__ */
+extern int getopt ();
+# endif /* __GNU_LIBRARY__ */
+
+# ifndef __need_getopt
+extern int getopt_long (int ___argc, char *const *___argv,
+			const char *__shortopts,
+		        const struct option *__longopts, int *__longind);
+extern int getopt_long_only (int ___argc, char *const *___argv,
+			     const char *__shortopts,
+		             const struct option *__longopts, int *__longind);
+
+/* Internal only.  Users should not call this directly.  */
+extern int _getopt_internal (int ___argc, char *const *___argv,
+			     const char *__shortopts,
+		             const struct option *__longopts, int *__longind,
+			     int __long_only);
+# endif
+#else /* not __STDC__ */
+extern int getopt ();
+# ifndef __need_getopt
+extern int getopt_long ();
+extern int getopt_long_only ();
+
+extern int _getopt_internal ();
+# endif
+#endif /* __STDC__ */
+
+#ifdef	__cplusplus
+}
+#endif
+
+/* Make sure we later can get all the definitions and declarations.  */
+#undef __need_getopt
+
+#endif /* getopt.h */
diff --git a/src/getopt1.c b/src/getopt1.c
new file mode 100644
index 0000000..45c35ed
--- /dev/null
+++ b/src/getopt1.c
@@ -0,0 +1,196 @@
+/* getopt_long and getopt_long_only entry points for GNU getopt.
+   Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98
+     Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+

+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef _LIBC
+# include <getopt.h>
+#else
+# include "getopt.h"
+#endif
+
+#if !defined __STDC__ || !__STDC__
+/* This is a separate conditional since some stdc systems
+   reject `defined (const)'.  */
+#ifndef const
+#define const
+#endif
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+   actually compiling the library itself.  This code is part of the GNU C
+   Library, but also included in many other GNU distributions.  Compiling
+   and linking in this code is a waste when using the GNU C library
+   (especially if it is a shared library).  Rather than having every GNU
+   program understand `configure --with-gnu-libc' and omit the object files,
+   it is simpler to just do this in the source for each such file.  */
+
+#define GETOPT_INTERFACE_VERSION 2
+#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
+#include <gnu-versions.h>
+#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
+#define ELIDE_CODE
+#endif
+#endif
+
+#ifndef ELIDE_CODE
+
+
+/* This needs to come after some library #include
+   to get __GNU_LIBRARY__ defined.  */
+#ifdef __GNU_LIBRARY__
+#include <stdlib.h>
+#endif
+
+#ifndef	NULL
+#define NULL 0
+#endif
+
+/* Scan ARGV for the short options in OPTIONS and the long options in
+   LONG_OPTIONS by forwarding to _getopt_internal.  Written with a
+   prototype because old-style (K&R) definitions are not valid C23.  */
+int
+getopt_long (int argc, char *const *argv, const char *options,
+	     const struct option *long_options, int *opt_index)
+{
+  /* Final argument is __long_only: 0 selects plain getopt_long behavior.  */
+  return _getopt_internal (argc, argv, options, long_options, opt_index, 0);
+}
+
+/* Like getopt_long, but '-' as well as '--' can indicate a long option.
+   If an option that starts with '-' (not '--') doesn't match a long option,
+   but does match a short option, it is parsed as a short option
+   instead.  */
+
+/* Prototype-style definition: old-style (K&R) parameter lists were
+   removed in C23.  Forwards to _getopt_internal with __long_only set
+   to 1, the only difference from getopt_long.  */
+int
+getopt_long_only (int argc, char *const *argv, const char *options,
+		  const struct option *long_options, int *opt_index)
+{
+  /* Final argument is __long_only: 1 lets '-' introduce a long option.  */
+  return _getopt_internal (argc, argv, options, long_options, opt_index, 1);
+}
+
+# ifdef _LIBC
+libc_hidden_def (getopt_long)
+libc_hidden_def (getopt_long_only)
+# endif
+
+#endif	/* Not ELIDE_CODE.  */
+

+#ifdef TEST
+
+#include <stdio.h>
+
+/* Self-test driver, built only when TEST is defined.  Prototype-style
+   definition and `return' keep it valid without <stdlib.h> and in C23.  */
+int
+main (int argc, char **argv)
+{
+  int c;
+  int digit_optind = 0;
+
+  while (1)
+    {
+      int this_option_optind = optind ? optind : 1;
+      int option_index = 0;
+      static struct option long_options[] =
+      {
+	{"add", 1, 0, 0},
+	{"append", 0, 0, 0},
+	{"delete", 1, 0, 0},
+	{"verbose", 0, 0, 0},
+	{"create", 0, 0, 0},
+	{"file", 1, 0, 0},
+	{0, 0, 0, 0}
+      };
+
+      c = getopt_long (argc, argv, "abc:d:0123456789",
+		       long_options, &option_index);
+      if (c == -1)
+	break;
+
+      switch (c)
+	{
+	case 0:
+	  printf ("option %s", long_options[option_index].name);
+	  if (optarg)
+	    printf (" with arg %s", optarg);
+	  printf ("\n");
+	  break;
+
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+	case '8':
+	case '9':
+	  if (digit_optind != 0 && digit_optind != this_option_optind)
+	    printf ("digits occur in two different argv-elements.\n");
+	  digit_optind = this_option_optind;
+	  printf ("option %c\n", c);
+	  break;
+
+	case 'a':
+	  printf ("option a\n");
+	  break;
+
+	case 'b':
+	  printf ("option b\n");
+	  break;
+
+	case 'c':
+	  printf ("option c with value `%s'\n", optarg);
+	  break;
+
+	case 'd':
+	  printf ("option d with value `%s'\n", optarg);
+	  break;
+
+	case '?':
+	  break;
+
+	default:
+	  printf ("?? getopt returned character code 0%o ??\n", c);
+	}
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      while (optind < argc)
+	printf ("%s ", argv[optind++]);
+      printf ("\n");
+    }
+
+  return 0;
+}
+
+#endif /* TEST */
diff --git a/src/joinAlignmentFiles.cpp b/src/joinAlignmentFiles.cpp
new file mode 100644
index 0000000..025946d
--- /dev/null
+++ b/src/joinAlignmentFiles.cpp
@@ -0,0 +1,108 @@
+#include "libMems/IntervalList.h"
+#include <fstream>
+#include <vector>
+#include <sstream>
+#include "libMems/SlotAllocator.h"
+#include "libMems/Match.h"
+#include "libMems/GappedAlignment.h"
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+// Entry point for joinAlignments.  The join logic is disabled (commented out below), so after argument checks this always reports failure.
+int main( int argc, char* argv[] )
+{
+	if( argc != 4 ){
+		cerr << "joinAlignments <mauve .mln base name> <number of files> <mauve output file>\n";
+		return -1;
+	}
+	string base_name = argv[1];
+	stringstream aln_count_str(argv[2]);
+	string out_fname = argv[3];
+	uint aln_count = 0;	// explicit initial value in case argv[2] does not parse
+	aln_count_str >> aln_count;
+	cerr << "aln_count is: " << aln_count << endl;
+	cerr << "fix this trash code\n";
+	return -1;	// previously threw a string literal that nothing caught, ending the process via std::terminate
+/*
+try{
+	SlotAllocator< Match >& sa = SlotAllocator< Match >::GetSlotAllocator();
+	IntervalList all_iv_list;
+	for( uint alnI = 1; alnI <= aln_count; alnI++ )
+	{
+		IntervalList cur_iv_list;
+		try{
+			stringstream aln_fname;
+			aln_fname << base_name << alnI << ".mln";
+			ifstream cur_aln_file( aln_fname.str().c_str() );
+			if( !cur_aln_file.is_open() )
+			{
+				cerr << "Couldn't open: \"" << aln_fname.str() << "\"\n";
+				return -1;
+			}
+			cur_iv_list.ReadList( cur_aln_file );
+			// hack: trim out all gapped alignments
+			for( uint ivI = 0; ivI < cur_iv_list.size(); ivI++ )
+			{
+				Interval& cur_iv = cur_iv_list[ivI];
+				vector<AbstractMatch*> new_matches;
+				for( uint mI = 0; mI < cur_iv.matches.size(); mI++ )
+				{
+					GappedAlignment* ga = dynamic_cast<GappedAlignment*>(cur_iv.matches[mI]);
+					if( ga == NULL )
+					{
+						if( mI < 5 || mI > cur_iv.matches.size() - 5 )
+							new_matches.push_back( cur_iv.matches[mI] );
+						else
+							sa.Free(static_cast<Match*>(cur_iv.matches[mI]));
+						continue;
+					}
+					delete ga;
+				}
+				cur_iv.matches = new_matches;
+				cur_iv.CalculateOffset();
+			}
+		}catch(gnException& gne){
+			// try reading the .alignment file instead of the .mln
+			stringstream aln_fname;
+			aln_fname << base_name << alnI << ".alignment";
+			ifstream cur_aln_file( aln_fname.str().c_str() );
+			if( !cur_aln_file.is_open() )
+			{
+				cerr << "Couldn't open: \"" << aln_fname.str() << "\"\n";
+				return -1;
+			}
+			cur_iv_list.ReadStandardAlignment( cur_aln_file );
+			for( uint ivI = 0; ivI < cur_iv_list.size(); ivI++ )
+			{
+				cout << ((GappedAlignment*)cur_iv_list[ivI].matches[0])->Start(0) << endl;
+			}
+		}
+		if( alnI == 0 )
+		{
+			all_iv_list = cur_iv_list;
+		}else{
+			all_iv_list.insert( all_iv_list.end(), cur_iv_list.begin(), cur_iv_list.end() );
+		}
+		// progress update
+		if( (alnI*100)/aln_count != ((alnI*100)-1)/aln_count ){
+			cout << "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bRead " << (alnI*100)/aln_count << "% of data";
+			cout.flush();
+		}
+	}
+	cout << endl << "Writing output\n";
+	ofstream out_file( out_fname.c_str() );
+	if( !out_file.is_open() )
+	{
+		cerr << "Error opening \"" << out_fname << "\"\n";
+		return -2;
+	}
+	all_iv_list.WriteList( out_file );
+}catch( gnException& gne )
+{
+        cerr << gne << endl;
+}
+*/
+	return 0;	// unreachable while the early return above is in place
+}
diff --git a/src/makeBadgerMatrix.cpp b/src/makeBadgerMatrix.cpp
new file mode 100644
index 0000000..66ed5fc
--- /dev/null
+++ b/src/makeBadgerMatrix.cpp
@@ -0,0 +1,117 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string>
+#include <fstream>
+#include <vector>
+#include <algorithm>
+#include <iostream>
+#include "libMems/IntervalList.h"
+#include "libMems/MatchList.h"
+#include "libMems/Aligner.h"
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+class livComp {
+public:
+	livComp( uint seq ) : m_seq( seq ) {}
+	// Strict ordering of labeled intervals by LeftEnd() in sequence m_seq.
+	bool operator()( const pair< Interval*, uint >& lhs, const pair< Interval*, uint >& rhs ) {
+		return lhs.first->LeftEnd(m_seq) < rhs.first->LeftEnd(m_seq);
+	}
+protected:
+	uint m_seq;	// index of the sequence whose coordinates are compared
+};
+
+int main( int argc, char* argv[] )	// reads an XMFA alignment (argv[1]); writes a per-sequence block order (argv[2]) and block coordinates (argv[3])
+{
+	if( argc != 4 )
+	{
+		cerr << "Usage: makeBadgerMatrix <input xmfa> <output badger file> <LCB coordinate file>\n";
+		return -1;
+	}
+	ifstream aln_in;
+	aln_in.open( argv[1] );
+	if( !aln_in.is_open() ){
+		cerr << "Error opening " << argv[1] << endl;
+		return -1;
+	}
+	ofstream badger_out;
+	badger_out.open( argv[2] );
+	if( !badger_out.is_open() ){
+		cerr << "Error writing to " << argv[2] << endl;
+		return -1;
+	}
+
+	ofstream coord_out;
+	coord_out.open( argv[3] );
+	if( !coord_out.is_open() ){
+		cerr << "Error writing to " << argv[3] << endl;
+		return -2;
+	}
+
+	try{
+		IntervalList input_ivs;
+		input_ivs.ReadStandardAlignment( aln_in );
+		aln_in.close();
+
+		vector< pair< Interval*, uint > > labeled_ivs( input_ivs.size() );	// each interval paired with its index in the input
+		for( size_t ivI = 0; ivI < input_ivs.size(); ivI++ )
+			labeled_ivs[ivI] = make_pair( &input_ivs[ivI], ivI );
+
+		// write out block boundaries
+		for( uint seqI = 0; seqI < input_ivs.seq_filename.size(); ++seqI )	// tab-separated header row, two columns per sequence
+		{
+			if(seqI > 0) coord_out << '\t';
+			coord_out << "seq" << seqI << "_leftend\tseq" << seqI << "_rightend";
+		}
+		coord_out << endl;
+		for( size_t ivI = 0; ivI < input_ivs.size(); ivI++ )	// one coordinate row per block, still in input order
+		{
+			if( labeled_ivs[ivI].first->Multiplicity() == 1 )	// blocks with multiplicity 1 are left out
+				continue;
+			for( uint seqI = 0; seqI < input_ivs.seq_filename.size(); ++seqI )
+			{
+				if(seqI > 0) coord_out << '\t';
+				string sign = labeled_ivs[ivI].first->Start(seqI) < 0 ? "-" : "";	// a negative Start() puts '-' in front of both ends
+				coord_out << sign << labeled_ivs[ivI].first->LeftEnd(seqI) << '\t' << sign << labeled_ivs[ivI].first->RightEnd(seqI);
+			}
+			coord_out << endl;
+		}
+
+		for( uint seqI = 0; seqI < input_ivs.seq_filename.size(); ++seqI )	// one output line per sequence: file name, then comma-separated block labels
+		{
+			badger_out << input_ivs.seq_filename[seqI];
+			livComp lc(seqI);
+			std::sort( labeled_ivs.begin(), labeled_ivs.end(), lc );	// reorder blocks by their left end in this sequence
+			for( size_t ivI = 0; ivI < labeled_ivs.size(); ivI++ )
+			{
+				if( labeled_ivs[ivI].first->LeftEnd(seqI) == NO_MATCH )	// skip blocks whose left end in this sequence is NO_MATCH
+					continue;
+				if( labeled_ivs[ivI].first->Multiplicity() == 1 )	// same filter as the coordinate table
+					continue;
+				int fs = labeled_ivs[ivI].first->FirstStart();
+				const char* dir = labeled_ivs[ivI].first->Orientation(seqI) == labeled_ivs[ivI].first->Orientation(fs) ? "" : "-";	// '-' when orientation differs from that of sequence fs
+				badger_out << "," << dir << labeled_ivs[ivI].second + 1;	// label is the 1-based input index
+			}
+			badger_out << endl;
+		}
+
+	}catch( gnException& gne ){
+		cerr << gne << endl;
+		return -1;
+	}catch( exception& e ){
+		cerr << e.what() << endl;
+		return -2;
+	}catch( char const* c ){
+		cerr << c << endl;
+		return -3;
+	}catch(...){
+		cerr << "Unhandled exception" << endl;
+		return -4;
+	}
+}
+
diff --git a/src/makeMc4Matrix.cpp b/src/makeMc4Matrix.cpp
new file mode 100644
index 0000000..391cf2a
--- /dev/null
+++ b/src/makeMc4Matrix.cpp
@@ -0,0 +1,112 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string>
+#include <fstream>
+#include <vector>
+#include <algorithm>
+#include <iostream>
+#include "libMems/IntervalList.h"
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+class livComp {
+public:
+	livComp( uint seq ) : m_seq( seq ) {}
+	// True when the first pair's interval has the smaller LeftEnd() in sequence m_seq.
+	bool operator()( const pair< Interval*, uint >& first_iv, const pair< Interval*, uint >& second_iv ) {
+		return first_iv.first->LeftEnd(m_seq) < second_iv.first->LeftEnd(m_seq);
+	}
+protected:
+	uint m_seq;	// sequence index used for the comparison
+};
+
+// Reads an XMFA alignment (argv[1]) and writes to argv[2], per sequence, a line of numbered block ends followed by a "standard" line of signed block labels.
+int main( int argc, char* argv[] )
+{
+	if( argc < 3 ){
+		cerr << "Usage: makeMc4Matrix <input xmfa> <output badger file>\n";	// message used to name makeBadgerMatrix (copy-paste error)
+		return -1;
+	}
+	ifstream aln_in;
+	aln_in.open( argv[1] );
+	if( !aln_in.is_open() ){
+		cerr << "Error opening " << argv[1] << endl;
+		return -1;
+	}
+	ofstream badger_out;
+	badger_out.open( argv[2] );
+	if( !badger_out.is_open() ){
+		cerr << "Error writing to " << argv[2] << endl;
+		return -1;
+	}
+
+	try{
+		IntervalList input_ivs;
+		input_ivs.ReadStandardAlignment( aln_in );
+		aln_in.close();
+
+		vector< pair< Interval*, uint > > labeled_ivs;
+		for( size_t ivI = 0; ivI < input_ivs.size(); ivI++ )
+		{
+			if( input_ivs[ivI].Multiplicity() != input_ivs.seq_filename.size() )
+				continue;	// not an N-way block
+			labeled_ivs.push_back( make_pair( &input_ivs[ivI], ivI ) );
+		}
+		for( uint seqI = 0; seqI < input_ivs.seq_filename.size(); ++seqI )
+		{
+			badger_out << input_ivs.seq_filename[seqI];
+			livComp lc(seqI);
+			std::sort( labeled_ivs.begin(), labeled_ivs.end(), lc );	// order blocks by left end in this sequence
+			if( seqI == 0 )	// sequence 0 defines the labels: blocks are numbered 1..N in its order
+			{
+				for( size_t ivI = 0; ivI < labeled_ivs.size(); ivI++ )
+				{
+					labeled_ivs[ivI].second = ivI + 1;
+					if( labeled_ivs[ivI].first->Orientation(seqI) == AbstractMatch::reverse )
+						labeled_ivs[ivI].first->Invert();	// flip blocks that are reverse in sequence 0
+				}
+			}
+			vector< size_t > other( labeled_ivs.size() * 2 + 2 );	// slots 0 and 2N+1 are never assigned and print as 0
+			for( size_t ivI = 0; ivI < labeled_ivs.size(); ivI++ )
+			{
+				if(labeled_ivs[ivI].first->Orientation(seqI) == AbstractMatch::forward)
+				{
+					other[ivI*2+1] = absolut(labeled_ivs[ivI].second)*2 - 1;	// forward block k contributes 2k-1 then 2k
+					other[ivI*2+2] = absolut(labeled_ivs[ivI].second)*2;
+				}else{
+					other[ivI*2+1] = absolut(labeled_ivs[ivI].second)*2;	// any other orientation swaps the pair
+					other[ivI*2+2] = absolut(labeled_ivs[ivI].second)*2 - 1;
+				}
+			}
+			for( size_t ivI = 0; ivI < other.size(); ivI++ )
+			{
+				badger_out << "," << other[ivI];
+			}
+			badger_out << "\nstandard";
+			for( size_t ivI = 0; ivI < labeled_ivs.size(); ivI++ )
+			{
+				badger_out << "," << (labeled_ivs[ivI].first->Orientation(seqI) == AbstractMatch::reverse? "-" : "") << labeled_ivs[ivI].second;
+			}
+
+			badger_out << endl;
+		}
+
+	}catch( gnException& gne ){
+		cerr << gne << endl;
+		return -1;
+	}catch( exception& e ){
+		cerr << e.what() << endl;
+		return -2;
+	}catch( char const* c ){
+		cerr << c << endl;
+		return -3;
+	}catch(...){
+		cerr << "Unhandled exception" << endl;
+		return -4;
+	}
+}
+
diff --git a/src/mauveAligner.cpp b/src/mauveAligner.cpp
new file mode 100644
index 0000000..5eea936
--- /dev/null
+++ b/src/mauveAligner.cpp
@@ -0,0 +1,919 @@
+/*******************************************************************************
+ * $Id: memsApp.cpp,v 1.49 2004/04/23 00:18:45 darling Exp $
+ * This file is copyright 2002-2004 Aaron Darling.  All rights reserved.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * rights.  Redistribution of this file, in whole or in part is prohibited
+ * without express permission.
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mauveAligner.h"
+#include "getopt.h"
+#include <sstream>
+#include <stdexcept>
+#include "libGenome/gnSequence.h"
+#include "libMems/Matrix.h"
+#include "libMems/NumericMatrix.h"
+#include "libMems/DNAFileSML.h"
+#include "libMems/MemHash.h"
+#include "libMems/MaskedMemHash.h"
+#include "libMems/Aligner.h"
+#include "libMems/MatchList.h"
+#include "libMems/RepeatHash.h"
+#include "libMems/Interval.h"
+#include "libMems/IntervalList.h"
+#include "libMems/gnAlignedSequences.h"
+#include "libMems/Islands.h"
+#include "libMems/MuscleInterface.h"
+#include "libMems/DistanceMatrix.h"
+
+#include "boost/filesystem/operations.hpp"
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+// Guard object: calls Clear() on the bound MatchList when it is destroyed.
+class MLDeleter {
+	MatchList& m_target;	// private by default; the list this guard clears
+public:
+	MLDeleter( MatchList& ml ) : m_target( ml ) {}
+	~MLDeleter(){ m_target.Clear(); }
+};
+
+#define NELEMS(a) ( sizeof( a ) / sizeof( *(a) ) )	/* element count of a true array; argument parenthesized so expressions expand safely */
+
+int main( int argc, char* argv[] )
+{
+#if	WIN32
+	// CPU-bound console programs multi-task poorly under Win32, so run at
+	// below-normal priority to let GUI applications stay responsive.
+	SetPriorityClass(GetCurrentProcess(), BELOW_NORMAL_PRIORITY_CLASS);
+#endif
+	const int exit_status = doAlignment(argc, argv);
+	return exit_status;
+}
+
+/**
+ * This application uses libMems to produce full scale multiple
+ * genomic alignments.  First the command line is parsed to get the names of data files
+ * and user specified options.  Next each sequence and its corresponding sorted mer list
+ * are loaded.  If the sorted mer list fails to load, a new one is created.
+ * If it is necessary to find matches in the sequences instead of loading them, each 
+ * sequence and SML are added to a MemHash which searches for exact matches.  
+ * Then LCBs are found if the user requested it.  Finally, either the MatchList or the
+ * LCB list is written to disk.
+ */
+int doAlignment( int argc, char* argv[] ){
+try{
+	if( argc <= 0 ){
+		print_usage( "mauveAligner" );
+		return -1;
+	}
+	if( argc == 1 ){
+		print_usage( argv[0] );
+		return -1;
+	}
+
+	// set the Muscle path
+	MuscleInterface& mi = MuscleInterface::getMuscleInterface();
+	mi.ParseMusclePath( argv[0] );
+
+	//
+	// definitions of the variables that can be set by the user on the command line:
+	//
+	vector<string> seq_files;
+	vector<string> sml_files;
+	vector<gnSequence*> seq_table;
+	vector<DNAFileSML*> sml_table;
+	uint seed_size = 0;	// Use default settings
+	int seed_rank = 0;
+	boolean recursive = true;
+	boolean lcb_extension = true;
+	boolean gapped_alignment = true;
+	boolean create_LCBs = true;
+	boolean calculate_coverage = false;
+	int64 LCB_size = -1;
+	string output_file = "";
+	boolean read_matches = false;
+	boolean read_lcbs = false;
+	boolean find_repeats = false;
+	boolean print_stats = false;
+	boolean eliminate_overlaps = false;
+	boolean nway_filter = false;
+	boolean collinear_genomes = false;
+	string match_input_file = "";
+	string lcb_stats_file = "";
+	string island_file = "";
+	string lcb_file = "";
+	string tree_filename = "";
+	string coverage_list_file = "";
+	boolean output_alignment = false;
+	string alignment_output_dir = "";
+	string alignment_output_format = "";
+	string alignment_output_file = "";
+	string match_log = "";
+	string offset_log = "";
+	string merge_log = "";
+	// island related
+	uint island_size = 0;
+	uint island_break_min = 0;
+	// backbone related
+	uint backbone_size = 0;
+	uint max_backbone_gap = 0;
+	int64 min_r_gap_length = -1;
+	string backbone_file = "";
+	boolean output_backbone = false;
+	// for parallelization of LCB alignment
+	vector< int > realign_lcbs;
+	string muscle_args = "";
+	string gapped_aligner;
+
+	string permutation_filename;
+	int64 permutation_weight = -1;
+
+	boolean lcb_match_input_format = false;
+	int opt_max_extension_iters = -1;
+
+	uint seqI;
+	boolean print_version = false;
+	int max_gapped_alignment_length = -1;
+	
+	ostream* detail_list_out = NULL;	/**< output stream for detail list */
+
+	//
+	// parse command line with gnu getopt
+	//
+	int opt;
+	int config_opt;
+	int ac = argc;
+	char** av = argv;
+	// 'm' mer size
+	// 'r' recursive
+	const char* short_args= "";
+	enum opt_names{
+		opt_mums,
+		opt_no_recursion,
+		opt_no_lcb_extension,
+		opt_no_gapped_alignment,
+		opt_seed_size,
+		opt_seed_type,
+		opt_weight,
+		opt_output,
+		opt_eliminate_overlaps,
+		opt_n_way_filter,
+		opt_match_input,
+		opt_lcb_input,
+		opt_output_alignment,
+		opt_id_matrix,
+		opt_island_size,
+		opt_island_output,
+		opt_island_break_min,
+		opt_backbone_size,
+		opt_max_backbone_gap,
+		opt_backbone_output,
+		opt_coverage_output,
+		opt_repeats,
+		opt_gapped_aligner,
+		opt_max_gapped_aligner_length,
+		opt_min_recursive_gap_length,
+		opt_output_guide_tree,
+		opt_alignment_output_dir,
+		opt_alignment_output_format,
+		opt_match_log,
+		opt_offset_log,
+		opt_merge_match_log,
+		opt_version,
+		opt_scratch_path,
+		opt_realign_lcb,
+		opt_id_matrix_input,
+		opt_collinear,
+		opt_muscle_args,
+		opt_permutation_matrix_output,
+		opt_permutation_matrix_min_weight,
+		opt_lcb_match_input,
+		opt_max_extension_iterations,
+	};
+	struct option long_opts[] = {
+		{"mums", no_argument, &config_opt, opt_mums},
+		{"no-recursion", no_argument, &config_opt, opt_no_recursion},
+		{"no-lcb-extension", no_argument, &config_opt, opt_no_lcb_extension},
+		{"no-gapped-alignment", no_argument, &config_opt, opt_no_gapped_alignment},
+		{"seed-size", required_argument, &config_opt, opt_seed_size},
+		{"seed-type", required_argument, &config_opt, opt_seed_type},
+		{"weight", required_argument, &config_opt, opt_weight},
+		{"output", required_argument, &config_opt, opt_output},
+		{"eliminate-overlaps", no_argument, &config_opt, opt_eliminate_overlaps},
+		{"n-way-filter", no_argument, &config_opt, opt_n_way_filter},
+		{"match-input", required_argument, &config_opt, opt_match_input},
+		{"lcb-input", required_argument, &config_opt, opt_lcb_input},
+		{"output-alignment", optional_argument, &config_opt, opt_output_alignment},
+		{"id-matrix", optional_argument, &config_opt, opt_id_matrix},
+		{"island-size", required_argument, &config_opt, opt_island_size},
+		{"island-output", required_argument, &config_opt, opt_island_output},
+		{"island-break-min", required_argument, &config_opt, opt_island_break_min},
+		{"backbone-size", required_argument, &config_opt, opt_backbone_size},
+		{"max-backbone-gap", required_argument, &config_opt, opt_max_backbone_gap},
+		{"backbone-output", optional_argument, &config_opt, opt_backbone_output},
+		{"coverage-output", optional_argument, &config_opt, opt_coverage_output},
+		{"repeats", no_argument, &config_opt, opt_repeats},
+		{"max-gapped-aligner-length", required_argument, &config_opt, opt_max_gapped_aligner_length},
+		{"min-recursive-gap-length", required_argument, &config_opt, opt_min_recursive_gap_length},
+		{"output-guide-tree", required_argument, &config_opt, opt_output_guide_tree},
+		{"alignment-output-dir", required_argument, &config_opt, opt_alignment_output_dir},
+		{"alignment-output-format", required_argument, &config_opt, opt_alignment_output_format},
+		{"match-log", required_argument, &config_opt, opt_match_log},
+		{"offset-log", required_argument, &config_opt, opt_offset_log},
+		{"merge-match-log", required_argument, &config_opt, opt_merge_match_log},
+		{"version", no_argument, &config_opt, opt_version},
+		{"scratch-path", required_argument, &config_opt, opt_scratch_path},
+		{"realign-lcb", required_argument, &config_opt, opt_realign_lcb},
+		{"id-matrix-input", required_argument, &config_opt, opt_id_matrix_input},
+		{"collinear", no_argument, &config_opt, opt_collinear},
+		{"muscle-args", required_argument, &config_opt, opt_muscle_args},
+		{"permutation-matrix-output", required_argument, &config_opt, opt_permutation_matrix_output},
+		{"permutation-matrix-min-weight", required_argument, &config_opt, opt_permutation_matrix_min_weight},
+		{"lcb-match-input", no_argument, &config_opt, opt_lcb_match_input},
+		{"max-extension-iterations", required_argument, &config_opt, opt_max_extension_iterations},
+
+		{0, 0, 0, 0}	// for correct termination of option list
+						// getopt_long can segfault without this
+	};
+
+	int indexptr;
+	while( (opt = getopt_long( ac, av, short_args, long_opts, &indexptr )) != EOF ){
+		switch( opt ){
+			case 0:
+				switch(config_opt){
+					case opt_mums:
+						create_LCBs = false;
+						break;
+					case opt_no_recursion:
+						recursive = false;
+						break;
+					case opt_no_lcb_extension:
+						lcb_extension = false;
+						break;
+					case opt_no_gapped_alignment:
+						gapped_alignment = false;
+						break;
+					case opt_seed_size:
+						seed_size = atoi( optarg );
+						break;
+					case opt_seed_type:
+						if( strcmp( "solid", optarg ) == 0 )
+							seed_rank = SOLID_SEED;
+						else if( strcmp( "coding", optarg ) == 0 )
+							seed_rank = CODING_SEED;
+						else if( strcmp( "spaced", optarg ) == 0 )
+							seed_rank = 0;
+						else if( strcmp( "spaced1", optarg ) == 0 )
+							seed_rank = 1;
+						else if( strcmp( "spaced2", optarg ) == 0 )
+							seed_rank = 2;
+						else
+							cerr << "Warning: --seed-type parameter not understood.  Using default spaced seeds\n";
+						break;
+					case opt_weight:
+						LCB_size = atol( optarg );
+						break;
+					case opt_output:
+						output_file = optarg;
+						break;
+					case opt_eliminate_overlaps:
+						eliminate_overlaps = true;
+						break;
+					case opt_n_way_filter:
+						nway_filter = true;
+						break;
+					case opt_match_input:
+						read_matches = true;
+						match_input_file = optarg;
+						break;
+					case opt_lcb_input:
+						lcb_file = optarg;
+						read_lcbs = true;
+						break;
+					case opt_output_alignment:
+						output_alignment = true;
+						if( optarg != NULL )
+							alignment_output_file = optarg;
+						break;
+					case opt_id_matrix:
+						break;
+					case opt_island_size:
+						island_size = atoi( optarg );
+						break;
+					case opt_island_output:
+						island_file = optarg;
+						break;
+					case opt_island_break_min:
+						island_break_min = atoi( optarg );
+						break;
+					case opt_backbone_size:
+						backbone_size = atoi( optarg );
+						break;
+					case opt_max_backbone_gap:
+						max_backbone_gap = atoi( optarg );
+						break;
+					case opt_backbone_output:
+						backbone_file = optarg;
+						output_backbone = true;
+						break;
+					case opt_coverage_output:
+						if( optarg != NULL )
+							coverage_list_file = optarg;
+						calculate_coverage = true;
+						break;
+					case opt_repeats:
+						find_repeats = true;
+						break;
+					case opt_gapped_aligner:
+						gapped_aligner = optarg;
+						break;
+					case opt_max_gapped_aligner_length:
+						max_gapped_alignment_length = atoi( optarg );
+						break;
+					case opt_min_recursive_gap_length:
+						min_r_gap_length = atol( optarg );
+						break;
+					case opt_output_guide_tree:
+						tree_filename = optarg;
+						break;
+					case opt_alignment_output_dir:
+						alignment_output_dir = optarg;
+						break;
+					case opt_alignment_output_format:
+						alignment_output_format = optarg;
+						break;
+					case opt_match_log:
+						match_log = optarg;
+						break;
+					case opt_offset_log:
+						offset_log = optarg;
+						break;
+					case opt_merge_match_log:
+						merge_log = optarg;
+						break;
+					case opt_version:
+						print_version = true;
+						break;
+					case opt_scratch_path:
+						FileSML::registerTempPath( optarg );
+						break;
+					case opt_realign_lcb:
+						realign_lcbs.push_back( atoi( optarg ) );
+						break;
+					case opt_id_matrix_input:
+					case opt_collinear:
+						collinear_genomes = true;
+						break;
+					case opt_muscle_args:
+						muscle_args = optarg;
+						mi.SetExtraMuscleArguments( muscle_args );
+						break;
+					case opt_permutation_matrix_output:
+						permutation_filename = optarg;
+						break;
+					case opt_permutation_matrix_min_weight:
+						permutation_weight = atol(optarg);
+						break;
+					case opt_lcb_match_input:
+						lcb_match_input_format = true;
+						break;
+					case opt_max_extension_iterations:
+						opt_max_extension_iters = atoi(optarg);
+						break;
+					default:
+						print_usage( argv[0] );
+						return -1;
+				}
+				break;
+			default:
+				print_usage( argv[0] );
+				return -1;
+		}
+	}
+	// now read in the seq and sml file names from av
+	boolean seq_name_arg = true;
+	for( int optI = optind; optI < argc; optI++ ){
+		if( seq_name_arg )
+			seq_files.push_back( av[ optI ] );
+		else
+			sml_files.push_back( av[ optI ] );
+		seq_name_arg = !seq_name_arg;
+	}
+	
+	// print the version if the user requested it
+	if( print_version ){
+		cerr << "mauveAligner " << " build date " << __DATE__ << " at " << __TIME__ << endl;
+	}
+
+
+	//
+	// check validity of command line option combinations
+	//
+	if( ( island_size != 0 && island_file == "" ) || ( island_size == 0 && island_file != "" ) ){
+		cerr << "Error: Both --island-output and --island-size must be specified to generate islands\n";
+		return -1;
+	}
+
+	if( (alignment_output_dir == "" && alignment_output_format != "") || 
+		(alignment_output_dir != "" && alignment_output_format == "") ){
+		cerr << "Error: Both --alignment-output-dir and --alignment-output-format must be specified in order to generate alignment output in a custom format\n";
+		return -1;
+	}
+	
+	if( alignment_output_format != "" ){
+		if( !gnAlignedSequences::isSupportedFormat( alignment_output_format ) ){
+			cerr << "Error:  " << alignment_output_format << " is not a supported alignment format.\n";
+			return -1;
+		}
+	}
+
+	if( find_repeats ){
+		if( create_LCBs || read_matches || read_lcbs || calculate_coverage || 
+		    island_file != "" || island_size != 0 || recursive || lcb_stats_file != "" ){
+			cerr << "A paramater has been specified that is incompatible with repeat list generation\n";
+			return -1;
+		}
+	}
+
+	//
+	// done parsing and checking command line options
+	// Start doing the work
+	//
+
+	MatchList match_list;
+	MLDeleter deleter( match_list );
+	
+	if( seq_files.size() == 1 && sml_files.size() == 0 ){
+		LoadMFASequences( match_list, seq_files[0], &cout);
+		if( find_repeats || ( !read_lcbs && !read_matches ) )
+			match_list.CreateMemorySMLs(seed_size, &cout, seed_rank);
+	}else if( seq_files.size() != sml_files.size() ){
+		cerr << "Error: Each sequence file must have a corresponding SML file specified.\n";
+		return -1;
+	}else{
+		match_list.seq_filename = seq_files;
+		match_list.sml_filename = sml_files;
+		LoadSequences( match_list, &cout );
+		if( find_repeats || !read_matches || ( !read_lcbs && !read_matches ) )
+			match_list.LoadSMLs( seed_size, &cout, seed_rank );
+	}
+
+	ostream* match_out;
+	if( output_file != "" ){
+		ofstream* match_out_file = new ofstream( output_file.c_str() );
+		if( !match_out_file->is_open() ){
+			cerr << "Error opening " << output_file << endl;
+			return -2;
+		}
+		match_out = match_out_file;
+	}else
+		match_out = &cout;
+	
+	// search for repetitive regions
+	if( find_repeats ){
+		RepeatHash repeat_finder;
+		repeat_finder.LogProgress( &cout );
+		repeat_finder.FindMatches( match_list );
+		WriteList( match_list, *match_out );
+		match_out->flush();
+		return 0;
+	}
+	
+	// read matches if the user requested it
+	if( read_matches ){
+		ifstream match_in( match_input_file.c_str() );
+		if( !match_in.is_open() ){
+			cerr << "Error opening " << match_input_file << endl;
+			return -2;
+		}
+		if( !lcb_match_input_format )
+		{
+			try{
+				ReadList( match_list, match_in );
+			}catch( gnException& gne ){
+				cerr << "Error reading " << match_input_file << "\nPossibly corrupt file or invalid file format\n";
+				return -2;
+			}
+		}else{
+			IntervalList m_iv_list;
+			m_iv_list.ReadList( match_in );
+			for( int ivI = 0; ivI < m_iv_list.size(); ivI++ ){
+				for( int mI = 0; mI < m_iv_list[ivI].GetMatches().size(); mI++ ){
+					Match* m = dynamic_cast< Match* >(m_iv_list[ivI].GetMatches()[mI]);
+					if( m != NULL && m->Multiplicity() > 1)
+						match_list.push_back(m->Copy());
+				}
+			}
+		}
+		if( seq_files.size() > 1 )
+			match_list.seq_filename = seq_files;
+		else if( match_list.seq_table.size() == 0 )
+			// fill seq_table with empty sequences
+			for( seqI = 0; seqI < match_list.seq_filename.size(); seqI++ )
+				match_list.seq_table.push_back( new gnSequence() );
+	}else if ( !read_lcbs ){
+		// get full subset matches
+		MaskedMemHash match_finder;
+
+		if( nway_filter ){
+			// only find the n-way matches
+			uint64 nway_mask = 1;
+			nway_mask <<= match_list.seq_table.size();
+			nway_mask--;
+			match_finder.SetMask( nway_mask );
+		}
+		match_finder.LogProgress( &cout );
+		fstream match_log_out;
+		if( match_log != "" ){
+			match_log_out.open( match_log.c_str(), ios::in | ios::out );
+			if( !match_log_out.is_open() ){
+				cerr << "Error opening " << match_log << endl;
+				return -1;
+			}
+			match_finder.SetMatchLog( &match_log_out );
+			// append to whatever's already in the file
+			match_log_out.seekg( 0, ios::end );
+		}
+		fstream offset_log_out;
+		vector< gnSeqI > offset_start;
+		for( seqI = 0; seqI < match_list.seq_table.size(); seqI++ )
+			offset_start.push_back( 0 );
+		
+		if( offset_log != "" ){
+			offset_log_out.open( offset_log.c_str(), ios::in | ios::out );
+			if( !offset_log_out.is_open() ){
+				cerr << "Error opening " << offset_log << endl;
+				return -1;
+			}
+			match_finder.SetOffsetLog( &offset_log_out );
+			string last_line;
+			string cur_line;
+			while( getline( offset_log_out, cur_line ) ){
+				last_line = cur_line;
+			}
+			if( last_line != "" ){
+				stringstream cur_off_stream( last_line );
+				for( seqI = 0; seqI < match_list.seq_table.size(); seqI++ )
+					cur_off_stream >> offset_start[ seqI ];
+			}
+			offset_log_out.clear();
+		}
+		ifstream merge_log_in;
+		if( merge_log != "" ){
+			merge_log_in.open( merge_log.c_str() );
+			if( !merge_log_in.is_open() ){
+				cerr << "Error opening " << merge_log << endl;
+				return -1;
+			}
+
+			for( seqI = 0; seqI < match_list.seq_table.size(); seqI++ ){
+				if( !match_finder.AddSequence( match_list.sml_table[ seqI ], match_list.seq_table[ seqI ] ) ){
+					ErrorMsg( "Error adding " + match_list.seq_filename[seqI] + "\n");
+					return -1;
+				}
+			}
+			match_finder.LoadFile( merge_log_in );
+			match_finder.GetMatchList( match_list );
+		}else{
+			match_finder.FindMatchesFromPosition( match_list, offset_start );
+		}
+		match_log_out.close();
+		offset_log_out.close();
+		match_finder.Clear();
+	}
+		
+
+	// write out a match list if the user doesn't want LCBs
+	if( !create_LCBs && !read_lcbs){
+		if( eliminate_overlaps ){
+			EliminateOverlaps( match_list );
+		}
+
+		if( nway_filter ){
+			match_list.MultiplicityFilter( match_list.seq_table.size() );
+		}
+		
+		WriteList( match_list, *match_out );
+		match_out->flush();
+		
+		// output a guide tree or a coverage list if necessary
+		// beware that selecting the nway filter above will cause the guide tree
+		// and coverage lists to be incorrect
+		vector< pair< uint64, uint64 > > coverage_list;
+		if( tree_filename != "" || calculate_coverage ){
+			// only count each base pair once!
+			if( !eliminate_overlaps )
+				EliminateOverlaps( match_list );
+		}
+
+		if( tree_filename != "" ){
+			NumericMatrix< double > distance;
+			DistanceMatrix( match_list.seq_table.size(), coverage_list, distance );
+			MuscleInterface& mi = MuscleInterface::getMuscleInterface();
+			if( tree_filename == "" )
+				tree_filename = CreateTempFileName("guide_tree");
+			mi.CreateTree( distance, tree_filename );
+		}
+
+		return 0;
+	}
+	
+	// check whether the input sequences were masked to eliminate excess NNNNNs
+	for( seqI = 0; seqI < match_list.sml_table.size(); seqI++ ){
+		FileSML* cur_sml = dynamic_cast< FileSML* >(match_list.sml_table[ seqI ]);
+		if( cur_sml != NULL ){
+			const vector< int64 >& seq_coords = cur_sml->getUsedCoordinates();
+			if( seq_coords.size() > 0 ){
+				transposeMatches( match_list, seqI, seq_coords );
+			}
+		}
+	}
+	
+	// at this point any SortedMerLists used to identify the initial set of MUMs
+	// are no longer necessary.  Free them
+	for( uint smlI = 0; smlI < match_list.sml_table.size(); smlI++ ){
+		match_list.sml_table[ smlI ]->Clear();
+		delete match_list.sml_table[ smlI ];
+	}
+	match_list.sml_table.clear();
+	
+	// Align the sequences if necessary
+	if( LCB_size < 0 ){
+		// calculate a default LCB weight, 3 times the mer size times the seq. count
+		if( seed_size <= 0 )	
+			seed_size = MatchList::GetDefaultMerSize( match_list.seq_table );
+		LCB_size = seed_size * 3 * match_list.seq_table.size();
+	}else{
+		// adjust the LCB weight for the number of sequences being aligned
+		LCB_size *= match_list.seq_table.size();
+	}
+
+	// check that LCB_size can be set appropriately
+	if( create_LCBs && LCB_size < 0) {
+		cerr << "A minimum LCB size greater than 0 must be specified in order to create LCBs.\n";
+		return -1;
+	}
+
+	// hack to communicate that the genomes are collinear
+	if( collinear_genomes )
+		LCB_size = -1;
+	
+	Aligner aligner( match_list.seq_table.size() );
+
+	if( min_r_gap_length >= 0 ){
+		aligner.SetMinRecursionGapLength( min_r_gap_length );
+	}
+
+	aligner.SetGappedAligner( MuscleInterface::getMuscleInterface() );
+	if( max_gapped_alignment_length != -1 )
+		aligner.SetMaxGappedAlignmentLength( max_gapped_alignment_length );
+	
+	if( permutation_weight != -1 && permutation_filename == "" )
+		cerr << "A permutation output file must be specified to generate signed permutations\n";
+	if( permutation_weight == -1 && permutation_filename != "" )
+		permutation_weight = LCB_size;
+	if( permutation_weight != -1 )
+	{
+		permutation_weight *= match_list.seq_table.size();
+		aligner.SetPermutationOutput( permutation_filename, permutation_weight );
+	}
+	if( opt_max_extension_iters != -1 )
+	{
+		aligner.SetMaxExtensionIterations(opt_max_extension_iters);
+	}
+
+	IntervalList interval_list;
+	interval_list.seq_table = match_list.seq_table;
+	interval_list.seq_filename = match_list.seq_filename;
+	if( lcb_file == "" ){
+
+		try{
+			aligner.align( match_list, interval_list, 0, LCB_size, recursive, lcb_extension, gapped_alignment, tree_filename );
+		}catch( gnException& gne ){
+			cerr << gne << endl;
+		}
+		interval_list.WriteList( *match_out );
+		match_out->flush();
+
+	}else if( read_lcbs ){
+		ifstream lcb_input( lcb_file.c_str() );
+		if( !lcb_input.is_open() ){
+			cerr << "Error opening " << lcb_file << endl;
+			return -2;
+		}
+		try{
+
+			interval_list.seq_table = match_list.seq_table;
+			interval_list.seq_filename = match_list.seq_filename;
+			interval_list.ReadList( lcb_input );
+//			addUnalignedIntervals( interval_list );
+		}catch( gnException& gne ){
+			cerr << gne << endl;
+			cerr << "Error reading " << lcb_file << "\nPossibly corrupt file or invalid file format\n";
+			return -2;
+		}
+	}
+	if( realign_lcbs.size() > 0 ){
+		// set up a new IntervalList
+		IntervalList realigned_intervals;
+		realigned_intervals.seq_table = interval_list.seq_table;
+		realigned_intervals.seq_filename = interval_list.seq_filename;
+		for( int realignI = 0; realignI < realign_lcbs.size(); realignI++ ){
+			// extract a match list from the interval list for this LCB
+			Interval& iv = interval_list[ realignI ];
+			// clear any matches from the current match_list
+			match_list.clear();
+			for( int matchI = 0; matchI < iv.GetMatches().size(); matchI++ ){
+				AbstractMatch* m = iv.GetMatches()[ matchI ];
+				Match* match = dynamic_cast< Match* >( m );
+				if( match != NULL && m->Multiplicity() > 1)
+					match_list.push_back( match->Copy() );
+			}
+			aligner.align( match_list, realigned_intervals, 0, LCB_size, recursive, false, gapped_alignment, tree_filename );
+		}
+		
+		// once all intervals have been realigned reset the interval_list
+		interval_list = realigned_intervals;
+	}
+	
+	if( output_alignment ){
+		if( !gapped_alignment )
+			addUnalignedIntervals( interval_list );
+		if( alignment_output_file == "" || alignment_output_file == "-" ){
+			interval_list.WriteStandardAlignment( cout );
+		}else{
+			ofstream align_out( alignment_output_file.c_str() );
+			if( !align_out.is_open() ){
+				cerr << "Error opening " << alignment_output_file << endl;
+				return -1;
+			}
+			interval_list.WriteStandardAlignment( align_out );
+			align_out.close();
+		}
+	}
+	uint lcbI;
+	
+	// output alignments in another format if the user asked for it
+	if( alignment_output_dir != "" ){
+		boost::filesystem::path output_dir = alignment_output_dir;
+		boost::filesystem::create_directory( output_dir );
+
+		for( lcbI = 0; lcbI < interval_list.size(); lcbI++ ){
+			gnAlignedSequences gnas;
+			interval_list[ lcbI ].GetAlignedSequences( gnas, match_list.seq_table );
+			ostringstream oss;
+			oss << "lcb_" << lcbI << ".txt";
+			boost::filesystem::path outtie = output_dir / oss.str();
+			ofstream alignment_lcb_out( outtie.string().c_str(), ios::trunc );
+			if( !alignment_lcb_out.is_open() ){
+				cerr << "Error opening " << oss.str() << endl;
+				return -1;
+			}
+			gnas.output( alignment_output_format, alignment_lcb_out );
+		}
+	}
+
+	//
+	// output an identity matrix if requested
+	//
+	if( print_stats ){
+		ostream* stats_out;
+		if( lcb_stats_file == "" || lcb_stats_file == "-" ){
+			stats_out = &cout;
+		}else{
+			ofstream* stats_out_file = new ofstream( lcb_stats_file.c_str() );
+			if( !stats_out_file->is_open() ){
+				cerr << "Error opening " << lcb_stats_file << endl;
+				return -1;
+			}
+			stats_out = stats_out_file;
+		}
+		NumericMatrix< double > identity;
+		IdentityMatrix( interval_list, identity );
+		identity.print( *stats_out );
+		if( lcb_stats_file == "" || lcb_stats_file == "-" ){
+			delete stats_out;
+		}
+	}
+
+	//
+	// output backbone if it was requested
+	//
+	if( output_backbone ){
+		ostream* backbone_out;
+		if( backbone_file != "" ){
+			ofstream* backbone_out_file = new ofstream( backbone_file.c_str() );
+			if( !backbone_out_file->is_open() ){
+				cerr << "Error opening " << backbone_file << endl;
+				return -1;
+			}
+			backbone_out = backbone_out_file;
+		}else
+			backbone_out = &cout;
+
+		vector< GappedAlignment > backbone_data;
+		simpleFindBackbone( interval_list, backbone_size, max_backbone_gap, backbone_data );
+		outputBackbone( backbone_data, *backbone_out );
+		if( backbone_file != "" ){
+			delete backbone_out;
+		}
+	}
+
+	//
+	// output islands if they were requested
+	//
+	if( island_file != "" ){
+		ostream* island_out;
+		if( island_file == "-" )
+			island_out = &cout;
+		else{
+			ofstream* island_out_file = new ofstream( island_file.c_str() );
+			if( !island_out_file->is_open() ){
+				cerr << "Error opening " << island_file << endl;
+				return -1;
+			}
+			island_out = island_out_file;
+		}
+		simpleFindIslands( interval_list, island_size, *island_out );
+		findIslandsBetweenLCBs( interval_list, island_size, *island_out );
+
+		if( island_file != "-" ){
+			delete island_out;
+		}
+	}
+	match_list.clear();	// bad.  leaks memory.
+}catch( gnException& gne ) {
+	cerr << "Unhandled gnException: " << gne << endl;
+	return -10;
+}catch( exception& e ) {
+	cerr << "Unhandled exception: " << e.what() << endl;
+	return -11;
+}catch( char* message ){
+	cerr << "Unhandled exception: " << message << endl;
+	return -12;
+}catch(...){
+	cerr << "Unknown exception occurred.\n";
+	return -13;
+}
+
+	return 0;
+}
+
+/**
+ * Writes the mauveAligner command-line help to stderr: the synopsis line,
+ * one line per option grouped by topic, and finally the alignment output
+ * formats reported by gnAlignedSequences::getSupportedFormats().
+ * @param pname  program name printed at the start of the synopsis line
+ */
+void print_usage( const char* pname ){
+	cerr << "Usage:" << endl;
+	cerr << pname << " [options] <seq1 filename> <sml1 filename> ... "
+		<< " <seqN filename> <smlN filename>" << endl;
+	cerr << "Options:" << endl;
+	cerr << "\t    --output=<file> Output file name.  Prints to screen by default" << endl;
+	cerr << "\t    --mums Find MUMs only, do not attempt to determine locally collinear blocks (LCBs)\n";
+	cerr << "\t    --no-recursion Don't perform recursive anchor identification (implies --no-gapped-alignment)" << endl;
+	cerr << "\t    --no-lcb-extension If determining LCBs, don't attempt to extend the LCBs\n";
+	cerr << "\t    --seed-size=<number> Initial seed match size, default is log_2( average seq. length )" << endl;
+	cerr << "\t    --max-extension-iterations=<number> Limit LCB extensions to this number of attempts, default is 4\n";
+	cerr << "\t    --eliminate-inclusions Eliminate linked inclusions in subset matches.\n";
+	cerr << "\t    --weight=<number> Minimum LCB weight in base pairs per sequence" << endl;
+	cerr << "\t    --match-input=<file> Use specified match file instead of searching for matches\n";
+	cerr << "\t    --lcb-match-input  Indicates that the match input file contains matches that have been clustered into LCBs\n";
+	cerr << "\t    --lcb-input=<file> Use specified lcb file instead of constructing LCBs (skips LCB generation)\n";
+	cerr << "\t    --scratch-path=<path>  For large genomes, use a directory for storage of temporary data.  Should be given two or more times with different paths.\n";
+	cerr << "\t    --id-matrix=<file> Generate LCB stats and write them to the specified file\n";
+	cerr << "\t    --island-size=<number> Find islands larger than the given number\n";
+	cerr << "\t    --island-output=<file> Output islands to the given file (requires --island-size)\n";
+	cerr << "\t    --backbone-size=<number> Find stretches of backbone longer than the given number of b.p.\n";
+	cerr << "\t    --max-backbone-gap=<number> Allow backbone to be interrupted by gaps up to this length in b.p.\n";
+	// this line used to repeat the --island-output description verbatim
+	cerr << "\t    --backbone-output=<file> Output backbone to the given file (requires --backbone-size)\n";
+	cerr << "\t    --coverage-output=<file> Output a coverage list to the specified file (- for stdout)\n";
+	cerr << "\t    --repeats Generates a repeat map.  Only one sequence can be specified\n";
+	cerr << "\t    --output-guide-tree=<file> Write out a guide tree to the designated file\n";
+	cerr << "\t    --collinear Assume that input sequences are collinear--they have no rearrangements\n";
+	cerr << "\nGapped alignment controls:\n";
+	cerr << "\t    --no-gapped-alignment Don't perform a gapped alignment\n";
+	cerr << "\t    --max-gapped-aligner-length=<number> Maximum number of base pairs to attempt aligning with the gapped aligner\n";
+	cerr << "\t    --min-recursive-gap-length=<number> Minimum size of gaps that Mauve will perform recursive MUM anchoring on (Default is 200)\n";
+	cerr << "\nSigned permutation matrix options:\n";
+	cerr << "\t    --permutation-matrix-output=<file> Write out the LCBs as a signed permutation matrix to the given file\n";
+	cerr << "\t    --permutation-matrix-min-weight=<number> A permutation matrix will be written for every set of LCBs with weight between this value and the value of --weight\n";
+	cerr << "\nAlignment output options:\n";
+	cerr << "\t    --alignment-output-dir=<directory> Outputs a set of alignment files (one per LCB) to a given directory\n";
+	// the value is a format name (see the list printed below), not a directory
+	cerr << "\t    --alignment-output-format=<format> Selects the output format for --alignment-output-dir\n";
+	cerr << "\t    --output-alignment=<file> Write out an XMFA format alignment to the designated file\n";
+	cerr << endl;
+
+	const vector< string >& formats = gnAlignedSequences::getSupportedFormats();
+	cerr << "Supported alignment output formats are: ";
+	for( size_t formatI = 0; formatI < formats.size(); formatI++ ){
+		if( formatI > 0 )
+			cerr << ", ";
+		cerr << formats[ formatI ];
+	}
+	cerr << endl;
+	cerr << endl;
+}
+
diff --git a/src/mauveAligner.h b/src/mauveAligner.h
new file mode 100644
index 0000000..4fcf2da
--- /dev/null
+++ b/src/mauveAligner.h
@@ -0,0 +1,10 @@
+#ifndef _MAUVEALIGNER_H
+// The guard is defined unconditionally; it used to depend on __need_getopt,
+// a macro unrelated to these declarations.
+#define _MAUVEALIGNER_H 1
+
+/** Writes the command-line usage text to stderr; pname is the program name to display. */
+void print_usage( const char* pname );
+
+/** Runs the aligner with main()-style arguments and returns its exit status (0 on success). */
+int doAlignment( int argc, char* argv[] );
+
+#endif
diff --git a/src/mauveToXMFA.cpp b/src/mauveToXMFA.cpp
new file mode 100644
index 0000000..8f7cdfc
--- /dev/null
+++ b/src/mauveToXMFA.cpp
@@ -0,0 +1,35 @@
+#include "libMems/IntervalList.h"
+#include <fstream>
+#include <string>
+#include "libMems/MatchList.h"
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+/**
+ * Converts a Mauve alignment file to XMFA.
+ * argv[1] is the Mauve alignment to read, argv[2] the XMFA file to write.
+ * Returns -1 for a wrong argument count, -2 if the input cannot be opened,
+ * -3 if the output cannot be opened, and 0 otherwise.
+ */
+int main( int argc, char* argv[] )
+{
+	if( argc != 3 )
+	{
+		cerr << "Usage: mauveToXMFA <Mauve Alignment input> <XMFA output>\n";
+		return -1;
+	}
+	const char* const mauve_path = argv[1];
+	const char* const xmfa_path = argv[2];
+
+	// the output file is only created once the input is known to be readable
+	ifstream alignment_in( mauve_path );
+	if( !alignment_in.is_open() )
+	{
+		cerr << "Error opening \"" << mauve_path << "\"\n";
+		return -2;
+	}
+	ofstream alignment_out( xmfa_path );
+	if( !alignment_out.is_open() )
+	{
+		cerr << "Error opening \"" << xmfa_path << "\"\n";
+		return -3;
+	}
+
+	// read the intervals, load the sequences (progress goes to stdout), then write the XMFA
+	IntervalList intervals;
+	intervals.ReadList( alignment_in );
+	LoadSequences( intervals, &cout );
+	intervals.WriteStandardAlignment( alignment_out );
+	return 0;
+}
+
diff --git a/src/mfa2xmfa.cpp b/src/mfa2xmfa.cpp
new file mode 100644
index 0000000..60de675
--- /dev/null
+++ b/src/mfa2xmfa.cpp
@@ -0,0 +1,117 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnFASSource.h"
+#include "libGenome/gnSequence.h"
+#include <algorithm>
+#include <fstream>
+
+using namespace std;
+using namespace genome;
+
+/** Prints the one-line mfa2xmfa synopsis to stderr; pname is the program name to show. */
+void print_usage( const char* pname ){
+	static const char* const arg_synopsis = " <MFA alignment input> <XMFA alignment output> [Unaligned FastA output]\n";
+	std::cerr << "Usage: " << pname << arg_synopsis;
+}
+
+/**
+ * Converts a multi-FastA alignment into a single-block XMFA alignment.
+ * argv[1] is the MFA alignment to read and argv[2] the XMFA file to write.
+ * When argv[3] is given, the gap-stripped sequences are also written there
+ * as FastA and the XMFA gets a header that refers to that file.
+ * Returns -1 on a usage error, a load failure or an unopenable output file.
+ */
+int main( int argc, char* argv[] ) {
+	if( argc < 3 ){
+		// argv[0] is not available when argc is 0, so fall back to a fixed name
+		print_usage( argc == 0 ? "mfa2xmfa" : argv[0] );
+		return -1;
+	}
+
+	gnSequence mfa_seq;
+	string mfa_name = argv[1];
+	try{
+		mfa_seq.LoadSource( mfa_name );
+	}catch( gnException& gne ){
+		cerr << gne << endl;
+		return -1;
+	}
+	ofstream xmfa_out( argv[2] );
+	if( !xmfa_out.is_open() ){
+		cerr << "Error opening " << argv[2] << endl;
+		return -1;
+	}
+
+	// optional third argument: write the unaligned (gap-free) sequences as FastA
+	string mfa_output_name;
+	ofstream mfa_out;
+	if( argc >= 4 )
+	{
+		mfa_output_name = argv[3];
+		mfa_out.open( mfa_output_name.c_str() );
+		if( !mfa_out.is_open() ){
+			cerr << "Error opening " << argv[3] << endl;
+			return -1;
+		}
+
+		gnSequence unaligned;
+		for( size_t contigI = 0; contigI < mfa_seq.contigListSize(); contigI++ )
+		{
+			string ungapped = mfa_seq.contig(contigI).ToString();
+			// drop every gap character from this alignment row
+			ungapped.erase( std::remove( ungapped.begin(), ungapped.end(), '-' ), ungapped.end() );
+			unaligned += ungapped;
+			unaligned.setContigName( contigI, mfa_seq.contigName(contigI) );
+		}
+		gnFASSource::Write( unaligned, mfa_out, false, false );
+
+		// the XMFA header is only emitted when the unaligned sequences are written
+		xmfa_out << "#FormatVersion Mauve1\n";
+	}
+
+	unsigned int seq_count = mfa_seq.contigListSize();
+
+	// length of the longest alignment row; shorter rows are padded to it below
+	gnSeqI max_length = 0;
+	for( uint seqI = 0; seqI < seq_count; seqI++ ){
+		if( mfa_seq.contigLength( seqI ) > max_length )
+			max_length = mfa_seq.contigLength( seqI );
+	}
+
+	// number of non-gap characters in each row, plus the per-sequence header lines
+	vector< gnSeqI > seq_lens( seq_count, 0 );
+	for( uint seqI = 0; seqI < seq_count; seqI++ ){
+		string row;
+		mfa_seq.contig( seqI ).ToString(row);
+		seq_lens[ seqI ] = row.length() - std::count( row.begin(), row.end(), '-' );
+		if( !mfa_output_name.empty() )
+		{
+			xmfa_out << "#Sequence" << seqI + 1 << "File\t" << mfa_output_name << endl;
+			xmfa_out << "#Sequence" << seqI + 1 << "Entry\t" << seqI + 1 << endl;
+			xmfa_out << "#Sequence" << seqI + 1 << "Format\tFastA\n";
+		}
+	}
+
+	// XMFA body: one entry per sequence, wrapped at 80 columns
+	for( uint seqI = 0; seqI < seq_count; seqI++ ){
+		string row;
+		mfa_seq.contig( seqI ).ToString(row);
+		// pad short rows with gaps so every entry has the same number of columns
+		if( row.length() < max_length )
+			row.append( max_length - row.length(), '-' );
+
+		xmfa_out << "> " << seqI + 1 << ":"
+			<< 1 << "-" << seq_lens[ seqI ] << " + " << mfa_seq.contigName( seqI )
+			<< endl;
+		for( gnSeqI line_start = 0; line_start < row.length(); line_start += 80 ){
+			const gnSeqI remaining = row.length() - line_start;
+			const gnSeqI line_len = remaining < 80 ? remaining : 80;
+			xmfa_out.write( row.data() + line_start, line_len );
+			xmfa_out << endl;
+		}
+	}
+
+	// a lone '=' terminates the alignment block
+	xmfa_out << "=" << endl;
+	return 0;
+}
diff --git a/src/multiEVD.cpp b/src/multiEVD.cpp
new file mode 100644
index 0000000..8952683
--- /dev/null
+++ b/src/multiEVD.cpp
@@ -0,0 +1,217 @@
+/**
+ * multiEVD
+ * (c)left 2007 aaron darling
+ * A program to calculate the extreme value distribution of alignment drops in homologous sequence.
+ * INPUT: a simulated multiple alignment as input
+ * OUTPUT: the 95%ile, 99%ile, etc of scores in the extreme value distribution
+ * THEORY:
+ * computes inverse substitution and gap scores, never allowing
+ * the inverse score to drop below 0.  Each time the score rises above 0, an "excursion" begins, and when the score
+ * drops back to 0, the excursion has ended.  The highest score achieved by the excursion is the "extreme value".
+ * Each extreme value is recorded, and the distribution of these extreme values is what gets output.
+ */
+#include "libMems/Islands.h"
+#include "libMems/IntervalList.h"
+#include "libMems/MatchList.h"
+#include "libMems/MuscleInterface.h"
+#include "libGenome/gnSequence.h"
+#include "libMems/MatchProjectionAdapter.h"
+#include "libMems/ProgressiveAligner.h"
+
+#include <sstream>
+
+#include <boost/multi_array.hpp>
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+/**
+ * Appends to lrh the peak score of every excursion found in the inverted
+ * column scores of each alignment in iv_list.
+ *
+ * For each element, the alignment text is obtained with GetAlignment(), gap
+ * columns are removed with stripGapColumns(), and per-column scores come from
+ * computeSPScore() using a default-constructed PairwiseScoringScheme.  The
+ * scores are negated, then summed column by column; columns equal to
+ * INVALID_SCORE are skipped.
+ *
+ * @param iv_list    container whose elements are dereferenced and passed to GetAlignment()
+ * @param seq_table  sequences passed to GetAlignment() alongside each element
+ * @param lrh        output; one value is appended each time an excursion ends
+ */
+template< typename MatchVector >
+void getLocalRecordHeights( const MatchVector& iv_list, std::vector< genome::gnSequence* >& seq_table, vector< score_t >& lrh )
+{
+	typedef typename MatchVector::value_type MatchType;
+	for( size_t iv_listI = 0; iv_listI < iv_list.size(); iv_listI++ ){
+		const MatchType& iv = iv_list[ iv_listI ];
+		std::vector< std::string > aln_table;
+		GetAlignment( *iv, seq_table, aln_table );
+
+		std::vector< score_t > scores;
+		PairwiseScoringScheme pss;
+		score_t total_score;
+
+		stripGapColumns(aln_table);
+		computeSPScore( aln_table, pss, scores, total_score );
+
+		// Invert the scores since we're trying to detect rare bouts of non-homologous sequence
+		for( size_t sI = 0; sI < scores.size(); ++sI )
+			if( scores[sI] != INVALID_SCORE)
+				scores[sI] = -scores[sI];
+
+		score_t score_sum = 0;	// running sum of the open excursion; 0 means none is open
+		score_t local_record_height = 0;	// highest value score_sum reached since the last reset
+		for( size_t colI = 0; colI < scores.size(); ++colI )
+		{
+			const score_t col_score = scores[colI];
+			if( col_score == INVALID_SCORE )
+				continue;
+
+			if( score_sum > 0 && score_sum + col_score < 0 )
+			{
+				// the sum would drop below zero: the excursion is over, record its peak
+				// NOTE(review): a sum that lands on exactly 0 takes the branch below
+				// instead, so that excursion is not recorded here -- confirm intended
+				score_sum = 0;
+				lrh.push_back( local_record_height );
+				local_record_height = 0;
+			}else if( ( score_sum == 0 && col_score > 0 ) || score_sum > 0 ){
+				// either a positive column opens a new excursion or an open one continues;
+				// both cases accumulate the column and track the running maximum
+				score_sum += col_score;
+				if( score_sum > local_record_height )
+					local_record_height = score_sum;
+			}
+		}
+		// NOTE(review): an excursion still open at the last column is not
+		// appended to lrh -- confirm this truncation is intended
+	}
+}
+
+//bad: copied from progressiveAligner.cpp
+/**
+ * Writes a two-dimensional matrix to os, one row per line with the cells
+ * of a row separated by tab characters.
+ * @param mat  matrix offering shape()[0], shape()[1] and mat[row][col] access
+ * @param os   destination stream
+ */
+template< class BoostMatType >
+void print2d_matrix( BoostMatType& mat, std::ostream& os )
+{
+	const size_t row_count = mat.shape()[0];
+	const size_t col_count = mat.shape()[1];
+	for( size_t rowI = 0; rowI != row_count; ++rowI )
+	{
+		for( size_t colI = 0; colI != col_count; ++colI )
+		{
+			// tabs go between cells only, never before the first one
+			if( colI != 0 )
+				os << "\t";
+			os << mat[rowI][colI];
+		}
+		os << std::endl;
+	}
+}
+
+
+// read each input file, write summary statistics about the EVD to stdout
+/**
+ * argv[1] is the number of simulation runs.  For run i the alignment is read
+ * from alignjob.<i>/evolved.dat and the sequences from
+ * alignjob.<i>/evolved_seqs.fas; runs whose alignment cannot be opened are
+ * reported on stderr and skipped.
+ *
+ * For every run, record heights are collected for the full alignment and
+ * then, after repeatedly discarding one randomly chosen sequence and
+ * projecting the alignment onto the rest, for each smaller sequence count
+ * down to 2.  Two matrices are printed to stdout: score thresholds and
+ * sample sizes.  Rows are the 95%, 99%, 99.9% and 99.99% quantiles, columns
+ * are sequence counts 2..N.
+ *
+ * Returns -1 on a usage error or when no sequences could be loaded.
+ */
+int main( int argc, char* argv[] )
+{
+	if( argc != 2 )
+	{
+		cerr << "Usage: multiEVD <simulation run count>\n";
+		cerr << "This program must be run from a directory which contains alignjob directories\n";
+		return -1;
+	}
+	int run_count = atoi( argv[1] );
+	int simu_count = 0;	// number of runs whose alignment file could be opened
+	vector< vector< score_t > > lrh_all;	// lrh_all[k]: record heights gathered with k sequences
+	size_t seq_count = 0;	// taken from the first run that loads
+	for( int runI = 0; runI < run_count; ++runI )
+	{
+		IntervalList iv_list;
+		stringstream aln_fname;
+		aln_fname << "alignjob." << runI << "/evolved.dat";
+		ifstream in_file( aln_fname.str().c_str() );
+		if( !in_file.is_open() )
+		{
+			cerr << "Error opening " << aln_fname.str() << endl;
+			continue;
+		}
+		simu_count++;
+		iv_list.ReadStandardAlignment(in_file);
+		stringstream seq_fname;
+		seq_fname << "alignjob." << runI << "/evolved_seqs.fas";
+		MatchList ml;
+		LoadMFASequences(ml, seq_fname.str(), &cout);
+		iv_list.seq_table = ml.seq_table;
+		if( seq_count == 0 )
+		{
+			seq_count = iv_list.seq_table.size();
+			lrh_all.resize(seq_count+1);
+		}
+
+		// start with pointers into iv_list itself; these are not owned by iv_ptrs
+		vector< Interval* > iv_ptrs( iv_list.size() );
+		for( size_t ivI = 0; ivI < iv_list.size(); ++ivI )
+			iv_ptrs[ivI] = &iv_list[ivI];
+
+		vector< gnSequence* > seq_table = iv_list.seq_table;
+
+		vector< uint > proj_seqs(seq_count);
+		for( size_t sI = 0; sI < seq_count; ++sI )
+			proj_seqs[sI] = sI;
+
+		std::vector< std::vector< mems::MatchProjectionAdapter* > > LCB_list;
+		std::vector< mems::LCB > projected_adjs;
+		// true once iv_ptrs holds copies made below, which must be released with Free()
+		bool holding_projections = false;
+		for( size_t mult = seq_count; mult > 1; mult-- )
+		{
+			vector< score_t > lrh;
+			getLocalRecordHeights( iv_ptrs, seq_table, lrh );
+			lrh_all[mult].insert( lrh_all[mult].end(), lrh.begin(), lrh.end() );
+			// randomly pick a sequence to discard
+			int disc = rand() % proj_seqs.size();
+			proj_seqs.erase(proj_seqs.begin()+disc);
+			seq_table.erase(seq_table.begin()+disc);
+			// project the original alignment down to the remaining sequences
+			projectIntervalList( iv_list, proj_seqs, LCB_list, projected_adjs );
+			// free storage used by the previous set of projections
+			if( holding_projections )
+			{
+				for( size_t ivI = 0; ivI < iv_ptrs.size(); ivI++ )
+					iv_ptrs[ivI]->Free();
+			}
+			// update iv_ptrs to contain the new projections
+			iv_ptrs.resize(LCB_list.size());
+			for( size_t lcbI = 0; lcbI < LCB_list.size(); lcbI++ )
+			{
+				Interval iv;
+				iv_ptrs[lcbI] = iv.Copy();
+				iv_ptrs[lcbI]->SetMatches(LCB_list[lcbI]);
+			}
+			holding_projections = true;
+		}
+		// the last set of projections was previously never released
+		if( holding_projections )
+		{
+			for( size_t ivI = 0; ivI < iv_ptrs.size(); ivI++ )
+				iv_ptrs[ivI]->Free();
+		}
+	}
+
+	// with no sequences, seq_count-1 below would wrap around to a huge extent
+	if( seq_count == 0 )
+	{
+		cerr << "Error: no sequences were loaded (" << simu_count << " of " << run_count
+			<< " alignment files could be opened)\n";
+		return -1;
+	}
+
+	static const double quantiles[] = { .95, .99, .999, .9999 };
+	const size_t quantile_count = sizeof( quantiles ) / sizeof( quantiles[0] );
+
+	boost::multi_array<score_t, 2> evd_table;	// score at each quantile
+	evd_table.resize( boost::extents[quantile_count][seq_count-1] );
+	boost::multi_array<size_t, 2> ss_table;	// number of excursions at or above each quantile index
+	ss_table.resize( boost::extents[quantile_count][seq_count-1] );
+	for( size_t mult = 2; mult < seq_count + 1; mult++ )
+	{
+		vector< score_t >& heights = lrh_all[mult];
+		std::sort( heights.begin(), heights.end() );
+		for( size_t qI = 0; qI < quantile_count; ++qI )
+		{
+			if( heights.empty() )
+			{
+				// nothing was observed for this sequence count; size()-1 would wrap,
+				// so report a zero threshold and a zero sample size instead
+				evd_table[qI][mult-2] = score_t();
+				ss_table[qI][mult-2] = 0;
+				continue;
+			}
+			// index of the quantile in the sorted list, clamped to the last element
+			size_t index = heights.size() * quantiles[qI];
+			index = (std::min)( index, heights.size()-1 );
+			evd_table[qI][mult-2] = heights[index];
+			ss_table[qI][mult-2] = heights.size() - index;
+		}
+	}
+	cout << "Matrix of score thresholds:\n";
+	print2d_matrix( evd_table, cout );
+	cout << "\n\nMatrix of sample sizes:\n";
+	print2d_matrix( ss_table, cout );
+	cout << endl;
+	return 0;
+}
+
+
diff --git a/src/multiToRawSequence.cpp b/src/multiToRawSequence.cpp
new file mode 100644
index 0000000..d910c6f
--- /dev/null
+++ b/src/multiToRawSequence.cpp
@@ -0,0 +1,28 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnSequence.h"
+#include "libGenome/gnRAWSource.h"
+
+/**
+ * Loads the sequence file named by argv[1] and writes each of its contigs to
+ * "<contig name>.raw" with gnRAWSource::Write, reporting progress on stdout.
+ * The usage line is printed whenever the argument count is not 3, but the
+ * second argument is not read anywhere in this function.
+ * Returns -1 when the input argument is missing or a gnException is caught.
+ */
+int main( int argc, char* argv[] ){
+
+	if( argc != 3 ){
+		// argv[0] is not available when argc is 0
+		cout << ( argc > 0 ? argv[0] : "multiToRawSequence" ) << " <input sequence> <output file>\n";
+	}
+	// without an input argument argv[1] is NULL and must not reach LoadSource();
+	// other argument counts are still processed exactly as before
+	if( argc < 2 )
+		return -1;
+
+	gnSequence seq;
+	try{
+		seq.LoadSource( argv[1] );
+		cout << argv[1] << " has " << seq.contigListLength() << " contigs\n";
+		for( int contigI = 0; contigI < seq.contigListLength(); contigI++ ){
+			gnSequence contig = seq.contig( contigI );
+			string contig_name = seq.contigName( contigI );
+			cout << "contig " << contig_name << " has " << contig.length() << "b.p.\n";
+			// the output file name is derived from the contig name, not from argv[2]
+			gnRAWSource::Write( contig, contig_name+".raw" );
+		}
+	}catch( gnException& gne ){
+		cerr << gne << endl;
+		return -1;
+	}
+	return 0;
+}
diff --git a/src/pairCompare.cpp b/src/pairCompare.cpp
new file mode 100644
index 0000000..e770bf1
--- /dev/null
+++ b/src/pairCompare.cpp
@@ -0,0 +1,85 @@
+#include "libMems/IntervalList.h"
+#include "libMems/Islands.h"
+#include "libMems/DistanceMatrix.h"
+#include <sstream>
+#include <fstream>
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+/**
+ * Prints a tab-separated table with one row per sequence pair (seqI < seqJ).
+ * Each pair's alignment is read from all_pairs/pair_<seqI>.<seqJ>.xmfa.  The
+ * row holds the two indices, the (0,1) entry of the backbone identity matrix,
+ * the average backbone length divided by the average sequence length, and
+ * the number of intervals with multiplicity greater than 1.
+ * Pairs whose file cannot be opened, or that contain fewer than two
+ * sequences, are reported on stderr and produce no row.
+ * @return -1 on a usage error, 0 otherwise
+ */
+int main( int argc, char* argv[] )
+{
+	if( argc != 2 )
+	{
+		cerr << "Usage: pairCompare <sequence count>\n";
+		return -1;
+	}
+	// NOTE(review): the outer index starts at 10, so pairs whose first member
+	// is 0-9 are never reported -- this looks like a leftover from resuming an
+	// interrupted run; confirm before changing it
+	const size_t first_seq = 10;
+	// a negative count would otherwise wrap to a huge unsigned loop bound
+	const int requested_count = atoi( argv[1] );
+	const size_t seq_count = requested_count > 0 ? requested_count : 0;
+
+	cout << "SeqI\tSeqJ\tNTidentity\tAvgBBpct\tLCB count\n";
+	for( size_t seqI = first_seq; seqI < seq_count; seqI++ )
+	{
+		for( size_t seqJ = seqI + 1; seqJ < seq_count; seqJ++ )
+		{
+			stringstream aln_in_fname;
+			aln_in_fname << "all_pairs/pair_" << seqI << "." << seqJ << ".xmfa";
+			ifstream alignment_in(aln_in_fname.str().c_str());
+			if( !alignment_in.is_open() )
+			{
+				cerr << "Error opening " << aln_in_fname.str() << endl;
+				continue;
+			}
+			IntervalList aligned_ivs;
+			aligned_ivs.ReadStandardAlignment( alignment_in );
+
+			// add the sequence data to the interval list
+			LoadSequences(aligned_ivs, NULL);
+
+			const uint pair_seq_count = aligned_ivs.seq_table.size();
+			if( pair_seq_count < 2 )
+			{
+				// the identity lookup and the averages below need two sequences
+				cerr << "Error: " << aln_in_fname.str() << " does not contain at least two sequences\n";
+				continue;
+			}
+
+			// the row prefix is only written once the pair is known to be usable
+			cout << seqI << '\t' << seqJ << '\t';
+
+			vector< GappedAlignment > backbone_data;
+			simpleFindBackbone( aligned_ivs, 50, 50, backbone_data );
+
+			IntervalList backbone_ivs;
+			backbone_ivs.seq_table = aligned_ivs.seq_table;
+
+			// count up the total length of backbone in each genome
+			vector< gnSeqI > total_bb( pair_seq_count, 0 );
+			NumericMatrix< double > overall_identity;
+			for( uint bbI = 0; bbI < backbone_data.size(); bbI++ ){
+				vector<AbstractMatch*> tmp_iv(1, &backbone_data[ bbI ]);
+				backbone_ivs.push_back( Interval( tmp_iv.begin(), tmp_iv.end() ) );
+				for( uint sI = 0; sI < pair_seq_count; sI++ ){
+					total_bb[ sI ] += backbone_data[ bbI ].Length( sI );
+				}
+			}
+
+			vector< AbstractMatch* > bbivs;
+			for( uint bbI = 0; bbI < backbone_ivs.size(); bbI++ )
+				bbivs.push_back( &backbone_ivs[bbI] );
+			BackboneIdentityMatrix( bbivs, aligned_ivs.seq_table, overall_identity );
+
+			// average backbone length and average sequence length over the pair
+			gnSeqI avg_bb = 0;
+			double seq_len_average = 0;
+			for( uint sI = 0; sI < pair_seq_count; sI++ ){
+				avg_bb += total_bb[ sI ];
+				seq_len_average += aligned_ivs.seq_table[sI]->length();
+			}
+			avg_bb /= pair_seq_count;
+			seq_len_average /= (double)pair_seq_count;
+
+			// only intervals present in more than one sequence are counted as LCBs
+			size_t lcb_count = 0;
+			for( size_t lcbI = 0; lcbI < aligned_ivs.size(); lcbI++ )
+				if( aligned_ivs[lcbI].Multiplicity() > 1 )
+					lcb_count++;
+
+			cout << overall_identity(0,1) << '\t';
+			cout << avg_bb / seq_len_average << '\t';
+			cout << lcb_count << endl;
+		}
+	}
+
+	return 0;
+}
diff --git a/src/progressiveMauve.cpp b/src/progressiveMauve.cpp
new file mode 100644
index 0000000..abf23ba
--- /dev/null
+++ b/src/progressiveMauve.cpp
@@ -0,0 +1,768 @@
+/*******************************************************************************
+ * $Id: memsApp.cpp,v 1.49 2004/04/23 00:18:45 darling Exp $
+ * This file is copyright 2002-2004 Aaron Darling.  All rights reserved.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * rights.  Redistribution of this file, in whole or in part is prohibited
+ * without express permission.
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mauveAligner.h"
+#include "getopt.h"
+#include <sstream>
+#include <stdexcept>
+#include "libMems/Matrix.h"
+#include "libMems/NumericMatrix.h"
+#include "libGenome/gnSequence.h"
+#include "libMems/DNAFileSML.h"
+#include "libMems/MemHash.h"
+#include "libMems/MatchList.h"
+#include "libMems/Interval.h"
+#include "libMems/IntervalList.h"
+#include "libMems/gnAlignedSequences.h"
+#include "libMems/Islands.h"
+#include "libMems/MuscleInterface.h"
+#include "libMems/Backbone.h"
+//#include "libMems/twister.h"
+
+#include "libMems/ProgressiveAligner.h"
+#include "libMems/PairwiseMatchFinder.h"
+#include "libMems/HomologyHMM/parameters.h"
+#include "UniqueMatchFinder.h"
+
+#include <boost/filesystem.hpp>
+
+#include "libMems/Memory.h"
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+/**
+ * Scope guard for a MatchList: keeps a reference to the list handed to the
+ * constructor and calls Clear() on it when the guard is destroyed.
+ */
+class MLDeleter
+{
+public:
+	MLDeleter( MatchList& ml )
+		: guarded_list( ml )
+	{
+	}
+
+	~MLDeleter()
+	{
+		guarded_list.Clear();
+	}
+
+private:
+	MatchList& guarded_list;	// referenced, not owned
+};
+
+class OptionList;
+
+/**
+ * One long command-line option.  Extends the getopt `option` record with the
+ * state filled in while the command line is parsed (whether the option was
+ * seen, and its argument text) plus the help text printed by print_usage().
+ */
+class MauveOption : public option
+{
+public:
+	/**
+	 * Creates the option and appends it to the given option list.
+	 * @param ol          list that this option is added to
+	 * @param name        long option name, without the leading "--"
+	 * @param has_arg     getopt argument mode (e.g. no_argument, required_argument)
+	 * @param usage_info  help text printed after the option name in the usage message
+	 */
+	MauveOption( OptionList& ol, const char* name, int has_arg, const std::string& usage_info);
+
+	boolean set;	/**< initialised to false; set to true when the option is seen during parsing */
+	std::string arg_value;	/**< argument text supplied with the option, when there was one */
+	std::string usage_info;	/**< help text for the usage message */
+};
+
+
+/**
+ * The collection of registered MauveOption objects.  The list stores plain
+ * pointers to the options (it does not delete them) and can produce the
+ * zero-terminated `struct option` array expected by getopt_long().
+ */
+class OptionList : public vector< MauveOption* >
+{
+public:
+	// config_opt is given a defined value up front; it is the target of every
+	// option's `flag` pointer and would otherwise be indeterminate until the
+	// first option has been parsed.
+	OptionList() : config_opt(0), opt_list(NULL) {}
+	~OptionList()
+	{
+		delete[] opt_list;	// delete[] on a null pointer is a no-op
+	}
+	/**
+	 * Returns the getopt_long()-style option array, building it on first use.
+	 * The array is cached: options added to the list after the first call are
+	 * not reflected in the returned array.
+	 * @return pointer to an array of size()+1 entries, the last one all zeros;
+	 *         the array is owned by this object
+	 */
+	struct option* getOptions()
+	{
+		if( opt_list == NULL )
+		{
+			const size_t opt_count = this->size();
+			opt_list = new option[ opt_count + 1 ];
+			// copy only the base `option` part of each MauveOption
+			for( size_t optI = 0; optI < opt_count; optI++ )
+				opt_list[optI] = *(*this)[optI];
+			// getopt_long() requires an all-zero terminating element
+			struct option terminator = {0,0,0,0};
+			opt_list[opt_count] = terminator;
+		}
+		return opt_list;
+	}
+	int config_opt;	/**< every option's `flag` points here, so parsing stores the matched option's index in it */
+protected:
+	struct option* opt_list;	/**< lazily built array returned by getOptions(), NULL until then */
+};
+
+/**
+ * Builds the option and registers it with the option list.
+ * The inherited getopt record is set up so that `flag` points at the list's
+ * config_opt and `val` is this option's position in the list.
+ */
+MauveOption::MauveOption( OptionList& ol, const char* name, int has_arg, const std::string& usage_info ) :
+	set( false ),
+	usage_info( usage_info )
+{
+	// fill in the members inherited from the getopt `option` struct
+	option& record = *this;
+	record.name = name;
+	record.has_arg = has_arg;
+	record.flag = &ol.config_opt;
+	// the index must be taken before this option is appended to the list
+	record.val = ol.size();
+
+	ol.push_back(this);
+}
+
+/**
+ * Writes the progressiveMauve usage message to stderr: the two invocation
+ * forms, one line per registered option, and two example command lines.
+ * @param pname        program name shown in the invocation lines
+ * @param option_list  options whose names and help texts are listed
+ */
+void print_usage( const char* pname, OptionList& option_list )
+{
+	cerr << "progressiveMauve usage:\n\n"
+	     << "When each genome resides in a separate file:" << endl
+	     << pname << " [options] <seq1 filename> ... <seqN filename>" << endl << endl
+	     << "When all genomes are in a single file:" << endl
+	     << pname << " [options] <seq filename>" << endl << endl
+	     << "Options:" << endl;
+
+	for( OptionList::const_iterator opt_it = option_list.begin(); opt_it != option_list.end(); ++opt_it )
+	{
+		const MauveOption& cur_opt = **opt_it;
+		// flags are followed by a space, options taking a value by '='
+		const char* separator = cur_opt.has_arg == no_argument ? " " : "=";
+		cerr << "\t--" << cur_opt.name << separator << cur_opt.usage_info << endl;
+	}
+
+	cerr << endl << endl
+	     << "Examples:\n"
+	     << pname << " --output=my_seqs.xmfa my_genome1.gbk my_genome2.gbk my_genome3.fasta\n"
+	     << "\nIf genomes are in a single file and have no rearrangement:\n"
+	     << pname << " --collinear --output=my_seqs.xmfa my_genomes.fasta\n";
+}
+
+/**
+ * Diagnostic helper: prints sizeof() for several match-related types to stderr.
+ */
+void printMatchSizes()
+{
+	// one instance of each type, so that sizeof can be applied to the object
+	UngappedLocalAlignment< HybridAbstractMatch<> > hybrid_ula;
+	UngappedLocalAlignment< SparseAbstractMatch<> > sparse_ula;
+	CompactGappedAlignment<> compact_aln;
+	MatchHashEntry	hash_entry;
+	bitset_t bits;
+	Match match;
+
+	cerr << "sizeof(UngappedLocalAlignment< HybridAbstractMatch<> >) " << sizeof(hybrid_ula) << endl
+	     << "sizeof(UngappedLocalAlignment< SparseAbstractMatch<> >) " << sizeof(sparse_ula) << endl
+	     << "sizeof(m) " << sizeof(match) << endl
+	     << "sizeof(CompactGappedAlignment<>) " << sizeof(compact_aln) << endl
+	     << "sizeof(boost::dynamic_bitset) " << sizeof(bits) << endl
+	     << "sizeof(MatchHashEntry) " << sizeof(hash_entry) << endl;
+}
+
+#ifndef WIN32
+#include <signal.h>
+#endif
+
+/**
+ * Aborts the running progressiveMauve program: reports the signal on stderr,
+ * deletes the registered temporary files and exits with the signal number as
+ * the exit status.
+ * NOTE(review): this is installed as a signal handler but uses iostreams and
+ * exit(); confirm that is acceptable on the target platforms.
+ * @param sig  number of the signal (or console event) that was caught
+ */
+void terminateProgram( int sig )
+{
+	std::cerr << "Caught signal " << sig << std::endl
+	          << "Cleaning up and exiting!\n";
+
+	deleteRegisteredFiles();
+	std::cerr << "Temporary files deleted.\n";
+
+	exit(sig);
+}
+
+#ifdef WIN32
+/**
+ * Console control handler for Windows builds.  For Ctrl-C, Ctrl-Break,
+ * close, logoff and shutdown events it calls terminateProgram() with the
+ * event code; every other event is ignored.
+ * @param dwCtrlType  the console control event code
+ * @return always true
+ */
+BOOL WINAPI handler(DWORD dwCtrlType)
+{
+	const bool is_termination_event =
+		dwCtrlType == CTRL_C_EVENT ||
+		dwCtrlType == CTRL_BREAK_EVENT ||
+		dwCtrlType == CTRL_CLOSE_EVENT ||
+		dwCtrlType == CTRL_LOGOFF_EVENT ||
+		dwCtrlType == CTRL_SHUTDOWN_EVENT;
+
+	if( is_termination_event )
+		terminateProgram(dwCtrlType);
+
+	return true;
+}
+#endif
+
+/**
+ * Program entry point: installs the abnormal-termination handlers, registers
+ * temporary-file cleanup for normal exit, then runs doAlignment().
+ * @return the value returned by doAlignment()
+ */
+int main( int argc, char* argv[] )
+{
+// Use #ifdef, like the guards around <signal.h> and handler() elsewhere in
+// this file, so that all three always select the same platform branch.
+#ifdef WIN32
+// Multi-tasking does not work well in CPU-bound
+// console apps running under Win32.
+// Reducing the process priority allows GUI apps
+// to run responsively in parallel. (thanks Bob Edgar!)
+	SetPriorityClass(GetCurrentProcess(), BELOW_NORMAL_PRIORITY_CLASS);
+// also register a handler to clean up during abnormal shutdown
+	SetConsoleCtrlHandler(handler, TRUE);
+#else
+// register a signal handler to catch errors and control-c and clean up...
+	signal( SIGINT, terminateProgram );
+	signal( SIGTERM, terminateProgram );
+	signal( SIGSEGV, terminateProgram );
+#endif
+	// delete temp files at program exit!
+	atexit( &deleteRegisteredFiles );
+
+	return doAlignment(argc, argv);
+}
+
+/**
+ * Converts a seed pattern to its binary text form ('0'/'1' characters),
+ * most significant bit first and without leading zeros.
+ * A pattern with no bits set yields the empty string.
+ * @param seed_pattern  the bit pattern to convert
+ * @param pattern       receives the NUL-terminated text (at most 64 digits)
+ */
+void getPatternText( int64 seed_pattern, char pattern[65] )
+{
+	char digits[65];
+	digits[64] = '\0';
+
+	// Walk from the least significant bit (written at the end of the buffer)
+	// towards the most significant one, remembering the left-most '1' seen.
+	int first_one = 64;
+	for( int pos = 63; pos >= 0; --pos )
+	{
+		if( seed_pattern & 0x1 )
+		{
+			digits[pos] = '1';
+			first_one = pos;
+		}
+		else
+			digits[pos] = '0';
+		seed_pattern >>= 1;
+	}
+
+	// copy from the left-most '1' up to and including the terminating NUL
+	memcpy( pattern, digits + first_one, 65 - first_one );
+}
+
+/**
+ * Derives one SML file name per sequence file, of the form
+ * "<sequence file>.<seed pattern as binary text>.sslist".
+ * @param seq_files    the sequence file names
+ * @param sml_files    resized to seq_files.size() and filled with the derived names
+ * @param seed_weight  passed to getSeed() to select the seed pattern
+ * @param seed_rank    passed to getSeed() to select the seed pattern
+ */
+void getDefaultSmlFileNames( const vector< string >& seq_files, vector< string >& sml_files, int seed_weight, int seed_rank )
+{
+	// textual form of the seed pattern
+	char pattern[65];
+	getPatternText( getSeed(seed_weight, seed_rank), pattern );
+
+	// the part appended to every sequence file name
+	const string suffix = string(".") + pattern + ".sslist";
+
+	const size_t file_count = seq_files.size();
+	sml_files.resize( file_count );
+	for( size_t fileI = 0; fileI < file_count; ++fileI )
+		sml_files[fileI] = seq_files[fileI] + suffix;
+}
+
+/**
+ * Runs backbone detection on an alignment and writes the backbone files.
+ * HMM parameters are adapted to the GC content of the sequences and to the
+ * requested identity, applied to the alignment, and the backbone is then
+ * re-detected with a BigGapsDetector.  The backbone sequence coordinates are
+ * written to bb_fname, read back, post-processed and written again; the
+ * backbone columns go to bbcols_fname, which is also recorded in
+ * iv_list.backbone_filename.
+ * @param iv_list          alignment to process (modified)
+ * @param bbcols_fname     output file for backbone columns
+ * @param bb_fname         output file for backbone sequence coordinates
+ * @param island_gap_size  passed to the BigGapsDetector
+ * @param hmm_identity     identity the HMM parameters are adapted to
+ * @param pgh              value stored in the HMM parameter iGoHomologous
+ * @param pgu              value stored in the HMM parameter iGoUnrelated
+ */
+void applyBackbone( IntervalList& iv_list, string& bbcols_fname, string& bb_fname, size_t island_gap_size, double hmm_identity, double pgh, double pgu )
+{
+	ofstream backbone_file( bb_fname.c_str() );
+	backbone_list_t backbone;
+
+	// adapt to the GC of the sequences
+	const double gc_fraction = computeGC( iv_list.seq_table );
+	std::cout << "Organisms have " << std::setprecision(3) << gc_fraction*100 << "% GC\n";
+
+	Params params = getAdaptedHoxdMatrixParameters( gc_fraction );
+	params.iGoHomologous = pgh;
+	params.iGoUnrelated = pgu;
+	adaptToPercentIdentity( params, hmm_identity );
+
+	// first pass: HMM-based detection applied to the alignment itself
+	detectAndApplyBackbone(iv_list, backbone, params);
+	backbone.clear();
+
+	// second pass: recompute the backbone list with the big-gaps detector
+	BigGapsDetector gap_detector( island_gap_size );
+	detectBackbone( iv_list, backbone, &gap_detector );
+
+	// write the coordinates, then read them back as sequence entries
+	writeBackboneSeqCoordinates( backbone, iv_list, backbone_file );
+	backbone_file.close();
+	std::vector< bb_seqentry_t > seq_entries;
+	std::ifstream backbone_reader( bb_fname.c_str() );
+	readBackboneSeqFile( backbone_reader, seq_entries );
+
+	mergeAdjacentSegments( seq_entries );
+	addUniqueSegments( seq_entries );
+	backbone_reader.close();
+
+	// overwrite the coordinate file with the post-processed entries
+	backbone_file.open(bb_fname.c_str());
+	writeBackboneSeqFile( backbone_file, seq_entries );
+
+	ofstream columns_file( bbcols_fname.c_str() );
+	writeBackboneColumns( columns_file, backbone );
+	iv_list.backbone_filename = bbcols_fname;
+}
+
+/**
+ * progressive alignment.  wheee.
+ *
+ * Parses the command line, loads the input sequences, finds (or reads) the
+ * pairwise anchor matches, configures and runs the ProgressiveAligner,
+ * optionally applies backbone detection, and writes the resulting alignment.
+ *
+ * @param argc  argument count as passed to main()
+ * @param argv  argument vector as passed to main()
+ * @return 0 on success (also when only the usage or version text is printed
+ *         because no sequence files were given); -1 for usage errors, an
+ *         invalid --weight or an unreadable substitution matrix; -2 when an
+ *         output or match file cannot be opened/read or the scoring scheme is
+ *         not recognised; -3 when --profile is requested (not implemented)
+ */
+int doAlignment( int argc, char* argv[] ){
+//try{
+	// Each MauveOption registers itself with mauve_options on construction.
+	OptionList mauve_options;
+	MauveOption opt_island_gap_size( mauve_options, "island-gap-size", required_argument, "<number> Alignment gaps above this size in nucleotides are considered to be islands [20]" );
+	MauveOption opt_profile( mauve_options, "profile", required_argument, "<file> (Not yet implemented) Read an existing sequence alignment in XMFA format and align it to other sequences or alignments" );
+	MauveOption opt_apply_backbone( mauve_options, "apply-backbone", required_argument, "<file> Read an existing sequence alignment in XMFA format and apply backbone statistics to it" );
+	MauveOption opt_disable_backbone( mauve_options, "disable-backbone", no_argument, "Disable backbone detection" );
+	MauveOption opt_mums( mauve_options, "mums", no_argument, "Find MUMs only, do not attempt to determine locally collinear blocks (LCBs)" );
+	MauveOption opt_seed_weight( mauve_options, "seed-weight", required_argument, "<number> Use the specified seed weight for calculating initial anchors" );
+	MauveOption opt_output( mauve_options, "output", required_argument, "<file> Output file name.  Prints to screen by default" );
+	MauveOption opt_backbone_output( mauve_options, "backbone-output", required_argument, "<file> Backbone output file name (optional)." );
+	MauveOption opt_match_input( mauve_options, "match-input", required_argument, "<file> Use specified match file instead of searching for matches" );
+	MauveOption opt_input_id_matrix( mauve_options, "input-id-matrix", required_argument, "<file> An identity matrix describing similarity among all pairs of input sequences/alignments" );
+	MauveOption opt_max_gapped_aligner_length( mauve_options, "max-gapped-aligner-length", required_argument, "<number> Maximum number of base pairs to attempt aligning with the gapped aligner" );
+	MauveOption opt_input_guide_tree( mauve_options, "input-guide-tree", required_argument, "<file> A phylogenetic guide tree in NEWICK format that describes the order in which sequences will be aligned" );
+	MauveOption opt_output_guide_tree( mauve_options, "output-guide-tree", required_argument, "<file> Write out the guide tree used for alignment to a file" );
+	MauveOption opt_version( mauve_options, "version", no_argument, "Display software version information" );
+	MauveOption opt_debug( mauve_options, "debug", no_argument, "Run in debug mode (perform internal consistency checks--very slow)" );
+	MauveOption opt_scratch_path_1( mauve_options, "scratch-path-1", required_argument, "<path> Designate a path that can be used for temporary data storage.  Two or more paths should be specified." );
+	MauveOption opt_scratch_path_2( mauve_options, "scratch-path-2", required_argument, "<path> Designate a path that can be used for temporary data storage.  Two or more paths should be specified." );
+	MauveOption opt_collinear( mauve_options, "collinear", no_argument, "Assume that input sequences are collinear--they have no rearrangements" );
+	MauveOption opt_scoring_scheme( mauve_options, "scoring-scheme", required_argument, "<ancestral|sp_ancestral|sp> Selects the anchoring score function.  Default is extant sum-of-pairs (sp)." );
+	MauveOption opt_no_weight_scaling( mauve_options, "no-weight-scaling", no_argument, "Don't scale LCB weights by conservation distance and breakpoint distance" );
+	MauveOption opt_max_breakpoint_distance_scale( mauve_options, "max-breakpoint-distance-scale", required_argument, "<number [0,1]> Set the maximum weight scaling by breakpoint distance.  Defaults to 0.5" );
+	MauveOption opt_conservation_distance_scale( mauve_options, "conservation-distance-scale", required_argument, "<number [0,1]> Scale conservation distances by this amount.  Defaults to 0.5" );
+	MauveOption opt_muscle_args( mauve_options, "muscle-args", required_argument, "<arguments in quotes> Additional command-line options for MUSCLE.  Any quotes should be escaped with a backslash" );
+	MauveOption opt_skip_refinement( mauve_options, "skip-refinement", no_argument, "Do not perform iterative refinement" );
+	MauveOption opt_skip_gapped_alignment( mauve_options, "skip-gapped-alignment", no_argument, "Do not perform gapped alignment" );
+	MauveOption opt_bp_dist_estimate_min_score( mauve_options, "bp-dist-estimate-min-score", required_argument, "<number> Minimum LCB score for estimating pairwise breakpoint distance" );
+	MauveOption opt_mem_clean( mauve_options, "mem-clean", no_argument, "Set this to true when debugging memory allocations" );
+	MauveOption opt_gap_open( mauve_options, "gap-open", required_argument, "<number> Gap open penalty" );
+	MauveOption opt_penalize_repeats( mauve_options, "repeat-penalty", required_argument, "<negative|zero> Sets whether the repeat scores go negative or go to zero for highly repetitive sequences.  Default is negative." );
+	MauveOption opt_gap_extend( mauve_options, "gap-extend", required_argument, "<number> Gap extend penalty" );
+	MauveOption opt_substitution_matrix( mauve_options, "substitution-matrix", required_argument, "<file> Nucleotide substitution matrix in NCBI format" );
+	MauveOption opt_weight( mauve_options, "weight", required_argument, "<number> Minimum pairwise LCB score" );
+	MauveOption opt_min_scaled_penalty( mauve_options, "min-scaled-penalty", required_argument, "<number> Minimum breakpoint penalty after scaling the penalty by expected divergence" );
+	MauveOption opt_go_homologous( mauve_options, "hmm-p-go-homologous", required_argument, "<number> Probability of transitioning from the unrelated to the homologous state [0.00001]" );
+	MauveOption opt_go_unrelated( mauve_options, "hmm-p-go-unrelated", required_argument, "<number> Probability of transitioning from the homologous to the unrelated state [0.000000001]" );
+	MauveOption opt_hmm_identity( mauve_options, "hmm-identity", required_argument, "<number> Expected level of sequence identity among pairs of sequences, ranging between 0 and 1 [0.7]" );
+	MauveOption opt_seed_family( mauve_options, "seed-family", no_argument, "Use a family of spaced seeds to improve sensitivity" );
+	MauveOption opt_solid_seeds( mauve_options, "solid-seeds", no_argument, "Use solid seeds. Do not permit substitutions in anchor matches." );
+	MauveOption opt_coding_seeds( mauve_options, "coding-seeds", no_argument, "Use coding pattern seeds. Useful to generate matches coding regions with 3rd codon position degeneracy." );
+	MauveOption opt_disable_cache( mauve_options, "disable-cache", no_argument, "Disable recursive anchor search cacheing to workaround a crash bug" );
+	MauveOption opt_recursive( mauve_options, "no-recursion", no_argument, "Disable recursive anchor search" );
+
+	if( argc <= 0 ){
+		print_usage( "mauveAligner", mauve_options );
+		return -1;
+	}
+	if( argc == 1 ){
+		print_usage( argv[0], mauve_options );
+		return -1;
+	}
+
+	// default values for homology HMM transitions
+	double pgh = 0.00001;
+	double pgu = 0.000000001;
+	double hmm_identity = 0.7;	// percent identity modeled by the HMM homologous state
+	size_t island_gap_size = 20;
+
+	// set the Muscle path
+	MuscleInterface& mi = MuscleInterface::getMuscleInterface();
+	mi.ParseMusclePath( argv[0] );
+
+	// parse the options
+	//
+	// parse command line with gnu getopt
+	//
+	int opt;
+	int ac = argc;
+	char** av = argv;
+	int indexptr;
+	while( (opt = getopt_long( ac, av, "", mauve_options.getOptions(), &indexptr )) != EOF ){
+		if( opt == 0 )
+		{
+			// a registered long option matched: config_opt holds its index
+			mauve_options[mauve_options.config_opt]->set = true;
+			if( optarg != NULL )
+				mauve_options[mauve_options.config_opt]->arg_value = optarg;
+		}else{
+			print_usage( argv[0], mauve_options );
+			return -1;
+		}
+	}
+	
+	if( opt_scratch_path_1.set )
+		FileSML::registerTempPath( opt_scratch_path_1.arg_value.c_str() );
+	if( opt_scratch_path_2.set )
+		FileSML::registerTempPath( opt_scratch_path_2.arg_value.c_str() );
+
+	// set the random number generator to a fixed seed for repeatability
+	// this should be changed if the algorithm ever depends on true pseudo-randomness
+	SetTwisterSeed(37);
+
+	if( opt_go_homologous.set )
+		pgh = strtod( opt_go_homologous.arg_value.c_str(), NULL );
+	if( opt_go_unrelated.set )
+		pgu = strtod( opt_go_unrelated.arg_value.c_str(), NULL );
+	if( opt_hmm_identity.set )
+		hmm_identity = strtod( opt_hmm_identity.arg_value.c_str(), NULL );
+	if( opt_island_gap_size.set )
+		island_gap_size = atoi( opt_island_gap_size.arg_value.c_str() );
+
+	// for debugging only:
+	if( opt_apply_backbone.set )
+	{
+		IntervalList iv_list;
+		ifstream in_file( opt_apply_backbone.arg_value.c_str() );
+		ofstream out_file( opt_output.arg_value.c_str() );
+		iv_list.ReadStandardAlignment(in_file);
+		MatchList ml;
+		ml.seq_filename = iv_list.seq_filename;
+		// NOTE(review): indexes seq_filename[0] and [1] without checking that
+		// the alignment names at least two sequences
+		if( ml.seq_filename[0] != ml.seq_filename[1] )
+			LoadSequences(ml, &cout);
+		else
+			LoadMFASequences(ml, ml.seq_filename[0], &cout);
+		iv_list.seq_table = ml.seq_table;
+		string bb_fname = opt_output.arg_value + ".backbone";
+		string bbcols_fname = opt_output.arg_value + ".bbcols";
+		applyBackbone( iv_list, bbcols_fname, bb_fname, island_gap_size, hmm_identity, pgh, pgu );
+		iv_list.WriteStandardAlignment(out_file);
+		return 0;
+	}
+
+	//
+	// definitions of the variables that can be set by the user on the command line:
+	//
+	vector<string> seq_files;
+	vector<string> sml_files;
+	vector<gnSequence*> seq_table;
+	vector<DNAFileSML*> sml_table;
+	uint mer_size = 0;	// Use default settings
+	boolean create_LCBs = true;
+	string output_file = "";
+	string tree_filename = "";
+
+	boolean lcb_match_input_format = false;
+
+	uint seqI;
+	
+	ostream* detail_list_out = NULL;	/**< output stream for detail list */
+
+	// now read in the seq file names from av
+	boolean seq_name_arg = true;
+	for( int optI = optind; optI < argc; optI++ )
+		seq_files.push_back( av[ optI ] );
+
+	// set sml_names
+	for( size_t seq_fileI = 0; seq_fileI < seq_files.size(); seq_fileI++ )
+		sml_files.push_back( seq_files[seq_fileI] + ".sslist" );
+	
+	// print the version if the user requested it
+	if( opt_version.set ){
+		cerr << "progressiveMauve " << " build date " << __DATE__ << " at " << __TIME__ << endl;
+	}
+
+	if( seq_files.size() == 0 )
+	{
+		if( !opt_version.set )
+			print_usage( argv[0], mauve_options );
+		return 0;
+	}
+
+	//
+	// done parsing and checking command line options
+	// Start doing the work
+	//
+
+	MatchList pairwise_match_list;
+	if( opt_seed_weight.set )
+	{
+		mer_size = atoi( opt_seed_weight.arg_value.c_str() );
+	}
+	
+	if( seq_files.size() == 1 ){
+		// a single input file is loaded with LoadMFASequences and indexed in memory
+		LoadMFASequences( pairwise_match_list, seq_files[0], &cout );
+		pairwise_match_list.CreateMemorySMLs( mer_size, &cout );
+	}else{
+		pairwise_match_list.seq_filename = seq_files;
+		pairwise_match_list.sml_filename = sml_files;
+		// testing: rewrite seq files in RAW format
+		LoadAndCreateRawSequences( pairwise_match_list, &cout );
+//		LoadSequences( pairwise_match_list, &cout );
+		// NOTE(review): the default branch requests CODING_SEED exactly like
+		// --coding-seeds does; confirm that this is intended
+		if(opt_solid_seeds.set)
+			pairwise_match_list.LoadSMLs( mer_size, &cout, SOLID_SEED, true );
+		else if(opt_coding_seeds.set)
+			pairwise_match_list.LoadSMLs( mer_size, &cout, CODING_SEED );
+		else
+			pairwise_match_list.LoadSMLs( mer_size, &cout, CODING_SEED );
+	}
+
+	// alignment output goes to the --output file, or to stdout when none is given
+	ostream* match_out;
+	if( opt_output.set ){
+		ofstream* match_out_file = new ofstream( opt_output.arg_value.c_str() );
+		if( !match_out_file->is_open() ){
+			cerr << "Unable to open output file \"" << opt_output.arg_value << "\" for writing.\nCheck that you have permission to write files in this location and that the disk has free space.\n";
+			return -2;
+		}
+		match_out = match_out_file;
+	}else
+		match_out = &cout;
+	
+	if(opt_mem_clean.set)
+		debugging_memory = true;
+
+	// read matches if the user requested it
+	if( opt_match_input.set ){
+		ifstream match_in( opt_match_input.arg_value.c_str() );
+		if( !match_in.is_open() ){
+			cerr << "Error opening " << opt_match_input.arg_value << endl;
+			return -2;
+		}
+		try{
+			ReadList( pairwise_match_list, match_in );
+		}catch( gnException& gne ){
+			cerr << gne << endl;
+			cerr << "Error reading " << opt_match_input.arg_value << "\nPossibly corrupt file or invalid file format\n";
+			return -2;
+		}
+
+		if( seq_files.size() > 1 )
+			pairwise_match_list.seq_filename = seq_files;
+		else if( pairwise_match_list.seq_table.size() == 0 )
+			// fill seq_table with empty sequences
+			for( seqI = 0; seqI < pairwise_match_list.seq_filename.size(); seqI++ )
+				pairwise_match_list.seq_table.push_back( new gnSequence() );
+	}else if( !opt_seed_family.set ){
+		// single seed pattern: the match finder is chosen by the number of sequences
+		if( pairwise_match_list.seq_table.size() > 4 )
+		{
+			UniqueMatchFinder umf;
+			umf.LogProgress( &cout );
+			umf.FindMatches( pairwise_match_list );
+			umf.Clear();
+		}else{
+			PairwiseMatchFinder pmf;
+			pmf.LogProgress( &cout );
+			pmf.FindMatches( pairwise_match_list );
+			pmf.Clear();
+		}
+		cout << "done.\n";
+	}else{
+		// use an entire seed family to do the search
+		if( mer_size == 0 )
+		{
+			// no seed weight given: derive one from the average sequence length
+			size_t avg = 0;
+			for( int seqI = 0; seqI < pairwise_match_list.seq_table.size(); seqI++ )
+				avg += pairwise_match_list.seq_table[seqI]->length();
+			avg /= pairwise_match_list.seq_table.size();
+			mer_size = getDefaultSeedWeight( avg );
+		}
+		// search with the longest seeds first so that overlapping matches tend to get contained
+		vector< pair< int, int > > length_ranks(3);
+		length_ranks[0] = make_pair( getSeedLength( getSeed(mer_size, 0) ), 0 );
+		length_ranks[1] = make_pair( getSeedLength( getSeed(mer_size, 1) ), 1 );
+		length_ranks[2] = make_pair( getSeedLength( getSeed(mer_size, 2) ), 2 );
+		std::sort( length_ranks.begin(), length_ranks.end() );
+
+		UniqueMatchFinder umf;
+		// length_ranks is sorted ascending, so iterate from the back
+		for( int seedI = 2; seedI >= 0; seedI-- )
+		{
+			umf.LogProgress( &cout );
+			int64 seed_pattern = getSeed(mer_size, length_ranks[seedI].second );
+			char pattern[65];
+			getPatternText( seed_pattern, pattern );
+			cout << "\nSearching with seed pattern " << pattern << "\n";
+			MatchList cur_list;
+			cur_list.seq_filename = pairwise_match_list.seq_filename;
+			cur_list.seq_table = pairwise_match_list.seq_table;
+			if( seq_files.size() == 1 )
+				cur_list.CreateMemorySMLs( mer_size, &cout, length_ranks[seedI].second );
+			else
+			{
+				getDefaultSmlFileNames( cur_list.seq_filename, cur_list.sml_filename, mer_size, length_ranks[seedI].second );
+				cur_list.LoadSMLs(mer_size, &cout, length_ranks[seedI].second);
+			}
+			umf.FindMatches( cur_list );
+			umf.ClearSequences();
+			for( size_t smlI = 0; smlI < cur_list.sml_table.size(); smlI++ )
+				delete cur_list.sml_table[smlI];	// free memory
+			for( size_t curI = 0; curI < cur_list.size(); curI++ )
+				cur_list[curI]->Free();	// free more memory!
+		}
+		umf.GetMatchList(pairwise_match_list);
+		cout << "done\n";
+		umf.Clear();
+	}
+	
+	// --mums: write the match list and stop before any alignment is attempted
+	if( opt_mums.set )
+	{
+		WriteList(pairwise_match_list, *match_out);
+		for( size_t seqI = 0; seqI < pairwise_match_list.seq_table.size(); seqI++ )
+			delete pairwise_match_list.seq_table[seqI];	// an auto_ptr or shared_ptr could be great for this
+		for( size_t seqI = 0; seqI < pairwise_match_list.sml_table.size(); seqI++ )
+			delete pairwise_match_list.sml_table[seqI];
+		return 0;
+	}
+
+	// check whether the input sequences were masked to eliminate excess NNNNNs
+	for( seqI = 0; seqI < pairwise_match_list.sml_table.size(); seqI++ ){
+		FileSML* cur_sml = dynamic_cast< FileSML* >(pairwise_match_list.sml_table[ seqI ]);
+		if( cur_sml != NULL ){
+			const vector< int64 >& seq_coords = cur_sml->getUsedCoordinates();
+			if( seq_coords.size() > 0 ){
+				transposeMatches( pairwise_match_list, seqI, seq_coords );
+			}
+		}
+	}
+	
+	// free any match search memory
+	SlotAllocator<MatchHashEntry>& allocator = SlotAllocator<MatchHashEntry>::GetSlotAllocator();
+	allocator.Purge();
+	
+	ProgressiveAligner aligner( pairwise_match_list.seq_table.size() );
+	if( opt_skip_gapped_alignment.set )
+		aligner.setGappedAlignment(false);
+	if( opt_skip_refinement.set )
+		aligner.setRefinement(false);
+	if( opt_debug.set )
+		debug_aligner = true;
+
+	// check that LCB_size can be set appropriately
+	if( opt_weight.set )
+	{
+		double lcb_weight = strtod( opt_weight.arg_value.c_str(), NULL );
+		if( lcb_weight < 0 )
+		{
+			cerr << "A minimum LCB size greater than 0 must be specified in order to create LCBs.\n";
+			return -1;
+		}else
+			aligner.setBreakpointPenalty( lcb_weight );
+	}
+
+	if( opt_collinear.set )
+		aligner.setCollinear(true);
+
+	if( opt_max_gapped_aligner_length.set )
+	{
+		int64 mgal = atol( opt_max_gapped_aligner_length.arg_value.c_str() );
+		aligner.SetMaxGappedAlignmentLength( mgal );
+	}
+
+	if( opt_seed_family.set )
+		aligner.setUseSeedFamilies(true);
+
+	penalize_repeats = true;
+	if(opt_penalize_repeats.set && opt_penalize_repeats.arg_value == "zero")
+		penalize_repeats = false;
+
+	if( opt_scoring_scheme.set )
+	{
+		if( opt_scoring_scheme.arg_value == "ancestral" )
+			aligner.setLcbScoringScheme(ProgressiveAligner::AncestralScoring);
+		// The usage text advertises "sp_ancestral"; the previously accepted
+		// spelling "ancestral_sp" is kept for backward compatibility.
+		else if( opt_scoring_scheme.arg_value == "sp_ancestral" || opt_scoring_scheme.arg_value == "ancestral_sp" )
+			aligner.setLcbScoringScheme(ProgressiveAligner::AncestralSumOfPairsScoring);
+		else if( opt_scoring_scheme.arg_value == "sp" )
+			aligner.setLcbScoringScheme(ProgressiveAligner::ExtantSumOfPairsScoring);
+		else
+		{
+			cerr << "Unrecognized scoring scheme: " << opt_scoring_scheme.arg_value << endl;
+			return -2;
+		}
+	}else	// default to extant sp
+		aligner.setLcbScoringScheme(ProgressiveAligner::ExtantSumOfPairsScoring);
+	if( opt_no_weight_scaling.set )
+		aligner.setUseLcbWeightScaling(false);
+	if( opt_max_breakpoint_distance_scale.set )
+	{
+		double d = strtod( opt_max_breakpoint_distance_scale.arg_value.c_str(), NULL );
+		aligner.setBreakpointDistanceScale(d);
+	}
+	if( opt_conservation_distance_scale.set )
+	{
+		double d = strtod( opt_conservation_distance_scale.arg_value.c_str(), NULL );
+		aligner.setConservationDistanceScale(d);
+	}
+	if( opt_bp_dist_estimate_min_score.set )
+	{
+		double d = strtod( opt_bp_dist_estimate_min_score.arg_value.c_str(), NULL );
+		aligner.setBpDistEstimateMinScore(d);
+	}
+	if( opt_disable_cache.set )
+	{
+		aligner.SetUseCacheDb(false);
+	}
+
+	if( opt_min_scaled_penalty.set )
+	{
+		aligner.setMinimumBreakpointPenalty(strtod( opt_min_scaled_penalty.arg_value.c_str(), NULL ) );
+	}
+	if( pairwise_match_list.seq_table.size() != 0 )
+	{
+		aligner.setPairwiseMatches( pairwise_match_list );
+	}
+	if( opt_muscle_args.set )
+	{
+		MuscleInterface& mi = MuscleInterface::getMuscleInterface();
+		mi.SetExtraMuscleArguments(opt_muscle_args.arg_value);
+	}
+	if( opt_recursive.set )
+		aligner.SetRecursive(false);
+	else
+		aligner.SetRecursive(true);
+
+	PairwiseScoringScheme pss;
+	if( opt_gap_open.set )
+	{
+		pss.gap_open = atoi(opt_gap_open.arg_value.c_str());
+	}
+	if( opt_gap_extend.set )
+	{
+		// read the --gap-extend argument (this previously parsed the
+		// --gap-open argument by mistake)
+		pss.gap_extend = atoi(opt_gap_extend.arg_value.c_str());
+	}
+	if( opt_substitution_matrix.set )
+	{
+		ifstream sub_in( opt_substitution_matrix.arg_value.c_str() );
+		if( !sub_in.is_open() )
+		{
+			cerr << "Error opening substitution matrix file: \"" << opt_substitution_matrix.arg_value << "\"\n";
+			return -1;
+		}
+		score_t matrix[4][4];
+		readSubstitutionMatrix( sub_in, matrix );
+		// keep the gap penalties chosen above when swapping in the matrix
+		pss = PairwiseScoringScheme(matrix, pss.gap_open, pss.gap_extend);
+	}
+	aligner.setPairwiseScoringScheme(pss);
+
+	if( opt_input_guide_tree.set )
+		aligner.setInputGuideTreeFileName( opt_input_guide_tree.arg_value );
+	if( opt_output_guide_tree.set )
+		aligner.setOutputGuideTreeFileName( opt_output_guide_tree.arg_value );
+
+	// if we will be doing a profile-profile or profile-sequence alignment
+	// then read in the profile
+	IntervalList profile_1;
+	IntervalList profile_2;
+	if( opt_profile.set ){
+		cerr << "Profile-profile alignment not yet implemented\n";
+		return -3;
+	}
+
+	IntervalList interval_list;
+	interval_list.seq_table = pairwise_match_list.seq_table;
+	interval_list.seq_filename = pairwise_match_list.seq_filename;
+
+	if( opt_profile.set )
+		; //aligner.alignPP(profile_1, profile_2, interval_list );
+	else
+		aligner.align( interval_list.seq_table, interval_list );
+
+	if( !opt_disable_backbone.set )
+	{
+		// NOTE(review): when --output is not given, arg_value is empty and the
+		// backbone files are named ".bbcols" / ".backbone"
+		string bbcols_fname = opt_output.arg_value + ".bbcols";
+		string bb_seq_fname = opt_backbone_output.arg_value;
+		if( !opt_backbone_output.set )
+			bb_seq_fname = opt_output.arg_value + ".backbone";
+		applyBackbone( interval_list, bbcols_fname, bb_seq_fname, island_gap_size, hmm_identity, pgh, pgu );
+	}
+
+	interval_list.WriteStandardAlignment(*match_out);
+	match_out->flush();
+
+	for( size_t seqI = 0; seqI < pairwise_match_list.seq_table.size(); seqI++ )
+		delete pairwise_match_list.seq_table[seqI];	// an auto_ptr or shared_ptr could be great for this
+	for( size_t seqI = 0; seqI < pairwise_match_list.sml_table.size(); seqI++ )
+		delete pairwise_match_list.sml_table[seqI];
+
+// only explicitly free memory if absolutely necessary
+// since free() is very slow and the OS will reclaim it at program exit anyways
+	if(opt_mem_clean.set)
+	{
+		// free memory used by pairwise matches
+		for( size_t mI = 0; mI < pairwise_match_list.size(); mI++ )
+			pairwise_match_list[mI]->Free();
+
+		if( opt_output.set )
+			delete match_out;
+	}
+
+/*
+}catch( gnException& gne ) {
+	cerr << "Unhandled gnException: " << gne << endl;
+	throw gne;
+	return -10;
+}catch( exception& e ) {
+	cerr << "Unhandled exception: " << e.what() << endl;
+	throw e;
+	return -11;
+}catch( char* message ){
+	cerr << "Unhandled exception: " << message << endl;
+	throw message;
+	return -12;
+}catch( const char* message ){
+	cerr << "Unhandled exception: " << message << " (const)\n";
+	throw message;
+	return -14;
+}catch(...){
+	cerr << "Unknown exception occurred.\n";
+	throw;
+	return -13;
+}
+*/
+	return 0;
+}
+
diff --git a/src/projectAndStrip.cpp b/src/projectAndStrip.cpp
new file mode 100644
index 0000000..34d11e8
--- /dev/null
+++ b/src/projectAndStrip.cpp
@@ -0,0 +1,144 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string>
+#include <fstream>
+#include <vector>
+#include <algorithm>
+#include <iostream>
+#include <sstream>
+#include "libGenome/gnFilter.h"
+#include "libMems/IntervalList.h"
+#include "libMems/MatchList.h"
+#include "libMems/GappedAlignment.h"
+#include "libMems/Matrix.h"
+#include "libMems/MatchProjectionAdapter.h"
+#include "libMems/Aligner.h"
+#include "libMems/Islands.h"
+#include "libGenome/gnFASSource.h"
+#include <boost/tuple/tuple.hpp>
+#include "libMems/ProgressiveAligner.h"
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+typedef boost::tuple< uint, gnSeqI, gnSeqI, vector< uint > > bbcol_t;
+
+/**
+ * projectAndStrip: reads an XMFA alignment, keeps only the intervals in which
+ * every requested sequence takes part, projects those intervals onto the
+ * requested sequences, rebuilds LCBs from them and writes the result.
+ *
+ * Usage: projectAndStrip <input xmfa> <output xmfa> <seq1> <seq2>...<seqN>
+ * where the sequence identifiers are numeric and start at 0.
+ *
+ * @return 0 on success; -1 for usage errors, unreadable/unwritable files,
+ *         out-of-range sequence identifiers or a gnException; -2, -3 and -4
+ *         for std::exception, C-string and unknown exceptions respectively
+ */
+int main( int argc, char* argv[] )
+{
+	if( argc < 5 )
+	{
+		cerr << "Usage: projectAndStrip <input xmfa> <output xmfa> <seq1> <seq2>...<seqN>\n";
+		cerr << "\nNumeric sequence identifiers start at 0.\n";
+		return -1;
+	}
+	ifstream aln_in;
+	aln_in.open( argv[1] );
+	if( !aln_in.is_open() ){
+		cerr << "Error opening " << argv[1] << endl;
+		return -1;
+	}
+	ofstream aln_out;
+	aln_out.open( argv[2] );
+	if( !aln_out.is_open() ){
+		cerr << "Error writing to " << argv[2] << endl;
+		return -1;
+	}
+	// the sequence identifiers to keep, in command-line order
+	vector<uint> seq_ids(argc-3);
+	for( int argI = 3; argI < argc; ++argI )
+		seq_ids[argI - 3] = atoi(argv[argI]);
+
+	try{
+		IntervalList input_ivs;
+		input_ivs.ReadStandardAlignment( aln_in );
+		aln_in.close();
+
+		LoadSequences( input_ivs, NULL );
+
+		// Every identifier is used below as an index into the alignment's
+		// sequence tables, so reject anything out of range up front.
+		// (A negative argument wraps to a large unsigned value and is caught too.)
+		for( size_t i = 0; i < seq_ids.size(); i++ )
+		{
+			if( seq_ids[i] >= input_ivs.seq_table.size() )
+			{
+				cerr << "Error: sequence identifier " << seq_ids[i] << " is out of range; the alignment contains " << input_ivs.seq_table.size() << " sequences\n";
+				return -1;
+			}
+		}
+
+		IntervalList output_ivs;
+		output_ivs.seq_table = input_ivs.seq_table;
+		output_ivs.seq_filename = input_ivs.seq_filename;
+		
+		vector< GappedAlignment* > gaga_list;
+		
+		for( size_t ivI = 0; ivI < input_ivs.size(); ivI++ )
+		{
+			Interval& iv = input_ivs[ivI];
+			// find the first requested sequence that is missing from this interval
+			size_t j = 0;
+			for( ; j < seq_ids.size(); j++ )
+			{
+				if( iv.LeftEnd( seq_ids[j] ) == NO_MATCH )
+					break;
+			}
+			// only intervals that contain all requested sequences are kept
+			if( j == seq_ids.size() )
+			{
+				vector<string> aln_mat;
+				GetAlignment( iv, input_ivs.seq_table, aln_mat );
+				Interval new_iv;
+				GappedAlignment ga(seq_ids.size(), 0);
+				GappedAlignment* gaga = ga.Copy();
+				vector<string> sub_mat( seq_ids.size() );
+				for( size_t sI = 0; sI < seq_ids.size(); sI++ )
+				{
+					// row sI of the projection takes its coordinates and
+					// alignment row from original sequence seq_ids[sI]
+					gaga->SetStart( sI, iv.Start(seq_ids[sI]) );
+					gaga->SetLength( iv.Length(seq_ids[sI]), sI );
+					swap( sub_mat[sI], aln_mat[seq_ids[sI]] );
+				}
+				gaga->SetAlignment(sub_mat);
+				gaga_list.push_back( gaga );
+			}
+		}
+
+		// invert every projected interval whose first sequence is in reverse orientation
+		for( size_t gI = 0; gI < gaga_list.size(); gI++ )
+			if( gaga_list[gI]->Orientation(0) == AbstractMatch::reverse )
+				gaga_list[gI]->Invert();
+
+		cout << "constructing LCBs\n";
+		vector< gnSeqI > bps;
+		IntervalList real_out_ivs;
+		IdentifyBreakpoints(gaga_list, bps);
+		vector< vector< GappedAlignment* > > coal_ivs;
+		ComputeLCBs_v2(gaga_list, bps, coal_ivs);
+		// the output alignment lists only the requested sequences
+		real_out_ivs.seq_filename.resize(seq_ids.size());
+		real_out_ivs.seq_table.resize(seq_ids.size());
+		for( size_t sI = 0; sI < seq_ids.size(); sI++ )
+		{
+			real_out_ivs.seq_filename[sI] = input_ivs.seq_filename[seq_ids[sI]];
+			real_out_ivs.seq_table[sI] = input_ivs.seq_table[seq_ids[sI]];
+		}
+		real_out_ivs.resize( coal_ivs.size() );
+		for( size_t cI = 0; cI < coal_ivs.size(); cI++ )
+			real_out_ivs[cI].SetMatches(coal_ivs[cI]);
+		cout << "real_out_ivs.size() " << real_out_ivs.size() << endl;
+
+
+
+		addUnalignedIntervals( real_out_ivs );
+		real_out_ivs.WriteStandardAlignment( aln_out );
+		aln_out.close();
+	}catch( gnException& gne ){
+		cerr << gne << endl;
+		return -1;
+	}catch( exception& e ){
+		cerr << e.what() << endl;
+		return -2;
+	}catch( char const* c ){
+		cerr << c << endl;
+		return -3;
+	}catch(...){
+		cerr << "Unhandled exception" << endl;
+		return -4;
+	}
+	return 0;
+}
+
diff --git a/src/randomGeneSample.cpp b/src/randomGeneSample.cpp
new file mode 100644
index 0000000..22aa880
--- /dev/null
+++ b/src/randomGeneSample.cpp
@@ -0,0 +1,165 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string>
+#include <fstream>
+#include <vector>
+#include <algorithm>
+#include <iostream>
+#include <sstream>
+#include "libGenome/gnFilter.h"
+#include "libMems/IntervalList.h"
+#include "libMems/MatchList.h"
+#include "libMems/GappedAlignment.h"
+#include "libMems/Matrix.h"
+#include "libMems/MatchProjectionAdapter.h"
+#include "libMems/Aligner.h"
+#include "libMems/Islands.h"
+#include "libGenome/gnFASSource.h"
+#include <boost/tuple/tuple.hpp>
+#include "libMems/ProgressiveAligner.h"
+#include "libMems/Backbone.h"
+#include "libGenome/gnFeature.h"
+#include "libGenome/gnFASSource.h"
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+typedef boost::tuple< uint, gnSeqI, gnSeqI, vector< uint > > bbcol_t;
+
+/**
+ * randomGeneSample: picks CDS features at random from one genome of an XMFA
+ * alignment and writes the alignment columns spanning each picked gene to
+ * "<output base name>_<n>.fas".
+ *
+ * A gene is eligible only when it lies inside a backbone segment that has
+ * non-zero coordinates in every genome of the alignment.
+ *
+ * Arguments: <input xmfa> <backbone seq file> <sample genome> <number of genes>
+ *            <output base name> [random seed]
+ *
+ * @return 0 on success, -1 on usage / alignment open errors, -2 when the
+ *         backbone file cannot be opened, -3 when the sample genome index is
+ *         outside the alignment
+ */
+int main( int argc, char* argv[] )
+{
+	if( argc < 6 )
+	{
+		cerr << "Usage: randomGeneSample <input xmfa> <backbone seq file> <sample genome> <number of genes> <output base name> [random seed]\n";
+		return -1;
+	}
+	ifstream aln_in;
+	aln_in.open( argv[1] );
+	if( !aln_in.is_open() ){
+		cerr << "Error opening " << argv[1] << endl;
+		return -1;
+	}
+	uint gene_count = atoi( argv[4] );
+	uint sgI = atoi( argv[3] );
+	string output_base = argv[5];
+
+	if( argc == 7 )
+		srand(atoi(argv[6]));
+	else
+		srand(time(NULL));
+
+	IntervalList input_ivs;
+	input_ivs.ReadStandardAlignment( aln_in );
+	aln_in.close();
+	LoadSequences( input_ivs, &cout );
+
+	// sgI subscripts seq_table and every backbone entry below, so reject bad indices up front
+	if( sgI >= input_ivs.seq_table.size() )
+	{
+		cerr << "Error: sample genome " << sgI << " is out of range, the alignment has " << input_ivs.seq_table.size() << " genomes\n";
+		return -3;
+	}
+
+	vector< bb_seqentry_t > backbone;
+	ifstream bb_in;
+	bb_in.open( argv[2] );
+	if( !bb_in.is_open() ){
+		cerr << "Error opening \"" << argv[2] << "\"" << endl;
+		return -2;
+	}
+	readBackboneSeqFile( bb_in, backbone );
+	bb_in.close();
+
+	// collect the CDS features of the sample genome, discard everything else
+	gnSequence* gen0 = input_ivs.seq_table[sgI];
+	vector< gnBaseFeature* > genes;
+	for( size_t featI = 0; featI < gen0->getFeatureListLength(); featI++ )
+	{
+		gnBaseFeature* feat = gen0->getFeature(featI);
+		if( feat->GetName() == "CDS" )
+			genes.push_back( feat );
+		else
+			delete feat;
+	}
+
+	cout << genes.size() << " of the " << gen0->getFeatureListLength() << " annotated features are CDS\n";
+
+	// rejected[i] marks genes already found to lie outside N-way backbone;
+	// candidates counts the genes that are neither used up nor rejected, so the
+	// rejection sampling below can stop instead of spinning forever
+	vector< char > rejected( genes.size(), 0 );
+	size_t candidates = genes.size();
+
+	// pick a gene at random from the sample genome, extract the alignment, and write it to a file
+	for( size_t geneI = 0; geneI < gene_count; geneI++ )
+	{
+		cerr << "picking gene\n";
+		size_t randy = 0;
+		bool found = false;
+		while( !found && candidates > 0 )
+		{
+			randy = rand() % genes.size();
+			// has this gene already been used or ruled out?
+			if( genes[randy] == NULL || rejected[randy] )
+				continue;
+			// is this gene part of N-way backbone?
+			gnLocation loc = genes[randy]->GetLocation(0);
+			int64 lend = loc.GetFirst();
+			int64 rend = loc.GetLast();
+			size_t bbI = 0;
+			for( ; bbI < backbone.size(); bbI++ )
+			{
+				if( genome::absolut(backbone[bbI][sgI].first) <= lend && rend <= genome::absolut(backbone[bbI][sgI].second) )
+					break;
+			}
+			size_t seqI = 0;
+			for( ; bbI < backbone.size() && seqI < input_ivs.seq_table.size(); ++seqI )
+			{
+				if( backbone[bbI][seqI].first == 0 || backbone[bbI][seqI].second == 0 )
+					break;
+			}
+			if( seqI == input_ivs.seq_table.size() && bbI < backbone.size() )
+				found = true;	// found a containing segment
+			else
+			{
+				rejected[randy] = 1;
+				--candidates;
+			}
+		}
+		if( !found )
+		{
+			cerr << "Error: no eligible genes remain, sampled " << geneI << " of " << gene_count << " requested genes\n";
+			break;
+		}
+		// print out the feature name
+		for( size_t qI = 0; qI < genes[randy]->GetQualifierListLength(); qI++ )
+		{
+			if( genes[randy]->GetQualifierName(qI) == "gene" )
+				cout << "gene:\t" << genes[randy]->GetQualifierValue(qI) << endl;
+		}
+		// extract the alignment
+		gnLocation loc = genes[randy]->GetLocation(0);
+		int64 lend = loc.GetFirst();
+		int64 rend = loc.GetLast();
+		cerr << "lend: " << lend << "\trend: " << rend << endl;
+		size_t ivI = 0;
+		for( ivI = 0; ivI < input_ivs.size(); ivI++ )
+		{
+			if( input_ivs[ivI].Start(sgI) != NO_MATCH )
+			{
+				gnSeqI iv_rend = genome::absolut(input_ivs[ivI].Start(sgI)) + input_ivs[ivI].Length(sgI);
+				if(  genome::absolut(input_ivs[ivI].Start(sgI)) < lend && rend < iv_rend )
+					break;
+			}
+		}
+		if( ivI == input_ivs.size() )
+		{
+			// no interval contains the gene: input_ivs[ivI] would be out of range,
+			// so retire the gene and move on rather than reading past the list
+			cerr << "Error: unable to assign gene to an interval!\n" << "coordinates: " << lend << '\t' << rend << endl;
+			delete genes[randy];
+			genes[randy] = NULL;
+			--candidates;
+			continue;
+		}
+		cerr << "making iv_cga\n";
+		CompactGappedAlignment<> iv_cga(input_ivs[ivI]);
+		CompactGappedAlignment<> col_cga;
+		cerr << "getting left and right cols\n";
+		gnSeqI lcol = iv_cga.SeqPosToColumn( sgI, lend );
+		gnSeqI rcol = iv_cga.SeqPosToColumn( sgI, rend );
+		cerr << "left col: " << lcol << "\tright_col: " << rcol << endl;
+		iv_cga.copyRange(col_cga, lcol, rcol-lcol + 1);
+		cerr << "getting alignment\n";
+		vector< string > aln;
+		GetAlignment( col_cga, input_ivs.seq_table, aln );
+		gnSequence gene_aln;
+		for( size_t i = 0; i < aln.size(); i++ )
+		{
+			gene_aln += aln[i];
+			stringstream ss;
+			ss << "seq" << i;
+			gene_aln.setContigName(i, ss.str());
+		}
+		cerr << "writing fasta\n";
+		stringstream of_name;
+		of_name << output_base << "_" << geneI << ".fas";
+		gnFASSource::Write( gene_aln, of_name.str() );
+
+		// done with this gene
+		delete genes[randy];
+		genes[randy] = NULL;
+		--candidates;
+	}
+
+	// release the features that were never sampled
+	for( size_t gI = 0; gI < genes.size(); gI++ )
+		delete genes[gI];
+
+	return 0;
+}
+
diff --git a/src/repeatoire.cpp b/src/repeatoire.cpp
new file mode 100644
index 0000000..45daf65
--- /dev/null
+++ b/src/repeatoire.cpp
@@ -0,0 +1,2716 @@
+#include "libGenome/gnSequence.h"
+#include "libMems/Interval.h"
+#include "libMems/CompactGappedAlignment.h"
+#include "libMems/Islands.h"
+#include "libMems/Aligner.h"
+#include "libMems/MuscleInterface.h"
+#include "libGenome/gnFASSource.h"
+#include "libMems/Backbone.h"
+#include "libMems/ProgressiveAligner.h"
+#include "libMems/HomologyHMM/parameters.h"
+
+#include <iomanip>
+#include <iostream>
+#include <algorithm>
+#include <cctype>
+
+#include "MatchRecord.h"
+#include "SeedMatchEnumerator.h"
+//#include "procrastUtilities.h"
+
+#include <boost/tuple/tuple.hpp>
+#include <boost/program_options/cmdline.hpp>
+#include <boost/program_options.hpp>
+namespace po = boost::program_options;
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+/** File-wide switch, off by default; NOTE(review): presumably enables warning output -- confirm at the use sites */
+bool print_warnings = false;
+
+/** Status codes; NOTE(review): the meaning of each value is defined by the code that returns them -- not visible here */
+enum rvalue { OK=0, FAILED=1, DONE=2, NOVEL=3, FIXME=110}; 
+/** Ten-entry lookup table; NOTE(review): presumably score drop-off thresholds -- confirm what the index represents */
+int scoredropoff_matrix[10] = {0,0,4756,9144,13471,17981,25302,30945,38361,40754};
+/** File-wide counter, starts at zero */
+int ccount = 0;
+/** A Match Position Entry stores a pointer to a match and its component for a given sequence coordinate */
+typedef std::pair< MatchRecord*, size_t >	MatchPositionEntry;
+/** The Match Position Lookup Table should be sized to match the length of the sequence */
+typedef vector< MatchPositionEntry > MatchPositionLookupTable;
+
+/**
+ * This class stores a single entry in the neighborhood list: one neighbouring
+ * match together with the pair of components (one of M_i, one of M_j) that
+ * relate it to the match being extended.
+ */
+class NeighborhoodListEntry
+{
+public:
+	MatchRecord* match;		/** the neighbouring match */
+	bool relative_orientation;	/** true for identical (1) and false for opposite (-1) */
+	size_t Mi_component;	/** the x value in the paper (Matching component of M_i)*/
+	size_t distance;		/** the d value from the paper */
+	size_t Mj_component;	/** the y value in the paper (Matching component of M_j) */
+};
+
+/**
+ * Ordering functor for std::sort over the neighborhood list.
+ * Keys, most significant first: match pointer, relative orientation
+ * (opposite before identical), M_i component, distance.
+ */
+class NeighborhoodListComparator
+{
+public:
+	bool operator()( const NeighborhoodListEntry& a, const NeighborhoodListEntry& b )
+	{
+		if( a.match == b.match )
+		{
+			if( a.relative_orientation == b.relative_orientation )
+			{
+				if( a.Mi_component == b.Mi_component )
+					return a.distance < b.distance;
+				return a.Mi_component < b.Mi_component;
+			}
+			// orientations differ, the entry with opposite orientation goes first
+			return !a.relative_orientation;
+		}
+		return a.match < b.match;
+	}
+};
+
+
+/** Descending order: higher multiplicity first, ties broken by higher spscore. */
+bool scorecmp( GappedMatchRecord* a, GappedMatchRecord* b ) 
+{
+	if( a->Multiplicity() != b->Multiplicity() )
+		return a->Multiplicity() > b->Multiplicity();
+	return a->spscore > b->spscore;
+}
+
+/** Descending order: higher spscore first, ties broken by higher multiplicity. */
+bool score_by_sp( GappedMatchRecord* a, GappedMatchRecord* b ) 
+{
+	// the multiplicity only matters when neither spscore is greater than the other
+	return a->spscore > b->spscore ||
+		( !( a->spscore < b->spscore ) && a->Multiplicity() > b->Multiplicity() );
+}
+
+/** Descending order: longer alignment first, ties broken by higher spscore. */
+bool score_by_length( GappedMatchRecord* a, GappedMatchRecord* b ) 
+{
+	if( a->AlignmentLength() != b->AlignmentLength() )
+		return a->AlignmentLength() > b->AlignmentLength();
+	return a->spscore > b->spscore;
+}
+/** The NeighborhoodGroup contains the MatchRecord pointer, the component map to the match being extended (M_i), and a vector of distances to M_i*/
+typedef boost::tuple< MatchRecord*, std::vector< size_t >, std::vector< size_t > > NeighborhoodGroup;
+
+/**
+ * Compares two NeighborhoodGroups by their component maps (tuple element 1).
+ * Both maps are copied and sorted, then compared element by element.
+ * The "unmapped" sentinel (the maximum size_t) orders before every real
+ * component index; trailing sentinels in the longer map are ignored.
+ */
+class NeighborhoodGroupComponentCompare
+{
+public:
+	/** @return true when a orders strictly before b */
+	bool operator()( const NeighborhoodGroup& a, const NeighborhoodGroup& b ) const
+	{
+		return compare(a,b) < 0;
+	}
+	/** @return a negative value, zero, or a positive value when a orders before, equal to, or after b */
+	int compare( const NeighborhoodGroup& a, const NeighborhoodGroup& b ) const
+	{
+		const size_t unmapped = (std::numeric_limits<size_t>::max)();
+	// compare component map vectors
+		// todo: make these buffers persistent to avoid reallocation!!
+		vector< size_t > ac(a.get<1>());
+		vector< size_t > bc(b.get<1>());
+		std::sort(ac.begin(), ac.end());
+		std::sort(bc.begin(), bc.end());
+		size_t i = 0;
+		for( ; i < ac.size() && i < bc.size(); ++i )
+		{
+			if( ac[i] != bc[i] )
+			{
+				// Never return the raw size_t difference: narrowing it to int drops
+				// the high bits, so large gaps could report "equal" or flip sign.
+				// The sentinel keeps its historic position ahead of real components.
+				if( ac[i] == unmapped )
+					return -1;
+				if( bc[i] == unmapped )
+					return 1;
+				return ac[i] < bc[i] ? -1 : 1;
+			}
+		}
+		if( i < ac.size() && ac[i] != unmapped )
+			return 1;
+		else if( i < bc.size() && bc[i] != unmapped )
+			return -1;
+
+		return 0;
+	}
+};
+
+/**
+ * Orders NeighborhoodGroups first by component map (delegated to
+ * NeighborhoodGroupComponentCompare) and then by their sorted distance
+ * vectors (tuple element 2), compared lexicographically.
+ */
+class NeighborhoodGroupCompare
+{
+public:
+	bool operator()( const NeighborhoodGroup& a, const NeighborhoodGroup& b )
+	{
+		const int component_order = srcc.compare(a,b);
+		if( component_order < 0 )
+			return true;
+		if( component_order > 0 )
+			return false;
+
+		// component maps tie: fall back on the sorted distances
+		vector< size_t > a_dist(a.get<2>());
+		vector< size_t > b_dist(b.get<2>());
+		std::sort(a_dist.begin(), a_dist.end());
+		std::sort(b_dist.begin(), b_dist.end());
+		// first differing distance decides, otherwise the shorter vector goes first
+		return std::lexicographical_compare( a_dist.begin(), a_dist.end(), b_dist.begin(), b_dist.end() );
+	}
+protected:
+	NeighborhoodGroupComponentCompare srcc;
+};
+
+
+// Tests whether a chainable match is OK, i.e. rules out this layout, where the
+// components of the candidate fall on both sides of the current match:
+// |---m1--->    |----c1--->               |---c2---->  |----m2---->
+//
+// Returns false when a mapped pair of components differs in orientation, when a
+// candidate component is not strictly to one side of its M_i component, or when
+// candidate components lie on both the left and the right side.
+bool testChainableMatch( MatchRecord* M_i, MatchRecord* M_m, const vector< size_t >& component_map )
+{
+	bool seen_left = false;
+	bool seen_right = false;
+	for( size_t x = 0; x < M_i->Multiplicity(); ++x )
+	{
+		const size_t z = component_map[x];
+
+		// a missing component on either side is a problem
+		if( M_i->LeftEnd(x) == NO_MATCH || M_m->LeftEnd(z) == NO_MATCH )
+			genome::breakHere();
+
+		// matches of differing orientation are not chained: it is unclear how
+		// the gap between them would be aligned
+		if( M_m->Orientation(z) != M_i->Orientation(x) )
+			return false;
+
+		int64 lend_diff = M_m->LeftEnd(z) -M_i->LeftEnd(x);
+		int64 rend_diff = M_m->RightEnd(z) - M_i->RightEnd(x);
+		const bool lies_before = rend_diff < 0 && lend_diff < 0;
+		const bool lies_after = rend_diff > 0 && lend_diff > 0;
+		if( !lies_before && !lies_after )
+			return false;
+
+		// "before" in sequence coordinates means left for a forward component
+		// and right for a reverse one (and vice versa for "after")
+		const bool is_forward = M_m->Orientation(z) == AbstractMatch::forward;
+		if( lies_before == is_forward )
+			seen_left = true;
+		else
+			seen_right = true;
+	}
+
+	// components on both the left and the right make the chained match invalid
+	return !( seen_left && seen_right );
+}
+
+/**
+ * Grows every component of M_i so that it covers the corresponding component
+ * of M_m.  component_map[x] gives the component of M_m paired with component
+ * x of M_i.
+ * @return true when at least one boundary of M_i was moved
+ */
+bool extendRange( MatchRecord* M_i, MatchRecord* M_m, const vector< size_t >& component_map )
+{
+	bool grew = false;
+	for( size_t iComp = 0; iComp < M_i->Multiplicity(); ++iComp )
+	{
+		const size_t mComp = component_map[iComp];
+		if( M_i->LeftEnd(iComp) == NO_MATCH || M_m->LeftEnd(mComp) == NO_MATCH )
+			genome::breakHere();
+
+		// how far M_m reaches beyond M_i's left end
+		const int64 left_gain = M_i->LeftEnd(iComp) - M_m->LeftEnd(mComp);
+		if( left_gain > 0 )
+		{
+			if ( M_i->LeftEnd(iComp) - left_gain == 0)
+				cerr << "extendRange debugme" << endl;
+			M_i->SetLeftEnd(iComp, M_i->LeftEnd(iComp) - left_gain);
+			M_i->SetLength(M_i->Length(iComp)+left_gain, iComp);
+			grew = true;
+		}
+
+		// how far M_m reaches beyond M_i's (possibly updated) right end
+		const int64 right_gain = M_m->RightEnd(mComp) - M_i->RightEnd(iComp);
+		if( right_gain > 0 )
+		{
+			M_i->SetLength( M_i->Length(iComp)+right_gain, iComp );
+			grew = true;
+		}
+	}
+	return grew;
+}
+
+/**
+ * Shrinks every component of M_i so that it does not reach beyond the
+ * corresponding component of M_m.  component_map[x] gives the component of
+ * M_m paired with component x of M_i.
+ *
+ * The previous body reused extendRange's arithmetic (left end moved further
+ * left, length increased), which enlarged the range instead of reducing it.
+ * A component whose trimming would leave nothing is reported and left as is.
+ *
+ * @return true when at least one boundary of M_i was moved
+ */
+bool reduceRange( MatchRecord* M_i, MatchRecord* M_m, const vector< size_t >& component_map )
+{
+	bool changed = false;
+	for( size_t x = 0; x < M_i->Multiplicity(); ++x )
+	{
+		size_t z = component_map[x];
+		if( M_i->LeftEnd(x) == NO_MATCH || M_m->LeftEnd(z) == NO_MATCH )
+			genome::breakHere();
+		// amount by which M_i sticks out past M_m on either side
+		int64 lend_diff = M_m->LeftEnd(z) - M_i->LeftEnd(x);
+		int64 rend_diff = M_i->RightEnd(x) - M_m->RightEnd(z) ;
+		if( lend_diff < 0 )
+			lend_diff = 0;
+		if( rend_diff < 0 )
+			rend_diff = 0;
+		if( lend_diff + rend_diff == 0 )
+			continue;	// already within M_m
+		if( lend_diff + rend_diff >= static_cast< int64 >( M_i->Length(x) ) )
+		{
+			// M_m does not overlap this component, trimming would empty it
+			cerr << "reduceRange debugme" << endl;
+			continue;
+		}
+		// moving the left end right and shortening by the same amount keeps the right end fixed
+		if( lend_diff > 0 )
+			M_i->SetLeftEnd(x, M_i->LeftEnd(x) + lend_diff);
+		M_i->SetLength( M_i->Length(x) - lend_diff - rend_diff, x );
+		changed = true;
+	}
+	return changed;
+}
+
+/**
+ * Composes two component maps that both point into the same intermediate match.
+ * For each entry of srcmap, newmap receives the index within destmap that refers
+ * to the same intermediate component, or the maximum size_t when destmap has no
+ * such entry.  newmap must already hold at least srcmap.size() elements.
+ * @param mid_multiplicity	number of components of the intermediate match
+ */
+void remapComponents(const vector< size_t >& srcmap, size_t mid_multiplicity, const vector< size_t >& destmap, vector< size_t >& newmap )
+{
+	const size_t unmapped = (std::numeric_limits<size_t>::max)();
+	// invert destmap: intermediate component -> index in destmap
+	vector< size_t > mid_to_dest( mid_multiplicity, unmapped );
+	size_t destI = 0;
+	for( vector< size_t >::const_iterator dest_iter = destmap.begin(); dest_iter != destmap.end(); ++dest_iter, ++destI )
+		mid_to_dest[*dest_iter] = destI;
+	// route every source component through the inverted map
+	size_t srcI = 0;
+	for( vector< size_t >::const_iterator src_iter = srcmap.begin(); src_iter != srcmap.end(); ++src_iter, ++srcI )
+		newmap[srcI] = mid_to_dest[*src_iter];
+}
+
+/**
+ * Classifies M_j relative to M_i, component by component.
+ * ji_component_map[y] gives the component of M_i paired with component y of M_j.
+ * @param subsumed	set to true when no component of M_j reaches beyond its M_i
+ *					component (with superset == true the roles are swapped)
+ * @param partial	set to true when at least one component stays within bounds
+ * @param superset	when true, test whether M_i stays within M_j instead
+ */
+void classifyMatch( AbstractMatch* M_i, AbstractMatch* M_j, vector< size_t >& ji_component_map, bool& subsumed, bool& partial, bool superset = false )
+{
+	subsumed = true;
+	partial = false;
+	const size_t pair_count = ji_component_map.size();
+	for( size_t y = 0; y != pair_count; ++y )
+	{
+		const size_t x = ji_component_map[y];
+		int64 left_overhang;
+		int64 right_overhang;
+		if( superset )
+		{
+			left_overhang = M_j->LeftEnd(y) - M_i->LeftEnd(x);
+			right_overhang = M_i->RightEnd(x)-M_j->RightEnd(y);
+		}
+		else
+		{
+			left_overhang = M_i->LeftEnd(x) - M_j->LeftEnd(y);
+			right_overhang = M_j->RightEnd(y) - M_i->RightEnd(x);
+		}
+
+		// a positive overhang on either side means this component is not contained
+		if( left_overhang > 0 || right_overhang > 0 )
+			subsumed = false;
+		else
+			partial = true;
+	}
+}
+// Superset flavour of classifySubset: classifies the match held in sr against
+// M_i with the roles of the two matches swapped.
+void classifySuperset( MatchRecord* M_i, NeighborhoodGroup& sr, bool& subsumed, bool& partial )
+{
+	MatchRecord* M_j = sr.get<0>();
+	vector< size_t >& ji_component_map = sr.get<1>();
+	classifyMatch( M_i, M_j, ji_component_map, subsumed, partial, true );
+}
+
+// Classifies the match held in sr against M_i using the group's component map.
+void classifySubset( MatchRecord* M_i, NeighborhoodGroup& sr, bool& subsumed, bool& partial )
+{
+	MatchRecord* M_j = sr.get<0>();
+	vector< size_t >& ji_component_map = sr.get<1>();
+	classifyMatch( M_i, M_j, ji_component_map, subsumed, partial, false );
+}
+
+/** Follows the subsuming_match chain and leaves mr pointing at the last match in it. */
+void checkLink( MatchRecord*& mr )
+{
+	for( MatchRecord* subsumer = mr->subsuming_match; subsumer != NULL; subsumer = mr->subsuming_match )
+		mr = subsumer;
+}
+
+
+/**
+ * Brings a stale link up to date: while the link's subset has been subsumed by
+ * another match, re-targets the link at the subsuming match and rewrites
+ * sub_to_super_map through the subset's subsumption_component_map.
+ */
+void checkLink( MatchLink& mlink )
+{
+	for( MatchRecord* stale = mlink.subset; stale->subsuming_match != NULL; stale = mlink.subset )
+	{
+		const size_t map_size = mlink.sub_to_super_map.size();
+		vector< size_t > remapped( map_size );
+		for( size_t compI = 0; compI < map_size; ++compI )
+			remapped[compI] = mlink.sub_to_super_map[ stale->subsumption_component_map[compI] ];
+		mlink.sub_to_super_map.swap( remapped );
+		mlink.subset = stale->subsuming_match;
+	}
+}
+
+/**
+ * Follows the subsuming_match chain from mr, translating component through each
+ * subsumption_component_map on the way, and leaves both at the end of the chain.
+ */
+void checkLinkAndComponent( MatchRecord*& mr, size_t& component )
+{
+	for( MatchRecord* subsumer = mr->subsuming_match; subsumer != NULL; subsumer = mr->subsuming_match )
+	{
+		component = mr->subsumption_component_map[component];
+		mr = subsumer;
+	}
+}
+
+/** returns one of the superset links from a match.  direction is 1 for left, -1 for right */
+MatchLink& getSuperset( MatchRecord* mr, int direction )
+{
+	return direction == 1 ? mr->left_superset : mr->right_superset;
+}
+
+/** returns the subset links of a match on one side.  direction is 1 for left, -1 for right */
+vector<MatchLink>& getSubsets( MatchRecord* mr, int direction )
+{
+	return direction == 1 ? mr->left_subset_links : mr->right_subset_links;
+}
+
+/** returns the extra subsets of a match on one side.  direction is 1 for left, -1 for right */
+vector<MatchLink>& getExtraSubsets( MatchRecord* mr, int direction )
+{
+	return direction == 1 ? mr->extra_left_subsets : mr->extra_right_subsets;
+}
+/**
+ * Inverse of unlinkSuperset: linkSuperset followed by unlinkSuperset should
+ * exactly offset each other.
+ *
+ * Registers mr in the subset list of supermatch (when supermatch is not NULL)
+ * and re-points the subset links and extra subset links held by mr on the
+ * given side, together with the matching superset links of those subsets, at mr.
+ *
+ * @param comp_list	components of supermatch covered by mr
+ * @param comp_map	maps each component of mr to a component of supermatch
+ * @param direction	1 for left, -1 for right
+ */
+void linkSuperset( MatchRecord* mr, MatchRecord* supermatch, boost::dynamic_bitset<>& comp_list, vector< size_t >& comp_map, int direction )
+{
+	// update superset links
+	MatchLink slink = MatchLink( supermatch, mr, comp_list, comp_map );
+	if( slink.superset != NULL )
+	{
+		slink.subset = mr;
+		// the side of the superset that faces mr flips when the orientations differ
+		int parity = mr->Orientation(0) == slink.superset->Orientation(slink.sub_to_super_map[0]) ? 1 : -1;
+		getSubsets(slink.superset,-direction*parity).push_back(slink);
+	}
+	vector< MatchLink >& subsets = getSubsets(mr,direction);
+	for( size_t subI = 0; subI < subsets.size(); ++subI )
+	{
+		subsets[subI].superset = mr;
+		int parity = mr->Orientation(subsets[subI].sub_to_super_map[0]) == subsets[subI].subset->Orientation(0) ? 1 : -1;
+		getSuperset(subsets[subI].subset, -direction*parity).superset = mr;
+	}
+	//punt: link extra subsets too!
+	vector< MatchLink >& extrasubsets = getExtraSubsets(mr,direction);
+	for( size_t subI = 0; subI < extrasubsets.size(); ++subI )
+	{
+		// must index extrasubsets here: using subsets[subI] touched the wrong
+		// list and could run past its end when there are more extra subsets
+		extrasubsets[subI].superset = mr;
+		int parity = mr->Orientation(extrasubsets[subI].sub_to_super_map[0]) == extrasubsets[subI].subset->Orientation(0) ? 1 : -1;
+		getSuperset(extrasubsets[subI].subset, -direction*parity).superset = mr;
+	}
+}
+/**
+ * Detaches mr from its superset on the given side (1 for left, -1 for right):
+ * every link to mr is removed from the superset's subset and extra subset lists
+ * on the facing side, then mr's own superset link is cleared.  Does nothing when
+ * mr has no superset on that side.
+ */
+void unlinkSuperset( MatchRecord* mr, int direction )
+{
+	MatchLink& superlink = getSuperset( mr, direction );
+	MatchRecord* super = superlink.superset;
+	if( super == NULL )
+		return;
+
+	const int parity = mr->Orientation(0) == super->Orientation(superlink.sub_to_super_map[0]) ? 1 : -1;
+
+	vector< MatchLink >& subs = getSubsets( super, -direction*parity );
+	size_t subI = 0;
+	while( subI < subs.size() )
+	{
+		if( subs[subI].subset == mr )
+			subs.erase( subs.begin() + subI );	// next element slides into subI
+		else
+			++subI;
+	}
+
+	//tjt: unlink extrasubsets!
+	vector< MatchLink >& extrasubs = getExtraSubsets( super, -direction*parity );
+	size_t extraI = 0;
+	while( extraI < extrasubs.size() )
+	{
+		if( extrasubs[extraI].subset == mr )
+			extrasubs.erase( extrasubs.begin() + extraI );
+		else
+			++extraI;
+	}
+
+	superlink.clear();
+}
+
+/** Detaches mr from its supersets on both sides, left (1) first, then right (-1). */
+void unlinkSupersets( MatchRecord* mr )
+{
+	for( int direction = 1; direction >= -1; direction -= 2 )
+		unlinkSuperset( mr, direction );
+}
+
+/**
+ * Debugging aid: checks the internal consistency of a set of match records.
+ * Each violation prints a short message to cerr and calls genome::breakHere().
+ * Checked invariants:
+ *  - no record has a component whose left end is NO_MATCH
+ *  - every subset link of a record leads (after following subsuming_match) to a
+ *    match whose superset link on the facing side points back at the record
+ *  - following subsuming_match does not change the multiplicity
+ *  - a link's super_component_list has as many set bits as its sub_to_super_map has entries
+ *  - every superset of a record lists the record among its subsets
+ */
+template< class MatchRecordPtrType >
+void validate( vector< MatchRecordPtrType >& records )
+{
+	// make sure all matches have non-zero components
+	for( size_t recI = 0; recI < records.size(); ++recI )
+	{
+		size_t seqI = 0;
+		for( ; seqI < records[recI]->SeqCount(); ++seqI )
+			if( records[recI]->LeftEnd(seqI) == NO_MATCH )
+				break;
+		if( seqI < records[recI]->SeqCount() )
+		{
+			cerr << "missing component\n";
+			genome::breakHere();
+		}
+	}
+
+	// make sure all links are consistent
+	for( size_t recI = 0; recI < records.size(); ++recI )
+	{
+		MatchRecord* mr = records[recI];
+		// direction takes the values 1 (left) and -1 (right)
+		for( int direction = 1; direction >-2; direction -= 2 )
+		{
+			for( size_t subI = 0; subI < getSubsets(mr, direction).size(); subI++ )
+			{
+				// follow any stale links
+				MatchRecord* sub = getSubsets(mr, direction)[subI].subset;
+				size_t sub_mult = sub->Multiplicity();
+				while( sub->subsuming_match != NULL )
+					sub = sub->subsuming_match;
+				// parity is -1 when the subset is inverted relative to mr, which swaps the facing side
+				size_t parity_seq = getSubsets(mr, direction)[subI].sub_to_super_map[0];
+				int parity = mr->Orientation(parity_seq) == sub->Orientation(0) ? 1 : -1;
+				// make sure that each of the subsets in these points back to this superset in its own link
+				if( getSuperset(sub, -direction*parity).superset != mr )
+				{
+					cerr << "ohno\n";
+					genome::breakHere();
+				}
+				// the subsuming match must have the same multiplicity as the linked subset
+				if( sub_mult != sub->Multiplicity() )
+				{
+					cerr << "unequal mult\n";
+					genome::breakHere();
+				}
+				// one set bit in the component list per mapped component
+				if( getSubsets(mr,direction)[subI].super_component_list.count() != getSubsets(mr,direction)[subI].sub_to_super_map.size())
+				{
+					cerr << "broke\n";
+					genome::breakHere();
+				}
+			}
+
+			// make sure the supersets have this subset
+			if( getSuperset(mr,direction).superset != NULL )
+			{
+				MatchRecord* sup = getSuperset(mr,direction).superset;
+				int parity = mr->Orientation(0) == sup->Orientation(getSuperset(mr,direction).sub_to_super_map[0]) ? 1 : -1;
+				size_t subI = 0;
+				for( ; subI < getSubsets(sup,-direction*parity).size(); subI++ )
+				{
+					if( getSubsets(sup,-direction*parity)[subI].subset == mr )
+						break;
+				}
+				// the search ran off the end: the superset does not know about mr
+				if( subI == getSubsets(sup,-direction*parity).size() )
+				{
+					cerr << "oh crap!\n";
+					genome::breakHere();
+				}
+				if( getSuperset(mr,direction).super_component_list.count() != getSuperset(mr,direction).sub_to_super_map.size())
+				{
+					cerr << "broke 3\n";
+					genome::breakHere();
+				}
+			}
+		}
+	}
+}
+
+/**
+ * Builds one NeighborhoodGroup per entry of group_members and sorts the result
+ * with NeighborhoodGroupCompare.
+ *
+ * Each entry of group_members lists indices into neighborhood_list that belong
+ * to the same neighbouring match M_j (taken from the first index).  The group's
+ * component map and distance vector have one slot per component of M_j; slots
+ * not named by any member keep the maximum size_t.
+ */
+void createNeighborhoodGroupList( vector< NeighborhoodGroup >& group_list, vector< vector< size_t > >& group_members, vector< NeighborhoodListEntry >& neighborhood_list )
+{
+	const size_t unset = (std::numeric_limits<size_t>::max)();
+	const size_t group_count = group_members.size();
+	group_list.resize( group_count );
+	for( size_t gI = 0; gI < group_count; ++gI )
+	{
+		const vector< size_t >& members = group_members[gI];
+		MatchRecord* M_j = neighborhood_list[members[0]].match;
+
+		NeighborhoodGroup& group = group_list[gI];
+		group.get<0>() = M_j;
+		vector< size_t >& component_map = group.get<1>();
+		vector< size_t >& distances = group.get<2>();
+		component_map.assign( M_j->Multiplicity(), unset );
+		distances.assign( M_j->Multiplicity(), unset );
+
+		// fill in the slots of the M_j components that appear in the neighborhood
+		for( size_t mI = 0; mI < members.size(); ++mI )
+		{
+			const NeighborhoodListEntry& entry = neighborhood_list[members[mI]];
+			component_map[entry.Mj_component] = entry.Mi_component;
+			distances[entry.Mj_component] = entry.distance;
+		}
+	}
+
+	std::sort( group_list.begin(), group_list.end(), NeighborhoodGroupCompare() );
+}
+
+/**
+ * Assigns the superset link from M_j to M_i.  This function should be called when
+ * M_j has been chained as part of M_i and M_j has an outgoing superset link.
+ *
+ * M_j->subsumption_component_map must already be set: it is used to translate
+ * each component of M_i into a component of M_j.
+ * @param direction	side of M_i that receives the link (1 for left, -1 for right)
+ * @param parity	relative orientation factor; M_j's link is read from side direction*parity
+ */
+void inheritSuperset( MatchRecord* M_i, MatchRecord* M_j, int direction, int parity )
+{
+	// remap superset components
+	vector< size_t > comp_map( M_i->Multiplicity() );
+	for( size_t ci = 0; ci < comp_map.size(); ci++ )
+		comp_map[ci] = getSuperset( M_j, direction*parity ).sub_to_super_map[ M_j->subsumption_component_map[ci] ];
+	// rebuild the superset component list
+	boost::dynamic_bitset<> comp_list(getSuperset( M_j, direction*parity ).superset->Multiplicity(), false);
+	for( size_t compI = 0; compI < comp_map.size(); ++compI )
+		comp_list.set(comp_map[compI]);
+	MatchLink& slink = getSuperset(M_i, direction);
+	// the new link must be built before M_j is unlinked, unlinking clears M_j's superset link
+	slink = MatchLink( getSuperset( M_j, direction*parity ).superset, M_i, comp_list, comp_map );
+	unlinkSuperset(M_j,direction*parity);
+	// register M_i with the superset on the side that faces M_i
+	int slink_parity = M_i->Orientation(0) == slink.superset->Orientation(slink.sub_to_super_map[0]) ? 1 : -1;
+	getSubsets(slink.superset,-direction*slink_parity).push_back(slink);
+
+}
+
+/**
+ * Picks the list that belongs to the current direction of extension:
+ * left_list when direction is 1, right_list otherwise.
+ */
+vector< NeighborhoodGroup >& selectList( vector< NeighborhoodGroup >& left_list, vector< NeighborhoodGroup >& right_list, int direction )
+{
+	if( direction == 1 )
+		return left_list;
+	return right_list;
+}
+
+
+
+/**
+ * Performs a superset link extension on M_i.
+ *
+ * M_j is the superset linked from M_i on the given side.  The function
+ *  1. extends M_i to cover M_j and records M_j as a chained match,
+ *  2. defers the subsets on M_j's other side that share components with M_i,
+ *  3. unlinks M_i from M_j and classifies M_j's remaining outgoing links on
+ *     the extension side; chainable matches are chained (when chain is true)
+ *     and subsets are moved to the deferred lists.
+ *
+ * @param M_i			the match under extension; must have a superset on the given side
+ * @param direction		1 for left, -1 for right
+ * @param last_linked	set to 2 when chaining moved a boundary of M_i or M_i inherited a superset
+ * @param left_deferred_subsets		receives subsets deferred on the left side
+ * @param right_deferred_subsets	receives subsets deferred on the right side
+ * @param chain			when false, chainable matches are left untouched
+ */
+void supersetLinkExtension( GappedMatchRecord*& M_i, int direction, int& last_linked, 
+						   vector< NeighborhoodGroup >& left_deferred_subsets, 
+						   vector< NeighborhoodGroup >& right_deferred_subsets, bool chain )
+{
+	// update the left end and look for another superset to chain with
+	// then extend all the way to that match
+	MatchRecord* M_j = getSuperset(M_i, direction).superset;
+	MatchLink ij_link = getSuperset(M_i, direction);	// make a copy for safekeeping
+	// -1 when M_i is inverted relative to M_j, which swaps left and right on M_j
+	int ij_parity = M_i->Orientation(0) == M_j->Orientation(ij_link.sub_to_super_map[0]) ? 1 : -1;
+
+	//
+	// Link extension part 1: 
+	// extend M_i to include M_j, add M_j to the chained matches
+
+       
+    // NOTE(review): this result is not read; a second variable of the same name is declared in the chaining loop below
+    bool changed = extendRange( M_i, M_j, ij_link.sub_to_super_map );
+    M_i->chained_matches.push_back(M_j);
+    M_i->chained_component_maps.push_back(ij_link.sub_to_super_map);
+
+
+
+	// Link extension part 2:
+	// figure out whether any subsets between M_j and M_i got subsumed
+	// subtypeI 0 walks M_j's regular subset links, subtypeI 1 its extra subsets
+	for( size_t subtypeI = 0; subtypeI < 2; subtypeI++ )
+	{
+		vector< MatchLink >* mjsubs;
+		if( subtypeI == 0 )
+			mjsubs = &getSubsets(M_j, -direction*ij_parity);
+		else
+			mjsubs = &getExtraSubsets(M_j, -direction*ij_parity);
+		vector< MatchLink >& mj_otherside_subsets = *mjsubs;
+
+		for( size_t leftI = 0; leftI < mj_otherside_subsets.size(); ++leftI )
+		{
+			if( subtypeI == 0 )
+				checkLink( mj_otherside_subsets[leftI] );
+			MatchLink& jk_link = mj_otherside_subsets[leftI];
+			// components of M_j covered by both M_i and M_k
+			boost::dynamic_bitset<> intersect = ij_link.super_component_list & jk_link.super_component_list;
+			MatchRecord* M_k = jk_link.subset;
+			if( M_k == M_i )
+				continue;	// been there, chained that.
+			size_t inter_size = intersect.count();
+			if( inter_size < 2 )
+				continue;	// no match
+			// only keep matches whose components all lie in M_i and are fewer than M_i's
+			if( inter_size >= M_i->Multiplicity() || M_k->Multiplicity() != inter_size )
+				continue;
+
+			// has this guy already been subsumed?  if so then just skip him
+			if( M_k->subsuming_match != NULL )
+			{
+				if( subtypeI != 1 )
+					breakHere(); // this should only happen with extra subsets
+				mj_otherside_subsets.erase(mj_otherside_subsets.begin()+leftI, mj_otherside_subsets.begin()+leftI+1 );
+				leftI--;
+				continue;
+			}
+
+			// M_k is a subset relative to M_i
+			int jk_parity = M_k->Orientation(0) == M_j->Orientation(jk_link.sub_to_super_map[0]) ? 1 : -1;
+			// NOTE(review): ik_parity is computed but not used in this loop
+			int ik_parity = ij_parity * jk_parity;
+
+			// express M_k's components in terms of M_i's components
+			vector< size_t > component_map( M_k->Multiplicity() );
+			remapComponents(jk_link.sub_to_super_map, M_j->Multiplicity(), ij_link.sub_to_super_map, component_map );
+
+			NeighborhoodGroup sr = boost::make_tuple( M_k, component_map, vector<size_t>( M_k->Multiplicity(), 0 ) );
+			// defer it until we're done extending
+			selectList( left_deferred_subsets, right_deferred_subsets, -direction ).push_back( sr );
+		}
+	}
+
+	//
+	// Link extension part 3:
+	// classify outgoing links that share components with M_i
+	unlinkSuperset(M_i,direction);
+	// each vector holds indices into mj_subsets
+	vector< size_t > supersets;
+	vector< size_t > chainable;
+	vector< size_t > subsets;
+	vector< size_t > novel_subsets;
+	vector< MatchLink >& mj_subsets = getSubsets(M_j, direction*ij_parity);
+	for( size_t leftI = 0; leftI < mj_subsets.size(); ++leftI )
+	{
+		checkLink( mj_subsets[leftI] );
+		boost::dynamic_bitset<> intersect = ij_link.super_component_list & mj_subsets[leftI].super_component_list;
+		MatchRecord* M_k = mj_subsets[leftI].subset;
+		if( M_k == M_i )
+			continue;	// been there, chained that.
+		size_t inter_size = intersect.count();
+		if( inter_size < 2 )
+			continue;	// no match
+			// M_k is a superset relative to M_i
+		if( inter_size == M_i->Multiplicity() && M_k->Multiplicity() > inter_size )
+			supersets.push_back(leftI);
+		// same components as M_i: M_k can be chained onto M_i
+		else if( inter_size == M_i->Multiplicity() && M_k->Multiplicity() == inter_size )
+			chainable.push_back(leftI);
+		// M_k covers a proper subset of M_i's components
+		else if( inter_size < M_i->Multiplicity() && M_k->Multiplicity() == inter_size )
+			subsets.push_back(leftI);
+		else
+			novel_subsets.push_back(leftI);
+	}
+
+
+	if( supersets.size() > 0 && 1)
+	{
+//#4018
+		cerr << "something is wrong, we should never have supersets during link extension!\n";
+		genome::breakHere();
+	}
+
+	if (chain)
+	{
+		for( size_t cI = 0; cI < chainable.size(); ++cI )
+		{
+			// more than one chainable match is reported as an error condition
+			if( chainable.size() > 1 && 1)
+			{
+				cerr << "bad news bruthah\n";
+				genome::breakHere();
+			}
+			// chain with this guy
+			MatchLink& jk_link = mj_subsets[chainable[cI]];
+			MatchRecord* M_k = jk_link.subset;
+			if( M_k->extended )
+			{
+				cerr << "extensor crap\n";
+				breakHere();
+			}
+			if( M_k == M_i )
+			{
+				cerr << "crap\n";
+				breakHere();
+			}
+
+			// update boundary coordinates
+			vector< size_t > component_map( M_i->Multiplicity() );
+			remapComponents(ij_link.sub_to_super_map, M_j->Multiplicity(), jk_link.sub_to_super_map, component_map );
+			bool changed = extendRange( M_i, M_k, component_map );
+			if( changed )
+				last_linked = 2;
+
+			// unlink from superset
+			int jk_parity = M_k->Orientation(0) == M_j->Orientation(jk_link.sub_to_super_map[0]) ? 1 : -1;
+			unlinkSuperset(M_k,-direction*ij_parity*jk_parity);
+			// set subsuming match ptrs
+			M_k->subsuming_match = M_i;
+			M_k->subsumption_component_map = component_map;
+			M_i->chained_matches.push_back( M_k );
+			M_i->chained_component_maps.push_back( component_map );
+
+			// compensate for the deletion in subsets
+			// (unlinkSuperset erased M_k's entry from a subset list of M_j, so later indices shift down by one)
+			for( size_t subI = 0; subI < chainable.size(); subI++ )
+				if( chainable[subI] > chainable[cI] )
+					chainable[subI]--;
+			for( size_t subI = 0; subI < subsets.size(); subI++ )
+				if( subsets[subI] > chainable[cI] )
+					subsets[subI]--;
+
+			// inherit M_k's outward superset and stop chaining here
+			if( getSuperset( M_k, direction*ij_parity*jk_parity ).superset != NULL )
+			{
+				inheritSuperset( M_i, M_k, direction, ij_parity*jk_parity );
+				last_linked = 2;
+				break;
+			}
+		}
+	}
+	// process subsets
+	for( size_t sI = 0; sI < subsets.size(); ++sI )
+	{
+		// change M_k to point at M_i
+		MatchLink& jk_link = mj_subsets[subsets[sI]];
+		MatchRecord* M_k = jk_link.subset;
+		int jk_parity = M_k->Orientation(0) == M_j->Orientation(jk_link.sub_to_super_map[0]) ? 1 : -1;
+		int ik_parity = ij_parity * jk_parity;
+
+		vector< size_t > component_map( M_k->Multiplicity() );
+		remapComponents(jk_link.sub_to_super_map, M_j->Multiplicity(), ij_link.sub_to_super_map, component_map );
+		// rebuild the superset component list
+		// NOTE(review): comp_list is filled in but not read afterwards in this function
+		boost::dynamic_bitset<> comp_list(M_i->Multiplicity(), false);
+		for( size_t compI = 0; compI < component_map.size(); ++compI )
+			if(component_map[compI] != (std::numeric_limits<size_t>::max)())
+				comp_list.set(component_map[compI]);
+		unlinkSuperset(M_k,-1*direction*ik_parity);
+
+		// add to the deferred subsets list
+		NeighborhoodGroup sr = boost::make_tuple( M_k, component_map, vector<size_t>( M_k->Multiplicity(), 0 ) );
+		vector< NeighborhoodGroup >& subset_list = selectList( left_deferred_subsets, right_deferred_subsets, direction );
+		subset_list.push_back( sr );
+
+		// compensate for the deletion in subsets
+		for( size_t subI = 0; subI < subsets.size(); subI++ )
+			if( subsets[subI] > subsets[sI] )
+				subsets[subI]--;
+	}
+}
+
+/**
+ * Scratch storage shared by all neighborhood list lookups.
+ * The vectors are reserved once and only emptied between lookups, which avoids
+ * allocating them again for every lookup.
+ */
+class NllBuffers
+{
+public:
+	std::vector< std::vector< size_t > > superset_groups;
+	std::vector< std::vector< size_t > > chainable_groups;
+	std::vector< std::vector< size_t > > subset_groups;
+	std::vector< std::vector< size_t > > novel_subset_groups;
+	vector< NeighborhoodListEntry > neighborhood_list;
+	vector< pair< size_t, size_t > > j_comp_sort_list;
+	vector<size_t> group_entries;
+
+	/** Reserves the initial capacity of every buffer. */
+	NllBuffers()
+	{
+		superset_groups.reserve(100);
+		chainable_groups.reserve(100);
+		subset_groups.reserve(100);
+		novel_subset_groups.reserve(100);
+		neighborhood_list.reserve(10000);
+		j_comp_sort_list.reserve(1000);
+		group_entries.reserve(1000);
+	}
+
+	/** Empties every buffer. */
+	void clear()
+	{
+		superset_groups.clear();
+		chainable_groups.clear();
+		subset_groups.clear();
+		novel_subset_groups.clear();
+		neighborhood_list.clear();
+		j_comp_sort_list.clear();
+		group_entries.clear();
+	}
+};
+
+/** The single buffer set used by neighborhoodListLookup */
+NllBuffers nllbufs;
+
+/**
+ * Performs a neighborhood list lookup to find other matches nearby the match of interest.
+ * For every component x of M_i the lookup table is probed at increasing distance d from the
+ * component's end; each non-NULL hit becomes a NeighborhoodListEntry.  The entries are sorted
+ * with NeighborhoodListComparator and split into groups sharing the same match and relative
+ * orientation.  Groups with a single entry are skipped; every other group is reduced to one
+ * entry per M_i component and one per M_j component and then classified as superset,
+ * chainable, subset or novel subset by comparing the entry count with the two multiplicities.
+ * The file-scope nllbufs scratch buffers are cleared on entry and reused as working storage.
+ * @param	M_i		The primary match which is under extension
+ * @param	match_pos_lookup_table	Indexed by sequence position; each entry holds a (match, component index) pair and a NULL match means nothing is recorded at that position
+ * @param	superset_list	Handed to createNeighborhoodGroupList() together with the groups classified as supersets
+ * @param	chainable_list	Handed to createNeighborhoodGroupList() together with the groups classified as chainable
+ * @param	subset_list	Handed to createNeighborhoodGroupList() together with the groups classified as subsets
+ * @param	novel_subset_list	Handed to createNeighborhoodGroupList() together with the groups classified as novel subsets
+ * @param	direction	1 or -1; multiplied with each component's orientation to choose which end of the component is searched, and selects left_lookups (1) or right_lookups (-1)
+ * @param	seed_size	Seed length, used to locate the search anchor and to widen the window when searching from the right end
+ * @param	w		Window size: the largest distance probed when searching from the left end
+ * @param	left_lookups	Positions already searched with direction == 1; tested and updated here
+ * @param	right_lookups	Positions already searched with direction == -1; tested and updated here
+ * @param	M_e		(Optionally NULL) A gapped extension which will be added to M_i after its neighborhood has been searched
+ */
+void neighborhoodListLookup( GappedMatchRecord* M_i, 
+						   MatchPositionLookupTable& match_pos_lookup_table,
+						   vector< NeighborhoodGroup >& superset_list, 
+						   vector< NeighborhoodGroup >& chainable_list,
+						   vector< NeighborhoodGroup >& subset_list, 
+						   vector< NeighborhoodGroup >& novel_subset_list,
+						   int direction,
+						   uint seed_size,
+						   uint w,
+						   bitset_t& left_lookups,
+						   bitset_t& right_lookups,
+						   GappedMatchRecord* M_e
+						   )
+{
+	// make sure storage is empty
+	nllbufs.clear();
+	//
+	// construct a neighborhood list and process the neighborhood groups
+	//
+	vector< NeighborhoodListEntry >& neighborhood_list = nllbufs.neighborhood_list;
+	for( size_t x = 0; x < M_i->Multiplicity(); ++x )
+	{
+		int o_x = M_i->Orientation(x) == AbstractMatch::forward ? 1 : -1;
+		int parity = o_x * direction;
+		int64 match_end = parity == 1 ? M_i->LeftEnd(x) : M_i->RightEnd(x) - seed_size + 1;	// anchor position; probes are taken at match_end - parity * d
+
+		if( match_end > 0 )
+			if( (direction == 1 && left_lookups.test(match_end)) ||
+				(direction == -1 && right_lookups.test(match_end)) )
+			{
+				if(print_warnings)
+					cerr << "looking twice in the same place\n";
+//							genome::breakHere();
+			}else{	// this else pairs with the inner if (the lookups test), not with match_end > 0
+				if( direction == 1 )
+					left_lookups.set(match_end);
+				if( direction == -1 )
+					right_lookups.set(match_end);
+			}
+
+		int d = 1;
+		int w_end = parity == 1 ? w : w + seed_size;	// last distance that gets probed
+		// are we cleaning up a gapped extension?  if so, adjust d and w_end so
+		// we don't search anything twice and also cover all of the extension area
+		if(M_e != NULL)
+		{
+			int64 me_match_end = parity == 1 ? M_e->LeftEnd(x) : M_e->RightEnd(x)-(M_e->Length(x)-1);
+			d = w+1;	// need to start at the begining of the window to properly 
+                    // classify all matches subsumed by extension and all novel 
+                    // matches which may have been discovered
+   			w_end = w + me_match_end - match_end;	// search anything new included in M_e
+		}
+		for( ; d <= w_end; ++d )
+		{
+			if( match_end <= parity * d )
+				continue;	// we're too close to the beginning
+			size_t mplt_index = match_end - parity * d;
+			if( mplt_index >= match_pos_lookup_table.size() )
+				continue;	// we're too close to the end!
+
+			MatchRecord* M_j = match_pos_lookup_table[ mplt_index ].first;
+			size_t y = match_pos_lookup_table[ mplt_index ].second;
+			if( M_j == NULL )
+				continue;	// no match at this position
+
+			NeighborhoodListEntry nle;
+			nle.match = M_j;
+			nle.Mi_component = x;
+			nle.Mj_component = y;
+			// update the link if this one was subsumed
+			checkLinkAndComponent( M_j, y );
+			int o_y = ((AbstractMatch*)M_j)->Orientation(y) == AbstractMatch::forward ? 1 : -1;
+			nle.relative_orientation = o_x * o_y == 1 ? true : false;
+			nle.distance = d;
+			neighborhood_list.push_back( nle );	// NOTE(review): nle still holds the M_j / y read before checkLinkAndComponent(); confirm whether that call is meant to update them
+			
+			if( M_j == M_i )
+			{
+				M_i->tandem = true;
+				break;	// not so fast there cowboy!  can't chain beyond ourself!
+			}
+		}
+	}
+
+	//
+	// now classify each group of the neighborhood list and act appropriately
+	// group types are superset, chainable, subset, novel subset
+	//
+	NeighborhoodListComparator nlc;
+	std::sort( neighborhood_list.begin(), neighborhood_list.end(), nlc );
+
+    //std::reverse(neighborhood_list.begin(), neighborhood_list.end());
+
+	std::vector< std::vector< size_t > >& superset_groups = nllbufs.superset_groups;
+	std::vector< std::vector< size_t > >& chainable_groups = nllbufs.chainable_groups;
+	std::vector< std::vector< size_t > >& subset_groups = nllbufs.subset_groups;
+	std::vector< std::vector< size_t > >& novel_subset_groups = nllbufs.novel_subset_groups;
+
+	size_t group_end = 0;
+	for( size_t prev = 0; prev < neighborhood_list.size(); prev = group_end )
+	{
+		group_end = prev + 1;
+		while( group_end < neighborhood_list.size() && 
+			neighborhood_list[prev].match == neighborhood_list[group_end].match && 
+			neighborhood_list[prev].relative_orientation == neighborhood_list[group_end].relative_orientation )
+		{
+			++group_end;
+		}
+		// the group is everything in the range of prev to end-1
+		if( prev + 1 == group_end )
+			continue;	// can't do anything with groups of size 1 -- there's no match
+
+		// do something about ties here...???
+		// this code selects the *furthest* away match (e.g. that with the largest d)
+		// because that's what got sorted in last in the comparator
+		// it eliminates both duplicate M_i and duplicate M_j components...
+		// FIXME:  is this true?  is it safe?
+		vector< pair< size_t, size_t > >& j_comp_sort_list = nllbufs.j_comp_sort_list;
+		j_comp_sort_list.resize(0);
+		for( size_t i = prev + 1; i < group_end; ++i )
+		{
+            //selects the *furthest* away match 
+			if( neighborhood_list[i-1].Mi_component == neighborhood_list[i].Mi_component )
+				continue;
+			j_comp_sort_list.push_back(make_pair(neighborhood_list[i-1].Mj_component, i-1));
+		}
+		j_comp_sort_list.push_back(make_pair(neighborhood_list[group_end-1].Mj_component, group_end-1));	// the last entry of the group is always kept
+		std::sort(j_comp_sort_list.begin(), j_comp_sort_list.end());	// orders by Mj_component, then by neighborhood_list index
+		vector<size_t>& group_entries = nllbufs.group_entries;
+		group_entries.resize(0);
+		for( size_t i = 1; i < j_comp_sort_list.size(); ++i )
+		{
+            //selects the *furthest* away match
+			if( j_comp_sort_list[i-1].first == j_comp_sort_list[i].first )
+				continue;
+			group_entries.push_back(j_comp_sort_list[i-1].second);
+		}
+		group_entries.push_back(j_comp_sort_list.back().second);
+
+		// update the links in case something is subsumed
+		for( size_t gI = 0; gI < group_entries.size(); ++gI )
+			checkLinkAndComponent( neighborhood_list[group_entries[gI]].match, neighborhood_list[group_entries[gI]].Mj_component );
+
+		// finally, classify the match as one of superset, subset, 
+		// chainable, novel subset
+		MatchRecord* M_j = neighborhood_list[prev].match;
+
+		if( group_entries.size() == M_i->Multiplicity() && 
+			M_j->Multiplicity() > M_i->Multiplicity() )
+		{
+			// superset
+			superset_groups.push_back( group_entries );
+		}else
+		if( group_entries.size() == M_i->Multiplicity() && 
+			M_j->Multiplicity() == M_i->Multiplicity() )
+		{
+            
+            // chainable
+			chainable_groups.push_back( group_entries );
+		}else
+		if( group_entries.size() < M_i->Multiplicity() && 
+			group_entries.size() == M_j->Multiplicity() )
+		{
+			// subset
+			subset_groups.push_back( group_entries );
+		}else
+		{
+            // novel subset
+			novel_subset_groups.push_back( group_entries );
+		}
+
+	}	// end loop that splits the neighborhood into groups
+
+	createNeighborhoodGroupList( superset_list, superset_groups, neighborhood_list );
+	createNeighborhoodGroupList( chainable_list, chainable_groups, neighborhood_list );
+	createNeighborhoodGroupList( subset_list, subset_groups, neighborhood_list );
+	createNeighborhoodGroupList( novel_subset_list, novel_subset_groups, neighborhood_list );
+}
+
+/**
+ * Chains matches onto M_i or subsumes them as appropriate.
+ * The chainable groups are visited in list order.  A group is chained onto M_i only when
+ * chaining is enabled, classifySubset() reports it as neither subsumed nor partial, and
+ * testChainableMatch() accepts it; a rejected chain test ends the whole walk.  Every group
+ * that is not skipped is marked as subsumed by M_i.  If such a match already has a superset
+ * in the direction of travel, M_i inherits it and the walk stops.
+ * @param	M_i			The match under extension
+ * @param	chainable_list		Neighborhood groups classified as chainable
+ * @param	direction		Direction of extension (1 or -1)
+ * @param	last_linked		Set to 2 when M_i's range grew or a superset was inherited
+ * @param	find_novel_subsets	Whether novel subset discovery is enabled
+ * @param	chain			Whether chaining is allowed at all
+ */
+void processChainableMatches( GappedMatchRecord*& M_i, vector< NeighborhoodGroup >& chainable_list,
+				  int direction, int& last_linked, bool find_novel_subsets, bool chain )
+{
+	// link the closest possible chainable first.
+	for( size_t groupI = 0; groupI < chainable_list.size(); ++groupI )
+	{
+		NeighborhoodGroup& group = chainable_list[groupI];
+		MatchRecord* M_j = group.get<0>();
+		// indexed by M_j component, yields the corresponding M_i component
+		vector< size_t >& yx_map = group.get<1>();
+
+		// finding ourselves in our own neighborhood is an inverted overlapping repeat
+		if( M_j == M_i )
+			continue;
+
+		// An extended M_j would normally have swallowed M_i already.  When novel
+		// subsets are disabled and M_i is itself a novel subset that never happened,
+		// so the group is skipped.  In every other case processing simply continues:
+		// some work may have been wasted, but M_i gets detected and updated below.
+		if( M_j->extended && !find_novel_subsets && M_i->is_novel_subset )
+			continue;
+
+		bool subsumed;
+		bool partial;
+		classifySubset( M_i, group, subsumed, partial );
+
+		// invert the map: indexed by M_i component, yields the M_j component
+		vector< size_t > xy_map(yx_map.size());
+		for( size_t y = 0; y < yx_map.size(); ++y )
+			xy_map[ yx_map[y] ] = y;
+
+		// if M_j isn't extending the boundaries of every component of M_i then
+		// it may be inconsistent with already chained matches.  just subsume it without
+		// chaining in that case.
+		if( !subsumed && !partial && chain )
+		{
+			if( !testChainableMatch(M_i, M_j, xy_map) )
+				break;
+
+			M_i->chained_matches.push_back( M_j );
+			M_i->chained_component_maps.push_back( yx_map );
+			// a changed range means the left-end and right-end coords were updated
+			if( extendRange(M_i, M_j, xy_map) )
+				last_linked = 2;
+		}
+
+		M_j->subsuming_match = M_i;
+		M_j->subsumption_component_map = yx_map;
+
+		int parity = -1;
+		if( M_i->Orientation(0) == M_j->Orientation(xy_map[0]) )
+			parity = 1;
+
+		// the link pointing back towards M_i won't be needed anymore
+		if( getSuperset( M_j, -direction*parity ).superset != NULL )
+			unlinkSuperset(M_j,-direction*parity);
+
+		// if M_j has a superset then inherit it and stop chaining here
+		if( getSuperset( M_j, direction*parity ).superset != NULL )
+		{
+			inheritSuperset( M_i, M_j, direction, parity );
+			last_linked = 2;	// we may do a link extension!
+			break;
+		}
+	}
+}
+/**
+ * Processes the superset groups found in M_i's neighborhood: every superset other than
+ * M_i itself is linked to M_i with linkSuperset().
+ * @param	M_i			The match under extension
+ * @param	superset_list		Neighborhood groups classified as supersets
+ * @param	direction		Direction of extension (1 or -1)
+ * @param	last_linked		Set to 1 whenever a superset is linked (1 == superset, 2 == chainable, 0 == none)
+ * @param	gapped_extension	When false, meeting an already-extended superset is reported and breakHere() is called
+ */
+void processSupersetMatches( GappedMatchRecord*& M_i, vector< NeighborhoodGroup >& superset_list,
+				  int direction, int& last_linked, bool gapped_extension = false )
+{
+	// link the closest possible superset first.
+	for( size_t gI = 0; gI < superset_list.size(); gI++ )
+	{
+		MatchRecord* M_j = superset_list[gI].get<0>();
+
+		if( M_j == M_i )
+		{
+			// this is an inverted overlapping repeat, skip it.
+			continue;
+		}
+
+		// NOTE(review): the component map comes from M_i's first chained match and not
+		// from superset_list[gI]; confirm that this really is the M_i -> M_j mapping.
+		vector< size_t >& component_map = M_i->chained_component_maps.at(0);
+		boost::dynamic_bitset<> comp_list(M_j->Multiplicity(), false);
+		for( size_t compI = 0; compI < M_i->Multiplicity(); ++compI )
+			comp_list.set(component_map[compI]);
+
+		//tjt: shouldn't the superset always be extended when we reach this point during gapped extension?
+		if( M_j->extended && !gapped_extension )
+		{
+			// oh no!  M_i should have been swallowed up already!
+			cerr << "extensor crap 2\n";
+			breakHere();
+		}
+
+		bool subsumed;
+		bool partial;
+		//update classifysubset to ClassifySuperset
+		classifySuperset( M_i, superset_list[gI], subsumed, partial );
+
+		if( subsumed && !partial )
+		{
+			// update the left-end and right-end coords
+			reduceRange(M_i, M_j, component_map);
+		}
+
+		// When partial is set, only some components of the superset match are subsumed.
+		// punt: nothing is done differently for that case yet.  The original code had a
+		// body-less "if( partial )" here that swallowed the linkSuperset() call below, so
+		// the link was only made for partial groups while last_linked was set for all.
+		linkSuperset( M_i, M_j, comp_list, component_map,  direction);
+		last_linked = 1;// stores the group type that was chained.  1 == superset, 2 == chainable, 0 == none
+	}
+}
+
+
+/**
+ * Performs a gapped extension on a match.  The region either left or right of the match is
+ * extracted for every component, aligned with MUSCLE and passed to detectAndApplyBackbone().
+ * Every backbone segment that yields a non-empty alignment with multiplicity > 1 becomes a
+ * new GappedMatchRecord that is appended to novel_matches.  M_i itself is not modified.
+ * @param	M_i		The match to extend
+ * @param	seq_table	gnSequences which correspond to each match component (only seq_table[0] is read for sequence data)
+ * @param	hmm_params	The Homology HMM parameters to use
+ * @param	w		The max gap for chaining (not used by the current implementation)
+ * @param	direction	The direction of extension: 1 == leftward, -1 == rightward
+ * @param	novel_matches	(output) receives one GappedMatchRecord per accepted backbone segment
+ * @param	gap_open	Gap open penalty handed to MUSCLE
+ * @param	gap_extend	Gap extend penalty handed to MUSCLE
+ * @param	extension_window	If >= 0, replaces the multiplicity-based extension length
+ * @return	FIXME for tandem repeats, OK if novel_matches is non-empty on return, FAILED otherwise
+ */
+int ExtendMatch(GappedMatchRecord*& M_i, vector< gnSequence* >& seq_table, Params& hmm_params, unsigned w, int direction, vector<GappedMatchRecord*>& novel_matches, int gap_open, int gap_extend, int extension_window)
+{
+	ccount +=1;
+	static bool debug_extension = false;
+	int multi = M_i->Multiplicity();
+	double e = 2.71828182845904523536;
+
+	// the default extension length shrinks as multiplicity grows
+	int extend_length = 80*pow(e,-0.01*multi);
+	//use user specified window if requested
+	if (extension_window >= 0 )
+		extend_length = extension_window;
+	vector<int> left_extend_vector(multi,0);
+	vector<int> right_extend_vector(multi,0);
+
+	if ( M_i->tandem )
+	{
+		if ( debug_extension)
+			cerr << "Sorry, no extension for tandem repeats.." << endl << endl;
+		return FIXME;
+	}
+
+	// Work out, per component, how far we may extend on each side.
+	// careful, if M_i->LeftEnd(j) < extend_length, ToString() will be disappointed...
+	// NOTE(review): the original code multiplied the gap between neighbouring components
+	// by a "parity" that evaluated to 1 for either orientation; that dead factor has been
+	// removed.  Confirm that -1 was never intended for opposite orientations.
+	for( gnSeqI j = 0; j < multi; j++)
+	{
+		if( M_i->Orientation(j) == AbstractMatch::reverse )
+		{
+			// reverse component: the sequence to the left of it is the *right* extension
+//          if leftend <= 0 set right extension to 0
+			if( M_i->LeftEnd(j) <= 0 || M_i->LeftEnd(j) > 4000000000u )
+				right_extend_vector[j] = 0;
+//          if extend_length goes too far, set to maximum possible
+			else if ( M_i->LeftEnd(j) <= extend_length )
+				right_extend_vector[j] = M_i->LeftEnd(j)-1;
+//          if we run into another match, don't extend into it
+			else if ( j > 0 && M_i->LeftEnd(j) - extend_length <= M_i->RightEnd(j-1) )
+				right_extend_vector[j] = M_i->LeftEnd(j)-M_i->RightEnd(j-1)-1;
+//          else everything ok to set to preset extend_length
+			else
+				right_extend_vector[j] = extend_length-1;
+
+			if(M_i->RightEnd(j) <= 0 || M_i->RightEnd(j) > 4000000000u)
+				left_extend_vector[j] = 0;	// was push_back(0), which grew the vector instead of setting slot j
+			else if ( M_i->RightEnd(j) + extend_length > seq_table[0]->length() )
+				left_extend_vector[j] = seq_table[0]->length()-M_i->RightEnd(j);
+			else if ( j+1 < multi && M_i->RightEnd(j) + extend_length >= M_i->LeftEnd(j+1) )
+				left_extend_vector[j] = M_i->LeftEnd(j+1)-M_i->RightEnd(j)-1;
+			else
+				left_extend_vector[j] = extend_length-1;
+		}
+		else
+		{
+			if( M_i->LeftEnd(j) <= 0 || M_i->LeftEnd(j) > 4000000000u )
+				left_extend_vector[j] = 0;
+			else if ( M_i->LeftEnd(j) <= extend_length )
+				left_extend_vector[j] = M_i->LeftEnd(j)-1;
+			else if ( j > 0 && M_i->LeftEnd(j) - extend_length <= M_i->RightEnd(j-1) )
+				left_extend_vector[j] = M_i->LeftEnd(j)-M_i->RightEnd(j-1)-1;
+			else
+				left_extend_vector[j] = extend_length;
+
+			if(M_i->RightEnd(j) <= 0 || M_i->RightEnd(j) > 4000000000u)
+				right_extend_vector[j] = 0;
+			else if ( M_i->RightEnd(j) + extend_length > seq_table[0]->length() )
+				right_extend_vector[j] = seq_table[0]->length()-M_i->RightEnd(j)-1;
+			else if ( j+1 < multi && M_i->RightEnd(j) + extend_length >= M_i->LeftEnd(j+1) )
+				right_extend_vector[j] = M_i->LeftEnd(j+1)-M_i->RightEnd(j)-1;
+			else
+				right_extend_vector[j] = extend_length;
+		}
+	}
+
+	// every component is extended by the same amount: the smallest one available, never negative
+	int left_extend_length = *(std::min_element( left_extend_vector.begin(), left_extend_vector.end() ));
+	int right_extend_length = *(std::min_element( right_extend_vector.begin(), right_extend_vector.end() ));
+	left_extend_length = left_extend_length < 0 ? 0 : left_extend_length;
+	right_extend_length = right_extend_length < 0 ? 0 : right_extend_length;
+
+	const gnFilter* rc_filter = gnFilter::DNAComplementFilter();
+	std::vector<std::string> leftExtension(multi);
+	GappedAlignment leftside(multi,left_extend_length);
+	std::vector<std::string> rightExtension(multi);
+	GappedAlignment rightside(multi,right_extend_length);
+	vector< string > leftExtension_aln;
+	vector< string > rightExtension_aln;
+	if ( left_extend_length > 0 && direction == 1  )
+	{
+//      extract sequence data
+		for( gnSeqI j = 0; j < multi; j++)
+		{
+			if( M_i->Orientation(j) == AbstractMatch::reverse )
+			{
+				seq_table[0]->ToString( leftExtension[j], left_extend_length, M_i->RightEnd(j)+1 );
+				leftside.SetLeftEnd(j,M_i->RightEnd(j)+1);
+				rc_filter->ReverseFilter(leftExtension[j]);
+			}else{
+				seq_table[0]->ToString( leftExtension[j], left_extend_length, M_i->LeftEnd(j) - left_extend_length );
+				leftside.SetLeftEnd(j,M_i->LeftEnd(j) - left_extend_length);
+			}
+			leftside.SetOrientation(j,M_i->Orientation(j));
+			leftside.SetLength(left_extend_length,j);
+		}
+		bool align_success = false;
+		//mems::MuscleInterface::getMuscleInterface().SetMuscleArguments("-stable -quiet -seqtype DNA");
+		align_success = mems::MuscleInterface::getMuscleInterface().CallMuscleFast( leftExtension_aln, leftExtension, gap_open, gap_extend );
+		if ( align_success ){
+			leftside.SetAlignment(leftExtension_aln);
+			leftside.SetAlignmentLength(leftExtension_aln.at(0).size());
+		}else{
+			cerr << "Extension failed: Muscle error" << endl;
+			return FAILED;
+		}
+	}
+	else if ( right_extend_length > 0 && direction == -1 )
+	{
+		for( gnSeqI j = 0; j < multi; j++)
+		{
+			if( M_i->Orientation(j) == AbstractMatch::reverse )
+			{
+				rightside.SetLeftEnd(j,M_i->LeftEnd(j) - right_extend_length-1);
+				seq_table[0]->ToString( rightExtension[j], right_extend_length, M_i->LeftEnd(j) - right_extend_length-1);
+				rc_filter->ReverseFilter(rightExtension[j]);
+			}else{
+				rightside.SetLeftEnd(j,M_i->RightEnd(j)+1 );
+				seq_table[0]->ToString( rightExtension[j], right_extend_length, M_i->RightEnd(j)+1 );
+			}
+			rightside.SetOrientation(j,M_i->Orientation(j));
+			rightside.SetLength(right_extend_length,j);
+		}
+		bool align_success = false;
+		align_success = mems::MuscleInterface::getMuscleInterface().CallMuscleFast( rightExtension_aln, rightExtension, gap_open, gap_extend );
+		if ( align_success ){
+			rightside.SetAlignment(rightExtension_aln);
+			rightside.SetAlignmentLength(rightExtension_aln.at(0).size());
+		}else{
+			cerr << "Extension failed: Muscle error" << endl;
+			return FAILED;
+		}
+	}else{
+		//what are you even doing here?!?
+		if(debug_extension)
+		{
+			cerr << "Extension failed: No room to extend" << endl;
+		}
+		return FAILED;
+	}
+
+//  tjt: don't use original match, only regions to the left/right
+//       since for now we won't modify M_i, even if the homology detection method suggests otherwise
+	vector< AbstractMatch* > mlist;
+	if( direction == 1 )
+		mlist.push_back(leftside.Copy());
+	if( direction == -1 )
+		mlist.push_back(rightside.Copy());
+
+//  create an Interval around the aligned extension and convert it to a CompactGappedAlignment
+	Interval iv;
+	iv.SetMatches(mlist);
+	CompactGappedAlignment<> tmp_cga;
+	CompactGappedAlignment<>* cga = tmp_cga.Copy();
+	new (cga)CompactGappedAlignment<>(iv);
+	vector< CompactGappedAlignment<>* > cga_list;
+	CompactGappedAlignment<>* result;
+//  detectAndApplyBackbone
+	backbone_list_t bb_list;
+
+	detectAndApplyBackbone( cga, seq_table,result,bb_list, hmm_params, direction != 1, direction == 1  );
+	cga->Free();
+
+	if( bb_list.size() == 0 || bb_list.at(0).size() == 0)
+	{
+//      no backbone segment found
+		if(debug_extension)
+			cerr << "Extension failed: no backbones found during extension" << endl;
+		result->Free();
+		return FAILED;
+	}
+
+//  tjt: was > before, wasn't taking right backbone???
+//  remember, direction == -1 rightward, == 1 leftward
+	for( size_t bbI = 0; bbI < bb_list[0].size(); bbI++ )
+	{
+		AbstractMatch* extension_bb = bb_list.at(0).at(bbI);
+		if (extension_bb == NULL)
+			continue;
+
+		// NOTE(review): a copy that fails the test below stays in cga_list and is never
+		// freed in this function; confirm whether it should be released.
+		cga_list.push_back( tmp_cga.Copy() );
+		result->copyRange( *(cga_list.back()), extension_bb->LeftEnd(0), extension_bb->AlignmentLength()-1 );
+
+		//why set this to > 5? what about seed weight? or to > 0? seems strange to allow novel matches of length 1...
+		if( cga_list.back()->Multiplicity() > 1 && cga_list.back()->Length() > 0 && cga_list.back()->AlignmentLength() > 0 )
+		{
+//          successful extension!!
+//          boundaries were improved, current match extended original match
+//          create a GappedMatchRecord for the gapped extension
+			vector< AbstractMatch* > matches( 1, cga_list.back());
+			UngappedMatchRecord tmp(  cga_list.back()->Multiplicity(), cga_list.back()->AlignmentLength() );
+			MatchRecord* umr = tmp.Copy();
+			GappedMatchRecord* M_e = dynamic_cast<GappedMatchRecord*>(umr);
+
+			if( M_e == NULL )
+			{
+//              umr is not a GappedMatchRecord: wrap it in a newly created one
+				GappedMatchRecord gmr( *(UngappedMatchRecord*)umr );
+				M_e = gmr.Copy();
+				umr->subsuming_match = M_e;
+				M_e->chained_matches.push_back( umr );
+				vector< size_t > component_map( M_e->SeqCount() );
+				for( size_t i = 0; i < component_map.size(); ++i )
+					component_map[i] = i;
+				M_e->chained_component_maps.push_back(component_map);
+				swap(umr->subsumption_component_map, component_map);	// swap avoids reallocation
+//              update superset and subset links
+				for( int dI = 1; dI > -2; dI -= 2 )
+				{
+					MatchLink& ij_link = getSuperset(M_e,dI);
+					if( ij_link.superset != NULL )
+					{
+						ij_link.subset = M_e;
+						unlinkSuperset(umr,dI);
+						int parity = M_e->Orientation(0) == ij_link.superset->Orientation(ij_link.sub_to_super_map[0]) ? 1 : -1;
+						getSubsets(ij_link.superset,-dI*parity).push_back(ij_link);
+					}
+					vector< MatchLink >& subsets = getSubsets(M_e,dI);
+					for( size_t subI = 0; subI < subsets.size(); ++subI )
+					{
+						subsets[subI].superset = M_e;
+						// NOTE(review): the orientation is read from M_i although the link
+						// is being re-pointed at M_e; confirm M_i is intended here.
+						int parity = M_i->Orientation(subsets[subI].sub_to_super_map[0]) == subsets[subI].subset->Orientation(0) ? 1 : -1;
+						getSuperset(subsets[subI].subset, -dI*parity).superset = M_e;
+					}
+					getSubsets(umr,dI).clear();	// so that validate works...
+				}
+				//tjt: clobber M_e's GappedMatchRecord data and set boundaries
+				//tjt: we call the Temp version since we don't actually want to do anything with the regions between the matches
+				M_e->SetMatchesTemp(matches);
+			}
+			// NOTE(review): only a copy of M_e is handed out; M_e itself is not freed here.
+			novel_matches.push_back(M_e->Copy());
+		}
+	}
+	result->Free();
+	if (novel_matches.size() > 0)
+		return OK;
+	else
+		return FAILED;
+}//tjt: match should be extended!
+
+/**
+ * A class to prioritize match records for extension based on their multiplicity.
+ * The records are kept in a max-heap stored in the first q_end slots of q, so pop()
+ * returns a record whose multiplicity is not exceeded by any other queued record.
+ * Slots vacated by pop() are reused by later calls to push().
+ */
+class ProcrastinationQueue
+{
+public:
+	/**
+	 * Builds the queue from a list of match record pointers.
+	 * @param	match_record_list	Pointers assignable to MatchRecord*; the queue stores the pointers only
+	 */
+	template< typename MrPtrType >
+	ProcrastinationQueue( vector< MrPtrType >& match_record_list ) :
+	mhc()
+	{
+		q.resize( match_record_list.size() );
+		std::copy(match_record_list.begin(), match_record_list.end(), q.begin() );
+		std::make_heap( q.begin(), q.end(), mhc );
+		q_end = q.size();
+		q_size = q.size();
+	}
+
+	/**
+	 * pops an element from the queue, maintaining heap order
+	 * @return	the popped record, or NULL when the queue is empty
+	 */
+	MatchRecord* pop()
+	{
+		// popping an empty queue used to wrap q_end around and hand pop_heap an invalid range
+		if( q_end == 0 )
+			return NULL;
+		std::pop_heap( q.begin(), q.begin()+q_end, mhc );
+		q_end--;
+		return q[q_end];
+	}
+
+	/** Adds an element to the queue and restores heap order */
+	void push( MatchRecord* M_n )
+	{
+		// reuse a slot left behind by pop() before growing the vector
+		if( q_end < q.size() )
+			q[q_end] = M_n;
+		else
+		{
+			q.push_back(M_n);
+		}
+		q_size++;
+		q_end++;
+		std::push_heap(q.begin(), q.begin()+q_end, mhc);
+	}
+	/** gets the total number of elements that have been placed in the queue */
+	size_t size() const{ return q_size; }
+	/** returns the current number of elements in the queue */
+	size_t end() const{ return q_end; }
+
+
+	/** defines a multiplicity heap ordering */
+	class MultiplicityHeapCompare
+	{
+	public:
+		bool operator()( const MatchRecord* a, const MatchRecord* b ) const
+		{
+			return a->Multiplicity() < b->Multiplicity();
+		}
+	};
+
+private:
+	const MultiplicityHeapCompare mhc;
+	std::vector< MatchRecord* > q;	// heap occupies [0, q_end); anything beyond is stale
+	size_t q_end;	// number of records currently queued
+	size_t q_size;	// number of records ever queued (initial list plus pushes)
+};
+
+/**
+ * Creates novel subset matches where appropriate.
+ * For each novel subset group whose match M_j has already been extended, a new
+ * GappedMatchRecord M_n is built from the components of M_j that map onto M_i.  M_n is
+ * dropped when classifyMatch() reports it as subsumed or partially subsumed; otherwise it
+ * is finalized, flagged as a novel subset, linked to M_i and M_j as its supersets and
+ * appended to novel_subset_list.  The push onto the procrastination queue is commented out.
+ * Nothing is created unless find_novel_subsets is true and last_linked is 0.
+ * @param	M_i			The match under extension
+ * @param	novel_subset_list	Neighborhood groups classified as novel subsets; newly created matches are appended to it
+ * @param	find_novel_subsets	Whether novel subset discovery is enabled
+ * @param	procrastination_queue	Not used by the current code
+ * @param	seq_table		Passed to M_n->finalize()
+ * @param	direction		Direction of extension (1 or -1); selects which link slots are used
+ * @param	w			Not used by the current code
+ * @param	last_linked		Read only; must be 0 for any subset to be generated
+ * @param	novel_subset_count	Incremented once per novel subset created
+ */
+void processNovelSubsetMatches( GappedMatchRecord*& M_i, vector< NeighborhoodGroup >& novel_subset_list,
+				bool find_novel_subsets, ProcrastinationQueue& procrastination_queue,
+				vector< gnSequence* >& seq_table, int direction, uint w, int& last_linked,
+				size_t& novel_subset_count )
+{
+	// finally process novel subset
+	bool prev_linked = false;	// we only want to link the closest of a group with the same components
+	int created_thisround = 0;	// accumulated below but never read
+	static NeighborhoodGroupComponentCompare srcc;
+	for( size_t gI = 0; gI < novel_subset_list.size(); gI++ )
+	{
+		if( !find_novel_subsets )
+			continue;	// only find novel subsets if we're supposed to
+		if( last_linked != 0 )
+			continue;	// only generate subsets when last_linked == 0
+
+		// be sure to handle case where:
+		// --M_i-->   --M_j--   <--M_i--
+		// that may cause an identical novel subset to get spawned but with
+		// M_i and M_j swapped as left and right supersets
+
+		bool same_components = false;
+		if( gI > 0 )
+			same_components = srcc.compare(novel_subset_list[gI], novel_subset_list[gI-1]) == 0;
+		prev_linked = same_components? prev_linked : false;	// NOTE(review): prev_linked is never set to true in this function, so the skip below cannot trigger
+
+		if( prev_linked )
+			continue;	// don't link a subset with the same components...
+
+		// TODO: handle the tandem repeat case
+		if( M_i->tandem )
+		{
+			// step 1. count tandem repeat components
+			// step 2. create a new GappedMatchRecord with one component per tandem component
+			// add a GappedMatchRecord with the outer component boundaries
+			// do ordinary extension
+			// when finalize() gets called, something special needs to happen
+			continue;
+		}
+
+		MatchRecord* M_j = novel_subset_list[gI].get<0>();
+		// if M_j hasn't been extended then we don't do anything yet.
+		// we may find this novel subset again when M_j gets extended
+		if( M_j->extended == false)
+			continue;
+
+		size_t mult = 0;	// multiplicity of novel subset
+		for( size_t i = 0; i < novel_subset_list[gI].get<1>().size(); ++i )
+			if( novel_subset_list[gI].get<1>()[i] != (std::numeric_limits<size_t>::max)() )	// max() marks a component without a counterpart
+				mult++;
+
+		if( mult < 2 )
+			continue;	// can't do anything if there's no match!
+
+		UngappedMatchRecord tmper1(mult,0);
+		GappedMatchRecord tmper2(tmper1);  // this is lame
+		GappedMatchRecord* M_n = tmper2.Copy();
+
+		size_t mnewi = 0;
+		vector< size_t > new_to_i_map(mult);
+		vector< size_t > new_to_j_map(mult);
+		boost::dynamic_bitset<> ni_list(M_i->Multiplicity());
+		boost::dynamic_bitset<> nj_list(M_j->Multiplicity());
+		for( size_t i = 0; i < novel_subset_list[gI].get<1>().size(); ++i )
+		{
+			if( novel_subset_list[gI].get<1>()[i] != (std::numeric_limits<size_t>::max)() )
+			{
+				new_to_i_map[mnewi] = novel_subset_list[gI].get<1>()[i];
+				new_to_j_map[mnewi] = i;
+				ni_list.set(new_to_i_map[mnewi]);
+				nj_list.set(i);
+				M_n->SetStart(mnewi, M_j->Start(i));	// sets left-end and orientation
+				M_n->SetLength(M_j->Length(i),mnewi);
+				mnewi++;
+			}
+		}
+		if( M_n->Orientation(0) == AbstractMatch::reverse )
+			M_n->Invert();
+		// before we go any further, make sure that the relevant portion of M_i is not 
+		// either completely or partially subsumed by the relevant portion of M_j!
+		MatchProjectionAdapter mpaa( M_i, new_to_i_map );
+		vector< size_t > mpaa_to_Mn_map( new_to_i_map.size() );
+		for( size_t i = 0; i < mpaa_to_Mn_map.size(); ++i )
+			mpaa_to_Mn_map[i] = i;
+		bool subsumed;
+		bool partial;
+		classifyMatch( M_n, &mpaa, mpaa_to_Mn_map, subsumed, partial );
+		if( subsumed )
+		{
+			M_n->Free();
+			continue;	// there's nothing novel about this subset...
+		}
+		if( partial )
+		{
+			// FIXME: we should really spawn a novel subset on the non-subsumed components
+			M_n->Free();
+			continue;
+		}
+		created_thisround+= M_n->Multiplicity();
+
+		M_n->chained_matches.push_back(M_j);	// chained only for the duration of finalize(); cleared again right after
+		M_n->chained_component_maps.push_back(new_to_j_map);
+		//tjt: need to send finalize seq_table for muscle alignment
+		M_n->finalize(seq_table);	// make this one a legitimate match...
+		M_n->chained_matches.clear();
+		M_n->chained_component_maps.clear();
+
+        M_n->is_novel_subset = true;//yep, this is a novel subset
+		// create links from M_n to M_i and M_j
+		int ni_parity = M_n->Orientation(0) == M_i->Orientation(new_to_i_map[0]) ? 1 : -1;
+		int nj_parity = M_n->Orientation(0) == M_j->Orientation(new_to_j_map[0]) ? 1 : -1;
+		MatchLink& ni_link = getSuperset(M_n,-direction*ni_parity);
+		ni_link = MatchLink(M_i,M_n,ni_list,new_to_i_map);
+		getSubsets(M_i,direction).push_back(ni_link);
+        //getExtraSubsets(M_i,direction).push_back(ni_link);
+		MatchLink& nj_link = getSuperset(M_n,direction*ni_parity);
+		nj_link = MatchLink(M_j,M_n,nj_list,new_to_j_map);
+		getSubsets(M_j,-direction*ni_parity*nj_parity).push_back(nj_link);
+        //getExtraSubsets(M_j,-direction*ni_parity*nj_parity).push_back(nj_link);
+		// push M_n onto the heap
+		novel_subset_list.push_back(M_n);	// NOTE(review): appends to the list this loop iterates over, relying on a conversion from GappedMatchRecord* to NeighborhoodGroup -- confirm this is intended
+		//procrastination_queue.push(M_n);
+		novel_subset_count++;
+	}
+
+
+}
+
+
+/**
+ * Writes a set of MatchRecords in eXtended Multi-FastA format.
+ * Nothing is written when xmfa_file is empty or "-".
+ * @param	seedml	A matchlist containing the seq_table of interest (only entry 0 is used)
+ * @param	extended_matches	A set of matches to write out
+ * @param	xmfa_file	The filename to use for output
+ */
+void writeXmfa( MatchList& seedml, std::vector< GappedMatchRecord* >& extended_matches, const std::string& xmfa_file )
+{
+	// NOTE(review): the original code contained a branch writing to cout for "-", but
+	// its enclosing condition excluded "-", so the branch could never run.  That
+	// behaviour (no output for "-") is preserved; confirm whether stdout is wanted.
+	if( xmfa_file.length() == 0 || xmfa_file == "-" )
+		return;
+
+	ofstream xmfa_out(xmfa_file.c_str());
+	if( !xmfa_out.is_open() )
+	{
+		cerr << "Error: unable to open \"" << xmfa_file << "\" for writing\n";
+		return;
+	}
+
+	// the matches are only copied once we know there is somewhere to write them
+	GenericIntervalList<GappedMatchRecord> gmr_list;
+	for( size_t gmrI = 0; gmrI < extended_matches.size(); ++gmrI )
+		gmr_list.push_back(*extended_matches[gmrI]);
+	gmr_list.seq_filename.push_back( seedml.seq_filename[0] );
+	gmr_list.seq_table.push_back( seedml.seq_table[0] );
+
+	gmr_list.WriteStandardAlignment(xmfa_out);
+	xmfa_out.close();
+}
+
+/**
+ * Writes a set of MatchRecords in XML format.
+ * Nothing is written when xml_file is empty.  The special name "-" sends
+ * the alignment to standard output instead of a file.
+ * @param	seedml	A matchlist containing the seq_table of interest
+ * @param	extended_matches	A set of matches to write out
+ * @param	xml_file	The filename to use for output, "-" for stdout, or empty for no output
+ */
+void writeXML( MatchList& seedml, std::vector< GappedMatchRecord* >& extended_matches, const std::string& xml_file )
+{
+	// no output requested, so don't bother copying matches that would never be written
+	if( xml_file.empty() )
+		return;
+
+	// WriteXMLAlignment operates on an interval list, so copy each match record into one
+	GenericIntervalList<GappedMatchRecord> gmr_list;
+	for( size_t gmrI = 0; gmrI < extended_matches.size(); ++gmrI )
+		gmr_list.push_back(*extended_matches[gmrI]);
+
+	// the list carries the first sequence entry of the seed match list
+	gmr_list.seq_filename.push_back( seedml.seq_filename[0] );
+	gmr_list.seq_table.push_back( seedml.seq_table[0] );
+
+	// the previous guard also required xml_file != "-", which made this
+	// stdout branch unreachable
+	if( xml_file == "-" )
+	{
+		gmr_list.WriteXMLAlignment(cout);
+		return;
+	}
+
+	ofstream xml_out(xml_file.c_str());
+	if( !xml_out.is_open() )
+	{
+		// report the failure rather than silently writing into a dead stream
+		cerr << "Error opening XML output file \"" << xml_file << "\"\n";
+		return;
+	}
+	gmr_list.WriteXMLAlignment(xml_out);
+	xml_out.close();
+}
+
+/**
+ * Function object that converts a single character to upper case with
+ * toupper().  Intended for element-wise use, e.g. as the operation passed
+ * to std::transform over a string.
+ */
+class ToUPPER
+{
+public:
+	/**
+	 * @param	a	The character to convert
+	 * @return	The upper-case form of a, or a itself if toupper() has no mapping for it
+	 */
+	char operator()( char a ) const
+	{
+		// toupper() is only defined for values representable as unsigned char
+		// (or EOF); a negative plain char must be converted first.
+		return static_cast<char>( toupper( static_cast<unsigned char>(a) ) );
+	}
+};
+int main( int argc, char* argv[] )
+{
+//	debug_interval = true;
+	// Declare the supported options.
+    bool debug_extension = false;
+
+	string sequence_file = "";
+	int extension_window = 0;
+	int w = 0;
+	int kmersize =0;
+	int gap_open = 0;
+	int gap_extend = 0;
+	uint seed_weight = 0;
+    uint min_repeat_length = 0;
+    score_t min_spscore = 0;
+    uint rmin = 0;
+    uint rmax = 0;
+	string outputfile = "";
+	string output2file = "";
+	string xmfa_file = "";
+    string xml_file = "";
+	string stat_file = "";
+	string seed_file = "";
+	bool only_direct = false;
+	bool load_sml = false;
+	bool small_repeats = false;
+	bool large_repeats = false;
+    bool allow_tandem = false;
+    bool allow_redundant = false;
+	bool find_novel_subsets = false;
+    bool use_novel_matches = true; //should procrast use novel matches found during gapped extension ?
+	bool solid_seed = false;
+	bool extend_chains = true;
+	bool chain = true;
+	bool two_hits = false;
+	bool unalign = true;
+	float percent_id = 0.0;
+    float pGoHomo = 0.004f;
+    float pGoUnrelated = 0.004f;
+    bool only_extended = false;
+
+	po::variables_map vm;
+	try {
+
+        po::options_description desc("Allowed options");
+        desc.add_options()
+			("allow-redundant", po::value <bool>(&allow_redundant)->default_value(true), "allow redundant alignments?")
+			("chain", po::value<bool>(&chain)->default_value(true), "chain seeds?")
+			("extend", po::value<bool>(&extend_chains)->default_value(true), "perform gapped extension on chains?")
+			("window", po::value<int>(&extension_window)->default_value(-1), "size of window to use during gapped extension")
+			("gapopen",po::value <int>(&gap_open)->default_value(0), "gap open penalty")
+			("gapextend",po::value <int>(&gap_extend)->default_value(0), "gap extension penalty")
+			("h", po::value<float>(&pGoHomo)->default_value(0.008f), "Transition to Homologous")
+			("help", "get help message")
+			("highest", po::value<string>(&stat_file)->default_value("procrast.highest"), "file containing highest scoring alignment for each multiplicity ")
+            ("l", po::value <unsigned>(&min_repeat_length)->default_value(1), "minimum repeat length")
+			("large-repeats", po::value <bool>(&large_repeats)->default_value(false), "optimize for large repeats")
+			("load-sml", po::value <bool>(&load_sml)->default_value(false), "try to load existing SML file?")
+			("onlydirect",po::value<bool>(&only_direct)->default_value(false), "only process seed matches on same strand?")
+			("onlyextended",po::value<bool>(&only_extended)->default_value(false), "only output extended matches?")
+			("output", po::value<string>(&outputfile)->default_value(""), "procrastAligner output ")
+			("percentid", po::value<float>(&percent_id)->default_value(0.0), "min repeat family % id")
+			("novel-subsets", po::value<bool>(&find_novel_subsets)->default_value(false), "find novel subset matches?")
+            ("novel-matches", po::value<bool>(&use_novel_matches)->default_value(true), "use novel matches found during gapped extension?")
+			("rmax",  po::value<unsigned>(&rmax)->default_value(500), "maximum repeat multiplicity (max copy number)")
+			("rmin" , po::value<unsigned>(&rmin)->default_value(2), "minimum repeat multiplicity (min copy number)")
+			("seeds", po::value<string>(&seed_file), "seed output file")
+            ("sequence", po::value<string>(&sequence_file), "FastA sequence file")
+			("small-repeats", po::value <bool>(&small_repeats)->default_value(false), "optimize for small repeats")
+			("score-out", po::value<string>(&output2file)->default_value(""), "output with corresponding score and alignment info ")
+			("solid", po::value<bool>(&solid_seed)->default_value(0), "use solid/exact seeds?")
+			("sp", po::value <score_t>(&min_spscore)->default_value(0), "minimum Sum-of-Pairs alignment score")
+			("tandem", po::value <bool>(&allow_tandem)->default_value(true), "allow tandem repeats?")
+			("two-hits", po::value<bool>(&two_hits)->default_value(false), "require two hits within w to trigger gapped extension?")
+			("u", po::value<float>(&pGoUnrelated)->default_value(0.001f), "Transition to Unrelated")			
+			("unalign", po::value<bool>(&unalign)->default_value(true), "unalign non-homologous sequence?")
+			("w", po::value<int>(&w)->default_value(0), "max gap width ")
+			("xmfa", po::value<string>(&xmfa_file)->default_value(""), "XMFA format output")
+            ("xml", po::value<string>(&xml_file)->default_value(""), "XML format output")
+			("z", po::value <unsigned>(&seed_weight)->default_value(0), "seed weight")
+
+        ;
+
+		if( argc < 2 )
+		{
+            cout << desc << "\n";
+            return 1;
+		}
+                
+		
+
+        po::store(po::parse_command_line(argc, argv, desc), vm);
+        po::notify(vm);    
+
+        if (vm.count("help")) {
+            cout << desc << "\n";
+            return 1;
+        }
+
+		if (large_repeats && small_repeats)
+		{
+			cout << "which is it? small or large? can't optimize for both!\n";
+			return 1;
+		}
+		if (seed_weight < 3) {
+            cout << "Invalid seed weight, minimum size is 3!\n";
+            return 1;
+        }
+        if (vm.count("rmin")) {
+            cout << "setting minimum multiplicity to " 
+                 << rmin << ".\n";
+        } else {
+            cout << "Using default minimum multiplicity (2).\n";
+        }
+
+        if (vm.count("rmax")) {
+            cout << "setting maximimum multiplicity to " 
+                 << rmax << ".\n";
+        } else {
+            cout << "Using default maximum multiplicity (500).\n";
+        }
+
+	    if (rmin > rmax) 
+        {
+            cout << "rmin > rmax, setting rmax == rmin\n";
+            rmax = rmin;
+        } 
+        if (rmin < 2)
+        {
+            cout << "rmin < 2, setting rmin == 2\n";
+            rmin = 2;
+        }
+        if (rmax < 2)
+        {
+            cout << "rmax < 2, setting rmax == 2\n"; 
+            rmax = 2;
+        }
+		if (percent_id > 1 )
+			percent_id = 1.0;
+		if (vm.count("z")) {
+            cout << "seed weight set to " 
+                 << seed_weight << ".\n";
+        } else {
+            cout << "Using default seed weight.\n";
+        }
+    }
+    catch(exception& e) {
+        cerr << "error: " << e.what() << "\n";
+        return 1;
+    }
+    catch(...) {
+        cerr << "Exception of unknown type!\n";
+    }
+	
+
+
+	// Roadmap: 
+	// 1. identify seed matches using a Sorted Mer List
+	// 2. create a "UngappedMatchRecord" for each seed match and put in the match record list
+	// 3. create a Match Position Lookup Table
+	// 4. create a multiplicity priority queue
+	// 5. create an (empty) Novel Subset Match Record list
+	// 6. extend all matches!
+	// 7. create a procrastination queue for novel subset matches
+	// 8. extend all novel subset matches!
+	// 9. create a final list of matches
+	// 10. score matches
+	// 11. report matches
+
+
+	//
+	// part 1, load sequence and find seed matches using SML and a repeat class...
+	//
+	MatchList seedml;
+	seedml.seq_filename = vector< string >( 1, sequence_file );
+	seedml.sml_filename = vector< string >( 1, seedml.seq_filename[0] + ".sslist");
+	//seedml.LoadSequences( &cout );
+	LoadSequences( seedml, &cout );
+	if( seed_weight == 0 )
+		seed_weight = (int)((double)getDefaultSeedWeight( seedml.seq_table[0]->length() ) * .9);
+	
+	int seed_rank = 0;
+	if ( solid_seed )
+	{
+		seed_rank = INT_MAX;
+		std::cout << "Using solid seed" << std::endl;
+	}
+	seedml.LoadSMLs( seed_weight, &cout, seed_rank, solid_seed, !load_sml );
+	int64 seed = getSeed( seed_weight, seed_rank);
+	uint seed_size = getSeedLength( seed );
+
+    if (min_spscore < 0 )
+        min_spscore = 0;
+
+	if( w == 0 )
+		w = seed_weight * 3;	// default value
+	else if( w < 0 )
+	{
+		w = 0;
+		chain = false;
+	}
+
+	cout << "Using seed weight: " << seed_weight << " and w: " << w << endl;
+	SeedMatchEnumerator sme;
+	sme.FindMatches( seedml, rmin, rmax, only_direct );
+	
+    // need single nuc & kmer frequency
+	string sequence = seedml.seq_table.at(0)->ToString();
+	string uppercase = sequence;
+	ToUPPER tupperware;
+	std::transform(sequence.begin(),sequence.end(), uppercase.begin(), tupperware);
+    kmersize =1;
+	map<string,gnSeqI> polyfreq;
+	map<string,gnSeqI> monofreq;
+	map<string, gnSeqI>::iterator it;
+	for (gnSeqI i = 0; i <= uppercase.size()-kmersize; i++)
+	{
+	   string kmer = uppercase.substr(i,kmersize);
+	   string nucleotide = uppercase.substr(i,1);
+	   if( nucleotide[0] != 'A' &&
+			nucleotide[0] != 'C' &&
+			nucleotide[0] != 'G' &&
+			nucleotide[0] != 'T' )
+			nucleotide[0] = 'A';
+	   for( size_t kI = 0; kI < kmer.size(); kI++ )
+		   if( kmer[kI] != 'A' &&
+				kmer[kI] != 'C' &&
+				kmer[kI] != 'G' &&
+				kmer[kI] != 'T' )
+				kmer[kI] = 'A';
+
+	   polyfreq[kmer] +=1;	
+	   monofreq[nucleotide] +=1;	
+	   //insert( const string& val );
+	   //it = find( const string& mer );
+       //it->second+=1;	   
+	}
+    
+	Params hmm_params = getAdaptedHoxdMatrixParameters( double(monofreq["G"]+monofreq["C"])/double(sequence.size()) );
+
+	if (percent_id > 0 )
+		adaptToPercentIdentity(hmm_params, percent_id);
+	hmm_params.iGoHomologous = pGoHomo;
+	hmm_params.iGoUnrelated = pGoUnrelated;
+
+	//
+	// part 2, convert to match records
+	//
+	vector< UngappedMatchRecord* > match_record_list(seedml.size());
+	size_t component_count = 0;
+    bool all_components_overlap = false;
+    
+    bool prev_overlaps = false;
+    uint mi_multiplicity = 0;
+    uint mi2_multiplicity = 0;
+    uint num_components = 0;
+    int overlap_size = 1;
+    int hit_match =0;
+   
+	cout << "Total number of seed matches found: " << seedml.size() << endl;
+	vector< pair< int64, UngappedMatchRecord* > > seed_sort_list;
+    for( size_t mI = 0; mI < seedml.size(); ++mI )
+	{
+		UngappedMatchRecord tmp( seedml[mI]->SeqCount(), seedml[mI]->AlignmentLength() );
+		match_record_list[mI] = tmp.Copy();
+		
+		for( size_t seqI = 0; seqI < seedml[mI]->SeqCount(); seqI++ )
+		{
+			match_record_list[mI]->SetStart( seqI, seedml[mI]->Start( seqI ) );
+			match_record_list[mI]->SetLength( seedml[mI]->Length( seqI ), seqI );
+		}
+        seed_sort_list.push_back(make_pair(match_record_list[mI]->LeftEnd(0), match_record_list[mI]));
+        component_count += seedml[mI]->SeqCount();
+        seedml[mI]->Free();
+    }
+    std::sort( seed_sort_list.begin(), seed_sort_list.end() );
+	// write seeds to file if requested
+	
+	ofstream seed_out;
+	if ( seed_file.size() > 0)
+		seed_out.open(seed_file.c_str());
+    //
+	// part 3, create a match position lookup table
+	//
+	vector< pair< gnSeqI, MatchPositionEntry > > mplt_sort_list( component_count );
+	size_t compI = 0;
+	for( size_t mI = 0; mI < seed_sort_list.size(); ++mI )
+	{
+		UngappedMatchRecord* mr = seed_sort_list[mI].second;
+		if ( seed_file.size() > 0)
+			seed_out << *mr << endl;
+		for( size_t seqI = 0; seqI < mr->Multiplicity(); ++seqI )
+			mplt_sort_list[compI++] = make_pair( mr->LeftEnd( seqI ), make_pair( mr, seqI ) );
+	}
+	// pairs get ordered on the first element by default 
+	std::sort( mplt_sort_list.begin(), mplt_sort_list.end() );
+	gnSeqI seq_length = seedml.seq_table[0]->length();
+	MatchPositionLookupTable match_pos_lookup_table( seq_length+1, make_pair( (UngappedMatchRecord*)NULL, 0 ) );
+	for( size_t i = 0; i < mplt_sort_list.size(); ++i )
+    {
+		//if ( seed_file.size() > 0)
+		//	seed_out << (*(UngappedMatchRecord*)mplt_sort_list[i].second.first) << endl;
+        //cerr << mplt_sort_list[i].first << endl;
+		match_pos_lookup_table[ mplt_sort_list[i].first ] = mplt_sort_list[i].second;
+
+    }
+
+	//
+	// part 4, create a procrastination queue
+	//
+	ProcrastinationQueue procrastination_queue( match_record_list );
+
+	//
+	// part 5, create an (empty) Novel Subset Match Record list
+	//
+	vector< GappedMatchRecord* > novel_subset_list;
+
+	size_t superset_count = 0;
+	size_t chainable_count = 0;
+	size_t subset_count = 0;
+	size_t novel_subset_count = 0;
+
+	boost::dynamic_bitset<> left_lookups(seedml.seq_table[0]->length(), false);
+	boost::dynamic_bitset<> right_lookups(seedml.seq_table[0]->length(), false);
+
+	//
+	// part 6, extend all matches!
+	//
+	vector< GappedMatchRecord* > extended_matches;	/**< The extended matches will be chains of UngappedMatchRecords */
+
+	//for extension
+	PairwiseScoringScheme pss = PairwiseScoringScheme(hoxd_matrix,-100,-20);
+	
+	int curI = 0;
+	uint curr_extensions = 0;
+	uint max_extensions = 2;
+	while(  procrastination_queue.end() > 0 )
+	{
+		int prevI = curI;
+		curI +=1;
+		if( (curI * 100) / procrastination_queue.size() != (prevI * 100) / procrastination_queue.size() )
+		{
+			cout << (curI * 100) / procrastination_queue.size() << "%..";
+			cout.flush();
+		}
+		
+		// pop the next match off the heap
+		MatchRecord* umr = procrastination_queue.pop(); 
+		// if the match has been subsumed then skip it
+		if( umr->subsuming_match != NULL )
+			continue;
+		if( umr->dont_extend == true )
+			continue;
+
+//		if( umr == (MatchRecord*)0x01335878 )
+//			cout << "umr:\n" << *(UngappedMatchRecord*)umr << endl;
+
+		GappedMatchRecord* M_i = dynamic_cast<GappedMatchRecord*>(umr);
+		if( M_i == NULL )
+		{
+			// create a new gapped match record for M_i
+			GappedMatchRecord gmr( *(UngappedMatchRecord*)umr );
+			M_i = gmr.Copy();
+			umr->subsuming_match = M_i;
+			M_i->chained_matches.push_back( umr );
+			vector< size_t > component_map( M_i->SeqCount() );
+			for( size_t i = 0; i < component_map.size(); ++i )
+				component_map[i] = i;
+			M_i->chained_component_maps.push_back(component_map);
+			swap(umr->subsumption_component_map, component_map);	// swap avoids reallocation
+			// update superset and subset links
+			for( int dI = 1; dI > -2; dI -= 2 )
+			{
+				MatchLink& ij_link = getSuperset(M_i,dI);
+				if( ij_link.superset != NULL )
+				{
+					ij_link.subset = M_i;
+					unlinkSuperset(umr,dI);
+					int parity = M_i->Orientation(0) == ij_link.superset->Orientation(ij_link.sub_to_super_map[0]) ? 1 : -1;
+					getSubsets(ij_link.superset,-dI*parity).push_back(ij_link);
+				}
+				vector< MatchLink >& subsets = getSubsets(M_i,dI);
+				for( size_t subI = 0; subI < subsets.size(); ++subI )
+				{
+					subsets[subI].superset = M_i;
+					int parity = M_i->Orientation(subsets[subI].sub_to_super_map[0]) == subsets[subI].subset->Orientation(0) ? 1 : -1;
+					getSuperset(subsets[subI].subset, -dI*parity).superset = M_i;
+				}
+				getSubsets(umr,dI).clear();	// so that validate works...
+			}
+		}
+        else
+            cerr << "castdebugme!!\n" << endl;
+     
+        M_i->extended = true;
+		extended_matches.push_back( M_i );
+		
+		// extend the match in each direction 
+		// if a superset exists use that first
+		// otherwise create a neighborhood list
+		int direction = 1;	// leftward == 1, rightward == -1, done == -3
+		//int direction = -1;	// leftward == 1, rightward == -1, done == 3
+		int last_linked = 0;	// stores the group type that was chained.  1 == superset, 2 == chainable, 0 == none
+		vector< NeighborhoodGroup > left_deferred_subsets;
+		vector< NeighborhoodGroup > right_deferred_subsets;
+		vector< NeighborhoodGroup > left_deferred_novel_subsets;
+		vector< NeighborhoodGroup > right_deferred_novel_subsets;
+
+		score_t score = 0;
+		vector< gnSequence* > seqtable( M_i->SeqCount(), seedml.seq_table[0] );
+		vector< string > alignment;
+		vector<score_t> scores;
+		bool extended = false;
+		while( direction > -2 )
+		{
+			last_linked = 0;
+			
+			// check for superset
+			if( getSuperset(M_i, direction).superset != NULL  )
+				supersetLinkExtension( M_i, direction, last_linked, left_deferred_subsets, right_deferred_subsets, chain );
+
+//          else
+//          A hack to allow our chaining to work without novel subsets would be to
+//          perform an additional neighborhood list lookup after superset link
+//          extension even if no chainables are found during link extension.
+			else
+			{
+				//
+				// perform a neighborhood list extension, 
+				// looks for neighboring matches in the match position lookup table
+				// 
+				vector< NeighborhoodGroup > superset_list;
+				vector< NeighborhoodGroup > chainable_list;
+				vector< NeighborhoodGroup > subset_list;
+				vector< NeighborhoodGroup > novel_subset_list;
+				//tjt: ok
+				neighborhoodListLookup( M_i, match_pos_lookup_table,
+								superset_list, chainable_list, subset_list, novel_subset_list,
+								direction, seed_size, w, left_lookups, right_lookups, NULL);
+
+				// tallies for debugging
+				superset_count += superset_list.size();
+				chainable_count += chainable_list.size();
+				subset_count += subset_list.size();
+				
+				// now process each type of neighborhood group
+				// supersets are already done.  happy chrismakwanzuhkkah
+				// then process chainable
+				processChainableMatches( M_i, chainable_list, direction, last_linked, find_novel_subsets, chain );
+
+				// defer subset processing
+				for( size_t gI = 0; gI < subset_list.size(); gI++ )
+				{
+					vector< NeighborhoodGroup >& cur_subset_list = selectList( left_deferred_subsets, right_deferred_subsets, direction );
+					cur_subset_list.push_back( subset_list[gI] );
+				}
+				// defer novel subset processing
+				vector< NeighborhoodGroup >& cur_novel_subset_list = selectList( left_deferred_novel_subsets, right_deferred_novel_subsets, direction );
+				cur_novel_subset_list.clear();	// we only process novel subsets on the very last extension
+				for( size_t gI = 0; gI < novel_subset_list.size(); gI++ )
+					cur_novel_subset_list.push_back( novel_subset_list[gI] );
+
+			} // end if no superset was found then do neighborhood list lookup
+			//if find_novel_subsets not enabled, we can avoid this hack? is this true?
+			if (!find_novel_subsets)
+			{
+				vector< NeighborhoodGroup > superset_list;
+				vector< NeighborhoodGroup > chainable_list;
+				vector< NeighborhoodGroup > subset_list;
+				vector< NeighborhoodGroup > novel_subset_list;
+				neighborhoodListLookup( M_i, match_pos_lookup_table,
+									superset_list, chainable_list, subset_list, novel_subset_list,
+									direction, seed_size, w, left_lookups, right_lookups, NULL);
+
+				// defer subset processing
+				for( size_t gI = 0; gI < subset_list.size(); gI++ )
+				{
+					vector< NeighborhoodGroup >& cur_subset_list = selectList( left_deferred_subsets, right_deferred_subsets, direction );
+					cur_subset_list.push_back( subset_list[gI] );
+				}
+			}
+			// if we didn't do a chaining or superset extension, try a gapped extension
+			if( last_linked == 0 )
+			{
+                double e = 2.71828182845904523536;
+				int rcode =FAILED;
+                bool extend_it = false;
+                 //extend_length = 0;
+				vector<GappedMatchRecord*> novel_matches;	// M_e will contain the extension
+				// when two-hits == true, only attempt gapped extension if M_i already chains more than one match
+                // it's fast enough now that printing to screen actually slows things down...
+				if( extend_chains && (!two_hits || (two_hits && M_i->chained_matches.size() > 1 )))
+					rcode = ExtendMatch(M_i, seqtable, hmm_params, w, direction, novel_matches, gap_open, gap_extend, extension_window);
+                
+				if (rcode == FAILED || rcode == FIXME || novel_matches.size() == 0)
+				{
+					//end gapped extension  whenever extension fails.
+					direction -=2;
+					//direction +=2;
+					continue;
+				}
+                else
+                {
+                    for (size_t mI = 0; mI < novel_matches.size(); mI++ )
+                    {
+                        //if (novel_matches.at(mI)->Multiplicity() != M_i->Multiplicity() )
+                        //    continue;
+                        GappedMatchRecord* M_e = novel_matches.at(mI);
+                        M_e->extended = false;
+                        
+                        if (M_e->Multiplicity() > M_i->Multiplicity())//what does this mean??
+                            continue;
+                        else if (M_e->Multiplicity() == M_i->Multiplicity())
+                        {
+                            //immediately chainable!
+                            if (direction > 0 && mI == novel_matches.size()-1)
+                            {
+                                
+				                extend_it = true;
+                                continue;
+                            }
+                            else if (direction < 0 && mI == 0)
+                            {
+				                extend_it = true;
+                                continue;
+                            }
+                        }
+                        vector< pair< gnSeqI, MatchPositionEntry > > mplt_sort_list( M_e->Multiplicity() );
+                        vector< pair< gnSeqI, MatchPositionEntry > > final_mplt_sort_list;
+	                    size_t compI = 0;
+    	                
+	                    for( size_t seqI = 0; seqI < M_e->Multiplicity(); ++seqI )
+		                    mplt_sort_list[compI++] = make_pair( M_e->LeftEnd( seqI ), make_pair( M_e, seqI ) );
+    	                
+	                    // pairs get ordered on the first element by default 
+	                    std::sort( mplt_sort_list.begin(), mplt_sort_list.end() );
+
+                        //don't use novel match if it clobbers the existing left end in the MPLT
+                        if (use_novel_matches )
+                        {
+                     
+                            bool clobbers_existing_match = false;
+                            for( size_t i = 0; i < mplt_sort_list.size(); ++i)
+                            {
+                                if (match_pos_lookup_table[ mplt_sort_list[i].first ].first != NULL )
+                                {
+                                    clobbers_existing_match = true;
+                                    break;
+                                }
+                            }
+                            if (! clobbers_existing_match )
+                            {
+                                for( size_t i = 0; i < mplt_sort_list.size(); ++i)
+                                    match_pos_lookup_table[ mplt_sort_list[i].first ] =  mplt_sort_list[i].second;
+                            }
+                            
+                        }
+                        //now, during the subsequent call to neighborhoodListLookup(), we should
+                        //find the novel homologous region and process it accordingly...
+                    }
+                }
+                //update links appropriately, and we can take another round
+				//through the evil megaloop, possibly discovering additional chainable
+				//seeds or superset links.
+				
+				// need to update links by looking for matches in the region that was just extended over
+				vector< NeighborhoodGroup > superset_list;
+				vector< NeighborhoodGroup > chainable_list;
+				vector< NeighborhoodGroup > subset_list;
+				vector< NeighborhoodGroup > novel_subset_list;
+
+                //if extend_it is true, it means that we can immediately extend
+                //M_i with the corresponding result from ExtendMatch()
+                if (extend_it)
+                {
+					M_i->extended = true;
+                    //build a component map for the new record
+				    vector< size_t > component_map( M_i->Multiplicity() );
+				    for( size_t i = 0; i < component_map.size(); ++i )
+					    component_map[i] = i;
+
+                    GappedMatchRecord* M_t = NULL;
+                    //leftward extension
+                    if (direction > 0 )
+                        M_t = novel_matches.back();
+                    else
+                        M_t = novel_matches.front();
+                    
+                    neighborhoodListLookup( M_i, match_pos_lookup_table,
+					            superset_list, chainable_list, subset_list, novel_subset_list,
+					            direction, seed_size, w, left_lookups, right_lookups, M_t);
+                    
+                    M_t->subsuming_match = M_i;
+		            M_t->subsumption_component_map = component_map;
+	                M_i->chained_matches.push_back( M_t );
+	                M_i->chained_component_maps.push_back( component_map );
+	                bool changed = extendRange(M_i, M_t, component_map);
+
+                    
+                }
+                else
+                {
+                    
+					if (!M_i->extended)
+						M_i->extended = false;
+                    GappedMatchRecord* M_t = NULL;
+                    if (direction > 0 )
+                        M_t = novel_matches.front();
+                    else
+                        M_t = novel_matches.back();
+
+			        //we can't extend M_i, but we can classify all of the novel
+                    //homologous regions with respect to M_i
+			        neighborhoodListLookup( M_i, match_pos_lookup_table,
+							    superset_list, chainable_list, subset_list, novel_subset_list,
+							    direction, seed_size, w, left_lookups, right_lookups,M_t);
+                    
+                }
+                extended = true;
+				// now process each type of neighborhood group
+				// if we have completely extended through a superset
+				//   then we want to replace that part of the alignment with the superset
+				// if the superset continues beyond the end of at least one component, then 
+				// we want to create a superset link for it, and process it during a link extension
+				if ( superset_list.size() > 0 && chain )
+					processSupersetMatches( M_i, superset_list, direction, last_linked, true );
+			
+				// then process chainable
+				if ( chainable_list.size() > 0 )
+					processChainableMatches( M_i, chainable_list, direction, last_linked, find_novel_subsets, chain );
+
+				// defer subset processing
+				for( size_t gI = 0; gI < subset_list.size(); gI++ )
+				{
+					vector< NeighborhoodGroup >& cur_subset_list = selectList( left_deferred_subsets, right_deferred_subsets, direction );
+					cur_subset_list.push_back( subset_list[gI] );
+				}
+				// defer novel subset processing
+				vector< NeighborhoodGroup >& cur_novel_subset_list = selectList( left_deferred_novel_subsets, right_deferred_novel_subsets, direction );
+				cur_novel_subset_list.clear();	// only process novel subsets from the very last extension
+				for( size_t gI = 0; gI < novel_subset_list.size(); gI++ )
+					cur_novel_subset_list.push_back( novel_subset_list[gI] );
+
+                //just as before, if we didn't extend M_i, change directions and continue on
+                if (!extend_it  )
+                {
+                    direction -=2;
+                    continue;
+                }
+
+                //otherwise, enable another round of gapped extension in this direction.
+            }
+		}	// end loop over leftward and rightward extension
+
+		//
+		// finalize the alignment -- this resolves overlapping components into a single gapped alignment
+		//
+		//tjt: need to send finalize seq_table for muscle alignment
+		if( M_i == (GappedMatchRecord*)0x00d37364 )
+			cerr << "debugmult\n";
+        
+		// finally process novel subset
+		for( int direction = 1; direction >-2; direction -= 2 )
+		{
+			vector< NeighborhoodGroup >& cur_novel_subset_list = selectList( left_deferred_novel_subsets, right_deferred_novel_subsets, direction );
+			processNovelSubsetMatches(M_i, cur_novel_subset_list, find_novel_subsets, procrastination_queue, 
+				seedml.seq_table, direction, w, last_linked, novel_subset_count );
+		}
+
+		//tjt: make sure finalize only gets called once!
+		M_i->finalize(seedml.seq_table);
+	    
+         if( M_i->SeqCount() == 0)//what the hell?
+            continue;
+		//
+		// process deferred subsets
+		//
+		for( int direction = 1; direction >-2; direction -= 2 )
+		{
+			vector< NeighborhoodGroup >& subset_list = selectList( left_deferred_subsets, right_deferred_subsets, direction );
+			NeighborhoodGroupCompare ngc;
+			NeighborhoodGroupComponentCompare ngcc;
+			std::sort( subset_list.begin(), subset_list.end(), ngc );
+			bool prev_linked = false;
+			for( size_t sI = 0; sI < subset_list.size(); ++sI )
+			{
+				bool same_components = false;
+				if( sI > 0 )
+					same_components = ngcc.compare(subset_list[sI], subset_list[sI-1]) == 0;
+				prev_linked = same_components? prev_linked : false;
+
+				// check whether each of these ended up getting subsumed
+				bool subsumed;
+				bool partial;
+				classifySubset( M_i, subset_list[sI], subsumed, partial );
+				MatchRecord* M_j = subset_list[sI].get<0>();
+
+				if( M_j->subsuming_match != NULL )
+				{
+					// sometimes duplicate MatchRecord pointers can exist in the subset list when a subset gets found
+					// during a neighborhood list lookup but was already linked to a neighboring superset
+					// in that case, we just skip the second entry...
+					if(M_j->subsuming_match != M_i )
+						cerr << "Error processing M_i " << M_i << ": match " << M_j << " was already subsumed\n";
+					continue;
+				}
+
+				if( subsumed )
+				{
+					M_j->subsuming_match = M_i;
+					M_j->subsumption_component_map = subset_list[sI].get<1>();
+					unlinkSupersets(M_j);
+					continue;
+				}
+				if( partial )
+				{
+					// create a novel subset record, mark this one as subsumed
+					// just destroy it for now...
+					M_j->dont_extend = true;
+					
+					unlinkSupersets(M_j);
+					for( size_t mjI = 0; mjI < M_j->Multiplicity(); ++mjI )
+                    {
+						if( match_pos_lookup_table[M_j->LeftEnd(mjI)].first == M_j )
+							match_pos_lookup_table[M_j->LeftEnd(mjI)] = make_pair((MatchRecord*)NULL,0);
+					}
+					
+                    continue;
+				}
+
+				if( prev_linked )
+				{
+					// the previous subset has the same components as this one and was linked.
+					// we may consider this one an 'extra' if all components are further away
+					NeighborhoodGroup cur_group = subset_list[sI];
+					subset_list.erase(subset_list.begin() + sI, subset_list.begin() + sI + 1);
+					sI--;
+					size_t dI = 0;
+                    if (subset_list[sI].get<2>().size() < cur_group.get<2>().size())
+                    {
+                        //debugme: why would this happen?
+                        //cerr << "subset_list[" << sI << "].get<2>().size() < cur_group.get<2>().size()" << endl;
+                        //cerr << subset_list[sI].get<2>().size() << " < " <<  cur_group.get<2>().size() << endl;
+                        //genome::breakHere();
+                        continue;
+                    }
+					for( ; dI < cur_group.get<2>().size(); ++dI )
+                    {
+                        // if cur_group.get<2)()[dI] <= subset_list[sI].get<2>()[dI],
+                        // component dI is closer than a component from the current subset
+						if( cur_group.get<2>()[dI] <= subset_list[sI].get<2>()[dI] )
+							break;
+                    }
+                    // all components were the same, yet further away, so consider this an 'extra' subset
+					if( dI == cur_group.get<2>().size() )
+					{
+						// include this in a list of extra subsets
+						boost::dynamic_bitset<> tmp_bs(M_i->Multiplicity());
+						getExtraSubsets( M_i, direction ).push_back( MatchLink( (MatchRecord*)M_i, M_j, tmp_bs, cur_group.get<1>() ) );
+						continue;
+					}
+					// else we've got a subset tie.
+					if(print_warnings)
+						cerr << "Subset tie, erasing M_j\n";
+
+					//tjt: why do we need to erase the subset? later this will mean that we can't chain the two tied subsets..
+					M_j->dont_extend = true;
+					unlinkSupersets(M_j);
+					
+					for( size_t mjI = 0; mjI < M_j->Multiplicity(); ++mjI )
+                    {
+						if( match_pos_lookup_table[M_j->LeftEnd(mjI)].first == M_j )
+							match_pos_lookup_table[M_j->LeftEnd(mjI)] = make_pair((MatchRecord*)NULL,0);
+                    }
+					
+                    continue;
+				}
+
+				int parity = M_i->Orientation( subset_list[sI].get<1>()[0] ) == M_j->Orientation(0) ? 1 : -1;
+				// if we have the following case:
+				// --M_i-->   --M_j--   <--M_i-- ... ... --M_i-->   --M_j--   <--M_i--
+				// then M_j may already be linked to M_i but on the other side
+				if( getSuperset(M_j,direction*parity).superset == M_i )
+					continue;
+				unlinkSuperset( M_j, -direction*parity );
+				// it's outside, just link it in
+				// rebuild the superset component list
+				boost::dynamic_bitset<> comp_list(M_i->Multiplicity(), false);
+
+				for( size_t compI = 0; compI < subset_list[sI].get<1>().size(); ++compI )
+                {
+                    //debugme: why do I need to check this first?
+                    if (  subset_list[sI].get<1>()[compI] != (std::numeric_limits<size_t>::max)())
+                        comp_list.set(subset_list[sI].get<1>()[compI]);
+                }
+                getSuperset(M_j,-direction*parity) = MatchLink( M_i, M_j, comp_list, subset_list[sI].get<1>() );
+				getSubsets(M_i,direction).push_back( getSuperset(M_j,-direction*parity));
+                //getExtraSubsets(M_i,direction).push_back( getSuperset(M_j,-direction*parity));
+				prev_linked = true;
+			}
+			subset_list.clear();
+		}
+	}
+	cout << "\n# of calls to MUSCLE: " << ccount << endl;
+	cout << "------------------------------"  << endl;
+	cout << "superset count: " << superset_count << endl;
+	cout << "chainable count: " << chainable_count << endl;
+	cout << "subset count: " << subset_count << endl;
+	cout << "novel subset count: " << novel_subset_count << endl;
+	cout << "------------------------------"  << endl;
+	// 
+	// part 9, create a final list of local multiple alignments (already done in extended_matches)
+	//
+    vector< GappedMatchRecord* > &final = extended_matches;
+
+	// part 10, score matches
+	
+	//create output stream
+	ostream* output;
+	ostream* output2;
+ 	ofstream score_out_file;
+	ofstream aln_out_file;
+	ofstream stats_out_file;
+	if(stat_file != "" && stat_file != "-")
+		stats_out_file.open( stat_file.c_str() );
+
+	if(outputfile == "" || outputfile == "-")
+		output = &cout;
+	else
+	{
+		aln_out_file.open( outputfile.c_str() );
+		output = &aln_out_file;
+	}
+	if(output2file == "" || output2file == "-")
+		output2 = &cout;
+	else
+	{
+		score_out_file.open( output2file.c_str() );
+		output2 = &score_out_file;
+	}
+	vector< GappedMatchRecord* > scored;
+	vector<score_t> scores_final;
+	score_t score_final = 0;
+    double e = 2.71828182845904523536;
+    vector< GappedMatchRecord* >  filtered_final;
+    int finalsize = final.size();
+    uint alignment_count = 0;
+    
+    cout << "->Computing Sum-of-Pairs score of all lmas..." << endl;
+	for( size_t fI = 0; fI < finalsize; fI++ )
+	{
+	    vector<string> alignment;
+		vector< gnSequence* > seq_table( final[fI]->SeqCount(), seedml.seq_table[0] );
+		mems::GetAlignment(*final[fI], seq_table, alignment);	// expects one seq_table entry per matching component
+		//send temporary output format to file if requested
+        if (alignment.at(0).size() >= min_repeat_length)
+        {
+			if(only_extended)
+			{
+				//we don't want it..
+				if ( alignment.at(0).size() <= seed_size )
+					continue;
+			}
+            score_final = 0;
+            computeSPScore( alignment, pss, scores_final, score_final);
+		    //*output << "#procrastAlignment " << ++alignment_count << endl << *final.at(fI) << endl;
+            final[fI]->spscore = score_final;
+            scored.push_back(final[fI]);
+
+        }
+        else
+            continue;
+
+	}
+    
+	if (!allow_redundant)
+    {
+		cout << "->Removing redudant lmas..." << endl;
+	}
+	//
+	// remove overlapping regions
+	//
+    // 1) create a vector of CompactMatchRecord* with one entry for each nucleotide in the input sequence.
+    //tjt: CompactMatchRecord is an attempt to reduce the space requirements for the method currently used to 
+    //remove overlapping regions
+	vector< CompactMatchRecord* > match_record_nt(sequence.size());
+    for( size_t mI = 0; mI < match_record_nt.size(); ++mI )
+	{
+		CompactUngappedMatchRecord tmp( 1, 1 );
+		match_record_nt[mI] = tmp.Copy();
+		match_record_nt[mI]->SetStart( 0, mI );
+		match_record_nt[mI]->SetLength( 1, 0 );
+        match_record_nt[mI]->subsuming_match = NULL;
+    }
+
+    // 2) sort the result GappedMatchRecords 
+	if (large_repeats)
+		std::sort( scored.begin(), scored.end(), score_by_length );
+	else if (small_repeats)
+		std::sort( scored.begin(), scored.end(), scorecmp );
+	else
+		std::sort( scored.begin(), scored.end(), score_by_sp );
+    for( size_t fI = 0; fI < scored.size(); fI++ )
+    {
+        //this shouldn't be the case, but let's be safe
+	    if (scored.at(fI)->AlignmentLength() < 1)
+            continue;
+
+        //if user wants to remove all overlapping regions among lmas, let's do it!
+        if (!allow_redundant)
+        {
+			
+            //for each match compontent in M_i
+            for ( size_t seqI = 0; seqI < scored.at(fI)->Multiplicity(); seqI++)
+            {
+                //if there is no match, we can't do a thing
+                if( scored.at(fI)->LeftEnd(seqI) == NO_MATCH )
+                    continue;
+
+                //if left/right ends are good, set subsuming_match pointers
+                if (scored.at(fI)->LeftEnd(seqI) < 4000000000u && scored.at(fI)->RightEnd(seqI) < 4000000000u)
+                {
+                    gnSeqI endI = scored.at(fI)->RightEnd(seqI);
+                    gnSeqI startI = scored.at(fI)->LeftEnd(seqI);
+                    for( ; startI < scored.at(fI)->RightEnd(seqI); startI++)
+                    {
+                        //3) Mark each entry in the MatchRecord* vector which corresponds to nucleotides contained within the current GMR.  
+                        //A pointer to the current GMR can be >stored in each entry
+                        if ( match_record_nt.at(startI)->subsuming_match == NULL)
+                            match_record_nt.at(startI)->subsuming_match = scored.at(fI);
+                    }
+                }
+        
+                size_t left_crop_amt = 0;
+                size_t right_crop_amt = 0;
+                gnSeqI startI = scored.at(fI)->LeftEnd(seqI);
+                //4) When a non-null entry is encountered in the vector, crop out that portion of the current GMR
+                while(match_record_nt.at(startI)->subsuming_match != NULL && match_record_nt.at(startI)->subsuming_match != scored.at(fI) && startI < scored.at(fI)->RightEnd(seqI) && scored.at(fI)->Length(seqI) < 4000000000u) 
+                {
+                    startI++;
+                    left_crop_amt++;
+                }
+                if (left_crop_amt > 0)
+                {
+                    if (left_crop_amt >= scored.at(fI)->Length(seqI))
+                        scored.at(fI)->CropLeft( scored.at(fI)->Length(seqI)-1, seqI);
+                    else
+                        scored.at(fI)->CropLeft( left_crop_amt, seqI);
+                }
+                if (scored.at(fI)->LeftEnd(seqI) < 4000000000u && scored.at(fI)->RightEnd(seqI) < 4000000000u && scored.at(fI)->Length(seqI) < 4000000000u)
+                {
+                    startI = scored.at(fI)->RightEnd(seqI)-1;
+                    //4) When a non-null entry is encountered in the vector, crop out that portion of the current GMR
+                    while(match_record_nt.at(startI)->subsuming_match != NULL && match_record_nt.at(startI)->subsuming_match != scored.at(fI) && startI >= scored.at(fI)->LeftEnd(seqI))
+                    {
+                        startI--;
+                        right_crop_amt++;
+                    }
+                }
+                if (right_crop_amt > 0)
+                {
+                    
+                    if (right_crop_amt >= scored.at(fI)->Length(seqI))
+                        scored.at(fI)->CropRight( scored.at(fI)->Length(seqI)-1, seqI);
+                    else
+                        scored.at(fI)->CropRight( right_crop_amt, seqI);
+                }
+            }
+        }
+		//if ( left_crop_amt == 0 && right_crop_amt == 0)
+		//	filtered_final.push_back(scored.at(fI));
+		
+        if (scored.at(fI)->AlignmentLength() >= min_repeat_length )
+        {
+			if(only_extended)
+			{
+				//we don't want it..
+				if ( scored.at(fI)->AlignmentLength() <= seed_size )
+					continue;
+			}
+            // yuck,recalculating sp score to update after removing overlapping regions.. 
+            // couldn't I just subtract from the original score??
+            vector<string> alignment;
+	        vector< gnSequence* > seq_table( scored[fI]->SeqCount(), seedml.seq_table[0] );
+	        mems::GetAlignment(*scored[fI], seq_table, alignment);	// expects one seq_table entry per matching component
+            // 5) put all LMAs above min_repeat_length and min_spscore into final list of scored LMAs
+            score_final = 0;
+            computeSPScore( alignment, pss, scores_final, score_final);
+            scored.at(fI)->spscore  = score_final;
+            // pass it through a tandem repeat filter, too
+            if ((scored.at(fI)->spscore > min_spscore && ( scored.at(fI)->tandem <= allow_tandem)))
+                filtered_final.push_back(scored.at(fI));
+        }
+        
+
+    }
+    cout << "->Writing xmfa & xml output..." << endl;
+    std::sort( filtered_final.begin(), filtered_final.end(), scorecmp );
+    // write the output to xmfa
+    writeXmfa( seedml, filtered_final, xmfa_file );
+
+    // write the output to xml
+    writeXML( seedml, filtered_final, xml_file );
+    
+	// 
+	// part 11, report matches in scored order, by multiplicity then by spscore
+	//
+	output->setf(ios::fixed);
+	output->precision(0);
+
+    
+	for( size_t sI = 0; sI < filtered_final.size(); ++sI )
+	{
+	    *output << "#procrastAlignment " << sI+1 << endl << *filtered_final[sI] << endl;
+	    *output << "Alignment length: " << filtered_final[sI]->AlignmentLength() << endl;
+        *output << "Score: " << filtered_final[sI]->spscore << endl;
+	}
+	
+	///report highest scoring lma for each multiplicity
+    cout << "->Calculating highest scoring lma for each multiplicity..." << endl;
+	stats_out_file.setf(ios::fixed);
+	stats_out_file.precision(0);
+	int prev_multiplicity = 0;
+    uint record_count = 0;
+    for( size_t tI = 0; tI < filtered_final.size(); ++tI )
+    {   if (filtered_final[tI]->Multiplicity() != prev_multiplicity)
+        {    
+            stats_out_file << "#" << record_count+1 << ": r= " << filtered_final[tI]->Multiplicity() << " l= " << filtered_final[tI]->AlignmentLength() << " s= " << filtered_final[tI]->spscore << endl;
+            prev_multiplicity = filtered_final[tI]->Multiplicity();
+            record_count++;
+        }
+        else
+            continue;
+    }
+	// clean up
+    cout << "->Cleaning up..." << endl;
+	for( size_t eI = 0; eI < match_record_list.size(); ++eI )
+		match_record_list[eI]->Free();
+	for( size_t eI = 0; eI < novel_subset_list.size(); ++eI )
+		if( novel_subset_list[eI]->subsuming_match != NULL  )
+			novel_subset_list[eI]->Free();
+	for( size_t eI = 0; eI < extended_matches.size(); ++eI )
+		if( extended_matches[eI]->subsuming_match == NULL )
+			extended_matches[eI]->Free();
+
+	for( size_t seqI = 0; seqI < seedml.seq_table.size(); ++seqI )
+		delete seedml.seq_table[seqI];
+	for( size_t seqI = 0; seqI < seedml.sml_table.size(); ++seqI )
+		delete seedml.sml_table[seqI];
+    
+    cout << "->Done!" << endl;
+	return 0;
+}
+
diff --git a/src/rootTrees.cpp b/src/rootTrees.cpp
new file mode 100644
index 0000000..80eef25
--- /dev/null
+++ b/src/rootTrees.cpp
@@ -0,0 +1,128 @@
+#include "libMems/PhyloTree.h"
+#include "libMems/TreeUtilities.h"
+#include <vector>
+#include <sstream>
+#include <algorithm>
+#include <utility>
+#include <fstream>
+
+using namespace std;
+
+typedef unsigned int uint;
+
+/**
+ * Depth-first search: reports whether query_nodeI lies in the subtree of t
+ * rooted at subtree_nodeI (the subtree root itself counts as contained).
+ * Only the children lists of the visited nodes are followed.
+ */
+bool containsNode( PhyloTree< TreeNode >& t, node_id_t subtree_nodeI, node_id_t query_nodeI )
+{
+	stack< node_id_t > pending;
+	pending.push( subtree_nodeI );
+	while( !pending.empty() )
+	{
+		const node_id_t visiting = pending.top();
+		pending.pop();
+		if( visiting == query_nodeI )
+			return true;
+		// queue every child for a later visit; a leaf contributes nothing
+		for( size_t kidI = 0; kidI < t[visiting].children.size(); kidI++ )
+			pending.push( t[visiting].children[kidI] );
+	}
+	return false;
+}
+
+/**
+ * Re-root t at the internal node new_root, re-orienting the parent/child
+ * links of the nodes so that they lead away from new_root. Throws a string
+ * literal when new_root is a leaf; returns at once when it already is the root.
+ */
+void rerootTree( PhyloTree< TreeNode >& t, node_id_t new_root )
+{
+	// new root must be an internal node
+	if( t[new_root].children.size() == 0 )
+		throw "Can't root on a leaf node";
+	if( new_root == t.root )
+		return;	// already rooted here, nothing to do
+
+	// old root becomes internal: the child whose subtree holds new_root turns into its parent
+	for( uint childI = 0; childI < t[t.root].children.size(); childI++ ){
+		// copy the id: std::remove must not get a reference into the range it compacts
+		const node_id_t toward_new_root = t[t.root].children[childI];
+		if( containsNode( t, toward_new_root, new_root ) )
+		{
+			t[t.root].parents.push_back( toward_new_root );
+			t[t.root].children.erase( std::remove( t[t.root].children.begin(), t[t.root].children.end(), toward_new_root ), t[t.root].children.end() );
+			break;
+		}
+	}
+	// shake the tree out on the new root: its parents become children and, as root, it keeps none
+	t.root = new_root;
+	t[t.root].children.insert( t[t.root].children.end(), t[t.root].parents.begin(), t[t.root].parents.end() );
+	t[t.root].parents.clear();
+
+	stack<node_id_t> node_stack;
+	node_stack.push(t.root);
+	while( node_stack.size() > 0 )
+	{
+		// for each child of the current node: drop the current node from the
+		// child's children and parents lists, turn the child's remaining parents
+		// into children, make the current node its only parent, then visit the child
+		node_id_t cur_node = node_stack.top();
+		node_stack.pop();
+		for( uint childI = 0; childI < t[cur_node].children.size(); childI++ )
+		{
+			TreeNode& child_n = t[t[cur_node].children[childI]];
+			child_n.children.erase( std::remove( child_n.children.begin(), child_n.children.end(), cur_node ), child_n.children.end() );
+			child_n.parents.erase( std::remove( child_n.parents.begin(), child_n.parents.end(), cur_node ), child_n.parents.end() );
+			child_n.children.insert( child_n.children.end(), child_n.parents.begin(), child_n.parents.end() );
+			child_n.parents.assign( 1, cur_node );
+			node_stack.push(t[cur_node].children[childI]);
+		}
+	}
+}
+
+
+// Reads trees one at a time from argv[1]; returns -1 on missing arguments or an unopenable file, else 0.
+int main( int argc, char* argv[] )
+{
+	if( argc < 3 )
+	{
+		cerr << "Usage: rootTrees <nexus input file> <nexus output file>\n";
+		return -1;	// argv[1] and argv[2] are read below, so do not continue without them
+	}
+	string input_filename = argv[1];
+	string output_filename = argv[2];
+	ifstream input_file( input_filename.c_str() );
+	if( !input_file.is_open() )
+	{
+		cerr << "Error opening \"" << input_filename << "\"\n";
+		return -1;
+	}
+	ofstream output_file( output_filename.c_str() );
+	if( !output_file.is_open() )
+	{
+		cerr << "Error opening \"" << output_filename << "\"\n";
+		return -1;
+	}
+	uint tree_count = 0;
+	while( true )
+	{
+		PhyloTree< TreeNode > t;
+		t.readTree( input_file );
+		if( t.size() == 0 )
+			break;	// stop once readTree leaves the tree empty
+		// NOTE(review): rootAtEachNode( t, rooted_trees ) is disabled, so rooted_trees stays empty and nothing is written
+		vector< PhyloTree< TreeNode > > rooted_trees;
+		for( size_t treeI = 0; treeI < rooted_trees.size(); treeI++ )
+			rooted_trees[treeI].writeTree( output_file );
+		tree_count++;
+		if( tree_count % 100 == 0 )
+		{	// every 100 trees: back up over the previous progress text, then print the new count
+			cout << "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b";
+			cout << "Processed " << tree_count << " trees";
+		}
+	}
+	cerr << "Wrote rooted trees to \"" << output_filename << "\"\n";
+	return 0;
+}
\ No newline at end of file
diff --git a/src/scoreALU.cpp b/src/scoreALU.cpp
new file mode 100644
index 0000000..121acd6
--- /dev/null
+++ b/src/scoreALU.cpp
@@ -0,0 +1,729 @@
+/*******************************************************************************
+ * $Id: scoreAlignment.cpp,v 1.14 2004/02/28 00:01:31 darling Exp $
+ * This file is copyright 2002-2004 Aaron Darling.  All rights reserved.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * rights.  Redistribution of this file, in whole or in part is prohibited
+ * without express permission.
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/MatchList.h"
+#include <string>
+#include <fstream>
+#include <sstream>
+#include <map>
+#include "libMems/IntervalList.h"
+#include "libGenome/gnFilter.h"
+#include <boost/program_options/cmdline.hpp>
+#include <boost/program_options.hpp>
+namespace po = boost::program_options;
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+class AluRecord
+{
+	// One record of the repeat table parsed by ReadAluFile; presumably one row of RepeatMasker output (the usage text says "repeatmasker ALUs") -- confirm.
+public:
+	// Zeroes the numeric fields, sets queryId/repeatId/repeatClass to "none" and strand to "+".
+	AluRecord();
+	// Smith-Waterman score of the match
+	int score;
+	// % substitutions in the matching region compared to the consensus
+	float divergent;
+	// % deleted bp
+	float deleted;
+	// % inserted bp
+	float inserted;
+	// name of the query sequence
+	string queryId;
+	// starting position of the match in the query sequence
+	gnSeqI start;
+	// ending position of the match in the query sequence
+	gnSeqI end;
+	// number of bases in the query sequence past the ending position of the match
+	gnSeqI remaining;
+	// strand of the match; "+" is the default, other values mark a match with the complement of the consensus
+	string strand;
+	// name of the matching interspersed repeat
+	string repeatId;
+	// class of the matching repeat
+	string repeatClass;
+	// number of bases in (complement of) the repeat consensus sequence prior to
+	// the beginning of the match (so 0 means that the match extended all the way
+	// to the end of the repeat consensus sequence); not filled in by ReadAluFile
+	gnSeqI prior;
+	// starting position of the match in the database sequence; not filled in by ReadAluFile
+	gnSeqI startDB;
+	// ending position of the match in the database sequence; not filled in by ReadAluFile
+	gnSeqI endDB;
+	gnSeqI length(void);	// absolut(end) - absolut(start), i.e. the span of the match in the query
+};
+
+// Span of the match in the query sequence: absolut(end) - absolut(start), with no +1 added.
+gnSeqI AluRecord::length(void)
+{
+	const gnSeqI span = absolut(static_cast<int64>(end)) - absolut(static_cast<int64>(start));
+	return span;
+}
+// Start from an empty record: numbers zeroed, ids and class "none", strand "+".
+AluRecord::AluRecord()
+	: score(0),
+	  divergent(0.0),
+	  deleted(0.0),
+	  inserted(0.0),
+	  queryId("none"),
+	  start(0),
+	  end(0),
+	  remaining(0),
+	  strand("+"),
+	  repeatId("none"),
+	  repeatClass("none"),
+	  prior(0),
+	  startDB(0),
+	  endDB(0)
+{}
+/**
+ * Read a whitespace-separated repeat table from in_stream into alu_list.
+ * The first three lines are discarded as header. Every following non-blank
+ * line yields one AluRecord, allocated with new and appended to alu_list
+ * (nothing in this function frees the records), whose fields are filled in
+ * column order: score, divergent, deleted, inserted, queryId, start, end,
+ * remaining, strand, repeatId, repeatClass. prior, startDB and endDB are
+ * not read and keep their constructor defaults.
+ * @param in_stream  source of the table
+ * @param alu_list   receives one pointer per parsed line
+ * @param lr         incremented by length() of every record that is added
+ */
+void ReadAluFile( istream& in_stream, vector<AluRecord*>& alu_list, gnSeqI& lr ) 
+{
+	string cur_line;
+	//3 lines of header info
+	getline( in_stream, cur_line );
+	getline( in_stream, cur_line);
+	getline( in_stream, cur_line);
+
+	while( getline( in_stream, cur_line) )
+	{
+		// a blank line (e.g. at the end of the file) carries no record; do not
+		// store an all-default entry for it, which would inflate the count below
+		if( cur_line.find_first_not_of( " \t\r\n" ) == string::npos )
+			continue;
+
+		// blank out the first "(" and the first ")" of the line; presumably they
+		// wrap the remaining column, which could not be read as a number otherwise
+		string::size_type loc = cur_line.find("(");
+		if (loc != string::npos )
+			cur_line.replace(loc,1," ");
+		loc = cur_line.find(")");
+		if (loc != string::npos )
+			cur_line.replace(loc,1," ");
+
+		// allocate only now, so that a skipped line leaves nothing behind
+		AluRecord* alu = new AluRecord();
+		stringstream parse_str( cur_line );
+		parse_str >> alu->score;
+		parse_str >> alu->divergent;
+		parse_str >> alu->deleted;
+		parse_str >> alu->inserted;
+		parse_str >> alu->queryId;
+		parse_str >> alu->start;
+		parse_str >> alu->end;
+		parse_str >> alu->remaining;
+		parse_str >> alu->strand;
+		parse_str >> alu->repeatId;
+		parse_str >> alu->repeatClass;
+		//punt: rest of info not needed
+		//parse_str >> alu->prior;
+		//parse_str >> alu->startDB;
+		//parse_str >> alu->endDB;
+
+		alu_list.push_back(alu);
+		lr+= alu->length();
+	}
+	cout << "number of ALU records in file: " << alu_list.size() << endl;
+}
+
+/**
+ * program to score alignments
+ * reads in a "correct" alignment and a calculated alignment
+ * scores the calculated alignment based on the correct one
+ */
+int main( int argc, char* argv[] ){
+	
+	string alignment_fname;
+	string alu_fname;
+	
+	
+	if( argc < 2 ){
+		cout << "scoreALU <procrastAligner alignment> <repeatmasker ALUs>\n";
+		return -1;
+	}
+	// Declare the supported options.
+	
+	po::variables_map vm;
+	try {
+
+        po::options_description desc("Allowed options");
+        desc.add_options()
+            ("help", "get help message")
+            ("alignment", po::value<string>(&alignment_fname), "procrastAligner alignment")
+			("alus", po::value<string>(&alu_fname), "repeatmasker ALUs")
+        ;
+
+                
+        po::store(po::parse_command_line(argc, argv, desc), vm);
+        po::notify(vm);    
+
+        if (vm.count("help")) {
+            cout << desc << "\n";
+            return 1;
+        }
+
+        
+    }
+    catch(exception& e) {
+        cerr << "error: " << e.what() << "\n";
+        return 1;
+    }
+    catch(...) {
+        cerr << "Exception of unknown type!\n";
+    }
+	
+	
+
+	ifstream align_in;
+	align_in.open( alignment_fname.c_str() );
+	if( !align_in.is_open() ){
+		cerr << "Error opening " << alignment_fname << endl;
+		return -1;
+	}
+	ifstream alu_in;
+	alu_in.open( alu_fname.c_str() );
+	if( !alu_in.is_open() ){
+		cerr << "Error opening " << alu_fname << endl;
+		return -1;
+	}
+try{
+	cout << "Calclutating specificity and sensitivity of procrastAligner on dataset..." << endl;
+	IntervalList alignment;
+	vector<AluRecord*> alus;
+	
+	//total length of all aligned repeats found by procrastAligner
+	gnSeqI lt = 0;
+	//total length of all alignments found by procrastAligner
+	gnSeqI ld = 0;
+	//total length of repeats found by repeatmasker
+	gnSeqI lr=0;
+	gnSeqI ln=0;
+	gnSeqI lp=0;
+	//total length of all regions found only in procrastAligner
+	gnSeqI lo=0;
+	//total length of repeats masked by both programs
+	gnSeqI lc = 0;
+	ReadAluFile( alu_in, alus, lr );
+	alu_in.close();
+	string cur_line;
+	uint seqI = 0;
+    //this will suffice for now, but should plan on using
+	//IntervalList::ReadStandardAlignment or equivalent
+	//to read in XMFA formatted output from procrastAligner
+	pair<int64,int64> pos;
+	vector< vector< pair<int64,int64> > > align_list;
+	vector< pair<int64,int64> > pos_list;
+	map<int64,bool> alncoverage;
+    map<int64,bool> coverage;
+	//list of maps, one for each alignment
+	vector< map<int64,bool> > totcoverage;
+	int64 ccount = 0;
+	while( getline( align_in, cur_line) )
+	{
+		vector< int64 > start_list;
+		getline( align_in, cur_line);
+		stringstream parse_str( cur_line );
+		int64 start = 0;
+		int64 end = 0;
+		int64 length = 0;
+		string aln_len_str;
+		parse_str >> aln_len_str;
+		while( parse_str >> start )
+		{
+			start_list.push_back(start);
+		}
+		getline( align_in, cur_line);
+		stringstream parse_string(cur_line);
+		//parse_str.( cur_line );
+		string lens;
+		parse_string >> lens;
+		uint region_count = 0;	
+		while( parse_string >> length )
+		{
+			//cout << length << endl;
+			if ( region_count >= start_list.size() )
+			{
+				//something's wrong
+				cout << "alu data failed!" << endl;
+				break;
+			}
+			pos.first = start_list.at(region_count);
+			if (start_list.at(region_count) < 0 )
+			{
+				pos.second = start_list.at(region_count)-length;
+				//simply add up the alignment coverage in the map
+				for(int i = 0; i < length; i++)
+				{
+					alncoverage[pos.first-i] = true;
+					coverage[pos.first-i] = true;		
+					ccount++;
+				}
+			}
+			else
+			{
+				pos.second = start_list.at(region_count)+length;
+				//for both strands
+				for(int i = 0; i < length; i++)
+				{	
+					alncoverage[pos.first+i] = true;
+					coverage[pos.first+i] = true;
+					ccount++;
+				}
+			}
+			pos_list.push_back(pos);
+			region_count++;
+		}
+		totcoverage.push_back(alncoverage);
+		alncoverage.clear();
+		align_list.push_back(pos_list);
+		pos_list.clear();
+		
+	}//end of read procrastAligner output hack
+	//alignment.ReadStandardAlignment( align_in );
+	align_in.close();
+	cout << "alu data processed!" << endl;
+	int aluhits = 0;
+	int matches = 0;
+	//a first attempt at generating the sensitivity & specificity of our method
+	//for comparison with zhang&waterman's eulerian path method...
+	//hopefully we pull out these ~290bp repeats in a nice chain in each case
+	//FIXME: is this ok?
+
+	map<int,bool> ignoreAlignment;
+	map<int64,bool> mergedCoverage;
+	map<int64,bool> aluCoverage;
+
+	//Total length of unaligned repeats(false positives?) found by procrastAligner
+	map<int64,bool> lpt;
+	map<int64,bool> lpn;
+
+	//Total length of regions found only in procrastAligner
+	map<int64,bool> lpo;
+
+	
+	map<int,bool> hitlist;
+
+	map<int64,bool> specificity;
+
+	map< uint,pair<int,int> > best_borders;
+	map< uint,pair<int,int> > worst_borders;
+	int64 matchhits = 0;
+    int64 matchhitmult = 0;
+	cout << "checking which alus are aligned" << endl;
+	for ( int j = 0; j < align_list.size(); j++)
+	{
+		//if alufound in any component of curr alignment, consider 'aligned'
+		//if not, throw out to help our sr. specificity
+		
+		//then, for each ALU, see if it is 'covered' by our procrastAlignments.
+		//if so, increase lc2 accordingly
+
+		//for each alignment returned by procrastAligner, highest multiplicity first
+		bool alufound = false;
+
+		//cout << "checking alignment #" << j << " for ALUs..." << endl;
+		for ( int i = 0; i < alus.size(); i++)
+		{
+			
+			//lpt = 0;
+			if (alus.at(i)->strand == "+" )
+			{
+				for ( int a = 0; a < alus.at(i)->length(); a++)
+				{
+					
+
+					//column in alignment coincides with an alu
+					if(totcoverage.at(j).find((alus.at(i)->start)+a) != totcoverage.at(j).end())
+					{
+						
+						alufound = true;
+						//this column in sequence not accounted for
+						if(aluCoverage.find((alus.at(i)->start)+a) == aluCoverage.end())
+						{	
+							lc+=1;
+							
+							//now it is
+							
+						}
+						hitlist[i] = true;
+						aluCoverage[(alus.at(i)->start)+a] = true;
+
+					}
+					
+				}
+			}
+			else
+			{
+				for ( int a = 0; a < alus.at(i)->length(); a++)
+				{
+					if(totcoverage.at(j).find(-1*((alus.at(i)->start)+a)) != totcoverage.at(j).end())
+					{
+						if(aluCoverage.find(-1*((alus.at(i)->start)+a)) == aluCoverage.end())
+						{	
+							lc+=1;
+							
+							
+						}
+						hitlist[i] = true;
+						aluCoverage[-1*((alus.at(i)->start)+a)] = true;
+						//lc+=1;
+						alufound =true;
+					}
+					
+				}
+			}
+		}
+		if(!alufound)
+		{
+			ignoreAlignment[j] = true;
+			cout << "ignoring alignment " << j << endl;
+			
+			//calculate regions only appearing in procrastAligner alignments
+			for(int k = 0; k < align_list.at(j).size();k++)
+			{			
+				gnSeqI len = absolut((int64)align_list.at(j).at(k).second)-absolut((int64)align_list.at(j).at(k).first);
+				for(int n = 0; n<len;n++)
+				{
+					if(align_list.at(j).at(k).first<0)
+						lpo[align_list.at(j).at(k).first-n] = true;
+					else
+						lpo[align_list.at(j).at(k).first+n] = true;
+				}
+			}
+		}
+		else
+		{
+			//cout << "ALU was aligned!" << endl;
+			bool hit = false;
+			bool debug_pos = false;
+			bool inall = true;
+			uint rnum = 0;
+			for(int k = 0; k < align_list.at(j).size();k++)
+			{			
+				gnSeqI len = absolut((int64)align_list.at(j).at(k).second)-absolut((int64)align_list.at(j).at(k).first);
+				for(int n = 0; n<len;n++)
+				{
+					if(align_list.at(j).at(k).first<0)
+					{
+						// j = lma #
+						// k = component #
+						// first,second = start,end pos
+						if(aluCoverage.find(align_list.at(j).at(k).first-n)!= aluCoverage.end())
+						{
+							//find which alu is hit
+							for ( int i = 0; i < alus.size(); i++)
+							{	
+								//is this ok for reverse strand?
+								if( (abs((int)align_list.at(j).at(k).first) >= alus.at(i)->start) && (abs((int)align_list.at(j).at(k).first) < alus.at(i)->end ) 
+								||  (abs((int)align_list.at(j).at(k).second) > alus.at(i)->start) && (abs((int)align_list.at(j).at(k).second) < alus.at(i)->end ) )
+								{
+									//the repeat #
+									if (rnum != i+1 && rnum != 0)
+										inall = false;
+									rnum = i+1;				
+									break;
+								}
+							}
+							//current component of alignment pertains to alu
+							//spec.at(j).push_back(k)
+							lpn[align_list.at(j).at(k).first-n] = true;
+							hit = true;
+						}
+						//motif missed by procrastAligner
+						else
+						{
+							lpt[align_list.at(j).at(k).first-n] = true;
+							rnum = -1;
+						}
+						mergedCoverage[align_list.at(j).at(k).first-n] = true;
+					}
+					else
+					{
+						if(aluCoverage.find(align_list.at(j).at(k).first+n)!= aluCoverage.end())
+						{
+							//find out which alu is hit
+							for ( int i = 0; i < alus.size(); i++)
+							{
+								
+								if( (abs((int)align_list.at(j).at(k).first) >= alus.at(i)->start) && (abs((int)align_list.at(j).at(k).first) < alus.at(i)->end )
+								||  (abs((int)align_list.at(j).at(k).second) > alus.at(i)->start) && (abs((int)align_list.at(j).at(k).second) <= alus.at(i)->end ) )
+								{
+									//the repeat #
+									//cout << rnum << " " << i+1 <<  endl;
+									if (rnum != i+1 && rnum != 0)
+										inall = false;
+									rnum = i+1;
+									break;
+								}
+							}	
+							//current component of alignment pertains to alu
+							lpn[align_list.at(j).at(k).first+n] = true;
+							hit = true;
+						}
+						//motif missed by procrastAligner
+						else
+						{
+							lpt[align_list.at(j).at(k).first+n] = true;
+							rnum = -1;
+						}
+						mergedCoverage[align_list.at(j).at(k).first+n] = true;
+					}
+				}
+				if (rnum <= 0)
+					inall = false;
+
+				if(hit)
+				{
+					matchhits+=1;
+					
+				}
+			}
+			//punt: DONT need to first check if it hits all components!!
+			if (inall || 1)
+			{
+				for(int k = 0; k < align_list.at(j).size();k++)
+				{			
+					gnSeqI len = absolut((int64)align_list.at(j).at(k).second)-absolut((int64)align_list.at(j).at(k).first);
+					uint rnum = 0;
+					
+					if(align_list.at(j).at(k).first<0)
+					{
+						// j = lma #
+						// k = component #
+						// first,second = start,end pos
+						
+						//find which alu is hit
+						for ( int i = 0; i < alus.size(); i++)
+						{
+							//is this ok for reverse strand?
+							if( (abs((int)align_list.at(j).at(k).first) >= alus.at(i)->start) && (abs((int)align_list.at(j).at(k).first) < alus.at(i)->end ) 
+							||  (abs((int)align_list.at(j).at(k).second) > alus.at(i)->start) && (abs((int)align_list.at(j).at(k).second) <= alus.at(i)->end ) )
+							{
+								//the repeat #
+								rnum = i+1;
+								//find overlap
+								int leftend = 0;
+								int rightend = 0;
+								leftend = abs((int)alus.at(i)->start)-abs((int)align_list.at(j).at(k).first);
+								rightend =   abs((int)alus.at(i)->end)-abs((int)align_list.at(j).at(k).second);
+								if (debug_pos && (abs(leftend)>500 || abs(rightend)>500))
+								{
+									cout << "alu\talignment" << endl;
+									cout << alus.at(i)->start << "\t" << align_list.at(j).at(k).first << endl;
+									cout << alus.at(i)->end << "\t" << align_list.at(j).at(k).second << endl;
+
+								}
+								
+								if ( worst_borders.find( rnum ) != worst_borders.end() )
+								{
+									// if component has worse boundaries for this alu, record them
+									if ( abs((int)worst_borders[rnum].first) < abs((int)leftend) )
+										worst_borders[rnum].first = leftend;
+									if ( abs((int)worst_borders[rnum].second) < abs((int)rightend) )
+										worst_borders[rnum].second = rightend;
+									if ( abs((int)best_borders[rnum].first) > abs((int)leftend) )
+										best_borders[rnum].first = leftend;
+									if ( abs((int)best_borders[rnum].second) > abs((int)rightend) )
+										best_borders[rnum].second = rightend;
+								}
+								else
+								{
+									worst_borders[rnum] = make_pair(leftend,rightend);
+									best_borders[rnum] = make_pair(leftend,rightend);
+								}
+								
+								break;
+							}
+						} 
+					}
+					else
+					{
+						//find out which alu is hit
+						for ( int i = 0; i < alus.size(); i++)
+						{
+							//if( (abs((int)align_list.at(j).at(k).first) <= alus.at(i)->start) && (abs((int)align_list.at(j).at(k).second) >= alus.at(i)->end ) )
+							//if( (abs((int)align_list.at(j).at(k).first) >= alus.at(i)->start) && (abs((int)align_list.at(j).at(k).second) <= alus.at(i)->end ) )
+							//if( ((abs((int)align_list.at(j).at(k).first) >= alus.at(i)->start) && (abs((int)align_list.at(j).at(k).first) < alus.at(i)->end ) && (abs((int)align_list.at(j).at(k).second) > alus.at(i)->end) )
+							//||  ((abs((int)align_list.at(j).at(k).second) > alus.at(i)->start) && (abs((int)align_list.at(j).at(k).second) <= alus.at(i)->end ) && (abs((int)align_list.at(j).at(k).first) < alus.at(i)->start) ) )
+							if( (abs((int)align_list.at(j).at(k).first) >= alus.at(i)->start) && (abs((int)align_list.at(j).at(k).first) < alus.at(i)->end ) 
+							||  (abs((int)align_list.at(j).at(k).second) > alus.at(i)->start) && (abs((int)align_list.at(j).at(k).second) <= alus.at(i)->end ) )
+							{
+								//the repeat #
+								rnum = i+1;
+								//find overlap
+								int leftend = 0;
+								int rightend = 0;
+								
+								leftend = abs((int)alus.at(i)->start) -abs((int)align_list.at(j).at(k).first);
+								rightend =   abs((int)alus.at(i)->end)-abs((int)align_list.at(j).at(k).second);
+
+								if (debug_pos && (abs(leftend)>500 || abs(rightend)>500))
+								{
+									cout << "alu\talignment" << endl;
+									cout << alus.at(i)->start << "\t" << align_list.at(j).at(k).first << endl;
+									cout << alus.at(i)->end << "\t" << align_list.at(j).at(k).second << endl;
+
+								}
+								
+								if ( worst_borders.find( rnum ) != worst_borders.end() )
+								{
+									// if component has worse boundaries for this alu, record them		
+									if ( abs((int)worst_borders[rnum].first) < abs((int)leftend) )
+										worst_borders[rnum].first = leftend;
+									if ( abs((int)worst_borders[rnum].second) < abs((int)rightend) )
+										worst_borders[rnum].second = rightend;
+
+									// if component has better boundaries for this alu, record them
+									if ( abs((int)best_borders[rnum].first) > abs((int)leftend) )
+										best_borders[rnum].first = leftend;
+									if ( abs((int)best_borders[rnum].second) > abs((int)rightend) )
+										best_borders[rnum].second = rightend;
+									
+								}
+								else
+								{
+									worst_borders[rnum] = make_pair(leftend,rightend);
+									best_borders[rnum] = make_pair(leftend,rightend);
+								}
+								
+								break;
+							}
+						}
+					
+					}
+
+				}
+			}
+		    
+		}
+		alufound = false;
+	}
+	gnSequence empty_seq;
+	//this is the length of the repeats found by procrastAligner, 
+	//with overlaps removed
+	//remember the alignments to ignore!
+	ofstream boundary_file;
+	alignment_fname.append(".boundary");
+	boundary_file.open(alignment_fname.c_str());
+	map< uint,pair<int,int> >::iterator iter;
+	uint avg_worst_left = 0;
+	uint avg_worst_right = 0;
+	uint avg_best_left = 0;
+	uint avg_best_right = 0;
+	for( iter = worst_borders.begin(); iter != worst_borders.end(); iter++ ) 
+	{
+		avg_worst_left += abs(iter->second.first);
+		avg_worst_right += abs(iter->second.second);
+		boundary_file << "worst boundaries for repeat copy #" << iter->first << "\t left: " << iter->second.first << "\t right: " << iter->second.second << endl;
+	}
+	for( iter = best_borders.begin(); iter != best_borders.end(); iter++ ) 
+	{
+		avg_best_left += abs( iter->second.first);
+		avg_best_right += abs(iter->second.second);
+		boundary_file << "best boundaries for repeat copy #" << iter->first << "\t left: " << iter->second.first << "\t right: " << iter->second.second << endl;
+	}
+
+	if (worst_borders.size() > 0 )
+	{
+		avg_worst_left /= worst_borders.size();
+		avg_worst_right /= worst_borders.size();
+	}
+	else
+	{
+		avg_worst_left = -1;
+		avg_worst_right = -1;
+
+	}
+	if ( best_borders.size() > 0)
+	{
+		avg_best_left /= best_borders.size();
+		avg_best_right /= best_borders.size();
+	}
+	else
+	{
+		avg_best_left = -1;
+		avg_best_right = -1;
+
+	}
+	boundary_file << "left best: \t" << avg_best_left << endl;
+	boundary_file << "right best: \t" << avg_best_right << endl;
+	boundary_file << "left worst: \t" << avg_worst_left << endl;
+	boundary_file << "right worst: \t" << avg_worst_right << endl;
+	boundary_file << "#" << endl;
+	boundary_file.close();
+
+	lt = mergedCoverage.size();
+	//lt2 = coverage.size();
+	ld = coverage.size();
+	lp = lpt.size();
+	ln = lpn.size();
+	lo = lpo.size();
+
+	//length of only ALUs hit by procrastAligner
+	gnSeqI hitlength =0;
+	for(int i =0; i< hitlist.size(); i++)
+		hitlength+= alus.at(i)->length();
+
+	cout << "\nprocrastAlignments processed: " << align_list.size() << endl;
+	cout << "matches processed: " << matches << endl;
+	cout << "Total ALUs found by repeatmasker: " << alus.size() << endl;
+	cout << "Total ALUs hit by procrastAligner: " << hitlist.size() << endl;
+	cout << "ALU hit percentage: " << (float)hitlist.size()/(float)alus.size() << endl;
+
+	//cout << aluCoverage.size() << endl;
+    cout << "\nTotal length of all repeats found by procrastAligner: " << ld << endl;
+	 cout << "Total length of all regions found only in procrastAligner: " << lo << endl;
+	cout << "Total length of all (partially) aligned repeats found by procrastAligner: lt = " << lt << endl;
+	cout << "Total length of unaligned repeats(false positives?) found by procrastAligner: lp = " << lp << endl;
+	//cout << "Total length of ???: ln = " << ln << endl;
+	cout << "Total length of all repeats(ALU) found by repeatmasker: lr = " << lr << endl;
+	cout << "Total length of repeats(ALU) found by repeatmasker hit by procrastAligner: lh =" << hitlength << endl;
+	cout << "Total length of ALU repeats found by both methods: lc = " << lc << endl;
+	
+	//cout << "Sensitivity: lc / lr = " << (double)(lc) / (double)(lr) << endl;
+	//cout << "Specificity: lc / lt = " << (double)(lc) / (double)(lt) << endl;
+	
+	//score changes per Sunday email, focus on filtration
+	cout << "\nSensitivity-old: lc / lh = " << (double)(lc) / (double)(hitlength) << endl;
+	cout << "Specificity-old: lc / lt = " << (double)(lc) / (double)(lt) << endl;
+
+	
+	cout << "\nSensitivity= " << (double)hitlist.size()/(double)alus.size() << endl;
+	cout << "Specificity= " << (double)matchhits/(double)matchhitmult <<  endl;
+
+
+	//TN = ltn
+	//TP = lc
+	//FN = lfn
+	//FP = lp
+}catch( gnException& gne ){
+	cerr << gne << endl;
+}catch( exception& e ){
+	cerr << e.what() << endl;
+}
+
+}
+
+
diff --git a/src/scoreAlignment.cpp b/src/scoreAlignment.cpp
new file mode 100644
index 0000000..782edcd
--- /dev/null
+++ b/src/scoreAlignment.cpp
@@ -0,0 +1,467 @@
+/*******************************************************************************
+ * $Id: scoreAlignment.cpp,v 1.14 2004/02/28 00:01:31 darling Exp $
+ * This file is copyright 2002-2004 Aaron Darling.  All rights reserved.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * rights.  Redistribution of this file, in whole or in part is prohibited
+ * without express permission.
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/MatchList.h"
+#include <string>
+#include <fstream>
+#include <sstream>
+#include <map>
+#include "libMems/IntervalList.h"
+#include "libGenome/gnFilter.h"
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+/** Orders closed intervals by start; nested intervals compare equivalent, so a [p,p] key matches any interval holding p.
+ *  NOTE(review): that equivalence is not transitive -- keys stored together in one map are assumed to be non-nested. */
+class IntervalCompare {
+public:
+	boolean operator()(const pair< gnSeqI, gnSeqI >& a, const pair< gnSeqI, gnSeqI >& b) const{
+		if( ( a.first <= b.first && b.second <= a.second ) ||
+			( b.first <= a.first && a.second <= b.second ) )
+			return false;	// one contains the other, this must be a query, they are equal.
+		return a.first < b.first;	// equal starts always nest (caught above), so .second never has to break a tie
+	}
+};
+
+class IntervalMap {	// abstract lookup from a sequence position to the indices of the intervals covering it
+public:
+	virtual ~IntervalMap() {}	/**< virtual so implementations can be destroyed through an IntervalMap* */
+	virtual void add( gnSeqI left, gnSeqI right ) = 0;	/**< register the closed interval [left,right] */
+	virtual void find( gnSeqI point, vector< uint >& intervals ) const = 0;	/**< append indices of intervals covering point */
+};
+/** IntervalMap kept in a std::map ordered by IntervalCompare; each value is the ordinal of the add() call that stored it. */
+class TreeIntervalMap : public IntervalMap {
+public:
+	TreeIntervalMap() : add_count( 0 ) {}
+	virtual void add( gnSeqI left, gnSeqI right );
+	virtual void find( gnSeqI point, vector< uint >& intervals ) const;
+protected:
+	map< pair< gnSeqI, gnSeqI >, uint, IntervalCompare > iv_map;
+	uint add_count;	/**< number of add() calls so far == index handed to the next interval */
+};
+
+/** Stores [left,right] under the next index; the index advances even when the map rejects the key (iv_map.size() would lag). */
+void TreeIntervalMap::add( gnSeqI left, gnSeqI right ) {
+	const pair< gnSeqI, gnSeqI > cur_pos( left, right );
+	iv_map.insert( map< pair< gnSeqI, gnSeqI >, uint, IntervalCompare >::value_type( cur_pos, add_count++ ) );
+}
+
+/** Appends to intervals the index of each stored interval that IntervalCompare treats as equivalent to [point,point]. */
+void TreeIntervalMap::find( gnSeqI point, vector< uint >& intervals ) const{
+	const pair< gnSeqI, gnSeqI > cur_loc( point, point );
+	map< pair< gnSeqI, gnSeqI >, uint, IntervalCompare >::const_iterator ivmap_iter = iv_map.lower_bound( cur_loc );
+	const map< pair< gnSeqI, gnSeqI >, uint, IntervalCompare >::const_iterator upper_iter = iv_map.upper_bound( cur_loc );
+	for( ; ivmap_iter != upper_iter; ++ivmap_iter ){
+		if( !iv_map.key_comp()( cur_loc, ivmap_iter->first ) &&
+			!iv_map.key_comp()( ivmap_iter->first, cur_loc ) )
+			intervals.push_back( ivmap_iter->second );	// equivalent under the comparator, i.e. nested with [point,point]
+	}
+}
+
+/** IntervalMap that keeps intervals in insertion order and answers point queries with a linear scan. */
+class VectorIntervalMap : public IntervalMap {
+public:
+	virtual void add( gnSeqI left, gnSeqI right );
+	virtual void find( gnSeqI point, vector< uint >& intervals ) const;
+protected:
+	vector< pair< gnSeqI, gnSeqI > > iv_map;	/**< [left,right] pairs; an interval's index is its position here */
+};
+
+/** Appends the closed interval [left,right] to the end of the list. */
+void VectorIntervalMap::add( gnSeqI left, gnSeqI right ) {
+	iv_map.push_back( pair< gnSeqI, gnSeqI >( left, right ) );
+}
+
+/** Appends to intervals the index of every stored interval with left <= point <= right. */
+void VectorIntervalMap::find( gnSeqI point, vector< uint >& intervals ) const{
+	for( uint ivI = 0; ivI < iv_map.size(); ++ivI ){
+		if( !( point < iv_map[ ivI ].first || iv_map[ ivI ].second < point ) )
+			intervals.push_back( ivI );
+	}
+}
+
+/**
+ * Scores a calculated alignment (argv[2]) against a "correct" alignment (argv[1]): each aligned pair of the
+ * correct alignment is classified as true/false positive/negative and summary ratios are printed to stdout.
+ * Optional argv[3] names an evolved sequence file (enables mismatch debugging); argv[4] == "slagan" sets SLAGAN mode.
+ */
+int main( int argc, char* argv[] ){
+	if( argc < 3 ){
+		cout << "scoreAlignment <correct alignment> <calculated alignment> [evolved sequence file] [slagan]\n";
+		return -1;
+	}
+	
+	boolean debug_mismatches = false;	/**< turns on code to debug mismatches in evolved and aligned base pairs */
+	boolean slagan_mode = false;	/**< Set to true if scoring SLAGAN alignments */
+	string correct_fname = argv[ 1 ];
+	string calculated_fname = argv[ 2 ];
+	string evolved_fname;
+	if( argc > 3 ){
+		debug_mismatches = true;
+		evolved_fname = argv[ 3 ];
+	}
+	if( argc > 4 ){
+		string slagan = "slagan";
+		if( slagan == argv[ 4 ] )
+			slagan_mode = true;
+	}
+	ifstream correct_in( correct_fname.c_str() );
+	if( !correct_in.is_open() ){
+		cerr << "Error opening " << correct_fname << endl;
+		return -1;
+	}
+	ifstream calculated_in( calculated_fname.c_str() );
+	if( !calculated_in.is_open() ){
+		cerr << "Error opening " << calculated_fname << endl;
+		return -1;
+	}
+try{
+	IntervalList correct_ivs;
+	IntervalList calculated_ivs;
+	correct_ivs.ReadStandardAlignment( correct_in );
+	correct_in.close();
+	calculated_ivs.ReadStandardAlignment( calculated_in );
+	calculated_in.close();
+	if( correct_ivs.size() == 0 ){	// correct_ivs[0] is dereferenced below to size seq_table
+		cerr << "Error: no alignment intervals found in " << correct_fname << endl;
+		return -1;
+	}
+	gnSequence empty_seq;
+	vector< gnSequence* > seq_table( correct_ivs[0].SeqCount(), &empty_seq );
+	uint seq_count = seq_table.size();
+	const gnFilter* comp_filter = gnFilter::DNAComplementFilter();
+	gnSequence evolved_gnseqs;
+	vector< string > evolved_seqs( seq_count );
+	if( debug_mismatches ){
+		evolved_gnseqs.LoadSource( evolved_fname );
+		for( uint i = 0; i < seq_count; i++ ){
+			evolved_seqs[ i ] = evolved_gnseqs.contig( i ).ToString();
+		}
+	}
+	
+	/** A map of locations of each interval to the interval's array index */
+	vector< IntervalMap* > iv_map;	// NOTE(review): entries are never deleted; they live until the process exits
+	uint seqI = 0;
+	for( ; seqI < seq_count; seqI++ ){
+		if( seqI > 0 && slagan_mode ){
+			iv_map.push_back( new VectorIntervalMap() );
+		}else{
+			iv_map.push_back( new TreeIntervalMap() );
+		}
+
+		for( uint map_ivI = 0; map_ivI < calculated_ivs.size(); map_ivI++ ){
+			pair< gnSeqI, gnSeqI > cur_pos;
+			cur_pos.first = absolut( calculated_ivs[ map_ivI ].Start( seqI ) );
+			cur_pos.second = cur_pos.first + calculated_ivs[ map_ivI ].Length( seqI ) - 1;	// closed interval
+			iv_map[ seqI ]->add( cur_pos.first, cur_pos.second );
+		}
+	}
+	
+	// now compare these alignments somehow (use the evil megaloop)
+	gnSeqI true_pos = 0;	/**< when a base is correctly aligned to an orthologous base */
+	gnSeqI true_neg = 0;	/**< when a base is correctly aligned to a gap */
+	gnSeqI false_pos = 0;	/**< when a base is wrongly aligned to another base */
+	gnSeqI false_neg = 0;	/**< when a base is wrongly aligned to a gap */
+	gnSeqI total = 0;
+	gnSeqI unaligned_fn = 0;	/**< tally for errors due to unaligned regions */
+	gnSeqI unaligned_tn = 0;
+
+	gnSeqI bad_context = 0;
+	gnSeqI multiple_intersection = 0;
+	gnSeqI no_j = 0;
+	
+	for( uint cor_ivI = 0; cor_ivI < correct_ivs.size(); cor_ivI++ ){
+		uint calc_ivI = 0;
+		int64 calc_iv_lend = 0;
+		int64 calc_iv_lendJ = 0;
+		boolean parity_match = true;
+		gnAlignedSequences cor_gnas;
+		gnAlignedSequences calc_gnas;
+		correct_ivs[ cor_ivI ].GetAlignedSequences( cor_gnas, seq_table );
+		
+		for( seqI = 0; seqI < seq_count; seqI++ ){
+			int64 cor_iv_lend = correct_ivs[ cor_ivI ].Start( seqI );
+			if( cor_iv_lend == NO_MATCH )
+				continue;	// not defined in seqI, skip it
+				
+			for( uint seqJ = 0; seqJ < seq_count; seqJ++ ){
+				if( seqI == seqJ )
+					continue;
+
+				int64 cor_iv_lendJ = correct_ivs[ cor_ivI ].Start( seqJ );
+
+				/** base index for seqI in correct alignment */
+				int64 baseI = cor_iv_lend < 0 ? -correct_ivs[ cor_ivI ].Length( seqI ) + 1 : 0;
+				/** base index for seqJ in correct alignment */
+				int64 baseJ = cor_iv_lendJ < 0 ? -correct_ivs[ cor_ivI ].Length( seqJ ) + 1 : 0;
+				int64 calc_baseI;	/**< The current base pair in sequence I of the calculated alignment */
+				int64 calc_baseJ;	/**< The current base pair in sequence J of the calculated alignment */
+				int64 calc_colI = 0;	/**< The current column of the calculated alignment */
+				// update calc_* variables with the current seqI/seqJ pair
+				if( calc_ivI < calculated_ivs.size() && calc_iv_lend != 0 ){
+					calc_iv_lend = calculated_ivs[ calc_ivI ].Start( seqI );
+					calc_iv_lendJ = calculated_ivs[ calc_ivI ].Start( seqJ );
+					calc_baseI = calculated_ivs[ calc_ivI ].Start( seqI );
+					calc_baseJ = calculated_ivs[ calc_ivI ].Start( seqJ );
+					if( ( calc_iv_lend > 0 && cor_iv_lend > 0 ) || ( calc_iv_lend < 0 && cor_iv_lend < 0 ) ){
+						parity_match = true;
+						calc_baseI += calc_baseI < 0 ? -calculated_ivs[ calc_ivI ].Length( seqI ) + 1 : 0;
+						calc_baseJ += calc_baseJ < 0 ? -calculated_ivs[ calc_ivI ].Length( seqJ ) + 1 : 0;
+					}else{
+						parity_match = false;
+						calc_baseI += calc_baseI > 0 ? calculated_ivs[ calc_ivI ].Length( seqI ) - 1 : 0;
+						calc_baseJ += calc_baseJ > 0 ? calculated_ivs[ calc_ivI ].Length( seqJ ) - 1 : 0;
+					}
+					calc_colI = parity_match ? 0 : calc_gnas.alignedSeqsSize() - 1;
+					// scan calc_colI to the first actual residue
+					boolean saw_baseJ = false;
+					while( true ){
+						if( calc_colI < 0 || calc_colI >= calc_gnas.alignedSeqsSize() ){
+							cerr << "Error locating residue in alignment, calculated alignment is corrupt\n";
+							break;
+						}
+						if( calc_gnas.sequences[ seqI ][ calc_colI ] == '-' ){
+							if( calc_gnas.sequences[ seqJ ][ calc_colI ] != '-' ){
+								calc_baseJ += parity_match ? 1 : -1;
+								saw_baseJ = true;
+							}
+							calc_colI += parity_match ? 1 : -1;
+								
+						}else
+							break;
+					}
+					// if seqJ still contains a gap in calc_baseJ we haven't actually seen calc_baseJ yet
+					if( !saw_baseJ && calc_gnas.sequences[ seqJ ][ calc_colI ] == '-' ){
+						calc_baseJ += parity_match ? -1 : 1;
+					}
+				}
+				
+				for( gnSeqI colI = 0; colI < cor_gnas.alignedSeqsSize(); colI++ ){
+					if( cor_gnas.sequences[ seqI ][ colI ] == '-' ){
+						if( cor_gnas.sequences[ seqJ ][ colI ] != '-' )
+							baseJ++;
+						continue;
+					}else if( seqJ < seqI && ( cor_gnas.sequences[ seqJ ][ colI ] != '-' )){
+						// this one was already scored when seqI had the current value of seqJ
+						baseI++;
+						baseJ++;
+						continue;
+					}
+
+					total++;	/** this aligned pair counts towards the totals */
+					
+					// calculate the actual base index in seqJ for the correct alignment
+					int64 cor_baseJ = cor_iv_lendJ + baseJ;
+
+					// check if the current correct alignment entry for seqI is in
+					// the current interval of the calculated alignment
+					// if not, scan through the calculated intervals until we find the right one
+					// also check whether cor_baseJ fits (for the benefit of shuffle-lagan)
+					if( calc_iv_lend == 0 || !(absolut( calc_iv_lend ) <= absolut( cor_iv_lend + baseI ) &&
+						  absolut( cor_iv_lend + baseI ) < absolut( calc_iv_lend ) + calculated_ivs[ calc_ivI ].Length( seqI ) &&
+						  absolut( calc_iv_lendJ ) <= absolut( cor_baseJ ) &&
+						  absolut( cor_baseJ ) < absolut( calc_iv_lendJ ) + calculated_ivs[ calc_ivI ].Length( seqJ ) - 1 ) ){
+
+						boolean possibly_incorrect = false;
+						vector< uint > possible_ivsI, possible_ivsJ;
+						iv_map[ seqI ]->find( absolut( cor_iv_lend + baseI ), possible_ivsI );
+						iv_map[ seqJ ]->find( absolut( cor_baseJ ), possible_ivsJ );
+						calc_ivI = calculated_ivs.size();
+						if( possible_ivsI.size() == 0 )
+							no_j++;
+						// determine the intersection of possible_ivI and possible_ivJ
+						vector< uint > intersection;
+						uint pivI = 0;
+						for( ; pivI < possible_ivsI.size(); pivI++ ){
+							possibly_incorrect = true;
+							uint pivJ = 0;
+							for( ; pivJ < possible_ivsJ.size(); pivJ++ ){
+								int64 s = absolut( calculated_ivs[ possible_ivsJ[ pivJ ] ].Start( seqJ ) );	// left end of the candidate in seqJ
+								if( !( s <= absolut( cor_baseJ ) && absolut( cor_baseJ ) <= s + calculated_ivs[ possible_ivsJ[ pivJ ] ].Length( seqJ ) - 1 ) )
+									cerr << "cor_baseJ doesn't fit!\n";	// sanity check on the iv_map lookup; same bounds as stored in iv_map
+								if( possible_ivsI[ pivI ] == possible_ivsJ[ pivJ ] )
+									intersection.push_back( pivI );
+							}
+						}
+						if( intersection.size() > 0 ){
+							calc_ivI = possible_ivsI[ intersection[ 0 ] ];
+							calc_iv_lend = calculated_ivs[ calc_ivI ].Start( seqI );
+							calc_iv_lendJ = calculated_ivs[ calc_ivI ].Start( seqJ );
+						}
+						if( intersection.size() > 1 ){
+							multiple_intersection++;
+						}
+
+						// if we couldn't find baseI anywhere in the calculated alignment then treat
+						// it as aligned to a gap, otherwise
+						// update the gnAlignedSequences object for the new interval
+						if( calc_ivI < calculated_ivs.size() ){
+							calculated_ivs[ calc_ivI ].GetAlignedSequences( calc_gnas, seq_table );
+							calc_baseI = calc_iv_lend;
+							calc_baseJ = calc_iv_lendJ;
+							if( ( calc_iv_lend > 0 && cor_iv_lend > 0 ) || ( calc_iv_lend < 0 && cor_iv_lend < 0 ) ){
+								parity_match = true;
+								calc_baseI += calc_baseI < 0 ? -calculated_ivs[ calc_ivI ].Length( seqI ) + 1 : 0;
+								calc_baseJ += calc_baseJ < 0 ? -calculated_ivs[ calc_ivI ].Length( seqJ ) + 1 : 0;
+							}else{
+								parity_match = false;
+								calc_baseI += calc_baseI > 0 ? calculated_ivs[ calc_ivI ].Length( seqI ) - 1 : 0;
+								calc_baseJ += calc_baseJ > 0 ? calculated_ivs[ calc_ivI ].Length( seqJ ) - 1 : 0;
+							}
+							calc_colI = parity_match ? 0 : calc_gnas.alignedSeqsSize() - 1;
+							boolean saw_baseJ = false;
+							while( true ){
+								if( calc_colI < 0 || calc_colI >= calc_gnas.alignedSeqsSize() ){
+									cerr << "Error locating residue in alignment, calculated alignment is corrupt\n";
+									break;
+								}
+								if( calc_gnas.sequences[ seqI ][ calc_colI ] == '-' ){
+									if( calc_gnas.sequences[ seqJ ][ calc_colI ] != '-' ){
+										calc_baseJ += parity_match ? 1 : -1;
+										saw_baseJ = true;
+									}
+									calc_colI += parity_match ? 1 : -1;
+										
+								}else
+									break;
+							}
+							// if seqJ still contains a gap in calc_baseJ we haven't actually seen calc_baseJ yet
+							if( !saw_baseJ && calc_gnas.sequences[ seqJ ][ calc_colI ] == '-' ){
+								calc_baseJ += parity_match ? -1 : 1;
+							}
+
+						}else{
+							if( possibly_incorrect ){
+								// aligned to the wrong context
+								bad_context++;
+								false_pos++;
+								if( cor_gnas.sequences[ seqJ ][ colI ]  != '-' )
+									baseJ++;
+							}else if( cor_gnas.sequences[ seqJ ][ colI ]  != '-' ){
+								// wrongly aligned to a gap
+								unaligned_fn++;
+								false_neg++;
+								baseJ++;
+							}else{
+								// correctly aligned to a gap
+								unaligned_tn++;
+								true_neg++;
+							}
+							baseI++;
+							calc_iv_lend = 0;	// reset calc_iv_lend
+							continue;
+						}
+					}
+
+					int64 diffI;	// number of seqI residues between the current calculated position and the target base
+					if( parity_match )
+						diffI = baseI + cor_iv_lend - calc_baseI;
+					else
+						diffI = baseI + cor_iv_lend + calc_baseI;
+					
+					gnSeqI cbI = 0, cbJ = 0;
+					while( cbI < diffI ){	// NOTE(review): assumes diffI >= 0 -- TODO confirm
+						gnSeqI next_colI = parity_match ? calc_colI + 1 : calc_colI - 1;
+						if ( next_colI > 100000000 )
+							cerr << "bug?\n";
+						if( calc_gnas.sequences[ seqI ][ next_colI ] != '-' )
+							cbI++;
+						if( calc_gnas.sequences[ seqJ ][ next_colI ] != '-' )
+							cbJ++;
+						calc_colI += parity_match ? 1 : -1;
+					}
+
+					calc_baseI += parity_match ? cbI : -cbI;
+					calc_baseJ += parity_match ? cbJ : -cbJ;
+					// if cor_baseJ == calc_baseJ then this pair of sequences were correctly aligned!
+					// classify the correctness of the aligned pair
+					char cor_chI = cor_gnas.sequences[ seqI ][ colI ];
+					char cor_chJ = cor_gnas.sequences[ seqJ ][ colI ];
+					char calc_chI = calc_gnas.sequences[ seqI ][ calc_colI ];
+					char calc_chJ = calc_gnas.sequences[ seqJ ][ calc_colI ];
+					if( cor_iv_lend < 0 ){
+						cor_chI = comp_filter->Filter( cor_chI );
+					}
+					if( cor_iv_lendJ < 0 ){
+						cor_chJ = comp_filter->Filter( cor_chJ );
+					}
+					if( calc_iv_lend < 0 ){
+						calc_chI = comp_filter->Filter( calc_chI );
+					}
+					if( calc_iv_lendJ < 0 ){
+						calc_chJ = comp_filter->Filter( calc_chJ );
+					}
+					if( cor_chI != calc_chI && debug_mismatches ){
+						if( evolved_seqs[ seqI ][ absolut( calc_baseI ) - 1 ] == cor_chI ){
+							cerr << "The calculated alignment has incorrect base: " << calc_chI;
+							cerr << " instead of " << evolved_seqs[ seqI ][ absolut( calc_baseI ) - 1 ]; 
+							cerr << " at " << absolut( calc_baseI ) << " in sequence " << seqI << endl;
+						}else{
+							cerr << "The \"correct\" alignment has incorrect base: " << cor_chI;
+							cerr << " instead of " << evolved_seqs[ seqI ][ absolut( calc_baseI ) - 1 ]; 
+							cerr << " at " << absolut( calc_baseI ) << " in sequence " << seqI << endl;
+						}
+					}
+
+					if( calc_chJ != '-' ){
+						// make sure the calculated base actually matches the original sequence
+						if( debug_mismatches && calc_chJ != evolved_seqs[ seqJ ][ absolut( calc_baseJ ) - 1 ] ){
+							cerr << "The calculated alignment has incorrect base: " << calc_chJ;
+							cerr << " instead of " << evolved_seqs[ seqJ ][ absolut( calc_baseJ ) - 1 ]; 
+							cerr << " at " << absolut( calc_baseJ ) << " in sequence " << seqJ << endl;
+						}
+						if( cor_chJ != '-' &&
+							( ( parity_match && cor_baseJ == calc_baseJ ) ||
+							( !parity_match && cor_baseJ == -calc_baseJ ) ) ){
+							true_pos++;
+							// sanity check that the bases are really identical:
+							if( cor_chI != calc_chI || cor_chJ != calc_chJ )
+								cerr << "Calculated alignment contains a different base than the correct!\n";
+						}else if( cor_chJ == '-' )
+							false_neg++;
+						else
+							false_pos++;
+					}else{
+						if( cor_chJ == '-' )
+							true_neg++;
+						else
+							false_pos++;
+					}
+
+					if( cor_gnas.sequences[ seqJ ][ colI ] != '-' )
+						baseJ++;
+					baseI++;
+				}
+			}
+		}
+	}
+
+	cout << "Sensitivity: TP / TP + FN = " << (double)(true_pos) / (double)(true_pos + false_neg) << endl;
+	cout << "Specificity: TN / TN + FP = " << (double)(true_neg) / (double)(true_neg + false_pos) << endl;
+	cout << "TP + TN / total = " << (double)(true_pos + true_neg) / (double)(total) << endl;
+	cout << "FP + FN / total = " << (double)(false_pos + false_neg) / (double)(total) << endl;
+	cout << "unaligned error = " << (double)unaligned_fn / (double)total << endl;
+	cout << "bad_context = " << (double)bad_context / (double)total << endl;
+	cout << "multiple_intersection = " << (double)multiple_intersection / (double)total << endl;
+	cout << "no_j = " << (double)no_j / (double)total << endl;
+	
+}catch( gnException& gne ){
+	cerr << gne << endl;
+}catch( exception& e ){
+	cerr << e.what() << endl;
+}
+
+}
+
+
diff --git a/src/scoreProcrastAlignment.cpp b/src/scoreProcrastAlignment.cpp
new file mode 100644
index 0000000..3729bc3
--- /dev/null
+++ b/src/scoreProcrastAlignment.cpp
@@ -0,0 +1,458 @@
+/*******************************************************************************
+ * $Id: scoreAlignment.cpp,v 1.14 2004/02/28 00:01:31 darling Exp $
+ * This file is copyright 2002-2004 Aaron Darling.  All rights reserved.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * rights.  Redistribution of this file, in whole or in part is prohibited
+ * without express permission.
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/CompactGappedAlignment.h"
+#include "libMems/MatchList.h"
+#include <string>
+#include <fstream>
+#include <sstream>
+#include <map>
+#include "libMems/IntervalList.h"
+#include "libGenome/gnFilter.h"
+#include <boost/program_options/cmdline.hpp>
+#include <boost/program_options.hpp>
+#include <boost/tuple/tuple.hpp>
+#include <algorithm>
+namespace po = boost::program_options;
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+// basic data structures
+
+/**
+ * One column of a pairwise alignment: the coordinate and the alignment
+ * character contributed by each of the two sequences.
+ */
+struct aligned_coords_s {
+	int64 pos1;	/**< coordinate in the first sequence */
+	int64 pos2;	/**< coordinate in the second sequence */
+	char char1;	/**< alignment character of the first sequence */
+	char char2;	/**< alignment character of the second sequence */
+};
+typedef struct aligned_coords_s aligned_coords_t;
+
+
+/**
+ * Ordering for aligned_coords_t: ascending by |pos1|, ties broken by
+ * ascending |pos2|.  Only magnitudes are compared, so the sign of a
+ * coordinate has no influence on the order.
+ */
+class AlignedCoordSeqIComparator {
+public:
+	boolean operator()(const aligned_coords_t& a, const aligned_coords_t& b) const{
+		// primary key differs: it alone decides
+		if( abs(a.pos1) != abs(b.pos1) )
+			return abs(a.pos1) < abs(b.pos1);
+		// primary key ties: fall back to the second coordinate
+		return abs(a.pos2) < abs(b.pos2);
+	}
+};
+
+/**
+ * Build the list of aligned coordinate pairs between two sequences.
+ *
+ * Every alignment column of every interval in iv_list yields one entry in
+ * coord_list holding the characters of seqI and seqJ in that column and their
+ * coordinates; a coordinate of 0 is stored for a sequence that has a gap
+ * ('-') in the column.  The finished list is sorted with
+ * AlignedCoordSeqIComparator.
+ *
+ * @param seqI        index of the first sequence
+ * @param seqJ        index of the second sequence
+ * @param iv_list     intervals to read; the first match of each must be a GappedAlignment
+ * @param coord_list  output; any previous contents are replaced
+ * @param seq_table   sequence table handed through to GetAlignment()
+ * @throws const char*  when an interval's first match is not a GappedAlignment
+ */
+void constructCoordList( uint seqI, uint seqJ, IntervalList& iv_list, vector< aligned_coords_t >& coord_list, vector< gnSequence* >& seq_table ){
+
+	// size the output up front: one slot per alignment column over all intervals
+	gnSeqI total_columns = 0;
+	for( int ivI = 0; ivI < iv_list.size(); ivI++ )
+		total_columns += iv_list[ivI].AlignmentLength();
+	coord_list = vector< aligned_coords_t >( total_columns );
+
+	// walk each interval column by column, writing entries in place
+	gnSeqI next_slot = 0;
+	for( int ivI = 0; ivI < iv_list.size(); ivI++ ){
+		GappedAlignment* aln = dynamic_cast< GappedAlignment* >( iv_list[ ivI ].GetMatches()[0] );
+		if( aln == NULL )
+			throw "Error:  expecting interval to contain a single GappedAlignment";
+
+		int64 cursorI = aln->Start( seqI );
+		int64 cursorJ = aln->Start( seqJ );
+
+		// a negative start marks a reverse complement: begin at the other
+		// (right) side so that incrementing walks toward the start value
+		if( cursorI < 0 )
+			cursorI -= aln->Length( seqI ) - 1;
+		if( cursorJ < 0 )
+			cursorJ -= aln->Length( seqJ ) - 1;
+
+		const std::vector< std::string >& align_matrix = GetAlignment( *aln, seq_table );
+		for( gnSeqI colI = 0; colI < aln->Length(); colI++ ){
+			aligned_coords_t& entry = coord_list[ next_slot++ ];
+			entry.char1 = align_matrix[ seqI ][ colI ];
+			entry.char2 = align_matrix[ seqJ ][ colI ];
+
+			// a gap gets coordinate 0 and does not advance that sequence's cursor
+			if( entry.char1 == '-' )
+				entry.pos1 = 0;
+			else
+				entry.pos1 = cursorI++;
+
+			if( entry.char2 == '-' )
+				entry.pos2 = 0;
+			else
+				entry.pos2 = cursorJ++;
+		}
+	}
+
+	// order the pairs by aligned position
+	sort( coord_list.begin(), coord_list.end(), AlignedCoordSeqIComparator() );
+}
+
+
+// File-scope handle to the filter returned by gnFilter::DNAComplementFilter().
+// NOTE(review): nothing in the visible part of this file reads comp_filter -- confirm whether it is still needed.
+const gnFilter* comp_filter = gnFilter::DNAComplementFilter();
+
+/**
+ * Comparison functor that orders pair-like objects by their `first` member
+ * only; `second` never takes part in the comparison, so two pairs with equal
+ * `first` are treated as equivalent.
+ *
+ * @tparam PairType  any type exposing a `first` member comparable with operator<
+ */
+template< class PairType >
+class PairFirstComparator
+{
+public:
+	// const-qualified: the functor has no state, and this lets it be invoked
+	// through const objects and const references as well
+	bool operator()( const PairType& a, const PairType& b ) const
+	{
+		return a.first < b.first;
+	}
+};
+
+/**
+ * Score a calculated set of intervals against a reference alignment and print
+ * the result to standard output.
+ *
+ * For every pair of sequences, each pair of positions aligned in `correct` is
+ * looked up in the intervals of `procrastinated`.  The pair counts as a true
+ * positive when some interval contains both positions in two different
+ * components, the relative orientation agrees, and -- if the two positions are
+ * not in the same column -- no column between them aligns both components.
+ * Reported values: sum-of-pairs sensitivity, match component PPV and pairwise
+ * match component PPV.
+ *
+ * @param correct          the reference ("correct") alignment
+ * @param procrastinated   calculated intervals; the first match of each is
+ *                         expected to be a CompactGappedAlignment<>
+ * @param concat_sequence  sequence whose contigs supply the length of each
+ *                         individual sequence (used to translate coordinates)
+ * @throws const char*     when an interval's first match is not a CompactGappedAlignment<>
+ */
+void compareAlignmentsAceD( IntervalList& correct, IntervalList& procrastinated, gnSequence& concat_sequence )
+{
+	gnSeqI sp_truepos = 0;
+	gnSeqI sp_possible = 0;
+
+	uint seqI = 0;
+	uint seqJ = 0;
+	// for now, use this value to create a unique identifier for the pairwise_component_hits bitset vector
+//	uint MAX_MULTIPLICITY = 1000;
+	uint seq_count = concat_sequence.contigListLength();
+
+	// create a data structure that indicates the start offset in concatenated coordinates for a given sequence
+	vector< gnSeqI > concat_coords(seq_count+1, 0);
+	for( size_t seqI = 0; seqI < seq_count; ++seqI )
+	{
+		concat_coords[seqI+1] = concat_coords[seqI] + concat_sequence.contigLength(seqI);
+	}
+
+	// each tracker stores the interval's index in procrastinated and the component of the interval
+	// for each position of the concatenated sequence.
+	typedef std::pair< size_t, uint > iv_tracker_t;
+	typedef vector< iv_tracker_t, boost::pool_allocator< iv_tracker_t > > tracker_vector_t;
+	typedef vector< tracker_vector_t, boost::pool_allocator< tracker_vector_t > > coord_iv_map_vector_t;
+	// create a map from sequence position to (interval,component) for the total length of the concat sequence
+	// use boost pool allocators since this never needs to get freed
+	coord_iv_map_vector_t* tmp = new coord_iv_map_vector_t( concat_coords.back() + 1 );	// heap allocate to avoid destruction when the stack frame is popped
+	coord_iv_map_vector_t& coord_iv_map = *tmp;
+	vector< size_t > coord_iv_counts( concat_coords.back() + 1, 0 );
+	// first count the number of ivs that contain each position so we know how much to allocate
+	for( size_t calcI = 0; calcI < procrastinated.size(); ++calcI )
+	{
+		Interval& iv = procrastinated[calcI];
+		for( size_t seqI = 0; seqI < iv.SeqCount(); ++seqI )
+		{
+			const gnSeqI lend = iv.LeftEnd(seqI);
+			if( lend == NO_MATCH )
+				continue;	// this shouldn't happen with procrastAligner output, but let's be safe
+			const gnSeqI rend = iv.RightEnd(seqI);
+			for( size_t posI = lend; posI <= rend; ++posI )
+				coord_iv_counts[posI]++;
+		}
+	}
+	// now allocate space for the map
+	for( size_t mapI = 0; mapI < coord_iv_map.size(); ++mapI )
+		coord_iv_map[mapI].resize( coord_iv_counts[mapI] );
+
+	std::fill( coord_iv_counts.begin(), coord_iv_counts.end(), 0 );	// recycle this storage to count the number added thus far
+
+	// finally, populate the map
+	for( size_t calcI = 0; calcI < procrastinated.size(); ++calcI )
+	{
+		Interval& iv = procrastinated[calcI];
+		for( size_t seqI = 0; seqI < iv.SeqCount(); ++seqI )
+		{
+			const gnSeqI lend = iv.LeftEnd(seqI);
+			if( lend == NO_MATCH )
+				continue;	// this shouldn't happen with procrastAligner output, but let's be safe
+			const gnSeqI rend = iv.RightEnd(seqI);
+			for( size_t posI = lend; posI <= rend; ++posI )
+			{
+				coord_iv_map[posI][coord_iv_counts[posI]] = make_pair( calcI, seqI );
+				coord_iv_counts[posI]++;
+			}
+		}
+	}
+
+	size_t all_component_count = 0;
+	size_t all_component_pair_count = 0;
+	size_t component_pair_count = 0;
+	// create a vector of bitsets for each iv to store whether their components were correctly aligned
+	vector< bitset_t > component_hits( procrastinated.size() );
+	// Follow Aaron's lead and store pairwise component hits in bitset_t vector
+	vector< bitset_t > pairwise_component_hits( procrastinated.size() );
+	for( size_t ivI = 0; ivI < component_hits.size(); ++ivI )
+	{
+		// make sure this value is always greater than the largest max multiplicity
+//		if( MAX_MULTIPLICITY < procrastinated[ivI].SeqCount())
+//			MAX_MULTIPLICITY *= 10;
+		// possible pairwise component combinations for this interval
+//		component_pair_count = ( ( procrastinated[ivI].SeqCount() *  (procrastinated[ivI].SeqCount() - 1) ) / 2 );
+		// let this be oversized for easier indexing, but correct for it when calculating the PPV below...
+		component_pair_count = procrastinated[ivI].SeqCount() * procrastinated[ivI].SeqCount();
+		pairwise_component_hits[ivI].resize(component_pair_count, false);
+		component_hits[ivI].resize(procrastinated[ivI].SeqCount(), false);
+		all_component_pair_count += ( ( procrastinated[ivI].SeqCount() *  (procrastinated[ivI].SeqCount() - 1) ) / 2 );
+		all_component_count += procrastinated[ivI].SeqCount();
+	}
+	
+	// sort each vector of iv_tracker_t (interval index first, then component) so we can later do set intersections
+	for( size_t posI = 0; posI < coord_iv_map.size(); ++posI )
+		std::sort( coord_iv_map[posI].begin(), coord_iv_map[posI].end() );
+
+	tracker_vector_t intersect_buf1( all_component_count );	// storage for set intersections
+	tracker_vector_t intersect_buf2( all_component_count );	// storage for set intersections
+	PairFirstComparator< iv_tracker_t > pfc;
+
+	// now, for each pair of aligned positions in the correct alignment, determine whether they
+	// lie in a procrastAligner chain
+	size_t all_pair_count = (seq_count * (seq_count - 1)) / 2;
+	size_t pair_count = 0;
+	
+	for( seqI = 0; seqI < seq_count; seqI++ )
+	{
+		for( seqJ = seqI+1; seqJ < seq_count; seqJ++ )
+		{
+			// progress report, printed whenever the integer percentage changes
+			size_t prev_count = pair_count;
+			pair_count++;
+			if( (pair_count * 100) / all_pair_count != (prev_count * 100) / all_pair_count )
+			{
+				cout << (pair_count * 100) / all_pair_count << "%..";
+				cout.flush();
+			}
+			vector< aligned_coords_t > cor;
+			
+			//construct the coord list just for the correct alignment
+			vector< gnSequence* > seq_table( seq_count, (gnSequence*)NULL );
+			constructCoordList( seqI, seqJ, correct, cor, seq_table );
+
+			gnSeqI corI = 0;
+			// skip any gaps aligned to gaps
+			while( corI < cor.size() && cor[ corI ].pos1 == 0 )
+				corI++;
+
+			for( ; corI < cor.size(); corI++ )
+			{
+				if( cor[ corI ].pos1 != 0 && cor[ corI ].pos2 != 0)	// don't count positions aligned to gaps
+					sp_possible++;
+				else
+					continue;
+
+				// which positions do the correct pair have in the concatenated alignment space?
+				gnSeqI trans_pos1 = genome::absolut( cor[corI].pos1 ) + concat_coords[seqI];
+				gnSeqI trans_pos2 = genome::absolut( cor[corI].pos2 ) + concat_coords[seqJ];
+				
+				// which chain(s) do these positions fall into?
+				// are any of them the same chain?
+				tracker_vector_t::iterator last_int1 = std::set_intersection( 
+					coord_iv_map[trans_pos1].begin(),coord_iv_map[trans_pos1].end(),
+					coord_iv_map[trans_pos2].begin(),coord_iv_map[trans_pos2].end(), 
+					intersect_buf1.begin(), pfc );
+
+				if( last_int1 == intersect_buf1.begin() )
+				{
+					// not contained in any chain.  false negative
+				}else{
+					// make a list of pairs for each position
+					// set_intersection always puts elements from the first set into the output buffer,
+					// since the elements in the second set may have the same iv index but a different
+					// match component, we want a list of those as well
+					tracker_vector_t::iterator last_int2 = std::set_intersection( 
+						coord_iv_map[trans_pos2].begin(),coord_iv_map[trans_pos2].end(), 
+						coord_iv_map[trans_pos1].begin(),coord_iv_map[trans_pos1].end(),
+						intersect_buf2.begin(), pfc );
+					
+					size_t pcount = last_int1 - intersect_buf1.begin();
+					bool found = false;	// set this to true if at least one element has different match components
+					for( size_t pI = 0; pI < pcount; ++pI )
+					{
+						// make sure they're not in the same component (probably a very rare occurrence)
+						size_t component_1 = intersect_buf1[pI].second;
+						size_t component_2 = intersect_buf2[pI].second;
+						size_t ivI = intersect_buf1[pI].first;
+						if( component_1 == component_2 )
+							continue;	// no alignment here
+
+						// make sure the relative orientations match
+						bool cor_orient = (cor[corI].pos1 > 0) == (cor[corI].pos2 > 0);
+						bool calc_orient = (procrastinated[ivI].Orientation(component_1) == procrastinated[ivI].Orientation(component_2));
+						if( cor_orient != calc_orient )
+							continue;	// calculated alignment has the wrong strand
+
+						// make sure they're not aligned to something else...
+						CompactGappedAlignment<>* cga = dynamic_cast< CompactGappedAlignment<>* >(procrastinated[ivI].GetMatches()[0]);
+						if( cga == NULL )
+							throw "Error:  expecting interval to contain a single CompactGappedAlignment";
+						size_t col_1 = cga->SeqPosToColumn(component_1, trans_pos1);
+						const vector< bitset_t >& aln_mat = cga->GetAlignment();
+						// if they're not aligned, make sure they're in the same gap.
+						// they might get aligned later if we were to actually align the procrastAligner chains
+						// instead of just finding anchors.
+						if( !aln_mat[component_2].test(col_1) )
+						{
+							// if we encounter any columns between col_1 and col_2 that have
+							// component_1 and component_2 aligned then we wouldn't ever align
+							// pos_1 and pos_2 without changing the anchoring
+							size_t col_2 = cga->SeqPosToColumn(component_2, trans_pos2);
+							size_t col_first = col_1;
+							size_t col_last = col_2;
+							// order the range so that col_first <= col_last; swapping in
+							// the opposite case would leave the scan below with an empty range
+							if( col_last < col_first )
+								swap(col_first, col_last);
+							size_t colI = col_first;
+							for( ; colI <= col_last; ++colI )
+							{
+								if( aln_mat[component_1].test(colI) && aln_mat[component_2].test(colI) )
+									break;
+							}
+							if( colI <= col_last )
+								continue;	// an anchor intervenes...  bummer.
+						}
+
+						// mark these components as good
+						found = true;
+						component_hits[ivI].set( component_1 );
+						component_hits[ivI].set( component_2 );
+						
+						// Always use the smallest component first
+						if( component_2 < component_1 )
+							swap(component_1, component_2);
+
+						// calculate signficand for creating double
+//						double significand = (double)(component_2+1)/(double)MAX_MULTIPLICITY;
+						// store merged_component
+//						double merged_component = (double)(component_1+1)+significand;
+
+						// and use as unique pairwise index for each pair to take advantage
+						// of bitset_t vector
+						size_t sig = component_1 * cga->SeqCount() + component_2;
+						pairwise_component_hits[ivI].set( sig );
+					}
+					if( found )
+						sp_truepos++;
+				}
+			}
+		}
+	}
+
+	cout << "\ndone!\n";
+	// yaaay! we're done.  report score.
+	cout << "sp_truepos " << sp_truepos << endl;
+	cout << "sp_possible " << sp_possible << endl;
+	cout << "SP sensitivity: " << ((double)sp_truepos) / ((double)sp_possible) << endl;
+	double components_correct = 0;
+	double components_possible = 0;
+	for( size_t ivI = 0; ivI < component_hits.size(); ++ivI )
+	{
+		components_correct += component_hits[ivI].count();
+		components_possible += component_hits[ivI].size();
+	}
+	cout << "Match component PPV: " << components_correct / components_possible << endl;
+
+	// the pairwise bitsets are oversized (SeqCount squared), but only one bit per
+	// unordered pair is ever set, so dividing by the true pair count is correct
+	double pairwise_components_correct = 0;
+	for( size_t ivI = 0; ivI < pairwise_component_hits.size(); ++ivI )
+	{
+		pairwise_components_correct  += pairwise_component_hits[ivI].count();
+	}
+	cout << "Pairwise match component PPV: " << pairwise_components_correct / (double)all_component_pair_count << endl;
+}
+
+
+/**
+ * program to score alignments
+ * reads in a "correct" alignment and a procrastinated alignment
+ * scores the procrastinated alignment based on the correct one
+ *
+ * Inputs are given as named options: --correct, --calculated and --sequence.
+ * Returns 0 on success and a non-zero status when option parsing fails, an
+ * input file cannot be opened, or scoring throws.
+ */
+int main( int argc, char* argv[] )
+{
+	string correct_aln_fname;
+	string procrast_aln_fname;
+	string sequence_file;
+
+	if( argc < 2 ){
+		// only named options are registered below, so advertise those
+		cout << "scoreProcrastAlignment --correct <correct alignment> --calculated <procrastAligner output> --sequence <FastA sequence file>\n";
+		return -1;
+	}
+
+	// Declare the supported options.
+	po::variables_map vm;
+	try {
+		po::options_description desc("Allowed options");
+		desc.add_options()
+			("help", "get help message")
+			("correct", po::value<string>(&correct_aln_fname), "correct Alignment(XMFA)")
+			("calculated", po::value<string>(&procrast_aln_fname), "procrastAligner output")
+			("sequence", po::value<string>(&sequence_file), "FastA sequence file")
+		;
+
+		po::store(po::parse_command_line(argc, argv, desc), vm);
+		po::notify(vm);
+
+		if (vm.count("help")) {
+			cout << desc << "\n";
+			return 1;
+		}
+	}
+	catch(exception& e) {
+		cerr << "error: " << e.what() << "\n";
+		return 1;
+	}
+	catch(...) {
+		cerr << "Exception of unknown type!\n";
+		return 1;	// the options are in an unknown state; do not carry on
+	}
+
+	ifstream correct_in;
+	correct_in.open( correct_aln_fname.c_str() );
+	if( !correct_in.is_open() ){
+		cerr << "Error opening " << correct_aln_fname << endl;
+		return -1;
+	}
+	ifstream procrast_in;
+	procrast_in.open( procrast_aln_fname.c_str() );
+	if( !procrast_in.is_open() ){
+		cerr << "Error opening " << procrast_aln_fname << endl;
+		return -1;
+	}
+
+try{
+	IntervalList correct_ivs;
+	IntervalList procrast_ivs;
+	cout << "Reading correct alignment into interval list...";
+	correct_ivs.ReadStandardAlignment( correct_in );
+	cout << " finished" << endl;
+	correct_in.close();
+
+	cout << "Reading procrastAlignment into interval list...";
+	procrast_ivs.ReadStandardAlignmentCompact( procrast_in );
+	cout << " finished" << endl;
+	procrast_in.close();
+
+	// the --sequence file should hold the unaligned sequence
+	gnSequence concat_sequence;
+	concat_sequence.LoadSource( sequence_file );
+	compareAlignmentsAceD( correct_ivs, procrast_ivs, concat_sequence );
+
+}catch( gnException& gne ){
+	cerr << gne << endl;
+	return -1;
+}catch( exception& e ){
+	cerr << e.what() << endl;
+	return -2;
+}catch( char const* c ){
+	// constructCoordList() reports a malformed interval by throwing a string literal
+	cerr << c << endl;
+	return -3;
+}
+
+	return 0;
+}
diff --git a/src/sortContigs.cpp b/src/sortContigs.cpp
new file mode 100644
index 0000000..333d646
--- /dev/null
+++ b/src/sortContigs.cpp
@@ -0,0 +1,181 @@
+#include "libMems/IntervalList.h"
+#include "libMems/Aligner.h"
+#include "libGenome/gnFASSource.h"
+#include <fstream>
+#include <string>
+#include <vector>
+#include <iomanip>
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+
+/**
+ * sortContigs: reorder the contigs of every non-reference sequence so that
+ * they follow the order of the LCBs along the reference sequence.
+ *
+ * argv[1] is the alignment file passed to IntervalList::ReadList and argv[2]
+ * the index of the reference sequence.  For each non-reference sequence a
+ * FastA file named "<original filename>.reordered" is written; contigs placed
+ * in the opposite orientation to the reference are reverse complemented, and
+ * contigs not covered by any n-way LCB are appended at the end.
+ *
+ * Returns 0 on success, -1 on usage errors, unreadable/unwritable files, an
+ * out-of-range reference index or a gnException.
+ */
+int main( int argc, char* argv[] )
+{
+	// 
+	if( argc != 3 )
+	{
+		cerr << "Usage: sortContigs <Mauve Alignment> <reference sequence #>\n";
+		cerr << "Where <Mauve Alignment> is the .mln file generated by Mauve, NOT the .alignment\n";
+		cerr << "Sequences are numbered from 0 in the order they were input to Mauve\n";
+		cerr << "This program will write out a new reordered FastA file for each of the non-reference sequences\n";
+		return -1;
+	}
+	ifstream aln_file( argv[1] );
+	if( !aln_file.is_open() )
+	{
+		cerr << "Error opening \"" << argv[1] << "\"\n";
+		return -1;
+	}
+
+	uint ref_seqI = atoi( argv[2] ); 
+
+try{
+	IntervalList iv_list;
+	iv_list.ReadList( aln_file );
+	cerr << "Read " << argv[1] << endl;
+	LoadSequences(iv_list, &cout );
+
+	// remove all but the n-way intervals
+	IntervalList nway_iv_list;
+	for( uint ivI = 0; ivI < iv_list.size(); ivI++ )
+	{
+		uint def_seqI = 0;
+		for( ; def_seqI < iv_list.seq_table.size(); def_seqI++ )
+			if( iv_list[ivI].Start( def_seqI ) == 0 )
+				break;
+		if( def_seqI == iv_list.seq_table.size() )
+			nway_iv_list.push_back( iv_list[ivI] );
+	}
+	iv_list.erase(iv_list.begin(), iv_list.end() );
+	iv_list.insert( iv_list.end(), nway_iv_list.begin(), nway_iv_list.end() );
+
+	// compute LCB adjacencies
+	vector< int64 > weights = vector< int64 >( iv_list.size(), 1 );
+	vector< LCB > adjacencies;
+	cerr << "computeLCBAdjacencies\n";
+	computeLCBAdjacencies_v2( iv_list, weights, adjacencies );
+	uint seq_count = iv_list.seq_filename.size();
+
+	// the reference index comes straight from the command line and is used to
+	// index both tables below, so reject anything out of range (a negative
+	// argument wraps to a huge unsigned value and is caught here as well)
+	if( ref_seqI >= seq_count || ref_seqI >= iv_list.seq_table.size() )
+	{
+		cerr << "Error: reference sequence # " << argv[2] << " is out of range; the alignment has " << seq_count << " sequences\n";
+		return -1;
+	}
+
+	vector< gnSequence* > new_seq_table = vector< gnSequence* >( seq_count );
+	for( uint seqI = 0; seqI < seq_count; seqI++ )
+		new_seq_table[seqI] = new gnSequence();
+	// the reference keeps its original sequence object
+	delete new_seq_table[ref_seqI];
+	new_seq_table[ref_seqI] = iv_list.seq_table[ref_seqI];
+
+	// start from the LCB that has no left neighbour in the reference
+	uint leftmost_lcb = 0;
+	for( ; leftmost_lcb < adjacencies.size(); leftmost_lcb++ )
+		if( adjacencies[ leftmost_lcb ].left_adjacency[ref_seqI] == -1 )
+			break;
+	uint adjI = leftmost_lcb;
+	vector< set< uint > > placed_contigs = vector< set< uint > >( iv_list.seq_table.size() );
+	cerr << "placing contigs\n";
+
+	// follow the right adjacencies of the reference until a negative
+	// adjacency value (-1 or -2) or an out-of-range index ends the walk
+	while( adjI != -1 && adjI != -2 && adjI < adjacencies.size() )
+	{
+		for( uint seqI = 0; seqI < seq_count; seqI++ )
+		{
+			if( seqI == ref_seqI )
+				continue;
+			int64 lend = absolut(adjacencies[ adjI ].left_end[seqI] );
+			int64 rend = absolut(adjacencies[ adjI ].right_end[seqI] ) - 1;
+			bool cur_forward = (adjacencies[ adjI ].left_end[seqI] > 0);
+			bool ref_forward = (adjacencies[ adjI ].left_end[ref_seqI] > 0);
+			bool forward = cur_forward == ref_forward;
+			uint r_contig = 0, l_contig = 0;
+			bool contigs_found = true;
+			try{
+				l_contig = iv_list.seq_table[seqI]->contigIndexByBase( absolut(lend) );
+			}catch( gnException& gne )
+			{
+				cerr << gne << endl;
+				cerr << "Thrown while getting contig for base lend: " << absolut(lend) << endl;
+				contigs_found = false;
+			}
+			try{
+				r_contig = iv_list.seq_table[seqI]->contigIndexByBase( absolut(rend) );
+			}catch( gnException& gne )
+			{
+				cerr << gne << endl;
+				cerr << "Thrown while getting contig for base rend: " << absolut(rend) << endl;
+				contigs_found = false;
+			}
+			if( !contigs_found )
+				continue;	// without both contig indices there is no valid range to place
+
+			// shift the range by one so that the descending loop below can
+			// terminate without decrementing an unsigned value past zero
+			uint first_contig = forward? l_contig : r_contig;
+			uint last_contig = forward? r_contig : l_contig;
+			first_contig++;
+			last_contig++;
+			try{
+			for( uint contigI = first_contig; forward? (contigI <= last_contig) : (contigI >= last_contig); (forward? contigI++ : contigI--) )
+			{
+				// place these if they haven't already been placed
+				set< uint >::iterator p_iter = placed_contigs[seqI].find(contigI-1);
+				if( p_iter != placed_contigs[seqI].end() )
+					continue;	// already placed this contig
+				try{
+				(*new_seq_table[ seqI ]) += iv_list.seq_table[seqI]->contig(contigI-1);
+				}catch( gnException& gne ){
+					cerr << gne << endl;
+					cerr << "Thrown while accessing seq " << seqI << " contig " << contigI-1<< endl;
+				}
+				if(!forward)
+					new_seq_table[seqI]->setReverseComplement( true, new_seq_table[seqI]->contigListLength()-1 );
+				placed_contigs[seqI].insert( contigI-1 );
+			}
+			}catch( gnException& gne ){
+				cerr << gne << endl;
+				cerr << "Thrown while adding contigs in the range: " << first_contig << " to " << last_contig << endl;
+			}
+		}
+		adjI = adjacencies[ adjI ].right_adjacency[ref_seqI];
+	}
+	cerr << "adding unplaced contigs\n";
+
+	// add any remaining contigs that the alignment didn't place
+	for( uint seqI = 0; seqI < seq_count; seqI++ )
+	{
+		if( seqI == ref_seqI )
+			continue;
+		for( uint contigI = 0; contigI < iv_list.seq_table[seqI]->contigListLength(); contigI++ )
+		{
+			// place this contig if it hasn't already been placed
+			set< uint >::iterator p_iter = placed_contigs[seqI].find(contigI);
+			if( p_iter != placed_contigs[seqI].end() )
+				continue;	// already placed this contig
+			(*new_seq_table[ seqI ]) += iv_list.seq_table[seqI]->contig(contigI);
+			placed_contigs[seqI].insert( contigI );
+		}
+	}
+
+	cerr << "writing reordered sequence\n";
+	for( uint seqI = 0; seqI < seq_count; seqI++ )
+	{
+		for( uint contigI = 0; contigI < new_seq_table[seqI]->contigListSize(); contigI++ )
+		{
+			// new name: zero-padded position, first word of the old name, strand sign
+			string name = new_seq_table[seqI]->contigName( contigI );
+			stringstream ss( name );
+			string new_name;
+			ss >> new_name;
+			stringstream new_ss;
+			int fillsize = ceil(log((double)new_seq_table[seqI]->contigListSize())/log(10.0));
+			new_ss << setfill('0') << setw(fillsize);
+			new_ss << contigI << "_" << new_name;
+			if( new_seq_table[seqI]->isReverseComplement(contigI) )
+				new_ss << "-";
+			else
+				new_ss << "+";
+			new_seq_table[seqI]->setContigName( contigI, new_ss.str() );
+		}
+		if( seqI == ref_seqI )
+			continue;	// reference sequence didn't change
+		string o_filename = iv_list.seq_filename[ seqI ] + ".reordered";
+		ofstream out_file( o_filename.c_str() );
+		if( !out_file.is_open() )
+		{
+			cerr << "Error opening \"" << o_filename << "\"\n";
+			return -1;
+		}
+		gnFASSource::Write( *new_seq_table[seqI], out_file, false, false );
+	}
+
+}catch(gnException& gne){
+	cerr << gne << endl;
+	return -1;
+}
+	return 0;
+}
diff --git a/src/stripGapColumns.cpp b/src/stripGapColumns.cpp
new file mode 100644
index 0000000..cde0499
--- /dev/null
+++ b/src/stripGapColumns.cpp
@@ -0,0 +1,74 @@
+#include "libMems/IntervalList.h"
+#include "libMems/MatchList.h"
+#include "libMems/GappedAlignment.h"
+#include <fstream>
+#include <string>
+#include <vector>
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+/**
+ * stripGapColumns: read the XMFA alignment named by argv[1], drop every
+ * alignment column in which at least one sequence has a gap character ('-'),
+ * and write the remaining columns to the file named by argv[2].
+ *
+ * Returns 0 on success, -1 on a usage error or unreadable input, and -2 when
+ * the output file cannot be opened.
+ */
+int main( int argc, char* argv[] )
+{
+	if( argc != 3 )
+	{
+		cerr << "Usage: stripGapColumns <input XMFA> <output XMFA>\n";
+		return -1;
+	}
+
+	ifstream xmfa_in( argv[1] );
+	if( !xmfa_in.is_open() )
+	{
+		cerr << "Error opening \"" << argv[1] << "\"\n";
+		return -1;
+	}
+
+	IntervalList source_ivs;
+	source_ivs.ReadStandardAlignment( xmfa_in );
+	LoadSequences( source_ivs, &cout );
+
+	// the output list refers to the same sequences as the input
+	IntervalList stripped_ivs;
+	stripped_ivs.seq_filename = source_ivs.seq_filename;
+	stripped_ivs.seq_table = source_ivs.seq_table;
+
+	for( uint ivI = 0; ivI < source_ivs.size(); ivI++ )
+	{
+		Interval& cur_iv = source_ivs[ivI];
+		vector< string > alignment;
+		GetAlignment( cur_iv, source_ivs.seq_table, alignment );
+
+		// one output row per sequence, grown column by column
+		vector< string > kept_rows( cur_iv.SeqCount() );
+		for( gnSeqI colI = 0; colI < cur_iv.AlignmentLength(); colI++ )
+		{
+			// look for a gap anywhere in this column
+			bool has_gap = false;
+			for( uint seqI = 0; !has_gap && seqI < cur_iv.SeqCount(); seqI++ )
+				has_gap = ( alignment[seqI][colI] == '-' );
+			if( has_gap )
+				continue;
+
+			// gap-free column: keep it in every row
+			for( uint seqI = 0; seqI < cur_iv.SeqCount(); seqI++ )
+				kept_rows[seqI] += alignment[seqI][colI];
+		}
+
+		// wrap the surviving columns in a new alignment that carries over
+		// the start and length of each sequence from the source interval
+		GappedAlignment* new_ga = new GappedAlignment( kept_rows.size(), kept_rows[0].size() );
+		new_ga->SetAlignment( kept_rows );
+		for( uint seqI = 0; seqI < cur_iv.SeqCount(); seqI++ )
+		{
+			new_ga->SetStart( seqI, cur_iv.Start( seqI ) );
+			new_ga->SetLength( cur_iv.Length( seqI ), seqI );
+		}
+		vector< AbstractMatch* > am_list( 1, new_ga );
+		Interval new_iv(am_list.begin(), am_list.end());
+		stripped_ivs.push_back( new_iv );
+	}
+
+	ofstream xmfa_out( argv[2] );
+	if( !xmfa_out.is_open() )
+	{
+		cerr << "Error opening \"" << argv[2] << "\"\n" << endl;
+		return -2;
+	}
+	stripped_ivs.WriteStandardAlignment( xmfa_out );
+	return 0;
+}
diff --git a/src/stripSubsetLCBs.cpp b/src/stripSubsetLCBs.cpp
new file mode 100644
index 0000000..dd4a362
--- /dev/null
+++ b/src/stripSubsetLCBs.cpp
@@ -0,0 +1,183 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string>
+#include <fstream>
+#include <vector>
+#include <algorithm>
+#include <iostream>
+#include <sstream>
+#include "libGenome/gnFilter.h"
+#include "libMems/IntervalList.h"
+#include "libMems/MatchList.h"
+#include "libMems/GappedAlignment.h"
+#include "libMems/Matrix.h"
+#include "libMems/MatchProjectionAdapter.h"
+#include "libMems/Aligner.h"
+#include "libGenome/gnFASSource.h"
+#include <boost/tuple/tuple.hpp>
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+// One parsed line of the bbcols file, filled token by token in main():
+//   get<0>  first token; used as an index into the input interval list
+//   get<1>  second token; CropStart() is called with this value minus one
+//   get<2>  third token; CropEnd() is called with (Length() - this value)
+//   get<3>  all remaining tokens; only the number of entries is used, it is
+//           compared against the minimum genome count
+typedef boost::tuple< uint, gnSeqI, gnSeqI, vector< uint > > bbcol_t;
+
+/**
+ * stripSubsetLCBs: cut the segments listed in a bbcols file out of an XMFA
+ * alignment and write those that pass the filters to a new XMFA file.
+ *
+ * Arguments: <input xmfa> <input bbcols> <output xmfa> [min LCB size]
+ * [min genomes] [randomly subsample to X kb].  A segment is kept when its
+ * bbcols line lists at least [min genomes] entries (default: the number of
+ * sequences in the alignment) and its mean per-sequence length is at least
+ * [min LCB size] (default 0).  When a non-zero subsample size is given,
+ * blocks are drawn at random without replacement until their combined
+ * alignment length reaches that many kb or no blocks are left.
+ *
+ * Returns 0 on success and a negative status on usage, file or parsing errors.
+ */
+int main( int argc, char* argv[] )
+{
+	if( argc < 4 )
+	{
+		cerr << "Usage: stripSubsetLCBs <input xmfa> <input bbcols> <output xmfa> [min LCB size] [min genomes] [randomly subsample to X kb]\n";
+		return -1;
+	}
+	ifstream aln_in;
+	aln_in.open( argv[1] );
+	if( !aln_in.is_open() ){
+		cerr << "Error opening " << argv[1] << endl;
+		return -1;
+	}
+	ifstream bbcols_in;
+	bbcols_in.open( argv[2] );
+	if( !bbcols_in.is_open() )
+	{
+		cerr << "Error opening " << argv[2] << endl;
+		return -2;
+	}
+	ofstream aln_out;
+	aln_out.open( argv[3] );
+	if( !aln_out.is_open() ){
+		cerr << "Error writing to " << argv[3] << endl;
+		return -1;
+	}
+
+	size_t min_block_length = 0;
+	if(argc>=5){
+		min_block_length = atol(argv[4]);
+	}
+	// (size_t)-1 is the "not given" marker; replaced by the sequence count below
+	size_t min_genome_count = -1;
+	if(argc>=6){
+		min_genome_count = atol(argv[5]);
+	}
+	size_t subsample_kb = 0;
+	if(argc>=7){
+		subsample_kb = atol(argv[6]);
+	}
+	
+
+	try{
+		IntervalList input_ivs;
+		input_ivs.ReadStandardAlignment( aln_in );
+		aln_in.close();
+
+		LoadSequences( input_ivs, NULL );
+
+		// read the bbcols file: three leading integers, then a list of integers
+		vector< bbcol_t > bbcols;
+		string cur_line;
+		while( getline( bbcols_in, cur_line ) )
+		{
+			stringstream line_str(cur_line);
+			size_t cur_token;
+			size_t tokenI = 0;
+			bbcol_t bbcol;
+			while( line_str >> cur_token )
+			{
+				switch(tokenI)
+				{
+					case 0:
+						bbcol.get<0>() = cur_token;
+						break;
+					case 1:
+						bbcol.get<1>() = cur_token;
+						break;
+					case 2:
+						bbcol.get<2>() = cur_token;
+						break;
+					default:
+						bbcol.get<3>().push_back(cur_token);
+						break;
+				}
+				tokenI++;
+			}
+			bbcols.push_back(bbcol);
+		}
+		cout << "Read " << bbcols.size() << " backbone entries\n";
+
+		IntervalList output_ivs;
+		output_ivs.seq_table = input_ivs.seq_table;
+		output_ivs.seq_filename = input_ivs.seq_filename;
+/*		for( size_t i = 0; i < input_ivs.size(); ++i )
+		{
+			cout << "LCB " << i << " multiplicity: " << input_ivs[i].Multiplicity() << endl;
+			for( size_t seqI = 0; seqI < input_ivs.seq_table.size(); ++seqI )
+			{
+				cout << input_ivs[i].LeftEnd(seqI) << '\t' << input_ivs[i].RightEnd(seqI) << '\t';
+			}
+			cout << endl;
+			if( input_ivs[i].Multiplicity() == input_ivs.seq_table.size() )
+				output_ivs.push_back( input_ivs[i] );
+		}
+*/
+		cout << "seq_count is: " << input_ivs.seq_table.size() << endl;
+		if(min_genome_count==-1) min_genome_count = input_ivs.seq_table.size();
+
+		for( size_t bbI = 0; bbI < bbcols.size(); bbI++ )
+		{
+			if( bbcols[bbI].get<3>().size() < min_genome_count )
+				continue;
+			// the index comes from the bbcols file; make sure it refers to an
+			// interval that was actually read before dereferencing it
+			const size_t lcbI = bbcols[bbI].get<0>();
+			if( lcbI >= input_ivs.size() )
+			{
+				cerr << "Skipping backbone entry " << bbI << ": LCB index " << lcbI << " is out of range\n";
+				continue;
+			}
+			Interval* sub_iv = input_ivs[lcbI].Copy();
+			sub_iv->CropStart( bbcols[bbI].get<1>() - 1 );
+			sub_iv->CropEnd( sub_iv->Length() - bbcols[bbI].get<2>() );
+			// calculate mean length
+			size_t avglen = 0;
+			for(size_t seqI=0; seqI < sub_iv->SeqCount(); seqI++){
+				avglen += sub_iv->Length(seqI);
+			}
+			avglen /= sub_iv->SeqCount();
+			if(avglen >= min_block_length){
+				output_ivs.push_back( *sub_iv );
+			}
+			sub_iv->Free();
+		}
+		if(subsample_kb==0){
+			cout << "output_ivs.size() " << output_ivs.size() << endl;
+			output_ivs.WriteStandardAlignment( aln_out );
+		}else{
+			// draw blocks without replacement; cur_kb grows only by the length
+			// of blocks that were actually selected, so the total reported
+			// below is the real amount of alignment written
+			set<size_t> sampled;
+			double cur_kb=0;
+			while( cur_kb < (double)subsample_kb && sampled.size() < output_ivs.size() ){
+				size_t block = rand()%output_ivs.size();
+				if(sampled.find(block)!=sampled.end()){
+					continue;	// already drawn, try again
+				}
+				sampled.insert(block);
+				cur_kb += (double)(output_ivs[block].AlignmentLength()) / 1000.0;
+			}
+			IntervalList new_ivs;
+			new_ivs.seq_table=output_ivs.seq_table;
+			new_ivs.seq_filename=output_ivs.seq_filename;
+			for(set<size_t>::iterator siter = sampled.begin(); siter != sampled.end(); siter++){
+				new_ivs.push_back(output_ivs[*siter]);
+			}
+			cout << "Writing " << cur_kb << " kb of alignment columns in " << new_ivs.size() << " blocks" << endl;
+			new_ivs.WriteStandardAlignment( aln_out );
+		}
+
+	}catch( gnException& gne ){
+		cerr << gne << endl;
+		return -1;
+	}catch( exception& e ){
+		cerr << e.what() << endl;
+		return -2;
+	}catch( char const* c ){
+		cerr << c << endl;
+		return -3;
+	}catch(...){
+		cerr << "Unhandled exception" << endl;
+		return -4;
+	}
+}
+
diff --git a/src/toEvoHighwayFormat.cpp b/src/toEvoHighwayFormat.cpp
new file mode 100644
index 0000000..1f2d725
--- /dev/null
+++ b/src/toEvoHighwayFormat.cpp
@@ -0,0 +1,148 @@
+#include "libMems/IntervalList.h"
+#include "libMems/Aligner.h"
+#include <fstream>
+#include <string>
+#include <vector>
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+// locate the chromosome holding pos: index of the first entry of chr_lens that exceeds pos
+int getChromosome( vector< int64 >& chr_lens, int64 pos )
+{
+	int chrI = 0;
+	// step over every entry that is <= pos; chr_lens.size() comes back when no entry exceeds pos
+	while( chrI < chr_lens.size() && chr_lens[chrI] <= pos )
+		chrI++;
+	return chrI;
+}
+
+// convert a number to a four letter base 26 number ('a' is digit 0); digits beyond the fourth are dropped
+string getAlphabetID( uint chromo_counter )
+{
+	string rval = "aaaa";
+	int charI = 3;
+	while( charI >= 0 && chromo_counter > 0 )	// >= 0 so the leading letter is filled in as well
+	{
+		int rem1 = chromo_counter % 26;
+		chromo_counter /= 26;
+		rval[charI--] = (char)(rem1 + 'a');
+	}
+	return rval;
+}
+
+int main( int argc, char* argv[] )
+{
+	// Print one tab-separated Evolution Highway synteny row per LCB for every genome except the reference.
+	if( argc < 4 )
+	{
+		cerr << "Usage: toEvoHighwayFormat <Mauve Alignment> <reference genome id> <genome 1 chr lengths>...<genome N chr lengths>\n";
+		return -1;
+	}
+	ifstream aln_file( argv[1] );
+	if( !aln_file.is_open() )
+	{
+		cerr << "Error opening \"" << argv[1] << "\"\n";
+		return -1;
+	}
+	uint ref_id = atoi( argv[2] );	// used below as an index into seq_filename and chr_lens
+
+	vector< vector< int64 > > chr_lens;
+	for( uint genomeI = 3; genomeI < argc; genomeI++ )
+	{
+		ifstream cur_file( argv[genomeI] );
+		if( !cur_file.is_open() )
+		{
+			cerr << "Error opening \"" << argv[genomeI] << "\"\n";
+			return -2;
+		}
+		int64 cur_len = 0;
+		vector< int64 > len_vector;
+		while( cur_file >> cur_len )	// keep running totals, so entry i is the summed length of chromosomes 0..i
+			len_vector.push_back( len_vector.empty() ? cur_len : cur_len + len_vector.back() );
+		chr_lens.push_back( len_vector );
+		cerr << "Read " << argv[genomeI] << ", " << len_vector.size() << " chromosomes covering " << (len_vector.empty() ? 0 : len_vector.back()) << " nt " << endl;	// a file with no lengths reports 0 nt
+	}
+try{
+	IntervalList iv_list;
+	iv_list.ReadList( aln_file );
+	cerr << "Read " << argv[1] << endl;
+	vector< int64 > weights = vector< int64 >( iv_list.size(), 1 );
+	vector< LCB > adjacencies;
+	cerr << "computeLCBAdjacencies\n";
+	computeLCBAdjacencies_v2( iv_list, weights, adjacencies );
+	uint seq_count = iv_list.seq_filename.size();
+	// every genome index used below must be valid for both seq_filename and chr_lens
+	if( ref_id >= seq_count || chr_lens.size() < seq_count ){
+		cerr << "Error: the reference genome id and the chromosome length files must match the genomes in the alignment\n";
+		return -3;
+	}
+	for( uint seqI = 0; seqI < seq_count; seqI++ )
+	{
+		if( seqI == ref_id )
+			continue;
+		uint leftmost_lcb = 0;
+		for( ; leftmost_lcb < adjacencies.size(); leftmost_lcb++ )
+			if( adjacencies[ leftmost_lcb ].left_adjacency[seqI] == -1 )
+				break;
+		uint adjI = leftmost_lcb;
+		uint cur_chromosome = 0;
+		uint chromo_counter = 0;
+
+		while( adjI != -1 && adjI != -2 && adjI < adjacencies.size() )
+		{
+			// advance past every chromosome that ends before this LCB, never beyond the last known chromosome
+			while( cur_chromosome + 1 < chr_lens[seqI].size() && absolut(adjacencies[adjI].left_end[seqI]) > chr_lens[seqI][cur_chromosome] )
+			{
+				cur_chromosome++;
+				chromo_counter = 0;
+			}
+			// write out a row for an evo highway synteny block
+			// write ref name
+			cout << iv_list.seq_filename[ref_id];
+			// write ref chromosome
+			int ref_chr = getChromosome( chr_lens[ref_id], absolut(adjacencies[adjI].left_end[ref_id]) );
+			cout << '\t' << ref_chr + 1;
+
+			// write ref interval
+			if( ref_chr > 0 )
+			{
+				cout << '\t' << absolut(adjacencies[ adjI ].left_end[ref_id]) - chr_lens[ref_id][ref_chr - 1];
+				cout << '\t' << absolut(adjacencies[ adjI ].right_end[ref_id]) - chr_lens[ref_id][ref_chr - 1];
+			}else{
+				cout << '\t' << absolut(adjacencies[ adjI ].left_end[ref_id]);
+				cout << '\t' << absolut(adjacencies[ adjI ].right_end[ref_id]);
+			}
+
+			// write species chromosome
+			cout << '\t' << cur_chromosome + 1;
+			cout << getAlphabetID( chromo_counter );
+			// write species interval
+			if( cur_chromosome > 0 )
+			{
+				cout << '\t' << absolut(adjacencies[ adjI ].left_end[seqI]) - chr_lens[seqI][cur_chromosome - 1];
+				cout << '\t' << absolut(adjacencies[ adjI ].right_end[seqI]) - chr_lens[seqI][cur_chromosome - 1];
+			}else{
+				cout << '\t' << absolut(adjacencies[ adjI ].left_end[seqI]);
+				cout << '\t' << absolut(adjacencies[ adjI ].right_end[seqI]);
+			}
+			// write strand
+			cout << '\t';
+			if( adjacencies[ adjI ].left_end[ref_id] > 0 && adjacencies[ adjI ].left_end[seqI] < 0 ||
+				adjacencies[ adjI ].left_end[ref_id] < 0 && adjacencies[ adjI ].left_end[seqI] > 0 )
+				cout << "-";
+			cout << 1;
+			// write target name
+			cout << '\t' << iv_list.seq_filename[seqI];
+			// write lcb id
+			cout << '\t' << adjacencies[adjI].lcb_id + 1 << endl;
+			adjI = adjacencies[adjI].right_adjacency[seqI];
+			chromo_counter++;
+		}
+	}
+}catch(gnException& gne){
+	cerr << gne << endl;
+	return -4;	// report the failure to the caller instead of exiting with status 0
+}
+}
diff --git a/src/toGBKsequence.cpp b/src/toGBKsequence.cpp
new file mode 100644
index 0000000..f61977f
--- /dev/null
+++ b/src/toGBKsequence.cpp
@@ -0,0 +1,38 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnSequence.h"
+#include "libGenome/gnGBKSource.h"
+#include "libGenome/gnStringHeader.h"
+
+using namespace genome;
+using namespace std;
+
+int main( int argc, char* argv[] ){	// writes every contig of <input sequence> to "<contig name>.gbk"
+	if( argc != 3 ){
+		cout << argv[0] << " <input sequence> <output file>\n";
+		return -1;	// argv[1] is not usable without the expected arguments
+	}
+	gnSequence seq;
+	try{
+		seq.LoadSource( argv[1] );
+		cout << argv[1] << " has " << seq.contigListLength() << " contigs\n";
+		for( int contigI = 0; contigI < seq.contigListLength(); contigI++ ){
+			gnSequence contig = seq.contig( contigI );
+			string contig_name = seq.contigName( contigI );
+			cout << "contig " << contig_name << " has " << contig.length() << "b.p.\n";
+			// add all necessary headers
+			string locus_hdr = "LOCUS       " + contig_name;
+			locus_hdr += "                          DNA                  CON 27-Jan-2005";
+			gnStringHeader* gnsh = new gnStringHeader( "LOCUS", locus_hdr );	// NOTE(review): presumably owned by contig after addHeader - confirm
+			contig.addHeader( 0, gnsh, 0 );
+			gnGBKSource::Write( contig, contig_name+".gbk" );
+		}
+//		gnRAWSource::Write( seq, argv[2] );
+	}catch( gnException& gne ){
+		cerr << gne << endl;
+		return -1;
+	}
+	return 0;
+}
diff --git a/src/toGrimmFormat.cpp b/src/toGrimmFormat.cpp
new file mode 100644
index 0000000..b36475e
--- /dev/null
+++ b/src/toGrimmFormat.cpp
@@ -0,0 +1,84 @@
+#include "libMems/IntervalList.h"
+#include "libMems/Aligner.h"
+#include <fstream>
+#include <string>
+#include <vector>
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+
+int main( int argc, char* argv[] )
+{
+	// Print each genome's signed LCB order, one chromosome per " $"-terminated line, after a ">name" header.
+	if( argc < 4 )
+	{
+		cerr << "Usage: toGrimmFormat <Mauve Alignment> <genome 1 chr lengths>...<genome N chr lengths>\n";
+		return -1;
+	}
+	ifstream aln_file( argv[1] );
+	if( !aln_file.is_open() )
+	{
+		cerr << "Error opening \"" << argv[1] << "\"\n";
+		return -1;
+	}
+	vector< vector< int64 > > chr_lens;
+	for( uint genomeI = 2; genomeI < argc; genomeI++ )
+	{
+		ifstream cur_file( argv[genomeI] );
+		if( !cur_file.is_open() )
+		{
+			cerr << "Error opening \"" << argv[genomeI] << "\"\n";
+			return -2;
+		}
+		int64 cur_len = 0;
+		vector< int64 > len_vector;
+		while( cur_file >> cur_len )	// keep running totals, so entry i is the summed length of chromosomes 0..i
+			len_vector.push_back( len_vector.empty() ? cur_len : cur_len + len_vector.back() );
+		chr_lens.push_back( len_vector );
+		cerr << "Read " << argv[genomeI] << ", " << len_vector.size() << " chromosomes covering " << (len_vector.empty() ? 0 : len_vector.back()) << " nt " << endl;	// a file with no lengths reports 0 nt
+	}
+try{
+	IntervalList iv_list;
+	iv_list.ReadList( aln_file );
+	cerr << "Read " << argv[1] << endl;
+	vector< int64 > weights = vector< int64 >( iv_list.size(), 1 );
+	vector< LCB > adjacencies;
+	cerr << "computeLCBAdjacencies\n";
+	computeLCBAdjacencies_v2( iv_list, weights, adjacencies );
+	uint seq_count = iv_list.seq_filename.size();
+	if( chr_lens.size() < seq_count ){	// chr_lens[seqI] is read for every genome below
+		cerr << "Error: a chromosome length file is required for every genome in the alignment\n";
+		return -3;
+	}
+	for( uint seqI = 0; seqI < seq_count; seqI++ )
+	{
+		cerr << "Analyzing seq " << seqI << endl;
+		cout << ">" << iv_list.seq_filename[seqI] << endl;
+		uint leftmost_lcb = 0;
+		for( ; leftmost_lcb < adjacencies.size(); leftmost_lcb++ )
+			if( adjacencies[ leftmost_lcb ].left_adjacency[seqI] == -1 )
+				break;
+		uint adjI = leftmost_lcb;
+		uint cur_chromosome = 0;
+		while( adjI != -1 && adjI != -2 && adjI < adjacencies.size() )
+		{
+			if( cur_chromosome + 1 < chr_lens[seqI].size() && absolut(adjacencies[ adjI ].left_end[seqI]) > chr_lens[seqI][cur_chromosome] )	// never step past the last known chromosome
+			{
+				cout << " $\n";
+				cur_chromosome++;
+			}else if( adjI != leftmost_lcb )
+				cout << " ";
+			if( adjacencies[ adjI ].left_end[seqI] < 0 )
+				cout << "-";
+			cout << adjacencies[ adjI ].lcb_id + 1;
+			adjI = adjacencies[ adjI ].right_adjacency[seqI];
+		}
+		cout << " $" << endl;
+	}
+}catch(gnException& gne){
+	cerr << gne << endl;
+	return -4;	// report the failure to the caller instead of exiting with status 0
+}
+}
diff --git a/src/toMultiFastA.cpp b/src/toMultiFastA.cpp
new file mode 100644
index 0000000..3afaaa3
--- /dev/null
+++ b/src/toMultiFastA.cpp
@@ -0,0 +1,54 @@
+#include "libMems/Interval.h"
+#include "libMems/Islands.h"
+#include "libGenome/gnFASSource.h"
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+int main( int argc, char* argv[] )
+{
+	IntervalList iv_list;
+	if( argc != 3 )
+	{
+		cerr << "Usage: <input interval file> <output base name>";
+		return -1;
+	}
+	ifstream interval_in( argv[1] );
+	if( !interval_in.is_open() )
+	{
+		cerr << "Error opening \"" << argv[1] << "\"\n";
+		return -1;
+	}
+	iv_list.ReadList( interval_in );
+	LoadSequences(iv_list, NULL);
+	const string out_prefix = argv[2];
+	cout << "Input alignment has " << iv_list.size() << " intervals\n";
+	// each interval goes to its own file, named <output base name>.lcb_<interval index>
+	for( uint ivI = 0; ivI < iv_list.size(); ++ivI )
+	{
+		gnAlignedSequences aligned;
+		iv_list[ivI].GetAlignedSequences( aligned, iv_list.seq_table );
+		stringstream name_builder;
+		name_builder << out_prefix << ".lcb_" << ivI;
+		const string out_name = name_builder.str();
+		ofstream fasta_out( out_name.c_str() );
+		if( !fasta_out.is_open() )
+		{
+			cerr << "Error opening \"" << out_name << "\"\n";
+			return -2;
+		}
+		// append every aligned row as a contig named after its row index
+		gnSequence multi_fasta;
+		for( uint rowI = 0; rowI < aligned.sequences.size(); ++rowI )
+		{
+			stringstream row_label;
+			row_label << rowI;
+			multi_fasta += aligned.sequences[rowI];
+			multi_fasta.setContigName( multi_fasta.contigListSize()-1, row_label.str() );
+		}
+		gnFASSource::Write( multi_fasta, fasta_out, false, false );
+	}
+	return 0;
+}
+
diff --git a/src/toRawSequence.cpp b/src/toRawSequence.cpp
new file mode 100644
index 0000000..36ab3dd
--- /dev/null
+++ b/src/toRawSequence.cpp
@@ -0,0 +1,27 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnSequence.h"
+#include "libGenome/gnRAWSource.h"
+
+using namespace std;
+using namespace genome;
+
+
+int main( int argc, char* argv[] ){	// loads <input sequence> and writes it through gnRAWSource to <output file>
+	if( argc != 3 ){
+		cout << argv[0] << " <input sequence> <output file>\n";
+		return -1;	// argv[1] and argv[2] are not usable without the expected arguments
+	}
+	gnSequence seq;
+	try{
+		seq.LoadSource( argv[1] );
+		cout << argv[1] << " is " << seq.length() << "b.p.\n";
+		gnRAWSource::Write( seq, argv[2] );
+	}catch( gnException& gne ){
+		cerr << gne << endl;
+		return -1;
+	}
+	return 0;
+}
diff --git a/src/transposeCoordinates.cpp b/src/transposeCoordinates.cpp
new file mode 100644
index 0000000..4dcc906
--- /dev/null
+++ b/src/transposeCoordinates.cpp
@@ -0,0 +1,71 @@
+/*******************************************************************************
+ * $Id: transposeCoordinates.cpp,v 1.1 2004/02/28 00:01:31 darling Exp $
+ * This file is copyright 2002-2004 Aaron Darling.  All rights reserved.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * rights.  Redistribution of this file, in whole or in part is prohibited
+ * without express permission.
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <iostream>
+#include "libMems/Aligner.h"
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+void print_usage( const char* pname ){	// one-line argument synopsis for program pname, written to stdout
+	cout << "Usage: " << pname << ' ' << "<match list> <coordinates file> <sequence ID> <match list output>\n";
+}
+
+int main( int argc, const char* argv[] ){
+	// expects exactly: <match list> <coordinates file> <sequence ID> <match list output>
+	if( argc != 5 ){
+		print_usage("transposeCoordinates");
+		return -1;
+	}
+
+	// open both inputs up front so a bad path fails before anything is parsed
+	const string matches_path = argv[1];
+	ifstream matches_in( matches_path.c_str() );
+	if( !matches_in.is_open() ){
+		cerr << "Error opening \"" << matches_path << "\"" << endl;
+		return -1;
+	}
+	const string coords_path = argv[2];
+	ifstream coords_in( coords_path.c_str() );
+	if( !coords_in.is_open() ){
+		cerr << "Error opening \"" << coords_path << "\"" << endl;
+		return -1;
+	}
+
+	// the <sequence ID> argument, handed to transposeMatches below
+	int target_seq = atoi( argv[3] );
+
+	// read the matches, then apply MultiplicityFilter with the number of sequence file names
+	MatchList matches;
+	ReadList( matches, matches_in );
+	matches.MultiplicityFilter( matches.seq_filename.size() );
+
+	// collect every integer that can be extracted from the coordinates file
+	vector< int64 > coords;
+	for( int64 value; coords_in >> value; )
+		coords.push_back( value );
+	transposeMatches( matches, target_seq, coords );
+
+	// write the resulting match list
+	const string out_path = argv[4];
+	ofstream matches_out( out_path.c_str() );
+	if( !matches_out.is_open() ){
+		cerr << "Error opening \"" << out_path << "\"" << endl;
+		return -1;
+	}
+	WriteList( matches, matches_out );
+
+	return 0;
+}
+
+
diff --git a/src/unalign.cpp b/src/unalign.cpp
new file mode 100644
index 0000000..3aaf3df
--- /dev/null
+++ b/src/unalign.cpp
@@ -0,0 +1,91 @@
+#include "libMems/IntervalList.h"
+#include "libGenome/gnFASSource.h"
+#include "libMems/GappedAlignment.h"
+#include <algorithm>
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+/**
+ * Program to rebuild the gap-free source sequences from an alignment and write them out as FastA.
+ */
+int main( int argc, char* argv[] ){
+	
+	if( argc < 3 ){
+		cout << "Sometimes you've got an alignment but you just can't seem to find the sequences that went into it." << endl;
+		cout << "unalign <input alignment xmfa> <output Multi-FastA>\n";
+		return -1;
+	}
+	
+	string input_fname = argv[ 1 ];
+	string output_fname = argv[ 2 ];
+
+	ifstream alignment_in;
+	alignment_in.open( input_fname.c_str() );
+	if( !alignment_in.is_open() ){
+		cerr << "Error opening " << input_fname << endl;
+		return -1;
+	}
+	
+	ofstream mfa_out;
+	mfa_out.open( output_fname.c_str() );
+	if( !mfa_out.is_open() ){
+		cerr << "Error opening " << output_fname << endl;
+		return -1;
+	}
+	
+try{
+	IntervalList ivs;
+	cerr << "Reading " << input_fname << endl;
+	ivs.ReadStandardAlignment( alignment_in );
+	alignment_in.close();
+	if( ivs.size() == 0 ){
+		cerr << "Error! The alignment doesn't contain any intervals!\n";
+		return -1;
+	}
+	cerr << "Successfully read " << input_fname << endl;
+	cerr << "Removing gaps...\n";
+	uint seq_count = ivs[ 0 ].SeqCount();	// the first interval decides how many sequences are rebuilt
+	gnSequence output_seq;
+	for( uint seqI = 0; seqI < seq_count; seqI++ ){
+		gnSequence cur_seq;
+		AbstractMatchStartComparator<Interval> ivcomp(seqI);
+		sort( ivs.begin(), ivs.end(), ivcomp );	// reorders ivs in place for the current sequence
+		for( uint ivI = 0; ivI < ivs.size(); ivI++ ){
+			const vector< AbstractMatch* >& matches = ivs[ivI].GetMatches();
+			const vector< string >& alignment = GetAlignment(*((GappedAlignment*)matches[0]), vector<gnSequence*>(seq_count) );	// assumes the first match is a GappedAlignment - TODO confirm
+			cur_seq += alignment[seqI];
+			if(ivs[ivI].LeftEnd(seqI)<0)	cur_seq.setReverseComplement(true, cur_seq.contigListLength()-1);	// negative left end: flip the piece just appended
+		}
+		string strseq = cur_seq.ToString();
+		// copy every character except the gap symbol '-'
+		string gapless_seq;
+		for( string::size_type charI = 0; charI < cur_seq.size(); charI++ ){
+			if( strseq[ charI ] != '-' )
+				gapless_seq += strseq[ charI ];
+		}
+		output_seq += gapless_seq;
+		if(ivs.seq_filename.size()>0){
+			output_seq.setContigName(seqI,ivs.seq_filename[seqI]);
+			gnSequence file_seq;
+			file_seq += gapless_seq;
+			gnFASSource::Write( file_seq, ivs.seq_filename[seqI] );	// NOTE(review): writes to the path stored in the alignment; presumably replaces an existing file - confirm
+		}
+	}
+	cerr << "Writing " << output_fname << endl;
+	gnFASSource::Write( output_seq, mfa_out );
+	
+	
+}catch( gnException& gne ){
+	cerr << gne << endl;
+	return -2;
+}catch( exception& e ){
+	cerr << e.what() << endl;
+	return -3;
+}catch( char const* c ){
+	cerr << c << endl;
+	return -4;
+}
+
+}
diff --git a/src/uniqueMerCount.cpp b/src/uniqueMerCount.cpp
new file mode 100644
index 0000000..86d75a8
--- /dev/null
+++ b/src/uniqueMerCount.cpp
@@ -0,0 +1,41 @@
+/*******************************************************************************
+ * $Id: uniqueMerCount.cpp,v 1.1 2004/02/28 00:01:31 darling Exp $
+ * This file is copyright 2002-2004 Aaron Darling.  All rights reserved.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * rights.  Redistribution of this file, in whole or in part is prohibited
+ * without express permission.
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/DNAFileSML.h"
+
+using namespace std;
+using namespace genome;
+using namespace mems;
+
+void print_usage( const char* pname ){	// one-line argument synopsis for program pname, written to stderr
+	cerr << "Usage: " << pname << ' ' << "<Sorted Mer List>\n";
+}
+
+int main( int argc, const char* argv[] ){
+	if( argc != 2 ){
+		print_usage("uniqueMerCount");
+		return -1;
+	}
+
+	// Load the sorted mer list named on the command line and print its UniqueMerCount().
+	string sml_filename = argv[1];
+	DNAFileSML file_sml;	// automatic storage, so the object is destroyed on every exit path
+	try{
+		file_sml.LoadFile( sml_filename );
+	}catch( gnException& gne ){
+		cerr << gne << endl;
+		return -1;
+	}
+	cout << endl << file_sml.UniqueMerCount() << endl;
+	return 0;
+}
+
diff --git a/src/uniquifyTrees.cpp b/src/uniquifyTrees.cpp
new file mode 100644
index 0000000..b9515d1
--- /dev/null
+++ b/src/uniquifyTrees.cpp
@@ -0,0 +1,250 @@
+#include "libMems/PhyloTree.h"
+#include <vector>
+#include <sstream>
+#include <algorithm>
+#include <utility>
+#include <fstream>
+
+using namespace std;
+
+typedef unsigned int uint;
+
+bool taxonNameLessThan( string name1, string name2 )	// true when the integer leading name1 is smaller than the one leading name2
+{
+	stringstream n1_str( name1 );
+	stringstream n2_str( name2 );
+	int n1 = 0, n2 = 0;	// a name without a leading integer compares as 0 rather than as an indeterminate value
+	n1_str >> n1;
+	n2_str >> n2;
+	return n1 < n2;
+}
+
+template<class T, class S>
+void findAndErase( T& container, S& item )
+{
+	// filter into a separate container first: item may refer to an element of container, which stays untouched while scanning
+	T kept;
+	for( typename T::iterator cursor = container.begin(); cursor != container.end(); ++cursor )
+		if( *cursor != item ) kept.push_back( *cursor );
+	container.swap( kept );
+}
+
+/**
+ * Report whether query_nodeI is subtree_nodeI itself or can be reached from it
+ * by following children links.  The walk uses an explicit stack.
+ */
+bool containsNode( PhyloTree< TreeNode >& t, node_id_t subtree_nodeI, node_id_t query_nodeI )
+{
+	stack< node_id_t > pending;
+	pending.push( subtree_nodeI );
+	while( !pending.empty() )
+	{
+		const node_id_t visiting = pending.top();
+		pending.pop();
+		if( visiting == query_nodeI )
+			return true;
+		// schedule every child; a node without children adds nothing
+		const size_t child_count = t[visiting].children.size();
+		for( size_t childI = 0; childI < child_count; childI++ )
+			pending.push( t[visiting].children[childI] );
+	}
+	return false;
+}
+
+
+/** make the internal node new_root the root of t by rewriting the children/parents lists in place
+ */
+void rerootTree( PhyloTree< TreeNode >& t, node_id_t new_root )
+{
+	// new root must be an internal node; a leaf is rejected by throwing a C string
+	if( t[new_root].children.size() == 0 )
+		throw "Can't root on a leaf node";
+	if( new_root == t.root )
+		return;	// already rooted at new_root, nothing to do
+
+	// turn the old root into an internal node: the child whose subtree holds new_root becomes its parent
+	uint childI = 0;
+	for( ; childI < t[t.root].children.size(); childI++ ){
+		if( containsNode( t, t[t.root].children[childI], new_root ) )
+		{
+			t[t.root].parents.push_back( t[t.root].children[childI] );
+			findAndErase( t[t.root].children, t[t.root].children[childI] );
+			break;
+		}
+	}
+	// make new_root the root and treat its former parents as children (NOTE(review): its parents list is not cleared here - confirm)
+	t.root = new_root;
+	t[t.root].children.insert( t[t.root].children.end(), t[t.root].parents.begin(), t[t.root].parents.end() );
+
+	stack<node_id_t> node_stack;
+	node_stack.push(t.root);
+	while( node_stack.size() > 0 )
+	{
+		// for each child of the current node: remove the current node from the
+		// child's children and parents lists, fold the child's remaining parents
+		// into its children, make the current node its only parent, and then
+		// push the child so it is processed the same way
+		node_id_t cur_node = node_stack.top();
+		node_stack.pop();
+		for( uint childI = 0; childI < t[cur_node].children.size(); childI++ )
+		{
+			TreeNode& child_n = t[t[cur_node].children[childI]]; 
+			findAndErase( child_n.children, cur_node );
+			findAndErase( child_n.parents, cur_node );
+			child_n.children.insert( child_n.children.end(), child_n.parents.begin(), child_n.parents.end() );
+			child_n.parents.clear();
+			child_n.parents.push_back(cur_node);
+			node_stack.push(t[cur_node].children[childI]);
+		}
+	}
+}
+
+/**
+ * Find the leaf in the subtree below nodeI (nodeI itself when it is a leaf)
+ * whose taxon name is least according to taxonNameLessThan, i.e. names are
+ * compared by the integer they start with rather than as text.  Among
+ * non-empty names that compare equal, the leaf recorded first is kept.
+ */
+node_id_t getRepresentativeTaxon( PhyloTree< TreeNode >& t, node_id_t nodeI )
+{
+	stack< node_id_t > pending;
+	pending.push( nodeI );
+	string best_name = "";
+	node_id_t best_node = nodeI;
+	while( !pending.empty() )
+	{
+		const node_id_t visiting = pending.top();
+		pending.pop();
+		const size_t child_count = t[visiting].children.size();
+		if( child_count > 0 )
+		{
+			// internal node: schedule its children and move on
+			for( size_t childI = 0; childI < child_count; childI++ )
+				pending.push( t[visiting].children[childI] );
+			continue;
+		}
+		// leaf: adopt it when no candidate has been recorded yet (best_name is
+		// still empty) or when its name compares less than the candidate's
+		const string& leaf_name = t[visiting].name;
+		if( best_name == "" || taxonNameLessThan( leaf_name, best_name ) )
+		{
+			best_name = leaf_name;
+			best_node = visiting;
+		}
+	}
+	return best_node;
+}
+
+class TaxonNamePairComparator	// orders (taxon name, index) pairs by name via taxonNameLessThan; the index is ignored
+{
+public:
+	bool operator()( const pair<string, node_id_t>& p1, const pair<string, node_id_t>& p2 ) const	// both operands share one pair type, so neither needs converting
+	{
+		return taxonNameLessThan( p1.first, p2.first );
+	}
+};
+
+void sortTaxa( PhyloTree< TreeNode >& t )	// reorder every node's children by the representative taxon name of each child's subtree
+{
+	for( node_id_t nodeI = 0; nodeI < t.size(); nodeI++ )
+	{
+		vector< node_id_t >& kids = t[nodeI].children;
+		if( kids.empty() )
+			continue;	// nothing to order below a node without children
+		// pair each child's representative taxon name with the child's current position
+		vector< pair<string, node_id_t> > keyed;
+		for( size_t kidI = 0; kidI < kids.size(); kidI++ )
+		{
+			const node_id_t rep_node = getRepresentativeTaxon( t, kids[ kidI ] );
+			keyed.push_back( pair<string, node_id_t>( t[rep_node].name, kidI ) );
+		}
+		// order the pairs by name
+		sort( keyed.begin(), keyed.end(), TaxonNamePairComparator() );
+		// rebuild the child list by following the sorted positions
+		vector< node_id_t > reordered;
+		for( size_t keyI = 0; keyI < keyed.size(); keyI++ )
+			reordered.push_back( kids[ keyed[keyI].second ] );
+		kids = reordered;
+	}
+}
+
+/**
+ * Subtracts 1 from every node label that starts with a positive integer.
+ * Empty labels, labels that do not parse as a number, and 0 are left unchanged.
+ */
+void relabelTaxaToStartWithZero( PhyloTree< TreeNode >& t )
+{
+	for( node_id_t nodeI = 0; nodeI < t.size(); nodeI++ )
+	{
+		if( t[nodeI].name == "" )
+			continue;
+		stringstream name_str( t[nodeI].name );
+		uint number = 0;
+		if( !( name_str >> number ) || number == 0 )	// a failed parse or a 0 would otherwise turn into a huge wrapped-around label
+			continue;
+		stringstream new_name_str;
+		new_name_str << number - 1;
+		t[nodeI].name = new_name_str.str();
+	}
+}
+
+int main( int argc, char* argv[] )	// canonicalise every tree of the input file and write each distinct one once
+{
+	if( argc < 3 )
+	{
+		cerr << "Usage: uniquifyTrees <nexus input file> <nexus output file>\n";
+		cerr << "All trees in the input file must have the same number of taxa and the same taxon labels\n";
+		return -1;	// argv[1] and argv[2] are not usable without both arguments
+	}
+	string input_filename = argv[1];
+	string output_filename = argv[2];
+	ifstream input_file( input_filename.c_str() );
+	if( !input_file.is_open() )
+	{
+		cerr << "Error opening \"" << input_filename << "\"\n";
+		return -1;
+	}
+	ofstream output_file( output_filename.c_str() );
+	if( !output_file.is_open() )
+	{
+		cerr << "Error opening \"" << output_filename << "\"\n";
+		return -1;
+	}
+	
+	size_t tree_sizes = 0;
+	vector< string > tree_list;
+	while( true )
+	{
+		PhyloTree< TreeNode > t;
+		t.readTree( input_file );
+		if( t.size() == 0 )
+			break;
+		if( tree_sizes == 0 )
+			tree_sizes = t.size();
+		if( t.size() != tree_sizes )
+		{
+			cerr << "Error: tree " << tree_list.size() + 1 << " has a different number of taxa\n";	// trees accepted so far, plus this one
+			return -2;
+		}
+		sortTaxa( t );
+		relabelTaxaToStartWithZero( t );
+		stringstream ss;
+		t.writeTree(ss);
+		tree_list.push_back(ss.str());
+		cout << "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b";
+		cout << "Read " << tree_list.size() << " trees";
+	}
+	cout << endl;
+	cout << "Writing unique trees to \"" << output_filename << "\"\n";
+	sort(tree_list.begin(), tree_list.end() );	// equal serialisations become neighbours
+	size_t unique_count = 0;
+	for( size_t treeI = 0; treeI < tree_list.size(); treeI++ )
+	{
+		if( treeI > 0 && tree_list[treeI] == tree_list[treeI - 1] )
+			continue;
+		output_file << tree_list[treeI] << endl;
+		unique_count++;
+	}
+	cerr << "There are " << unique_count << " unique trees\n";
+	return 0;
+}
diff --git a/src/xmfa2maf.cpp b/src/xmfa2maf.cpp
new file mode 100644
index 0000000..9876c69
--- /dev/null
+++ b/src/xmfa2maf.cpp
@@ -0,0 +1,87 @@
+#include "libMems/IntervalList.h"
+#include "libMems/ProgressiveAligner.h"
+#include <fstream>
+
+using namespace mems;
+using namespace std;
+using namespace genome;
+
+int main(int argc, char* argv[] ){	// reads the XMFA alignment argv[1] and writes it as MAF to argv[2]
+	if(argc != 3){
+		cerr << "Usage: xmfa2maf <xmfa input> <maf output>\n";
+		return -1;
+	}
+	ifstream ifile(argv[1]);
+	if(!ifile.is_open()){
+		cerr << "Error reading \"" << argv[1] << "\"\n";
+		return -2;
+	}
+	ofstream ofile(argv[2]);
+	if(!ofile.is_open()){
+		cerr << "Error writing to \"" << argv[2] << "\"\n";
+		return -2;
+	}
+
+	IntervalList xmfa;
+	xmfa.ReadStandardAlignment(ifile);
+	LoadSequences(xmfa, &cout);
+
+	// break alignments on chromosome boundaries, working on clones of the intervals
+	vector<AbstractMatch*> alignments;
+	for(int ivI=0; ivI < xmfa.size(); ivI++){
+		alignments.push_back( xmfa[ivI].Clone() );	// NOTE(review): the clones are not released before main returns
+	}
+
+	vector< vector< gnSeqI > > chromo_bounds( xmfa.seq_table.size() );
+	for(int seqI=0; seqI < xmfa.seq_table.size(); seqI++){
+		for(int cI=1; cI < xmfa.seq_table[seqI]->contigListSize(); cI++){
+			chromo_bounds[seqI].push_back( xmfa.seq_table[seqI]->contigStart(cI) );	// start of every contig after the first
+		}
+		SSC<AbstractMatch> msc( seqI );
+		sort( alignments.begin(), alignments.end(), msc );
+		AbstractMatchSeqManipulator amsm( seqI );
+		applyBreakpoints( chromo_bounds[seqI], alignments, amsm );
+	}
+
+	ofile << "##maf version=1 program=progressiveMauve\n";
+	for(int ivI=0; ivI < alignments.size(); ivI++ ){
+		ofile << "a\n";
+		vector<string> aln;
+		GetAlignment( *((Interval*)(alignments[ivI])), xmfa.seq_table, aln );	// assumes every entry is an Interval - TODO confirm
+
+		for( int seqI=0; seqI < xmfa.seq_filename.size(); seqI++ ){
+			if(alignments[ivI]->LeftEnd(seqI)==0)
+				continue;	// sequence not defined in this block
+
+			// determine which contig each end of this alignment is in; globalToLocal rewrites both arguments
+			uint32 l_contigI, r_contigI;
+			gnSeqI l_baseI = alignments[ivI]->LeftEnd(seqI);
+			gnSeqI r_baseI = alignments[ivI]->RightEnd(seqI)-1;
+			xmfa.seq_table[seqI]->globalToLocal( l_contigI, l_baseI );
+			xmfa.seq_table[seqI]->globalToLocal( r_contigI, r_baseI );
+			string contig_name = xmfa.seq_table[seqI]->contigName( l_contigI );
+			if(l_contigI != r_contigI){
+				cerr << "interval " << ivI << " seq " << seqI << " left " << alignments[ivI]->LeftEnd(seqI) << " right " << alignments[ivI]->RightEnd(seqI) << endl;
+				cerr << "l_baseI " << l_baseI << " r_baseI " << r_baseI << " l_contigI " << l_contigI << " r_contigI " << r_contigI << " name " << contig_name << endl;
+				cerr << "Error, input alignment spans multiple contigs/chromosomes. Unable to translate to MAF\n";
+				return -1;
+			}
+			ofile << "s " << xmfa.seq_filename[seqI] << "." << contig_name;
+			ofile.flush();
+
+			int64 lend = l_baseI-1;	// start column for forward matches
+			if(alignments[ivI]->Orientation(seqI) == AbstractMatch::reverse){
+				lend = xmfa.seq_table[seqI]->contigLength(l_contigI) - l_baseI - alignments[ivI]->Length(seqI) + 1;	// reverse matches: start measured from the other end of the contig
+			}
+			ofile << " " << lend;
+			ofile << " " << alignments[ivI]->Length(seqI);
+			ofile << " " << (alignments[ivI]->Orientation(seqI) == AbstractMatch::reverse ? "-" : "+");
+			ofile << " " << xmfa.seq_table[seqI]->contigLength(l_contigI);
+			ofile << " " << aln[seqI] << endl;
+		}
+		ofile << endl;
+	}
+	ofile.close();
+
+	return 0;
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/mauvealigner.git



More information about the debian-med-commit mailing list