[med-svn] [mugsy] 01/02: Imported Upstream version 1r2.3+dfsg-1

Andreas Tille tille at debian.org
Mon Apr 13 18:55:12 UTC 2015


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository mugsy.

commit 793650fef5f5f83ecdb8f6795665bd8e692954c3
Author: Andreas Tille <tille at debian.org>
Date:   Mon Apr 13 20:54:07 2015 +0200

    Imported Upstream version 1r2.3+dfsg-1
---
 AUTHORS                                            |   14 +
 CHANGELOG                                          |   16 +
 LICENSE                                            |  143 +
 Makefile                                           |   68 +
 README                                             |   33 +
 README.install                                     |   13 +
 README.sources                                     |   43 +
 chaining/Makefile                                  |   51 +
 chaining/file.h                                    |  733 +++
 chaining/filters.h                                 |  331 ++
 chaining/graph.h                                   |  578 ++
 chaining/lcbchecks.h                               |  905 +++
 chaining/mincut.h                                  |  975 ++++
 chaining/synchain-mugsy.cpp                        | 2237 ++++++++
 delta-dups.sh                                      |   21 +
 fixMAFnames.pl                                     |   28 +
 labelblocks.pl                                     |   59 +
 maf2fasta.pl                                       |   73 +
 maf2gp.pl                                          |   75 +
 maf2synchain.pl                                    |  115 +
 mapping/AlignmentTree.pm                           | 1476 +++++
 mapping/IntervalTree.pm                            |  154 +
 mapping/Makefile                                   |   20 +
 mapping/README                                     |   39 +
 mapping/README.example                             |   40 +
 mapping/bsmlindex.pl                               |   67 +
 mapping/chadoindex.pl                              |    4 +
 mapping/featureindex.pl                            |  119 +
 mapping/intersect.pl                               |   58 +
 mapping/mafindex.pl                                |  139 +
 mapping/mapfeatures.pl                             | 3865 +++++++++++++
 mapping/mugsy-annotator                            |   48 +
 mapping/mugsyindex.pl                              |   38 +
 mapping/mugsymapper                                |   34 +
 mapping/query.pl                                   |   19 +
 mapping/reportvariants.pl                          |  118 +
 mapping/testitree.pl                               |  327 ++
 mapping/xmfaindex.pl                               |  145 +
 mugsy                                              | 1013 ++++
 mugsy-seqan/projects/library/apps/Makefile         |   48 +
 mugsy-seqan/projects/library/apps/mugsy/mugsy.cpp  | 6035 ++++++++++++++++++++
 .../projects/library/apps/mugsy/rna_alphabet.h     |  305 +
 .../projects/library/apps/mugsy/transformcoords.h  |   36 +
 mugsyWGA                                           |    1 +
 mugsyenv.sh                                        |    8 +
 mumi.sh                                            |  140 +
 mumi_fasta.sh                                      |   95 +
 plot.pl                                            |  403 ++
 splitmaf.pl                                        |   48 +
 synchain-mugsy                                     |    1 +
 util/mafgrep.pl                                    |   55 +
 util/mafstats.pl                                   |  600 ++
 util/reportvariants.pl                             |  118 +
 xmfa2maf.pl                                        |  116 +
 54 files changed, 22243 insertions(+)

diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..9fe6679
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,14 @@
+Sam Angiuoli <angiuoli at cs.umd.edu>
+
+Mugsy utilizes Seqan 1.2 (Doring et al. BMC Bioinformatics. 2008) and
+MUMmer 3.20 (Kurtz, S et al. Genome Biology. 2004). The version of
+these sources with modifications used for Mugsy and respective license
+and copyright files are available in SVN at http://mugsy.sf.net.
+
+
+Links
+Mugsy: http://mugsy.sf.net
+Seqan: http://www.seqan.de
+Mummer: http://mummer.sourceforge.net
+
+November 2010
diff --git a/CHANGELOG b/CHANGELOG
new file mode 100644
index 0000000..45fec2a
--- /dev/null
+++ b/CHANGELOG
@@ -0,0 +1,16 @@
+*Mugsy 1.2.3 (12/21/2011)
+-Raised hard-coded max genome limit to 256
+-Recompile with more portable options
+-Fixes to correct reporting of unaligned seqs at beginning of contigs and in some repeats
+-Performance improvement for draft genomes
+
+*Mugsy 1.2.2 (5/25/2011)
+-Compilation and portability improvements
+
+*Mugsy 1.2.1 (12/16/2010)
+-Raised hard-coded max genome limit to 128. Plans to remove this limit in the future
+-Documentation updates and portability fixes 
+
+*Mugsy 1.2 (11/1/2010)
+-First public release
+Citation: Angiuoli SV, Salzberg SL. Mugsy: Fast multiple alignment of closely related whole genomes. Bioinformatics. 2010 Dec 9.
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..03d7dc6
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,143 @@
+Artistic License 2.0
+
+Preamble
+
+This license establishes the terms under which a given free software Package may be copied, modified, distributed, and/or redistributed. The intent is that the Copyright Holder maintains some artistic control over the development of that Package while still keeping the Package available as open source and free software.
+
+You are always permitted to make arrangements wholly outside of this license directly with the Copyright Holder of a given Package. If the terms of this license do not permit the full use that you propose to make of the Package, you should contact the Copyright Holder and seek a different licensing arrangement.
+Definitions
+
+"Copyright Holder" means the individual(s) or organization(s) named in the copyright notice for the entire Package.
+
+"Contributor" means any party that has contributed code or other material to the Package, in accordance with the Copyright Holder's procedures.
+
+"You" and "your" means any person who would like to copy, distribute, or modify the Package.
+
+"Package" means the collection of files distributed by the Copyright Holder, and derivatives of that collection and/or of those files. A given Package may consist of either the Standard Version, or a Modified Version.
+
+"Distribute" means providing a copy of the Package or making it accessible to anyone else, or in the case of a company or organization, to others outside of your company or organization.
+
+"Distributor Fee" means any fee that you charge for Distributing this Package or providing support for this Package to another party. It does not mean licensing fees.
+
+"Standard Version" refers to the Package if it has not been modified, or has been modified only in ways explicitly requested by the Copyright Holder.
+
+"Modified Version" means the Package, if it has been changed, and such changes were not explicitly requested by the Copyright Holder.
+
+"Original License" means this Artistic License as Distributed with the Standard Version of the Package, in its current version or as it may be modified by The Perl Foundation in the future.
+
+"Source" form means the source code, documentation source, and configuration files for the Package.
+
+"Compiled" form means the compiled bytecode, object code, binary, or any other form resulting from mechanical transformation or translation of the Source form.
+Permission for Use and Modification Without Distribution
+
+(1) You are permitted to use the Standard Version and create and use Modified Versions for any purpose without restriction, provided that you do not Distribute the Modified Version.
+Permissions for Redistribution of the Standard Version
+
+(2) You may Distribute verbatim copies of the Source form of the Standard Version of this Package in any medium without restriction, either gratis or for a Distributor Fee, provided that you duplicate all of the original copyright notices and associated disclaimers. At your discretion, such verbatim copies may or may not include a Compiled form of the Package.
+
+(3) You may apply any bug fixes, portability changes, and other modifications made available from the Copyright Holder. The resulting Package will still be considered the Standard Version, and as such will be subject to the Original License.
+Distribution of Modified Versions of the Package as Source
+
+(4) You may Distribute your Modified Version as Source (either gratis or for a Distributor Fee, and with or without a Compiled form of the Modified Version) provided that you clearly document how it differs from the Standard Version, including, but not limited to, documenting any non-standard features, executables, or modules, and provided that you do at least ONE of the following:
+
+(a) make the Modified Version available to the Copyright Holder of the Standard Version, under the Original License, so that the Copyright Holder may include your modifications in the Standard Version.
+(b) ensure that installation of your Modified Version does not prevent the user installing or running the Standard Version. In addition, the Modified Version must bear a name that is different from the name of the Standard Version.
+(c) allow anyone who receives a copy of the Modified Version to make the Source form of the Modified Version available to others under
+(i) the Original License or
+(ii) a license that permits the licensee to freely copy, modify and redistribute the Modified Version using the same licensing terms that apply to the copy that the licensee received, and requires that the Source form of the Modified Version, and of any works derived from it, be made freely available in that license fees are prohibited but Distributor Fees are allowed.
+Distribution of Compiled Forms of the Standard Version or Modified Versions without the Source
+
+(5) You may Distribute Compiled forms of the Standard Version without the Source, provided that you include complete instructions on how to get the Source of the Standard Version. Such instructions must be valid at the time of your distribution. If these instructions, at any time while you are carrying out such distribution, become invalid, you must provide new instructions on demand or cease further distribution. If you provide valid instructions or cease distribution within thirty days [...]
+
+(6) You may Distribute a Modified Version in Compiled form without the Source, provided that you comply with Section 4 with respect to the Source of the Modified Version.
+Aggregating or Linking the Package
+
+(7) You may aggregate the Package (either the Standard Version or Modified Version) with other packages and Distribute the resulting aggregation provided that you do not charge a licensing fee for the Package. Distributor Fees are permitted, and licensing fees for other components in the aggregation are permitted. The terms of this license apply to the use and Distribution of the Standard or Modified Versions as included in the aggregation.
+
+(8) You are permitted to link Modified and Standard Versions with other works, to embed the Package in a larger work of your own, or to build stand-alone binary or bytecode versions of applications that include the Package, and Distribute the result without restriction, provided the result does not expose a direct interface to the Package.
+Items That are Not Considered Part of a Modified Version
+
+(9) Works (including, but not limited to, modules and scripts) that merely extend or make use of the Package, do not, by themselves, cause the Package to be a Modified Version. In addition, such works are not considered parts of the Package itself, and are not subject to the terms of this license.
+General Provisions
+
+(10) Any use, modification, and distribution of the Standard or Modified Versions is governed by this Artistic License. By using, modifying or distributing the Package, you accept this license. Do not use, modify, or distribute the Package, if you do not accept this license.
+
+(11) If your Modified Version has been derived from a Modified Version made by someone other than you, you are nevertheless required to ensure that your Modified Version complies with the requirements of this license.
+
+(12) This license does not grant you the right to use any trademark, service mark, tradename, or logo of the Copyright Holder.
+
+(13) This license includes the non-exclusive, worldwide, free-of-charge patent license to make, have made, use, offer to sell, sell, import and otherwise transfer the Package with respect to any patent claims licensable by the Copyright Holder that are necessarily infringed by the Package. If you institute patent litigation (including a cross-claim or counterclaim) against any party alleging that the Package constitutes direct or contributory patent infringement, then this Artistic Licen [...]
+
+(14) Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, E [...]
+Copyright (c) 2000-2006, The Perl Foundation.
+
+Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
+Preamble
+
+This license establishes the terms under which a given free software Package may be copied, modified, distributed, and/or redistributed. The intent is that the Copyright Holder maintains some artistic control over the development of that Package while still keeping the Package available as open source and free software.
+
+You are always permitted to make arrangements wholly outside of this license directly with the Copyright Holder of a given Package. If the terms of this license do not permit the full use that you propose to make of the Package, you should contact the Copyright Holder and seek a different licensing arrangement.
+Definitions
+
+"Copyright Holder" means the individual(s) or organization(s) named in the copyright notice for the entire Package.
+
+"Contributor" means any party that has contributed code or other material to the Package, in accordance with the Copyright Holder's procedures.
+
+"You" and "your" means any person who would like to copy, distribute, or modify the Package.
+
+"Package" means the collection of files distributed by the Copyright Holder, and derivatives of that collection and/or of those files. A given Package may consist of either the Standard Version, or a Modified Version.
+
+"Distribute" means providing a copy of the Package or making it accessible to anyone else, or in the case of a company or organization, to others outside of your company or organization.
+
+"Distributor Fee" means any fee that you charge for Distributing this Package or providing support for this Package to another party. It does not mean licensing fees.
+
+"Standard Version" refers to the Package if it has not been modified, or has been modified only in ways explicitly requested by the Copyright Holder.
+
+"Modified Version" means the Package, if it has been changed, and such changes were not explicitly requested by the Copyright Holder.
+
+"Original License" means this Artistic License as Distributed with the Standard Version of the Package, in its current version or as it may be modified by The Perl Foundation in the future.
+
+"Source" form means the source code, documentation source, and configuration files for the Package.
+
+"Compiled" form means the compiled bytecode, object code, binary, or any other form resulting from mechanical transformation or translation of the Source form.
+Permission for Use and Modification Without Distribution
+
+(1) You are permitted to use the Standard Version and create and use Modified Versions for any purpose without restriction, provided that you do not Distribute the Modified Version.
+Permissions for Redistribution of the Standard Version
+
+(2) You may Distribute verbatim copies of the Source form of the Standard Version of this Package in any medium without restriction, either gratis or for a Distributor Fee, provided that you duplicate all of the original copyright notices and associated disclaimers. At your discretion, such verbatim copies may or may not include a Compiled form of the Package.
+
+(3) You may apply any bug fixes, portability changes, and other modifications made available from the Copyright Holder. The resulting Package will still be considered the Standard Version, and as such will be subject to the Original License.
+Distribution of Modified Versions of the Package as Source
+
+(4) You may Distribute your Modified Version as Source (either gratis or for a Distributor Fee, and with or without a Compiled form of the Modified Version) provided that you clearly document how it differs from the Standard Version, including, but not limited to, documenting any non-standard features, executables, or modules, and provided that you do at least ONE of the following:
+
+(a) make the Modified Version available to the Copyright Holder of the Standard Version, under the Original License, so that the Copyright Holder may include your modifications in the Standard Version.
+(b) ensure that installation of your Modified Version does not prevent the user installing or running the Standard Version. In addition, the Modified Version must bear a name that is different from the name of the Standard Version.
+(c) allow anyone who receives a copy of the Modified Version to make the Source form of the Modified Version available to others under
+(i) the Original License or
+(ii) a license that permits the licensee to freely copy, modify and redistribute the Modified Version using the same licensing terms that apply to the copy that the licensee received, and requires that the Source form of the Modified Version, and of any works derived from it, be made freely available in that license fees are prohibited but Distributor Fees are allowed.
+Distribution of Compiled Forms of the Standard Version or Modified Versions without the Source
+
+(5) You may Distribute Compiled forms of the Standard Version without the Source, provided that you include complete instructions on how to get the Source of the Standard Version. Such instructions must be valid at the time of your distribution. If these instructions, at any time while you are carrying out such distribution, become invalid, you must provide new instructions on demand or cease further distribution. If you provide valid instructions or cease distribution within thirty days [...]
+
+(6) You may Distribute a Modified Version in Compiled form without the Source, provided that you comply with Section 4 with respect to the Source of the Modified Version.
+Aggregating or Linking the Package
+
+(7) You may aggregate the Package (either the Standard Version or Modified Version) with other packages and Distribute the resulting aggregation provided that you do not charge a licensing fee for the Package. Distributor Fees are permitted, and licensing fees for other components in the aggregation are permitted. The terms of this license apply to the use and Distribution of the Standard or Modified Versions as included in the aggregation.
+
+(8) You are permitted to link Modified and Standard Versions with other works, to embed the Package in a larger work of your own, or to build stand-alone binary or bytecode versions of applications that include the Package, and Distribute the result without restriction, provided the result does not expose a direct interface to the Package.
+Items That are Not Considered Part of a Modified Version
+
+(9) Works (including, but not limited to, modules and scripts) that merely extend or make use of the Package, do not, by themselves, cause the Package to be a Modified Version. In addition, such works are not considered parts of the Package itself, and are not subject to the terms of this license.
+General Provisions
+
+(10) Any use, modification, and distribution of the Standard or Modified Versions is governed by this Artistic License. By using, modifying or distributing the Package, you accept this license. Do not use, modify, or distribute the Package, if you do not accept this license.
+
+(11) If your Modified Version has been derived from a Modified Version made by someone other than you, you are nevertheless required to ensure that your Modified Version complies with the requirements of this license.
+
+(12) This license does not grant you the right to use any trademark, service mark, tradename, or logo of the Copyright Holder.
+
+(13) This license includes the non-exclusive, worldwide, free-of-charge patent license to make, have made, use, offer to sell, sell, import and otherwise transfer the Package with respect to any patent claims licensable by the Copyright Holder that are necessarily infringed by the Package. If you institute patent litigation (including a cross-claim or counterclaim) against any party alleging that the Package constitutes direct or contributory patent infringement, then this Artistic Licen [...]
+
+(14) Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, E [...]
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..c7fcfab
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,68 @@
+#Set release name or install directory
+RELEASE_NAME=mugsy_x86-64-v1r2.3.1
+INSTALL_DIR=./${RELEASE_NAME}
+
+
+all: nucmer synchain_mugsy mugsy_seqan
+
+install: mugsy_install mummer_install
+
+dist:
+	tar cvzf ${RELEASE_NAME}.tgz ${INSTALL_DIR}
+nucmer:
+	make -C MUMmer3.20 all
+
+synchain_mugsy:
+	make -C chaining synchain-mugsy
+
+mugsy_seqan:
+	make -C mugsy-seqan Project=mugsy
+
+mugsy_install:
+	mkdir -p ${INSTALL_DIR}
+	install mugsyenv.sh ${INSTALL_DIR}
+	perl -pi -e 's|export MUGSY_INSTALL=.*|export MUGSY_INSTALL=${INSTALL_DIR}|' ${INSTALL_DIR}/mugsyenv.sh
+
+
+	install mugsy ${INSTALL_DIR}
+	install mugsyWGA ${INSTALL_DIR}
+	install synchain-mugsy ${INSTALL_DIR}
+
+	install maf2fasta.pl ${INSTALL_DIR}
+	install xmfa2maf.pl ${INSTALL_DIR}
+	install fixMAFnames.pl ${INSTALL_DIR}
+	install splitmaf.pl ${INSTALL_DIR}
+	install plot.pl ${INSTALL_DIR}
+	install delta-dups.sh ${INSTALL_DIR}
+
+	install -m 644 README ${INSTALL_DIR}
+	install -m 644 CHANGELOG ${INSTALL_DIR}
+	install -m 644 LICENSE ${INSTALL_DIR}
+	install -m 644 LICENSE ${INSTALL_DIR}
+	install -m 644 README.install ${INSTALL_DIR}
+
+mummer_install:
+	mkdir -p ${INSTALL_DIR}/MUMmer3.20/scripts
+
+	install MUMmer3.20/delta-filter ${INSTALL_DIR}/MUMmer3.20/
+	install MUMmer3.20/gaps ${INSTALL_DIR}/MUMmer3.20/
+	install MUMmer3.20/mgaps ${INSTALL_DIR}/MUMmer3.20/
+	install MUMmer3.20/delta2maf ${INSTALL_DIR}/MUMmer3.20/
+	install MUMmer3.20/aux_bin/postnuc ${INSTALL_DIR}/MUMmer3.20/
+	install MUMmer3.20/aux_bin/prenuc ${INSTALL_DIR}/MUMmer3.20/
+#	install MUMmer3.20/src/tigr/show-coords ${INSTALL_DIR}/MUMmer3.20/
+	install MUMmer3.20/mummer ${INSTALL_DIR}/MUMmer3.20/
+	install MUMmer3.20/mummerplot ${INSTALL_DIR}/MUMmer3.20/
+	install MUMmer3.20/nucmer ${INSTALL_DIR}/MUMmer3.20/
+	install MUMmer3.20/scripts/Foundation.pm ${INSTALL_DIR}/MUMmer3.20/scripts
+
+# mapping_install:
+#	@install -d perllibs ${INSTALL_DIR}
+#	@install mapping/mafindex.pl ${INSTALL_DIR}
+#	@install mapping/AlignmentTree.pm ${INSTALL_DIR}
+#	@install mapping/IntervalTree.pm ${INSTALL_DIR}
+#	@install mapping/featureindex.pl ${INSTALL_DIR}
+#	@install mapping/mapfeatures.pl ${INSTALL_DIR}
+
+# multiz_install:
+#	@install labelblocks.pl ${INSTALL_DIR}
diff --git a/README b/README
new file mode 100644
index 0000000..7ae6c88
--- /dev/null
+++ b/README
@@ -0,0 +1,33 @@
+Mugsy - multiple whole genome alignment tool
+
+Citation:
+Angiuoli SV, Salzberg SL. Mugsy: Fast multiple alignment of closely related whole genomes. Bioinformatics. 2010 Dec 9.
+
+See README.install for installation
+
+To run 
+
+%bash
+%source mugsyenv.sh
+%mugsy --help
+
+Mugsy generates MAF formatted multiple alignments from FASTA inputs. The
+wrapper script 'mugsy' invokes all the steps to calculate the multiple
+alignment.
+
+Example invocation
+% mugsy --directory /local/scratch --prefix mygenomes genome1.fsa genome2.fsa genome3.fsa
+
+The core executables are
+
+mugsyWGA - whole genome aligner based on Seqan::TCoffee
+
+synchain-mugsy - segmentation program to produce locally collinear
+blocks (LCBs) from a set of anchors
+
+nucmer - 3.20 release bundled for convenience with new utility
+delta2maf and modified delta-filter to add support for reporting
+duplications 
+
+
+
diff --git a/README.install b/README.install
new file mode 100644
index 0000000..2373ecd
--- /dev/null
+++ b/README.install
@@ -0,0 +1,13 @@
+The x86-64 tar contains pre-compiled binaries for 64-bit x86 machines running Linux.
+
+Untar the release in the target installation area 
+e.g.
+ tar xvzf mugsy-x86-64-vXrX.tgz -C /path/to/install_dir/
+
+Edit MUGSY_INSTALL in mugsyenv.sh and set to absolute path of the
+installation directory
+
+In bash, run 
+ source mugsyenv.sh
+
+
diff --git a/README.sources b/README.sources
new file mode 100644
index 0000000..61bcd65
--- /dev/null
+++ b/README.sources
@@ -0,0 +1,43 @@
+
+To build, run make all
+
+chaining/
+----------------
+Original Mugsy sources for LCB identification code 
+Requires the Boost library
+
+Mugsy includes some 3rd party sources to build
+
+MUMmer3.20/
+---------------
+A copy of the original sources from http://mummer.sourceforge.net. 
+
+Modifications include delta-filter -b for reporting duplications and
+new utility delta2maf
+
+mugsy-seqan/
+----------------
+A copy of the original sources from the Seqan library and
+build environment that is required to build mugsyWGA
+
+The original sources were obtained from here http://www.seqan.de/
+
+New sources for mugsyWGA are in projects/library/apps/mugsy
+
+Additional changes to support reversals include these sources 
+graph_align_tcoffee_msa.h
+graph_align_tcoffee_base.h
+graph_align_tcoffee_io.h
+graph_algorithm_refine_exact_iterative.h
+graph_impl_align.h
+graph_impl_interval_tree.h
+graph_impl_fragment.h
+
+mugsy-seqan/projects/library/apps/mugsy/libmaf.a
+----------------
+A library of maf conversion utilities was built from Multiz 
+downloaded from here http://www.bx.psu.edu/miller_lab/multiz-tba.012109.tar.gz
+built with
+libmaf.a: mz_scores.o charvec.o nib.o seq.o multi_util.o maf.o util.o
+        ar rsc $@ mz_scores.o charvec.o nib.o seq.o multi_util.o maf.o util.o  
+
diff --git a/chaining/Makefile b/chaining/Makefile
new file mode 100644
index 0000000..d57ebf1
--- /dev/null
+++ b/chaining/Makefile
@@ -0,0 +1,51 @@
+#-- Imported variables from top level makefile
+# BIN_DIR AUX_BIN_DIR CXX CC CPPFLAGS LDFLAGS
+
+CPPFLAGS = -I /usr/local/projects/angiuoli/boost/include/boost-1_38 -pedantic -ftemplate-depth-200 -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -O3 
+#-Wall -mfpmath=sse -msse2
+#-march=nocona -mfpmath=sse -msse2 
+#-ggdb
+# -march=pentium4
+#CPPFLAGS = -ggdb -pg
+#CPPFLAGS = -I /usr/local/projects/angiuoli/boost/include/boost-1_38 -pg
+
+ifndef BIN_DIR
+BIN_DIR := $(CURDIR)
+endif
+ifndef AUX_BIN_DIR
+AUX_BIN_DIR := $(CURDIR)
+endif
+OBJ_RULE = $(CXX) $(CPPFLAGS) $< -static -I /usr/local/projects/angiuoli/boost/include/boost-1_38 -c -o $@
+BIN_RULE = $(CXX) $(CPPFLAGS) $^ -static -L /usr/local/projects/angiuoli/boost/lib/ -lbgl-viz -lboost_graph-gcc41-mt -o $(BIN_DIR)/$@; \
+           chmod 755 $(BIN_DIR)/$@
+AUX_BIN_RULE = $(CXX) $(CPPFLAGS) $^ -o $(AUX_BIN_DIR)/$@; \
+           chmod 755 $(AUX_BIN_DIR)/$@
+VPATH := $(AUX_BIN_DIR):$(BIN_DIR)
+
+ALL := synchain-mugsy
+
+
+#-- PHONY rules --#
+.PHONY: all clean
+
+
+all: $(ALL)
+
+
+clean:
+	rm -f *.o *~
+	cd $(BIN_DIR); rm -f $(ALL)
+	cd $(AUX_BIN_DIR); rm -f $(ALL)
+
+
+
+
+#-- not so PHONY rules --#
+synchain-mugsy.o: synchain-mugsy.cpp graph.h filters.h lcbchecks.h file.h mincut.h
+	$(OBJ_RULE)
+
+synchain-mugsy: synchain-mugsy.o
+	$(BIN_RULE)
+
+
+
diff --git a/chaining/file.h b/chaining/file.h
new file mode 100644
index 0000000..6513777
--- /dev/null
+++ b/chaining/file.h
@@ -0,0 +1,733 @@
+
+
+//################################
+//File IO
+/*read_blocks: parse the 6 column block format, one space-separated record per line:
+anchor seqindex genomeindex orient beg end      (orient is "+" or "-")
+Complete records add/reuse a vertex in g and store its orientation, coordinates and length.*/
+void read_blocks(std::istream &in, 
+		 Graph & g, 
+		 NameVertexMap & name2vertex, 
+		 NameLabelMap & genome2index, 
+		 NameLabelMap & sequence2index, 
+		 VertexLabelIntervalMap & coordinates,
+		 int distance){
+  //genome2index is not used by this function.
+  NameLabelMap::iterator pos1; 
+  NameVertexMap::iterator pos;
+  VertexLabelIntervalMap::iterator pos2;  
+  bool inserted; 
+  Label seqindex=0;
+  Vertex news;
+  Edge e1;
+  int edges=0;
+  bool found;
+
+  std::string line;
+  typedef tokenizer<char_separator<char> > Tok;
+  int field=0;
+  VertexName sname=0;
+  std::string sequence,genome;
+  Orientation sorient=false;
+  Coordinate sbeg=0,send=0;
+  int dist=0;
+  std::string sorientstr;
+  //(the unused locals newe, newso and ordercounts were removed)
+  
+  property_map < Graph, vertex_orient_t >::type orientmap = get(vertex_orient,g);
+  property_map < Graph, vertex_len_t >::type lenmap = get(vertex_len,g);
+#if defined(STORE_EDGE_LABELS)
+  property_map < Graph, edge_label_t >::type labelmap = get(edge_label,g);
+#endif
+  property_map < Graph, edge_labelmask_t >::type elabelmaskmap = get(edge_labelmask,g);
+
+  //Columns are parsed in order. When a token fails to convert, the exception
+  //is swallowed and field is not advanced, so the next token is tried for the
+  //same column. Exceptions are caught by const reference to avoid slicing.
+  while (getline(in, line)) {
+    Tok tok(line, char_separator<char>(" "));
+    field=0;
+    for (Tok::iterator id = tok.begin(); id != tok.end(); ++id) {
+      switch(field){
+      case 0:
+	try{
+	  sname  = lexical_cast<VertexName>(*id);
+	  field++;
+	}
+	catch (const std::exception &){
+	}
+	break;
+      case 1:
+	try{
+	  sequence = lexical_cast<std::string>(*id);
+	  field++;
+	}
+	catch (const std::exception &){
+	}
+	break;
+      case 2:
+	try{
+	  genome = lexical_cast<std::string>(*id);
+	  field++;
+	}
+	catch (const std::exception &){
+	}
+	break;
+      case 3:
+	try {
+	  sorientstr = lexical_cast<std::string>(*id);
+	  if(sorientstr == "+"){
+	    sorient = true;
+	  }
+	  else{
+	    assert(sorientstr=="-");
+	    sorient = false;
+	  }
+	  field++;
+	}
+	catch (const std::exception &){
+	}
+	break;
+      case 4:
+	try {
+	  sbeg = lexical_cast<Coordinate>(*id);
+	  field++;
+	}
+	catch (const std::exception &){
+	}
+	break;
+      case 5:
+	try {
+	  send = lexical_cast<Coordinate>(*id);
+	  field++;
+	}
+	catch (const std::exception &){
+	}
+	break;
+      }
+      //A record is complete once all 6 columns are parsed. This check sits
+      //inside the token loop, so tokens after the 6th re-enter the branch too.
+      //Complete record: register sequence, vertex, orientation and coordinates.
+      if(field==6){
+	//Set either returns existing index or inserts
+	tie(pos1, inserted) = sequence2index.insert(std::make_pair(sequence, 0));
+	if (inserted) {
+	  pos1->second = sequence2index.size();
+	  seqindex = pos1->second;
+	  assert(seqindex>=0&&seqindex<BITMAX);
+	}
+	else{
+	  seqindex = pos1->second;
+	  assert(seqindex>=0&&seqindex<BITMAX);
+	}
+	
+	//Add source vertex
+	//Set either returns existing vertex or inserts
+	tie(pos, inserted) = name2vertex.insert(std::make_pair(sname, Vertex()));
+	if (inserted) {
+	  news = add_vertex(VertexProperties(sname),g);
+	  pos->second = news;
+	} else{
+	  news = pos->second;
+	}
+	//Add oriented label
+	orientmap[news].insert(make_pair(seqindex,sorient));
+      
+	//Save coordinates
+	assert(sbeg!=send);
+	tie(pos2, inserted) = coordinates.insert(std::make_pair(make_pair(news,seqindex), std::make_pair(sbeg,send)));
+	if(lenmap[news]>0){
+	  assert(lenmap[news]==pos2->second.second-pos2->second.first);
+	}
+	lenmap[news] = pos2->second.second-pos2->second.first;
+      } 
+      else{
+	//Record not complete yet: nothing to do for this token
+      }
+    }
+    //Link neighbouring vertices on every sequence. NOTE(review): this pass sits inside the while loop, so it runs after every input line - confirm intended
+    NameLabelMap::iterator sit,sit_end;
+    sit_end = sequence2index.end();
+    for(sit = sequence2index.begin();sit!=sit_end;++sit){
+      Label seqidx = sit->second;
+      //store all vertices with this seqlabel
+      list<Vertex> sortedV;
+      boost::graph_traits<Graph>::vertex_iterator vit,vit_end;
+      vit_end = vertices(g).second;
+      VertexIntervalMap currcoords;
+      for(vit=vertices(g).first;vit!=vit_end;++vit){
+	VertexLabelIntervalMap::iterator cit = coordinates.find(std::make_pair(*vit,seqidx));
+	if(cit!=coordinates.end()){
+	  sortedV.push_back(*vit);
+	  currcoords.insert(std::make_pair(*vit,
+					   cit->second));
+	}
+      }
+      //sort the vertices with coordsorder_vertex over their coordinates on this sequence
+      sortedV.sort(coordsorder_vertex(&currcoords));
+      Vertex currvertex,prevvertex;
+      list<Vertex>::iterator it,it_end;
+      for(it=sortedV.begin();it!=sortedV.end();++it){
+	currvertex = *it;
+	if(it==sortedV.begin()){
+	  prevvertex=currvertex;
+	}
+	else{
+	  dist = abs(coordinates[std::make_pair(prevvertex,seqidx)].second - coordinates[std::make_pair(currvertex,seqidx)].first);
+	  //Add (or re-label) an edge when the gap is within distance
+	  assert(dist>=0);
+	  if(dist <= distance){
+	    tie(e1,found) = edge(prevvertex,currvertex, g);
+	    if(found){
+	      //existing edge between prevvertex--currvertex
+	      //NOTE(review): labels below use seqindex (sequence of the last parsed line), not the loop's seqidx - confirm
+#if defined(STORE_EDGE_LABELS)
+	      labelmap[e1].insert(std::make_pair(seqindex,dist));
+#endif
+	      elabelmaskmap[e1].set(seqindex,1);
+	    }
+	    else{
+	      //Code to handle directed graph where
+	      //reverse orientation
+	      //TODO
+	      //Need to consider case where
+	      //this edge is mis-oriented introducing an artificial breakpoint
+	      //in the chain
+	      tie(e1,found) = edge(currvertex,prevvertex,g);
+	      if(found){
+#if defined(STORE_EDGE_LABELS)
+		labelmap[e1].insert(std::make_pair(seqindex,dist));
+#endif
+		elabelmaskmap[e1].set(seqindex,1);
+	      }
+	      else{
+		bool inserted;
+		Edge e1;
+#if defined(STORE_EDGE_LABELS)
+		LabelMap labels;
+		labels[seqindex] = dist;
+		tie(e1, inserted) = add_edge(prevvertex,currvertex,EdgeProperties(labels),g);
+#else
+		tie(e1, inserted) = add_edge(prevvertex,currvertex,EdgeProperties(),g);
+#endif
+		assert(inserted);
+		elabelmaskmap[e1].set(seqindex,1);
+	      }
+	    }
+	    edges++;
+	  }
+	}
+      }
+    }
+  }
+}
+
+
+/*
+Projection input is one edge per line, space separated:
+anchor1 anchor2 seqindex dist genomeindex orient1 orient2 beg1 end1 beg2 end2
+eg
+0 1 0 0 0 + + 0 196 196 15348
+1 3 0 1 0 + + 196 15348 15349 20373
+*/
+
+void read_pairwiseprojection(std::istream &in, 
+			     Graph & g, 
+			     NameVertexMap & name2vertex, 
+			     NameLabelMap & genome2index, 
+			     NameLabelMap & sequence2index, 
+			     VertexLabelIntervalMap & coordinates,
+			     SequenceGenomeMap & sequence2genome,
+			     int distance,
+			     int minanchor){
+  //Adds/reuses a vertex for each anchor and, when dist<=distance, an edge between them.
+  NameVertexMap::iterator pos;
+  NameLabelMap::iterator pos1; 
+  VertexLabelIntervalMap::iterator pos2;  
+  SequenceGenomeMap::iterator pos3;
+  //Lines are skipped when either anchor is shorter than minanchor.
+  bool inserted; 
+  Label seqindex,genomeindex;
+  Vertex news, newt;
+  Edge e1;
+  int edges=0;
+  bool found;
+
+  std::string line;
+  typedef tokenizer<char_separator<char> > Tok;
+  int field=0;
+  VertexName sname=0,tname=0;
+  std::string sequence,genome;
+  Orientation sorient=false,torient=false;
+  Coordinate sbeg=0,send=0,tbeg=0,tend=0;
+  int dist=0;
+  std::string sorientstr;
+  std::string torientstr;
+
+  //(the unused locals newe, newso, newto and ordercounts were removed)
+  
+  property_map < Graph, vertex_orient_t >::type orientmap = get(vertex_orient,g);
+  property_map < Graph, vertex_label_t >::type vlabelmap = get(vertex_label,g);
+  property_map < Graph, vertex_genome_t >::type genomemap = get(vertex_genome,g);
+  property_map < Graph, vertex_len_t >::type lenmap = get(vertex_len,g);
+
+  property_map < Graph, edge_labelmask_t >::type elabelmaskmap = get(edge_labelmask,g);
+#if defined(STORE_EDGE_LABELS)
+  property_map < Graph, edge_label_t >::type labelmap = get(edge_label,g);
+#endif
+
+  //Columns are parsed in order. When a token fails to convert, the exception
+  //is swallowed and field is not advanced, so the next token is tried for the
+  //same column. Exceptions are caught by const reference to avoid slicing.
+
+  while (getline(in, line)) {
+    //std::cerr << line << std::endl;
+    Tok tok(line, char_separator<char>(" "));
+    field=0;
+    for (Tok::iterator id = tok.begin(); id != tok.end(); ++id) {
+      switch(field){
+      case 0:
+	try{
+	  sname  = lexical_cast<VertexName>(*id);
+	  field++;
+	}
+	catch (const std::exception &){
+	}
+	break;
+      case 1:
+	try{
+	  tname  = lexical_cast<VertexName>(*id);
+	  field++;
+	}
+	catch (const std::exception &){
+	}
+	break;
+      case 2:
+	try{
+	  sequence = lexical_cast<std::string>(*id);
+	  field++;
+	}
+	catch (const std::exception &){
+	}
+	break;
+      case 3:
+	try{
+	  dist = lexical_cast<long int>(*id);
+	  field++;
+	}
+	catch (const std::exception &){
+	}
+	break;
+      case 4:
+	try{
+	  genome = lexical_cast<std::string>(*id);
+	  field++;
+	}
+	catch (const std::exception &){
+	}
+	break;
+      case 5:
+	try {
+	  sorientstr = lexical_cast<std::string>(*id);
+	  if(sorientstr == "+"){
+	    sorient = true;
+	  }
+	  else{
+	    assert(sorientstr=="-");
+	    sorient = false;
+	  }
+	  field++;
+	}
+	catch (const std::exception &){
+	}
+	break;
+      case 6:
+	try {
+	  torientstr = lexical_cast<std::string>(*id);
+	  if(torientstr == "+"){
+	    torient = true;
+	  }
+	  else{
+	    assert(torientstr=="-");
+	    torient = false;
+	  }
+	  field++;
+	}
+	catch (const std::exception &){
+	}
+	break;
+      case 7:
+	try {
+	  sbeg = lexical_cast<Coordinate>(*id);
+	  field++;
+	}
+	catch (const std::exception &){
+	}
+	break;
+      case 8:
+	try {
+	  send = lexical_cast<Coordinate>(*id);
+	  field++;
+	}
+	catch (const std::exception &){
+	}
+	break;
+      case 9:
+	try {
+	  tbeg = lexical_cast<Coordinate>(*id);
+	  field++;
+	}
+	catch (const std::exception &){
+	}
+	break;
+      case 10:
+	try {
+	  tend = lexical_cast<Coordinate>(*id);
+	  field++;
+	}
+	catch (const std::exception &){
+	}
+	break;
+      }
+    }
+    //5 column table is minimum input
+    //7 columns add orientations, 11 columns add coordinates
+    //NOTE(review): sbeg/send/tbeg/tend are only assigned on 11-column lines; shorter lines reuse the previous values (initially 0) in the minanchor test
+    if(field==5 || field==7 || field==11){
+      if(field==5){
+	cerr << "Incomplete file "<< endl;
+	sorient = true;
+	torient = true;
+      }
+      if(abs(tend-tbeg)>=minanchor && abs(send-sbeg)>=minanchor){
+	//Set either returns existing index or inserts
+	tie(pos1, inserted) = sequence2index.insert(std::make_pair(sequence, 0));
+	if (inserted) {
+	  pos1->second = sequence2index.size();
+	  seqindex = pos1->second;
+	  //assert(seqindex>=0&&seqindex<BITMAX);
+	}
+	else{
+	  seqindex = pos1->second;
+	  //assert(seqindex>=0&&seqindex<BITMAX);
+	}
+	
+	//Add source vertex
+	//Set either returns existing vertex or inserts
+	tie(pos, inserted) = name2vertex.insert(std::make_pair(sname, Vertex()));
+	if (inserted) {
+	  news = add_vertex(VertexProperties(sname),g);
+	  pos->second = news;
+	} else{
+	  news = pos->second;
+	}
+	
+	//Add genome
+	tie(pos1, inserted) = genome2index.insert(std::make_pair(genome, 0));
+	if (inserted) {
+	  pos1->second = genome2index.size();
+	  genomeindex = pos1->second;
+	  assert(genomeindex>=0&&genomeindex<BITMAX);
+	}
+	else{
+	  genomeindex = pos1->second;
+	  assert(genomeindex>=0&&genomeindex<BITMAX);
+	}
+	
+	//Save sequence2genome lookup
+	//TODO
+	//Use genomeindex
+	//TEMP HACK to support 1 seq per genome
+	//genomeindex=seqindex;
+	//</HACK>
+	
+	genomemap[news].insert(genomeindex);
+	
+	tie(pos3, inserted) = sequence2genome.insert(std::make_pair(seqindex, genomeindex));
+	if (!inserted) {
+	  //A sequence seen before must still map to the same genome.
+	  //(the original wrote '=' here, assigning genomeindex instead of comparing it)
+	  assert(seqindex==pos3->first);
+	  assert(genomeindex==pos3->second);
+	}
+	
+	//Add oriented label
+	orientmap[news].insert(make_pair(seqindex,sorient));
+	vlabelmap[news].insert(seqindex);
+	
+	//Add target vertex
+	tie(pos, inserted) = name2vertex.insert(std::make_pair(tname, Vertex()));
+	if (inserted) {
+	  newt = add_vertex(VertexProperties(tname),g);
+	  pos->second = newt;
+	} else{
+	  newt = pos->second;
+	}
+	
+	genomemap[newt].insert(genomeindex);
+	orientmap[newt].insert(make_pair(seqindex,torient));
+	vlabelmap[newt].insert(seqindex);
+	//Save coordinates; the vertex length keeps the largest interval seen so far
+	if(field==11){
+	  assert(sbeg!=send);
+	  assert(tbeg!=tend);
+	  tie(pos2, inserted) = coordinates.insert(std::make_pair(make_pair(news,seqindex), std::make_pair(sbeg,send)));
+	  //std::cerr << "Coords source V:" << news << " " << sbeg << "-" << send << std::endl;
+	  if(lenmap[news]>0){
+	    //assert(lenmap[news]==pos2->second.second-pos2->second.first);
+	    lenmap[news] = (lenmap[news]>pos2->second.second-pos2->second.first) ? lenmap[news] : pos2->second.second-pos2->second.first;
+	  }
+	  else{
+	    lenmap[news] = pos2->second.second-pos2->second.first;
+	  }
+	  tie(pos2, inserted) = coordinates.insert(std::make_pair(make_pair(newt,seqindex), std::make_pair(tbeg,tend)));
+	  //std::cerr << "Coords target V:" << newt << " " << tbeg << "-" << tend << std::endl;
+	  if(lenmap[newt]>0){
+	    //assert(lenmap[newt]==pos2->second.second-pos2->second.first);
+	    lenmap[newt] = (lenmap[newt]>pos2->second.second-pos2->second.first) ? lenmap[newt] : pos2->second.second-pos2->second.first;
+	  }
+	  else{
+	    lenmap[newt] = pos2->second.second-pos2->second.first;
+	  }
+	}
+	//Add edge if the projected distance is within the allowed distance
+	assert(dist>=0);
+	if(dist <= distance){
+	  tie(e1,found) = edge(news,newt, g);
+	  if(found){
+	    //existing edge between news--newt
+	    //record this genome on the existing edge
+#if defined(STORE_EDGE_LABELS)
+	    labelmap[e1].insert(std::make_pair(genomeindex,dist));
+#endif
+	    elabelmaskmap[e1].set(genomeindex,1);
+	  }
+	  else{
+	    //Code to handle directed graph where
+	    //reverse orientation
+	    //TODO
+	    //Need to consider case where
+	    //this edge is mis-oriented introducing an artificial breakpoint
+	    //in the chain
+	    tie(e1,found) = edge(newt,news,g);
+	    if(found){
+#if defined(STORE_EDGE_LABELS)
+	      labelmap[e1].insert(std::make_pair(genomeindex,dist));
+#endif
+	      elabelmaskmap[e1].set(genomeindex,1);
+	    }
+	    else{
+	      bool inserted;
+	      Edge e1;
+#if defined(STORE_EDGE_LABELS)
+	      LabelMap labels;
+	      labels[genomeindex] = dist;
+	      tie(e1, inserted) = add_edge(news,newt,EdgeProperties(labels),g);
+#else
+	      tie(e1, inserted) = add_edge(news,newt,EdgeProperties(),g);
+#endif
+	      assert(inserted);
+	      elabelmaskmap[e1].set(genomeindex,1);
+	    }
+#ifdef DEBUG
+	std::cerr << "Added edge " << sname << " " << tname << std::endl;
+#endif
+	  }
+	  edges++;
+	}
+	else{
+#ifdef DEBUG
+	std::cerr << "Skipping edge dist>distance " << line << std::endl;
+#endif
+	}
+      }
+      else{
+#ifdef DEBUG
+	std::cerr << "Skipping short anchor " << line << std::endl;
+#endif
+	//Skipping short anchor
+      }
+    }
+    else{
+#ifdef DEBUG
+	std::cerr << "Ignoring line " << line << std::endl;
+#endif
+      //Ignoring line
+    }
+  }
+}
+//Add this to the dot file to force drawing of labels in large graphs
+//node [fontsize="9",margin="0.0,0.0",fixedsize=true];
+//do_write_graphviz: write graph g to the file fname in Graphviz dot format.
+template<typename TGraph, typename Tedgelabelmap> 
+  void do_write_graphviz(TGraph &g, 
+			 std::string fname, 
+			 std::vector<int> & cc, 
+			 VertexLabelIntervalMap & coordinates, 
+			 EdgeSet & maskedEdges,
+			 VertexSet & maskedLCBs,
+			 Tedgelabelmap & edgelabelmap,
+			 bool expandlabel){
+  //Vertices in maskedLCBs are drawn as diamonds, edges in maskedEdges as dashed,bold lines.
+  typedef typename TGraph::vertex_descriptor TVertex;
+  typedef typename TGraph::edge_descriptor TEdge;
+  //property_map < Graph,edge_stringname_t >::type edgelabelmap;// = get(edge_stringname, g);
+  typename property_map < TGraph,edge_category_t >::type  ecatmap = get(edge_category,g);
+  //Edge colour is derived from the edge_category property of g.
+  //Set up dynamic properties for graphviz
+  boost::dynamic_properties dp;
+  dp.property("id", get(vertex_name, g));
+
+
+  std::map<TEdge,std::string> edgecatmap;
+  //Need to set edge, vertex labels
+  //Build label map
+  std::map<TVertex, std::string> vertexlabelmap;
+  std::map<TEdge, std::string> efmap;
+  std::map<TVertex, std::string> vfmap;
+  std::map<TVertex, std::string> shapemap;
+  std::map<TEdge, std::string> linemap;
+  //std::map< Graph::edge_descriptor, std::string> edgelabelmap;
+  for(typename boost::graph_traits<TGraph>::vertex_iterator 
+	vit = vertices(g).first;vit!=vertices(g).second;++vit){
+    Vertex v = *vit;
+    std::ostringstream labelstring;
+#ifdef PRINTSEQS
+    labelstring << get(vertex_name,g,v) << " " 
+		<< v
+		<< " "
+		<< "CC" << cc[v];
+    //if(get(vertex_relorder,g,v)){
+    //labelstring << " TL" << get(vertex_relorder,g,v);
+    //}
+    labelstring << "\\n";
+    if(expandlabel){
+      OrientedLabelSet olabel = get(vertex_orient,g,v);
+      for(OrientedLabelSet::iterator it = olabel.begin();it!=olabel.end();++it){
+	//TODO support genomeidx
+	labelstring << "S"  << it->first << ":" << (it->second ? '+' : '-') 
+		    << ":" 
+		    << coordinates[std::make_pair(v,it->first)].first << "-" << coordinates[std::make_pair(v,it->first)].second << "\\n";
+      }
+      
+    }
+#else
+    labelstring << v << "\\n";
+#endif
+    vertexlabelmap[v]=labelstring.str();
+    vfmap[v]="6";
+    if(maskedLCBs.find(v)!=maskedLCBs.end()){
+      shapemap[v]="diamond";
+    }
+    else{
+      shapemap[v]="circle";
+    }
+  } 
+  for(typename boost::graph_traits<TGraph>::edge_iterator 
+	eit = edges(g).first;eit!=edges(g).second;++eit){
+    TEdge e = *eit;
+    std::ostringstream labelstring;
+    unsigned int numset=0;
+#if defined(STORE_EDGE_LABELS)
+    LabelMap currlm = get(edge_label,g,e);
+    //labelstring << "MASKS:" << get(edge_labelmask,g,e) << "\\n";
+    for(LabelMap::iterator it = currlm.begin(); it!=currlm.end(); ++it){
+      labelstring << it->first << ":" << it->second << " ";
+      numset++;
+    }
+    assert(numset==currlm.size());
+#endif
+    if(maskedEdges.find(std::make_pair(source(e,g),target(e,g)))!=maskedEdges.end() 
+       || maskedEdges.find(std::make_pair(target(e,g),source(e,g)))!=maskedEdges.end()){
+      //only true if g is of type filteredgraph assert(false);
+      linemap[e] = "dashed,bold";
+    }
+    else{
+      linemap[e] = "solid";
+    }
+    //edgelabelmap[e] = labelstring.str();
+    efmap[e]="6";
+    switch (ecatmap[e]){
+    case RED: //default
+      edgecatmap[e] = "red";
+      break;
+    case GREEN:
+      edgecatmap[e] = "green";
+      break;
+    case BLUE: //cut by mincut
+      edgecatmap[e] = "blue";
+      break;
+    case ORANGERED: //introduced via a merge; NOTE(review): drawn as yellow, not orangered - confirm
+      edgecatmap[e] = "yellow";
+      break;
+    case PURPLE: //change in relative orientation
+      edgecatmap[e] = "purple";
+      break;
+    case CYAN:
+      edgecatmap[e] = "cyan";
+      break;
+    default:
+      assert(false);
+      break;
+    }
+  }
+  
+  boost::associative_property_map< std::map<TVertex, std::string> >
+    vlabel_map(vertexlabelmap);
+  //boost::associative_property_map< std::map<Vertex, std::string> >
+  //vfontmap(vfmap);
+  //boost::associative_property_map< std::map<Edge, std::string> >
+  //efontmap(efmap);
+  boost::associative_property_map< std::map<TVertex, std::string> >
+    bshapemap(shapemap);
+  boost::associative_property_map< std::map<TEdge, std::string> >
+    blinemap(linemap);
+  //boost::associative_property_map< std::map<TEdge, Tedgelabel> >
+  //elmap(edgelabelmap);
+  boost::associative_property_map< std::map<TEdge, std::string> >
+    ecmap(edgecatmap);
+ 
+  dp.property("label",vlabel_map);
+  //dp.property("label",edgelabelmap);
+  dp.property("label",edgelabelmap);
+  dp.property("shape",bshapemap);
+  dp.property("style",blinemap);
+  //dp.property("fontsize",vfontmap);
+  //dp.property("fontsize",efontmap);
+  dp.property("color",ecmap);
+  //(a second, identical registration of "color" was removed here)
+
+  //dp.property("rankdir","LR");
+  //dp.property("rotate","90");
+  //Open file
+  std::ofstream gout;
+  gout.open(fname.c_str());
+  std::string node_id("id");
+  //Graph-level attributes; vertex_attr and edge_attr are passed through empty.
+  std::map<std::string,std::string> graph_attr, vertex_attr, edge_attr;
+  graph_attr["rankdir"] = "LR";
+  graph_attr["rotate"] = "90"; //was the integer 90, which std::string stores as the single character 'Z'
+  write_graphviz(gout, g, 
+		 dynamic_vertex_properties_writer(dp,node_id),
+		 dynamic_properties_writer(dp),
+		 make_graph_attributes_writer(graph_attr,vertex_attr,edge_attr));
+		 //graph::detail::node_id_property_map<Vertex>(dp,node_id));
+
+  gout.close();
+}
+
+
+template<typename TGraph> 
+void do_write_graphviz(TGraph &g, 
+		       std::string fname, 
+		       std::vector<int> & cc, 
+		       VertexLabelIntervalMap & coordinates, 
+		       EdgeSet & maskedEdges,
+		       VertexSet & maskedLCBs,
+		       bool expandlabel=true){
+  //Convenience overload without edge labels: forwards an empty label map to the full writer.
+  typedef std::map<Edge,std::string> EmptyEdgeLabels;
+  EmptyEdgeLabels noLabels;
+  boost::associative_property_map<EmptyEdgeLabels> noLabelMap(noLabels);
+  do_write_graphviz(g,fname,cc,coordinates,maskedEdges,maskedLCBs,noLabelMap,expandlabel);
+}
diff --git a/chaining/filters.h b/chaining/filters.h
new file mode 100644
index 0000000..ca99697
--- /dev/null
+++ b/chaining/filters.h
@@ -0,0 +1,331 @@
+// Graph filters
+//
+//Edge predicate backed by bit masks: an edge is kept only when its mask
+//(looked up through m_label) has every bit of matchlabels set.
+//Fast edge filter using bit masks predefined on mindist in setedgesmask
+template <typename EdgeLabelMap, typename LabelContainer>
+struct distance_label_filter_bv {
+  distance_label_filter_bv() { }
+  //label: edge -> mask lookup; lc: mask of required labels (copied).
+  distance_label_filter_bv(EdgeLabelMap label, LabelContainer &lc) 
+    : m_label(label),
+      matchlabels(lc)
+  {}
+  //True when all bits of matchlabels are present in the mask of edge e.
+  template <typename Edgetype>
+  bool operator()(const Edgetype& e) const {
+    const bool allPresent = ((m_label[e]&matchlabels)==matchlabels);
+    return allPresent;
+  }
+  EdgeLabelMap m_label;
+  LabelContainer matchlabels;
+};
+
+//Edge predicate: keeps an edge only when every label stored on the edge
+//(labelmap[e]) is also in matchlabels; both ranges must be sorted for std::includes.
+template <typename TEdgeLabelMap, typename TLabelSet>
+struct edge_label_filter {
+  edge_label_filter() { }
+  edge_label_filter(TEdgeLabelMap l, 
+		    TLabelSet s)
+    : labelmap(l),
+      matchlabels(s){}
+  template <typename Edgetype>
+  bool operator()(const Edgetype& e) const {
+    assert(matchlabels.size()!=0);
+    //Return the result directly: the original fell off the end (undefined behaviour) on a mismatch.
+    return std::includes(matchlabels.begin(),matchlabels.end(),labelmap[e].begin(),labelmap[e].end());
+  }
+  TEdgeLabelMap  labelmap;
+  TLabelSet  matchlabels;
+};
+
+//Edge predicate: keeps an edge when at least one of its endpoints
+//(source or target in *G) is a member of the vertex set *m_snodes.
+template <typename TGraph>
+struct snode_efilter {
+  //Initialisers are listed in member declaration order.
+  snode_efilter()
+    :m_snodes(NULL),G(NULL)
+  {}
+  snode_efilter(std::set<typename TGraph::vertex_descriptor> *m,TGraph *gin)
+    :m_snodes(m),G(gin)
+  {}
+  template <typename Edgetype>
+  bool operator()(const Edgetype& e) const {
+    //The target is only looked up when the source is not in the set.
+    const bool sourceListed = m_snodes->find(source(e,*G))!=m_snodes->end();
+    return sourceListed || m_snodes->find(target(e,*G))!=m_snodes->end();
+  }
+  //Both pointers are stored as given; nothing is copied.
+  std::set<typename TGraph::vertex_descriptor> *m_snodes;
+  TGraph *G;
+};
+
+
+/*
+//
+//Filter vertices and exclude all vertices that do not contain a subset of sequences defined by matchlabels
+//and (optional) orientation matchorient
+//Passing a empty matchorient container ignores filtering by orientation
+//Implemented using bitmasks preset based on distance within setvertexmasks
+//TODO
+//Refactor to another container besides BitMask and ensure proper filtering
+template <typename VertexLabelMaskMap, typename VertexOrientMaskMap, typename OrientContainer>
+struct orient_filter_bv {
+  orient_filter_bv() { }
+  orient_filter_bv(VertexLabelMaskMap label, 
+		   OrientContainer &lc, 
+		   VertexOrientMaskMap omask, 
+		   OrientContainer &oc, 
+		   OrientContainer &roc) : m_label(label),
+					   matchlabels(lc),
+					   m_orientmask(omask),
+					   matchorient(oc),
+					   matchrevorient(roc) { }
+  template <typename Vertextype>
+  bool operator()(const Vertextype& v) const {
+    assert(matchlabels!=0);
+    if((m_label[v]&matchlabels)==matchlabels){ 
+      OrientContainer orientmask = (m_orientmask[v]&matchlabels);
+      //!matchorient.any() means all - orients, which is disallowed
+      //Using this state to specify shortcircuit "ignore orient mask"
+      if(!matchorient.any() || orientmask==matchorient){
+	return true;
+      }
+      else{
+	if(!matchrevorient.any() || orientmask==matchrevorient){
+	  return true;
+	}
+	else{
+	  return false;
+	}
+      }
+    }
+    else{
+      return false;
+    }
+  }
+
+  VertexLabelMaskMap m_label;
+  OrientContainer matchlabels;
+  VertexOrientMaskMap m_orientmask;
+  OrientContainer matchorient;
+  OrientContainer matchrevorient;
+};
+*/
+
+//Vertex predicate: keeps a vertex when its label set (labelmap[v]) contains
+//the first element of matchlabels. Only that one label is looked up; the
+//full-subset test with std::includes is kept below, commented out.
+template <typename TVertexLabelMap, typename TLabelSet>
+struct vertex_label_filter {
+  vertex_label_filter() { }
+  vertex_label_filter(TVertexLabelMap l, 
+		      TLabelSet s)
+    : labelmap(l),
+      matchlabels(s){}
+
+  template <typename Vertextype>
+  bool operator()(const Vertextype& v) const {
+    //matchlabels must not be empty: its first element is dereferenced below.
+    assert(matchlabels.size()!=0);
+    //Disabled stricter variant:
+    //std::includes(labelmap[v].begin(),labelmap[v].end(),matchlabels.begin(),matchlabels.end())
+    const bool hasFirstLabel =
+      (labelmap[v].find(*(matchlabels.begin())) != labelmap[v].end());
+    return hasFirstLabel;
+  }
+
+  TVertexLabelMap  labelmap;
+  TLabelSet  matchlabels;
+};
+
+//
+//Edge predicate: rejects every edge whose (source,target) or (target,source)
+//vertex pair is listed in *maskededges; all other edges are kept.
+template <typename TGraph>
+struct synbp_edge_filter {
+  typedef typename boost::graph_traits<TGraph>::edge_descriptor Edge;
+  typedef typename boost::graph_traits<TGraph>::vertex_descriptor Vertex;
+  //Default state holds null pointers (maskededges used to be left uninitialised);
+  //a default-constructed filter must be assigned before it is called.
+  synbp_edge_filter()
+    :maskededges(NULL),G(NULL)
+  {}
+  synbp_edge_filter(EdgeSet *m, TGraph *gin)
+    :maskededges(m),G(gin)
+  {}
+  template <typename Edgetype>
+  bool operator()(const Edgetype& e) const {
+    if(maskededges->find(std::make_pair(source(e,*G),target(e,*G)))!=maskededges->end()
+       || maskededges->find(std::make_pair(target(e,*G),source(e,*G)))!=maskededges->end()){      
+      //TODO
+      //This additional check target,source should not be necessary
+      return false;
+    }
+    return true;
+  }
+  EdgeSet *maskededges;
+  TGraph *G;
+};
+
+//
+//Edge predicate: rejects every edge with at least one endpoint in *maskedvertices
+//(i.e. all edges incident to a masked vertex); all other edges are kept.
+template <typename TGraph>
+struct LCB_edge_filter {
+  typedef typename boost::graph_traits<TGraph>::edge_descriptor Edge;
+  typedef typename boost::graph_traits<TGraph>::vertex_descriptor Vertex;
+  //Default state holds null pointers (maskedvertices used to be left uninitialised);
+  //a default-constructed filter must be assigned before it is called.
+  LCB_edge_filter()
+    :maskedvertices(NULL),G(NULL)
+  {}
+  LCB_edge_filter(VertexSet *m, TGraph *gin)
+    :maskedvertices(m),G(gin)
+  {}
+  template <typename Edgetype>
+  bool operator()(const Edgetype& e) const {
+    if(maskedvertices->find(source(e,*G))!=maskedvertices->end()
+       ||
+       maskedvertices->find(target(e,*G))!=maskedvertices->end()){
+      return false;
+    }
+    return true;
+  }
+  VertexSet *maskedvertices;
+  TGraph *G;
+};
+
+//
+//Vertex predicate: rejects every vertex listed in *maskedvertices and keeps the rest.
+template <typename TGraph>
+struct LCB_vertex_filter {
+  //Default state holds a null pointer (it used to be left uninitialised);
+  //a default-constructed filter must be assigned before it is called.
+  LCB_vertex_filter()
+    :maskedvertices(NULL)
+  {}
+  LCB_vertex_filter(VertexSet *m)
+    :maskedvertices(m)
+  {}
+  template <typename Vertextype>
+  bool operator()(const Vertextype& v) const {
+    //Keep v exactly when it is absent from the masked set.
+    return maskedvertices->find(v)==maskedvertices->end();
+  }
+  VertexSet *maskedvertices;
+};
+
+//
+//Create compound edge filter by chaining two edge filters.
+//An edge passes only when both filter1 and filter2 accept it; filter2 is
+//not consulted when filter1 already rejects the edge.
+template <typename TFilter1, typename TFilter2>
+struct compound_edge_filter{
+  compound_edge_filter(){}
+  //Both filters are copied into the compound filter.
+  compound_edge_filter(TFilter1 &tf1, TFilter2 &tf2)
+    :filter1(tf1),filter2(tf2)
+  {}
+  template <typename Edgetype>
+  bool operator()(const Edgetype& e) const {
+    //Each filter is evaluated at most once per call. The earlier version
+    //wrapped this in asserts that could never fail for deterministic filters
+    //but re-ran both predicates several times per edge in debug builds.
+    const bool keep = filter1(e) && filter2(e);
+    return keep;
+  }
+  TFilter1 filter1;
+  TFilter2 filter2;
+};
+
+//
+//Create compound vertex filter by chaining two vertex filters.
+//A vertex passes only when both filter1 and filter2 accept it; filter2 is
+//not consulted when filter1 already rejects the vertex.
+template <typename TFilter1, typename TFilter2>
+struct compound_vertex_filter{
+  compound_vertex_filter(){}
+  //Both filters are copied into the compound filter.
+  compound_vertex_filter(TFilter1 &tf1, TFilter2 &tf2)
+    :filter1(tf1),filter2(tf2)
+  {} 
+  template <typename Vertextype>
+  bool operator()(const Vertextype& v) const {
+    //Each filter is evaluated at most once per call. The earlier version
+    //wrapped this in asserts that could never fail for deterministic filters
+    //but re-ran both predicates several times per vertex in debug builds.
+    const bool keep = filter1(v) && filter2(v);
+    return keep;
+  }
+  TFilter1 filter1;
+  TFilter2 filter2;
+};
+
+//Define LCB as a set of connected vertices (stored as a vector of Vertex).
+//LCBLabelIntervalMap: Interval per (unsigned int, Label) key; NOTE(review): the unsigned int is presumably an LCB index - confirm
+typedef std::vector<Vertex> LCB;
+typedef std::map<std::pair<unsigned int,Label>,Interval > LCBLabelIntervalMap; 
+
+//Graph types: filtered_graph views over Graph using the filter predicates defined in this file.
+//FilterSynGraph filters edges with synbp_edge_filter; FilterLCBGraph filters edges and vertices with the LCB filters.
+//Synteny graph contains connected subgraphs, each an LCB
+typedef filtered_graph<Graph, 
+		       synbp_edge_filter<Graph> >  FilterSynGraph; 
+typedef filtered_graph<Graph, 
+		       LCB_edge_filter<Graph>, 
+		       LCB_vertex_filter<Graph> > FilterLCBGraph;
+//Synteny graph that supports masking/filtering of LCBs (edge filter = LCB_edge_filter combined with synbp_edge_filter)
+typedef filtered_graph<Graph, 
+		       compound_edge_filter<LCB_edge_filter<Graph>, synbp_edge_filter<Graph> >, 
+		       LCB_vertex_filter<Graph> > LCBSynFilterGraph; 
+
+
+//Comparison functor for pair-like positions (anything exposing .first and
+//.second): orders by .first and breaks ties with .second.
+template<typename TPos>
+class poscmp
+{
+public:
+  poscmp()
+  {}
+
+  bool operator()( const TPos &e1, const TPos &e2 ) const {
+    //Different primary keys decide the order on their own.
+    if(!(e1.first==e2.first)){
+      return e1.first < e2.first;
+    }
+
+    //Tie on .first: fall back to comparing .second.
+    //A disabled earlier variant ignored e1.second on ties:
+    //  return false if e2 is interval close
+    //  if(e2.second == false){ return 0; } else{ return 1; }
+    const bool secondIsLess = (e1.second < e2.second);
+    return secondIsLess;
+  }
+
+};
+
+//Comparison functor: orders two integer keys by the values stored for them
+//in the map supplied at construction (smaller value first).
+class lencmp
+{
+public:
+  lencmp(std::map<int,int> & m)
+    :lenmap(&m)
+  {}
+  bool operator()( const int i1, const int i2) const {
+    const std::map<int,int>::const_iterator first = lenmap->find(i1);
+    const std::map<int,int>::const_iterator second = lenmap->find(i2);
+    //Both keys are required to be present in the map.
+    assert(first!=lenmap->end());
+    assert(second!=lenmap->end());
+    return first->second < second->second;
+  }
+  //Pointer to the caller's map (not copied); the map must outlive this comparator.
+  std::map<int,int> *lenmap;
+};
+
+
+
diff --git a/chaining/graph.h b/chaining/graph.h
new file mode 100644
index 0000000..205bd93
--- /dev/null
+++ b/chaining/graph.h
@@ -0,0 +1,578 @@
+//Types
+//############
+//BGL requires crazy amount of code to define custom graph properties
+//
+//Edge properties
+//Tag structs: each exposes a nested `kind` typedef that marks it as an
+//edge or a vertex property tag.
+//NOTE(review): the properties actually installed below are the enum
+//types (edge_label_t, vertex_orient_t, ...), not these structs; confirm
+//the structs are still referenced somewhere before relying on them.
+struct label_t {
+  typedef edge_property_tag kind;
+};
+
+//NOTE(review): tagged as an edge property here, whereas vertex_genome
+//below is installed as a vertex property -- confirm which is intended.
+struct genome_t { 
+    typedef edge_property_tag kind;
+};
+
+//TODO
+//Make edge_labelmask set based
+//on genomeidx rather than seqidx
+struct labelmask_t{
+  typedef edge_property_tag kind;
+};
+
+//NOTE(review): name is spelled "visted" (sic); the installed enum
+//property below is spelled edge_visited.
+struct visted_t{
+  typedef edge_property_tag kind;
+};
+
+struct stringname_t{
+  typedef edge_property_tag kind;
+};
+
+//Vertex properties
+struct orient_t{
+  typedef vertex_property_tag kind;
+};
+
+struct orientmask_t{
+  typedef vertex_property_tag kind;
+};
+
+struct vlabelmask_t{
+  typedef vertex_property_tag kind;
+};
+
+struct chains_t{
+  typedef vertex_property_tag kind;
+};
+
+struct relorder_t{
+  typedef vertex_property_tag kind;
+};
+
+//vertex_name_t and edge_weight_t are already defined by default
+//Each custom property is an enum type with a single enumerator; the
+//numeric values used here are all distinct.
+enum edge_label_t { edge_label = 10001 };
+enum edge_labelmask_t { edge_labelmask = 10004 };
+enum edge_visited_t { edge_visited = 10005 };
+enum edge_stringname_t { edge_stringname = 10011 };
+enum edge_category_t { edge_category = 10012 };
+enum vertex_orient_t { vertex_orient = 10006 };
+enum vertex_label_t { vertex_label = 10016 };
+enum vertex_genome_t { vertex_genome = 10015 };
+enum vertex_vlabelmask_t { vertex_vlabelmask = 10007 };
+enum vertex_orientmask_t { vertex_orientmask = 10008 };
+enum vertex_len_t { vertex_len = 10009 };
+enum vertex_relorder_t { vertex_relorder = 10010 };
+
+//Register every enum above with BGL; the first macro argument states
+//whether the property lives on edges or on vertices.
+namespace boost {
+  BOOST_INSTALL_PROPERTY(edge, label);
+  BOOST_INSTALL_PROPERTY(edge, labelmask);
+  BOOST_INSTALL_PROPERTY(edge, visited);
+  BOOST_INSTALL_PROPERTY(edge, stringname);
+  BOOST_INSTALL_PROPERTY(edge, category);
+  BOOST_INSTALL_PROPERTY(vertex, relorder);
+  BOOST_INSTALL_PROPERTY(vertex, label);
+  BOOST_INSTALL_PROPERTY(vertex, vlabelmask);
+  BOOST_INSTALL_PROPERTY(vertex, orientmask);
+  BOOST_INSTALL_PROPERTY(vertex, orient);
+  BOOST_INSTALL_PROPERTY(vertex, genome);
+  BOOST_INSTALL_PROPERTY(vertex, len);
+}
+//End custom properties code
+//###########
+
+//Label is an index that corresponds to a genome sequence for complete
+//genomes or a species for incomplete genomes.  using a short limits
+//to 65,535 labels in an attempt to save some space
+//typedef unsigned short int Label;
+typedef unsigned int Label;
+
+//DNA sequence orientation -,+ == false,true
+typedef bool Orientation;
+
+//Label,distance map specifies the proximity between two
+//anchors/blocks along a sequence whose index is Label
+typedef std::map<Label,int> LabelMap;
+//typedef __gnu_cxx::hash_map<Label,int> LabelMap;
+
+//Set of labels
+//typedef std::set<Label> LabelSet;
+typedef boost::unordered_set<Label> LabelSet;
+
+//MAXGENOMES,BITMAX: Critical parameters that limit the number of genome labels
+//bitset is used for fast pattern matching of subsets
+//Increasing the size of this parameter will degrade performance
+//even for small numbers of sequences
+//TODO: Refactor. Compare or replace with use of STL includes,set_intersection,...
+//or use boost::dynamic_bitset
+#define BITMAX MAXGENOMES
+typedef std::bitset<BITMAX> BitMask;
+
+typedef pair<Label,Orientation> OrientedLabel;
+
+//Hash for OrientedLabel that uses the label index only; the orientation
+//is ignored, so the two orientations of one label share a hash value.
+struct orientedlabelhasher {
+  size_t operator()(const OrientedLabel& v) const { return hash<Label>()(v.first); }
+};
+
+//faster for bitsets that can fit in ulong
+//otherwise will throw an overflow exception
+//(std::bitset::to_ulong throws std::overflow_error when a set bit does
+//not fit in unsigned long)
+struct bitsethasher_ulong {
+  size_t operator()(const BitMask& v) const { return hash<long unsigned int>()(v.to_ulong()); }
+};
+
+//general for bitsets of any size
+//Builds a temporary string of BITMAX characters on every call.
+struct bitsethasher_string {
+  size_t operator()(const BitMask& v) const { return hash<std::string>()(v.to_string()); }
+};
+
+//Lexicographic "less than" for OrientedLabel: label index first,
+//orientation second. Used as the ordering of OrientedLabelSet.
+struct orientedlabelcmp {
+  bool operator()( const OrientedLabel& s1, const OrientedLabel& s2 ) const {
+    if(s1.first != s2.first){
+      return s1.first < s2.first;
+    }
+    //same label: orientation breaks the tie
+    return s1.second < s2.second;
+  }
+};
+
+//Equality predicate for OrientedLabel: both the label index and the
+//orientation must match.
+struct hasheq
+{
+  bool operator()(const OrientedLabel& s1, const OrientedLabel& s2) const
+  {
+    return s1.first==s2.first && s1.second==s2.second;
+  }
+};
+
+//Edges in the anchor graph are marked with a set of labels called an
+//OrientedLabelSet.  The cardinality of this set is the number of
+//member sequences. Each sequence is labeled by an integer index (type
+//Label) and a boolean orientation (type Orientation) which are paired
+//to define an OrientedLabel.
+//
+//TODO objects of this type are copied all over the place in the current impl
+//need to refactor to improve performance
+typedef std::set<OrientedLabel,orientedlabelcmp > OrientedLabelSet;
+//typedef boost::unordered_set<OrientedLabel> OrientedLabelSet;
+//typedef __gnu_cxx::hash_set<OrientedLabel,orientedlabelhasher> OrientedLabelSet;
+
+//Names of blocks/anchors and graph vertices: VertexName, VertexID
+//The program input includes a set of anchors across two or more
+//genomes.  These anchors are also referred to as blocks.  Each block
+//is stored as a vertex in a graph.  The identifier provided for each
+//block in the user provided input is stored as the VertexName. It is
+//assumed that the VertexName is a unique identifier for a block.  An
+//additional internal identifier for each block, VertexID, is used by
+//the graph library.  For a given block, the VertexName and VertexID
+//may not be equivalent.  It is also possible to change the typedef
+//for VertexName to std::string to support string names for blocks.
+typedef unsigned int VertexName;
+typedef unsigned int VertexID; //TODO, reconcile,replace with Graph::vertex_descriptor
+
+//Coordinates and Intervals
+typedef int Coordinate;
+typedef std::pair<Coordinate,Coordinate> Interval;
+
+
+//typedef BitMask SeqSet;
+typedef OrientedLabelSet SeqSet;
+//set of genome labels (a LabelSet, not a bitset)
+typedef LabelSet GenomeSet;
+
+//BGL requires properties in this nested format
+//VertexProperties
+typedef property<vertex_genome_t, GenomeSet> VertexGenome;
+typedef property<vertex_relorder_t, int, VertexGenome> VertexRelOrder;
+typedef property<vertex_len_t, int, VertexRelOrder> VertexLen;
+typedef property<vertex_orientmask_t, BitMask, VertexLen> VertexOrientMask;
+typedef property<vertex_vlabelmask_t, BitMask, VertexOrientMask> VertexLabelMask;
+typedef property<vertex_orient_t, SeqSet, VertexLabelMask> VertexOrientedLabel;
+typedef property<vertex_label_t, LabelSet, VertexOrientedLabel> VertexLabel;
+typedef property<vertex_name_t, VertexName, VertexLabel> VertexProperties;
+
+//Define graph properties
+//TODO
+//Replace edge properties, such as EdgeMask and LabelMap, with index to save space
+//typedef property<edge_category_t, std::string, property<edge_weight_t,int> > EdgeCategory;
+//typedef property<edge_stringname_t, std::string> EdgeStringName;
+//typedef property<edge_visited_t, bool, EdgeCategory > EdgeVisited;
+//typedef property<edge_category_t, std::string > EdgeCategory;
+
+
+//BLACK - collinear and syntenic edge between segments, indegree==outdegree==1
+//RED - collinear edge that traverses a syntenic breakpoint, degree!=1
+//PURPLE - non-collinear edge indicating possible reversal, change in orientation
+//GREEN - non-collinear edge indicative of some other flux
+//BLUE - edge removed during a mincut to split an LCB
+//ORANGERED - previously masked edge that was reintroduced during a merge
+//YELLOW - new edge introduced during a mask short, merge adjacent iteration
+//NOTE(review): the list above describes BLACK, which is not an enumerator
+//below, and does not describe CYAN, which is -- confirm whether CYAN took
+//over the role of BLACK.
+
+enum EdgeCats { RED,PURPLE,CYAN,BLUE,GREEN,ORANGERED,YELLOW };
+typedef property<edge_category_t, EdgeCats > EdgeCategory;
+//With STORE_EDGE_LABELS the edge label mask is a GenomeSet and each edge
+//additionally carries a LabelMap (edge_label); without it the mask is a
+//BitMask and no LabelMap is stored.
+#if defined(STORE_EDGE_LABELS)
+typedef property<edge_labelmask_t, GenomeSet, EdgeCategory > EdgeMask;
+typedef property<edge_label_t, LabelMap, EdgeMask > EdgeProperties;
+#else
+typedef property<edge_labelmask_t, BitMask,EdgeCategory > EdgeProperties;
+#endif
+
+//TODO
+//Determine if edge,vertex storage is better as vecS,listS,multisetS
+//Main anchor graph type: vertices carry VertexProperties, edges carry
+//EdgeProperties.
+typedef boost::adjacency_list < 
+  vecS,               // Store out-edges of each vertex in a std::vector (vecS)
+  vecS,               // Store vertex set in a std::vector 
+  bidirectionalS,     // Directed graph with support for in_edges as well as out_edges
+  VertexProperties,   // vertex properties 
+  EdgeProperties      // edge properties
+  > Graph;
+
+typedef Graph::vertex_descriptor Vertex;
+typedef Graph::edge_descriptor Edge;
+
+//Lookups
+typedef std::map<std::string,Label> NameLabelMap; 
+typedef std::map<Label, std::string> LabelNameMap; 
+typedef std::map<VertexName, Vertex> NameVertexMap;
+typedef std::map<Label,Label> SequenceGenomeMap;
+//typedef std::map<pair<Vertex,Label>,Interval > VertexLabelIntervalMap; 
+typedef boost::unordered_map<std::pair<Vertex,Label>,Interval > VertexLabelIntervalMap; 
+typedef boost::unordered_map<Vertex,Interval > VertexIntervalMap;
+
+typedef adjacency_list_traits < setS, vecS, directedS > LTraits;
+
+//LGraph: directed graph whose edges carry capacity, residual capacity and
+//a reverse-edge descriptor, and whose vertices carry name, index, color,
+//distance and predecessor.
+//NOTE(review): this is the property set BGL max-flow style algorithms
+//expect; presumably LGraph is the flow network used for min-cut -- confirm
+//against mincut.h.
+//The DEBUG and non-DEBUG definitions differ only in that the DEBUG one
+//appends EdgeCategory to the edge property list.
+#ifdef DEBUG
+//Add edgecategory for printing
+typedef adjacency_list < 
+  listS, //to allow for removal of edges 
+  vecS, 
+  directedS,
+  property < vertex_name_t, VertexName,
+	     property < vertex_index_t, long,
+			property < vertex_color_t, boost::default_color_type,
+				   property < vertex_distance_t, long,
+					      property < vertex_predecessor_t, LTraits::edge_descriptor > > > > >,    
+  property < edge_capacity_t, long,
+  property < edge_residual_capacity_t, long,
+  property < edge_reverse_t, LTraits::edge_descriptor, 
+  EdgeCategory  > > > > LGraph;
+#else
+typedef adjacency_list < 
+  listS, //to allow for removal of edges 
+  vecS, 
+  directedS,
+  property < vertex_name_t, VertexName,
+	     property < vertex_index_t, long,
+			property < vertex_color_t, boost::default_color_type,
+				   property < vertex_distance_t, long,
+					      property < vertex_predecessor_t, LTraits::edge_descriptor > > > > >,    
+  property < edge_capacity_t, long,
+	     property < edge_residual_capacity_t, long,
+			property < edge_reverse_t, LTraits::edge_descriptor > > > > LGraph;
+#endif
+
+typedef LGraph::vertex_descriptor LVertex;
+typedef LGraph::edge_descriptor EVertex;
+
+typedef std::set<std::pair<Vertex,Vertex> > EdgeSet;
+//typedef boost::unordered_set<std::pair<Vertex,Vertex> > EdgeSet;
+
+typedef std::set<Vertex> VertexSet;
+//typedef boost::unordered_set<Vertex> VertexSet;
+
+
+//Plain record of two ints plus a block number.
+//NOTE(review): presumably (first,second) is an interval belonging to
+//block `blocknum` -- confirm against the code that fills it.
+struct iloc{
+  int first;
+  int second;
+  int blocknum;
+};
+
+//Subs
+
+//Writes the current local wall-clock time to stderr as
+//"TIME <hour>:<min>:<sec>" (fields are not zero padded).
+void printtime(){
+  time_t now;
+  time(&now);
+  //localtime() used to be called twice back to back; one call is enough
+  struct tm *current = localtime(&now);
+  std::cerr << "TIME " << current->tm_hour << ":" << current->tm_min << ":" << current->tm_sec << std::endl;
+}
+
+//Orders vertex pairs by the distance stored for them in an external map,
+//largest distance first. Both pairs must be present in the map; this is
+//only checked by assert.
+class cutsdist
+{
+public:
+  cutsdist(std::map<std::pair<LVertex,LVertex>,unsigned int > *m)
+    :distmap(m)
+  {}
+  bool operator()( const std::pair<LVertex,LVertex>& v1, const std::pair<LVertex,LVertex>& v2 ) const {
+    typedef std::map<std::pair<LVertex,LVertex>,unsigned int >::iterator DistIter;
+    const DistIter d1 = distmap->find(v1);
+    const DistIter d2 = distmap->find(v2);
+    assert(d1!=distmap->end());
+    assert(d2!=distmap->end());
+    //descending: the pair with the larger distance sorts first
+    return d1->second > d2->second;
+  }
+  //Not owned; must outlive the comparator
+  std::map<std::pair<LVertex,LVertex>,unsigned int > *distmap;
+};
+
+//Orders vertices by the start (.first) of their interval on one fixed
+//sequence, ascending. Both vertices must have coordinates on that
+//sequence; this is only checked by assert.
+class coordsorder
+{
+public:
+  coordsorder(VertexLabelIntervalMap *c, Label s)
+    :coords(c),currentSeq(s)
+  {}
+  bool operator()( const LVertex& v1, const LVertex& v2 ) const {
+    const VertexLabelIntervalMap::iterator span1 = coords->find(std::make_pair(v1,currentSeq));
+    const VertexLabelIntervalMap::iterator span2 = coords->find(std::make_pair(v2,currentSeq));
+    assert(span1!=coords->end());
+    assert(span2!=coords->end());
+    //.first is fmin coordinate
+    return span1->second.first < span2->second.first;
+  }
+  //Not owned; must outlive the comparator
+  VertexLabelIntervalMap *coords;
+  Label currentSeq;
+};
+
+//Orders vertices by the start (.first) of the interval stored for them
+//in a per-vertex interval map, ascending. Both vertices must be present
+//in the map; this is only checked by assert.
+class coordsorder_vertex
+{
+public:
+  coordsorder_vertex(VertexIntervalMap *c)
+    :coords(c)
+  {}
+  bool operator()( const LVertex& v1, const LVertex& v2 ) const {
+    const VertexIntervalMap::iterator span1 = coords->find(v1);
+    const VertexIntervalMap::iterator span2 = coords->find(v2);
+    assert(span1!=coords->end());
+    assert(span2!=coords->end());
+    //.first is fmin coordinate
+    return span1->second.first < span2->second.first;
+  }
+  //Not owned; must outlive the comparator
+  VertexIntervalMap *coords;
+};
+
+//Orders vertices by the integer stored for them in an external map,
+//largest value first. Both vertices must be present in the map; this is
+//only checked by assert.
+class matchmaporder
+{
+public:
+  matchmaporder(std::map<Vertex,int> *c)
+    :matchmap(c)
+  {}
+  bool operator()( const Vertex& v1, const Vertex& v2 ) const {
+    const std::map<Vertex,int>::iterator m1 = matchmap->find(v1);
+    const std::map<Vertex,int>::iterator m2 = matchmap->find(v2);
+    assert(m1!=matchmap->end());
+    assert(m2!=matchmap->end());
+    //descending: the vertex with the larger value sorts first
+    return m1->second > m2->second;
+  }
+  //Not owned; must outlive the comparator
+  std::map<Vertex,int> *matchmap;
+};
+
+//################
+//DEBUGGING UTILS
+//
+//
+//Debug helper: writes every member of the set to cerr as
+//"<label>:<orientation> " on a single line, without a trailing newline.
+void printlabel(OrientedLabelSet & i){
+  OrientedLabelSet::iterator cur = i.begin();
+  const OrientedLabelSet::iterator last = i.end();
+  while(cur!=last){
+    cerr << cur->first << ":" << cur->second << " ";
+    ++cur;
+  }
+}
+//Debug helper: like printlabel(OrientedLabelSet&), but prints the sequence
+//name looked up in index2sequence instead of the numeric label.
+//A label without an entry prints as an empty name. The lookup uses find()
+//rather than operator[] so that printing never adds entries to the
+//caller's lookup table.
+void printlabel(OrientedLabelSet & i, LabelNameMap & index2sequence){
+  for(OrientedLabelSet::iterator j=i.begin();j!=i.end();++j){
+    LabelNameMap::iterator name = index2sequence.find(j->first);
+    cerr << (name!=index2sequence.end() ? name->second : std::string()) << ":" << j->second << " ";
+  }
+}
+//
+//################################
+
+//setedgemasks()
+//For every edge (sv,tv) of g and every oriented label on sv, sets the
+//genome bit in the edge's edge_labelmask when
+// - tv carries the same oriented label (see NOTE below for the reverse
+//   orientation lookup),
+// - the bit is not already set,
+// - both endpoints have coordinates on that sequence, and
+// - the gap between the two intervals on that sequence is <= distance
+//   (overlapping intervals count as gap 0).
+//With STORE_EDGE_LABELS the (genome,gap) pair is also stored in edge_label.
+//
+//g               graph whose edge_labelmask (and edge_label) are updated
+//distance        maximum allowed gap between the two endpoints
+//coordinates     map keyed by (vertex,seqidx) giving (begin,end) intervals
+//sequence2genome map seqidx -> genomeidx; every seqidx on a source vertex
+//                must be present (assert)
+template<class TGraph, class CoordMap, class SequenceGenomeMap>
+void setedgemasks(TGraph & g, int distance, CoordMap & coordinates, SequenceGenomeMap & sequence2genome){
+  typename property_map < TGraph, vertex_orient_t >::type orientmap = get(vertex_orient,g);
+#if defined(STORE_EDGE_LABELS)
+  typename property_map < TGraph, edge_label_t >::type elabelmap = get(edge_label,g);
+#endif
+  typename property_map < TGraph, edge_labelmask_t >::type elabelmaskmap = get(edge_labelmask,g);
+  typename boost::graph_traits<TGraph>::edge_iterator eit,eit_end;
+  eit_end = edges(g).second;
+  for(eit = edges(g).first;eit!=eit_end;++eit){
+      Edge e = *eit;
+      Vertex sv = source(e,g);
+      Vertex tv = target(e,g);
+
+      //add extra edge labels
+      OrientedLabelSet::iterator it_end = orientmap[sv].end();
+      for(OrientedLabelSet::iterator it = orientmap[sv].begin();it != it_end;++it){
+        Label seqidx = it->first;
+        assert(sequence2genome.find(seqidx)!=sequence2genome.end());
+        Label genomeidx = sequence2genome[seqidx];
+        Orientation rorient = !it->second;
+
+        //sv--tv must be connected in label seqidx: check the original and
+        //the reverse orientation on tv.
+        //NOTE(review): the reverse lookup is keyed by genomeidx while the
+        //forward lookup (*it) is keyed by seqidx -- confirm that this is
+        //intended; left unchanged here.
+        if(orientmap[tv].find(*it) == orientmap[tv].end()
+           && orientmap[tv].find(std::make_pair(genomeidx,rorient)) == orientmap[tv].end()){
+          continue;
+        }
+        //genome already recorded on this edge, nothing to update
+        if(elabelmaskmap[*eit].test(genomeidx)){
+          continue;
+        }
+        //both endpoints need coordinates on seqidx
+        typename CoordMap::iterator scoord = coordinates.find(std::make_pair(sv,seqidx));
+        typename CoordMap::iterator tcoord = coordinates.find(std::make_pair(tv,seqidx));
+        if(scoord==coordinates.end() || tcoord==coordinates.end()){
+          continue;
+        }
+
+        //Gap between the two intervals. The computation used to be written
+        //out twice, once per orientation, with identical code.
+        int dist;
+        if(tcoord->second.first >= scoord->second.second){
+          //target lies downstream of source
+          dist = tcoord->second.first - scoord->second.second;
+        }
+        else{
+          dist = scoord->second.first - tcoord->second.second;
+        }
+        if(dist<0){
+          //overlapping intervals
+          dist=0;
+        }
+
+        if(dist<=distance){
+#if defined(STORE_EDGE_LABELS)
+          elabelmap[*eit].insert(std::make_pair(genomeidx,dist));
+#endif
+          elabelmaskmap[*eit].set(genomeidx,1);
+        }
+      }
+#if defined(STORE_EDGE_LABELS)
+      //Consistency check: rebuild the mask from the stored labels and
+      //compare it with the mask set above.
+      //This section referred to an undeclared `labelmap`; the map declared
+      //at the top of the function is `elabelmap`.
+      //NOTE(review): with STORE_EDGE_LABELS defined, edge_labelmask is
+      //declared as GenomeSet in this header, so the bitset operations on
+      //elabelmaskmap (test/set, comparison with BitMask) may still not
+      //compile -- TODO confirm this configuration is ever built.
+      BitMask mask;
+      unsigned int numset=0;
+      for(LabelMap::iterator i1 = elabelmap[*eit].begin();i1 != elabelmap[*eit].end();++i1){
+        if(i1->second<=distance){
+          assert(i1->first>=0&&i1->first<BITMAX);
+          mask.set(i1->first,1);
+          numset++;
+        }
+      }
+      assert(numset==elabelmap[*eit].size());
+      assert(mask.any());
+      if(mask!=elabelmaskmap[*eit]){
+        std::cerr << "Mask     " << mask << std::endl;
+        std::cerr << "EdgeMask " << elabelmaskmap[*eit] << std::endl;
+      }
+      assert(mask==elabelmaskmap[*eit]);
+      //put(edge_labelmask,g,*eit,mask); 
+#endif
+  }
+}
+
+//Fills two per-vertex bit masks from the vertex_orient label set:
+// vertex_vlabelmask - bit set for every genome present on the vertex
+// vertex_orientmask - bit set for every genome present in + orientation
+//Bits are only ever set here, never cleared.
+template<class TGraph>
+void setvertexmasks(TGraph & g, SequenceGenomeMap & sequence2genome){
+  typedef typename boost::graph_traits<TGraph>::vertex_iterator VIter;
+  typename property_map < TGraph, vertex_orient_t >::type orientmap = get(vertex_orient,g);
+  typename property_map < TGraph, vertex_vlabelmask_t >::type lmaskmap = get(vertex_vlabelmask,g);
+  typename property_map < TGraph, vertex_orientmask_t >::type omaskmap = get(vertex_orientmask,g);
+
+  const VIter vlast = vertices(g).second;
+  VIter vcur = vertices(g).first;
+  while(vcur!=vlast){
+    Vertex v = *vcur;
+    const OrientedLabelSet::iterator olast = orientmap[v].end();
+    for(OrientedLabelSet::iterator o=orientmap[v].begin();o!=olast;++o){
+      //translate the sequence index into its genome index
+      Label genomeidx = sequence2genome[o->first];
+      assert(genomeidx>=0&&genomeidx<BITMAX);
+      //presence bit for the genome
+      lmaskmap[v].set(genomeidx,1);
+      //orientation bit only for + orientation
+      if(o->second){
+        omaskmap[v].set(genomeidx,1);
+      }
+    }
+#ifdef DEBUG
+    cerr << "VERTEX: " << get(vertex_name,g,v) << endl;
+    cerr << "VERTEXIDX: " << v << endl;
+    cerr << "LMASK :" << lmaskmap[v]  << endl;
+    cerr << "OMASK :" << omaskmap[v] << endl;
+#endif
+    ++vcur;
+  }
+}
+
+//updateCoordinates()
+//Shifts every interval in `coordinates` by a per-sequence offset so that
+//the sequences of one genome occupy consecutive coordinate ranges.
+//Within a genome, sequences are laid out in ascending seqidx order and the
+//offset of a sequence is the sum of the largest end coordinates seen on
+//the sequences before it (the first sequence keeps offset 0).
+//
+//coordinates     (vertex,seqidx) -> (begin,end); replaced in place
+//sequence2genome seqidx -> genomeidx; sequences that do not appear here
+//                keep offset 0
+//
+//Two defects are fixed relative to the previous version:
+// - the offset was looked up as seqoffset[genomeidx] although seqoffset
+//   is keyed by seqidx;
+// - the shifted interval was held in a pair<Vertex,Label> (unsigned
+//   members) instead of an Interval.
+void updateCoordinates(VertexLabelIntervalMap & coordinates,
+		       SequenceGenomeMap & sequence2genome){
+
+  //Largest end coordinate observed on each sequence (starts at 0)
+  std::map<Label,Coordinate> maxcoord;
+  for(VertexLabelIntervalMap::iterator it=coordinates.begin();it!=coordinates.end();++it){
+    Label seqidx = it->first.second;
+    assert(it->second.first<=it->second.second);
+    Coordinate & seqmax = maxcoord[seqidx];
+    if(it->second.second > seqmax){
+      seqmax = it->second.second;
+    }
+  }
+
+  //Invert sequence2genome: genome -> member sequences
+  std::map<Label,std::vector<Label> > genome2sequence;
+  for(SequenceGenomeMap::iterator sit=sequence2genome.begin();sit!=sequence2genome.end();++sit){
+    genome2sequence[sit->second].push_back(sit->first);
+  }
+
+  //Running offset of each sequence inside its genome
+  std::map<Label,Coordinate> seqoffset;
+  for(std::map<Label,std::vector<Label> >::iterator git=genome2sequence.begin();git!=genome2sequence.end();++git){
+    Coordinate curroffset=0;
+    for(std::vector<Label>::iterator sit=git->second.begin();sit!=git->second.end();++sit){
+      Label seqidx = *sit;
+      seqoffset[seqidx] = curroffset;
+      curroffset = curroffset+maxcoord[seqidx];
+    }
+  }
+
+  //Build the shifted map, keyed exactly like the input
+  VertexLabelIntervalMap newcoordinates;
+  for(VertexLabelIntervalMap::iterator it=coordinates.begin();it!=coordinates.end();++it){
+    Label seqidx = it->first.second;
+    Vertex v = it->first.first;
+    //offset is per sequence, so it is looked up by seqidx
+    Coordinate offset = seqoffset[seqidx];
+    Coordinate newbeg=offset+it->second.first;
+    Coordinate newend=offset+it->second.second;
+    std::pair<Vertex,Label> key = std::make_pair(v,seqidx);
+    Interval value(newbeg,newend);
+    newcoordinates[key]=value;
+    assert(newcoordinates[key].first==newbeg);
+    assert(newcoordinates[key].second==newend);
+  }
+  //hand the result over without copying the whole map
+  coordinates.swap(newcoordinates);
+}
diff --git a/chaining/lcbchecks.h b/chaining/lcbchecks.h
new file mode 100644
index 0000000..ca7a074
--- /dev/null
+++ b/chaining/lcbchecks.h
@@ -0,0 +1,905 @@
+
+//#########################
+//Predicates
+//
+//isLabelMaxGap()
+//Test if v1 and v2 are separated by < maxgap in label intersection(s1,s2)
+//Returns false as soon as, on any oriented label present in both s1 and
+//s2, the gap between v1 and v2 (start of the later interval minus end of
+//the earlier one) exceeds maxgap; returns true otherwise.
+//coordinates is indexed with operator[] for (v1,seqidx) and (v2,seqidx).
+//sequence2genome is not used by this function.
+bool isLabelMaxGap(Vertex v1, 
+		   Vertex v2,
+		   OrientedLabelSet & s1, 
+		   OrientedLabelSet & s2,
+		   VertexLabelIntervalMap & coordinates,
+		   unsigned int maxgap,
+		   SequenceGenomeMap & sequence2genome){
+
+  const OrientedLabelSet::iterator s1_last = s1.end();
+  for(OrientedLabelSet::iterator member=s1.begin();member!=s1_last;++member){
+    //only consider seqs present in both s1 and s2
+    OrientedLabelSet::iterator match = s2.find(*member);
+    if(match == s2.end()){
+      continue;
+    }
+    Label seqidx = member->first;
+    assert(seqidx==match->first);
+
+    //intervals of the two vertices on this sequence
+    VertexIntervalMap currcoords;
+    currcoords.insert(std::make_pair(v1,
+                                     coordinates[std::make_pair(v1,seqidx)]));
+    currcoords.insert(std::make_pair(v2,
+                                     coordinates[std::make_pair(v2,seqidx)]));
+
+    //visit the two vertices in order of interval start
+    std::list<Vertex> sortedV;
+    sortedV.push_back(v1);
+    sortedV.push_back(v2);
+    sortedV.sort(coordsorder_vertex(&currcoords));
+
+    //-1 marks "no previous interval yet"
+    int prevcoord=-1;
+    const std::list<Vertex>::iterator v_last = sortedV.end();
+    for(std::list<Vertex>::iterator vit = sortedV.begin();vit!=v_last;++vit){
+      assert(coordinates.find(std::make_pair(*vit,seqidx)) != coordinates.end());
+      assert(coordinates.find(std::make_pair(*vit,seqidx))->second == currcoords.find(*vit)->second);
+      const Interval span = currcoords[*vit];
+      const int currstart = span.first;
+      const int currend = span.second;
+      if(prevcoord==-1){
+        assert(vit==sortedV.begin());
+      }
+      else{
+        assert(currstart<currend);
+        if(currstart-prevcoord > (int)maxgap){
+          return false;
+        }
+      }
+      prevcoord=currend;
+    }
+  }
+  return true;
+}
+
+//isLabelCollinear(s1,s2)
+//
+//Return true if there is no change in orientation between labels s1
+//and s2.  This implies a collinear relationship between s1 and s2
+//meaning there are no rearrangments between the labels in s1 and
+//s2. Labels are comprised of a pair (seq,orient).  There are 2
+//possibilities for a collinear relationship,
+
+//S=intersection(seq(s1),seq(s2))
+//return orient(s1 in S)==orient(s2 in S)
+//OR
+//revorient(s1 in S)==orient(s2 in S)
+
+//In other words, this function checks both the stored orientation of
+//s1 vs. s2 and rev(s1) vs s2 for all sequences shared between s1 and
+//s2 and returns true if either comparison is collinear (ie, no change in
+//orientation)
+inline bool isLabelCollinear(OrientedLabelSet & s1, 
+			     OrientedLabelSet & s2, 
+			     SequenceGenomeMap & sequence2genome){
+  //Per label set: one mask for the genomes present (lmask) and one for
+  //the genomes present in + orientation (omask)
+  BitMask s1lmask,s2lmask;
+  BitMask s1omask,s2omask;
+
+  for(OrientedLabelSet::iterator m1=s1.begin();m1!=s1.end();++m1){
+    Label genomeidx = sequence2genome[m1->first];
+    assert(genomeidx>=0&&genomeidx<BITMAX);
+    s1lmask.set(genomeidx,1);
+    if(m1->second){
+      s1omask.set(genomeidx,1);
+    }
+  }
+  for(OrientedLabelSet::iterator m2=s2.begin();m2!=s2.end();++m2){
+    Label genomeidx = sequence2genome[m2->first];
+    assert(genomeidx>=0&&genomeidx<BITMAX);
+    s2lmask.set(genomeidx,1);
+    if(m2->second){
+      s2omask.set(genomeidx,1);
+    }
+  }
+
+  //Shared labels obtain by intersection using bitwise AND
+  BitMask sharedlabels = (s1lmask&s2lmask);  
+
+#if defined(V_DEBUG)
+  BitMask difflabels = (s1lmask^s2lmask)&(s1lmask|s2lmask);  
+  cout << "S1MASK: " << s1lmask << endl;
+  cout << "S2MASK: " << s2lmask << endl;
+  cout << "SHARED: " << sharedlabels << endl;
+  cout << "DIFF: " << difflabels << endl;
+  cout << "O1MASK: " << s1omask << endl;
+  cout << "O2MASK: " << s2omask << endl;
+  cout << "O1MASKa: " << (s1omask&sharedlabels) << endl;
+  cout << "O2MASKa: " << (s2omask&sharedlabels) << endl;
+#endif
+
+  //Collinear when, restricted to the shared genomes, s2's orientations
+  //equal either s1's orientations or their complement
+  const BitMask s2shared = (s2omask&sharedlabels);
+  if((s1omask&sharedlabels)==s2shared){
+    return true;
+  }
+  return ((~s1omask)&sharedlabels)==s2shared;
+}
+
+//Mask-only collinearity test: true when, restricted to the bits set in
+//sharedlabels, s2omask equals either s1omask or the complement of s1omask.
+inline bool isLabelCollinearMask(BitMask & sharedlabels, BitMask & s1omask, BitMask & s2omask){
+  const BitMask s2shared = (s2omask&sharedlabels);
+  //same orientation on every shared label
+  if((s1omask&sharedlabels)==s2shared){
+    return true;
+  }
+  //otherwise every shared label must be flipped
+  return ((~s1omask)&sharedlabels)==s2shared;
+}
+
+//
+//Return true if no LCB gaps > maxgap
+//checkLCBGaps()
+//Returns false when
+// (a) some genome is represented by more than one sequence among the
+//     vertices of the LCB, or
+// (b) on some sequence, two vertices that are adjacent in coordinate
+//     order are separated by more than maxgap.
+//Returns true otherwise.
+//
+//g               graph providing vertex_orient, vertex_len, vertex_name
+//lcb             vertices of the LCB to check
+//ccvmap          not used by this function
+//coordinates     (vertex,seqidx) -> (begin,end)
+//maxgap          largest allowed gap between neighbouring vertices
+//sequence2genome seqidx -> genomeidx (accessed with operator[])
+template<typename TGraph> inline
+bool checkLCBGaps(TGraph & g,
+		  LCB & lcb,
+		  std::vector<int> & ccvmap,
+		  VertexLabelIntervalMap & coordinates,
+		  unsigned int maxgap,
+		  SequenceGenomeMap & sequence2genome){
+  //Never changed below, so the first oversized gap always returns
+  //immediately and mmV is never filled
+  bool shortcircuit=true;
+
+  bool badGap=false;
+  //Threshold is 0 and spanlen is unsigned, so the span-length test below
+  //always passes and its else branch is never taken
+  int MINSPANLEN=0;
+  LabelSet seqidxSet;
+  std::set<Vertex> mmV;
+  std::map<Label,std::set<Label> > seqspergenomeMap; //tracks the number of seqs per genome in an LCB
+  std::map<Label,std::set<Label> >::iterator gpos;
+  //only receives the second half of the insert() result; not read afterwards
+  bool inserted;
+
+  typename property_map <TGraph, vertex_orient_t >::type orientmap = get(vertex_orient,g);
+
+  //Pass 1: collect every sequence index used in the LCB and reject the LCB
+  //as soon as one genome contributes two different sequences
+  LCB::iterator lit_end = lcb.end();
+  for(LCB::iterator lit=lcb.begin();lit!=lit_end;++lit){ 
+    Vertex v = *lit;
+    //OrientedLabelSet o1 = get(vertex_orient, g, v);
+    OrientedLabelSet::iterator oit_end = orientmap[v].end();
+    for(OrientedLabelSet::iterator oit=orientmap[v].begin();oit!=oit_end;++oit){ //all seqs on the vertex
+      Label seqidx = oit->first;
+      seqidxSet.insert(seqidx);
+      tie(gpos, inserted) = seqspergenomeMap.insert(std::make_pair(sequence2genome[seqidx],std::set<Label>()));
+      gpos->second.insert(seqidx);
+      if(gpos->second.size()>1){
+	return false;
+      }
+    }
+  }
+
+  //Pass 2: per sequence, sort the LCB vertices that have coordinates on it
+  //and check the gap between each neighbouring pair
+  LabelSet::iterator it2_end = seqidxSet.end();
+  for(LabelSet::iterator it2 = seqidxSet.begin(); it2 != it2_end; ++it2){
+    Label seqidx = *it2;
+    //Label genomeidx = sequence2genome[seqidx];
+    std::list<Vertex> sortedV;
+    unsigned int spanlen=0;
+    //std::map<pair<Vertex,Label>,Interval > currcoords;
+    VertexIntervalMap currcoords;
+
+    LCB::iterator lit_end = lcb.end();
+    for(LCB::iterator lit=lcb.begin();lit!=lit_end;++lit){ 
+      Vertex v = *lit;
+      VertexLabelIntervalMap::iterator cit = coordinates.find(std::make_pair(v,seqidx));
+      if(cit!=coordinates.end()){
+	assert(cit->second.first<cit->second.second);
+	sortedV.push_back(v);
+	currcoords.insert(std::make_pair(v,
+					 cit->second));
+	//spanlen accumulates vertex_len of the vertices on this sequence
+	spanlen = spanlen + get(vertex_len,g,v);
+      }
+    }
+#ifdef DEBUG
+    std::cerr << "checkLCBGaps seqidx: " << seqidx << " span length " << spanlen  << " MINSPANLEN " << MINSPANLEN << std::endl;
+#endif
+    if(spanlen >= MINSPANLEN){
+      sortedV.sort(coordsorder_vertex(&currcoords));
+      
+      //prevcoord==-1 marks "no previous vertex yet"
+      int prevcoord=-1;
+      int currstart,currend;
+      Vertex prevvertex=0;
+      std::list<Vertex>::iterator vit_end = sortedV.end();
+      for(std::list<Vertex>::iterator vit = sortedV.begin();vit!=vit_end;++vit){
+	assert(coordinates.find(std::make_pair(*vit,seqidx)) != coordinates.end());
+	assert(coordinates[std::make_pair(*vit,seqidx)] == currcoords[*vit]);
+	tie(currstart,currend) = currcoords[*vit];
+	if(prevcoord==-1){
+	  assert(vit==sortedV.begin());
+	  prevvertex=*vit;
+	}
+	else{
+	  assert(coordinates.find(std::make_pair(prevvertex,seqidx)) != coordinates.end());
+	  assert(coordinates.find(std::make_pair(prevvertex,seqidx))->second.second == prevcoord);
+	  assert(currstart<currend);
+	  //gap = start of this vertex minus end of the previous one
+	  int dist = currstart-prevcoord;
+#ifdef DEBUG
+	    std::cerr << "Checking dist:" << dist << " > " << maxgap
+		      << " between V:" <<  get(vertex_name,g,prevvertex) << " " << currstart << "-" << currend 
+		      << " and V:" <<  get(vertex_name,g,*vit)
+		      << " on seqidx:" << seqidx << std::endl;
+#endif
+
+	  if(dist>(int)maxgap){
+	    badGap=true;
+#ifdef DEBUG
+	    std::cerr << "Large gap dist:" << dist << " > " << maxgap
+		      << " between V:" <<  get(vertex_name,g,prevvertex) 
+		      << " and V:" <<  get(vertex_name,g,*vit)
+		      << " on seqidx:" << seqidx << std::endl;
+#endif
+	    if(shortcircuit){
+	      return !badGap;
+	    }
+	    else{
+	      mmV.insert(*vit);
+	    }
+	    //std::cerr << "NO SHORT CIRCUIT" << std::endl;
+	  }
+	}
+	prevvertex=*vit;
+	prevcoord=currend;
+      }
+    }
+    else{
+      //std::cerr << "Skipping check of seqidx: " << seqidx << " span length " << spanlen  << " MINSPANLEN " << MINSPANLEN << std::endl;
+    }
+#ifdef DEBUG
+    if(mmV.size()>0){
+      std::cerr << "Num bad vertices:" << mmV.size()  
+		<< " LCB size:" << lcb.size() << std::endl;
+    }
+#endif
+  }
+  return !badGap;
+}
+
+//
+//Bitmask implementation
+//Returns true when the two orientation masks are collinear on the labels
+//shared by both label masks (lcbl1 & lcbl2), as decided by
+//isLabelCollinearMask.
+inline bool checkLCBOrient(BitMask & lcbl1,
+			   BitMask & lcbl2,
+			   BitMask & lcbo1,
+			   BitMask & lcbo2){
+#ifdef DEBUG
+  std::cerr << "LCBOrient l1:" << lcbl1 << " l2:" << lcbl2 << std::endl
+            << "LCBOrient o1:" << lcbo1 << " o2:" << lcbo2 << std::endl;
+#endif
+  BitMask sharedlabels = (lcbl1&lcbl2);
+#ifdef DEBUG
+  std::cerr << "LCBOrient shared:" << sharedlabels << std::endl;
+#endif
+  //The test must give the same answer with its arguments swapped
+  assert(isLabelCollinearMask(sharedlabels,lcbo1,lcbo2)==isLabelCollinearMask(sharedlabels,lcbo2,lcbo1));
+  const bool collinear = isLabelCollinearMask(sharedlabels,lcbo1,lcbo2);
+#ifdef DEBUG
+  if(collinear){
+    std::cerr << "Match" << std::endl;
+  }
+  else{
+    std::cerr << "MisMatch" << std::endl;
+  }
+#endif
+  return collinear;
+}
+
+//Compare the cached orientation of two LCBs, looked up by index in lcborientmap.
+//Each entry is read through .first (label mask) and .second (orientation mask);
+//all four masks are ANDed with longlabelmask and handed to the BitMask overload.
+template<typename TLCBOrientMap>
+bool checkLCBOrient(TLCBOrientMap & lcborientmap, typename TLCBOrientMap::key_type &lcbidx1, typename TLCBOrientMap::key_type &lcbidx2, BitMask &longlabelmask){
+  BitMask labels1=lcborientmap[lcbidx1].first&longlabelmask;
+  BitMask labels2=lcborientmap[lcbidx2].first&longlabelmask;
+  BitMask orients1=lcborientmap[lcbidx1].second&longlabelmask;
+  BitMask orients2=lcborientmap[lcbidx2].second&longlabelmask;
+  return checkLCBOrient(labels1,labels2,orients1,orients2);
+}
+
+//
+//Return true if no mismatch between Vertex orientations within an LCB
+//Currently implemented with bitmasks
+//  g                 - graph supplying the vertex_orientmask property map
+//  lcb               - vertices compared pairwise
+//  lcbmask1,lcbmask2 - label masks, each ANDed with longlabelmask before use
+//  longlabelmask     - also ANDed onto every vertex orientation mask
+//Returns false at the first mismatching pair; true otherwise (including fewer than two vertices)
+//TODO
+//Improve performance
+//Profiling shows this check is primary performance bottleneck
+//First attempt above was to cache orientation of the LCB once instead of checking all pairs
+//for consistency
+//Surprisingly, this does not appear to improve performance?
+template<typename TGraph> inline
+bool checkLCBOrient(TGraph & g,
+		    LCB & lcb,
+		    BitMask &lcbmask1,
+		    BitMask &lcbmask2,
+		    BitMask &longlabelmask){
+
+  typename property_map < TGraph, vertex_orientmask_t >::type orientmaskmap = get(vertex_orientmask,g);
+  const LCB::iterator lcb_end = lcb.end();
+  BitMask l1 = lcbmask1&longlabelmask;
+  BitMask l2 = lcbmask2&longlabelmask;
+
+  for(LCB::iterator it = lcb.begin();it!=lcb_end;++it){
+    //The BitMask overload of checkLCBOrient takes non-const references, and the
+    //result of the & expression is a temporary that cannot bind to one, so each
+    //masked orientation is stored in a named object first
+    BitMask o1 = orientmaskmap[*it]&longlabelmask;
+    for(LCB::iterator it2 = it+1;it2!=lcb_end;++it2){
+      BitMask o2 = orientmaskmap[*it2]&longlabelmask;
+      if(!checkLCBOrient(l1,l2,o1,o2)){
+#ifdef DEBUG
+	std::cerr << "SAM Mismatch " << *it << "-" << *it2 << std::endl;
+#endif
+	return false;
+      }
+    }
+  }
+
+  return true;
+}
+template<typename TGraph> inline //forward declaration: makes the overload called below visible at this point
+bool checkLCBOrient(TGraph & g, LCB & lcb, BitMask & longlabelmask, SequenceGenomeMap & sequence2genome);
+//Convenience overload: check the LCB with every label selected (all mask bits set)
+template<typename TGraph> inline
+bool checkLCBOrient(TGraph & g, LCB & lcb, SequenceGenomeMap & sequence2genome){
+  BitMask longlabelmask;
+  longlabelmask.set();
+  return checkLCBOrient(g,lcb,longlabelmask,sequence2genome);
+}
+
+//Pairwise orientation check over an LCB restricted to a set of labels.
+//  g               - graph supplying the vertex_orientmask and vertex_vlabelmask property maps
+//  lcb             - vertices compared pairwise
+//  longlabelmask   - only labels set here (and present on both vertices of a pair) are compared
+//  sequence2genome - not read by this overload
+//Returns false as soon as isLabelCollinearMask rejects a pair, true if every pair passes.
+template<typename TGraph> inline
+bool checkLCBOrient(TGraph & g,
+		    LCB & lcb,
+		    BitMask & longlabelmask,
+		    SequenceGenomeMap & sequence2genome){
+
+  //Only the two mask property maps are needed for the comparison
+  typename property_map < TGraph, vertex_orientmask_t >::type orientmaskmap = get(vertex_orientmask,g);
+  typename property_map < TGraph, vertex_vlabelmask_t >::type labelmaskmap = get(vertex_vlabelmask,g);
+  const LCB::iterator lcb_end = lcb.end();
+
+  for(LCB::iterator it = lcb.begin();it!=lcb_end;++it){
+    BitMask o1 = orientmaskmap[*it];
+    BitMask l1 = labelmaskmap[*it];
+    for(LCB::iterator it2 = it+1;it2!=lcb_end;++it2){
+      //labels carried by both vertices and selected by longlabelmask
+      BitMask sharedlabels = ((l1&labelmaskmap[*it2])&longlabelmask);
+      if(!isLabelCollinearMask(sharedlabels,o1,orientmaskmap[*it2])){
+#ifdef DEBUG
+	std::cerr << "SAM Mismatch " << *it << "-" << *it2 << std::endl;
+#endif
+	return false;
+      }
+    }
+  }
+  return true;
+}
+//Legacy pairwise check: returns true when no two vertices of lcb have conflicting orientations on the labels they share
+template<typename TGraph, typename TLCBOrientMap> inline
+bool checkLCBOrient_old(TGraph & g,
+		    LCB & lcb,
+		    int lcbidx, //referenced only by the commented-out assert below
+		    TLCBOrientMap & lcborientmap){ //referenced only by the commented-out assert below
+  bool shortcircuit=true; //hard-wired: return at the first mismatch
+  //Check for label orientation mismatches
+  bool mmOrient=false;
+  LCB::iterator it,it2,it_end,it2_end;
+  EdgeSet mmV; //mismatching vertex pairs; filled and reported only when DEBUG is defined
+  //Use orientmap so that we can pass by reference using lvalue []
+  typename property_map < TGraph, vertex_orient_t >::type orientmap = get(vertex_orient,g); //not read below
+  typename property_map < TGraph, vertex_orientmask_t >::type orientmaskmap = get(vertex_orientmask,g);
+  typename property_map < TGraph, vertex_vlabelmask_t >::type labelmaskmap = get(vertex_vlabelmask,g);
+  it_end = lcb.end();
+  it2_end = lcb.end();
+  for(it = lcb.begin();it!=it_end;++it){
+    BitMask o1 = orientmaskmap[*it];
+    BitMask l1 = labelmaskmap[*it];
+    for(it2 = it+1;it2!=it2_end;++it2){ //each unordered pair is visited once
+      BitMask sharedlabels = (l1&labelmaskmap[*it2]); //labels present on both vertices
+#ifdef DEBUG
+	std::cerr << "LCBOrientS l1:" << l1 << " l2:" << labelmaskmap[*it2] << std::endl
+		  << "LCBOrientS o1:" << o1 << " o2:" << orientmaskmap[*it2] << std::endl;
+	std::cerr << "LCBOrientS shared:" << sharedlabels << std::endl;
+#endif
+      //assert(checkLCBOrient(lcborientmap[lcbidx].second,lcborientmap[lcbidx].second,o1,orientmaskmap[*it2]) 
+      //== isLabelCollinearMask(sharedlabels,o1,orientmaskmap[*it2]));
+      if(isLabelCollinearMask(sharedlabels,o1,orientmaskmap[*it2])){
+#ifdef DEBUG
+	  std::cerr << "Match" << std::endl;
+#endif
+      }
+      else{
+#ifdef DEBUG
+	  std::cerr << "MisMatch" << std::endl;
+	  mmV.insert(std::make_pair(*it,*it2));
+#endif
+	mmOrient=true;
+	if(shortcircuit){
+	  return !mmOrient; //always false here
+	}
+      }
+    }
+  }
+#ifdef DEBUG
+    for(EdgeSet::iterator it = mmV.begin();it!=mmV.end();++it){ //NOTE(review): while shortcircuit is true every insert into mmV is followed by an immediate return, so this report appears unreachable
+      OrientedLabelSet l1 = get(vertex_orient, g, it->first);
+      OrientedLabelSet l2 = get(vertex_orient, g, it->second);
+      Edge e1;
+      bool found;
+      
+      tie(e1,found) = edge(it->first,it->second,g);
+      if(found){
+	//assert(get(edge_category,g,e1)==PURPLE);
+      }
+      tie(e1,found) = edge(it->first,it->second,g); //NOTE(review): identical to the lookup above; possibly meant edge(it->second,it->first,g) - confirm
+      if(found){
+	//assert(get(edge_category,g,e1)==PURPLE);
+      }
+      std::cerr << "Orient Mismatch V1:" <<  get(vertex_name,g,it->first) << " L1:";
+      printlabel(l1);
+      std::cerr <<  " V2:" << get(vertex_name,g,it->second) << " L2:";
+      printlabel(l2); 
+      std::cerr << std::endl;
+    }
+#endif
+    return !mmOrient; //true when no mismatch was seen
+}
+
+//sameLabel(v1,v2,e)
+//Report whether three labels are all equal.
+//  v1,v2 - compared with each other first
+//  e     - compared with v1 only when v1 and v2 already agree
+//Returns true only when v1==v2 and v1==e, false otherwise.
+//TLabel needs nothing beyond operator==.
+template<typename TLabel>
+bool sameLabel(TLabel v1, TLabel v2, TLabel e){
+  const bool pairAgrees = (v1==v2);
+  if(!pairAgrees){
+    return false;
+  }
+  return v1==e;
+}
+
+//Report whether o2 matches o1 directly, or matches the complement of o1 limited to v1.
+//  o1,o2 - values compared with ==;  v1 - mask ANDed onto the flipped copy of o1
+template<typename TLabel>
+bool sameOrient(TLabel o1, TLabel o2, TLabel v1){
+  if(o1==o2){
+    return true;
+  }
+  BitMask flipped=o1;
+  flipped.flip();
+  //Must logical AND with labelmask v1
+  flipped=flipped&v1;
+  return o2==flipped;
+}
+
+//
+//TODO
+//First determine the orientation of the sequences in the LCB
+//Then orient each vertex in the LCB
+//assignLCBOrient(LCB)
+// BitMask s1omaskrev = s1omask;
+//  s1omaskrev.flip();
+//  if((s1omask&sharedlabels)==(s2omask&sharedlabels) || 
+//     (s1omaskrev&sharedlabels)==(s2omask&sharedlabels)){
+//    return true;
+//  }
+//  else{
+//    return false;
+//Need to assign LCBs a true orientation for each seq
+//This is not as simple as always setting a bit for every + seq
+//because blocks within an LCB can be reversed
+//Need to mark each block and whether it is flipped wrt to the LCB orientation
+//Attempt to mark this flipped state when adding the block to the LCB2
+template<typename TGraph> inline //forward declaration: makes the overload called below visible at this point
+void setLCBOrient(TGraph & g, BitMask & labelmask, BitMask & orientmask, LCB & lcb, std::vector<Vertex> & badV, SequenceGenomeMap & sequence2genome);
+//Compute the (label mask, orientation mask) pair of lcb; badV is filled by the six-argument overload
+template<typename TGraph>
+inline std::pair<BitMask,BitMask> setLCBOrient(TGraph & g, LCB & lcb, std::vector<Vertex> & badV, SequenceGenomeMap & sequence2genome){
+  BitMask labelmask;  //the six-argument overload asserts that both masks have no bit set on entry
+  BitMask orientmask;
+  setLCBOrient(g,labelmask,orientmask,lcb,badV,sequence2genome);
+  return std::make_pair(labelmask,orientmask);
+}
+//Derive the label mask and orientation mask of an LCB from its vertices; vertices that fit neither the accumulated orientation nor its reverse are collected in badV
+template<typename TGraph>
+inline void setLCBOrient(TGraph & g, //graph supplying the vlabelmask, orientmask and len vertex properties
+			 BitMask & labelmask, //out: OR of the label masks of all vertices in lcb; asserted empty on entry
+			 BitMask & orientmask, //out: orientation accumulated in the second loop; asserted empty on entry
+			 LCB & lcb, 
+			 std::vector<Vertex> & badV, //out: cleared, then filled with the vertices matching neither orientation
+			 SequenceGenomeMap & sequence2genome){ //not read in this function
+#ifdef DEBUG
+    std::cerr << "Setting orientation for lcb with " << lcb.size() << " vertices" << std::endl;
+#endif
+  badV.clear();
+  assert(!orientmask.any());
+  assert(!labelmask.any());
+  typename property_map < TGraph, vertex_vlabelmask_t >::type lmaskmap = get(vertex_vlabelmask,g);
+  typename property_map < TGraph, vertex_orientmask_t >::type omaskmap = get(vertex_orientmask,g);
+  typename property_map < TGraph, vertex_len_t >::type lenmap = get(vertex_len,g);
+  typename property_map < TGraph, vertex_label_t >::type labelset = get(vertex_label,g); //not read below
+  //Save label mask and determine evaluation order for orientmask
+  std::list<Vertex> sortedV;
+  std::map<Label,int> spans; //not read below
+  std::map<Label,int>::iterator sit; //not read below
+  //bool found;
+
+  std::map<Vertex,int> vorientmatchcount; //vertex -> accumulated match count, used as the sort key below
+  std::map<Vertex,int>::iterator mit;
+
+  LCB::iterator it,it_end,it2,it2_end;
+  it_end = lcb.end();
+  it2_end = lcb.end();
+
+
+  BitMask omask;
+  BitMask lmask;
+  BitMask sharedlabels;
+  //Foreach vertex in the LCB, sum the number of bases with compatible orientation labeling
+  for(it = lcb.begin();it!=it_end;++it){
+    assert(lmaskmap[*it].any());//at least one seq > MINSPANLEN
+    labelmask = labelmask|lmaskmap[*it];
+    sortedV.push_back(*it);
+    //Check num bp from other vertices compatible within the LCBs 
+    omask = omaskmap[*it];
+    lmask = lmaskmap[*it];
+    int len = lenmap[*it];
+    mit = vorientmatchcount.find(*it);
+    for(it2 = lcb.begin();it2!=it2_end;++it2){
+      if(it2!=it){
+	sharedlabels = (lmask&lmaskmap[*it2]);
+	if(isLabelCollinearMask(sharedlabels,omask,omaskmap[*it2])){
+	  //Update count of bp: the length of *it is added once for every compatible vertex *it2
+	  if(mit==vorientmatchcount.end()){
+	    vorientmatchcount[*it]=len;
+	    mit = vorientmatchcount.find(*it);
+	  }
+	  else{
+	    mit->second = mit->second+len;
+	  }
+	}
+      }
+    }
+  }
+  //Determine orientation compatible with most bp in the LCB
+  //
+  //Sort by vorientmatchcount so that we consider most frequent orientations first
+  //before alternative orientations
+  //TODO, confirm this is actually working
+  sortedV.sort(matchmaporder(&vorientmatchcount)); //ordering is defined by matchmaporder, which is not shown here
+#ifdef DEBUG
+    std::cerr << "LCB labelmask :" << labelmask << std::endl;
+#endif
+  BitMask currlabelmask; //labels of the vertices already processed in sorted order
+
+  //Realize speedup if < 64 genomes
+#if BITMAX > 64
+  boost::unordered_set<BitMask,bitsethasher_string> altOrients;
+#else
+  boost::unordered_set<BitMask,bitsethasher_ulong> altOrients;
+#endif
+
+  std::set<Vertex> badVS; //filled only when DEBUG is defined
+  std::list<Vertex>::iterator svit,svit_end;
+  svit_end = sortedV.end();
+  for(svit=sortedV.begin();svit!=svit_end;++svit){
+    if((omaskmap[*svit]&currlabelmask)  //Vertex orients for seqs in LCB
+       ==(orientmask&lmaskmap[*svit])){ //LCB orients on current vertex
+      orientmask = orientmask|omaskmap[*svit];
+    }
+    else{
+      BitMask s1omaskrev = omaskmap[*svit]; //reversed orientation of the vertex ...
+      s1omaskrev.flip();
+      s1omaskrev = s1omaskrev&lmaskmap[*svit]; //... limited to the labels present on this vertex
+      if((s1omaskrev&currlabelmask)
+	 ==(orientmask&lmaskmap[*svit])){
+	orientmask = orientmask|s1omaskrev;
+      }
+      else{
+	//Alternative orientation
+	//Sorting on bp above ensures this alternative is congruent with 
+	//fewer bp than at least one other alternative orientation for the LCB
+	badV.push_back(*svit);
+#ifdef DEBUG
+	  badVS.insert(*svit);
+#endif
+	altOrients.insert((omaskmap[*svit]&labelmask)); //distinct rejected orientations; only the size is reported (DEBUG)
+	
+	//(1)Simple breakpoints
+	//A simple breakpoint occurs when a pair of vertices in the block have incompatible orientations
+	//For example, consider an LCB with vertices 1(a+b+) and 2(a+b-)
+
+	//(2)Compound orientation breakpoints
+	//A compound breakpoint occurs when a combination of
+	//vertices has an orientation that is incompatible with 
+	//other vertices in a block
+	//For example, consider an LCB with vertices 1(a+b+,d-) 2(b+c-) 3(a+b+c-) 4(c-,d+)
+	//All pairwise comparisons are congruent in orientation if reversals are allowed
+	//But not all multiway combinations are congruent because 
+	//      1+2 requires (a+b+c-d-)
+	//while 3+4 requires (a+b+c-d+)
+	//producing an incompatibility in the orientations of d
+	//when considered in the context of the other
+	//vertices in the block
+
+	//Save a map v->totallcbsize congruent with orientmask(v)
+	//Sort by totallcbsize in decreasing order
+	//Build LCB mask in sorted order 
+	//This ensures orientations congruent with the most bp are considered first
+	//Each alternative orientation encountered is a compound breakpoint
+
+	//Count length/number of vertices congruent with this alternative orientation
+#ifdef DEBUG
+	  std::cerr << "Mismatched orient for vertex " << get(vertex_name,g,*svit) << " with vertex/lcb/shared labels " <<std::endl 
+		    << " vertex_l     :" << lmaskmap[*svit] << std::endl
+		    << " lcb_l        :" << currlabelmask << std::endl
+		    << " shared_l     :" << (lmaskmap[*svit]&currlabelmask) << std::endl
+		    << " lcb_o        :" << orientmask << std::endl
+		    << " vertex_o     :" << (omaskmap[*svit]&currlabelmask) << std::endl
+		    << " vertex_ro    :" << (s1omaskrev&currlabelmask) << std::endl
+		    << " shared_o     :" << (orientmask&lmaskmap[*svit]) << std::endl;
+	//assert(false); 
+	  std::cerr << "Num bp matching orient len:" << vorientmatchcount[*svit] << std::endl;
+#endif
+      }
+    }
+    //Update label mask
+    currlabelmask = currlabelmask|lmaskmap[*svit];
+  }
+#ifdef DEBUG
+    std::cerr << "LCB orientmask:" << orientmask << std::endl;
+#endif
+  //Check that orientmask is only set on member sequences in the LCB
+  assert((orientmask&labelmask)==orientmask);
+  /*
+  //TESTING
+  if(0 && DEBUG){
+    for(it = lcb.begin();it!=it_end;++it){
+      //Label for vertex contains strict subset of seqs as LCB
+      assert(((lmaskmap[*it]&labelmask))==lmaskmap[*it]);
+      assert(((omaskmap[*it]&labelmask))==((omaskmap[*it]&lmaskmap[*it]&labelmask)));
+      assert(((omaskmap[*it]&labelmask))==((omaskmap[*it]&lmaskmap[*it])));
+      
+      //Check that vertex omask matches lcb omask for member sequences on the vertex
+      if((omaskmap[*it]&labelmask)==(orientmask&lmaskmap[*it])){
+	if(badVS.find(*it)!=badVS.end()){
+	  assert(false);
+	}
+      }
+      else{
+	BitMask s1omaskrev = omaskmap[*it];
+	s1omaskrev.flip();
+	s1omaskrev = s1omaskrev&lmaskmap[*it];
+	if((s1omaskrev&labelmask)==(orientmask&lmaskmap[*it])){
+	  if(badVS.find(*it)!=badVS.end()){
+	    assert(false);
+	  }
+	}
+	else{
+	  if(badVS.find(*it)==badVS.end()){
+	    std::cerr << "Vertex " << get(vertex_name,g,*it) << " does not match LCB orientation " << std::endl
+		      << "vertex_o: " << (omaskmap[*it]&labelmask) << std::endl
+		      << "vertex_ro:" << (s1omaskrev&labelmask) << std::endl
+		      << "lcb_o:    " << (orientmask&lmaskmap[*it]) << std::endl;
+	    
+	    assert(false);
+	  }
+	}
+      }
+    }
+  }
+  */
+  if(badV.size()>0){ //the body only traces, and only when DEBUG is defined
+#ifdef DEBUG
+    std::cerr << "LCB has " << badV.size()  << "/" << lcb.size() 
+	      << " misoriented vertices. Max num alternative orients " << altOrients.size() << std::endl;
+#endif
+  }
+#ifdef DEBUG
+    std::cerr << "Setting orientation done" << std::endl;
+#endif
+}
+
+//Recompute the cached orientation of every LCB.
+//lcborientmap is cleared and then filled so that entry i (0-based, in iteration order of
+//componentMap) holds the (label mask, orientation mask) pair returned by setLCBOrient.
+//The misoriented vertices reported for each LCB are not kept.
+template<typename TGraph, typename TLCBMap, typename TComponentMap>
+void setLCBOrient(TGraph & g, TLCBMap & lcborientmap, TComponentMap & componentMap, SequenceGenomeMap & sequence2genome){
+#ifdef DEBUG
+    std::cerr <<"Resetting lcborientmap" << std::endl;
+#endif
+  lcborientmap.clear();
+
+  std::vector<Vertex> badV; //scratch output of the per-LCB call, discarded afterwards
+  int lcbidx=0;
+  const typename TComponentMap::iterator lit_end=componentMap.end();
+  for(typename TComponentMap::iterator lit = componentMap.begin();lit!=lit_end;++lit,++lcbidx){
+    lcborientmap[lcbidx] = setLCBOrient(g,*lit,badV,sequence2genome);
+    //At least one genome label must be set
+    assert(lcborientmap[lcbidx].first.any());
+  }
+}
+
+//Build a mask of the genomes whose accumulated length over the vertices of lcb exceeds MINSPANLEN.
+//  lcb             - vertex container; any type with iterator/begin()/end() is accepted
+//  lenmap          - lenmap[v] is the amount added for every label found on vertex v
+//  labelset        - labelset[v] is iterated as a LabelSet of sequence labels
+//  sequence2genome - sequence label -> genome label; asserted to contain every label seen
+//  MINSPANLEN      - strict threshold: a genome bit is set only when its total is greater
+//Returns a BitMask with bit g set for each qualifying genome label g.
+template<typename TLCB,
+	 typename TLenMap,
+	 typename TLabelMap>
+BitMask setSpanMask(TLCB & lcb,
+		    TLenMap & lenmap,
+		    TLabelMap & labelset,
+		    SequenceGenomeMap & sequence2genome,
+		    int MINSPANLEN=0){
+   
+  std::map<Label,int> spans; //genome label -> accumulated length
+
+  //Walk the container with its own iterator type so that TLCB need not be LCB
+  const typename TLCB::iterator it_end = lcb.end();
+  for(typename TLCB::iterator it = lcb.begin();it!=it_end;++it){
+    //assert(labelset.find(*it)!=labelset.end());
+    //assert(lenmap.find(*it)!=lenmap.end());
+    LabelSet::iterator lit_end = labelset[*it].end();
+    for(LabelSet::iterator lit = labelset[*it].begin();lit != lit_end;++lit){
+      assert(sequence2genome.find(*lit)!=sequence2genome.end());
+      Label l = sequence2genome[*lit]; 
+      //operator[] creates a missing entry with value 0, so first and later
+      //contributions are handled by the same statement
+      spans[l] += lenmap[*it];
+    }
+  }
+
+  //Only consider sequences with > MINSPANLEN
+  BitMask longlabelmask;
+  for(std::map<Label,int>::iterator sit = spans.begin();sit!=spans.end();++sit){
+    //MINSPANLEN is already an int, so no cast is needed for the comparison
+    if(sit->second>MINSPANLEN){
+      longlabelmask.set(sit->first);
+    }
+  }
+  return longlabelmask;
+}
+
+
+
+//####################
+//LCB reporting functions
+//
+//get_LCB_length: store the interval spanned by an LCB on each sequence and return the longest span.
+//A sequence is reported when the vertex lengths summed over its genome reach minlength.
+//  lcb         - vertices of the LCB
+//  orientmap   - per-vertex OrientedLabelSet; the first member of each entry is the sequence index
+//  lenmap      - per-vertex length used for the per-genome sums
+//  coordinates - (vertex,seqidx) -> interval; asserted to hold every reported pair
+//  lcbcoords   - out: (lcbidx,seqidx) -> (min start,max end); an existing entry is replaced
+//  totallen    - in/out: grows by the interval length of every reported (vertex,sequence) pair
+//Returns the largest (max end - min start) over the reported sequences, or 0 if none is reported.
+//inline: this non-template function is defined in a header
+inline unsigned int get_LCB_length(LCB & lcb, 
+			    property_map < LCBSynFilterGraph, vertex_orient_t >::type orientmap,
+			    property_map < LCBSynFilterGraph, vertex_len_t >::type lenmap,
+			    VertexLabelIntervalMap & coordinates,
+			    LCBLabelIntervalMap & lcbcoords,
+			    unsigned int lcbidx, 
+			    int & totallen,
+			    SequenceGenomeMap & sequence2genome,
+			    int minlength=0){
+  std::map<unsigned int,unsigned int> mincoordsbyseq;
+  std::map<unsigned int,unsigned int> maxcoordsbyseq;
+  std::map<Label,int> spans;
+  OrientedLabelSet alllabel;
+  LCB::iterator it;
+  for(it = lcb.begin();it!=lcb.end();++it){
+    OrientedLabelSet::iterator it2_end = orientmap[*it].end();
+    for(OrientedLabelSet::iterator it2 = orientmap[*it].begin();it2!=it2_end;++it2){
+      assert(it2->first>=0);
+      Label seqidx = it2->first;
+      Label genomeidx = sequence2genome[seqidx];
+      spans[genomeidx] += lenmap[*it]; //a missing entry starts at 0
+    }
+  }
+  for(it = lcb.begin();it!=lcb.end();++it){
+    OrientedLabelSet::iterator it2_end = orientmap[*it].end();
+    for(OrientedLabelSet::iterator it2 = orientmap[*it].begin();it2!=it2_end;++it2){
+      assert(it2->first>=0);
+      Label seqidx = it2->first;
+      Label genomeidx = sequence2genome[seqidx];
+      assert(spans.find(genomeidx)!=spans.end());
+      if(spans[genomeidx] >= minlength){
+	//insert() leaves an existing entry untouched, so the sentinels are set only the first time seqidx is seen
+	mincoordsbyseq.insert(std::make_pair(seqidx,std::numeric_limits<unsigned int>::max()));
+	maxcoordsbyseq.insert(std::make_pair(seqidx,std::numeric_limits<unsigned int>::min()));
+	
+	alllabel.insert(OrientedLabel(seqidx,true));
+	VertexLabelIntervalMap::iterator vit =  coordinates.find(std::make_pair(*it,seqidx));
+	assert(vit!=coordinates.end());
+	assert(vit->second.second>vit->second.first);
+	totallen+=abs(vit->second.second-vit->second.first);	
+	mincoordsbyseq[seqidx] = ((unsigned int)vit->second.first<mincoordsbyseq[seqidx]) ? vit->second.first : mincoordsbyseq[seqidx];
+	maxcoordsbyseq[seqidx] = ((unsigned int)vit->second.second>maxcoordsbyseq[seqidx]) ? vit->second.second : maxcoordsbyseq[seqidx];
+	assert(mincoordsbyseq[seqidx] != std::numeric_limits<unsigned int>::max());
+	assert(maxcoordsbyseq[seqidx] != std::numeric_limits<unsigned int>::min());
+	//std::cerr << "seq:" << seqidx << " " << *it << " " << maxcoordsbyseq[seqidx] << " " <<  mincoordsbyseq[seqidx] << std::endl;
+      }
+    }
+  }
+  if(alllabel.size()==0){
+    return 0;
+  }
+  //Save spanning coords for lcb in lcbcoords
+  OrientedLabelSet::iterator it2_end = alllabel.end();
+  for(OrientedLabelSet::iterator it2 = alllabel.begin();it2!=it2_end;++it2){
+    Label seqidx = it2->first;
+    Label genomeidx = sequence2genome[seqidx];
+    assert(spans[genomeidx] >= minlength);
+
+    LCBLabelIntervalMap::iterator it=lcbcoords.find(std::make_pair(lcbidx,seqidx));
+    if(it!=lcbcoords.end()){
+      lcbcoords.erase(it);
+    }
+    assert(mincoordsbyseq[seqidx]<maxcoordsbyseq[seqidx]);
+    lcbcoords.insert(std::make_pair(std::make_pair(lcbidx,seqidx),std::make_pair(mincoordsbyseq[seqidx],maxcoordsbyseq[seqidx])));
+  }
+  //Maximum span of all seqs
+  unsigned int maxminlen = 0;
+  OrientedLabelSet::iterator it_end = alllabel.end();
+  for(OrientedLabelSet::iterator it = alllabel.begin();it!=it_end;++it){
+    Label seqidx = it->first;
+    Label genomeidx = sequence2genome[seqidx];
+    assert(spans[genomeidx] >= minlength);
+    assert(maxcoordsbyseq[seqidx]>mincoordsbyseq[seqidx]);
+    if(maxcoordsbyseq[seqidx]>mincoordsbyseq[seqidx]){
+      unsigned int len = (unsigned int)(maxcoordsbyseq[seqidx] - mincoordsbyseq[seqidx]);
+      //std::cerr << "seq:" << seqidx << " " << maxcoordsbyseq[seqidx] << " " <<  mincoordsbyseq[seqidx] << " " << len << std::endl;
+      maxminlen = len>maxminlen ? len : maxminlen;
+    }
+    else{
+      //std::cerr << "??seq:" << seqidx << " " << maxcoordsbyseq[seqidx] << " " <<  mincoordsbyseq[seqidx] << std::endl;
+      assert(false);
+    }
+  }
+  assert(maxminlen>0);
+  //std::cerr << maxminlen << std::endl;
+  return maxminlen;
+}
+
diff --git a/chaining/mincut.h b/chaining/mincut.h
new file mode 100644
index 0000000..cbce460
--- /dev/null
+++ b/chaining/mincut.h
@@ -0,0 +1,975 @@
+//##########
+//breakLCBmincutconnect()
+//Interpret anchor graph as a flow network
+//Use mincut,max-flow to partition the graph to fulfill criteria
+//1)gaps <= distance
+//2)no conflicting orientations
+//3)at most one sequence per genome (important for draft data)
+//Cut edges are tagged as BLUE in the input graph. 
+
+template<typename TGraph, typename TGraph2>
+inline
+int breakLCBmincutconnect(std::vector<LCB > &componentMap,
+			  std::vector<int> &ccvmap,
+			  EdgeSet&maskedEdges,
+			  TGraph g,
+			  TGraph2 fglcbsyn,
+			  unsigned int distance,
+			  VertexLabelIntervalMap &coordinates,
+			  std::set<Label> &seqidxSet,
+			  NameVertexMap &name2vertex,
+			  SequenceGenomeMap & sequence2genome,
+			  int filenumoffset=0){
+  bool found=false;
+  int lcbcount=0;
+
+  bool reusesupernodes=false;
+  //int SEARCH_RADIUS=std::numeric_limits<unsigned int>::max();
+  int DEFAULT_CAP=1;
+  int numcuts=0;
+  int MINSPANLEN=0;
+  int cutcount=0;
+  //Determine cuts over each LCB
+  for(std::vector<LCB >::iterator it = componentMap.begin();it!=componentMap.end();++it){
+#ifdef DEBUG
+      std::cerr << "mincut LCB count:" << lcbcount << " num vertices " << it->size() << std::endl;
+#endif
+    lcbcount++;
+    std::map<VertexName, LVertex> currlcbv;
+    std::map<VertexName, LVertex>::iterator pos;
+    std::map<LVertex,Vertex> vmap;
+    typename property_map < TGraph, vertex_orientmask_t >::type orientmaskmap = get(vertex_orientmask,g);
+    typename property_map < TGraph, vertex_vlabelmask_t >::type labelmaskmap = get(vertex_vlabelmask,g);
+    typename property_map < TGraph, vertex_orient_t>::type orientmap = get(vertex_orient,g);
+
+    std::map<std::pair<VertexName,VertexName>,std::pair<LVertex,LVertex> > cuts;
+    std::map<std::pair<LVertex,LVertex>,std::pair<VertexName,VertexName> > revcuts;
+    std::set<std::pair<LVertex,LVertex> > cutsnodes;
+    std::map<std::pair<LVertex,LVertex>,unsigned int> cutsdistmap;
+    std::map<LVertex,int> supernodes;
+    //std::set<LVertex> supernodes2;
+    int snodeedges=0;
+
+
+    //The connectivity graph used for max flow, min cut
+    LGraph currlcbg;
+    int supercount=0;
+    property_map < LGraph, edge_capacity_t >::type
+      capacity = get(edge_capacity, currlcbg);
+    property_map < LGraph, edge_reverse_t >::type 
+      rev = get(edge_reverse, currlcbg);
+    property_map < LGraph, edge_residual_capacity_t >::type
+      residual_capacity = get(edge_residual_capacity, currlcbg);
+    
+    //The set of vertices for the LCB
+    LCB blockV = *it;
+    //Get sequence labels for this lcb
+    //Slows us down for complete genomes, but helps us with perf for draft genomes
+    std::set<Label> currseqidxSet;
+    typename property_map < TGraph, vertex_label_t >::type vlabelmap = get(vertex_label,g);
+    std::map<Label,std::set<Label> > seqspergenomeMap; //tracks the number of seqs per genome in an LCB
+    std::map<Label,std::set<Label> >::iterator gpos;
+    std::set<Label>::iterator spos,spos2;
+    std::map<Label,std::set<LVertex> > seqsvertex;
+    std::set<LVertex>::iterator vpos,vpos2;
+    bool inserted;
+    for(LCB::iterator vit = blockV.begin();vit!=blockV.end();++vit){
+      for(LabelSet::iterator sit = vlabelmap[*vit].begin();sit!=vlabelmap[*vit].end();++sit){
+	currseqidxSet.insert(*sit);
+	tie(gpos, inserted) = seqspergenomeMap.insert(std::make_pair(sequence2genome[*sit],std::set<Label>()));
+	gpos->second.insert(*sit);
+      }
+    }
+    std::vector<LGraph::edge_descriptor> disconnecting_set;
+    for(std::set<Label>::iterator it2 = currseqidxSet.begin(); it2 != currseqidxSet.end(); ++it2){
+      assert(seqidxSet.find(*it2) != seqidxSet.end());
+      Label seqidx = *it2;
+      //Label genomeidx = sequence2genome[seqidx];
+      std::list<LVertex> sortedV;
+      unsigned int spanlen=0;
+      //need to create custom coords map for LGraph
+      VertexIntervalMap currcoords;
+
+      //Create special graph currlcbv to represent the current lcb
+      //createLCBGraph(g,currlcbv,blockV);
+      
+      for(LCB::iterator vit = blockV.begin();vit!=blockV.end();++vit){
+	Vertex v=*vit;
+	VertexName sname = get(vertex_name,g,v);
+	LVertex news;
+	//
+	//Insert vertex into currlcbg if needed
+	tie(pos, inserted) = currlcbv.insert(std::make_pair(sname, LVertex()));
+	if(inserted){
+	  news = add_vertex(sname,currlcbg);
+	  currlcbv[sname]=news;
+	  vmap[news]=v;
+	}
+	else{
+	  news = pos->second;
+	}
+
+	//
+	//Save coordinate information for news
+	if(coordinates.find(std::make_pair(v,seqidx))!=coordinates.end()){
+	  assert(coordinates.find(std::make_pair(v,seqidx))->second.first<coordinates.find(std::make_pair(v,seqidx))->second.second);
+	  sortedV.push_back(news);
+	  spanlen = spanlen + get(vertex_len,g,v);
+#ifdef DEBUG
+	  std::cerr << "seqidx: " << seqidx << " V:" << get(vertex_name,g,v) 
+		    << " len:" << get(vertex_len,g,news) << " spanlen:" << spanlen << std::endl;
+#endif
+	  currcoords.insert(std::make_pair(news,
+					   coordinates.find(std::make_pair(v,seqidx))->second));
+	  seqsvertex[seqidx].insert(news);
+	}
+
+	//
+	//Add all edges for news
+	//First make sure target vertex is part of currlcbg
+	//graph_traits<LCBSynFilterGraph>::out_edge_iterator out_i, out_end;
+	typename graph_traits<TGraph2>::out_edge_iterator out_i, out_end;
+	for(tie(out_i, out_end) = out_edges(v, fglcbsyn); out_i != out_end; ++out_i){
+#ifdef CUTLCBEDGESONLY
+	  if(ccvmap[v] != ccvmap[target(*out_i,g)]){
+	    std::cerr << "Skipping edge, outside LCB" << std::endl;
+	    continue;
+	  }
+#endif
+	  VertexName tname = get(vertex_name,g,target(*out_i,g));
+	  LVertex newt;
+	  tie(pos, inserted) = currlcbv.insert(std::make_pair(tname, LVertex()));
+	  if(inserted){
+	    newt = add_vertex(tname,currlcbg);
+	    currlcbv[tname]=newt;
+	    vmap[newt]=target(*out_i,g);
+	  }
+	  else{
+	    newt = pos->second;
+	  }
+	  //Now add the forward and reverse edges
+	  //and flow properties
+	  LGraph::edge_descriptor e1,e2;
+	  tie(e1, inserted) = edge(news,newt,currlcbg);
+	  if(!inserted){
+	    tie(e1, inserted) = edge(newt,news,currlcbg);
+	    assert(!inserted);
+	    tie(e1, inserted) = add_edge(news,newt,currlcbg);
+	    assert(inserted);
+	    tie(e2, inserted) = add_edge(newt,news,currlcbg);
+	    assert(inserted);
+	    //put(edge_reverse,currlcbg,e1,e2);
+	    //put(edge_reverse,currlcbg,e2,e1);
+	    rev[e1] = e2;
+	    assert(rev[e1]==e2);
+	    rev[e2] = e1;
+	    assert(rev[e2]==e1);
+	    //Capacity is set as number of labels on the edge
+	    BitMask emask = get(edge_labelmask,g,*out_i);
+	    //int minlen = (get(vertex_len,g,news) < get(vertex_len,g,newt)) ? get(vertex_len,g,news) : get(vertex_len,g,newt);
+	    //int ecapacity = emask.count() * minlen;
+	    int ecapacity = emask.count();
+	    assert(ecapacity>=1);
+#ifdef DEBUG
+	    std::cerr << "mincutlcbg " << sname << "-" << tname << " capacity:" << ecapacity << std::endl;
+#endif
+	    capacity[e1]=ecapacity;//DEFAULT_CAP;
+	    capacity[e2]=ecapacity;//DEFAULT_CAP;
+	    residual_capacity[e1]=0;
+	    residual_capacity[e2]=0;
+	  }
+	}
+      }
+      
+
+      //Condition (1) split gaps
+      if(num_vertices(currlcbg)>0 
+	 && num_edges(currlcbg)>0 
+	 && sortedV.size()>0
+	 && spanlen>=MINSPANLEN){ //Check that span of seqs > MINSPANLEN to avoid breaking LCBs based on inconsistent short fragments
+	assert(num_vertices(currlcbg)>0);
+	assert(num_edges(currlcbg)>0);
+	assert(num_vertices(currlcbg)>=sortedV.size());
+	assert(currcoords.size()==sortedV.size());
+
+	//Project order onto seq
+	sortedV.sort(coordsorder_vertex(&currcoords));
+	
+	int prevcoord=-1;
+	int currpos=0;
+	LVertex prevvertex=0,currvertex=0;
+	VertexName prevname=0,currname=0;
+	LVertex currvertexlcb;
+	std::vector<int> lcbcc(num_vertices(currlcbg));
+
+	prevname=0;
+	prevcoord=-1;
+#ifdef DEBUG
+	std::cerr << "Order by seqidx:" << seqidx << std::endl;
+#endif
+	for(std::list<LVertex>::iterator vit = sortedV.begin();vit!=sortedV.end();++vit){
+	  currvertex=*vit;
+	  currname = get(vertex_name,currlcbg,currvertex);
+	  assert(get(vertex_name,currlcbg,currvertex)==get(vertex_name,g,vmap[currvertex]));
+	  currvertexlcb = currlcbv[currname];
+
+	  assert(coordinates.find(std::make_pair(name2vertex[currname],seqidx))!=coordinates.end());
+	  //assert(coordinates.find(std::make_pair(name2vertex[currname],seqidx))->second==currcoords.find(std::make_pair(currvertex,seqidx))->second);
+	  int currstart,currend;
+	  tie(currstart,currend) = coordinates.find(std::make_pair(name2vertex[currname],seqidx))->second;
+	  if(prevcoord==-1){
+	    assert(vit==sortedV.begin());
+	  }
+	  else{
+	    //assert(*(vit-1)==prevvertex);
+	    //assert(currstart>=prevcoord);
+	    int dist = currstart-prevcoord;
+	    
+#ifdef DEBUG
+	    std::cerr << "seqidx:"  << seqidx << " dist:" << dist << " " 
+		      << prevname   << "-" << currname << " "   
+		      << prevvertex   << "-" << currvertex  
+		      << " coords " << prevcoord << "-" << currstart
+		      << " spanlenonseq: " << spanlen  
+		      << " numV: " << num_vertices(currlcbg) << std::endl;
+#endif
+	    if(dist>(int)distance){
+	      //Since the vertices are sorted by genomic position.
+	      //All verticies begin()->currVertex are also at a dist>distance
+#ifdef DEBUG
+	      std::cerr << "Found GAP " << dist << ">" << distance << std::endl;
+#endif
+#ifdef CALCFLOW
+		;
+#else
+		boost::graph_traits<LGraph>::edge_descriptor de2;
+		tie(de2,found) = edge(currvertex,prevvertex,currlcbg);
+		if(!found){
+		  tie(de2,found) = edge(prevvertex,currvertex,currlcbg);
+		  if(found){
+		    disconnecting_set.push_back(de2);
+		  }
+		}
+		else{
+		  disconnecting_set.push_back(de2);
+		}
+
+#endif	     
+	      //Convert into multi-source multi-sink problem
+	      //Add super-source and super sink nodes
+	      LVertex ssource,ssink;
+	      if(reusesupernodes && 
+		 cuts.find(std::make_pair(prevname,currname)) != cuts.end()){
+		ssource = cuts.find(std::make_pair(prevname,currname))->second.first;
+		ssink = cuts.find(std::make_pair(prevname,currname))->second.second;
+		//std::cerr << "Found prev source sink " << ssource << "-" << ssink << std::endl;
+		assert(cutsdistmap.find(std::make_pair(ssource,ssink))!=cutsdistmap.end());
+		if(dist<(int)cutsdistmap[std::make_pair(ssource,ssink)]){
+		  cutsdistmap[std::make_pair(ssource,ssink)]=dist;
+		}
+	      }
+	      else{
+		ssource = add_vertex(std::numeric_limits<int>::max()-supercount,currlcbg);
+		currlcbv[std::numeric_limits<int>::max()-supercount]=ssource;
+		ssink = add_vertex(std::numeric_limits<int>::max()-supercount-1,currlcbg);
+		currlcbv[std::numeric_limits<int>::max()-supercount-1]=ssink;
+		supercount+=2;
+		cuts[std::make_pair(prevname,currname)] = std::make_pair(ssource,ssink);
+		revcuts[std::make_pair(ssource,ssink)] = std::make_pair(prevname,currname);
+		cutsdistmap[std::make_pair(ssource,ssink)]=dist;
+	      }
+#ifdef DEBUG
+		std::cerr << "Source,sink " << ssource << "-" << ssink << " for cut " << prevname << "-" << currname << endl;
+#endif
+	      
+	      
+	      std::list<LVertex>::iterator sinkend = sortedV.end();
+	      std::list<LVertex>::iterator vit3 = vit;
+	      //std::vector<LVertex>::iterator sinkend = (int(SEARCH_RADIUS+currpos)<(int)sortedV.size()) ? vit+SEARCH_RADIUS : sortedV.end();
+	      for(;vit3!=sinkend;++vit3){
+		//addFlowEdge(ssink,*vit3,currlcbg);
+		graph_traits < LGraph >::edge_descriptor e1,e2;
+		tie(e1, inserted) = add_edge(ssink,*vit3,currlcbg);
+		if(inserted){
+		  //std::cerr << "Adding edge " << get(vertex_name,currlcbg,*vit3) << " --> sink:" << ssink << std::endl;
+		  snodeedges++;
+		  tie(e2, inserted) = add_edge(*vit3,ssink,currlcbg);
+		  assert(inserted);
+		  snodeedges++;
+		  //put(edge_reverse,currlcbg,e1,e2);
+		  //put(edge_reverse,currlcbg,e2,e1);
+		  rev[e1] = e2;
+		  assert(rev[e1]==e2);
+		  rev[e2] = e1;
+		  assert(rev[e2]==e1);
+		  capacity[e1]=0;
+		  capacity[e2]=std::numeric_limits<int>::max();
+		  residual_capacity[e1]=0;
+		  residual_capacity[e2]=0;
+		}
+		else{
+		  tie(e2, inserted) = add_edge(*vit3,ssink,currlcbg);
+		  assert(!inserted);
+		}
+	      }
+
+	      std::list<LVertex>::iterator sourceend = sortedV.begin();
+	      //std::list<LVertex>::iterator sourceend = ((int)(currpos-SEARCH_RADIUS)>0) ? vit-SEARCH_RADIUS : sortedV.begin();
+	      //std::cerr << "Curr pos " << currpos << std::endl;
+	      std::list<LVertex>::iterator vit2=vit;
+	      for(--vit2;vit2!=sourceend;--vit2){
+		graph_traits < LGraph >::edge_descriptor e1,e2;
+		tie(e1, inserted) = add_edge(ssource,*vit2,currlcbg);
+		if(inserted){
+		  //std::cerr << "Adding edge source:" << ssource << " --> " << get(vertex_name,currlcbg,*vit2) << std::endl;
+		  snodeedges++;
+		  tie(e2, inserted) = add_edge(*vit2,ssource,currlcbg);
+		  snodeedges++;
+		  assert(inserted);
+		  //put(edge_reverse,currlcbg,e1,e2);
+		  //put(edge_reverse,currlcbg,e2,e1);
+		  rev[e1] = e2;
+		  assert(rev[e1]==e2);
+		  rev[e2] = e1;
+		  assert(rev[e2]==e1);
+		  capacity[e1]=std::numeric_limits<int>::max();
+		  capacity[e2]=0;
+		  residual_capacity[e1]=0;
+		  residual_capacity[e2]=0;
+		}
+		else{
+		  tie(e2, inserted) = add_edge(*vit2,ssource,currlcbg);
+		  assert(!inserted);
+		}
+	      }
+	      if(vit2==sortedV.begin()){
+		graph_traits < LGraph >::edge_descriptor e1,e2;
+		tie(e1, inserted) = add_edge(ssource,*vit2,currlcbg);
+		if(inserted){
+		  //std::cerr << "Adding edge source:" << ssource << " --> " << get(vertex_name,currlcbg,*vit2) << std::endl;
+		  snodeedges++;
+		  tie(e2, inserted) = add_edge(*vit2,ssource,currlcbg);
+		  snodeedges++;
+		  assert(inserted);
+		  //put(edge_reverse,currlcbg,e1,e2);
+		  //put(edge_reverse,currlcbg,e2,e1);
+		  rev[e1] = e2;
+		  assert(rev[e1]==e2);
+		  rev[e2] = e1;
+		  assert(rev[e2]==e1);
+		  capacity[e1]=std::numeric_limits<int>::max();
+		  capacity[e2]=0;
+		  residual_capacity[e1]=0;
+		  residual_capacity[e2]=0;
+		}
+		else{
+		  tie(e2, inserted) = add_edge(*vit2,ssource,currlcbg);
+		  assert(!inserted);
+		}
+	      }
+	      if(cutsnodes.find(std::make_pair(ssource,ssink))==cutsnodes.end()){
+		supernodes[ssource]++;
+		supernodes[ssink]++;
+		//supernodes2.insert(ssource);
+		//supernodes2.insert(ssink);
+		cutsnodes.insert(std::make_pair(ssource,ssink));
+	      }
+	    }
+	  }
+	  currpos++;
+	  currvertex=*vit;
+	  currname = get(vertex_name,currlcbg,currvertex);
+	  assert(currvertex==(*vit));
+	  prevvertex = currvertex;
+	  //max coord of block
+	  prevcoord = currend;
+	  prevname = currname;
+	}
+      }
+      else{
+	//std::cerr << "Skipping merge on seq:" << seqidx 
+	//<< " spanlen:" << spanlen << " < " << MINSPANLEN << std::endl;
+      }
+    }
+#ifdef DEBUG
+      std::cerr << "Graph built for lcbidx:" << lcbcount << " V:" << num_vertices(currlcbg) << " E:" << num_edges(currlcbg) << std::endl; 
+#endif
+    //
+      //Condition (2) - conflicting orientation
+      //Check orientation on this one seq      
+      //Condition (3) - multiple seqs per genome
+      //Need to break LCBs that have multiple seqs from the same genome
+    LVertex prevvertex=0,currvertex=0;
+    VertexName prevname=0,currname=0;
+    LVertex currvertexlcb,prevvertexlcb;
+
+    for(gpos = seqspergenomeMap.begin();gpos!=seqspergenomeMap.end();++gpos){
+      if(gpos->second.size()>1){
+#ifdef DEBUG
+	std::cerr << "LCB with multiple seqs " << gpos->second.size() << " from same genome, splitting" << std::endl;
+#endif
+	std::vector<Label> seqs;
+	for(spos = gpos->second.begin();spos!=gpos->second.end();++spos){//each seq1
+	  seqs.push_back(*spos);
+#ifdef DEBUG
+	  std::cerr << "Seqs " << *spos << std::endl;
+#endif
+	  assert(sequence2genome[*spos]==gpos->first);
+	}
+	std::vector<LVertex> compv;
+	//Split sequences from the same genome
+	for(std::vector<Label>::iterator spos1 = seqs.begin();spos1!=seqs.end();++spos1){
+	  //std::cerr << "S1" << *spos1 << std::endl;
+	  assert(seqsvertex.find(*spos1)!=seqsvertex.end());
+	  for(std::vector<Label>::iterator spos2 = spos1+1;spos2!=seqs.end();++spos2){
+	    //std::cerr << "S2" << *spos2 << std::endl;
+	    assert(seqsvertex.find(*spos2)!=seqsvertex.end());
+	    assert(spos1!=spos2);
+	    for(vpos = seqsvertex[*spos1].begin();vpos != seqsvertex[*spos1].end();++vpos){//each vertex seq1
+	      compv.push_back(*vpos);
+	      currvertex = *vpos;
+	      currname = get(vertex_name,currlcbg,currvertex);
+	      currvertexlcb = currlcbv[currname];
+	      //std::cerr << *vpos << " name:" << currname << std::endl;
+	      assert(get(vertex_name,currlcbg,currvertex)==get(vertex_name,g,vmap[currvertex]));
+	      for(vpos2 = seqsvertex[*spos2].begin();vpos2 != seqsvertex[*spos2].end();++vpos2){//each vertex seq1
+		prevvertex = *vpos2;
+#ifdef CALCFLOW
+		;
+#else
+		boost::graph_traits<LGraph>::edge_descriptor de2;
+		tie(de2,found) = edge(currvertex,prevvertex,currlcbg);
+		if(!found){
+		  tie(de2,found) = edge(prevvertex,currvertex,currlcbg);
+		  if(found){
+		    disconnecting_set.push_back(de2);
+		  }
+		}
+		else{
+		  disconnecting_set.push_back(de2);
+		}
+#endif
+		prevname = get(vertex_name,currlcbg,prevvertex);
+		prevvertexlcb = get(vertex_name,currlcbg,prevvertex);
+		//std::cerr << *vpos2 << " name:" << prevname << std::endl;
+		LVertex ssource,ssink;
+		ssource = add_vertex(std::numeric_limits<int>::max()-supercount,currlcbg);
+		currlcbv[std::numeric_limits<int>::max()-supercount]=ssource;
+		ssink = add_vertex(std::numeric_limits<int>::max()-supercount-1,currlcbg);
+		currlcbv[std::numeric_limits<int>::max()-supercount-1]=ssink;
+		supercount+=2;
+		cuts[std::make_pair(prevname,currname)] = std::make_pair(ssource,ssink);
+		revcuts[std::make_pair(ssource,ssink)] = std::make_pair(prevname,currname);
+		cutsdistmap[std::make_pair(ssource,ssink)]=0;
+			  
+		graph_traits < LGraph >::edge_descriptor e1,e2;
+		tie(e1, inserted) = add_edge(ssink,currvertex,currlcbg);
+
+		if(inserted){
+#ifdef DEBUG
+		  std::cerr << "Added edge sink for multiple anchors same genome:" << currvertex << " <-- " << ssink << std::endl;
+#endif
+		  snodeedges++;
+		  tie(e2, inserted) = add_edge(currvertex,ssink,currlcbg);
+ 		  assert(inserted);
+		  snodeedges++;
+		  rev[e1] = e2;
+		  assert(rev[e1]==e2);
+		  rev[e2] = e1;
+		  assert(rev[e2]==e1);
+		  capacity[e1]=0;
+		  capacity[e2]=std::numeric_limits<int>::max();
+		  residual_capacity[e1]=0;
+		  residual_capacity[e2]=0;
+		}
+		else{
+		  tie(e2, inserted) = add_edge(currvertex,ssink,currlcbg);
+		  assert(!inserted);
+		}
+		tie(e1, inserted) = add_edge(ssource,prevvertex,currlcbg);
+		if(inserted){
+#ifdef DEBUG
+		  std::cerr << "Adding edge source:" << ssource << " --> " << prevvertex << std::endl;
+#endif
+		  snodeedges++;
+		  tie(e2, inserted) = add_edge(prevvertex,ssource,currlcbg);
+		  snodeedges++;
+		  assert(inserted);
+		  rev[e1] = e2;
+		  assert(rev[e1]==e2);
+		  rev[e2] = e1;
+		  assert(rev[e2]==e1);
+		  capacity[e1]=std::numeric_limits<int>::max();
+		  capacity[e2]=0;
+		  residual_capacity[e1]=0;
+		  residual_capacity[e2]=0;
+		}
+		else{
+		  tie(e2, inserted) = add_edge(prevvertex,ssource,currlcbg);
+		  assert(!inserted);
+		}
+		//Add ssource,ssink to cutset
+		if(cutsnodes.find(std::make_pair(ssource,ssink))==cutsnodes.end()){
+		  supernodes[ssource]++;
+		  supernodes[ssink]++;
+		  cutsnodes.insert(std::make_pair(ssource,ssink));
+		}
+	      }
+	    }
+	  }
+	}
+	/*
+	//Check for misoriented vertices within an LCB and break
+	for(std::vector<LVertex>::iterator vpos = compv.begin();vpos != compv.end();++vpos){//each vertex seq1
+	  for(std::vector<LVertex>::iterator vpos2 = vpos+1;vpos2 != compv.end();++vpos2){//each vertex seq1
+	    //Mismatched orient
+	    BitMask sharedlabels = (labelmaskmap[vmap[*vpos]]&labelmaskmap[vmap[*vpos2]]);
+	    assert(isLabelCollinearMask(sharedlabels,
+					orientmaskmap[vmap[*vpos]],
+					orientmaskmap[vmap[*vpos2]]) 
+		   == 
+		   isLabelCollinear(orientmap[vmap[*vpos]],
+				    orientmap[vmap[*vpos2]],
+				    sequence2genome));
+	    if(! isLabelCollinearMask(sharedlabels,orientmaskmap[vmap[*vpos]],orientmaskmap[vmap[*vpos2]])){
+	      std::cerr << "Breaking vertices with incompatible labeling " << vmap[*vpos] << "--" << vmap[*vpos2] << std::endl;
+	      currvertex = *vpos;
+	      currname = get(vertex_name,currlcbg,currvertex);
+	      currvertexlcb = currlcbv[currname];
+	      std::cerr << *vpos << " name:" << currname << std::endl;
+	      assert(get(vertex_name,currlcbg,currvertex)==get(vertex_name,g,vmap[currvertex]));
+	      prevvertex = *vpos2;
+	      prevname = get(vertex_name,currlcbg,prevvertex);
+	      prevvertexlcb = get(vertex_name,currlcbg,prevvertex);
+	      std::cerr << *vpos2 << " name:" << prevname << std::endl;
+	      LVertex ssource,ssink;
+	      ssource = add_vertex(std::numeric_limits<int>::max()-supercount,currlcbg);
+	      currlcbv[std::numeric_limits<int>::max()-supercount]=ssource;
+	      ssink = add_vertex(std::numeric_limits<int>::max()-supercount-1,currlcbg);
+	      currlcbv[std::numeric_limits<int>::max()-supercount-1]=ssink;
+	      supercount+=2;
+	      cuts[std::make_pair(prevname,currname)] = std::make_pair(ssource,ssink);
+	      revcuts[std::make_pair(ssource,ssink)] = std::make_pair(prevname,currname);
+	      cutsdistmap[std::make_pair(ssource,ssink)]=0;
+	      
+	      graph_traits < LGraph >::edge_descriptor e1,e2;
+	      tie(e1, inserted) = add_edge(ssink,currvertex,currlcbg);
+	      if(inserted){
+		std::cerr << "Added edge sink:" << currvertex << " <-- " << ssink << std::endl;
+		snodeedges++;
+		tie(e2, inserted) = add_edge(currvertex,ssink,currlcbg);
+		std::cerr << "Added edge sink:" << currvertex << " <-- " << ssink << std::endl;
+		assert(inserted);
+		snodeedges++;
+		rev[e1] = e2;
+		assert(rev[e1]==e2);
+		rev[e2] = e1;
+		assert(rev[e2]==e1);
+		capacity[e1]=0;
+		capacity[e2]=std::numeric_limits<int>::max();
+		residual_capacity[e1]=0;
+		residual_capacity[e2]=0;
+	      }
+	      else{
+		tie(e2, inserted) = add_edge(currvertex,ssink,currlcbg);
+		assert(!inserted);
+	      }
+	      std::cerr << "Added sink" << std::endl;
+	      tie(e1, inserted) = add_edge(ssource,prevvertex,currlcbg);
+	      if(inserted){
+		std::cerr << "Adding edge source:" << ssource << " --> " << prevvertex << std::endl;
+		snodeedges++;
+		tie(e2, inserted) = add_edge(prevvertex,ssource,currlcbg);
+		snodeedges++;
+		assert(inserted);
+		rev[e1] = e2;
+		assert(rev[e1]==e2);
+		rev[e2] = e1;
+		assert(rev[e2]==e1);
+		capacity[e1]=std::numeric_limits<int>::max();
+		capacity[e2]=0;
+		residual_capacity[e1]=0;
+		residual_capacity[e2]=0;
+	      }
+	      else{
+		tie(e2, inserted) = add_edge(prevvertex,ssource,currlcbg);
+		assert(!inserted);
+	      }
+	      //Add ssource,ssink to cutset
+	      if(cutsnodes.find(std::make_pair(ssource,ssink))==cutsnodes.end()){
+		supernodes[ssource]++;
+		supernodes[ssink]++;
+		  cutsnodes.insert(std::make_pair(ssource,ssink));
+	      }
+	    }
+	    else{
+	      std::cerr << "Compatible labeling " << vmap[*vpos] << "--" << vmap[*vpos2] << std::endl;
+	    }
+	  }
+	
+	  }
+	*/
+      }
+    }
+  
+    //
+    //
+    
+#ifdef PRINTFLOW
+    //Write graph
+    std::vector<int> ccvmap; //empty
+    VertexSet maskedLCBs; //empty
+#ifdef PRINTSEQS
+    ;
+#else
+    do_write_graphviz(currlcbg, std::string("gout.preflow"+lexical_cast<std::string>(cutcount+filenumoffset)+".dot"),ccvmap,coordinates,maskedEdges,maskedLCBs,capacity,false);
+    std::cerr << "Writing " << std::string("gout.preflow"+lexical_cast<std::string>(cutcount+filenumoffset)+".dot") << std::endl;
+#endif
+#endif
+    
+    LGraph::edge_iterator ei,e_end;
+
+    //property_map < LGraph, edge_reverse_t >::type revtest = get(edge_reverse,currlcbg);
+    //for(tie(ei, e_end) = edges(currlcbg); ei != e_end; ++ei) {
+    //assert(revtest[revtest[*ei]] == *ei); //check if the reverse edge map is build up properly
+    //}
+    //Evaluation order of cuts can matter
+    //TODO, try smallest->largest and largest->smallest
+    std::vector<std::pair<LVertex,LVertex> > cutsnodesuniq;
+    for(std::set<pair<LVertex,LVertex> >::iterator cit = cutsnodes.begin(); cit!= cutsnodes.end();++cit){
+      cutsnodesuniq.push_back(*cit);
+    }
+    sort(cutsnodesuniq.begin(),cutsnodesuniq.end(),cutsdist(&cutsdistmap));
+    for(std::vector<std::pair<LVertex,LVertex> >::iterator cit = cutsnodesuniq.begin(); cit!= cutsnodesuniq.end();++cit){
+      LVertex ssource = cit->first;
+      LVertex ssink = cit->second;
+#ifdef DEBUG
+	std::cerr << "Attempting split " << get(vertex_name,currlcbg,ssource) << "(" << supernodes[ssource] << ")" 
+		  << "-" << get(vertex_name,currlcbg,ssink)  << "(" << supernodes[ssink] << ")" 
+		  << " due to edge " <<  revcuts[std::make_pair(ssource,ssink)].first
+		  << "-" <<  revcuts[std::make_pair(ssource,ssink)].second 
+		  << " dist:" << cutsdistmap[std::make_pair(ssource,ssink)] << std::endl;
+#endif
+      
+      assert(supernodes[ssource]>0);
+      assert(supernodes[ssink]>0);
+
+            
+      std::vector<default_color_type> color(num_vertices(currlcbg));
+      std::vector<LGraph::edge_descriptor> pred(num_vertices(currlcbg));
+      
+      assert(num_edges(currlcbg)>0);
+
+      std::set<LVertex> S_star;
+
+      
+      property_map < LGraph, vertex_index_t >::type
+	idx = get(vertex_index, currlcbg);
+      property_map < LGraph, vertex_distance_t >::type
+	distance = get(vertex_distance, currlcbg); 
+      capacity = get(edge_capacity, currlcbg);
+      rev = get(edge_reverse, currlcbg);
+      residual_capacity = get(edge_residual_capacity, currlcbg);
+#ifdef CALCFLOW
+      long flow = edmonds_karp_max_flow(currlcbg, ssource, ssink, capacity, residual_capacity, rev, &color[0], &pred[0]);
+      ++cutcount;
+      //kolmogorov is faster but it's not clear how to find the disconnecting set 
+      //long flow = kolmogorov_max_flow(currlcbg, capacity, residual_capacity, rev, &pred[0], &color[0],distance,idx,ssource,ssink);      
+
+      //long flow = edmonds_karp_max_flow(currlcbg, ssource, ssink);
+      //long flow = push_relabel_max_flow(currlcbg, ssource, ssink);
+      //long flow = kolmogorov_max_flow(currlcbg, ssource, ssink);
+      
+      /*
+      //Testing trimming graph of all but the current supernode source and sink
+      typedef std::set<LVertex> SuperNodeMap;
+      typedef filtered_graph<LGraph, 
+	snode_efilter<LGraph>,snode_vfilter<LGraph> > FLGraph;
+
+      snode_efilter<LGraph> efilter(&supernodes2,&currlcbg);
+      snode_vfilter<LGraph> vfilter(&supernodes2);
+      FLGraph filtlcbg(currlcbg, efilter, vfilter);
+      supernodes2.erase(supernodes2.find(ssource));
+      supernodes2.erase(supernodes2.find(ssink));
+      long flow = kolmogorov_max_flow(filtlcbg, capacity, residual_capacity, rev, &pred[0], &color[0],distance,idx,ssource,ssink);
+      */
+
+      //Check flow since we may have already introduced a break 
+      if(flow>0){
+	assert(flow>0);
+	graph_traits<LGraph>::out_edge_iterator ei, ei_end;
+	graph_traits<LGraph>::vertex_iterator vi, vi_end;
+	typedef color_traits<default_color_type> Color;
+	for(tie(vi,vi_end) = vertices(currlcbg);vi!=vi_end;++vi){
+	  if(color[*vi]!=Color::white()){
+	    if(reusesupernodes || supernodes.find(*vi)==supernodes.end()){
+	      S_star.insert(*vi);
+	    }
+	  }
+	}
+	for( std::set<LVertex>::iterator si = S_star.begin();si!=S_star.end();++si){
+	  for(tie(ei,ei_end) = out_edges(*si,currlcbg);ei!=ei_end;++ei){
+	    if(S_star.find(target(*ei,currlcbg))==S_star.end()){ 
+	      if(reusesupernodes || supernodes.find(target(*ei,currlcbg))==supernodes.end()){
+		disconnecting_set.push_back(*ei);
+		#ifdef DEBUG
+		std::cerr << "Disconnecting set " << get(vertex_name,currlcbg,source(*ei,currlcbg)) << "-" << get(vertex_name,currlcbg,target(*ei,currlcbg)) << std::endl; 
+		put(edge_category,currlcbg,*ei,BLUE);
+		#endif
+	      }
+	    }
+	  }
+	}
+	#ifdef DEBUG
+	std::cerr << " flow:" << flow << std::endl;
+	#endif
+
+#else
+	int flow=0;
+#endif
+#ifdef DEBUG
+	std::cerr << "Disconnecting set size:" << disconnecting_set.size() << std::endl; 
+#endif
+
+#ifdef PRINTFLOW
+	//Write graph
+	std::vector<int> ccvmap; //empty
+	VertexSet maskedLCBs; //empty
+#ifdef PRINTSEQS
+	;
+#else
+	do_write_graphviz(currlcbg, std::string("gout.flow"+lexical_cast<std::string>(cutcount+filenumoffset)+".dot"),ccvmap,coordinates,maskedEdges,maskedLCBs,capacity,false);
+	std::cerr << "Writing " << std::string("gout.flow"+lexical_cast<std::string>(cutcount+filenumoffset)+".dot") << std::endl;
+#endif
+#endif
+	for(std::vector<LGraph::edge_descriptor>::iterator ei=disconnecting_set.begin();ei!=disconnecting_set.end();++ei){
+	  boost::graph_traits<LGraph>::edge_descriptor e2;
+	  //This edge may have been cut previously so
+	  //first check if it is still present in the connectivity graph
+	  tie(e2,found) = edge(source(*ei,currlcbg),target(*ei,currlcbg),currlcbg);
+	  if(found){
+	    LGraph::edge_descriptor maxe = *ei;
+	    Vertex cuts = name2vertex[get(vertex_name,currlcbg,source(maxe,currlcbg))];
+	    assert(vmap[source(maxe,currlcbg)]==cuts);
+	    Vertex cutt = name2vertex[get(vertex_name,currlcbg,target(maxe,currlcbg))];
+	    assert(vmap[target(maxe,currlcbg)]==cutt);
+
+	    Edge e1;
+	    tie(e1,found) = edge(cuts,cutt,fglcbsyn);
+	    if(found){
+	      assert(revcuts.find(std::make_pair(ssource,ssink))!=revcuts.end());
+#ifdef DEBUG
+		std::cerr << "cut " << get(vertex_name,currlcbg,source(maxe,currlcbg)) 
+			  << "-" << get(vertex_name,currlcbg,target(maxe,currlcbg)) 
+			  << " " << cuts << "-" << cutt
+			  << std::endl;
+		if((get(vertex_name,currlcbg,target(maxe,currlcbg)) == revcuts[std::make_pair(ssource,ssink)].first 
+		    &&  get(vertex_name,currlcbg,source(maxe,currlcbg)) == revcuts[std::make_pair(ssource,ssink)].second)
+		   ||
+		   (get(vertex_name,currlcbg,source(maxe,currlcbg)) == revcuts[std::make_pair(ssource,ssink)].first 
+		    &&  get(vertex_name,currlcbg,target(maxe,currlcbg)) == revcuts[std::make_pair(ssource,ssink)].second)
+		   ){
+		  std::cerr << "Split is trivial" << std::endl;
+		}
+		else{
+		  std::cerr << "Split is non-local " << std::string("gout.flow"+lexical_cast<std::string>(cutcount+filenumoffset)) << " " << " supernodes:" << supernodes.size() << " " << "cut_set:" << disconnecting_set.size() << std::endl;
+		}
+		
+#endif
+	      maskedEdges.insert(std::make_pair(cuts,cutt)); 
+	      put(edge_category,g,e1,BLUE);
+	    }
+	    else{
+	      assert(revcuts.find(std::make_pair(ssource,ssink))!=revcuts.end());
+	      tie(e1,found) = edge(cutt,cuts,fglcbsyn);
+	      if(found){
+#ifdef DEBUG
+		std::cerr << "cut " << get(vertex_name,currlcbg,target(maxe,currlcbg)) 
+			  << "-" << get(vertex_name,currlcbg,source(maxe,currlcbg))
+			  << " " << cuts << "-" << cutt
+			  << std::endl;
+		if((get(vertex_name,currlcbg,target(maxe,currlcbg)) == revcuts[std::make_pair(ssource,ssink)].first 
+		    &&  get(vertex_name,currlcbg,source(maxe,currlcbg)) == revcuts[std::make_pair(ssource,ssink)].second)
+		   ||
+		   (get(vertex_name,currlcbg,source(maxe,currlcbg)) == revcuts[std::make_pair(ssource,ssink)].first 
+		    &&  get(vertex_name,currlcbg,target(maxe,currlcbg)) == revcuts[std::make_pair(ssource,ssink)].second)
+		   ){
+		  std::cerr << "Split is trivial" << std::endl;
+		}
+		else{
+		  std::cerr << "Split is non-local " << std::string("gout.flow"+lexical_cast<std::string>(cutcount+filenumoffset)) << " " << " supernodes:" << supernodes.size() << " " << "cut_set:" << disconnecting_set.size() << std::endl;
+		}
+#endif
+		maskedEdges.insert(std::make_pair(cutt,cuts));
+		put(edge_category,g,e1,BLUE);
+	      }
+	    }
+	    tie(e1,found) = edge(cutt,cuts,fglcbsyn);
+	    assert(!found);
+	    tie(e1,found) = edge(cuts,cutt,fglcbsyn);
+	    assert(!found);
+	    //Removing the edges from the graph can help short circuit future runs of the maxflow/mincut algorithm
+	    //The check above for flow>0 ensures subsequent cuts are only considered if there is still 
+	    //connectivity in the graph
+	    //TODO, consider changing capacity to zero instead of removing for perf boost
+	    //#ifdef DEBUG
+	    //save edges so we can visualize
+	    //#else
+	    remove_edge(rev[maxe],currlcbg);
+	    remove_edge(maxe,currlcbg);
+	    //#endif
+	  }
+	  else{
+	    //assert(false);
+	  }
+	}
+#ifdef DEBUG
+	//TESTING
+	//Check to make sure the cuts eliminated all the flow
+	//This is for testing only
+	flow = kolmogorov_max_flow(currlcbg, capacity, residual_capacity, rev, &pred[0], &color[0],distance,idx,ssource,ssink);      
+	std::cerr << "Remaining flow " << flow << std::endl;
+	assert(flow==0);
+#endif
+	numcuts+=disconnecting_set.size();
+#ifdef CALCFLOW
+    }
+    else{
+      //Previous cut already broke flow between ssource-ssink. No further cuts needed
+    }
+#endif
+      //supernodes2.insert(ssource);
+      //supernodes2.insert(ssink);
+
+      //Remove supernodes only if 
+      //they are no longer referenced
+      /*
+	This doesn't work as expected. Can't get the vertex and
+	associated edges to properly clear to property maps
+	supernodes[ssource]--;
+      supernodes[ssink]--;
+
+      if(supernodes[ssource]==0){
+	supernodes.erase(supernodes.find(ssource));
+	clear_vertex(ssource,currlcbg);
+	remove_vertex(ssource,currlcbg);
+      }
+      if(supernodes[ssink]==0){
+	supernodes.erase(supernodes.find(ssink));
+	clear_vertex(ssink,currlcbg);
+	remove_vertex(ssink,currlcbg);
+      }
+      LGraph::edge_iterator ei,e_end;
+      property_map < LGraph, edge_reverse_t >::type rev2 = get(edge_reverse, currlcbg);
+      for(tie(ei, e_end) = edges(currlcbg); ei != e_end; ++ei) {
+	std::cerr << *ei << std::endl;
+	//This will segfault after removed nodes
+	assert(rev2[rev2[*ei]]==*ei);
+      }
+      */
+    }
+  }
+  return numcuts;
+}
+
+
+
+/*
+
+template<typename TGraph, typename TFGraph, typename LCBGraph, typename VertexMap>
+void createLCBGraph(TGraph & g, TFGraph & fglcbsyn, LCBGraph & currlcbg, LCB & lcb, VertexMap & vmap){
+  int DEFAULT_CAP=1;
+  property_map < LGraph, edge_capacity_t >::type
+      capacity = get(edge_capacity, currlcbg);
+  property_map < LGraph, edge_reverse_t >::type 
+    rev = get(edge_reverse, currlcbg);
+  property_map < LGraph, edge_residual_capacity_t >::type
+    residual_capacity = get(edge_residual_capacity, currlcbg);
+  
+  std::map<VertexName, LVertex> currlcbv;
+  std::map<VertexName, LVertex>::iterator pos;
+  bool inserted;
+  for(LCB::iterator vit = lcb.begin();vit!=lcb.end();++vit){
+    Vertex v=*vit;
+    VertexName sname = get(vertex_name,g,v);
+    LVertex news;
+    //
+    //Insert vertex into currlcbg if needed
+    tie(pos, inserted) = currlcbv.insert(std::make_pair(sname, LVertex()));
+    if(inserted){
+      news = add_vertex(sname,currlcbg);
+      currlcbv[sname]=news;
+      vmap[news]=v;
+    }
+    else{
+      news = pos->second;
+    }
+    //
+    //Add all edges for news
+    //First make sure target vertex is part of currlcbg
+    graph_traits<LCBSynFilterGraph>::out_edge_iterator out_i, out_end;
+    for(tie(out_i, out_end) = out_edges(v, fglcbsyn); out_i != out_end; ++out_i){
+      VertexName tname = get(vertex_name,g,target(*out_i,g));
+      LVertex newt;
+      tie(pos, inserted) = currlcbv.insert(std::make_pair(tname, LVertex()));
+      if(inserted){
+	newt = add_vertex(tname,currlcbg);
+	currlcbv[tname]=newt;
+	vmap[newt]=target(*out_i,g);
+      }
+      else{
+	newt = pos->second;
+      }
+      //Now add the forward and reverse edges
+      //and flow properties
+      LGraph::edge_descriptor e1,e2;
+      tie(e1, inserted) = edge(news,newt,currlcbg);
+      if(!inserted){
+	tie(e1, inserted) = edge(newt,news,currlcbg);
+	assert(!inserted);
+	tie(e1, inserted) = add_edge(news,newt,currlcbg);
+	assert(inserted);
+	tie(e2, inserted) = add_edge(newt,news,currlcbg);
+	assert(inserted);
+	//put(edge_reverse,currlcbg,e1,e2);
+	//put(edge_reverse,currlcbg,e2,e1);
+	rev[e1] = e2;
+	assert(rev[e1]==e2);
+	rev[e2] = e1;
+	assert(rev[e2]==e1);
+	capacity[e1]=DEFAULT_CAP;
+	capacity[e2]=DEFAULT_CAP;
+	residual_capacity[e1]=0;
+	residual_capacity[e2]=0;
+      }
+    }
+  }
+}
+template<typename TGraph,
+	 typename TEdge>
+void addFlowEdge(TGraph & g, 
+		 TEdge & ss, 
+		 TEdge & e,
+		 rev,
+		 residual_capacity){
+  graph_traits < LGraph >::edge_descriptor e1,e2;
+		tie(e1, inserted) = add_edge(ssink,*vit3,currlcbg);
+		if(inserted){
+		  //std::cerr << "Adding edge " << get(vertex_name,currlcbg,*vit3) << " --> sink:" << ssink << std::endl;
+		  snodeedges++;
+		  tie(e2, inserted) = add_edge(*vit3,ssink,currlcbg);
+		  assert(inserted);
+		  snodeedges++;
+		  //put(edge_reverse,currlcbg,e1,e2);
+		  //put(edge_reverse,currlcbg,e2,e1);
+		  rev[e1] = e2;
+		  assert(rev[e1]==e2);
+		  rev[e2] = e1;
+		  assert(rev[e2]==e1);
+		  capacity[e1]=0;
+		  capacity[e2]=std::numeric_limits<int>::max();
+		  residual_capacity[e1]=0;
+		  residual_capacity[e2]=0;
+}
+
+*/
diff --git a/chaining/synchain-mugsy.cpp b/chaining/synchain-mugsy.cpp
new file mode 100644
index 0000000..669f017
--- /dev/null
+++ b/chaining/synchain-mugsy.cpp
@@ -0,0 +1,2237 @@
+//USAGE:mugsy-chaining max-distance min-lcblen [min-lcblenstats] < anchors.projection
+//
+
+//Mugsy chaining algorithm to partition a graph of multi-genome anchors
+//into collinear "syntenic" segments
+
+//
+//Input 
+//----- 
+//Projection format is 
+//anchor1 anchor2 seqindex dist genomeindex orient1 orient2 beg1 end1 beg2 end2
+//eg
+//0 1 0 0 0 + + 0 196 196 15348
+//1 3 0 1 0 + + 196 15348 15349 20373
+
+
+//The anchor graph is a directed graph where each vertex is a
+//multi-genome anchor and each edge connects adjacent anchors on one
+//or more genomes.  The input projection should already list anchors
+//that are adjacent on a genome within distance $max-distance.  The
+//anchor graph will be built such that edges are stored for adjacent
+//anchors in at least one genome. 
+
+//A series of heuristics is applied to identify paths in the graph
+//that correspond to collinear regions, ignoring micro-rearrangements <
+//max-distance.
+
+//The regions may be overlapping with the degree of overlap determined
+//by max-distance
+
+//General outline
+
+//(1)Build anchor graph
+
+//(2)Initial clustering
+
+//(2.1) Identify vertices with more than 2 edges and mask all incident
+//edges in the graph. These are syntenic breakpoints. Some will be
+//micro- events that we ignore later
+
+//(2.2) Calculate connected components. The remaining edges correspond
+//to vertices with at most two edges and comprise runs of synteny.
+
+//(2.3) Run mincut to break paths that traverse breakpoints. Edges
+//indicate synteny on some genomes but do not ensure all incident
+//anchors are syntenic. We use a maxflow-mincut procedure to determine
+//which edges to break such that the LCBs respect max-distance and do
+//not include inversions.
+
+//(2.4) Merge adjacent LCBs. The procedures of (2.2) and (2.3) will
+//over-partition the graph. Merge adjacent LCBs that have compatible
+//anchor orientations and respect max-distance
+
+//(2.5) Mask short LCBs after merge. LCBs < minlen after merging are
+//masked from the graph. Next, the vertices are projected along each
+//of the member sequences and additional edges are added to the anchor
+//graph. The clustering of (2.1) and (2.2) is repeated to identify a
+//new set of LCBs.  This step allows for ignoring short LCBs that may
+//be breaking synteny.
+
+//(2.6) Run mincut to restore invariants.
+
+//(2.7) Merge
+
+//At this step the LCBs are . Two additional iterations of masking short LCBs and merging are run to try to cluster additional bps.
+
+
+//
+//S. Angiuoli - UMD CS, 2009
+
+#define NDEBUG 
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <string>
+#include <fstream>
+#include <vector>
+#include <queue>
+#include <list>
+#include <bitset>
+#include <algorithm>
+#include <ext/hash_set> //__gnu_cxx namespace
+//#include <tr1/unordered_set>
+
+#include <boost/graph/graphviz.hpp>
+#include <boost/graph/adjacency_list.hpp>
+#include <boost/graph/adjacency_matrix.hpp>
+#include <boost/unordered_set.hpp>
+#include <boost/unordered_map.hpp>
+
+#include <boost/graph/iteration_macros.hpp>
+#include <boost/graph/adjacency_list.hpp>
+#include <boost/graph/filtered_graph.hpp>
+#include <boost/graph/graph_utility.hpp>
+#include <boost/graph/connected_components.hpp>
+#include <boost/graph/strong_components.hpp>
+#include <boost/graph/topological_sort.hpp>
+#include <boost/tokenizer.hpp>
+#include <boost/lexical_cast.hpp>
+#include <boost/graph/dijkstra_shortest_paths.hpp>
+#include <boost/graph/graph_traits.hpp>
+#include <boost/graph/properties.hpp>
+#include <boost/property_map.hpp>
+#include <boost/graph/breadth_first_search.hpp>
+
+#include <boost/graph/edmonds_karp_max_flow.hpp>
+#include <boost/graph/kolmogorov_max_flow.hpp>
+#include <boost/graph/push_relabel_max_flow.hpp>
+
+#include <boost/graph/adjacency_list.hpp>
+#include <boost/graph/read_dimacs.hpp>
+#include <boost/graph/graph_utility.hpp>
+
+// Archivers
+#include <boost/archive/binary_iarchive.hpp>
+#include <boost/archive/binary_oarchive.hpp> 
+
+#include <boost/config.hpp>
+#include <boost/pending/queue.hpp>
+
+using namespace boost;
+using namespace std;
+
+//Maximum number of input genomes 
+//There is no limit on the number of sequences per genome
+//Used to set size of std::bitset<> only
+//TODO, replace with boost::dynamic_bitset to avoid setting a limit
+#define MAXGENOMES 256
+
+//Print LCB stats
+#define LCBSTATS 1
+//Print timings for subsets
+#define TIMING 1
+//Use max-flow,min-cut 
+#define CALCFLOW
+#define CUTLCBEDGESONLY
+
+
+
+//Debug creates 
+//#define DEBUG 1
+// print sequences and coords in graphviz output
+//#define PRINTSEQS 
+// draw flow network, cannot be combined with printseqs
+//#define PRINTFLOW
+
+
+//Defining this option removes all edges labelled in a single sequence
+//only. Such edges can link to unaligned or non-syntenic
+//regions
+
+#define TRIMEDGES
+
+//This is useful for simplifying the graph and improves performance for draft genomes but changes
+//the algorithm 
+//When defined, sequence specific indels > distance parameter will break blocks automatically
+//When undefined, such indels are broken only during mincut
+
+
+//Mugsy codes 
+#include "graph.h"
+#include "filters.h"
+#include "file.h"
+#include "lcbchecks.h" //isLabelCollinear,isLabelMaxGap,checkLCBGaps,checkLCBOrient,sameLabel,sameOrient,setLCBOrient
+#include "mincut.h" //breakLCBmincutconnect
+
+
+//variables for testing
+
+//Define to store and print edge labels w/ distances
+//in dot output
+//Undefine for release to save space
+//#define STORE_EDGE_LABELS 0
+//#define V_DEBUG 0
+
+ 
+//Mask the misoriented vertices of an LCB
+//setLCBOrient fills a list of offending vertices for the LCB. Each of those
+//vertices is added to maskedLCBs, and every edge leaving or entering it is
+//added to maskedEdges as a (source,target) pair and recategorized as CYAN.
+// g               graph holding the LCB vertices and the edge_category property
+// lcb             the LCB to inspect
+// maskedLCBs      receives the misoriented vertices
+// maskedEdges     receives the edges incident to the misoriented vertices
+// sequence2genome passed through to setLCBOrient
+template<typename TGraph>
+void fixMisOrientedLCBs(TGraph &g, 
+			LCB & lcb,
+			VertexSet &maskedLCBs,
+			EdgeSet &maskedEdges,
+			SequenceGenomeMap & sequence2genome){
+#ifdef DEBUG
+    std::cerr << "Trimming misoriented vertices in  lcb with " << lcb.size() << " vertices" << std::endl;
+#endif
+  typedef typename graph_traits<TGraph>::out_edge_iterator OutEdgeIter;
+  typedef typename graph_traits<TGraph>::in_edge_iterator InEdgeIter;
+
+  //Collect the vertices whose orientation conflicts with the LCB
+  std::vector<Vertex> misoriented;
+  setLCBOrient(g,lcb,misoriented,sequence2genome);
+
+  for(std::vector<Vertex>::iterator vit=misoriented.begin();vit!=misoriented.end();++vit){
+    maskedLCBs.insert(*vit);
+
+    //Outgoing edges of the masked vertex
+    OutEdgeIter oei, oei_end;
+    for(tie(oei,oei_end) = out_edges(*vit,g);oei!=oei_end;++oei){
+      maskedEdges.insert(std::make_pair(source(*oei,g),target(*oei,g)));
+      put(edge_category,g,*oei,CYAN);
+    }
+
+    //Incoming edges of the masked vertex
+    InEdgeIter iei, iei_end;
+    for(tie(iei,iei_end) = in_edges(*vit,g);iei!=iei_end;++iei){
+      maskedEdges.insert(std::make_pair(source(*iei,g),target(*iei,g)));
+      put(edge_category,g,*iei,CYAN);
+    }
+  }
+}
+
+
+
+//
+//Add adjacency edges between vertices that are neighbours on a sequence
+//but currently belong to different LCBs (components)
+//
+//For every sequence label the vertices carrying that label are sorted with
+//coordsorder. Each consecutive pair that lies in different components
+//(ccvmap differs) is tested: the gap must be <= distance, the pair must pass
+//isLabelCollinear and isLabelMaxGap, and the union of the two components must
+//pass checkLCBOrient (both overloads) and checkLCBGaps. When all tests pass
+//the genome bit is set in the edge label mask of the existing edge (looked up
+//in either direction), or a new edge is added to baseg.
+//Finally setedgemasks and setvertexmasks are called on g.
+//
+// g               graph whose vertices, edges and property maps are read
+// baseg           graph that receives newly created edges
+//                 (presumably the graph underlying g when g is filtered - TODO confirm)
+// seqidxSet       not referenced in this function
+// coordinates     (vertex,seqidx) -> interval; .first/.second are used below
+// lcborientmap    passed to checkLCBOrient together with the two component ids
+// distance        maximum gap allowed between the two vertices
+// maskedEdges     only referenced in the commented-out code below
+// ccvmap          vertex -> component (LCB) index
+// componentMap    component (LCB) index -> member vertices
+// sequence2genome sequence label -> genome index
+template<typename TGraph, typename TGraphB, typename TLCBMap>
+void updateAdjacency(TGraph &g,
+		     TGraphB &baseg,
+		     std::set<Label> &seqidxSet,
+		     VertexLabelIntervalMap &coordinates, 
+		     TLCBMap & lcborientmap,
+		     unsigned int distance,
+		     EdgeSet&maskedEdges,
+		     std::vector<int> &  ccvmap,		
+		     std::vector<LCB> & componentMap,
+		     SequenceGenomeMap & sequence2genome){
+  //Graph of LCBs filtered by sequences 
+  //NOTE: these three typedefs are not used in the body below
+  typedef typename property_map<TGraph, vertex_vlabelmask_t>::type VertexLabelMask; 
+  typedef typename property_map<TGraph, vertex_orientmask_t>::type VertexOrientMask;
+  typedef typename property_map<TGraph, edge_labelmask_t>::type EdgeLabelMask;
+  
+  typename property_map < TGraph, vertex_orientmask_t >::type orientmaskmap = get(vertex_orientmask,g);
+  typename property_map < TGraph, vertex_vlabelmask_t >::type labelmaskmap = get(vertex_vlabelmask,g);
+  typename property_map < TGraph, vertex_orient_t>::type orientmap = get(vertex_orient,g);
+  typename property_map < TGraph, edge_labelmask_t >::type elabelmaskmap = get(edge_labelmask,g);
+  typename property_map < TGraph, vertex_label_t >::type labelmap = get(vertex_label,g);
+  typename property_map < TGraph, vertex_len_t >::type lenmap = get(vertex_len,g);
+  typename property_map < TGraph, vertex_genome_t >::type genomemap = get(vertex_genome,g);
+
+  //Variables
+  //
+
+  Edge e1;
+  bool found;
+  //Count of edge labels added; only reported by the commented-out message at the end
+  unsigned int numnewedges=0;
+
+  //sort by coordinates by position on each sequence in the graph
+
+#ifdef DEBUG
+  std::cerr << "Updating adjacency edges in alignment graph" << std::endl;
+#endif
+  //TODO looping over all seqs is a bottleneck for draft genomes
+  //Refactor by looping over graph and saving map [seqidx]->[vertex set]
+  //Build seqidx -> list of vertices labelled with that sequence
+  std::map<Label,std::vector<typename TGraph::vertex_descriptor> > seqVertexMap;
+  for(typename boost::graph_traits<TGraph>::vertex_iterator 
+	vit = vertices(g).first;vit!=vertices(g).second;++vit){
+    //assert(labelmap.find(*vit)!=labelmap.end());
+    assert(labelmap[*vit].size() > 0);
+    for(LabelSet::iterator sit = labelmap[*vit].begin();sit!=labelmap[*vit].end();++sit){
+      seqVertexMap[*sit].push_back(*vit);
+    }
+  }
+  
+  //Sequences whose vertices sum to a vertex_len of zero are skipped in the main loop
+  std::set<Label> skipseqs;
+  for(typename std::map<Label,std::vector<typename TGraph::vertex_descriptor> >::iterator mit = seqVertexMap.begin();mit!=seqVertexMap.end();++mit){
+    unsigned int spanlen=0;
+    Label seqidx = mit->first;
+    for(typename std::vector<typename TGraph::vertex_descriptor>::iterator vit=mit->second.begin();vit!=mit->second.end();++vit){
+      typename TGraph::vertex_descriptor v = *vit;
+      assert(coordinates.find(std::make_pair(v,seqidx))!=coordinates.end());
+      //Only vertices that have coordinates on this sequence contribute to spanlen
+      if(coordinates.find(std::make_pair(v,seqidx))!=coordinates.end()){
+	spanlen = spanlen + get(vertex_len,g,v);
+      }
+      else{
+	assert(false);
+      }
+    }
+    if(spanlen==0){
+      skipseqs.insert(mit->first);
+    }
+  }
+
+  //Main loop: one pass per sequence over its coordinate-sorted vertices
+  for(typename std::map<Label,std::vector<typename TGraph::vertex_descriptor> >::iterator mit = seqVertexMap.begin();mit!=seqVertexMap.end();++mit){
+    Label seqidx = mit->first;
+    assert(sequence2genome.find(seqidx)!=sequence2genome.end());
+    Label genomeidx = sequence2genome[seqidx];
+    if(skipseqs.find(seqidx)==skipseqs.end()){
+      //sort(sortedV.begin(),sortedV.end(),coordsorder(&coordinates,seqidx));
+      sort(mit->second.begin(),mit->second.end(),coordsorder(&coordinates,seqidx));
+      
+      //
+      //(8.1)Check and add any new edges between adjacent alignment blocks in an LCB
+      for(std::vector<Vertex>::iterator it2 = mit->second.begin();it2!=mit->second.end();++it2){
+	if(it2+1!=mit->second.end() && ccvmap[*it2]!=ccvmap[*(it2+1)]){//only consider new edges that bridge clusters
+	  //check still on same sequence,genome
+	  assert(labelmap[*it2].find(seqidx) != labelmap[*it2].end());
+	  assert(genomemap[*it2].find(genomeidx) != genomemap[*it2].end());
+	  assert(labelmap[*(it2+1)].find(seqidx) != labelmap[*(it2+1)].end());
+	  assert(genomemap[*(it2+1)].find(genomeidx) != genomemap[*(it2+1)].end());
+	  assert(coordinates.find(std::make_pair(*(it2+1),seqidx)) != coordinates.end());
+	  assert(coordinates.find(std::make_pair(*it2,seqidx)) != coordinates.end());
+	  //new edge exists only if dist < distance threshold
+	  //int dist = abs(coordinates[std::make_pair(*it2,seqidx)].second - coordinates[std::make_pair(*(it2+1),seqidx)].first);
+	  //Signed gap: .first of the next vertex minus .second of this vertex
+	  //(presumably interval start and end - TODO confirm); no abs() is taken
+	  int dist = coordinates[std::make_pair(*(it2+1),seqidx)].first - coordinates[std::make_pair(*it2,seqidx)].second;
+	  //sharedlabels is only declared when asserts are active; its sole use is the assert below
+#ifdef NDEBUG
+	  
+#else
+	  BitMask sharedlabels = (labelmaskmap[*it2]&labelmaskmap[*(it2+1)]);
+#endif
+	  assert(isLabelCollinearMask(sharedlabels,
+				      orientmaskmap[*it2],
+				      orientmaskmap[*(it2+1)]) 
+		 == 
+		 isLabelCollinear(orientmap[*it2],
+				  orientmap[*(it2+1)],
+				  sequence2genome));
+	  //Additional checks to ensure that we only add "good" edges, between vertices on the same genomes
+	  if(dist <= (int)distance 
+	     && isLabelCollinear(orientmap[*it2],
+				 orientmap[*(it2+1)],
+				 sequence2genome) 
+	     && isLabelMaxGap(*it2,*(it2+1),orientmap[*it2],orientmap[*(it2+1)],coordinates,distance,sequence2genome)){
+	    //make sure that we do not introduce a rearrangment
+	    //make sure that we do not introduce a long gap
+	    
+	    //Candidate LCB: union of the two components the pair belongs to
+	    LCB newlcb;	
+	    newlcb.insert(newlcb.end(),componentMap[ccvmap[*it2]].begin(),componentMap[ccvmap[*it2]].end());
+	    newlcb.insert(newlcb.end(),componentMap[ccvmap[*(it2+1)]].begin(),componentMap[ccvmap[*(it2+1)]].end());
+	    
+	    //
+	    BitMask longlabelmask=setSpanMask(newlcb,lenmap,labelmap,sequence2genome);
+
+	    //Two check required
+	    //Check if orientation of lcb1 and lcb2 are congruent
+	    //Check if orientation of it2 and it2+1 are congruent	  
+	    //TODO
+	    //The checkLCBOrient(masks) will not consider the case where a single vertex 
+	    //can be flipped to match the orientation
+	    //TODO
+	    //Only checking overall lcb orientation currently
+	    //checkPairOrient(vlabelmap,vorientmap,*it1,*it2])
+	    if(checkLCBOrient(g,newlcb,longlabelmask,sequence2genome)
+	       && checkLCBOrient(lcborientmap,ccvmap[*it2],ccvmap[*(it2+1)],longlabelmask)
+	       && checkLCBGaps(g,newlcb,ccvmap,coordinates,distance,sequence2genome)){
+	      //Look for an existing edge it2 -> it2+1 first
+	      tie(e1,found) = edge(*it2,*(it2+1), g);
+#ifdef DEBUG
+		std::cerr << "Adding new edge between " << get(vertex_name,g,*it2) << "-" 
+			  << get(vertex_name,g,*(it2+1)) << std::endl;
+#endif
+	      //Counted optimistically; decremented again if the genome bit was already set
+	      numnewedges++;
+	      if(found){
+		//TODO
+		//addEdgeLabel(g,e1,genomeidx);
+		if(!elabelmaskmap[e1].test(genomeidx)){
+#if defined(STORE_EDGE_LABELS)
+		  labelmap[e1].insert(std::make_pair(genomeidx,dist));
+#endif
+		  assert(!elabelmaskmap[e1].test(genomeidx));
+		}
+		else{
+		  numnewedges--;
+		}
+		elabelmaskmap[e1].set(genomeidx,1);
+	      }
+	      else{
+		//TODO
+		//addEdgeLabel(g,e1,genomeidx)
+		//No forward edge: try the reverse direction it2+1 -> it2
+		tie(e1,found) = edge(*(it2+1),*it2, g);
+		if(found){
+		  if(!elabelmaskmap[e1].test(genomeidx)){
+#if defined(STORE_EDGE_LABELS)
+		    labelmap[e1].insert(std::make_pair(genomeidx,dist));
+#endif
+		    assert(!elabelmaskmap[e1].test(genomeidx));
+		  }
+		  else{
+		    numnewedges--;
+		  }
+		  elabelmaskmap[e1].set(genomeidx,1);
+		}
+		else{
+		  //No edge in either direction: create it2 -> it2+1 in baseg
+#if defined(STORE_EDGE_LABELS)
+		  LabelMap plabels;
+		  plabels[genomeidx] = dist;
+		  tie(e1,found) = add_edge(*it2,*(it2+1),EdgeProperties(plabels),baseg);
+#else
+		  tie(e1,found) = add_edge(*it2,*(it2+1),EdgeProperties(),baseg);
+#endif
+		  //TODO
+		  //addEdgeLabel(g,e1,genomeidx)
+		  elabelmaskmap[e1].set(genomeidx,1);
+		  /*
+		  //Remove any mask on this edges, only necessary if g is a filtered graph
+		  if(maskedEdges.find(std::make_pair(*it2,*(it2+1)))!=maskedEdges.end()){
+		  maskedEdges.erase(maskedEdges.find(std::make_pair(*it2,*(it2+1))));
+		  }
+		  if(maskedEdges.find(std::make_pair(*(it2+1),*it2))!=maskedEdges.end()){
+		  maskedEdges.erase(maskedEdges.find(std::make_pair(*(it2+1),*it2)));
+		  }
+		  */
+		}
+	      }
+	    }
+	  }
+	}
+      }
+    }
+    else{
+      //std::cerr << "Skipping merge on seq:" << seqidx 
+      //<< " spanlen:" << spanlen << std::endl;
+    }
+  }
+  //std::cerr << "Added " << numnewedges << " edges" << std::endl;
+  //Refresh the edge and vertex masks of g after the edge updates
+  setedgemasks(g,distance,coordinates,sequence2genome);
+  setvertexmasks(g,sequence2genome);
+  //std::cerr << "Finished setting edge and vertex masks" << std::endl;
+
+}
+//
+//Greedily merge adjacent LCBs, visiting LCBs in reverse lencmp order
+//(presumably longest first - lencmp is defined elsewhere)
+//
+//For every edge incident to a vertex of the current LCB that joins two
+//different, non-empty LCBs, the pair is merged when the combined LCB passes
+//checkLCBOrient (both overloads) and checkLCBGaps with maxgap. Each LCB pair
+//is tested at most once per call.
+//
+//On a merge
+// - the combined LCB is appended to componentMap and both inputs are emptied
+// - ccvmap is updated so every member vertex points to the new LCB
+// - lcborientmap gets an entry for the new LCB
+// - the mask on the joining edge is removed from maskedEdges and the edge
+//   is categorized ORANGERED
+//
+// g               graph holding the vertices/edges and their properties
+// ccvmap          vertex -> LCB index, updated in place
+// componentMap    LCB index -> member vertices, updated in place
+// lcborientmap    LCB index -> orientation info, extended in place
+// coordinates     vertex coordinates, passed to get_LCB_length/checkLCBGaps
+// maskedEdges     masked (source,target) pairs, the joining edge is unmasked
+// maxgap          maximum gap passed to checkLCBGaps
+// sequence2genome sequence label -> genome index
+//
+//Returns the number of merges performed
+//
+//This sub will only merge entire LCBs that are congruent
+//TODO, consider edges from longest LCBs first
+// Populate edgelcbmap with max(lcblenmap[source(e)],lcblenmap[target(e)])
+// lcblenmap[edgelcbmap[e1]] < lcblenmap[edgelcbmap[e2]] 
+template<typename TGraph, typename TLCBMap> 
+int mergeLCBsGreedy(TGraph & g,
+		    std::vector<int> & ccvmap,
+		    std::vector<LCB> & componentMap,
+		    TLCBMap & lcborientmap,
+		    VertexLabelIntervalMap & coordinates,
+		    EdgeSet & maskedEdges,
+		    unsigned int maxgap,
+		    SequenceGenomeMap & sequence2genome){
+#ifdef DEBUG
+  std::cerr << "Merging LCBs. Total count " << componentMap.size() << std::endl;
+#endif
+  typename property_map < TGraph, vertex_label_t >::type labelmap = get(vertex_label,g);
+  typename property_map < TGraph, vertex_len_t >::type lenmap = get(vertex_len,g);
+  typename property_map < TGraph, vertex_orient_t >::type omap = get(vertex_orient, g);
+  typename property_map < TGraph, vertex_len_t >::type lmap = get(vertex_len, g);
+  typename graph_traits<TGraph>::out_edge_iterator ei, edge_end;
+  typename graph_traits<TGraph>::in_edge_iterator ei2, edge_end2;
+
+  LCBLabelIntervalMap lcbcoords;  
+  
+  //LCB pairs that have already been tested in this call
+  std::set<std::pair<int,int> > searches;
+  //Index that the next merged LCB will get in componentMap
+  int lcbcount=componentMap.size();
+  int nummerges=0;
+
+  //Capture LCB length
+  std::map<int,int> lcblenMap; //lcbid->max_seq_span
+  std::vector<int> lcbidx;
+  for(unsigned int k=0;k<componentMap.size();++k){
+#ifdef DEBUG
+    std::cerr << "Component " << k << std::endl;
+#endif
+    if(componentMap[k].size()>0){
+      int bplen=0;
+      unsigned int len = get_LCB_length(componentMap[k],omap,lmap,coordinates,lcbcoords,k,bplen,sequence2genome,0); 
+#ifdef DEBUG
+      std::cerr << " len:" << len << std::endl;
+#endif
+      lcblenMap[k] = len;
+    }
+    else{
+      lcblenMap[k]=0;
+    }
+    lcbidx.push_back(k);
+  }
+  
+  //Sort LCBs on length
+  sort(lcbidx.begin(),lcbidx.end(),lencmp(lcblenMap));
+  //Greedy merge adjacent LCBs from largest to smallest
+  for(std::vector<int>::reverse_iterator cit = lcbidx.rbegin();cit != lcbidx.rend();++cit){
+#ifdef DEBUG
+    std::cerr << "Greedy merge LCB:" << *cit << " len:" << lcblenMap[*cit] << std::endl;
+#endif
+    if(componentMap[*cit].size()>0){
+      //Work on a copy, componentMap is modified (and may reallocate) on a merge
+      std::vector<Vertex> lcbv = componentMap[*cit];
+      for(LCB::iterator vit = lcbv.begin();vit!=lcbv.end();++vit){
+	//Gather all edges incident to this vertex, outgoing then incoming
+	std::vector<Edge> lcbedges;
+	tie(ei,edge_end) = out_edges(*vit,g);
+	for(;ei!=edge_end;++ei){
+	  assert(source(*ei,g)==*vit);
+	  lcbedges.push_back(*ei);
+	}
+	
+	tie(ei2,edge_end2) = in_edges(*vit,g);
+	for(;ei2!=edge_end2;++ei2){
+	  assert(target(*ei2,g)==*vit);
+	  lcbedges.push_back(*ei2);
+	}
+#ifdef DEBUG
+	std::cerr << "Edges " << lcbedges.size() << std::endl;
+#endif
+	for(vector<Edge>::iterator eit=lcbedges.begin();eit!=lcbedges.end();++eit){
+	  Vertex sv = source(*eit,g);
+	  Vertex tv = target(*eit,g);
+#ifdef DEBUG
+	  std::cerr << "Vertex " << sv << "-" << tv << std::endl;
+#endif
+	  int sidx = ccvmap[sv];
+	  int tidx = ccvmap[tv];
+	  //
+	  //If edge connects two components consider merging
+	  //if compatible
+	  if(sidx!=tidx
+	     && searches.find(std::make_pair(sidx,tidx)) == searches.end()
+	     && searches.find(std::make_pair(tidx,sidx)) == searches.end()
+	     && componentMap[tidx].size()> 0
+	     && componentMap[sidx].size()> 0
+	     ){
+	    //Make sure that there is no edge already connecting these LCBs
+	    //This merge must be run after computing connected components
+	    //
+	    assert(maskedEdges.find(std::make_pair(sv,tv)) != maskedEdges.end()
+		   ||maskedEdges.find(std::make_pair(tv,sv)) != maskedEdges.end());
+	    //
+	    //Mark that CC pair has been searched
+	    searches.insert(std::make_pair(sidx,tidx));
+	    
+	    LCB newlcb;
+	    newlcb.insert(newlcb.end(),componentMap[sidx].begin(),componentMap[sidx].end());
+	    newlcb.insert(newlcb.end(),componentMap[tidx].begin(),componentMap[tidx].end());
+	    
+	    BitMask longlabelmask=setSpanMask(newlcb,lenmap,labelmap,sequence2genome);
+	    
+	    //TODO
+	    //The funcs called in this loop are a bottleneck according to gprof 
+	    //Most time spent copying OrientedLabelSet
+	    //First this is a large loop, all edges. 
+	    //checkLCBgaps/checkLCBOrient makes copies of vertex properties like orientedlabelset
+	    //checkLCBGaps creates and sorts vectors
+	    if(checkLCBOrient(g,newlcb,longlabelmask,sequence2genome) //pairwise check for all vertices
+	       && checkLCBOrient(lcborientmap,sidx,tidx,longlabelmask) //check consistency with lcb orient
+	       && checkLCBGaps(g,newlcb,ccvmap,coordinates,maxgap,sequence2genome)){
+	      //Save LCB
+	      //std::cerr << "New LCB: "; 
+	      
+	      for(LCB::iterator vit2=newlcb.begin();vit2!=newlcb.end();++vit2){
+		assert(*vit2<ccvmap.size());
+		ccvmap[*vit2]=lcbcount;
+	      }
+	      //std::cerr << std::endl;
+	      componentMap.push_back(newlcb);
+	      //Clear out old LCB
+	      componentMap[sidx] = LCB();
+	      componentMap[tidx] = LCB();
+	      
+	      std::vector<Vertex> badV;
+	      //If joined LCBs have same labels, orietation, no need to recalc
+	      assert(lcborientmap.find(sidx)!=lcborientmap.end());
+	      assert(lcborientmap.find(tidx)!=lcborientmap.end());
+	      if(lcborientmap[sidx]==lcborientmap[tidx]){
+		lcborientmap[lcbcount]=lcborientmap[tidx];
+	      }
+	      else{
+		//Set the label and orientation for the new lcb
+#ifdef DEBUG
+		std::cerr << lcborientmap[sidx].first << std::endl;
+		std::cerr << lcborientmap[sidx].second << std::endl << std::endl;
+		std::cerr << lcborientmap[tidx].first << std::endl;
+		std::cerr << lcborientmap[tidx].second << std::endl << std::endl;
+#endif
+		lcborientmap[lcbcount]=setLCBOrient(g,newlcb,badV,sequence2genome);
+	      }
+	      //Remove mask on edge linking two lcbs
+	      //The mask may be stored as (sv,tv) or (tv,sv). The key is kept in
+	      //a local so that the erased iterator is never dereferenced.
+	      std::pair<Vertex,Vertex> maskkey = std::make_pair(sv,tv);
+	      EdgeSet::iterator mit = maskedEdges.find(maskkey);
+	      if(mit == maskedEdges.end()){
+		maskkey = std::make_pair(tv,sv);
+		mit = maskedEdges.find(maskkey);
+	      }
+	      assert(mit != maskedEdges.end());
+	      //asserts are compiled out under NDEBUG, so guard explicitly:
+	      //never erase end() and never write through an edge that was not found
+	      if(mit != maskedEdges.end()){
+		maskedEdges.erase(mit);
+		Edge e1;
+		bool found;
+		tie(e1,found) = edge(maskkey.first,maskkey.second,g);
+		assert(found);
+		if(found){
+		  put(edge_category,g,e1,ORANGERED);
+		}
+	      }
+#ifdef DEBUG
+	      std::cerr << "Merging LCB:"<<sidx<< " with LCB:"<<tidx<< " into LCB:"<<lcbcount << std::endl;
+#endif
+	      nummerges++;
+	      lcbcount++;
+	      assert(lcbcount==(int)componentMap.size());
+	    }
+	    else{
+	      //skip LCB
+#ifdef DEBUG
+	      std::cerr << "Skipping merge of LCB:"<<sidx
+			<< " with LCB:"<<tidx
+			<< " from edge " 
+			<< get(vertex_name,g,sv) << "-" << get(vertex_name,g,tv) 
+			<< std::endl;
+#endif
+	    }
+	  }
+	}
+      }
+    }
+  }
+  return nummerges;
+}
+//
+//Merge adjacent lcbs
+//
+//Walks every edge of g. When an edge joins two different LCBs that have not
+//been tested as a pair yet, the two LCBs are merged if the combined LCB
+//passes checkLCBOrient (both overloads) and checkLCBGaps with maxgap.
+//
+//On a merge
+// - the combined LCB is appended to componentMap (the two input LCBs are
+//   left in place in componentMap)
+// - ccvmap is updated so every member vertex points to the new LCB
+// - lcborientmap gets an entry for the new LCB
+// - the mask on the joining edge is removed from maskedEdges and the edge
+//   is categorized ORANGERED
+//
+// g               graph holding the vertices/edges and their properties
+// ccvmap          vertex -> LCB index, updated in place
+// componentMap    LCB index -> member vertices, extended in place
+// lcborientmap    LCB index -> orientation info, extended in place
+// coordinates     vertex coordinates, passed to checkLCBGaps
+// maskedEdges     masked (source,target) pairs, the joining edge is unmasked
+// maxgap          maximum gap passed to checkLCBGaps
+// sequence2genome sequence label -> genome index
+//
+//Returns the number of merges performed
+//
+//This sub will only merge entire LCBs that are congruent
+//TODO, consider edges from longest LCBs first
+// Populate edgelcbmap with max(lcblenmap[source(e)],lcblenmap[target(e)])
+// lcblenmap[edgelcbmap[e1]] < lcblenmap[edgelcbmap[e2]] 
+template<typename TGraph, typename TLCBMap> 
+int mergeLCBs(TGraph & g,
+	      std::vector<int> & ccvmap,
+	      std::vector<LCB> & componentMap,
+	      TLCBMap & lcborientmap,
+	      VertexLabelIntervalMap & coordinates,
+	      EdgeSet & maskedEdges,
+	      unsigned int maxgap,
+	      SequenceGenomeMap & sequence2genome){
+#ifdef DEBUG
+  std::cerr << "Merging LCBs. Total count " << componentMap.size() << std::endl;
+#endif
+  typename property_map < TGraph, vertex_label_t >::type labelmap = get(vertex_label,g);
+  typename property_map < TGraph, vertex_len_t >::type lenmap = get(vertex_len,g);
+  
+  //LCB pairs that have already been tested in this call
+  std::set<std::pair<int,int> > searches;
+  //Index that the next merged LCB will get in componentMap
+  int lcbcount=componentMap.size();
+  int nummerges=0;
+  typename boost::graph_traits<TGraph>::edge_iterator eit, edge_end;
+  edge_end=edges(g).second;
+  for(eit=edges(g).first;eit!=edge_end;++eit){//all edges in g
+    Vertex sv = source(*eit,g);
+    Vertex tv = target(*eit,g);
+    int sidx = ccvmap[sv];
+    int tidx = ccvmap[tv];
+    //
+    //If edge connects two components consider merging
+    //if compatible
+    if(sidx!=tidx
+       && searches.find(std::make_pair(sidx,tidx)) == searches.end()
+       && searches.find(std::make_pair(tidx,sidx)) == searches.end()
+       ){
+      //Make sure that there is no edge already connecting these LCBs
+      //This merge must be run after computing connected components
+      //
+      assert(maskedEdges.find(std::make_pair(sv,tv)) != maskedEdges.end()
+	     ||maskedEdges.find(std::make_pair(tv,sv)) != maskedEdges.end());
+      //
+      //Mark that CC pair has been searched
+      searches.insert(std::make_pair(sidx,tidx));
+
+      LCB newlcb;
+      newlcb.insert(newlcb.end(),componentMap[sidx].begin(),componentMap[sidx].end());
+      newlcb.insert(newlcb.end(),componentMap[tidx].begin(),componentMap[tidx].end());
+      
+      BitMask longlabelmask=setSpanMask(newlcb,lenmap,labelmap,sequence2genome);
+      
+      //TODO
+      //The funcs called in this loop are a bottleneck according to gprof 
+      //Most time spent copying OrientedLabelSet
+      //First this is a large loop, all edges. 
+      //checkLCBgaps/checkLCBOrient makes copies of vertex properties like orientedlabelset
+      //checkLCBGaps creates and sorts vectors
+      if(checkLCBOrient(g,newlcb,longlabelmask,sequence2genome) //pairwise check for all vertices
+	 && checkLCBOrient(lcborientmap,sidx,tidx,longlabelmask) //check consistency with lcb orient
+	 && checkLCBGaps(g,newlcb,ccvmap,coordinates,maxgap,sequence2genome)){
+	//Save LCB
+	//std::cerr << "New LCB: "; 
+	for(LCB::iterator vit=newlcb.begin();vit!=newlcb.end();++vit){
+	  assert(*vit<ccvmap.size());
+	  ccvmap[*vit]=lcbcount;
+	  //std::cerr << get(vertex_name,g,*vit) << " ";
+	}
+	//std::cerr << std::endl;
+	componentMap.push_back(newlcb);
+	std::vector<Vertex> badV;
+	//If joined LCBs have same labels, orietation, no need to recalc
+	assert(lcborientmap.find(sidx)!=lcborientmap.end());
+	assert(lcborientmap.find(tidx)!=lcborientmap.end());
+	if(lcborientmap[sidx]==lcborientmap[tidx]){
+	  lcborientmap[lcbcount]=lcborientmap[tidx];
+	}
+	else{
+	  //Set the label and orientation for the new lcb
+#ifdef DEBUG
+	  std::cerr << lcborientmap[sidx].first << std::endl;
+	  std::cerr << lcborientmap[sidx].second << std::endl << std::endl;
+	  std::cerr << lcborientmap[tidx].first << std::endl;
+	  std::cerr << lcborientmap[tidx].second << std::endl << std::endl;
+#endif
+	  lcborientmap[lcbcount]=setLCBOrient(g,newlcb,badV,sequence2genome);
+	}
+	//Remove mask on edge linking two lcbs
+	//The mask may be stored as (sv,tv) or (tv,sv). The key is kept in
+	//a local so that the erased iterator is never dereferenced.
+	std::pair<Vertex,Vertex> maskkey = std::make_pair(sv,tv);
+	EdgeSet::iterator mit = maskedEdges.find(maskkey);
+	if(mit == maskedEdges.end()){
+	  maskkey = std::make_pair(tv,sv);
+	  mit = maskedEdges.find(maskkey);
+	}
+	assert(mit != maskedEdges.end());
+	//asserts are compiled out under NDEBUG, so guard explicitly:
+	//never erase end() and never write through an edge that was not found
+	if(mit != maskedEdges.end()){
+	  maskedEdges.erase(mit);
+	  Edge e1;
+	  bool found;
+	  tie(e1,found) = edge(maskkey.first,maskkey.second,g);
+	  assert(found);
+	  if(found){
+	    put(edge_category,g,e1,ORANGERED);
+	  }
+	}
+#ifdef DEBUG
+	std::cerr << "Merging LCB:"<<sidx<< " with LCB:"<<tidx<< " into LCB:"<<lcbcount << std::endl;
+#endif
+	nummerges++;
+	lcbcount++;
+	assert(lcbcount==(int)componentMap.size());
+      }
+      else{
+	//skip LCB
+#ifdef DEBUG
+	std::cerr << "Skipping merge of LCB:"<<sidx
+		  << " with LCB:"<<tidx
+		  << " from edge " 
+		  << get(vertex_name,g,sv) << "-" << get(vertex_name,g,tv) 
+		  << std::endl;
+#endif
+      }
+    }
+  }
+  return nummerges;
+}
+
+//
+//Mask out a whole LCB: every vertex of lcb is inserted into maskedLCBs
+// lcb         the LCB whose vertices are masked
+// breakpoints not used by this function
+// maskedLCBs  set that receives the vertices of lcb
+void removeLCB(LCB & lcb, 
+	       std::set<std::pair<Vertex,bool> > &breakpoints, 
+	       VertexSet &maskedLCBs){
+  LCB::iterator cur = lcb.begin();
+  while(cur!=lcb.end()){
+#ifdef DEBUG
+      std::cerr << "Removing vertex " << *cur << std::endl;
+#endif
+    maskedLCBs.insert(*cur);
+    ++cur;
+  }
+}
+
+
+//
+//Mark possible syntenic breakpoints in graph g, storing in maskedEdges
+//Breakpoints can arise from 
+//(1)Change in label
+//(2)Change in orientation
+//(3)Flux, wherever indegree>1 or outdegree>1
+//
+//Every masked edge is inserted into maskedEdges as a (source,target) pair and
+//its edge_category is set to a colour naming the reason
+// GREEN  single in/out edge whose labels differ and that is not collinear
+//        (overwritten by PURPLE below, as the same edge also fails the
+//        collinearity test that follows)
+// PURPLE edge between vertices that are not collinear (orientation change)
+// RED    collinear edge on a vertex with more than one in- or out-edge
+//Vertices with at least one such breakpoint are inserted into breakpoints
+//as (vertex,false).
+//
+// g               graph to scan; edge_category is written
+// breakpoints     receives (vertex,false) for each breakpoint vertex
+// maskedEdges     receives the masked (source,target) pairs
+// vertexList      when non-empty, only vertices found in it are examined
+// sequence2genome passed to isLabelCollinear
+template<typename TGraph, typename BPMap1, typename BPMap2, typename VMap1>
+void markBreakpoints(TGraph &g, 
+		     BPMap1 &breakpoints, 
+		     BPMap2 &maskedEdges, 
+		     VMap1 &vertexList, 
+		     SequenceGenomeMap &sequence2genome){
+  //NOTE: i and end are declared but not used below
+  typename graph_traits<TGraph>::vertex_iterator i, end;
+  typename graph_traits<TGraph>::out_edge_iterator ei, edge_end;
+  typename graph_traits<TGraph>::in_edge_iterator ei2, edge_end2;
+  typename property_map < TGraph, vertex_orientmask_t >::type vorientmap = get(vertex_orientmask, g);
+  typename property_map < TGraph, vertex_vlabelmask_t >::type vlabelmap = get(vertex_vlabelmask, g);
+  typename property_map < TGraph, vertex_orient_t >::type vmap = get(vertex_orient, g);
+  typename property_map < TGraph, edge_labelmask_t >::type elabelmap = get(edge_labelmask, g);
+  //Counters, only reported by the DEBUG message at the end
+  int bptype1=0;
+  int bptype2=0;
+  int bptype3=0;
+  //Always false here, so ORANGERED (previously merged) edges are never preserved
+  int keepmerge=false;
+
+  for(typename boost::graph_traits<TGraph>::vertex_iterator 
+	vit = vertices(g).first;vit!=vertices(g).second;++vit){
+      Vertex v = *vit;
+      //Restrict the scan to vertexList when one is supplied
+      if(vertexList.size()>0 && vertexList.find(v)==vertexList.end())
+	continue;
+#ifdef DEBUG
+	std::cerr << "Checking for breakpoints on vertex v:" << get(vertex_name,g,v) << std::endl;
+#endif
+	//ei = out_edges(v, g).first;
+	// bptype==0 no breakpoint
+	// bptype==1 incoming bp, end a region
+	// bptype==2 outgoing bp, start a region
+	//Only fluxbp is read later; inlinebp, inbp, outbp and ismerge are set but never read
+	bool inlinebp=false;
+	bool fluxbp=false;
+	bool inbp=false;
+	bool outbp=false;
+	bool ismerge=false;
+
+      //Exactly one incoming edge
+      if(in_degree(v,g)==1){
+	tie(ei2,edge_end2) = in_edges(v,g);
+	assert(target(*ei2,g)==v);
+	//Check same labels
+	if(sameLabel(vlabelmap[v],vlabelmap[source(*ei2,g)],elabelmap[*ei2])){	
+	}
+	else{
+	  //Some type of case (3) flux
+	  if(isLabelCollinear(vmap[v],vmap[source(*ei2,g)],sequence2genome)){
+	  }
+	  else{
+	    fluxbp=true;
+	    inbp=true;
+	    maskedEdges.insert(std::make_pair(source(*ei2,g),v));
+	    put(edge_category,g,*ei2,GREEN);
+	    bptype1++;
+	  }
+	}
+	//Collinearity is tested again regardless of the label test above
+	if(isLabelCollinear(vmap[v],vmap[source(*ei2,g)],sequence2genome)){
+	  assert(sameOrient(vorientmap[v]&elabelmap[*ei2],vorientmap[source(*ei2,g)]&elabelmap[*ei2],vlabelmap[v]&elabelmap[*ei2]));
+	}
+	else{
+	  //Some type of case (2) orientation change
+	  fluxbp=true;
+	  inlinebp=false;
+	  inbp=true;
+	  maskedEdges.insert(std::make_pair(source(*ei2,g),v));
+	  put(edge_category,g,*ei2,PURPLE);
+	  bptype2++;
+	}
+      }
+      //Exactly one outgoing edge; mirrors the incoming case above
+      if(out_degree(v,g)==1){
+	tie(ei,edge_end) = out_edges(v,g);
+	assert(source(*ei,g)==v);
+	if(sameLabel(vlabelmap[v],vlabelmap[target(*ei,g)],elabelmap[*ei])){
+	}
+	else{
+	  //Some type of case (3) flux
+	  if(isLabelCollinear(vmap[v],vmap[target(*ei,g)],sequence2genome)){
+	  }
+	  else{
+	    fluxbp=true;
+	    //inlinebp=true;
+	    outbp=true;
+	    maskedEdges.insert(std::make_pair(v,target(*ei,g)));
+	    put(edge_category,g,*ei,GREEN);
+	    bptype1++;
+	  }
+	}
+	if(isLabelCollinear(vmap[v],vmap[target(*ei,g)],sequence2genome)){
+	  assert(sameOrient(vorientmap[v]&elabelmap[*ei],
+			    vorientmap[target(*ei,g)]&elabelmap[*ei],
+			    vlabelmap[v]&elabelmap[*ei]));
+	}
+	else{
+	  //Some type of case (2) orientation change
+	  maskedEdges.insert(std::make_pair(v,target(*ei,g)));
+	  put(edge_category,g,*ei,PURPLE);
+	  fluxbp=true;
+	  inlinebp=false;
+	  outbp=true;
+	  bptype2++;
+	}
+      }
+      //More than one incoming edge: every incoming edge is masked
+      if(in_degree(v,g)>1){
+	//Some type of case (3) flux
+	fluxbp=true;
+	inbp=true;
+	tie(ei2,edge_end2) = in_edges(v,g);
+
+	for(;ei2!=edge_end2;++ei2){
+	  assert(target(*ei2,g)==v);
+	  //maskedEdges.insert(std::make_pair(source(*ei2,g),target(*ei2,g)));
+#ifdef DEBUG
+	  std::cerr << "Adding bp " << get(vertex_name,g,source(*ei2,g)) << "-" << get(vertex_name,g,target(*ei2,g)) << std::endl;
+#endif
+	  if(isLabelCollinear(vmap[source(*ei2,g)],vmap[target(*ei2,g)],sequence2genome)){
+	    //Previously merged, keep
+	    if(keepmerge && get(edge_category,g,*ei2)==ORANGERED){
+	      ismerge=true;
+	    }
+	    else{
+	      put(edge_category,g,*ei2,RED);
+	      maskedEdges.insert(std::make_pair(source(*ei2,g),target(*ei2,g)));
+	    }
+	  }
+	  else{
+	    //Some type of case (2) orientation change
+	    put(edge_category,g,*ei2,PURPLE);
+	    maskedEdges.insert(std::make_pair(source(*ei2,g),target(*ei2,g)));
+	  }
+	}
+	bptype3++;
+      }
+      //More than one outgoing edge: every outgoing edge is masked
+      if(out_degree(v,g)>1){
+	//Some type of case (3) flux
+	fluxbp=true;
+	outbp=true;
+	tie(ei,edge_end) = out_edges(v,g);
+	for(;ei!=edge_end;++ei){
+	  assert(source(*ei,g)==v);
+	  //maskedEdges.insert(std::make_pair(source(*ei,g),target(*ei,g)));
+#ifdef DEBUG
+	  std::cerr << "Adding bp " << get(vertex_name,g,source(*ei,g)) << "-" << get(vertex_name,g,target(*ei,g)) << std::endl;
+#endif
+	  if(isLabelCollinear(vmap[source(*ei,g)],vmap[target(*ei,g)],sequence2genome)){
+	    if(keepmerge && get(edge_category,g,*ei)==ORANGERED){
+	      ismerge=true;
+	    }
+	    else{
+	      put(edge_category,g,*ei,RED);
+	      maskedEdges.insert(std::make_pair(source(*ei,g),target(*ei,g)));
+	    }
+	  }
+	  else{
+	    //Some type of case (2) orientation change
+	    put(edge_category,g,*ei,PURPLE);
+	    maskedEdges.insert(std::make_pair(source(*ei,g),target(*ei,g)));
+	  }
+	}
+	bptype3++;
+      }
+      //Degree zero only sets outbp/inbp, which are not read; no breakpoint is recorded for it
+      if(out_degree(v,g)==0){
+	outbp=true;
+      }
+      if(in_degree(v,g)==0){
+	inbp=true;
+      }
+      //Record the vertex as a breakpoint; the bool of the pair is always false here
+      if(fluxbp){
+	breakpoints.insert(std::make_pair(v,false));
+      }      
+    } 
+#ifdef DEBUG
+    std::cerr << "Marked breakpoints. type1 " << bptype1 << " type2 " << bptype2 << " type3 " << bptype3 << std::endl;
+#endif
+}
+
+//Remove all breakpoints from maskedEdges except
+//PURPLE orientation changing
+//BLUE mincuts
+//An entry is removed when its forward edge (first->second) or, if the
+//graph has one, its reverse edge (second->first) is any other category.
+template<typename TGraph>
+void clearInlineBreakpoints(TGraph & g,
+			    EdgeSet  &maskedEdges){
+  vector<EdgeSet::iterator > eraseMask;
+  for(EdgeSet::iterator mit = maskedEdges.begin();mit!=maskedEdges.end();++mit){
+    Edge e;
+    bool found;
+    tie(e,found) = edge(mit->first,mit->second,g);
+    assert(found);
+    bool drop = (get(edge_category,g,e)!=PURPLE && get(edge_category,g,e)!=BLUE);
+    if(!drop){
+      tie(e,found) = edge(mit->second,mit->first,g);
+      drop = (found && get(edge_category,g,e)!=PURPLE && get(edge_category,g,e)!=BLUE);
+    }
+    //Queue each entry at most once: erasing the same set iterator twice
+    //(both directions qualifying) is undefined behavior.
+    if(drop){
+      eraseMask.push_back(mit);
+    }
+  }
+  for(vector<EdgeSet::iterator >::iterator eit=eraseMask.begin();eit!=eraseMask.end();++eit){
+    maskedEdges.erase(*eit);
+  }
+}
+
+
+//############
+//Connected components
+//Computes the connected components of fg, ignoring edge direction, and
+//stores them as the current set of LCBs.
+//  fg            graph to cluster (read through vertices(fg) and edges(fg))
+//  g             graph handed to setLCBOrient
+//  componentMap  output; cleared, then componentMap[i] lists the fg vertices
+//                of component i
+//  c             output; cleared and resized to one entry per vertex, filled
+//                with the component number of each vertex of the undirected copy
+//  lcborientmap, sequence2genome  passed through to setLCBOrient
+//Returns the number of components (0 when fg has no vertices).
+template<typename TGraph, typename TGraphBase, typename TComponentMap, typename TVertexMap, typename TLCBMap>
+inline
+int calc_components_undirected(TGraph & fg, 
+			       TGraphBase & g,
+			       TComponentMap & componentMap, 
+			       TVertexMap & c, 
+			       TLCBMap & lcborientmap,
+			       SequenceGenomeMap & sequence2genome){
+
+  typedef adjacency_list<vecS,vecS,undirectedS,VertexProperties,EdgeProperties> TLGraph;
+  typedef typename TLGraph::vertex_descriptor TLVertex;
+  typedef typename TGraph::vertex_descriptor TVertex;
+  typedef typename TGraphBase::edge_descriptor TEdgeBase;
+
+  typedef typename boost::graph_traits<TGraph>::edge_iterator TEdgeIterator;
+  typedef typename boost::graph_traits<TGraph>::vertex_iterator TVertexIterator;
+  typedef typename boost::graph_traits<TLGraph>::vertex_iterator TLVertexIterator;
+
+  //Undirected graph(currlcbg) is required here for the CC algorithm
+  //TODO  Performance enhancement refactor
+  //      Avoid building a second graph and run CC on directed graph(fg)
+  TLGraph currlcbg;
+  //Lookup from vertex name to the vertex created for it in currlcbg
+  std::map<VertexName, TLVertex> currlcbv;
+  //Map between undirected graph(currlcbg) vertices and directed graph(fg) vertices
+  std::map<TLVertex,TVertex> vmap;
+
+  //Copy every vertex of fg into currlcbg, keyed by its vertex_name
+  TVertexIterator startv,endv;
+  tie(startv,endv)=vertices(fg);
+  for(TVertexIterator vit = startv;vit!=endv;++vit){
+    VertexName sname = get(vertex_name,fg,*vit);
+    assert(currlcbv.find(sname)==currlcbv.end());
+    TLVertex news = add_vertex(sname,currlcbg);
+    currlcbv[sname]=news;
+    assert(vmap.find(news)==vmap.end());
+    vmap[news]=*vit;
+  }
+
+  //Copy every edge of fg as an undirected edge between the matching vertices
+  TEdgeIterator starte,ende;
+  tie(starte,ende)=edges(fg);
+  for(TEdgeIterator eit = starte;eit!=ende;++eit){
+    TEdgeBase e = *eit;
+    VertexName tname = get(vertex_name,fg,target(e,fg));
+    VertexName sname = get(vertex_name,fg,source(e,fg));
+    assert(currlcbv.find(sname)!=currlcbv.end());
+    assert(currlcbv.find(tname)!=currlcbv.end());
+    add_edge(currlcbv[sname],currlcbv[tname],currlcbg);
+  }
+
+  assert(currlcbv.size()==num_vertices(currlcbg));
+  c.clear();
+  c.resize(num_vertices(currlcbg));  
+  //&c[0] is not valid on an empty container, so skip the call when fg has no vertices
+  int numComponents = 0;
+  if(num_vertices(currlcbg)>0){
+    numComponents = connected_components(currlcbg,&c[0]);
+  }
+  
+  //Save mapping of componentNum->vector<Vertex>
+  componentMap.clear();
+  componentMap.resize(numComponents);
+  TLVertexIterator lvit,lvend;
+  for(tie(lvit,lvend)=vertices(currlcbg);lvit!=lvend;++lvit){ 
+    assert(vmap.find(*lvit)!=vmap.end());
+    //This ensures lcbidx=c[vertex]
+    componentMap[c[*lvit]].push_back(vmap[*lvit]);
+  }  
+  //Save mask for the LCB
+  setLCBOrient(g,lcborientmap,componentMap,sequence2genome);
+  return numComponents;
+}
+
+
+
+
+//
+//Calculate some summary statistics over the non-empty LCBs in componentMap
+//Outputs (all reset on entry):
+//  numc      number of LCBs with length >= minlength (zero-length LCBs never count)
+//  minlen    smallest counted length; stays at the unsigned int maximum when nothing is counted
+//  totallen  sum of the bplen values reported by get_LCB_length for counted LCBs
+//  avglen    SUM of counted lengths, not a mean; callers divide by numc
+//  maxv      largest vertex count of any non-empty LCB, counted or not
+template<typename TGraph>
+void summaryStats(TGraph & fglcbsyn,
+		  std::vector<LCB> &componentMap, 
+		  VertexLabelIntervalMap &coordinates, 
+		  unsigned int minlength,
+		  int & numc,
+		  unsigned int & minlen,
+		  int & totallen,
+		  unsigned int & avglen,
+		  int & maxv,
+		  SequenceGenomeMap & sequence2genome){
+  
+  //Scratch map handed to get_LCB_length; discarded on return
+  LCBLabelIntervalMap lcbcoords;  
+  //The property maps do not depend on the LCB, so fetch them once
+  property_map < LCBSynFilterGraph, vertex_orient_t >::type omap = get(vertex_orient, fglcbsyn);
+  property_map < LCBSynFilterGraph, vertex_len_t >::type lmap = get(vertex_len, fglcbsyn);
+  avglen=0;
+  minlen=std::numeric_limits<unsigned int>::max();
+  totallen=0;
+  numc=0;
+  maxv=0;
+  for(unsigned int k=0;k<componentMap.size();++k){
+    if(componentMap[k].size()==0){ continue; }
+    maxv = (maxv > (int)componentMap[k].size()) ? maxv : (int)componentMap[k].size();
+    int bplen=0;
+    unsigned int len = get_LCB_length(componentMap[k],omap,lmap,coordinates,lcbcoords,k,bplen,sequence2genome,minlength); 
+    if(len>0 && len>=minlength){
+      minlen = (len < minlen) ? len : minlen;
+      avglen+=len;
+      totallen+=bplen;
+      numc++;
+    }
+  }
+}
+
+
+
+int main(int argc, char* argv[])
+{
+
+  //Number of iterations to run
+  unsigned int MAXITERS=5;
+  unsigned int MAXSTABLE=1;
+
+  //Input graph
+  Graph g;
+
+  //Key parameters
+  unsigned int distance=0; //maximum gap length between anchors
+  unsigned int shortlcblen=0; //maximum length of LCBs that are masked during chaining
+
+  unsigned int minlength=0; //for reporting stats only
+  unsigned int minanchor=0; //minimum anchor length
+  unsigned int minprintlength=0;
+
+  //Ensure chains do not overlap by removing overlapping regions
+  bool removeoverlaps=false;
+
+  //Lookups
+  NameVertexMap name2vertex,name2vertexcomp;  
+  NameLabelMap sequence2index,genome2index;  
+  LabelNameMap index2sequence;
+  SequenceGenomeMap sequence2genome;
+  
+  //Map of coordinates for each anchor
+  VertexLabelIntervalMap coordinates;
+  VertexLabelIntervalMap::iterator cpos;
+
+#ifdef TIMING
+  time_t now;
+  time(&now);
+  time_t lasttime=now;
+#endif
+
+  if(argc<=3){
+    cerr << "USAGE:mugsy-chaining max-distance min-lcbspan min-statslen < anchors.projection" << std::endl;
+    exit(1);
+  }
+
+  if(argc>1){
+    assert(atoi(argv[1])>=0);
+    distance = atoi(argv[1]);
+  }
+  if(argc>2){
+    assert(atoi(argv[2])>=0);
+    shortlcblen = atoi(argv[2]);
+  }
+  if(argc>3){
+    minlength = atoi(argv[3]);
+  }
+  assert(distance>0);
+  assert(minlength>=0);
+  cerr << "#Using custom distance " << distance << endl;
+  cerr << "#Using custom minlength " << minlength << endl;
+  
+  cerr << "#Parsing graph from stdin" << endl;
+  if(0){
+    //TODO
+    //Check file format
+    //Allow for unprojected,projected list of blocks
+    //Read blocks and build alignment graph perform projection over
+    //each sequence and connect blocks that are adjacent on any given
+    //sequence at distance < d
+    read_blocks(std::cin,
+		g,
+		name2vertex,
+		genome2index,
+		sequence2index,
+		coordinates,
+		distance);
+  }
+  else{
+    //Read a projection of anchors and build anchor graph
+    //Only consider anchors that are adjacent < d
+    read_pairwiseprojection(std::cin,
+			    g,
+			    name2vertex,
+			    genome2index,
+			    sequence2index,
+			    coordinates,
+			    sequence2genome,
+			    distance,
+			    minanchor);
+    //Save coordinates for each anchor in coordinates map
+    updateCoordinates(coordinates,sequence2genome); 
+  }
+
+
+  //Reverse sequence2index map
+  for(NameLabelMap::iterator i = sequence2index.begin();i!=sequence2index.end();++i){
+#ifdef DEBUG
+    std::cerr << "Seq idx:" << i->second << " " << i->first << std::endl;
+#endif
+    index2sequence[i->second] = i->first;
+  }
+
+  //Restrict to a set of labels
+  LabelSet labels;
+  if(argc>4){
+    for(int i=4;i<argc;++i){
+      NameLabelMap::iterator it = sequence2index.find(argv[i]);
+      if(it != sequence2index.end()){
+	cerr << "#Restricting outputs to sequence label " << argv[i] << endl;
+	labels.insert(it->second);
+      }
+      else{
+	cerr << "#Invalid sequence label " << argv[i] << endl;
+	assert(false);
+      }
+    }
+  }
+  
+  cerr << "#Num of vertices " << num_vertices(g) << endl;
+  cerr << "#Num of edges " << num_edges(g) << endl;
+
+  //Set edge and vertex masks for fast pattern matching
+  cerr << "#Setting edge and vertex masks" << endl;
+  setedgemasks(g,distance,coordinates,sequence2genome);
+  setvertexmasks(g,sequence2genome);
+
+#ifdef TRIMEDGES
+  //Remove edges connected in only one label. This simplifies the
+  //graph by removing flux contributed by a single genome only.
+  std::vector<boost::graph_traits<Graph>::edge_descriptor> eraseEdges;
+  for(boost::graph_traits<Graph>::edge_iterator 
+	eit = edges(g).first;eit!=edges(g).second;++eit){
+    Edge e = *eit;
+
+    BitMask emask = get(edge_labelmask,g,*eit);
+#if defined(STORE_EDGE_LABELS)
+    LabelMap inlabels = get(edge_label,g,*eit);
+    assert(inlabels.size()==emask.count());
+#endif    
+    if(emask.count()<=1){
+      eraseEdges.push_back(*eit);
+    }
+  }
+
+  for(std::vector<boost::graph_traits<Graph>::edge_descriptor>::iterator eit=eraseEdges.begin();eit!=eraseEdges.end();++eit){
+    //std::cerr << "Removing edge:" << get(vertex_name,g,source(*eit,g)) << "-" << get(vertex_name,g,target(*eit,g)) << std::endl;
+    remove_edge(*eit,g);
+  }
+#endif
+
+  property_map < Graph, vertex_name_t >::type vertex_name_map = get(vertex_name, g);
+  
+  //Variables
+  Edge e1;
+  int itercount=0;
+  //Extract all sequences
+  std::set<Label> seqidxSet;
+  for(NameLabelMap::iterator it = sequence2index.begin();it!=sequence2index.end();++it){
+    //it->first is the sequence name
+    //it->second is the index
+    seqidxSet.insert(it->second);
+  }
+#ifdef TIMING
+  time(&now);
+  std::cerr << "TIME_INIT:" << now-lasttime << std::endl;
+  lasttime=now;
+#endif
+  //Initial clustering: build anchor graph, cut, merge, maskshort, recluster
+  //Building the anchor graph
+  //(1) Create filtered graph that supports breakpoints and maskedLCBs
+  std::cerr << "Building alignment graph and initial clutering" << std::endl;
+  std::set<std::pair<Vertex,bool> > breakpoints;
+  VertexSet maskedLCBs;
+
+  //Store vertex pair rather than edge_descriptor to avoid problems with 
+  //stale edge descriptors and lack of < operator needed for std::set
+  EdgeSet maskedEdges; 
+
+  LCBLabelIntervalMap lcbcoords;  
+
+  //Filter graph has predicates for
+  //-Masked edges
+  //-Masked LCBs
+  //
+  //Edge filters
+  synbp_edge_filter<Graph> synefilter(&maskedEdges,&g);
+  LCB_edge_filter<Graph> lcbefilter(&maskedLCBs,&g);
+  compound_edge_filter<LCB_edge_filter<Graph>, synbp_edge_filter<Graph> > 
+    cmpefilter(lcbefilter,synefilter);
+  //Vertex filters
+  LCB_vertex_filter<Graph> lcbvfilter(&maskedLCBs);
+  //The graph
+  LCBSynFilterGraph fglcbsyn(g,cmpefilter,lcbvfilter);
+
+  //
+  //(2.1) Find and mark all breakpoints in the graph
+  //Breakpoint types (stored in edge_category)
+  //RED - potential syntenic brkpt due to multiple incoming/outgoing edges
+  //PURPLE - change in orientation between sequences in adjacent blocks/vertices
+  //GREEN - other flux such as loss of homology in a single genome
+  set<Vertex> dummySet;
+  markBreakpoints(g,breakpoints,maskedEdges,dummySet,sequence2genome);
+#ifdef DEBUG
+  std::cerr << "Marked " << breakpoints.size() << " breakpoints" << std::endl;
+#endif
+  //
+  //(2.2) Calculate LCBs using connected components
+  //This initial clustering is expected to produce an over-segmented
+  //set of LCBs. Later steps in the clustering will collapse LCBs
+  std::vector<LCB> componentMap;
+  std::vector<int> ccvmap(num_vertices(fglcbsyn));
+
+#ifdef DEBUG
+    do_write_graphviz(g, std::string("gout.input.dot"),ccvmap,coordinates,maskedEdges,maskedLCBs);
+#endif
+
+  //TODO
+  //Replace with non BitMask version or use boost::dynamic_bitset
+  //Lookup for lcbid->(labelmask,orientmask)
+  std::map<int,std::pair<BitMask,BitMask> > lcborientmap;
+
+  int numComponents = calc_components_undirected(fglcbsyn,g,componentMap,ccvmap,lcborientmap,sequence2genome);
+  itercount=numComponents;
+#ifdef TIMING
+  time(&now);
+  std::cerr << "TIME_CLUST1:" << now-lasttime << std::endl;
+  lasttime=now;
+#endif
+
+  unsigned int avglen,minlen;
+  int totallen,numc,maxv;
+  int allbps;
+
+#ifdef LCBSTATS
+  //Calculate stats
+  summaryStats(fglcbsyn,componentMap,coordinates,minprintlength, 
+	       numc,minlen,totallen,avglen,maxv,sequence2genome); //using minprintlength
+  allbps=totallen;
+  if(numc>0){
+    std::cerr << "LCB summary orig " << numc << " min:" << minlen << " coverage:" << totallen << "(" << (float)totallen/allbps << ")" << " avg_bp:" << avglen/numc << " maxv:" << maxv<< std::endl;
+  }
+  summaryStats(fglcbsyn,componentMap,coordinates,minlength, 
+	       numc,minlen,totallen,avglen,maxv,sequence2genome); //using minlength
+  if(numc>0){
+    std::cerr << "LCB summary orig " << numc << " min:" << minlen << " coverage:" << totallen << "(" << (float)totallen/allbps << ")" << " avg_bp:" << avglen/numc << " maxv:" << maxv<< std::endl;
+  }
+#endif
+#ifdef DEBUG
+  do_write_graphviz(g, std::string("gout.orig.dot"),ccvmap,coordinates,maskedEdges,maskedLCBs);
+  do_write_graphviz(fglcbsyn, std::string("gout.orig.dot.filtered"),ccvmap,coordinates,maskedEdges,maskedLCBs);
+#endif
+  
+  std::cerr << "Partitioning graph to maintain contraints" << std::endl;
+  //(2.3) Breaks LCBs based on gap lengths, mismatched orient, and mult seqs same genome
+  int cutattempts=0;
+  int origbreaks = breakLCBmincutconnect(componentMap,ccvmap,maskedEdges,g,fglcbsyn,distance,coordinates,seqidxSet,name2vertex,sequence2genome);
+#ifdef DEBUG
+  std::cerr << "Num orig breaks " << origbreaks << std::endl;
+#endif
+  numComponents = calc_components_undirected(fglcbsyn,g,componentMap,ccvmap,lcborientmap,sequence2genome);
+#ifdef TIMING
+  time(&now);
+  std::cerr << "TIME_MINCUT1:" << now-lasttime << std::endl;
+  lasttime=now;
+#endif
+#ifdef DEBUG
+  do_write_graphviz(g, std::string("gout.mincut1.dot"),ccvmap,coordinates,maskedEdges,maskedLCBs);
+  do_write_graphviz(fglcbsyn, std::string("gout.mincut1.dot.filtered"),ccvmap,coordinates,maskedEdges,maskedLCBs);
+#endif
+  
+#ifdef LCBSTATS
+  //Calculate stats
+  summaryStats(fglcbsyn,componentMap,coordinates,minprintlength, 
+	       numc,minlen,totallen,avglen,maxv,sequence2genome); //using minprintlength
+  if(numc>0){
+    std::cerr << "LCB summary post-cuts (" << origbreaks << " cuts) " << numc << " min:" << minlen << " coverage:" << totallen << "(" << (float)totallen/allbps << ")" << " avg_bp:" << avglen/numc << " maxv:" << maxv<< std::endl;
+  }
+  summaryStats(fglcbsyn,componentMap,coordinates,minlength, 
+	       numc,minlen,totallen,avglen,maxv,sequence2genome); //using minlength
+  if(numc>0){
+    std::cerr << "LCB summary post-cuts (" << origbreaks << " cuts) " << numc << " min:" << minlen << " coverage:" << totallen << "(" << (float)totallen/allbps << ")" << " avg_bp:" << avglen/numc << " maxv:" << maxv<< std::endl;
+  }
+#endif
+  
+  
+  int lcbidx=0;
+  
+#ifdef DEBUG
+  //Preceeding step breakLCBmincut and CC should not introduce
+  //bad edges so check predicates
+  cutattempts+=1000;
+  int morebreaks = breakLCBmincutconnect(componentMap,ccvmap,maskedEdges,g,fglcbsyn,distance,coordinates,seqidxSet,name2vertex,sequence2genome,cutattempts);
+  std::cerr << "Num orig breaks " << morebreaks << std::endl;
+  assert(morebreaks==0);
+
+  for(std::vector<LCB >::iterator it = componentMap.begin();it!=componentMap.end();++it){
+    
+    //checkSeqsPerLCB(g,*it)
+    std::map<Label,std::set<Label> > seqspergenomeMap; //tracks the number of seqs per genome in an LCB
+    std::map<Label,std::set<Label> >::iterator gpos;
+    bool inserted;
+    property_map < Graph, vertex_label_t >::type vlabelmap = get(vertex_label,g);
+    std::cerr << " LCB " << lcbidx << std::endl;
+    for(LCB::iterator vit = it->begin();vit!=it->end();++vit){
+      std::cerr << " V:" << *vit << std::endl;
+      printlabel(get(vertex_orient,g,*vit));
+      std::cerr << std::endl;
+      for(LabelSet::iterator sit = vlabelmap[*vit].begin();sit!=vlabelmap[*vit].end();++sit){
+	//std::cerr << " seqidx:" << *sit << " genomeidx:" << sequence2genome[*sit] << std::endl;
+	tie(gpos, inserted) = seqspergenomeMap.insert(std::make_pair(sequence2genome[*sit],std::set<Label>()));
+	gpos->second.insert(*sit);
+	assert(gpos->second.size()==1);
+      }
+    }
+    
+    if(checkLCBGaps(g,*it,ccvmap,coordinates,distance,sequence2genome)){}
+    else{
+      std::cerr << "Bad gap" << std::endl;
+      assert(false);
+    }
+    if(checkLCBOrient(g,*it,sequence2genome)){}
+    else{
+      std::cerr << "Misoriented LCB" << std::endl;
+      //TODO, add orientation condition to mincut
+      //assert(false);
+    }
+    lcbidx++;
+  }
+#endif
+  
+  //
+  //(2.4)Attempt to merge lcbs that are adjacent on two or more genomes
+  //   and do not introduce rearrangements, gaps
+  //
+  std::cerr << "Merging adjacent LCBs" << std::endl;
+  //Update lcbcoords
+  lcbcoords.clear();
+#ifdef DEBUG
+  for(unsigned int k=0;k<componentMap.size();++k){
+    if(componentMap[k].size()>0){
+      property_map < LCBSynFilterGraph, vertex_orient_t >::type omap = get(vertex_orient, fglcbsyn);
+      property_map < LCBSynFilterGraph, vertex_len_t >::type lmap = get(vertex_len, fglcbsyn);
+      int bplen=0;
+      unsigned int len = get_LCB_length(componentMap[k],omap,lmap,coordinates,lcbcoords,k,bplen,sequence2genome); 
+      assert(len>=0);
+    }
+  }
+#endif
+
+  //Breakpoints are stored in maskedEdges. mergeLCBs clears breakpoints between connected,adjacent and congruent LCBs
+  //TODO, put this in loop
+  int nummerges=-1;
+  int totalnummerges=0;
+  while(nummerges!=0){
+    nummerges = mergeLCBsGreedy(g,ccvmap,componentMap,lcborientmap,coordinates,maskedEdges,distance,sequence2genome);
+    totalnummerges = totalnummerges+nummerges;
+  }
+  //int origmerges = mergeLCBs(g,ccvmap,componentMap,lcborientmap,coordinates,maskedEdges,distance,sequence2genome);
+#ifdef DEBUG
+  std::cerr << "Num orig merges " << totalnummerges << std::endl;
+#endif
+  numComponents = calc_components_undirected(fglcbsyn,g,componentMap,ccvmap,lcborientmap,sequence2genome);  
+#ifdef TIMING
+  time(&now);
+  std::cerr << "TIME_MERGE1:" << now-lasttime << std::endl;
+  lasttime=now;
+#endif
+#ifdef DEBUG
+  do_write_graphviz(g, std::string("gout.merge1.dot"),ccvmap,coordinates,maskedEdges,maskedLCBs);
+  do_write_graphviz(fglcbsyn, std::string("gout.merge1.dot.filtered"),ccvmap,coordinates,maskedEdges,maskedLCBs);
+#endif
+#ifdef DEBUG
+  //TESTING
+  //Ensure merge didn't introduce large gaps
+  //origbreaks = breakLCBmincutconnect(componentMap,maskedEdges,g,fglcbsyn,distance,coordinates,seqidxSet,name2vertex,sequence2genome);
+  //std::cerr << "Num orig breaks " << origbreaks << std::endl;
+  //assert(origbreaks==0);
+  //Preceeding step breakLCBmincut and CC should not introduce
+  //bad edges so check predicates
+  lcbidx=0;
+  for(std::vector<LCB >::iterator it = componentMap.begin();it!=componentMap.end();++it){
+    if(checkLCBGaps(g,*it,ccvmap,coordinates,distance,sequence2genome)
+       && checkLCBOrient(g,*it,sequence2genome)){}
+    else{
+      //TODO, fix orient check in mincut
+      //property_map < Graph, vertex_label_t >::type labelmap = get(vertex_label,g);
+      //property_map < Graph, vertex_len_t >::type lenmap = get(vertex_len,g);
+      //BitMask longlabelmask=setSpanMask(*it,lenmap,labelmap,sequence2genome);
+      //assert(!checkLCBOrient(g,*it,longlabelmask,sequence2genome));
+      //assert(false);
+    }
+    lcbidx++;
+  }
+#endif
+  
+#ifdef LCBSTATS
+  //Calculate stats
+  summaryStats(fglcbsyn,componentMap,coordinates,minprintlength, 
+	       numc,minlen,totallen,avglen,maxv,sequence2genome); //using minlength==0
+  if(numc>0){
+    std::cerr << "LCB summary post-merge (" << totalnummerges << " merges) " << numc << " min:" << minlen << " coverage:" << totallen << "(" << (float)totallen/allbps << ")" << " avg_bp:" << avglen/numc << " maxv:" << maxv<< std::endl;
+  }
+  summaryStats(fglcbsyn,componentMap,coordinates,minlength, 
+	       numc,minlen,totallen,avglen,maxv,sequence2genome); //using minlength
+  if(numc>0){
+    std::cerr << "LCB summary post-merge (" << totalnummerges << " merges) " << numc << " min:" << minlen << " coverage:" << totallen << "(" << (float)totallen/allbps << ")" << " avg_bp:" << avglen/numc << " maxv:" << maxv<< std::endl;
+  }
+#endif
+  
+#ifdef DEBUG
+  cutattempts+=1000;
+  int newbreaks = breakLCBmincutconnect(componentMap,ccvmap,maskedEdges,g,fglcbsyn,distance,coordinates,seqidxSet,name2vertex,sequence2genome,cutattempts);
+  std::cerr << "Breaks after merge " << newbreaks << std::endl;
+  assert(newbreaks==0);
+#endif
+  //(7) Remove breakpoints caused by short LCBs
+  unsigned int threshold=shortlcblen;//bp
+  std::cerr << "Masking short lcbs <= length " << threshold << std::endl;
+  
+  
+  unsigned int numremoved=0;
+  std::vector<LCB> currRemovedLCB = componentMap;
+  for(unsigned int k=0;k<componentMap.size();++k){
+    if(componentMap[k].size()>0){
+      assert(componentMap[k].size()>0);
+      //checkLCB(componentMap[k],fglcbsyn);
+      property_map < LCBSynFilterGraph, vertex_orient_t >::type omap = get(vertex_orient, fglcbsyn);
+      property_map < LCBSynFilterGraph, vertex_len_t >::type lmap = get(vertex_len, fglcbsyn);
+      unsigned int len = get_LCB_length(componentMap[k],omap,lmap,coordinates,lcbcoords,k,totallen,sequence2genome); 
+#ifdef DEBUG
+      std::cerr << "LCB " << k << " len:" << len << std::endl;
+#endif
+      if(len >=0 && len < threshold){
+	//Remove LCB
+	removeLCB(componentMap[k],breakpoints,maskedLCBs);
+	currRemovedLCB[k].clear();
+	numremoved++;
+	for(LCB::iterator vit = componentMap[k].begin();vit!=componentMap[k].end();++vit){
+	  put(vertex_relorder,g,*vit,len);
+	}
+      }
+    }
+  }
+#ifdef DEBUG
+  std::cerr << "Removed " << numremoved << " LCBs (len<" << threshold << ") containing " 
+	    << maskedLCBs.size() << " vertices" << std::endl;
+  std::cerr << "Remaining LCBs: " << numComponents-numremoved << std::endl;
+#endif
+  
+  //(8) Update synteny graph
+  //to connect vertices that are adjacent when ignoring short/masked LCBs
+  //be sure to only add good edges to avoid over-merging clusters
+  updateAdjacency(fglcbsyn,
+		  g,
+		  seqidxSet,
+		  coordinates,
+		  lcborientmap,
+		  distance,
+		  maskedEdges,
+		  ccvmap,
+		  componentMap,
+		  sequence2genome);
+
+#ifdef DEBUG
+  //std::cerr << "Iteration 1 of CC. Num LCBs: " << numComponents << std::endl;
+  do_write_graphviz(g, std::string("gout.dot"),ccvmap,coordinates,maskedEdges,maskedLCBs);
+  do_write_graphviz(fglcbsyn, std::string("gout.dot.filtered"),ccvmap,coordinates,maskedEdges,maskedLCBs);
+#endif
+  
+  //
+  //(9) Recalculate breakpoints on updated graph
+  //breakpoints.clear();
+  EdgeSet keepmaskedEdges;
+  for(EdgeSet::iterator it = maskedEdges.begin();it!=maskedEdges.end();++it){
+    Edge e;
+    bool found;
+    tie(e,found) = edge(it->first,it->second,g);
+    assert(found);
+    //BLUE edges are previous cuts
+    if(get(edge_category,g,e)==BLUE){
+      keepmaskedEdges.insert(*it);
+    }
+  }
+#ifdef DEBUG
+  std::cerr << "Keeping " << keepmaskedEdges.size() << " breakpoints" << std::endl;
+#endif
+  maskedEdges.clear();
+  maskedLCBs.clear();     
+  //
+  //Mark breakpoints with short LCBs "masked"
+  markBreakpoints(fglcbsyn,breakpoints,maskedEdges,dummySet,sequence2genome);
+  //markBreakpoints(fglcbsyn,breakpoints,maskedEdges,maskedLCBs,sequence2genome);
+#ifdef DEBUG
+  std::cerr << "Recalc breakpoints. Num:" << breakpoints.size() << std::endl;
+#endif
+  for(EdgeSet::iterator it = keepmaskedEdges.begin();it!=keepmaskedEdges.end();++it){
+    maskedEdges.insert(*it);
+  }
+  
+  maskedLCBs.clear();         
+  numComponents = calc_components_undirected(fglcbsyn,g,componentMap,ccvmap,lcborientmap,sequence2genome);
+#ifdef TIMING
+  time(&now);
+  std::cerr << "TIME_MASKSHORT1:" << now-lasttime << std::endl;
+  lasttime=now;    
+#endif
+#ifdef DEBUG
+  do_write_graphviz(g, std::string("gout.maskshort.dot"),ccvmap,coordinates,maskedEdges,maskedLCBs);
+  do_write_graphviz(fglcbsyn, std::string("gout.maskshort.dot.filtered"),ccvmap,coordinates,maskedEdges,maskedLCBs);
+#endif
+  if(numComponents==0){
+    //No components
+    return 0;
+  }
+#ifdef LCBSTATS
+  //Calculate stats
+  summaryStats(fglcbsyn,componentMap,coordinates,minprintlength, 
+	       numc,minlen,totallen,avglen,maxv,sequence2genome);
+  assert(avglen>0);
+  assert(numc>0);
+  std::cerr << std::endl;
+  
+  //std::cerr << "Iteration 2 of CC. Num LCBs: " << numComponents << std::endl;
+  std::cerr << "LCB summary post-maskshort+merge ("<< numremoved << " LCBs < " << threshold << ") " << numc << " min:" << minlen << " coverage:" << totallen << "(" << (float)totallen/allbps << ")" << " avg_bp:" << avglen/numc << " maxv:" << maxv<< std::endl;
+  summaryStats(fglcbsyn,componentMap,coordinates,minlength, 
+	       numc,minlen,totallen,avglen,maxv,sequence2genome);
+  if(numc>0){
+    std::cerr << "LCB summary post-maskshort+merge ("<< numremoved << " LCBs < " << threshold << ") " << numc << " min:" << minlen << " coverage:" << totallen << "(" << (float)totallen/allbps << ")" << " avg_bp:" << avglen/numc << " maxv:" << maxv<< std::endl;
+  }
+#endif
+
+  itercount=MAXITERS; 
+  int nobreaks=0; //number of iterations with no breaks
+  while(itercount>0){
+    /*
+      (10) Break apart components that violate invariants
+      For each component/LCB
+      -Order the component by projecting blocks onto each member
+      sequence in increasing order along the genome
+      -Iterator over the projection checking the distance invariant at each iteration
+      -If the "gap" between the current block previously seen block > distance (prev.max-curr.min>distance)
+      Break/mask all edges that connect the current block with the previous blocks in the ordering.
+      Save the broken edges in maskedEdges
+    */
+#ifdef DEBUG
+    std::cerr << "Breaking LCBs that violate contraints" << std::endl;
+#endif
+  std::cerr << "Partitioning graph to maintain contraints" << std::endl;
+  cutattempts+=1000;
+  int breaks = breakLCBmincutconnect(componentMap,ccvmap,maskedEdges,g,fglcbsyn,distance,coordinates,seqidxSet,name2vertex,sequence2genome,cutattempts);
+#ifdef DEBUG
+  std::cerr << "Number breaks " << breaks << std::endl;
+#endif
+  if(breaks==0){
+#ifdef DEBUG
+    std::cerr << "Summary mincut shows no breaks necessary, ending iteration at " << MAXITERS-itercount << std::endl;
+#endif
+    nobreaks++;
+    if(nobreaks>MAXSTABLE){
+      break;
+    }
+  }
+  //
+  //(11) Recalc CC
+  //
+  numComponents = calc_components_undirected(fglcbsyn,g,componentMap,ccvmap,lcborientmap,sequence2genome);
+#ifdef TIMING
+  time(&now);
+  std::cerr << "TIME_MINCUT2:" << now-lasttime << std::endl;
+  lasttime=now;
+#endif
+#ifdef DEBUG
+  do_write_graphviz(g, std::string("gout.mincut2.dot"),ccvmap,coordinates,maskedEdges,maskedLCBs);
+  do_write_graphviz(fglcbsyn, std::string("gout.mincut2.dot.filtered"),ccvmap,coordinates,maskedEdges,maskedLCBs);
+#endif
+#ifdef DEBUG
+  int lcbidx=0;
+  //Preceeding step breakLCBmincut and CC should not introduce
+  //bad edges so check predicates
+  for(std::vector<LCB >::iterator it = componentMap.begin();it!=componentMap.end();++it){
+    if(checkLCBGaps(g,*it,ccvmap,coordinates,distance,sequence2genome)
+       && checkLCBOrient(g,*it,sequence2genome)){}
+    else{
+      std::cerr << "Bad gap or orient in LCB " << lcbidx << std::endl;
+      //Assert(false);
+    }
+    lcbidx++;
+  }
+#endif
+  VertexSet shortLCBs;
+  
+#ifdef DEBUG
+  //
+  //Update cc map to match output
+  std::vector<int> xxxc(num_vertices(fglcbsyn));
+  int gvlcbnum=0;
+  int maskedlcbnum=0;
+  for(std::vector<LCB >::iterator it = componentMap.begin();it!=componentMap.end();++it){
+    if(it->size()>0){ 
+      LCB clcb = *it;
+      property_map < LCBSynFilterGraph, vertex_orient_t >::type omap = get(vertex_orient, fglcbsyn);
+      property_map < LCBSynFilterGraph, vertex_len_t >::type lmap = get(vertex_len, fglcbsyn);
+      unsigned int len = get_LCB_length(clcb,omap,lmap,coordinates,lcbcoords,gvlcbnum,totallen,sequence2genome); 
+      if(len>=minlength){
+	for(LCB::iterator vit = it->begin();vit!=it->end();++vit){
+	  xxxc[*vit]=gvlcbnum;
+	}
+	gvlcbnum++;
+      }
+      else{
+	maskedlcbnum--;
+	for(LCB::iterator vit = it->begin();vit!=it->end();++vit){
+	  xxxc[*vit]=maskedlcbnum;
+	  shortLCBs.insert(*vit);
+	}
+      }
+    }
+  }
+  
+  do_write_graphviz(fglcbsyn, std::string("gout.dot.postmincut2"),xxxc,coordinates,maskedEdges,shortLCBs);
+  do_write_graphviz(g, std::string("gout.dot.postmincut2.all"),xxxc,coordinates,maskedEdges,shortLCBs);
+#endif
+  
+#ifdef LCBSTATS
+  
+  //Calculate stats      
+  unsigned int avglen,minlen;
+  int totallen,numc,maxv;
+  summaryStats(fglcbsyn,componentMap,coordinates,minprintlength,
+	       numc,minlen,totallen,avglen,maxv,sequence2genome);
+  assert(avglen>0);
+  assert(numc>0);
+  std::cerr << "LCB summary post-cuts (" << breaks << " cuts) " << numc << " min:" << minlen << " coverage:" << totallen << "(" << (float)totallen/allbps << ")" << " avg_bp:" << avglen/numc << " maxv:" << maxv<< std::endl;
+  summaryStats(fglcbsyn,componentMap,coordinates,minlength,
+	       numc,minlen,totallen,avglen,maxv,sequence2genome);
+  std::cerr << "LCB summary post-cuts (" << breaks << " cuts) " << numc << " min:" << minlen << " coverage:" << totallen << "(" << (float)totallen/allbps << ")" << " avg_bp:" << avglen/numc << " maxv:" << maxv<< std::endl;
+#endif
+  
+  //(12) Mask short LCBs
+  //
+  //Mask short LCBs and fix misorients
+  unsigned int numremoved=0;
+  for(unsigned int k=0;k<componentMap.size();++k){
+    if(componentMap[k].size()>0){
+      assert(componentMap[k].size()>0);
+      property_map < LCBSynFilterGraph, vertex_orient_t >::type omap = get(vertex_orient, fglcbsyn);
+      property_map < LCBSynFilterGraph, vertex_len_t >::type lmap = get(vertex_len, fglcbsyn);
+      int bplen=0;
+      unsigned int len = get_LCB_length(componentMap[k],omap,lmap,coordinates,lcbcoords,k,bplen,sequence2genome); 
+      if(len>=0 && len>=shortlcblen){
+	//TODO, consider if misoriented vertices should be trimmed or kept till end
+	//fixMisOrientedLCBs(g,componentMap[k],maskedLCBs,maskedEdges);
+      }
+      else{
+	//
+	//Mask the LCB
+	removeLCB(componentMap[k],breakpoints,maskedLCBs);
+	numremoved++;
+      }
+    }
+  }
+  
+  //(13) Connect adjacent LCBs after masking short LCBs
+  //
+  //Update synteny graph with short LCBs masked 
+  //be sure to only add good edges to avoid over-merging clusters
+  updateAdjacency(fglcbsyn,
+		  g,
+		  seqidxSet,
+		  coordinates,
+		  lcborientmap,
+		  distance,
+		  maskedEdges,
+		  ccvmap,
+		  componentMap,
+		  sequence2genome);
+  
+  //(14) Recalculate breakpoints on updated graph
+  //breakpoints.clear();
+  EdgeSet keepmaskedEdges;
+  for(EdgeSet::iterator it = maskedEdges.begin();it!=maskedEdges.end();++it){
+    Edge e;
+    bool found;
+    tie(e,found) = edge(it->first,it->second,g);
+    assert(found);
+    if(get(edge_category,g,e)==BLUE){
+      keepmaskedEdges.insert(*it);
+    }
+  }
+#ifdef DEBUG
+  std::cerr << "Keeping " << keepmaskedEdges.size() << " breakpoints" << std::endl;
+#endif
+  maskedEdges.clear();
+  maskedLCBs.clear();     
+  //Mark breakpoints on original graph
+  markBreakpoints(fglcbsyn,breakpoints,maskedEdges,dummySet,sequence2genome);
+  //markBreakpoints(fglcbsyn,breakpoints,maskedEdges,maskedLCBs,sequence2genome);
+#ifdef DEBUG
+  std::cerr << "Recalc breakpoints. Num:" << breakpoints.size() << std::endl;
+#endif
+  for(EdgeSet::iterator it = keepmaskedEdges.begin();it!=keepmaskedEdges.end();++it){
+    maskedEdges.insert(*it);
+  }
+  
+  maskedLCBs.clear();  
+  numComponents = calc_components_undirected(fglcbsyn,g,componentMap,ccvmap,lcborientmap,sequence2genome);
+#ifdef TIMING
+  time(&now);
+  std::cerr << "TIME_MASK2:" << now-lasttime << std::endl;
+  lasttime=now;
+#endif
+#ifdef DEBUG
+  do_write_graphviz(g, std::string("gout.mask2.dot"),ccvmap,coordinates,maskedEdges,maskedLCBs);
+  do_write_graphviz(fglcbsyn, std::string("gout.mask2.dot.filtered"),ccvmap,coordinates,maskedEdges,maskedLCBs);
+#endif 
+#ifdef LCBSTATS
+  //Calculate stats
+  summaryStats(fglcbsyn,componentMap,coordinates,minprintlength,
+	       numc,minlen,totallen,avglen,maxv,sequence2genome);
+  
+  if(numc>0){
+    std::cerr << "LCB summary post-maskshort ("<< numremoved << " LCBs < " << threshold << ") " << numc << " min:" << minlen << " coverage:" << totallen << "(" << (float)totallen/allbps << ")" << " avg_bp:" << avglen/numc << " maxv:" << maxv<< std::endl;
+  } 
+  summaryStats(fglcbsyn,componentMap,coordinates,minlength,
+	       numc,minlen,totallen,avglen,maxv,sequence2genome);
+  
+  if(numc>0){
+    std::cerr << "LCB summary post-maskshort ("<< numremoved << " LCBs < " << threshold << ") " << numc << " min:" << minlen << " coverage:" << totallen << "(" << (float)totallen/allbps << ")" << " avg_bp:" << avglen/numc << " maxv:" << maxv<< std::endl;
+  }
+#endif
+  std::cerr << "Merging adjacent LCBs " << std::endl;
+
+  //
+  //(XX)Attempt to merge LCBs
+  int nummerges=-1;
+  int totalnummerges=0;
+  while(nummerges!=0){
+    nummerges = mergeLCBsGreedy(g,ccvmap,componentMap,lcborientmap,coordinates,maskedEdges,distance,sequence2genome);
+    totalnummerges = totalnummerges+nummerges;
+    //TODO, consider fixing misoriented vertices here
+    //for(unsigned int k=0;k<componentMap.size();++k){
+    //fixMisOrientedLCBs(g,componentMap[k],maskedLCBs,maskedEdges);
+    //}
+    
+    //
+    //(XX)Recalculate connected components
+#ifdef DEBUG
+    std::cerr << "Num merges:" << nummerges << std::endl;
+#endif
+    numComponents = calc_components_undirected(fglcbsyn,g,componentMap,ccvmap,lcborientmap,sequence2genome);      
+#ifdef DEBUG
+    std::cerr << "Recalc components: " << numComponents << std::endl;
+#endif
+  }
+#ifdef TIMING
+  time(&now);
+  std::cerr << "TIME_MERGE2:" << now-lasttime << std::endl;
+  lasttime=now;
+#endif
+#ifdef DEBUG
+  do_write_graphviz(g, std::string("gout.merge2.dot"),ccvmap,coordinates,maskedEdges,maskedLCBs);
+  do_write_graphviz(fglcbsyn, std::string("gout.merge2.dot.filtered"),ccvmap,coordinates,maskedEdges,maskedLCBs);
+#endif      
+#ifdef DEBUG
+  for(std::vector<LCB >::iterator it = componentMap.begin();it!=componentMap.end();++it){
+    if(it->size()>0){ 
+      LCB clcb = *it;
+      property_map < LCBSynFilterGraph, vertex_orient_t >::type omap = get(vertex_orient, fglcbsyn);
+      property_map < LCBSynFilterGraph, vertex_len_t >::type lmap = get(vertex_len, fglcbsyn);
+      unsigned int len = get_LCB_length(clcb,omap,lmap,coordinates,lcbcoords,0,totallen,sequence2genome); 
+      if(len>=minlength){
+      }
+      else{
+	for(LCB::iterator vit = it->begin();vit!=it->end();++vit){
+	  //shortLCBs.insert(*vit);
+	}
+      }
+    }
+  }
+  do_write_graphviz(g, std::string("gout.dot.postimerge."+lexical_cast<std::string>(MAXITERS-itercount)+".all"),ccvmap,coordinates,maskedEdges,shortLCBs);
+#endif
+#ifdef LCBSTATS
+  //Calculate stats
+  summaryStats(fglcbsyn,componentMap,coordinates,minprintlength,
+	       numc,minlen,totallen,avglen,maxv,sequence2genome);
+  if(numc>0){
+    std::cerr << "LCB summary post-maskshort+merge ("<< totalnummerges << " merges) " << numc << " min:" << minlen << " coverage:" << totallen << "(" << (float)totallen/allbps << ")" << " avg_bp:" << avglen/numc << " maxv:" << maxv<< std::endl;
+  }
+  summaryStats(fglcbsyn,componentMap,coordinates,minlength,
+	       numc,minlen,totallen,avglen,maxv,sequence2genome);
+  if(numc>0){
+    std::cerr << "LCB summary post-maskshort+merge ("<< totalnummerges << " merges) " << numc << " min:" << minlen << " coverage:" << totallen << "(" << (float)totallen/allbps << ")" << " avg_bp:" << avglen/numc << " maxv:" << maxv<< std::endl;
+  }
+#endif
+  itercount--;
+  }// end numiters
+  
+  //(9) Remove misoriented vertices
+  VertexSet shortLCBs;
+  shortLCBs.clear();
+  for(std::vector<LCB >::iterator it = componentMap.begin();it!=componentMap.end();++it){
+    if(it->size()>0){ 
+      LCB clcb = *it;
+      property_map < LCBSynFilterGraph, vertex_orient_t >::type omap = get(vertex_orient, fglcbsyn);
+      property_map < LCBSynFilterGraph, vertex_len_t >::type lmap = get(vertex_len, fglcbsyn);
+      unsigned int len = get_LCB_length(clcb,omap,lmap,coordinates,lcbcoords,0,totallen,sequence2genome); 
+      if(len>=shortlcblen){
+	  fixMisOrientedLCBs(g,clcb,maskedLCBs,maskedEdges,sequence2genome);
+      }
+      else{
+#ifdef DEBUG
+	for(LCB::iterator vit = it->begin();vit!=it->end();++vit){
+	  shortLCBs.insert(*vit);
+	}
+#endif
+      }
+    }
+  }
+  
+  numComponents = calc_components_undirected(fglcbsyn,g,componentMap,ccvmap,lcborientmap,sequence2genome);
+  
+  //Sanity check to ensure no large gaps
+  //TODO
+  //Try and avoid this condition
+  //Currently remvong LCBs in fixMisOriented may introduce gaps > threshold
+  //so we need to recut here. 
+  int breaks=-1;
+  while(breaks!=0){
+    cutattempts += 1000;
+    breaks=breakLCBmincutconnect(componentMap,ccvmap,maskedEdges,g,fglcbsyn,distance,coordinates,seqidxSet,name2vertex,sequence2genome,cutattempts);
+#ifdef DEBUG
+    std::cerr << "Num final breaks " << breaks << std::endl;
+#endif
+    numComponents = calc_components_undirected(fglcbsyn,g,componentMap,ccvmap,lcborientmap,sequence2genome);
+  }
+  
+#ifdef DEBUG
+  cutattempts+=1000;
+  breaks=breakLCBmincutconnect(componentMap,ccvmap,maskedEdges,g,fglcbsyn,distance,coordinates,seqidxSet,name2vertex,sequence2genome,cutattempts);
+  assert(breaks==0);
+#endif
+#ifdef LCBSTATS
+  //Calculate stats
+  summaryStats(fglcbsyn,componentMap,coordinates,minprintlength,
+	       numc,minlen,totallen,avglen,maxv,sequence2genome);
+  if(numc>0){
+    std::cerr << "LCB summary final " << numc << " min:" << minlen 
+	      << " coverage:" << totallen << "(" << (float)totallen/allbps << ")" << " avg_bp:" << avglen/numc << " maxv:" << maxv<< std::endl;
+  }
+  summaryStats(fglcbsyn,componentMap,coordinates,minlength,
+	       numc,minlen,totallen,avglen,maxv,sequence2genome);
+  if(numc>0){
+    std::cerr << "LCB summary final " << numc << " min:" << minlen 
+	      << " coverage:" << totallen << "(" << (float)totallen/allbps << ")" << " avg_bp:" << avglen/numc << " maxv:" << maxv<< std::endl;
+  }
+#endif
+#ifdef DEBUG
+  do_write_graphviz(g, std::string("gout.dot.final.all"),ccvmap,coordinates,maskedEdges,shortLCBs);
+#endif
+#ifdef TIMING
+  time(&now);
+  std::cerr << "TIME_MINCUT3:" << now-lasttime << std::endl;
+  lasttime=now;
+#endif
+#ifdef DEBUG
+	do_write_graphviz(g, std::string("gout.final.dot"),ccvmap,coordinates,maskedEdges,maskedLCBs);
+	do_write_graphviz(fglcbsyn, std::string("gout.final.dot.filtered"),ccvmap,coordinates,maskedEdges,maskedLCBs);
+#endif
+  //
+  //
+  //Optionally remove overlapping LCBs, keep longest LCBs spanning each region
+  int idx=0;
+  //Index of lcb in componentMap
+  lcbidx=0;
+  std::map<int,int> lcboverlapMap; //lcbid->longer_overlapping_lcbid
+  std::set<int> bestLCBs; //set of LCBs that are longest over at least one genomic segment
+  std::map<int,int> lcblenMap; //lcbid->max_seq_span
+  if(removeoverlaps){
+    std::cerr << "Sorting LCBs along each seq. Num seqs " << seqidxSet.size() << std::endl;
+    property_map < Graph, vertex_orient_t >::type orientmap = get(vertex_orient,g);
+    property_map < Graph, vertex_len_t >::type lenmap = get(vertex_len,g);
+    typedef iloc TLoc;
+    LCB::iterator it;
+    std::vector<std::vector<TLoc> > olaplcbs;
+    olaplcbs.resize(seqidxSet.size()+1);
+    for(std::vector<LCB >::iterator it = componentMap.begin();it!=componentMap.end();++it){
+      std::cerr << "Looking at LCB " << lcbidx << " of size " << it->size() << std::endl;
+      std::set<Label> currseqs;
+      property_map < LCBSynFilterGraph, vertex_orient_t >::type omap = get(vertex_orient, fglcbsyn);
+      property_map < LCBSynFilterGraph, vertex_len_t >::type lmap = get(vertex_len, fglcbsyn);
+      for(LCB::iterator lit = it->begin();lit!=it->end();++lit){
+	OrientedLabelSet::iterator it2_end = omap[*lit].end();
+	for(OrientedLabelSet::iterator it2 = omap[*lit].begin();it2!=it2_end;++it2){
+	  Label seqidx = it2->first;
+	  currseqs.insert(seqidx);
+	  }
+      }
+      unsigned int len = get_LCB_length(*it,omap,lmap,coordinates,lcbcoords,lcbidx,totallen,sequence2genome); 
+      lcblenMap[lcbidx] = len;
+      for(std::set<Label>::iterator it2 = currseqs.begin();it2!=currseqs.end();++it2){
+	Label seqidx = *it2;
+	assert(lcbcoords.find(std::make_pair(lcbidx,seqidx))!=lcbcoords.end());
+	//Label genomeidx = sequence2genome[seqidx];
+	TLoc t1,t2;
+	t1.first = lcbcoords[std::make_pair(lcbidx,seqidx)].second;
+	t1.second = 0;
+	t1.blocknum=lcbidx;
+	t2.first = lcbcoords[std::make_pair(lcbidx,seqidx)].first;
+	t2.second = 1;
+	t2.blocknum=lcbidx;
+	if(t1.first-t2.first > 0){//{(int)shortlcblen){
+	  std::cerr << "lcbidx: " << lcbidx << " sidx: " << seqidx << " " << olaplcbs.size() << std::endl;
+	  assert(seqidx<olaplcbs.size());
+	  olaplcbs[seqidx].push_back(t1);
+	  olaplcbs[seqidx].push_back(t2);
+	}
+      }
+      lcbidx++;
+      idx++;  
+    }
+    
+    std::cerr << "Iterating over LCBs to look for overlaps" << std::endl;
+    for(int i=0;i<(int)seqidxSet.size()+1;i++){
+      int open=0;
+      assert(i<(int)olaplcbs.size());
+      std::vector<TLoc> &ait=olaplcbs[i];
+      std::cerr << "Sorting on seq " << i << std::endl;
+      sort(ait.begin(),ait.end(),poscmp<TLoc>());
+      std::cerr << "sorted" << std::endl;
+      std::set<int> currlcbs;
+      for(std::vector<TLoc>::iterator pit = ait.begin();pit!=ait.end();pit++){
+	int currlen = lcblenMap[pit->blocknum];
+	if(pit->second>0){
+	  int longestlen = lcblenMap[pit->blocknum];
+	  int bestlcb = pit->blocknum;
+	  std::cerr << "LCB open " << pit->blocknum << " len " << currlen << std::endl;
+	  if(open>0){
+	    //in overlap
+	    //only remove short LCBs that are overlapped by longer lcbs
+	    //if(currlen<shortlcblen){
+	    std::cerr << "Number overlaps " << currlcbs.size() << std::endl;
+	    for(std::set<int>::iterator cit=currlcbs.begin();cit!=currlcbs.end();++cit){
+	      std::cerr << "Overlapping " << *cit << " len " << lcblenMap[*cit] << std::endl;
+	      assert(*cit!=pit->blocknum);
+	      if(lcblenMap[*cit]>longestlen){
+		longestlen = lcblenMap[*cit];
+		bestlcb = *cit;
+	      }
+	      //overlapping lcb > current lcb
+	      if(lcblenMap[*cit]>currlen){
+		if(lcboverlapMap.find(pit->blocknum)!=lcboverlapMap.end()){
+		  //
+		  if(lcblenMap[*cit]>lcblenMap[lcboverlapMap[pit->blocknum]]){
+		    lcboverlapMap[pit->blocknum] = *cit;
+		  }
+		}
+		else{
+		  lcboverlapMap[pit->blocknum] = *cit;
+		}
+	      }
+	    }
+	  }
+	  open++;
+	  assert(currlcbs.find(pit->blocknum)==currlcbs.end());
+	  currlcbs.insert(pit->blocknum);
+	  bestLCBs.insert(bestlcb);
+	  std::cerr << "opened " << pit->blocknum << std::endl;
+	}
+	else{
+	  open--;
+	  assert(currlcbs.find(pit->blocknum)!=currlcbs.end());
+	  assert(currlcbs.size()>0);
+	  currlcbs.erase(pit->blocknum);
+	  std::cerr << "closed " << pit->blocknum << std::endl;
+	  int longestlen = lcblenMap[pit->blocknum];
+	  int bestlcb = pit->blocknum;
+	  if(open){
+	    assert(currlcbs.size()>0);
+	  }
+	  for(std::set<int>::iterator cit=currlcbs.begin();cit!=currlcbs.end();++cit){
+	    if(lcblenMap[*cit]>longestlen){
+	      longestlen = lcblenMap[*cit];
+	      bestlcb = *cit;
+	    }
+	    if(lcblenMap[*cit]>currlen){
+	      if(lcboverlapMap.find(pit->blocknum)!=lcboverlapMap.end()){
+		if(lcblenMap[*cit]>lcblenMap[lcboverlapMap[pit->blocknum]]){
+		  lcboverlapMap[pit->blocknum] = *cit;
+		}
+	      }
+	      else{
+		lcboverlapMap[pit->blocknum] = *cit;
+	      }
+	    }
+	  }
+	  bestLCBs.insert(bestlcb);
+	}
+      }
+    }
+    
+  }
+  lcbidx=0;
+  std::vector<LCB > validLCBs;
+    if(removeoverlaps){
+      for(std::vector<LCB >::iterator it = componentMap.begin();it!=componentMap.end();++it){
+	if(bestLCBs.find(lcbidx)==bestLCBs.end()){ 
+	  std::cerr << "LCB idx: " << lcbidx;
+	  if(lcboverlapMap.find(lcbidx)!=lcboverlapMap.end()){
+	    std::cerr << " overlaps " << lcboverlapMap[lcbidx];
+	  }
+	  LCB newlcb;
+	  newlcb.insert(newlcb.end(),it->begin(),it->end());
+	  newlcb.insert(newlcb.end(),componentMap[lcboverlapMap[lcbidx]].begin(),componentMap[lcboverlapMap[lcbidx]].end());
+	  validLCBs.push_back(newlcb);
+	}
+	else{
+	  validLCBs.push_back(*it);
+	}
+	lcbidx++;
+      }
+    }
+    else{
+      validLCBs = componentMap;
+    }
+#ifdef LCBSTATS
+    //Calculate stats
+    summaryStats(fglcbsyn,validLCBs,coordinates,minprintlength,
+		 numc,minlen,totallen,avglen,maxv,sequence2genome);
+    if(numc>0){
+      std::cerr << "LCB summary final post-processing " << numc << " min:" << minlen 
+		<< " coverage:" << totallen << "(" << (float)totallen/allbps << ")" << " avg_bp:" << avglen/numc << " maxv:" << maxv<< std::endl;
+    }
+    summaryStats(fglcbsyn,validLCBs,coordinates,minlength,
+		 numc,minlen,totallen,avglen,maxv,sequence2genome);
+    if(numc>0){
+      std::cerr << "LCB summary final post-processing " << numc << " min:" << minlen 
+		<< " coverage:" << totallen << "(" << (float)totallen/allbps << ")" << " avg_bp:" << avglen/numc << " maxv:" << maxv<< std::endl;
+    }
+#endif
+
+  //Write out LCBs
+  //Format is 2 lines per LCB
+  //I seq1 orient1 coords1 ... seqN orientN coordsN
+  //V feat1 feat2 .... featN
+    assert((unsigned int)numComponents==componentMap.size());
+    lcbidx=0;
+    idx=0;
+    unsigned int maxlcblen=0;
+    property_map < Graph, vertex_orient_t >::type vmap = get(vertex_orient,g);
+    //for(std::vector<LCB >::iterator it = componentMap.begin();it!=componentMap.end();++it){
+    for(std::vector<LCB >::iterator it = validLCBs.begin();it!=validLCBs.end();++it){
+      if(it->size()>0){ 
+	idx++;
+	LCB clcb = *it;
+	unsigned int len;
+	property_map < LCBSynFilterGraph, vertex_orient_t >::type omap = get(vertex_orient, fglcbsyn);
+	property_map < LCBSynFilterGraph, vertex_len_t >::type lmap = get(vertex_len, fglcbsyn);
+	unsigned int nlen = get_LCB_length(clcb,omap,lmap,coordinates,lcbcoords,idx,totallen,sequence2genome);
+	len=nlen;
+	if(len>=minprintlength){
+	  maxlcblen = (len>maxlcblen ? len : maxlcblen);
+	  unsigned int numcomps=0; 
+	  if(checkLCBOrient(fglcbsyn,*it,sequence2genome)){
+	    if(checkLCBGaps(g,*it,ccvmap,coordinates,distance,sequence2genome)){
+	      std::cout << "I ";
+	      //
+	      //Save mask for the LCB
+	      BitMask labelsmask;
+	      BitMask orientmask;
+	      std::vector<Vertex> badV;
+	      tie(labelsmask,orientmask) = setLCBOrient(g,*it,badV,sequence2genome);
+	      SeqSet currlabelset;
+	      for(LCB::iterator vit = it->begin();vit!=it->end();++vit){
+		currlabelset.insert(vmap[*vit].begin(),vmap[*vit].end());
+	      }
+	      for(OrientedLabelSet::iterator oit = currlabelset.begin();oit != currlabelset.end();++oit){
+		Label seqidx = oit->first;
+		Label genomeidx = sequence2genome[seqidx];
+		assert(labelsmask.test(genomeidx));
+		std::cout << index2sequence[seqidx] <<" " << (orientmask.test(genomeidx) ? '+' : '-') << " ";
+		std::cout << lcbcoords[std::make_pair(idx,seqidx)].first << "-" << lcbcoords[std::make_pair(idx,seqidx)].second << " ";
+	      }
+	      std::cout << " ;" << std::endl;
+	      std::cout << "V ";
+	      for(LCB::iterator vit = it->begin();vit!=it->end();++vit){
+		std::cout << get(vertex_name,fglcbsyn,*vit) << " ";
+		numcomps++;
+	      }
+	      std::cout << " ;" << std::endl; 
+	    }
+	    else{
+	      std::cerr << "SKIPPING LCB:" << idx << " Bad gap" << std::endl;
+	      assert(false);
+	    }
+	  }
+	  else{
+	    std::cerr << "BAD LCB:" << idx << " Mis-matched lable orientation" << std::endl;
+	    assert(false);
+	  }
+	}
+	else{
+	  std::cerr << "SKIPPING LCB:" << lcbidx << " len:" << nlen << " < " << minprintlength << std::endl;
+	}
+      }
+      lcbidx++;
+    }
+    std::cerr << "Max LCB length " << maxlcblen << std::endl;
+#ifdef TIMING
+    time(&now);
+    std::cerr << "TIME_POSTPROC:" << now-lasttime << std::endl;
+    lasttime=now;    
+#endif
+    return 0;
+}
+
+
+//################
+//General utilities
+//
+//
+
+// Distance between two intervals [s1,e1] and [s2,e2], measured on their starts.
+// Returns 0 when the start of one interval lies strictly inside the other
+// interval; otherwise returns the absolute difference of the two start
+// coordinates (the end coordinates are only used for the containment test).
+unsigned int getIntervalDist(int s1, int e1, int s2, int e2){
+  const bool firstStartsInSecond = (s2<s1) && (s1<e2);
+  const bool secondStartsInFirst = (s1<s2) && (s2<e1);
+  if(firstStartsInSecond || secondStartsInFirst){
+    return 0;
+  }
+  //No start falls inside the other interval: order the starts and subtract
+  const int lower = (s1<s2) ? s1 : s2;
+  const int upper = (s1<s2) ? s2 : s1;
+  assert(upper-lower>=0);
+  return static_cast<unsigned int>(upper)-lower;
+}
+
diff --git a/delta-dups.sh b/delta-dups.sh
new file mode 100755
index 0000000..3ea9c3f
--- /dev/null
+++ b/delta-dups.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+#Identify duplicated regions in a pairwise delta file from NUCmer
+
+mugsypath=$MUGSY_INSTALL
+mummerpath=$MUGSY_INSTALL/MUMmer3.20/
+deltafile=$1;
+
+#Run delta-filter -b for duplications that are detected using LIS
+$mummerpath/delta-filter -b $deltafile > $deltafile.b
+#Capture additional dup/repeat regions by looking for overlapping alignments
+#Alignments that overlap by more than half their lengths are reports as dups
+$mummerpath/delta-filter -m $deltafile > $deltafile.m
+$mummerpath/delta-filter -v -u 50 $deltafile.m > $deltafile.u
+#Dump union of two sets to maf format
+$mummerpath/delta2maf $deltafile.b 2> /dev/null | $mugsypath/fixMAFnames.pl 
+#Skip first line
+$mummerpath/delta2maf $deltafile.u 2> /dev/null | $mugsypath/fixMAFnames.pl | tail -n +1 
+rm $deltafile.b &
+rm $deltafile.m &
+rm $deltafile.u &
+
diff --git a/fixMAFnames.pl b/fixMAFnames.pl
new file mode 100755
index 0000000..318b2a8
--- /dev/null
+++ b/fixMAFnames.pl
@@ -0,0 +1,28 @@
+#!/usr/bin/perl
+
+use strict;
+
+
+#Rewrite the name column of MAF 's' lines read from STDIN and echo every
+#line (changed or not) to STDOUT. Three name layouts are recognised:
+#  s A:B:from-to:n:strand:n  ->  s A.B
+#  s A:B:n:strand:n          ->  s A.B
+#  s A                       ->  s A.A
+while(defined(my $line = <STDIN>)){
+    #Try the layouts from most to least specific; the first substitution
+    #that applies wins and the remaining ones are not attempted
+    $line =~ s/^s\s+(\S+)\:(\S+):\d+-\d+:\d+:[+-]:\d+/s $1.$2/
+        or $line =~ s/^s\s+(\S+)\:(\S+):\d+:[+-]:\d+/s $1.$2/
+        or $line =~ s/^s\s+(\S+)(\s+)/s $1.$1$2/;
+    print $line;
+}
+
+
diff --git a/labelblocks.pl b/labelblocks.pl
new file mode 100755
index 0000000..bfadaf2
--- /dev/null
+++ b/labelblocks.pl
@@ -0,0 +1,59 @@
+#!/usr/bin/perl
+
+use strict;
+
+#Read MAF from STDIN and print it to STDOUT, handing every alignment block
+#that has at least one 's' line to labelblocks() so it receives a running
+#label. Lines before the first 'a score' line are passed through untouched.
+my $members=0;    #number of 's' lines seen in the current block
+my $label=0;      #last label number handed out
+my $blockopen=0;  #set to 1 once the first 'a score' line has been read
+my @lines=();     #lines buffered for the current block
+
+while(my $line=<STDIN>){
+
+    if($line =~ /^a score/){
+        #A new block starts here, so emit the block buffered so far
+        if($blockopen==1){
+            if($members>=1){
+                labelblocks(\@lines,$members,++$label);
+            }
+            else{
+                print @lines;
+            }
+        }
+        $blockopen=1;
+        $members=0;
+        @lines=();
+    }
+    if($blockopen==1){
+        push @lines,$line;
+    }
+    else{
+        print $line;
+    }
+    if($line =~ /^s\s\S/){
+        $members++;
+    }
+}
+#Flush the last block with the same rule used inside the loop; it used to
+#require more than one member, so a final single-row block lost its label
+if($blockopen==1){
+    if($members>=1){
+        labelblocks(\@lines,$members,++$label);
+    }
+    else{
+        print @lines;
+    }
+}
+
+#Print one buffered MAF block, appending " label=N " to its header line and
+#" orient=+ " when the header does not already mention an orientation.
+#  $lines      - array ref of the block's lines, 'a score=' header first;
+#                the header element is modified in place
+#  $nummembers - number of 's' lines in the block (not used here)
+#  $label      - label number to attach
+#Dies when the first line is not an 'a score=' header.
+sub labelblocks{
+    my($lines,$nummembers,$label) = @_;
+    #Work on the array that was passed in rather than on a file-level @lines
+    die "Block does not start with an 'a score=' line" if($lines->[0] !~ /^a score=/);
+    chomp $lines->[0];
+    $lines->[0] .= " label=$label ";
+    if($lines->[0] !~ /orient/){
+        $lines->[0] .= " orient=+ ";
+    }
+    $lines->[0] .= "\n";
+    print @$lines;
+}
diff --git a/maf2fasta.pl b/maf2fasta.pl
new file mode 100755
index 0000000..b29e41a
--- /dev/null
+++ b/maf2fasta.pl
@@ -0,0 +1,73 @@
+#!/usr/bin/perl
+#Convert MAF to FASTA
+#Optionally only convert blocks that contain label 
+#./maf2fasta.pl [label] < maf > fasta
+
+use strict;
+
+#State of the alignment block currently being read
+#$currcoord and $currorient are never assigned in this script, so they are
+#stored as undef and the sort on the coordinate below compares undef values
+my($currscore,$currlabel,$currcoord,$currorient);
+my $saveblock=0;  #becomes 1 at the first 's' line and is never reset
+my @matches;      #'s' rows collected for the current block
+
+my @blocks;       #finished blocks: [score,label,orient,coord,\@rows]
+
+while(my $line=<STDIN>){
+    if(my($score) = ($line =~ /a\s+score=([\d\.\-]+)/)){
+        #Block header: store a copy of the previous block, then start afresh
+        push @blocks,[$currscore,$currlabel,$currorient,$currcoord,[@matches]] if($saveblock>0);
+        $currscore = $score;
+        ($currlabel) = ($line =~ /label=(\d+)/);
+        @matches=();
+    }
+    elsif(my @fields = ($line =~ /s\s+(\S+)\s+(\d+)\s+(\d+)\s+([+-])\s+(\d+)\s+(\S+)/)){
+        #@fields = (accession,start,len,orientation,seqlength,seq)
+        my $accession = $fields[0];
+        #A '.' is only accepted as the separator of a Genome.Sequence name
+        die if($accession !~ /([^\.]+)\.(\S+)/ && $accession =~ /\./);
+        push @matches,[@fields];
+        $saveblock=1;
+    }
+}
+#Store the block that was still open at end of input
+push @blocks,[$currscore,$currlabel,$currorient,$currcoord,[@matches]] if($saveblock>0);
+
+foreach my $block (sort {$a->[3] <=> $b->[3]} @blocks){
+    #With a label argument only blocks carrying that label are printed
+    next if($ARGV[0] && $block->[1] ne $ARGV[0]);
+    &printFASTA(@$block) ;
+}
+
+#Print every row of one alignment block as a FASTA record, then a "=" line
+#  $score,$label,$orient,$coord - block attributes (not used in the output)
+#  $matches - array ref of rows [accession,start,len,orientation,seqlength,seq]
+#The header is ">accession start len orientation seqlength" and the sequence
+#is wrapped at 60 characters per line.
+sub printFASTA{
+    my($score,$label,$orient,$coord,$matches) = @_;
+    foreach my $m (@$matches){
+        print ">$m->[0] $m->[1] $m->[2] $m->[3] $m->[4]\n";
+        my $seq = $m->[5];
+        my $offset = 0;
+        while($offset<length($seq)){
+            print substr($seq,$offset,60),"\n";
+            $offset += 60;
+        }
+    }
+    print "=\n";
+}
diff --git a/maf2gp.pl b/maf2gp.pl
new file mode 100755
index 0000000..014f13d
--- /dev/null
+++ b/maf2gp.pl
@@ -0,0 +1,75 @@
+#!/usr/bin/perl
+
+use strict;
+
+
+#Read MAF from STDIN and, for every alignment block, print the pair formed
+#by the row of accession $ARGV[1] (reference) and the row of accession
+#$ARGV[2] (query) through printpair(). $ARGV[0] selects the strand mode
+#and is interpreted by printpair().
+print "0 0 0\n";
+print "0 0 0\n";
+print "\n\n";
+my @x;  #'s' rows of the current block with the sequence column removed
+
+while(my $line=<STDIN>){
+    if($line =~ /^a/){
+        #A new block starts: emit the pair collected from the previous one
+        _printblockpair(\@x);
+        @x = ();
+    }
+    elsif($line =~ /^(s.+)\s+\S+/){
+        push @x,$1;
+    }
+}
+#Emit the pair of the block that was still open at end of input
+_printblockpair(\@x);
+
+#Pick the reference and query rows out of one block and print them.
+#  $rows - array ref of 's' rows of a single block
+#Nothing is printed unless both rows are present, and the reference row is
+#always passed first regardless of the order of the rows in the block.
+sub _printblockpair{
+    my($rows) = @_;
+    my($ref,$qry);
+    foreach my $elt (@$rows){
+        my($acc) = ($elt =~ /s\s+(\w+)/);
+        next if(!defined $acc);
+        $ref = $elt if($acc eq $ARGV[1]);
+        $qry = $elt if($acc eq $ARGV[2]);
+    }
+    printpair($ref,$qry) if(defined $ref && defined $qry);
+}
+
+
+
+#Print one segment for a reference/query pair of MAF 's' rows: both rows as
+#'#' comment lines, then (when the strand combination is selected) two
+#"x y 100" points followed by two blank lines.
+#  $ref,$qry - rows of the form "s name start length strand srcsize"
+#Reads $ARGV[0]: with '-' only +/- and -/+ pairs produce points, otherwise
+#only +/+ pairs do. Coordinates of a '-' row are subtracted from its srcsize.
+sub printpair{
+    my($ref,$qry) = @_;
+
+    my $row = qr/s\s+(\w+)\s+(\d+)\s+(\d+)\s+([\+\-])\s+(\d+)/;
+    my($refa,$refb,$refspan,$refo,$reflen) = ($ref =~ $row);
+    my($qrya,$qryb,$qryspan,$qryo,$qrylen) = ($qry =~ $row);
+    my $refe = $refb + $refspan;
+    my $qrye = $qryb + $qryspan;
+    print "#$ref\n";
+    print "#$qry\n";
+
+    my $strands = "$refo$qryo";
+    my $reverse = ($ARGV[0] eq '-');
+    if($strands eq '++' && !$reverse){
+        print "$refb $qryb 100\n";
+        print "$refe $qrye 100\n\n\n";
+    }
+    elsif($reverse && $strands eq '+-'){
+        #Query is on the reverse strand: flip its coordinates
+        ($qryb,$qrye) = ($qrylen - $qryb,$qrylen - $qrye);
+        print "$refe $qrye 100\n";
+        print "$refb $qryb 100\n\n\n";
+    }
+    elsif($reverse && $strands eq '-+'){
+        #Reference is on the reverse strand: flip its coordinates
+        ($refb,$refe) = ($reflen - $refb,$reflen - $refe);
+        print "$refe $qrye 100\n";
+        print "$refb $qryb 100\n\n\n";
+    }
+}
diff --git a/maf2synchain.pl b/maf2synchain.pl
new file mode 100644
index 0000000..684f855
--- /dev/null
+++ b/maf2synchain.pl
@@ -0,0 +1,115 @@
+#!/usr/bin/perl
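+#Convert MAF alignment blocks (anchors) to anchor adjacency records
+#For each sequence, anchors are sorted by start coordinate and one line is
+#printed per pair of neighbouring anchors, including the distance between them
+#./maf2synchain.pl < maf > adjacencies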
+
+use strict;
+
+my $anchors = {};       #anchornum -> accession -> {gidx,sidx,start,end,orient}
+my $seq2anchors = {};   #accession -> anchornum -> number of 's' lines seen
+my $seq2index = {};     #accession -> index, assigned in order of first appearance
+my $genome2index = {};  #genome -> index, assigned in order of first appearance
+
+my $anchornum=-1;
+while(my $line=<STDIN>){
+    if($line =~ /a\s+score=([\d\.\-]+)/){
+        #Every alignment block is one anchor
+        $anchornum++;
+        next;
+    }
+    my($accession,$start,$len,$orientation,$seqlength) =
+        ($line =~ /s\s+(\S+)\s+(\d+)\s+(\d+)\s+([+-])\s+(\d+)\s+(\S+)/);
+    next if(!defined $accession);
+
+    #Must be formatted as Genome.Sequence
+    my($genome,$sequence) = ($accession =~ /([^\.]+)\.(\S+)/);
+    die "Accession not in Genome.Sequence format" if(!defined $genome);
+
+    my $end = ($orientation eq '-') ? $start-$len-1 : $start+$len;
+
+    #Store index for this accession/genome if first time we've seen it
+    $seq2index->{$accession} = scalar(keys %$seq2index) if(!exists $seq2index->{$accession});
+    $genome2index->{$genome} = scalar(keys %$genome2index) if(!exists $genome2index->{$genome});
+
+    #Coordinates are stored so that start <= end whatever the orientation
+    my $low = ($start<$end ? $start:$end);
+    my $high = ($start>$end ? $start:$end);
+    $anchors->{$anchornum}->{$accession} = {
+        'gidx'   => $genome2index->{$genome},
+        'sidx'   => $seq2index->{$accession},
+        'start'  => $low,
+        'end'    => $high,
+        'orient' => $orientation,
+    };
+    $seq2anchors->{$accession}->{$anchornum}++;
+}
+#Foreach sequence, sort anchors by coordinate and print distance between adjacent coords
+foreach my $accession (sort keys %$seq2index){
+    my @sortedanchors =  sort {$anchors->{$a}->{$accession}->{'start'} <=> $anchors->{$b}->{$accession}->{'start'}} (keys %{$seq2anchors->{$accession}});
+    my($genome) = ($accession =~ /([^\.]+)\.(\S+)/);
+    die "Accession not in Genome.Sequence format" if(!defined $genome);
+
+    foreach my $i (0 .. $#sortedanchors-1){
+        my($a1,$a2) = @sortedanchors[$i,$i+1];
+        die "Can't find in $a1,$accession" if(! exists $anchors->{$a1}->{$accession});
+        die "Can't find in $a2,$accession" if(! exists $anchors->{$a2}->{$accession});
+        my $dist = &getDistance($anchors->{$a1}->{$accession},$anchors->{$a2}->{$accession});
+        print STDERR "Bad coords Accession:$accession a1:$a1 $anchors->{$a1}->{$accession}->{'start'} - $anchors->{$a1}->{$accession}->{'end'} a2:$a2 $anchors->{$a2}->{$accession}->{'start'} - $anchors->{$a2}->{$accession}->{'end'}\n" if($dist < 0);
+        print STDERR "Genome $genome $genome2index->{$genome} missing" if(!exists $genome2index->{$genome});
+        $dist = 0 if($dist<0);
+
+        my $first = $anchors->{$a1}->{$accession};
+        my $second = $anchors->{$a2}->{$accession};
+        print join(" ",
+                   $a1,$a2,                                #Anchors
+                   $seq2index->{$accession},               #Seqindex
+                   $dist,                                  #Distance between anchors
+                   $genome2index->{$genome},               #Genomeindex
+                   $first->{'orient'},$second->{'orient'}, #Orientation
+                   $first->{'start'},$second->{'start'},   #Start coords of anchor1,anchor2
+                   $first->{'end'},$second->{'end'}),      #End coords of anchor1,anchor2
+              " \n";
+    }
+}
+
+
+#Distance between two anchors on the same sequence
+#  $anchors1,$anchors2 - hash refs holding at least 'start' and 'end'
+#Returns the absolute difference between the start of the second anchor and
+#the end of the first, so the result is never negative and the 'orient'
+#values are not consulted.
+#The orientation-specific branches that used to follow the return statement
+#could never run and have been removed.
+sub getDistance{
+    my($anchors1,$anchors2) = @_;
+    return abs($anchors2->{'start'} - $anchors1->{'end'});
+}
diff --git a/mapping/AlignmentTree.pm b/mapping/AlignmentTree.pm
new file mode 100644
index 0000000..47e5acf
--- /dev/null
+++ b/mapping/AlignmentTree.pm
@@ -0,0 +1,1476 @@
+package AlignmentTree;
+
+#AlignedIntervalTree is an interval tree with the additions that
+#stored intervals 1) may contain a correspondence map, such as an
+#alignment and 2) can be oriented for DNA sequences.
+
+#The data structure supports retrieval of corresponding,aligned intervals
+#The data structure also supports discontinuous intervals
+
+#Each interval in the structure is associated with a single coordinate
+#system or sequence and has an orientation '+','-'
+
+#The data structure used to represent an interval and an alignment is
+#[[seqname1,start1,end1,orientation1,cigarstring1,tag1_0,...tag1_N],
+# [seqname2,start2,end2,orientation2,cigarstring2,tag2_0,...tag2_N],...,]
+
+#Represented in the code as $alignobj = [$alni_1,$alni_2];
+
+#
+#insert(interval/alignment)     - insert an interval or alignment (a series of mapped intervals)
+#find(seq,start,end)            - retrieve intervals that overlap start,end on seq
+#intersect(seq,start,end)       - retrieve corresponding,aligned intervals that overlap start,end on seq
+#                                 
+#map(seq,start,end)             - retrieve intervals that overlap the range specified by any intersecting intervals 
+#                                 intersecting intervals are obtained if there exists an alignment that spans start,end on seq
+# Definitions
+# interval
+# alignment -  a series of mapped intervals
+
+use strict;
+use Math::Random qw(random_uniform);
+use POSIX qw(ceil floor);
+use IntervalTree;
+use Bit::Vector;
+use Storable qw(store retrieve);
+
+#remove only using for translation machinery and revcom
+use Bio::Perl;
+use Bio::DB::Fasta;
+use Bio::Seq;
+use Bio::Tools::CodonTable;
+
+#Allow Storable to serialize code references (Deparse) and to
+#re-create them on retrieve (Eval)
+$Storable::Deparse = 1;
+$Storable::Eval = 1;
+#Trace output: any true value prints progress messages, values >1
+#additionally print per-cigar-operation detail
+my $DEBUG=0;
+#When true, extra internal consistency checks are run
+my $QCCHECKS=0;
+#Size, in bits, of the bit vectors that flag aligned columns.
+#One bit per alignment column, so this caps the width of a single alignment
+my $BITV_SIZE=10000000; #10M columns, largest single aligned region
+
+#Name pattern used by map() to select whole genome alignments
+my $aligntoken="WGA";
+
+#Disallow more than one genetic segment per alignment
+#When true, a row for the query sequence that does not contain the
+#query coordinates is reported as an error instead of being skipped
+my $nodups=0;
+
+#Constructor. Returns an empty AlignmentTree with no interval trees,
+#no stored alignments and empty tag filter masks.
+sub new{
+    my($class) = @_;
+    #Support for filtering output using a phylogenetic profile of genomes
+    #Implemented using bitmasks of this many bits
+    my $maxbits = 1000;
+    my $self = {
+	_itrees           => {},   #one interval tree per sequence name
+	_alignments       => {},   #Saved as [alignref,bitvector,align_width]
+	_maxbits          => $maxbits,
+	_doremoveoverlaps => 0,
+	_bits             => 0,    #next unused bit index for a new tag
+	_bitlookup        => {},   #tag => bit index
+	_debug            => $DEBUG,
+    };
+    bless($self,$class);
+    $self->{_bitmask} = Bit::Vector->new($maxbits);
+    $self->{_defaultmask} = Bit::Vector->new($maxbits);
+    return $self;
+}
+
+#Write the tree to $file using Storable.
+#The two filter masks are additionally saved as enumeration strings
+#(_bitmaskstr, _defaultmaskstr) so that deserialize() can rebuild them.
+#Returns the result of Storable::store.
+sub serialize{
+    my($self,$file) = @_;
+    $self->{_bitmaskstr} = $self->{_bitmask}->to_Enum();
+    #Save the default mask itself; previously the current filter mask
+    #was written here by mistake
+    $self->{_defaultmaskstr} = $self->{_defaultmask}->to_Enum();
+    return Storable::store($self,$file);
+}
+#Read a tree previously written by serialize() from $file.
+#Called as a plain function: AlignmentTree::deserialize($file)
+#The filter masks are rebuilt from their saved enumeration strings.
+#Returns the restored object.
+sub deserialize{
+    my($file) = @_;
+    my $atree = Storable::retrieve($file);
+    $atree->{_bitmask} = Bit::Vector->new($atree->{_maxbits});
+    $atree->{_bitmask}->from_Enum($atree->{_bitmaskstr});
+    #Restore the default mask from its own saved string; previously the
+    #filter mask string was used for both. Fall back to the filter mask
+    #string if no default mask string was stored
+    my $defaultstr = defined $atree->{_defaultmaskstr} ? $atree->{_defaultmaskstr} : $atree->{_bitmaskstr};
+    $atree->{_defaultmask} = Bit::Vector->new($atree->{_maxbits});
+    $atree->{_defaultmask}->from_Enum($defaultstr);
+    return $atree;
+}
+
+#Require output contains one or more tags
+#An example of a tag is a genome name
+#Update the filter mask for the given tags.
+#Tags not seen before are assigned the next free bit index.
+#The bit of every tag passed in is toggled in the filter mask, then
+#the filter mask is replaced by its union with the default mask.
+#NOTE(review): flip() toggles, so passing the same tag twice clears
+#its bit again - confirm this is intended rather than Bit_On()
+sub filter{
+    my($self,@tags) = @_;
+    my $lookup = $self->{_bitlookup};
+    for my $tag (@tags){
+	$lookup->{$tag} = $self->{_bits}++ unless(exists $lookup->{$tag});
+	$self->{_bitmask}->flip($lookup->{$tag});
+    }
+    $self->{_bitmask}->Union($self->{_defaultmask},$self->{_bitmask});
+}
+
+#Reset the filter mask so that no tag bits are set.
+#The default mask is left untouched.
+sub clear_filter{
+    my($self) = @_;
+    $self->{_bitmask}->Empty();
+}
+
+#
+#Insert an interval or alignment
+#insert(
+#       {[seqname,start,end,orientation,cigarstring,tag0,...tagN]},
+#       uniquename,
+#       tags
+#      )
+#A unique identifier for the alignment, uniquename, must be provided
+#seqname - must be a uniquename for the coordinate system containing interval [start,end]
+#start - beginning of interval 0-based
+#end - end of interval 0-based
+#orientation - '+' or '-'; a numeric value is also accepted (>0 is stored as '+', anything else as '-')
+#cigarstring - in the UCSC format (#M#S#I#D#X) indicates the continuity of the alignment over the interval
+#tag0...tagN - zero or more tags that can be used by filtering functions. Tags can be specified on either the alignment or the interval
+
+#Intervals and alignments are stored in a consistent manner. An
+#alignment is a set of intervals with a correspondence map. Single
+#annotated intervals, like genes, are stored as an alignment
+#with only a single interval.  The correspondence map is an identity
+#map in this case.
+
+#Store an interval or alignment under the unique name $name.
+#  $alignmentref - ref to an array of rows
+#                  [seqname,start,end,orientation,cigarstring,tag0,...tagN]
+#  $name         - unique identifier; the alignment is keyed on it
+#  @tags         - optional tags that apply to the whole alignment
+#Every row is added to the interval tree of its sequence.
+#Numeric orientations are rewritten in place in the row to '+' or '-'.
+#Dies if the input is not a reference, if an orientation is neither
+#'+' nor '-', or if rows with cigar strings disagree on column count.
+#An entry already stored under $name is overwritten.
+sub insert{
+    my($self,$alignmentref,$name,@tags) = @_;
+    my $genomelookup = {};
+    #Tag bits for this alignment; sized like the filter masks
+    my $alignment_bv = Bit::Vector->new($self->{_maxbits});
+    my $align_width = 0;
+    die "Bad alignment passed to insert($alignmentref). Alignment needs to be a ref to an array" if(!ref($alignmentref));
+    foreach my $align (@$alignmentref){
+	die "Bad alignment passed to insert($align). Alignment needs to be a ref to an array" if(!ref($align));
+	#print "INSERTING ",join(',',@$align),"\n";
+	my $seqname = $align->[0];
+	my $start = $align->[1];
+	my $end = $align->[2];
+	my $orientation = $align->[3];
+	if($align->[4]){
+	    #Check that column count is consistent
+	    #The first cigar string seen defines the alignment width
+	    my ($cigs,$columncount) = &get_cigs($align->[4]);
+	    $align_width = $columncount if(!$align_width);
+	    if($columncount != $align_width){
+		&printAlignmentDebug($alignmentref,\*STDERR);
+		die "Bad input. Mismatched column count $columncount in $align->[4], expecting $align_width";
+	    }
+	}
+	#Normalize numeric orientations to '+','-'
+	if($orientation =~ /\d/){
+	    if($orientation>0){
+		$orientation = '+';
+	    }
+	    else{
+		$orientation = '-';
+	    }
+	}
+	$align->[3]=$orientation;
+	die "Bad orient $orientation ".join(',',@$align)."\n" if($orientation ne '-' && $orientation ne '+');
+	#Store tags in bit vector
+	#Per-interval tags start at index 5 of the row
+	for(my $i=5;$i<@$align;$i++){
+	    my $tag = $align->[$i];
+	    if(!exists $self->{_bitlookup}->{$tag}){
+		$self->{_bitlookup}->{$tag} = $self->{_bits}++;
+	    }
+	    $alignment_bv->Bit_On($self->{_bitlookup}->{$tag});
+	}
+	#The first interval on a sequence creates its interval tree
+	if(!exists $self->{_itrees}->{$seqname}){
+	    $self->{_itrees}->{$seqname} = new IntervalTree($start,$end,$name,$orientation);
+	}
+	else{
+	    $self->{_itrees}->{$seqname}->insert($start,$end,$name,$orientation);
+	}
+    }
+
+    #Store tags in bit vector
+    #Alignment level tags are also recorded in the default mask
+    foreach my $tag (@tags){
+	if(!exists $self->{_bitlookup}->{$tag}){
+	    $self->{_bitlookup}->{$tag} = $self->{_bits}++;
+	}
+	#print STDERR "Adding tag $tag on $self->{_bitlookup}->{$tag} $self->{_defaultmask}\n";
+	$alignment_bv->Bit_On($self->{_bitlookup}->{$tag});
+	$self->{_defaultmask}->Bit_On($self->{_bitlookup}->{$tag});
+    }
+#    print "Masks ",$self->{_defaultmask}->Norm()," ",$self->{_bitmask}->Norm(),"\n";
+    #Duplicate detection is disabled by the leading 0
+    if(0 && exists $self->{_alignments}->{$name}){
+	print STDERR "Duplicate feature $name already stored. Skipping this one\n";
+    }
+    else{
+	$self->{_alignments}->{$name} = [$alignmentref,$alignment_bv,$align_width];
+    }
+}
+
+#
+#Find all intersecting alignments in interval (query.start,query.end) from query.seqname 
+#Returns (start,end) coordinates on seqname of all matching alignments
+#
+#intersect(query.seqname,query.start,query.end,tags)
+
+#returns [alignname,seqname,start,end,coverage,pid,queryorient,matchorient]
+#0-alignname
+#1-seqname
+#2-start
+#3-end
+#4-coverage is number of corresponding characters between start,end
+#5-pid is number of identical characters between start,end
+#6-queryorient is orientation of the matching aligned query interval query.seqname:query.start-query.end 
+#7-matchorient is orientation of the matching aligned interval seqname:start-end
+
+#Find the stored alignments that overlap $qstart-$qend on $qseqname
+#and report, for every row of each such alignment, the interval that
+#corresponds to the overlapped part of the query.
+#  $qseqname,$qstart,$qend - query interval
+#  @qtags - only $qtags[0] is used: an alignment is considered only if
+#           its name matches $qtags[0] as a regular expression
+#Returns a list of array refs
+#  [alignname,seqname,start,end,coverage,pid,queryorient,matchorient]
+#Rows that map entirely into a gap are not reported.
+#Returns an empty list if no interval tree exists for $qseqname.
+#Dies on inconsistent coordinates or if a row has more than 5 elements.
+#NOTE(review): $qtags[0] is interpolated unescaped into the pattern and
+#may be undef when no tag is passed - confirm callers always pass one
+sub intersect{
+    my($self,$qseqname,$qstart,$qend,@qtags) = @_;
+    my @results;
+    #$self->filter(@qtags);
+    if(exists $self->{_itrees}->{$qseqname}){
+	print "Querying $qseqname:$qstart,$qend with qtags $qtags[0]\n" if($self->{_debug});
+	#(1) Find all intersecting features on [$qstart,$qend]
+	#returns IntervalTree::intersect returns an array of interval names
+	my @alignments = $self->{_itrees}->{$qseqname}->intersect($qstart,$qend); 
+	#Optionally remove fully nested intervals
+	if($self->{_doremoveoverlaps}){
+	    @alignments = $self->removeOverlaps(\@alignments,$qseqname);
+	}
+	foreach my $align_name (@alignments){
+	    die "Overlapping interval $align_name not found" if(! exists $self->{_alignments}->{$align_name});
+	    my($alignobj,$alignment_bv,$align_width) = @{$self->{_alignments}->{$align_name}};
+	    if($align_name =~ /$qtags[0]/){
+		print "Overlapping feature $align_name\n" if($self->{_debug});
+		print "MATCH $align_name query:$qseqname $qstart-$qend . Number of seqs ",scalar(@$alignobj),"\n" if($self->{_debug});
+		#(2) Crop interval [$qstart,$qend] to the alignment 
+		my ($qmstart,$qmend,$queryorient) = &matchinginterval($alignobj,$qseqname,$qstart,$qend);
+		if(!defined $qmstart || !defined $qmend){
+		    #Error condition
+		    #Dump every overlapping alignment before dying
+		    print "WARNING. print unexpected overlapping alignments for query $qstart,$qend on $qseqname\n";
+		    foreach my $align_name2 (@alignments){
+			my($alignobj2,$alignment_bv2,$align_width2) = @{$self->{_alignments}->{$align_name2}};
+			foreach my $alni2 (@$alignobj2){
+			    print "$align_name2  $alignobj2 ",join(' ',@$alni2),"\n";
+			}
+		    }
+		    die "Bad overlapping alignments";
+		}
+		#The cropped interval must lie inside the query
+		die if($qmstart<$qstart);
+		die if($qmend>$qend);
+		if($qmstart==$qmend){
+		    next;
+		}
+		die "Invalid matching interval coords:$qmend-$qmstart from query $qstart-$qend\n" if($qmend<=$qmstart);
+		my $queryspancheck=0;
+		if($qstart == $qmstart && $qend == $qmend){
+		    print "Alignment fully spans query\n" if($self->{_debug});
+		}
+		else{
+		    if($qstart != $qmstart && $qend != $qmend){
+			print "Query fully spans alignment\n" if($self->{_debug});
+			$queryspancheck=1;
+		    }
+		}
+		print "ISECT: $align_name QUERY:$qseqname $qstart-$qend mapped:$qmstart-$qmend len:",$qmend-$qmstart,"\n" if($self->{_debug});
+		#(3) Convert from genomic coords to alignment column. 1->alignment_width
+		my ($qcolumnstart,$qcolumnend,$querybv) = &coordstocolumn($alignobj,$qseqname,$qmstart,$qmend);
+		#$querybv stores a bitmatrix from $qcolumnstart-$qcolumnend indicating if sequence $seqname is aligned in the interval
+		print "MAPPED $qseqname:$qmstart-$qmend len:",$qmend-$qmstart,
+		" to column coords $qseqname:$qcolumnstart-$qcolumnend len:",$qcolumnend-$qcolumnstart+1,"\n" if($self->{_debug});
+		print "Transform $qseqname:$qmstart-$qmend to column coords $qcolumnstart-$qcolumnend\n" if($self->{_debug});
+		
+		foreach my $alni (@$alignobj){
+		    die "Invalid zero length matching interval $alni->[2]-$alni->[1]\n" if($alni->[2] - $alni->[1]<=0);
+		    print "Converting $align_name $alni->[0]:$alni->[1]-$alni->[2] from $qseqname:$qmstart-$qmend using column coords $qcolumnstart-$qcolumnend\n" if($self->{_debug});
+		    my ($mseq,$malign_start,$malign_end,$morient) = @$alni;
+		    print "ALNI: $mseq,$malign_start,$malign_end,$morient\n" if($self->{_debug});
+		    #Rows carrying per-interval tags are rejected here
+		    die if(@$alni>5);
+		    
+		    #(4) Crop aligned feature. Convert back from alignment column to genomic coords on $mseq
+		    #$currbv stores a bitmatrix from $qcolumnstart-$qcolumnend indicating if sequence $mseq is aligned in the interval
+		    my($s,$e,$currbv) = &columntocoords($alni,$qcolumnstart,$qcolumnend,$querybv);
+		    #if($mseq eq $qseqname && $nodups){
+			#die if($s != $qmstart);
+			#die if($e != $qmend);
+			#die if($morient ne $queryorient);
+		    #}
+		    
+		    #Check the actual number of aligned columns
+		    my $pid=0;
+
+		    
+		    #if($mseq eq $qseqname && $nodups){
+			#my ($qs1,$qe1) = &coordstocolumn($alignobj,$qseqname,$s,$e);
+			#print "Checking for matching characters between col:$qcolumnstart-$qcolumnend $qs1-$qe1 coords:$s-$e\n" if($self->{_debug});
+		     #}
+
+		    #Count the columns in the query range where both the
+		    #query and the current row are aligned
+		    my $intersectbv = Bit::Vector->new($querybv->Size());
+		    $intersectbv->Intersection($querybv,$currbv); 
+		    for(my $i=$qcolumnstart;$i<=$qcolumnend;$i++){
+			if($intersectbv->bit_test($i)==1){
+			    $pid++;
+			}
+		    }
+		    print "Intersect matches: ",$intersectbv->Norm(),"\n" if($self->{_debug});
+		    print "Query matches: ",$querybv->Norm(),"\n" if($self->{_debug});
+		    print "Matches in the interval $qcolumnstart-$qcolumnend:$pid\n" if($self->{_debug});
+
+		    if($e-$s>0){
+			die "Bad pid" if($pid==0);
+			die "Invalid zero length matching interval $e-$s\n" if($e-$s<=0);
+			print "($qcolumnend-$qcolumnstart) - ($qmend-$qmstart)\n" if($self->{_debug});
+			my $numgaps_query = ($qcolumnend-$qcolumnstart+1) - ($qmend-$qmstart);
+			my $querypid = ($qcolumnend-$qcolumnstart+1) - $numgaps_query;
+			my $coverage = $e-$s;
+			print "Num_query_gaps=$numgaps_query\nNum_qry_matches=$querypid\nNum_hit_matches=$pid\n" if($self->{_debug});
+			die "$pid<1" if($pid<1);
+			die "$pid>$coverage" if($pid>$coverage);
+			print "RESULT: $align_name,$mseq,$s,$e,$coverage,$pid\n" if($self->{_debug});
+			#Intersect result is a $alni,$coverage,$pid
+			push @results,[$align_name,$mseq,$s,$e,$coverage,$pid,$queryorient,$morient];
+		    }
+		    else{
+			#Entirely contained within a gap
+			die "Bad pid" if($pid !=0);
+			die "Bad coordinates $align_name,$mseq,$s,$e\n" if($e<$s);
+			print "NORES: Skipping $align_name,$mseq,$s,$e. Mapped in a gap\n" if($self->{_debug});
+		    }
+		}
+	    }
+	    else{
+		print "Skipping $align_name does not match $qtags[0]\n" if($self->{_debug});
+	    }
+	}
+    }
+    else{
+	#Skip, nothing to find
+	if($self->{_debug}){
+	    print "Interal trees for ",join(',',keys %{$self->{_itrees}}),"\n";
+	    print "No interval tree for sequence [$qseqname] $self->{_itrees}->{$qseqname}\n";
+	}
+    }
+    #$self->clear_filter();
+    return @results;
+}
+#
+#Map a coordinate (query.start,query.end) from query.seqname to
+#intersecting alignments on match_i.seqname..match_j.seqname with
+#coordinates (match_i.start,match_i.end...match_j.start,match_j.end)
+#
+#map(seqname,start,end,type)
+#returns [match_name,$mseq,$mstart,$mend,$mcoverage,align_name,seq_name,$coveraged]
+#Map the query interval $qstart-$qend on $qseqname through the whole
+#genome alignments that overlap it and collect the "gene" features
+#found on the corresponding intervals of the aligned sequences.
+#  $qseqname,$qstart,$qend - query interval
+#  @qtags - accepted but not used by this method
+#Returns a list of array refs
+#  [fname,fseq,fstart,fend,span_on_query,align_name,seq,fcoverage,
+#   matching_columns,intersect_row,query_orient,align_orient,feature_orient]
+#Returns an empty list if no interval tree exists for $qseqname.
+#Dies on inconsistent orientations, coordinates or coverage.
+sub map{
+    my($self,$qseqname,$qstart,$qend,@qtags) = @_;
+    my @results;
+    if(exists $self->{_itrees}->{$qseqname}){
+	print "Finding WGA alignments on $qseqname,$qstart,$qend\n" if($self->{_debug});
+	#(1)Retrieve all the alignments on genomic coords qstart-qend
+	#Nested intervals are removed for this lookup only
+	$self->{_doremoveoverlaps}=1;
+	my @isects = $self->intersect($qseqname,$qstart,$qend,$aligntoken);
+	$self->{_doremoveoverlaps}=0;
+	print "FOUND ", scalar(@isects)," alignments\n" if($self->{_debug});
+
+	#Currently assuming non overlapping alignments and the total
+	#coverage,pid must be less than the query length $qend-$qstart
+
+	my $totalqcoverage=0;
+	my $totalqid=0;
+	my $qcoverage=undef;
+	my $qmstart=undef;
+	my $qmend=undef;
+	my $qmorient=undef;
+
+	#(2)Determine the min-max spanning interval over all matching alignments to the query
+	#intersect() already provides the query interval [$qstart,$qend] crop to the overlapping alignment(s)
+	foreach my $isectn (@isects){
+	    my($align_name,$seq,$start,$end,$coverage,$pid,$qorient,$orient) = @$isectn;
+	    print "Looking for $qseqname in $align_name,$seq,$start,$end,$coverage,$pid\n" if($self->{_debug}); 
+	    if($seq eq $qseqname && $end<=$qend && $start>=$qstart){
+		#Report the orientation that is actually compared
+		die "Mismatched orient $qorient != $orient" if($qorient ne $orient);
+		die "$end>$qend" if($end>$qend);
+		die "$start<$qstart" if($start<$qstart);
+		if(defined $qmstart || defined $qmend){
+		    print "#Duplicate $seq already found in $align_name. Multiple alignments spanning query\n" if($self->{_debug}); 
+		    $qcoverage=$coverage;
+		    $totalqcoverage+=$coverage;
+		    $totalqid+=$pid;
+		    #Extend the spanning interval
+		    $qmstart=$start<$qmstart ? $start : $qmstart;
+		    $qmend=$end>$qmend ? $end : $qmend;
+		    if(defined $qmorient && $orient ne $qmorient){
+			#print "WARNING multiple matching alignments to $qseqname,$qstart,$qend with inconsistent orientations.  $align_name:$orient ne $qmorient\n";
+			#'?' marks inconsistent orientations across alignments
+			$qmorient='?';
+		    }
+		    else{
+			$qmorient=$orient;
+		    }
+
+		}
+		else{
+		    #First row found for the query sequence
+		    die if(defined $qmorient);
+		    $qcoverage=$coverage;
+		    $totalqcoverage+=$coverage;
+		    $totalqid+=$pid;
+		    $qmstart=$start;
+		    $qmend=$end;
+		    $qmorient=$orient;
+		}
+	    }
+	}
+	
+	#
+	#(3)Map the spanning interval [$qmstart,$qmend] to the rest of the sequences in the alignment
+	
+	foreach my $isectn (@isects){
+	    die if(!defined $qcoverage);
+	    die if(!defined $qmstart || !defined $qmend);
+	    my($align_name,$seq,$start,$end,$coverage,$pid,$qaln_orient,$aln_orient) = @$isectn;
+	    my($alignobj,$alignment_bv,$align_width) = $self->getAlignment($align_name);
+	    my($qfstart,$qfend,$qforient) = &matchinginterval($alignobj,$qseqname,$qmstart,$qmend);
+	    my ($qfscolumnstart,$qfscolumnend,$fsquerybv) = &coordstocolumn($alignobj,$qseqname,$qfstart,$qfend);
+	    #print "#Mapping with alignment $align_name $seq $start-$end cov:$coverage,pid:$pid,qaln_orient:$qaln_orient,aln_orient:$aln_orient\n";
+	    #Query coverage should correspond to interval start-end
+	    die if($coverage != ($end-$start));
+	    die "$qaln_orient ne $qmorient" if($qmorient ne '?' && $qaln_orient ne $qmorient);
+	    print "$align_name:$seq $start,$end ",$end-$start," query_start:$qstart query_end:$qend query_coverage:$qcoverage ",$qcoverage/($qend-$qstart),"\n" if($self->{_debug});
+	    #(4)Find features in the mapped interval
+	    my @misects = $self->intersect($seq,$start,$end,"gene");
+	    foreach my $fisectn (@misects){
+		my($fname,$fseq,$fstart,$fend,$fcoverage,$fpid,$forient1,$forient2) = @$fisectn;
+
+		#Need intersection of $start,$fend $qmstart,$qmend to get proper $pid and $cov
+		my ($fscolumnstart,$fscolumnend,$fsbv) = &coordstocolumn($alignobj,$seq,$fstart,$fend);
+		#die "$fscolumnstart != $qfscolumnstart" if($fscolumnstart != $qfscolumnstart);
+		#die "$fscolumnend != $qfscolumnend" if($fscolumnend != $qfscolumnend);
+		#Count the feature columns that are aligned in both the
+		#query row and the feature row
+		my $ipid=0;
+		my $intersectbv = Bit::Vector->new($fsquerybv->Size());
+		$intersectbv->Intersection($fsquerybv,$fsbv); 
+		for(my $i=$fscolumnstart;$i<=$fscolumnend;$i++){
+		    if($intersectbv->bit_test($i)==1){
+			$ipid++;
+		    }
+		}
+		die "Bad number of matching columns $ipid>($fend-$fstart) $fscolumnstart-$fscolumnend" if($ipid>($fend-$fstart));
+		die if($forient1 ne $forient2);
+		die if($seq ne $fseq);
+		if($fseq eq $qseqname && $nodups){
+		    die "$fstart<$qmstart query:$seq,$start,$end $fname,$fseq,$fstart,$fend,$fcoverage,$fpid" if($fstart<$qmstart);
+		    die "$fend>$qmend query:$seq,$start,$end $fname,$fseq,$fstart,$fend,$fcoverage,$fpid" if($fend>$qmend);
+		}
+		print "Adding result $fname,$fseq,$fstart,$fend,$fcoverage,$align_name,$seq,$qcoverage,$fpid\n" if($self->{_debug});
+		#push @results,[$fname,$fseq,$fstart,$fend,$coverage,$align_name,$seq,$fcoverage,$ipid,$isectn,$qaln_orient,$aln_orient,$forient1];
+		#Determine span on query
+		my ($qfsstart,$qfsend) = &columntocoords($self->getAlignedInterval($align_name,$qseqname,$qfstart,$qfend),$fscolumnstart,$fscolumnend);
+		push @results,[$fname,$fseq,$fstart,$fend,$qfsend-$qfsstart,$align_name,$seq,$fcoverage,$ipid,$isectn,$qaln_orient,$aln_orient,$forient1];
+	    }
+	    print "Finished mapping alignment $align_name\n" if($self->{_debug});
+	}
+	#Totals are clamped to the query length; they are not returned
+	if($totalqcoverage>($qend-$qstart)){
+#TODO die on bad coverage die "Bad coverage $totalqcoverage>($qend-$qstart) ".($qend-$qstart) if($totalqcoverage>($qend-$qstart));
+	    $totalqcoverage=($qend-$qstart);	  
+	}
+	if($totalqid>($qend-$qstart)){
+#TODO die "Bad identity ($totalqid>($qend-$qstart) " if($totalqid>($qend-$qstart));
+	    $totalqid = ($qend-$qstart);
+	}
+    } 
+    return @results;
+}
+
+#TODO, optimize this retrieval
+#Crop the query interval $qstart-$qend to the first row of $alignobj
+#that belongs to $qseqname and overlaps the query.
+#Rows for $qseqname that lie entirely before or after the query are
+#skipped (with a warning when $nodups is set).
+#Returns (start,end,orient) of the cropped interval.
+#Dies if no row of $alignobj yields an interval.
+sub matchinginterval{
+    my($alignobj,$qseqname,$qstart,$qend) = @_;
+    my($start,$end,$orient);
+    print "QUERYING $alignobj for $qseqname $qstart-$qend\n" if($DEBUG);
+    foreach my $alni (@$alignobj){
+	if($alni->[0] ne $qseqname){
+	    print "Checked $alni->[0] in obj size ",scalar(@$alignobj),"\n" if($DEBUG);
+	    next;
+	}
+	print "HIT on $alni->[0] query=$qseqname:$qstart-$qend ; interval=$qseqname:$alni->[1]-$alni->[2] $alni->[3]\n" if($DEBUG);
+
+	#Query lies completely to one side of this row
+	my $query_before = ($qstart < $alni->[1] && $qend < $alni->[1]);
+	my $query_after = ($qstart > $alni->[2] && $qend > $alni->[2]);
+	if($query_before || $query_after){
+	    if($nodups){
+		print "WARNING: Invalid matching interval. Alignment interval $alni->[0]:$alni->[1]-$alni->[2] not contained in interval $qseqname:$qstart-$qend\n";
+		&printAlignmentDebug($alignobj);
+		#return ($start,$end,$orient);
+	    }
+	    next;
+	}
+
+	#Clip the query to the bounds of the row
+	if($qstart < $alni->[1]){
+	    $start = $alni->[1];
+	}
+	else{
+	    $start = $qstart;
+	}
+	if($qend < $alni->[2]){
+	    $end = $qend;
+	}
+	else{
+	    $end = $alni->[2];
+	}
+
+	if(defined $orient && $orient ne $alni->[3]){
+	    print "WARNING multiple matching alignments to $qseqname,$qstart,$qend with inconsistent orientations. $orient ne $alni->[3]\n";
+	    &printAlignmentDebug($alignobj);
+	    die "Multiple copies of a sequence per alignment not supported";
+	}
+	$orient = $alni->[3];
+	#Only the first overlapping row is used
+	last;
+    }
+    die "Cannot find $qseqname,$qstart,$qend in alignment. Returned ($start,$end,$orient)" if(!defined $start || !defined $end);
+    return ($start,$end,$orient);
+}
+
+#Determine column indices in an alignment matrix
+#that correspond to interval $coord1-$coord2 on sequence $qseqname
+#The alignment matrix is assumed to have one sequence per row with no
+#sequence appearing more than once
+#The format is
+#@$alignobj = [seqname,start,end,orient,cigar]
+#start<end is specified in 0 start interbase coordinates
+#cigar specifies the continuity of the alignment of the interval
+#when orient=='-' the cigar string specifies the alignment end->start
+#otherwise the cigar string specifies the alignment start->end
+#Column coordinates are 1 start, numbering bases/columns in an alignment matrix
+#Genomic coordinates are 0 start, interbase
+#Arguments
+#  $alignobj  - ref to an array of rows [seqname,start,end,orient,cigar]
+#  $qseqname  - name of the sequence the coordinates refer to
+#  $coord1,$coord2 - genomic interval; dies unless $coord1<$coord2
+#  $skipbv    - when true no bit vector is built and the third return
+#               value is undef
+#Returns ($columnstart,$columnend,$querybv) where $querybv has a bit
+#set for every column covered by an 'M' cigar run of the matched row,
+#or for $columnstart-$columnend when the row has no cigar string.
+#Only the first row for $qseqname that contains both coordinates is used.
+#Dies if no such row is found or the offsets cannot be located.
+sub coordstocolumn{
+    my($alignobj,$qseqname,$coord1,$coord2,$skipbv) = @_;
+    die "Expecting 0 start, interbase coordinates $coord1<=$coord2" if($coord1>=$coord2);
+    #Column position in the alignment
+    #Starting from position 1
+    my $columnstart;
+    my $columnend;
+    #Bit vector keeps track of aligned columns
+    #Starting at column 1, column index 0 is ignored
+    my $querybv;
+    if(! $skipbv){
+	$querybv = new Bit::Vector($BITV_SIZE); #setting max aligned interval at 10MB
+    }
+    my $alnwidth;
+    foreach my $alni (@$alignobj){
+	if($alni->[0] eq $qseqname){
+	    #Skip rows that do not contain both coordinates
+	    #With $nodups set this is treated as a fatal error instead
+	    if($coord1<$alni->[1] || $coord1>$alni->[2]){
+		if($nodups){
+		    &printAlignmentDebug($alignobj);
+		    die "Start position $coord1 is not contained in interval $qseqname:$alni->[1]-$alni->[2]";
+		}
+		next;
+	    }
+	    if($coord2<$alni->[1] || $coord2>$alni->[2]){
+		if($nodups){
+		    &printAlignmentDebug($alignobj);
+		    die "End position $coord2 is not contained in interval $qseqname:$alni->[1]-$alni->[2]";
+		}
+		next;
+	    }
+
+	    #1-based offsets of the first and last base of the query
+	    #within the row, counted in the direction of the cigar string
+	    my $offsetstart;
+	    my $offsetend;
+	    my $orient = $alni->[3];
+	    print "COORDSTOCOLUMN for seq $qseqname and coordinates $coord1-$coord2 and orient $orient\n" if($DEBUG);
+
+	    die "Bad orient: $orient\n" if($orient ne '-' && $orient ne '+');
+	    #$alignobj is a collinear segment
+	    #Determining offsets into the aligned segment
+	    #$alni->[1] |-----------------| $alni->[2]
+	    #coord1        |------|      coord2
+	    #+ strand
+	    #        ------|             offsetstart
+	    #        -------------|      offsetend
+	    #Cigar string is relative to $alni->[1] --> $alni->[2]
+	    #Columnstart,end are relative to $alni->[1]
+
+	    #- strand
+	    #                     |----- offsetstart
+	    #              |------------ offsetend
+	    #Cigar string is relative to $alni->[2] --> $alni->[1]
+	    #Columnstart,end are relative to $alni->[2]
+	    if($orient eq '-'){
+		#offset is from end ($coord2) of segment match
+		# A T G C A T
+		#0 1 2 3 4 5 6
+		#  |---------|   i->[1] -> i[2] == 1-6 
+		#    |-----|     coord1 -> coord2 == 2-5
+		#         *      offsetstart = 6-5+1 == 2; column 2 in the alignment matrix
+		# X X M M M X    alignment matrix
+		#     *          offsetend = 6-2 == 4; column 4 in the alignment matrix
+		#0 1 2 3 4 5 6
+		# A T G C A T
+		$offsetstart = $alni->[2] - $coord2+1;
+		$offsetend = $alni->[2] - $coord1;
+		die "$offsetstart = $alni->[2] - $coord2+1; $offsetend = $alni->[2] - $coord1;" if($offsetend < $offsetstart);
+	    }
+	    else{
+		#offset is from beginning ($coord1) of segment match
+		$offsetstart = $coord1 - $alni->[1]+1;
+		$offsetend = $coord2 - $alni->[1];
+		die "$offsetstart < $offsetend " if($offsetend < $offsetstart);
+	    }
+	    #Offsets must fall within the length of the row
+	    die if($offsetstart<1);
+	    die if($offsetend>($alni->[2]-$alni->[1]));
+	    #If cigar string
+	    if($alni->[4]){
+		print "coordstocolumn using cig $alni->[0] $alni->[4]\n" if($DEBUG);
+		print "Looking for offset $offsetstart-$offsetend:$orient in match $alni->[1]-$alni->[2] of length ",
+		$alni->[2]-$alni->[1]," orient:$orient\n" if($DEBUG);
+
+		my ($cigs,$columncount) = &get_cigs($alni->[4]);
+		$alnwidth=$columncount;
+		die "$offsetend>$columncount. Check cigar string $alni->[4], appears to be incorrect length for $coord1-$coord2" if($offsetend>$columncount);
+		#First pass: flag every column covered by an 'M' run
+		#Every cigar operation advances the column counter here
+		my $currcount2=0;
+		foreach my $c2 (@$cigs){
+		    my($count2,$char2) = @$c2;
+		    if(!$skipbv && $char2 eq 'M'){
+			#|*      |$currcount2
+			#| *     |$currcount2+1
+			#|     * |$currcount2+$count2
+			#| MMMXX |3M,$count2==3
+			#| 11100 |bitvector
+			$querybv->Interval_Fill($currcount2+1,$currcount2+$count2);
+		    }
+		    $currcount2+=$count2;
+		}
+
+		#Second pass: walk the cigar to translate the base offsets
+		#into column indices
+		#$matches counts 'M' columns passed so far
+		#$currcount counts 'M' and 'X' columns passed so far
+		#NOTE(review): operations other than 'M' and 'X' do not advance
+		#$currcount in this pass, unlike the first pass - confirm intended
+		my $matches=0;
+		my $currcount=0;
+
+		my $foundstart=0;
+		my $foundend=0;			
+		
+		foreach my $c (@$cigs){
+		    my($count,$char) = @$c;
+		    if($char eq 'M'){
+			#0 1 2 3 4 5 6
+			#  |---------|   i->[1] -> i[2] == 1-6 
+			#     *          offsetstart
+			#         *      offsetend
+			# X X M M M X    alignment matrix
+			#   *            matches
+			#0 1 2 3 4 5 6
+			# A T G C A T
+			if($count+$matches>=$offsetstart){
+			    if(!$foundstart){
+				print "FOUNDSTART $currcount $matches $count$char\n" if($DEBUG>1);
+				$columnstart = $currcount+($offsetstart-$matches);
+				print "columnstart=$columnstart\n" if($DEBUG>1);
+				$foundstart=1;
+			    }
+			}
+			if($count+$matches>=$offsetend){
+			    if(!$foundend){
+				print "FOUNDEND $currcount $matches $count$char\n" if($DEBUG>1);
+				#$columnend=$currcount+($offsetend-$matches)-1;
+				$columnend=$currcount+($offsetend-$matches);
+				print "columnend=$columnend\n" if($DEBUG>1);
+				$foundend=1;
+				last;
+			    }
+			}
+			$matches+=$count;
+			$currcount+=$count;
+		    }
+		    elsif($char eq 'X'){
+			$currcount+=$count;
+		    }
+		}
+		die "Could not find start or end " if(!$foundstart || !$foundend);
+		die if($columnstart<1);
+		die if($columnend>$columncount);
+	    }
+	    else{
+		#No cigar string 
+		#Assume interval aligns at its entire length
+		$columnstart=$offsetstart;
+		$columnend=$offsetend;
+		if(!$skipbv){
+		    $querybv->Interval_Fill($columnstart,$columnend);
+		}
+	    }
+	    #Only the first containing row is used
+	    last;
+	}
+    }
+    #Defaults are disabled; an unmapped interval is fatal below
+    if(!defined $columnstart){
+	#$columnstart=1;
+    }
+    if(!defined $columnend){
+	#$columnend=$alnwidth;
+    }
+    die "Can't map $alignobj,$qseqname,$coord1,$coord2 to $columnstart-$columnend" if(!defined $columnstart || !defined $columnend);
+    return ($columnstart,$columnend,$querybv);
+}
+
+#Maps the alignment columns $columnstart-$columnend to genomic coordinates
+#In the case where the specified columns map to gaps, the coordinate corresponding to the 
+#next matching column is returned 
+#Column coordinates are 1 start, numbering bases,gaps/columns in an alignment matrix
+#Genomic coordinates are 0 start, interbase
+sub columntocoords{
+    my($aln,$columnstart,$columnend,$querybv) = @_;
+    die "Columnstart $columnstart must be >= 1. aln:$aln" if($columnstart<1);
+    #columnstart is start relative to beginning of alignment matrix
+    #columnend is end relative to beginning of alignment matrix
+    #|MMMIIMMMIIIMMMXXXMM|
+    # -----|               columnstart
+    # ------------|        columnend
+    #Alignment $aln is in genomic coordinates
+    #start,end are mapped genomic coordinates for aln relative to offsets specified by $columnstart,$columnend
+    my $start; 
+    my $end;
+    #Bit vector keeps track of aligned columns
+    #Starting at column 1
+    if(! defined $querybv){
+	$querybv = Bit::Vector->new($BITV_SIZE);
+	$querybv->Interval_Fill($columnstart,$columnend);
+    }
+    my $currbv = Bit::Vector->new($querybv->Size()); 
+    my $orient = $aln->[3];
+    die "Bad orient $orient\n" if($orient ne '-' && $orient ne '+');
+    #Check if there is a cigar string
+    print "Converting for alni with orient $orient\n" if($DEBUG);
+    if(length($aln->[4])>0){
+	print "columntocoords using cig $aln->[0] $aln->[4]\n" if($DEBUG > 1);
+	my ($cigs,$count) = &get_cigs($aln->[4]);
+	die "Columnstart $columnstart must be >= 1. aln:$aln" if($columnstart<1);
+	die "Columnend $columnend > cigar count $count. Check cigar string $aln->[4] for $aln->[1]-$aln->[2]" if($columnend>$count);
+	print "Looking for offset $columnstart-$columnend:$orient in match $aln->[1]-$aln->[2] of length ",$aln->[2]-$aln->[1]," orient:$orient\n" if($DEBUG > 1);
+		
+	my $matches=0;
+	my $currcount=0;
+	
+	my $foundstart=0;
+	my $foundend=0;
+	my $startcount;
+	my $currcount2=0;
+	foreach my $c2 (@$cigs){
+	    my($count2,$char2) = @$c2;
+	    if($char2 eq 'M'){
+		$currbv->Interval_Fill($currcount2+1,$currcount2+$count2);
+	    }
+	    $currcount2+=$count2;
+	}
+	foreach my $c (@$cigs){
+	    my($count,$char) = @$c;
+	    print "Analyzing $count$char\n" if($DEBUG>1);
+	    if($count+$currcount>=$columnstart){
+		if(!$foundstart){
+		    print "FOUNDSTART $currcount $matches $count$char\n" if($DEBUG > 1);
+		    if($char eq 'M'){
+			print "START IN MATCH $orient\n" if($DEBUG > 1);
+			#|----*------ columnstart
+			# --|         currcount
+			#    MMMM     count        columnstart-currcount=number of contributing matches in current cig
+			if($orient eq '-'){
+			    $start = $aln->[2]-$matches-($columnstart-$currcount)+1;
+			}
+			else{
+			    #Start is alignment start (s1) + matching columns + number of matches in current cigar
+			    $start = $aln->[1]+$matches+($columnstart-$currcount)-1;
+			}
+		    }
+		    else{
+                        #Report NEXT matching position
+			print "START IN GAP $orient\n" if($DEBUG > 1);
+			#No match to $->[0]; in gap
+			#Report next matching position
+			#in the alignment between query and current sequence
+			#(the next matching position past the gap)
+
+                        #|----*------ columnstart
+			# --|         currcount
+			#    XXXX     count
+
+			#Need to account for case where
+			#next matching position in current sequence is a gap in the query
+			#Use the bit vectors to find next matching position between current seq and query
+			my $intersectbv = new Bit::Vector($querybv->Size());
+			$intersectbv->Intersection($querybv,$currbv); 
+			die if($QCCHECKS && $currbv->bit_test($currcount+1));
+			die if($QCCHECKS && $intersectbv->bit_test($currcount+1));
+			my($imin,$imax) = $intersectbv->Interval_Scan_inc($currcount+1);
+			my($cmin,$cmax) = $currbv->Interval_Scan_inc($currcount+1);
+			if(! defined $cmin || ! defined $imin){
+			    print "INGAP Can't find matching position in $aln->[0] > columnstart:$columnstart. Returning no mapping\n" if($DEBUG > 1);
+			    if($orient eq '-'){
+				$start = $aln->[2]-$matches;
+			    }
+			    else{
+				#Start is alignment start (s1) + matching columns 
+				$start = $aln->[1]+$matches-1;
+			    }
+			    #return ($aln->[1],$aln->[1],$currbv);
+			    return ($start,$start,$currbv);
+			}
+			if($imin>$columnend){
+			    #|-------*MMMMMMM*-----------| Query columnstart -> columnend
+			    #        *       *             columnstart,columnend
+			    #                    *         cmin>columnend
+			    #|----*XXXXXXXXXXXXXXMM------| 
+			    #  
+			    print "INGAP Alignment occurs entirely within a gap $cmin>$columnend\n" if($DEBUG > 1);
+			    return ($aln->[1],$aln->[1],$currbv);
+			}
+			die "$imin,$imax $cmin,$cmax" if($imin<$cmin);
+			#num matching bits set between 
+			#$imin-$cmin is number of matches in current seq until the 
+			#next matching position in the query
+			my $nummatches=0;
+			if($cmin>0){
+			    die "Bad match index after scan $cmin" if($QCCHECKS && $currbv->bit_test($cmin)!=1);
+			}
+			#TODO replace with Interval_Scan
+			for(my $i=$cmin;$i<=$imin;$i++){
+			    if($currbv->bit_test($i)){
+				$nummatches++;
+			    }
+			}
+			die if($cmin==0 && $nummatches!=0);
+			print "INGAP $nummatches matches until next match between query and $aln->[0] between columns $cmin-$imin\n" if($DEBUG > 1);
+			if($orient eq '-'){
+			    $start = $aln->[2]-$matches-$nummatches+1;
+			}
+			else{
+			    #Start is alignment start (s1) + matching columns 
+			    $start = $aln->[1]+$matches+$nummatches-1;
+			}
+		    }
+		    print "START genomic=$start\n" if($DEBUG > 1);
+		    die if($start<$aln->[1]);
+		    die if($start>$aln->[2]);
+		    $startcount = $currcount+($columnstart-$currcount);
+		    $foundstart=1;
+		}
+		if($count+$currcount>=$columnend){
+		    if(!$foundend){
+			print "FOUNDEND $currcount $matches $count$char\n" if($DEBUG > 1);
+			if($char eq 'M'){
+			    print "END IN MATCH $orient\n" if($DEBUG > 1);
+			    if($orient eq '-'){
+				$end = $aln->[2]-$matches-($columnend-$currcount);
+			    }
+			    else{
+				$end = $aln->[1]+$matches+($columnend-$currcount);
+			    }
+			}
+			else{
+			    #Report last matching position
+			    print "END INGAP $orient\n" if($DEBUG > 1);
+			    #Report last matching position
+			    #Last matching position is defined by last overlapping M interval between 
+			    #query and current sequence
+			    if($QCCHECKS){
+				my $intersectbv = new Bit::Vector($querybv->Size());
+				$intersectbv->Intersection($querybv,$currbv);
+				die if($currbv->bit_test($currcount+1));
+				die if($intersectbv->bit_test($currcount+1));
+				my($imin,$imax) = $intersectbv->Interval_Scan_dec($currcount+1);
+				my($cmin,$cmax) = $currbv->Interval_Scan_dec($currcount+1);
+				my $nummatches=0;
+				if(! defined $cmax || !defined $imax){
+				    die "INGAP Can't find matching position in $aln->[0] < columnend:$columnend. No last matching position\n" if($DEBUG > 1);
+				}
+				else{
+				    if($DEBUG>1){
+					for(my $i=$cmax;$i>=$imax;$i--){
+					    if($currbv->bit_test($i)){
+						$nummatches++;
+					    }
+					}
+				    }
+				}
+				print "INGAP $nummatches until next match between query and $aln->[0] at column $cmax-$imax\n" if($DEBUG > 1);
+			    }
+			    if($orient eq '-'){
+				$end = $aln->[2]-$matches;
+			    }
+			    else{
+				$end = $aln->[1]+$matches;
+			    }
+			}
+			print "END genomic=$end\n" if($DEBUG > 1);
+			die if($end>$aln->[2]);
+			die if($end<$aln->[1]);
+			$foundend=1;
+			last;
+		    }
+		}
+	    }
+	    if($char eq 'M'){
+		$matches+=$count;
+	    }
+	    $currcount+=$count;
+	}
+	die "Could not find start or end " if(!$foundstart || !$foundend);
+    }
+    else{
+	if($orient eq '-'){
+	    $start=$aln->[2]-$columnend-1;
+	    $end=$aln->[2]-$columnstart;
+	}
+	else{
+	    $start=$aln->[1]+$columnstart-1;
+	    $end=$aln->[1]+$columnend;
+	}
+    }
+    #returning fmin<fmax
+    if($orient eq '-'){
+	($start,$end) = ($end,$start);
+    }
+     return ($start,$end,$currbv);
+}
+
+#Split a cigar string such as "10M2X5M" into its runs.
+#
+#  $cig  cigar text: a sequence of <count><op> pairs, op one of M X I D G
+#
+#Returns (\@cigs,$columncount) where @cigs holds one [count,op] pair per run
+#in input order and $columncount is the sum of all counts (0 for an empty
+#or undefined cigar).
+#Dies with "Invalid cigar str ..." when any part of the string is not a
+#well formed <count><op> pair.
+sub get_cigs {
+    my($cig) = @_;
+    $cig = '' if(!defined $cig);
+
+    my @cigs;
+    my $columncount = 0;
+    #\G with /gc consumes the string strictly left to right, so the scan
+    #stops at the first character that does not start a valid run
+    while($cig =~ /\G(\d+)([MXIDG])/gc){
+        push @cigs,[$1,$2];
+        $columncount += $1;
+    }
+    #pos() is undef when not even the first run matched
+    my $parsed = pos($cig) || 0;
+    if($parsed != length($cig)){
+        my $rest = substr($cig,$parsed);
+        die "Invalid cigar str $rest\n";
+    }
+    return (\@cigs,$columncount);
+}
+
+#return alnobj,bitvector,width
+#Look up the alignment stored under $name and return its stored list.
+#Dies when $name is a reference. When no alignment of that name exists a
+#notice is printed to the default output handle and undef is returned.
+sub getAlignment{
+    my($self,$name) = @_;
+    die "Bad alignment $name" if(ref $name);
+    unless(exists $self->{_alignments}->{$name}){
+        print "Alignment $name not found\n";
+        return undef;
+    }
+    return @{$self->{_alignments}->{$name}};
+}
+
+#If passed with no start/end, then returns first interval encountered
+#Assumes one interval per genome, per alignment
+#
+#Find the row of alignment $align_name that belongs to sequence $seqname.
+#With $qstart/$qend given, a row is skipped when both query coordinates lie
+#before the row's start ([1]) or both lie after its end ([2]).
+#Returns the row (array ref), or undef after printing a notice when no row
+#qualifies. Dies when $seqname is false.
+sub getAlignedInterval{
+    my($self,$align_name,$seqname,$qstart,$qend) = @_;
+    die if(!$seqname);
+    my $rows = $self->{_alignments}->{$align_name}->[0];
+    my $anyinterval = (!defined $qstart && !defined $qend);
+    foreach my $alni (@$rows){
+        next if($alni->[0] ne $seqname);
+        return $alni if($anyinterval);
+        my $disjoint = (($qstart < $alni->[1] && $qend < $alni->[1]) || ($qstart > $alni->[2] && $qend > $alni->[2]));
+        return $alni if(!$disjoint);
+        #Non-overlapping, keep looking
+        print "#Non overlapping alignment on $seqname: $qstart < $alni->[1] && $qend < $alni->[1]) || ($qstart > $alni->[2] && $qend > $alni->[2]\n" if($self->{_debug});
+    }
+    print "#Can't find $seqname on alignment $align_name\n";
+    return undef;
+}
+
+
+#
+#Returns closed interval [$startcol,$endcol]
+#
+#Build a per-sequence character matrix for columns $startcol..$endcol of the
+#stored alignment $align_name.
+#
+#  $align_name  name of an alignment stored in $self->{_alignments}
+#  $startcol    first column to report; a false value means 0
+#  $endcol      last column to report; a false value means the alignment width
+#  $db          hash ref keyed by sequence name; each value must answer seq()
+#  $ref         optional sequence name. When present its row is swapped into
+#               position 0 of the stored alignment (the stored row order is
+#               changed in place) and it is added to the name filter
+#  @seqs        optional sequence names; when non-empty only these rows
+#               (plus $ref) are reported
+#
+#Returns ($retmatrix,$retseqmatrix,\@names,$mcount)
+#  $retseqmatrix  one string per reported row holding the sequence characters
+#                 of the match runs, '-' everywhere else
+#  $retmatrix     row 0 holds '.' in match runs and '-' elsewhere. Every later
+#                 row holds '.' where its character equals row 0's character
+#                 (case insensitive), '-' where both are '-', and otherwise
+#                 its own sequence character
+#  \@names        sequence name of each reported row
+#  $mcount        hash ref: row index => number of '.' positions; row 0 is
+#                 pinned to 1000000000
+#
+#Dies on an unknown alignment, on columns beyond the alignment width, on
+#$endcol < $startcol, and on a sequence that is missing from $db.
+sub getAlignmentMatrix {
+    my($self,$align_name,$startcol,$endcol,$db,$ref,@seqs) = @_;
+    if(!$startcol){
+        $startcol=0;
+    }
+    die "Can't find alignment $align_name" if(!exists $self->{_alignments}->{$align_name});
+    my ($alignobj,undef,$align_width) = @{$self->{_alignments}->{$align_name}};
+    if(!$endcol){
+        $endcol=$align_width;
+    }
+    die "Bad input columns $startcol-$endcol $startcol >$align_width || $endcol > $align_width" if($startcol >$align_width || $endcol > $align_width);
+    die "Invalid range $startcol-$endcol" if($endcol < $startcol);
+
+    #populate alignment matrix
+    my $row=0;
+    my $matrix=[];
+    my $seqmatrix=[];
+    my @names;
+
+    my $namesfilter = {};
+    foreach my $s (@seqs){
+        $namesfilter->{$s}=1;
+    }
+    #The filter only applies when the caller listed sequences; $ref is added
+    #afterwards so that naming a reference alone does not hide other rows
+    my $skipfilter = (scalar(keys %$namesfilter)>0) ? 0 : 1;
+    if($ref){
+        $namesfilter->{$ref}=1;
+        my $refi;
+        for(my $i=0;$i<(@$alignobj);$i++){
+            if($alignobj->[$i]->[0] eq $ref){
+                $refi=$i;
+                last;
+            }
+        }
+        #Move the reference row to the top; nothing to do when it is absent
+        if(defined $refi){
+            my $tmpi = $alignobj->[0];
+            $alignobj->[0] = $alignobj->[$refi];
+            $alignobj->[$refi] = $tmpi;
+        }
+    }
+
+    foreach my $alni (@$alignobj){
+        next if(!$skipfilter && !$namesfilter->{$alni->[0]});
+        my $seqobj = $db->{$alni->[0]};
+        #Check before calling seq() so a missing sequence gives this message
+        die "Can't find seq $alni->[0]\n" if(!$seqobj);
+        my $seq = $seqobj->seq();
+        my $matchcount=0;
+        #Columns are counted from 1; index 0 of each row string is padding
+        my $column=1;
+        $matrix->[$row] = '-'x ($align_width+1);
+        $seqmatrix->[$row]='-'x ($align_width+1);
+        $names[$row] = $alni->[0];
+        my ($cigs) = get_cigs($alni->[4]);
+        foreach my $c (@$cigs){
+            my($count,$char) = @$c;
+            #A run is filled in when it starts inside the requested range or
+            #when the range starts inside the run
+            my $inrange = (($column >= $startcol && $column <= $endcol)
+                           ||
+                           ($startcol >= $column && $startcol <= ($column+$count)));
+            if($char eq 'M'){
+                if($inrange){
+                    substr($matrix->[$row],$column,$count,'.' x $count);
+                    if($alni->[3] eq '+'){
+                        substr($seqmatrix->[$row],$column,$count,substr($seq,$alni->[1]+$matchcount,$count));
+                    }
+                    else{
+                        #Note cigar always denotes offset from alignment start
+                        #In '-' orient, cigar starts from $alni->[2]--->$alni->[1]
+                        substr($seqmatrix->[$row],$column,$count,revcom(substr($seq,$alni->[2]-$matchcount-$count,$count))->seq());
+                    }
+                }
+                $matchcount+=$count;
+            }
+            elsif($inrange){
+                substr($matrix->[$row],$column,$count,'-' x $count);
+            }
+            $column+=$count;
+        }
+        $row++;
+    }
+
+    #Cut the requested window out of every full width row
+    my $retmatrix=[];
+    my $retseqmatrix=[];
+    my $width = $endcol-$startcol+1;
+    for(my $i=0;$i<@$matrix;++$i){
+        $retmatrix->[$i] = substr($matrix->[$i],$startcol,$width);
+        $retseqmatrix->[$i] = substr($seqmatrix->[$i],$startcol,$width);
+        die "Bad sequence $retmatrix->[$i]" if(length ($retmatrix->[$i])<1);
+        die "Bad sequence $retseqmatrix->[$i]" if(length ($retseqmatrix->[$i])<1);
+    }
+    #remove same characters
+    #this is really going to really slow in perl this way
+    my $mcount;
+    for(my $i=1;$i<@$retmatrix;++$i){
+        my $m=0;
+        for(my $j=0;$j<length($retseqmatrix->[$i]);$j++){
+            my $topchar = uc(substr($retseqmatrix->[0],$j,1));
+            if(uc(substr($retseqmatrix->[$i],$j,1)) ne $topchar){
+                substr($retmatrix->[$i],$j,1) = substr($retseqmatrix->[$i],$j,1);
+            }
+            elsif($topchar eq '-'){
+                substr($retmatrix->[$i],$j,1) = '-';
+            }
+            else{
+                $m++;
+                substr($retmatrix->[$i],$j,1) = '.';
+            }
+        }
+        $mcount->{$i} = $m;
+    }
+    #Row 0 gets a count larger than any real one
+    $mcount->{0} = 1000000000;#$align_width+1;
+    return ($retmatrix,$retseqmatrix,\@names,$mcount);
+}
+
+#Report whether both $coord1 and $coord2 fall inside the first row of
+#alignment $align_name that belongs to sequence $qseqname.
+#Returns 1 when they do, 0 when they do not or when the sequence has no row.
+#When the file level $nodups flag is true a matching row always yields 1,
+#whatever the coordinates. Dies on a missing or unknown alignment name.
+sub contains{
+    my($self,$align_name,$qseqname,$coord1,$coord2) = @_;
+    die "Can't find alignment $align_name" if(!$align_name || !exists $self->{_alignments}->{$align_name});
+    my $rows = $self->{_alignments}->{$align_name}->[0];
+    foreach my $row (@$rows){
+        next if($row->[0] ne $qseqname);
+        my($fmin,$fmax) = ($row->[1],$row->[2]);
+        my $outside = ($coord1<$fmin || $coord1>$fmax || $coord2<$fmin || $coord2>$fmax);
+        #Only the first row of the sequence is ever consulted
+        return ($outside && !$nodups) ? 0 : 1;
+    }
+    return 0;
+}
+
+#Print columns $startcol..$endcol of alignment $aln to $fh in blocks of 100
+#columns. Each block lists one line per sequence, then one line per mapped
+#feature that intersects the sequence's coordinates in that block.
+#
+#  $fh           output file handle
+#  $aln          name of an alignment stored in $self->{_alignments}
+#  $startcol     first column, required and must be true
+#  $endcol       last column, required and must be true
+#  $db,$ref      passed through to getAlignmentMatrix
+#  $mappedfeats  array ref of features. Each entry is handed flattened to
+#                AlignmentTree::insert; this sub itself reads $feat->[1] as
+#                the feature name and $feat->[0]->[0]->[1] / ->[2] as the
+#                feature's start and end
+#  $htmlout      when true an empty <a href> line is printed per anchor
+#
+#Dies when $db, $startcol or $endcol is missing and on several internal
+#consistency checks.
+sub printAlignment{
+    my($self,$fh,$aln,$startcol,$endcol,$db,$ref,$mappedfeats,$htmlout) = @_;
+    die "Must specify Bioperl database $db that contains sequence data" if(!$db);
+    die "Must specify startcol, endcol $startcol-$endcol" if(!$startcol || !$endcol);
+    my($alignobj) = @{$self->{_alignments}->{$aln}};
+    #getAlignedInterval reads the flag as _debug; the plain key is still
+    #honoured so existing callers that set it keep working
+    my $debug = $self->{_debug} || $self->{debug};
+
+    #$mmatrix,$seqmatrix are relative to $startcol, index starting at 0
+    my ($mmatrix,$seqmatrix,$names,$mcount) = $self->getAlignmentMatrix($aln,$startcol,$endcol,$db,$ref);
+    my $COL_WIDTH=100;
+    my $atree = AlignmentTree->new();
+    my $features = {};
+
+    foreach my $feat (@$mappedfeats){
+        $atree->insert(@$feat);
+        $features->{$feat->[1]} = [$feat->[0]->[0]->[1],$feat->[0]->[0]->[2]];
+    }
+    #Rows are printed in order of decreasing match count
+    my @mcountsort = sort {$mcount->{$b}<=>$mcount->{$a}} (keys %{$mcount});
+    for(my $j=0;$j<=(($endcol-$startcol)/$COL_WIDTH);$j++){
+        my $anchors = {}; #for html output
+        #$s,$e are 1-based columns relative to $startcol
+        my $s=$j*$COL_WIDTH+1;
+        my $e=$s+$COL_WIDTH-1;
+        $e = ($e>($endcol-$startcol+1)) ? ($endcol-$startcol+1) : $e;
+        my @coords;
+        #offset into full alignment $aln
+        my $absstartcol = $s+$startcol-1;
+        my $absendcol = $e+$startcol-1;
+        #Column labels are only shown in debug mode
+        my($startlabel,$endlabel) = $debug ? ("col:$absstartcol","col:$absendcol") : ("","");
+        foreach my $i (@mcountsort){
+            die if(! $names->[$i]);
+            my($alni) = $self->getAlignedInterval($aln,$names->[$i]);
+            my($start,$end) = columntocoords($alni,$absstartcol,$absendcol);
+            $coords[$i] = [$start,$end,$alni->[3]];
+            ($start,$end) = ($end,$start) if($alni->[3] eq '-');
+            #Row 0 shows its sequence, all other rows their difference string
+            my $rowtext = ($i==0) ? $seqmatrix->[$i] : $mmatrix->[$i];
+            my $displaystr = substr($rowtext,$s-1,$e-$s+1);
+            #Highlight Shine Delgarno
+            $displaystr =~ s/AGGAGG/<font color='red'>aggagg<\/font>/g;
+            printf $fh ("%30.30s %7s %11s %-30s %7s %11s\n",
+                        "$names->[$i]:$alni->[3]",
+                        $start,
+                        $startlabel,
+                        $displaystr,
+                        $end,
+                        $endlabel);
+        }
+        printf $fh ("%".$COL_WIDTH.".".$COL_WIDTH."s","$aln col:$absstartcol-$absendcol\n");
+        foreach my $i (@mcountsort){
+            #Show all matching features that intersect $coords[$i]->[0],$coords[$i]->[1]
+            next if(!($coords[$i]->[1]-$coords[$i]->[0]>1));
+            my $roworient = $coords[$i]->[2];
+            my @res = $atree->intersect($names->[$i],$coords[$i]->[0],$coords[$i]->[1],'gene');
+            foreach my $r (@res){
+                #offset into full alignment $aln
+                my($cs,$ce) = coordstocolumn($alignobj,$names->[$i],$r->[2],$r->[3],1);
+                my $leadinggap = 'X'x($cs-$absstartcol);
+                my $trailinggap = 'X'x($absendcol-$ce);
+                #Start empty so that length() and concatenation below are
+                #well defined when the piece holds no start or stop codon
+                my $startcodonstr = '';
+                my $stopcodonstr = '';
+                my $displaytoken = '';
+                my($gene_fmin,$gene_fmax) = @{$features->{$r->[0]} || []};
+                my($feat_start,$feat_end) = ($r->[2],$r->[3]);
+
+                #TODO, REFACTOR into a matrix. this impl doesn't support codons that span row bounds
+                #currently only viz start,stop codons at beginning/end of alignment
+                if($feat_start <= $gene_fmin && $feat_end >= $gene_fmin){
+                    #This piece covers the low coordinate end of the feature
+                    $anchors->{"$aln:$cs"}++;
+                    $anchors->{"$aln:$ce"}++;
+                    my $showfirst = 3;
+                    #NOTE(review): the high end branch below uses ($ce-$cs)+1
+                    #here; one of the two looks off by one, confirm which
+                    if($ce-$cs<3){
+                        $showfirst = ($ce-$cs);
+                    }
+                    if($roworient eq '-'){
+                        $stopcodonstr = substr('***',0,$showfirst);
+                        if($r->[6] eq '-'){
+                            $displaytoken .= 'STOP1<--';
+                        }
+                        else{
+                            $displaytoken .= '<--START1';
+                            $anchors->{"$r->[0]"}++;
+                        }
+                    }
+                    else{
+                        $startcodonstr = substr('***',0,$showfirst);
+                        if($r->[6] eq '-'){
+                            $displaytoken .= 'STOP2<--';
+                        }
+                        else{
+                            $displaytoken .= 'START2-->';
+                            $anchors->{"$r->[0]"}++;
+                        }
+                    }
+                    #TODO trim to row
+                }
+                if($feat_start <= $gene_fmax && $feat_end >= $gene_fmax){
+                    #This piece covers the high coordinate end of the feature
+                    $anchors->{"$aln:$cs"}++;
+                    $anchors->{"$aln:$ce"}++;
+                    my $showfirst = 3;
+                    if($ce-$cs<3){
+                        $showfirst = ($ce-$cs)+1;
+                    }
+                    if($roworient eq '-'){
+                        $startcodonstr = substr('***',0,$showfirst);
+                        if($r->[6] eq '-'){
+                            $displaytoken .= 'START3-->';
+                            $anchors->{"$r->[0]"}++;
+                        }
+                        else{
+                            $displaytoken .= 'STOP3<--';
+                        }
+                    }
+                    else{
+                        $stopcodonstr = substr('***',0,$showfirst);
+                        if($r->[6] eq '-'){
+                            $displaytoken .= '<--START4';
+                            $anchors->{"$r->[0]"}++;
+                        }
+                        else{
+                            $displaytoken .= 'STOP4<--';
+                        }
+                    }
+                    #TODO trim to row
+                }
+                my $spacer = '_'x($ce-$cs+1-length($startcodonstr)-length($stopcodonstr));
+                my $displaystr = $startcodonstr.$spacer.$stopcodonstr;
+                die if(length($displaystr) > $COL_WIDTH);
+
+                #TODO determine frame and print ~ only every 3 codons in frame if in frame
+                #The frame is the distance, modulo 3, from the feature end
+                #that is drawn first on this row to the start of this piece
+                my $frame;
+                if($roworient eq '-'){
+                    die if($gene_fmax < $feat_end);
+                    $frame = (($gene_fmax - $feat_end)%3);
+                }
+                else{
+                    die "$feat_start < $gene_fmin $r->[0]" if($feat_start < $gene_fmin);
+                    $frame = (($feat_start - $gene_fmin)%3);
+                }
+                ($feat_start,$feat_end) = ($feat_end,$feat_start) if($roworient eq '-');
+
+                my $m=$frame;
+                my $fulldisplaystr = $leadinggap.$displaystr.$trailinggap;
+                my $first = length($leadinggap);
+                my $last = $first+length($displaystr);
+                for(my $k=$first;$k<$last;$k++){
+                    my $colchar = substr($mmatrix->[$i],$s-1+$k,1);
+                    if($colchar ne '-'){
+                        $m++;
+                        #Tick every third aligned position
+                        #Don't overwrite start,stop codons
+                        if($m%3==0
+                           && $k>length($startcodonstr)+$first
+                           && $k<($last-length($stopcodonstr))){
+                            substr($fulldisplaystr,$k,1) = '|';
+                        }
+                    }
+                    else{
+                        #Gap columns of the row show through the feature line
+                        substr($fulldisplaystr,$k,1) = $colchar;
+                    }
+                }
+                die if($r->[6] ne $r->[7]);
+                printf $fh ("%30.30s %7s %11s %-30s %7s %11s\n",
+                            $r->[0].":$r->[6]",
+                            $feat_start,
+                            $displaytoken,
+                            $fulldisplaystr,
+                            $feat_end,
+                            $displaytoken);
+            }
+        }
+        if($htmlout){
+            foreach my $anchor (keys %$anchors){
+                print $fh "<a href='#$anchor'></a>\n";
+            }
+        }
+
+        printf $fh ("%".$COL_WIDTH.".".$COL_WIDTH."s","ANNOTATIONS\n");
+    }
+}
+
+#Dump every row of $alignobj as one "#ALIGNOBJ" line.
+#  $alignobj  array ref of rows (array refs)
+#  $handle    optional output handle, STDOUT when false
+sub printAlignmentDebug{
+    my($alignobj,$handle) = @_;
+    my $out = $handle ? $handle : \*STDOUT;
+    for my $row (@$alignobj){
+        print $out "#ALIGNOBJ $alignobj ",join(' ',@$row),"\n";
+    }
+}
+
+#Apply revcomp_alni to every row of alignment $aln.
+#Returns a reference to a new array holding the converted rows in the same
+#order as the input.
+sub revcomp{
+    my($aln) = @_;
+    my @naln = map { &revcomp_alni($_) } @$aln;
+    return \@naln;
+}
+
+#Build the opposite strand version of one alignment row.
+#
+#  $alni  row [name,start,end,orient,cigar]
+#
+#Returns a new row with the same name, start and end, the orientation
+#flipped ('+' becomes '-', anything else becomes '+') and a cigar in which
+#I and D runs are exchanged. The input row is not modified.
+#Dies when the rebuilt cigar does not cover the same number of columns.
+#
+#NOTE(review): the runs keep their original order. A reverse complement
+#would normally also reverse the run order; confirm with the callers
+#before changing that.
+sub revcomp_alni{
+    my($alni) = @_;
+    my %swapped = (I => 'D', D => 'I');
+    my $nalni = [$alni->[0],$alni->[1],$alni->[2]];
+    $nalni->[3] = ($alni->[3] eq '+') ? '-' : '+';
+    my ($cigs,$columncount) = get_cigs($alni->[4]);
+    my $cigstr = '';
+    my $newcount = 0;
+    foreach my $c (@$cigs){
+        #Compare the operator character, not the [count,op] pair itself
+        my($count,$char) = @$c;
+        my $newchar = exists $swapped{$char} ? $swapped{$char} : $char;
+        $cigstr .= "$count$newchar";
+        $newcount += $count;
+    }
+    #The column total must survive the rewrite
+    die "Column count changed $newcount != $columncount" if($newcount != ($columncount || 0));
+    $nalni->[4] = $cigstr;
+    return $nalni;
+}
+
+#Drop alignments whose interval on $qseqname lies completely inside the
+#interval of a longer alignment.
+#
+#  $alignments  array ref of alignment names
+#  $qseqname    sequence whose intervals are compared
+#
+#Only names matching the file level $aligntoken pattern take part in the
+#comparison. When at least one alignment is contained, the surviving
+#candidate names are returned ordered by decreasing interval length.
+#Otherwise the input list is returned unchanged. Partial overlaps are
+#counted but only reported, under $DEBUG.
+sub removeOverlaps{
+    my($self,$alignments,$qseqname) = @_;
+    my @alns;
+    my @results;
+    my %contained;
+    my %overlaps;
+    foreach my $align_name (@$alignments){
+        #The lookup runs for every name, before the token filter
+        my $alni = $self->getAlignedInterval($align_name,$qseqname);
+        next unless($align_name =~ /$aligntoken/);
+        push @alns,[$align_name,$alni->[1],$alni->[2],$alni->[2]-$alni->[1]];
+    }
+    #Longest first, so containers always precede what they contain
+    my @sortedalns = sort {$b->[3] <=> $a->[3]} @alns;
+    foreach my $i (0 .. $#sortedalns){
+        my($ifmin,$ifmax) = @{$sortedalns[$i]}[1,2];
+        foreach my $j ($i+1 .. $#sortedalns){
+            my($jfmin,$jfmax) = @{$sortedalns[$j]}[1,2];
+            my $startinside = ($jfmin>=$ifmin && $jfmin <=$ifmax);
+            my $endinside = ($jfmax>=$ifmin && $jfmax <=$ifmax);
+            if($jfmin>=$ifmin && $jfmax <=$ifmax){
+                print "Marking $sortedalns[$j]->[0] contained $jfmin>=$ifmin && $jfmax <=$ifmax in $sortedalns[$i]->[0]\n" if($DEBUG);
+                $contained{$j}++;
+                next;
+            }
+            $overlaps{$j}++ if($startinside);
+            $overlaps{$j}++ if($endinside);
+        }
+    }
+    if(scalar(keys %overlaps)>0){
+        print "#WARNING removing some alignments with overlaps\n" if($DEBUG);
+    }
+    return @$alignments if(scalar(keys %contained)==0);
+
+    print "#WARNING removing some alignments that are fully contained\n" if($DEBUG);
+    foreach my $i (0 .. $#sortedalns){
+        if(exists $contained{$i}){
+            print "#WARNING removing contained alignment $sortedalns[$i]->[0]\n" if($DEBUG);
+            next;
+        }
+        push @results,$sortedalns[$i]->[0];
+    }
+    return @results;
+}
+1;
diff --git a/mapping/IntervalTree.pm b/mapping/IntervalTree.pm
new file mode 100644
index 0000000..71f0359
--- /dev/null
+++ b/mapping/IntervalTree.pm
@@ -0,0 +1,154 @@
+package IntervalTree;
+
+#Adapted from bx-python
+
+use strict;
+use Math::Random qw(random_uniform);
+use POSIX qw(ceil floor);
+use Data::Dumper;
+
+#Create a single tree node for the interval $start..$end.
+#  $start,$end  interval bounds
+#  $name        label returned by the default intersect callback
+#  $orient      orientation stored with the node
+#The node starts without children, so maxend and minend both equal $end.
+#The priority is drawn at random through random_uniform.
+sub new{
+    my($classname,$start,$end,$name,$orient) = @_;
+    my $priority = ceil( (-1.0 / log(.5)) * log( -1.0 / (random_uniform(1,0,1) - 1)));
+    my $self = bless({
+        'priority' => $priority,
+        'start'    => $start,
+        'end'      => $end,
+        'orient'   => $orient,
+        'maxend'   => $end,
+        'minend'   => $end,
+        'left'     => undef,
+        'right'    => undef,
+        'name'     => $name,
+    },$classname);
+    #Callback used by intersect when the caller supplies none
+    $self->{'default_func'} = sub {
+        my($interval) = @_;
+        return $interval->{'name'};
+    };
+    return $self;
+}
+
+#Insert the interval $start..$end below this node.
+#Intervals starting after this node's start go to the right subtree, all
+#others to the left. Returns the root of the subtree, which is always this
+#node because rebalancing is switched off.
+#Dies when $end < $start or when $orient is neither '+' nor '-'.
+sub insert{
+    my($self,$start,$end,$name,$orient) = @_;
+    die "Bad start-end $start-$end" if($end<$start);
+    die "Bad orient $orient" if($orient ne '-' && $orient ne '+');
+    my $root = $self;
+    my $side = ($start > $self->{'start'}) ? 'right' : 'left';
+    if(defined $self->{$side}){
+        $self->{$side} = $self->{$side}->insert($start,$end,$name,$orient);
+    }
+    else{
+        $self->{$side} = IntervalTree->new($start,$end,$name,$orient);
+    }
+    # rebalance tree
+    #    the priority driven rotateleft/rotateright calls are disabled
+
+    #Refresh the cached end range from this node and its children
+    my @children = grep { defined $_ } ($root->{'right'},$root->{'left'});
+    if(@children){
+        my $maxend = $root->{'end'};
+        my $minend = $root->{'end'};
+        foreach my $child (@children){
+            $maxend = $child->{'maxend'} unless($maxend > $child->{'maxend'});
+            $minend = $child->{'minend'} unless($minend < $child->{'minend'});
+        }
+        $root->{'maxend'} = $maxend;
+        $root->{'minend'} = $minend;
+    }
+    return $root;
+}
+
+#Collect results for every node of this subtree that overlaps the query.
+#  $start,$end  query bounds; a node matches when $start is below its end
+#               and $end is above its start (strict comparisons)
+#  $func        optional callback applied to each matching node; without
+#               one the node's default callback is used
+#Returns the callback results: this node first, then the left subtree, then
+#the right subtree. Dies when $end < $start.
+sub intersect{
+    my($self,$start,$end,$func) = @_;
+    die "$self->{'name'}:$end<$start not valid query" if($end<$start);
+    $func = $self->{'default_func'} if(!$func);
+
+    my @results;
+    push @results,$func->( $self ) if($start < $self->{'end'} && $end > $self->{'start'});
+
+    #The left subtree can be skipped when all of its ends lie before $start
+    my $left = $self->{'left'};
+    if(defined $left && $start <= $left->{'maxend'}){
+        push @results, $left->intersect( $start, $end, $func );
+    }
+    #Nodes on the right start after this node, so they can be skipped when
+    #the query ends before this node starts
+    my $right = $self->{'right'};
+    if(defined $right && $end >= $self->{'start'}){
+        push @results, $right->intersect( $start, $end, $func );
+    }
+    return @results;
+}
+
+#Rotate this node to the right: its left child becomes the subtree root and
+#this node becomes that child's right child, taking over the child's former
+#right subtree as its new left subtree.
+#The rotation only happens when the left child has a right subtree;
+#otherwise this node is returned unchanged. After a rotation this node's
+#maxend/minend are recomputed from its own end and its children.
+#Returns the root of the subtree. Dies when the 'left' key or the left
+#child's 'right' key does not exist.
+sub rotateright{
+    my($self) = @_;
+    die if(!exists $self->{'left'});
+    die if(!exists $self->{'left'}->{'right'});
+    return $self if(!defined $self->{'left'}->{'right'});
+
+    my $root = $self->{'left'};
+    $self->{'left'} = $root->{'right'};
+    $root->{'right'} = $self;
+
+    my @children = grep { defined $_ } ($self->{'right'},$self->{'left'});
+    if(@children){
+        my $maxend = $self->{'end'};
+        my $minend = $self->{'end'};
+        foreach my $child (@children){
+            $maxend = $child->{'maxend'} unless($maxend > $child->{'maxend'});
+            $minend = $child->{'minend'} unless($minend < $child->{'minend'});
+        }
+        $self->{'maxend'} = $maxend;
+        $self->{'minend'} = $minend;
+    }
+    return $root;
+}
+#Rotate this node to the left: its right child becomes the subtree root and
+#this node becomes that child's left child, taking over the child's former
+#left subtree as its new right subtree.
+#The rotation only happens when the right child has a left subtree;
+#otherwise this node is returned unchanged. After a rotation this node's
+#maxend/minend are recomputed from its own end and its children.
+#Returns the root of the subtree. Dies when the 'right' key or the right
+#child's 'left' key does not exist.
+sub rotateleft{
+    my($self) = @_;
+    die if(!exists $self->{'right'});
+    die if(!exists $self->{'right'}->{'left'});
+    return $self if(!defined $self->{'right'}->{'left'});
+
+    my $root = $self->{'right'};
+    $self->{'right'} = $root->{'left'};
+    $root->{'left'} = $self;
+
+    my @children = grep { defined $_ } ($self->{'right'},$self->{'left'});
+    if(@children){
+        my $maxend = $self->{'end'};
+        my $minend = $self->{'end'};
+        foreach my $child (@children){
+            $maxend = $child->{'maxend'} unless($maxend > $child->{'maxend'});
+            $minend = $child->{'minend'} unless($minend < $child->{'minend'});
+        }
+        $self->{'maxend'} = $maxend;
+        $self->{'minend'} = $minend;
+    }
+    return $root;
+}
+
+1;
diff --git a/mapping/Makefile b/mapping/Makefile
new file mode 100644
index 0000000..08417c6
--- /dev/null
+++ b/mapping/Makefile
@@ -0,0 +1,20 @@
+#Set release name or install directory
+#INSTALL_DIR defaults to a directory named after the release, created
+#below the directory make is run from
+RELEASE_NAME=mugsy_annotator-0.5
+INSTALL_DIR=./${RELEASE_NAME}
+
+#Default target: same as install
+all: mugsya_install
+
+install: mugsya_install
+
+#Pack the install directory into ${RELEASE_NAME}.tgz
+#This target has no prerequisites, so the install directory must already
+#have been populated by a previous "make install"
+dist:
+	tar cvzf ${RELEASE_NAME}.tgz ${INSTALL_DIR}
+
+#Create the install directory and copy the README, the scripts and the
+#two Perl modules into it
+mugsya_install:
+	mkdir -p ${INSTALL_DIR}
+	install README ${INSTALL_DIR}
+	install mugsy-annotator ${INSTALL_DIR}
+	install mapfeatures.pl ${INSTALL_DIR}
+	install mafindex.pl ${INSTALL_DIR}
+	install featureindex.pl ${INSTALL_DIR}
+	install AlignmentTree.pm ${INSTALL_DIR}
+	install IntervalTree.pm ${INSTALL_DIR}
diff --git a/mapping/README b/mapping/README
new file mode 100644
index 0000000..d825085
--- /dev/null
+++ b/mapping/README
@@ -0,0 +1,39 @@
+##
+#INSTALL
+##
+Untar the mugsy-annotator download to an installation directory
+Edit PREFIX= in the mugsy-annotator script to the install directory
+Add the installation directory to the PERL5LIB environment variable
+eg.
+
+export PERL5LIB=/path/to/mugsy-annotator:${PERL5LIB}
+
+(1) To run Mugsy-Annotator using the wrapper, you need genome FASTA
+files and annotations in either gff3 or genbank flat file format.
+First run mugsy to generate a whole genome alignment.
+Genbank .gbk inputs require bp_genbank2gff3.pl from Bioperl.
+
+mugsy --prefix mygenomes genome1.fsa genome2.fsa genome3.fsa
+#default output is in /tmp/mygenomes.maf
+
+cat genome1.fsa genome2.fsa genome3.fsa > allgenomes.fsa
+mugsy-annotator allgenomes.fsa /tmp/mygenomes.maf genome1.gff genome2.gff genome3.gff
+
+Note, the input fasta file and MAF need to have matching sequence names in the format genome.seqname
+
+#(2) Alternatively, execute the individual steps outside the wrapper
+
+#Example
+#Clean MAF if necessary
+cat /tmp/nmen_v16.maf | perl -ne 's/^s(\s+)[^\.]+\./s$1/;print' > nmen_v16.maf
+
+mafindex.pl n16.index < nmen_v16.maf > /dev/null
+
+featureindex.pl n16.index genbank < v16annotations.gbk > /dev/null 
+
+#To generate orthologs only
+mapfeatures.pl --reportedits=0 ./n16.index ./v16.all.fsa <  v16annotations.out > v16.features.mapped
+
+#For full report (slower)
+mapfeatures.pl ./n16.index ./v16.all.fsa <  v16annotations.out > v16.features.mapped
+
diff --git a/mapping/README.example b/mapping/README.example
new file mode 100644
index 0000000..88d2613
--- /dev/null
+++ b/mapping/README.example
@@ -0,0 +1,40 @@
+Download 3 genomes from Genbank ftp site, align, and run mugsy annotator
+
+#Get annotations
+wget ftp://ftp.ncbi.nih.gov/genbank/genomes/Bacteria/Escherichia_coli_APEC_O1_uid16718/CP000468.gbk
+wget ftp://ftp.ncbi.nih.gov/genbank/genomes/Bacteria/Escherichia_coli_K_12_substr__DH10B_uid20079/CP000948.gbk
+wget ftp://ftp.ncbi.nih.gov/genbank/genomes/Bacteria/Escherichia_coli_O157H7_EDL933_uid259/AE005174.gbk
+
+#Get genome FASTA
+wget ftp://ftp.ncbi.nih.gov/genbank/genomes/Bacteria/Escherichia_coli_APEC_O1_uid16718/CP000468.fna
+wget ftp://ftp.ncbi.nih.gov/genbank/genomes/Bacteria/Escherichia_coli_K_12_substr__DH10B_uid20079/CP000948.fna
+wget ftp://ftp.ncbi.nih.gov/genbank/genomes/Bacteria/Escherichia_coli_O157H7_EDL933_uid259/AE005174.fna
+
+#Calculate alignment
+mugsy --directory `pwd` --prefix ecolitest *.fna
+
+#Run mugsy-annotator
+cat *.fna > ecolitest.fsa
+mugsy-annotator ecolitest.fsa ecolitest.maf *.gbk > ecolitest.mugsyannotator.out
+
+This will output matching genes but will also throw errors "Can't find
+seqname" that will prevent calculating agreement information. The
+error results from a reformatting of the sequence names in the MAF
+output by Mugsy. To fix this, update the original FASTA headers to
+match the sequence names used in the MAF, which are in the form
+genome.seqname.
+
+Eg.
+perl -pi -e 's/^>.*\|gb\|(\w+).*/>$1/' AE005174.fna
+perl -pi -e 's/^>.*\|gb\|(\w+).*/>$1/' CP000948.fna
+perl -pi -e 's/^>.*\|gb\|(\w+).*/>$1/' CP000468.fna
+cat *.fna > ecolitest.fsa
+
+Once fixed, mugsy-annotator should produce output, including agreement information
+mugsy-annotator ecolitest.fsa ecolitest.maf *.gbk > ecolitest.mugsyannotator.out
+
+A legend of the codes used in the output file is at the end of the output file
+
+
+
+
diff --git a/mapping/bsmlindex.pl b/mapping/bsmlindex.pl
new file mode 100755
index 0000000..3781a30
--- /dev/null
+++ b/mapping/bsmlindex.pl
@@ -0,0 +1,67 @@
+#!/usr/bin/perl
+
+use strict;
+
+use XML::Twig;
+use AlignmentTree;
+use AlignmentTree;
+use Storable qw(store retrieve);
+use Data::Dumper;
+
+$Storable::Deparse = 1;
+$Storable::Eval = 1;
+
+my $atree = new AlignmentTree();
+if(-e $ARGV[0]){
+    $atree = AlignmentTree::deserialize($ARGV[0]);
+}
+else{
+
+}
+
+my $mapping;
+
+if(-e $ARGV[1]){
+    #parsing lookup file
+    open FILE, $ARGV[1] or die "Can't open mapping file $ARGV[1]";
+    while(my $line=<FILE>){
+	my($tseq,$oseq,$offset) = split(/\s+/,$line);
+	$mapping->{$oseq} = [$tseq,$offset-1];
+    }
+    close FILE;
+}
+
+my $twig = new XML::Twig(
+			 twig_handlers =>         
+			 { 'Feature[@class = "polypeptide"]' => sub {
+			     my( $twig, $elt)= @_;
+			     my $iloc  = $elt->first_child('Interval-loc');
+			     my $seqname  = $elt->parent('Sequence')->{'att'}->{'id'};
+			     my $featname = $elt->{'att'}->{'id'};
+			     my $class = $elt->{'att'}->{'class'};
+			     my $complement = $iloc->{'att'}->{'complement'};
+			     if ($complement eq '1'){
+				 $complement = '-';
+			     }
+			     if ($complement eq '0'){
+				 $complement = '+';
+			     }
+			     my($fmin,$fmax) = ($iloc->{'att'}->{'startpos'},$iloc->{'att'}->{'endpos'});
+			     if(exists $mapping->{$seqname}){
+				 $fmin = $fmin+$mapping->{$seqname}->[1];
+				 $fmax = $fmax+$mapping->{$seqname}->[1];
+				 print "Using mapping for $seqname $mapping->{$seqname}->[0]\n";
+				 $seqname = $mapping->{$seqname}->[0];
+			     }
+			     $atree->insert([[$seqname,$fmin,$fmax,$complement,$fmax-$fmin."M"]],$featname,$class);
+			     #print "$seqname\t$featname\t$fmin\t$fmax\t$class\n";
+			     
+			 },
+		       },
+			 );            
+
+#Parse the BSML from STDIN first: the twig handler inserts each polypeptide into $atree
+my $stdin_fh = \*STDIN;
+$twig->parse($stdin_fh);
+print STDERR "Writing index to $ARGV[0]\n"; #only now does the tree hold the parsed features
+$atree->serialize($ARGV[0]);
diff --git a/mapping/chadoindex.pl b/mapping/chadoindex.pl
new file mode 100644
index 0000000..b78660e
--- /dev/null
+++ b/mapping/chadoindex.pl
@@ -0,0 +1,4 @@
+#!/usr/bin/perl
+
+use strict;
+
diff --git a/mapping/featureindex.pl b/mapping/featureindex.pl
new file mode 100755
index 0000000..2284838
--- /dev/null
+++ b/mapping/featureindex.pl
@@ -0,0 +1,119 @@
+#!/usr/bin/perl
+#
+#./featureindex.pl mugsyindex [gff|genbank|ptt] < features.txt
+#Adds features from GFF, PTT or simple whitespace-delimited text files to a MUGSY formatted index
+#Supports Genbank files if Bioperl is also installed
+#
+#TODO
+#POD::usage
+#Add more supported types from bioperl, remote download of accessions etc
+
+
+use strict;
+use lib '/usr/local/projects/angiuoli/mugsy_trunk/mapping';
+use lib './';
+use AlignmentTree;
+use Storable qw(store retrieve);
+use Data::Dumper;
+
+$Storable::Deparse = 1;
+$Storable::Eval = 1;
+
+my $atree = new AlignmentTree();
+if(-e $ARGV[0]){
+    $atree = AlignmentTree::deserialize($ARGV[0]);
+}
+else{
+
+}
+
+my $filetype = $ARGV[1];
+
+if(lc($filetype) =~ /gff/){
+    print STDERR "Reading filetype $filetype\n";
+    &parseGFF(\*STDIN,'gene','pseudogene');
+}
+elsif(lc($filetype) =~ /genbank/){
+    print STDERR "Reading filetype $filetype\n";
+    #The backtick command inherits our STDIN, so no shell redirect is needed (the old '< |' was a shell syntax error)
+    print `bp_genbank2gff3.pl --filter misc_feature -in stdin -out - | grep -v "# Input" >> /tmp/$$.gff`;
+    open FILE,"/tmp/$$.gff" or die "Can't open converted GFF file /tmp/$$.gff"; #fail loudly if the conversion wrote nothing
+    &parseGFF(\*FILE,'gene','pseudogene');
+    close FILE;
+}
+elsif(lc($filetype) =~ /ptt/){
+    my $seqname;
+    while(my $line=<STDIN>){
+	if($line =~ /^>/ || $line =~ /^Location/){
+	    if($line =~ /^>(\S+)/){
+		$seqname = $1;
+	    }
+	}
+	else{
+	    #36..1   -       35      XOCORF_0001     -       hypothetical protein
+	    my @elts = split(/\t/,$line);
+	    my ($fmin,$fmax) = ($elts[0] =~ /(\d+)\.\.(\d+)/);
+	    ($fmin,$fmax) = ($fmax < $fmin) ? ($fmax,$fmin) : ($fmin,$fmax);
+	    $fmin = $fmin-1;
+	    my $strand = $elts[1];
+	    my $featname = $elts[3];
+	    print "Adding feature $featname on sequence:$seqname $fmin,$fmax,$strand to alignment tree\n";
+	    $atree->insert([[$seqname,$fmin,$fmax,$strand,$fmax-$fmin."M"]],'gene:'.$featname,'gene');
+	    
+	}
+    }
+}
+else{
+    while(my $line=<STDIN>){
+	my($featname,$seqname,$fmin,$fmax,$strand) = split(/\s+/,$line);
+	$atree->insert([[$seqname,$fmin,$fmax,$strand,$fmax-$fmin."M"]],'gene:'.$featname,'gene');
+	print "Adding feature $featname on sequence:$seqname $fmin,$fmax,$strand to alignment tree\n";
+    }
+}
+
+print STDERR "Writing index to $ARGV[0]\n";
+$atree->serialize($ARGV[0]);
+
+#Reads GFF lines from filehandle $file and inserts every feature whose type (column 3,
+#case-insensitive) is listed in the remaining arguments into the file-level $atree as a 'gene'.
+#Start is stored as fmin-1, end as fmax. Dies if fmax<=fmin or the strand is not + or -
+sub parseGFF{
+    my($file,@feattypes) = @_;
+    my %featlookup = map {lc($_) => 1} @feattypes;
+    my $features={}; #names already inserted during this call
+    while(my $line=<$file>){
+	if($line !~ /^\#/){
+	    chomp $line;
+	    my @elts = split(/\t/,$line);
+	    if(length($line)>0 && scalar(@elts)==9){
+		if(exists $featlookup{lc($elts[2])}){
+		    #Split on the first '=' only so a value containing '=' or an attribute without '=' cannot shift later pairs
+		    my %attrs = map {my($k,$v) = split(/=/,$_,2); ($k => $v)} split(/;/,$elts[8]);
+		    my $geneid;
+		    if(exists $attrs{'locus_tag'}){
+			$geneid=$attrs{'locus_tag'};
+		    }
+		    elsif(exists $attrs{'ID'}){
+			#Can't expect that ID is unique across files, so append sequence name
+			$geneid=$elts[0].'_'.$attrs{'ID'};
+		    }
+		    else{
+			print STDERR "Skipping unrecognized GFF3 line $line\n";
+			next;
+		    }
+		    my($fmin,$fmax,$orient) = @elts[3,4,6];
+		    my $i=0;
+		    while(exists $features->{$geneid}){
+			my $newid = $geneid.'_'.++$i;
+			print "Duplicate named feature $geneid. Renaming to $newid\n"; #report the name actually used
+			$geneid = $newid;
+		    }
+		    $features->{$geneid}++;
+		    die "Unsupported $fmax>=$fmin. Line: $line" if($fmax<=$fmin);
+		    die "Bad orient $orient. Line: $line" if($orient ne '+' && $orient ne '-');
+		    $atree->insert([[$elts[0],$fmin-1,$fmax,$orient,($fmax-$fmin+1)."M"]],'gene:'.$geneid,'gene');
+		}
+	    }
+	}
+    }
+}
diff --git a/mapping/intersect.pl b/mapping/intersect.pl
new file mode 100755
index 0000000..80fc5f2
--- /dev/null
+++ b/mapping/intersect.pl
@@ -0,0 +1,58 @@
+#!/usr/bin/perl
+
+use strict;
+use AlignmentTree;
+use Data::Dumper;
+
+print STDERR "Reading $ARGV[0]\n";
+my $atree = AlignmentTree::deserialize($ARGV[0]);
+
+print STDERR "Querying $ARGV[1],$ARGV[2],$ARGV[3]\n";
+my @results = $atree->intersect($ARGV[1],$ARGV[2],$ARGV[3]);
+
+my $outputtable = [];
+my $rowlookup;
+my $columnlookup;
+
+$columnlookup->{$ARGV[1]} = 1;
+my $row=0;
+my $column=2;
+foreach my $r (@results){
+    if(!exists $rowlookup->{$r->[0]}){
+	$rowlookup->{$r->[0]}=$row++;
+    }
+    if(!exists $columnlookup->{$r->[1]}){
+	$columnlookup->{$r->[1]}=$column++;
+    }
+}
+
+foreach my $r (sort {$a->[2] <=> $b->[2]} @results){
+    $outputtable->[$rowlookup->{$r->[0]}]->[$columnlookup->{$r->[1]}] = "$r->[2] $r->[3]";
+    $outputtable->[$rowlookup->{$r->[0]}]->[0] = $r->[0];
+}
+
+my $columnwidth=20;
+my $printformat='%-'.$columnwidth.'.'.$columnwidth.'s';
+printf("$printformat\t","matchname");
+foreach my $col (sort {$columnlookup->{$a} <=> $columnlookup->{$b}} keys %$columnlookup){
+    printf("$printformat\t","$col");
+}
+print "\n";
+foreach my $row (sort {
+    if( $a->[1] eq $b->[1]){ #compare rows on column 1, the column reserved for $ARGV[1]
+	$b->[1] cmp $a->[1]; #both cells are string-equal here, so this always yields 0 (tie)
+    }
+    else{
+	$a->[1] <=> $b->[1]; #numeric compare of the cells; NOTE(review): presumably relies on the leading number of "$r->[2] $r->[3]" - confirm
+    }
+}
+		 @$outputtable){
+    foreach my $col (@$row){
+	$col = '-' if(!$col); #cells that are unset (or otherwise false) are printed as '-'
+	printf("$printformat\t","$col"); #fixed-width, tab-separated cell
+    }
+    print "\n";
+}
+
+
+
diff --git a/mapping/mafindex.pl b/mapping/mafindex.pl
new file mode 100755
index 0000000..4a5a2a8
--- /dev/null
+++ b/mapping/mafindex.pl
@@ -0,0 +1,139 @@
+#!/usr/bin/perl
+#
+#./mafindex.pl mugsyindex < mugsy.out
+#Adds an MAF formatted file to a MUGSY formatted index
+#Each alignment is saved as type 'alignment'
+#
+
+
+use strict;
+use lib '/usr/local/projects/angiuoli/mugsy_trunk/mapping';
+use lib './';
+use AlignmentTree;
+use Storable qw(store retrieve);
+use Data::Dumper;
+
+$Storable::Deparse = 1;
+$Storable::Eval = 1;
+
+my $atree = new AlignmentTree();
+if(-e $ARGV[0]){
+    $atree = AlignmentTree::deserialize($ARGV[0]);
+}
+else{
+
+}
+
+my $currscore;
+my $block = [];
+my $k=0;
+my $i=0;
+my $label;
+while(my $line=<STDIN>){
+    if($line =~ /^a\s+score=([\d\.\-]+)/){
+	my $name = "WGA_$label";
+	if(exists $atree->{_alignments}->{"WGA_$label"}){
+	    print "Creating new alignment name. $name taken\n";
+	    $name = "WGA_".$$."_$i";
+	}
+	if(scalar(@$block)){
+	    print "Saving alignments $name with ",scalar(@$block)," sequences\n";
+	    $atree->insert($block,"$name","alignment") if(scalar(@$block));
+	}
+	($label) = $line =~ /label=(\w+)/;
+	$label = "nolabel".++$k if !$label;
+	$currscore=$1;
+	$block=[];
+	$i++;
+    }
+
+    elsif($line =~ /^s\s+/){
+	my @elts = split(/\s+/,$line);
+	#$elts[1] =~ s/\./_/g;
+	#$elts[1] =~ s/\|/_/g;
+	#[1] - accession
+	#[2] - start
+	#[3] - length
+	#[4] - orient
+	#[5] - seqlen
+	#[6] - seq
+#From UCSC FAQ about MAF format
+#  start -- The start of the aligning region in the source sequence. This is a zero-based number. If the strand field is '-' then this is the start relative to the reverse-complemented source sequence.
+# size -- The size of the aligning region in the source sequence. This number is equal to the number of non-dash characters in the alignment text field 
+	my $start = $elts[2];
+	my $end = $start+$elts[3];
+	my $orient = $elts[4];
+	if($orient eq '-'){
+	    $start = ($elts[5] - $start - $elts[3]);
+	    $end = $start + $elts[3];
+	}
+	my ($cigar,$len) = &get_cigar($elts[6]);
+	my $seq = $elts[1];
+	#Check for species.accession formatted names, trim to accession if the same
+	my($species,$accession) = ($seq =~ /(\S+)\.(\S+)/);
+	if($species ne "" && $species eq $accession){
+	    $seq = $accession;
+	}
+	die "Bad orient: $orient\n" if($orient ne '-' && $orient ne '+');
+	print "$seq $start,$end ", $end-$start,"\n";
+	push @$block,[$seq,$start,$end,$orient,$cigar];
+    }
+}
+my $name = "WGA_$label";
+if(exists $atree->{_alignments}->{"WGA_$label"}){
+    $name = "WGA_".$$."_$i";
+}
+print "Saving alignments $name with ",scalar(@$block)," sequences\n";
+$atree->insert($block,"$name","alignment") if(scalar(@$block));
+print STDERR "Writing index to $ARGV[0]\n";
+$atree->serialize($ARGV[0]);
+
+
+sub get_cigar{
+    my($seqs) = @_; #alignment text for one row; '-' marks a gap column
+    my $cig; #run-length string: <n>M per run of non-gap chars, <n>X per run of '-'; stays undef for empty input
+    my $len=0; #total number of non-gap characters
+    my @chars = split(//,$seqs);
+    my $count=0; #length of the run currently being scanned
+    my $curr=0; #1 - match, 2 - gap
+    foreach my $c (@chars){
+	#match char
+	if($c ne '-'){
+	    if($curr==2){
+		#previous char was a gap
+		#write prev gap run
+		$cig .= $count."X";
+		$count=0;
+	    }
+	    #in match
+	    $count++;
+	    $curr=1;
+	}
+	else{
+	    #gap char
+	    if($curr==1){
+		#previous char was a match
+		#write prev match run
+		$cig .= $count."M";
+		$len += $count;
+		$count=0;
+	    }
+	    #in gap
+	    $count++;
+	    $curr=2;
+	}
+    } 
+    if($curr==1){
+	#row ended in a match run
+	#write final match run
+	$cig .= $count."M";
+	$len += $count;
+    }
+    if($curr==2){
+	#row ended in a gap run
+	#write final gap run
+	$cig .= $count."X";
+    }
+    return ($cig,$len); #(run-length string, non-gap length)
+}
+
diff --git a/mapping/mapfeatures.pl b/mapping/mapfeatures.pl
new file mode 100755
index 0000000..5b73b62
--- /dev/null
+++ b/mapping/mapfeatures.pl
@@ -0,0 +1,3865 @@
+#!/usr/bin/perl
+######################
+=head1 NAME
+
+mapfeatures - derives a set of mapped features according to a
+multiple sequence alignment. Reports on the consistency of
+annotated features in the mapping.
+
+=head1 USAGE
+
+mapfeatures.pl alignments.index seqs.fasta < features.txt 
+
+Outputs are a series of text reports and an HTML report that can be
+loaded in a web browser
+
+Inputs:
+
+(1) alignment.index - An index file containing a whole genome multiple
+alignment and genome annotations. This index can be generated with a
+combination of featureindex.pl,mafindex.pl,xmfaindex.pl. The whole
+genome multiple alignment can be produced by a whole genome aligner
+like Mugsy, TBA (indexed using mafindex.pl) or Mauve (index using
+xmfaindex.pl). The genome annotations in Genbank or GFF3 format can be
+indexed with featureindex.pl
+
+(2) seqs.fasta - Multi-FASTA file of the input genomes. These must be
+    the same genomes aligned.
+
+(3) features.txt - A space delimited file consisting of 
+feature_id sequence_id fmin fmax strand
+
+=head1 SYNOPSIS
+#############
+#Example usage
+#############
+
+#Generate whole genome alignment
+mugsy --prefix nmen_v16 v16/*.fsa
+
+#Index output
+mafindex.pl n16.index < nmen_v16.maf
+
+#Index annotations
+featureindex.pl n16.index genbank < nmen_v16.all.gbk > v16annotations.out
+cat v16/*.fsa > v16.all.fsa
+
+#Run mugsy-annotator
+mugsy-annotator ./n16.index ./v16.all.fsa < v16annotations.out > v16.features.mapping
+
+#For more detailed output (v16.html, v16.aln.report, v16.table, v16.clusters, v16.edits)
+mugsy-annotator --prefix v16 --print-alignments ./n16.index ./v16.all.fsa < v16annotations.out > v16.features.mapping
+
+#############
+#APPLICATIONS
+#############
+
+1)Reporting orthologs using whole genome alignment
+
+The script can be used to produce a list of orthologous genes in the
+case where the input alignments correspond to orthologous regions
+
+2)Reporting annotation inconsistencies, such as frameshifts or
+varying start sites
+Aligned annotations are further classified and checked for
+consistency of start and stop codons.  Inconsistencies may indicate
+annotation error, sequencing errors, or frameshifts.  Alternatively,
+the inconsistencies can be due to poor or missing alignments. The
+summary information provided at the end of the output provides an
+indication of the overall consistency of the annotations in the set.
+The script has been used to evaluate consistency of annotations
+across numerous sequenced strains of bacteria and identify likely
+errors
+
+#####################
+#PREPARATION OF INPUT
+#####################
+Meant to be used in conjunction with several utility scripts to
+identify orthologs and classify annotations in a set of aligned genomes
+Related scripts
+>mugsymapper aln.maf features.txt > clusters.out
+>mafindex
+>featureindex
+>gb2annottab genome.gbk1,...,genome.gbkN > orig.annot.tab
+>indextab alignments.index
+>updategb genome.gbk,....,genome.gbkN < annot.updates.tab
+
+#####################
+#BUGS/LIMITS/TODO
+#####################
+
+-printAlignments displays wrong frame for gene fragments that have
+more than one start or end in a single display line
+-Will report coverage,identity>1 if there are overlapping alignments
+-Does not detect cases where gene fragments run off end of the contig
+-no command line usage,help
+-need to rename alignmenttree, AlignedIntervalTree
+
+#NOTES
+##########
+#Input coordinates are zero start, interbase coordinates
+#0 1 2 3 4
+# A T A C
+#The feature TA above has coordinates 1-3 
+#specified in the code as fmin=1 fmax=3. Length is fmax-fmin=2
+#
+#Contact: S. Angiuoli (angiuoli at cs.umd.edu) 
+#December 2010
+
+=cut
+
+
+use strict;
+use lib '/usr/local/projects/angiuoli/mugsy_trunk/mapping';
+use lib './';
+
+use Pod::Usage;
+use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
+use File::Basename;
+
+#Bioperl is used only for translation machinery
+use Bio::Perl;
+#use Bio::DB::Fasta;
+use Bio::Seq;
+use Bio::SeqIO;
+use Bio::Tools::CodonTable;
+use Bio::Seq::EncodedSeq;
+#use Bio::LiveSeq::Mutation; tried this but couldn't get to work properly
+#Default cutoffs
+
+use AlignmentTree;
+
+#Command line options are collected in %options; an unknown option or --help prints the usage text
+my %options;
+my $results = GetOptions (\%options, 
+			  'help|h', #Print usage; tested below but previously never declared
+			  'input_file=s',
+			  'map_file=s',
+			  'featlist=s', #Restrict mapping to list of features
+			  'duplications=s', #Report duplications, requires addl index file
+			  'coverage|c=s',
+			  'query_coverage|q=s',
+			  'identity|i=s',
+
+			  'sortkeys=s', #sort order of fields 'gfreq','len','afreq' when reporting edits
+			  'reportedits=s', #top number of edits to report, default all
+			  'maxchange=s', #max allowable %length changes
+			  'prefix=s', #Generate output reports with file prefix
+			  'cogformat=s', #Output cog format to stdout
+			  'printalignments', 
+			  'printhtml',
+			  'skipframeshifts',
+			  #Missing gene options
+			  'minorflen=s',
+			  'maxorflen=s',
+			  'verbose|v', #Verbose warnings
+			  'debug|d=s') || pod2usage(-verbose => 1);
+pod2usage(-verbose=>1) if($options{'help'});
+
+
+my $coverage_cutoff = (exists $options{'coverage'}) ?  $options{'coverage'} : 0.5;
+my $query_coverage_cutoff = (exists $options{'query_coverage'}) ?  $options{'query_coverage'} : 0;
+my $pid_cutoff= (exists $options{'identity'}) ?  $options{'identity'} : 0.1;
+print "#Using coverage cutoff:$coverage_cutoff identity:$pid_cutoff query_coverage:$query_coverage_cutoff\n";
+
+my $MAXORFLEN = (exists $options{'maxorflen'}) ? $options{'maxorflen'} : 30000; #in bp
+my $MINORFLEN = $options{'minorflen'} || 30; #in aa residues
+my $ORFLEN_MAXDELTA = 0.5; #do not consider possible codons that are less than X the length of the maximum annotated ORF
+
+my $FS_THRESHOLD = 3;
+my $FSLEN_THRESHOLD = 9;
+#'frameshift_consistency=s', #only report frameshifts that occur in < X fraction of aligned sequences. 1 show all possible frameshifts, default 0.5.
+#my $FS_FRACTIONGENOME = .5;
+#my $FS_FRACTIONGENOME = (exists $options{'frameshift_consistency'}) ? $options{'frameshift_consistency'} : 0.5;;
+
+
+
+#Used for detecting contig boundaries
+my $PMARK_SPACER = "NNNNNCACACACTTAATTAATTAAGTGTGTGNNNNN";
+
+#Flag for checking consistent start,stop
+#Assumes input features are genes
+my $doconsistencychecks=1;
+
+#Report new ORFs using aligned start codons
+my $dofindneworfs = 0;
+my $autocorrect=0;
+
+#Only report alternative start codons that
+#results in a longer ORF
+my $longer_altstarts=1;
+my $moreconsistent_altstarts=1;
+
+#Only report alternative start codons that
+#appear more frequently in the aligned genomes
+my $freq_altstarts=1;
+my $freq_altstops=0;
+
+my $aligntoken="WGA";
+my $CODON_DELIM = '.';
+my $CODON_DELIM_REGEX = '\.';
+
+#Output flags
+my $COGoutputformat=(exists $options{'cogformat'}) ? $options{'cogformat'} : 0;
+my $cogfh; #cog format
+my $cfh; #cluster format
+my $ctfh; #table
+my $ctfh2; #table with coords
+my $htmlout=(exists $options{'printhtml'} ? 1 : 0);
+
+if($COGoutputformat){
+    open $cogfh, "+>$options{'cogformat'}" or die "Can't open COG file $options{'cogformat'}";#\*STDOUT;
+}
+else{
+    open $cogfh, "+>$options{'prefix'}clusters.cog" or die "Can't open COG file $options{'prefix'}clusters.cog"; #NOTE(review): runs before the default prefix below is set
+}
+if(! $options{'prefix'}){
+    $options{'prefix'} = "mugsyant/run$$"; #default report prefix, unique per process id
+    print `mkdir -p $options{'prefix'}`;
+}
+print STDERR "Writing output to $options{'prefix'}clusters.table, $options{'prefix'}clusters.coords.table, $options{'prefix'}clusters.out \n";
+print "#Writing output to $options{'prefix'}clusters.table, $options{'prefix'}clusters.coords.table, $options{'prefix'}clusters.out \n";
+print "#EDITTBL format cluster_id, codon_id, genome_freq, currentannotated_freq, avglen, num_orgswithoverlaps, comments\n";
+open $cfh, "+>$options{'prefix'}clusters.out" or die "Can't open $options{'prefix'}clusters.out"; #report files: fail loudly instead of silently losing output
+open $ctfh, "+>$options{'prefix'}clusters.table" or die "Can't open $options{'prefix'}clusters.table";
+open $ctfh2, "+>$options{'prefix'}clusters.coords.table" or die "Can't open $options{'prefix'}clusters.coords.table";
+
+
+my $printalignments=(exists $options{'printalignments'}) ? $options{'printalignments'} : 0;
+my @sortkeys = (exists $options{'sortkeys'}) ? (split(/,/,$options{'sortkeys'})) : ('gfreq','len','afreq');
+if(scalar @sortkeys != 3){
+
+    print STDERR "Enter sort order using names gfreq,afreq,len for aligned frequency in the genome, annotated frequency, and ORF length. Sort is in descending order, largest value first.\n";
+    print STDERR "eg. --sortkeys gfreq,len,afreq\n";
+
+    exit 1;
+}
+
+#Debugging flags
+my $checkbadlen=0;
+my $debug=$options{'debug'};
+my $printskipped=$debug;
+my $verbose=(exists $options{'verbose'} ? 1 : 0); #verbose warnings, mostly for debugging
+if($debug){
+    $verbose=$debug;
+}
+
+#Master list of features and attributes
+#0-seqname
+#1-fmin
+#2-fmax
+#3-len
+#4-orient
+#5-polyid
+#6-geneid
+#7-startcodon pos
+#8-startcodon aln
+#9-stopcodon pos
+#10-stopcodon aln
+my $features = {};
+my $allseqs = {};
+my $seqindex = {};
+
+#Featlist
+my $featlist;
+if(exists $options{'featlist'}){
+    foreach my $f (split(/\s+/,$options{'featlist'})){
+	$featlist->{$f}=1;
+    }
+}
+if($featlist && scalar (keys %$featlist)){
+	print STDERR "Limiting results to ",scalar(keys %$featlist)," genes\n";
+}
+my $codons = {};
+my $classes_sum = {};
+my $classes_all = {};
+
+my $newclasses_sum = {};
+
+#AlignmentTree is a interval tree that contains alignments between sequences
+#and features on those sequences
+my $atree = AlignmentTree::deserialize($ARGV[0]);
+$atree->{_debug}=$debug;
+#Read a white space delimited list of features to map from stdin
+
+my $datree;
+if(-e $options{'duplications'}){
+    $datree = AlignmentTree::deserialize($options{'duplications'});
+}
+
+my %featlookup;
+my $filetype;
+my $fh;
+
+$options{'input_file'} = $options{'map_file'} if(exists $options{'map_file'});
+
+if($options{'input_file'}) {
+	open($fh, "<$options{'input_file'}") or die "Error in opening the file, $options{'input_file'}, $!\n";
+} else {
+	$fh = \*STDIN;
+}
+my $seqref;
+while(my $line=<$fh>){
+    my($name,$seq,$fmin,$fmax,$orient,$polyid,$geneid,$annotations);
+    chomp $line;
+    if($line =~ /\#gff-version 3/){
+	$filetype = 'gff3';
+	$featlookup{'gene'}++;
+	$featlookup{'pseudogene'}++;
+    }
+    elsif($line =~ /^>/ || $line =~ /^Location/){
+	$filetype = 'ptt';
+	if($line =~ /^>(\S+)/){
+	    $seqref = $1;
+
+	}
+    }
+    elsif($line !~ /^\#/){
+	if($filetype eq 'gff3'){
+	    #GFF
+	    my @elts = split(/\t/,$line);
+	    if(scalar(@elts)==9){
+		if(exists $featlookup{lc($elts[2])}){
+		    my %attrs = map {split(/=/)} split(/;/,$elts[8]);
+		    if(exists $attrs{'locus_tag'}){
+			$name = $attrs{'locus_tag'};
+		    }
+		    elsif(exists $attrs{'ID'}){
+                        #Can't expect that ID is unique across files, so append sequence name
+                        $name=$elts[0].'_'.$attrs{'ID'};
+                    }
+
+		    if(exists $attrs{'product'}){
+			$annotations .= $attrs{'product'};
+		    }
+		    if(lc($elts[2]) eq 'pseudogene'){
+			$annotations .= "pseudogene ";
+		    }
+		    ($seq,$fmin,$fmax,$orient,$polyid,$geneid) = ($elts[0],$elts[3],$elts[4],$elts[6],$name,$name);
+		    ($fmin,$fmax) = ($fmin<$fmax) ? ($fmin-1,$fmax) : ($fmax-1,$fmin);
+		}
+		elsif(lc($elts[2]) eq 'cds'){
+		    #hack for names from genbank
+		    my %attrs = map {split(/=/)} split(/;/,$elts[8]);
+		    if(exists $attrs{'product'}){
+			$annotations .= $attrs{'product'};
+			my $cdsname;
+			if(exists $attrs{'locus_tag'}){
+			    $cdsname = $attrs{'locus_tag'};
+			    if(exists $features->{$cdsname}){
+				$features->{$cdsname}->[11] = $annotations;
+			    }
+			}
+			if(exists $attrs{'ID'} && (length($cdsname)>0 && !exists $features->{$cdsname})){
+			    #Can't expect that ID is unique across files, so append sequence name 
+			    $cdsname=$elts[0].'_'.$attrs{'ID'};
+			    if(exists $features->{$cdsname}){
+				$features->{$cdsname}->[11] = $annotations;
+			    }
+			}
+		    }
+		}
+	    }
+	    else{
+		#print "Skipping $line\n" if($debug);
+	    }
+	}
+	elsif($filetype eq 'ptt'){
+	    #36..1   -       35      XOCORF_0001     -       hypothetical protein
+	    my @elts = split(/\t/,$line);
+	    ($fmin,$fmax) = ($elts[0] =~ /(\d+)\.\.(\d+)/);
+	    ($fmin,$fmax) = ($fmax < $fmin) ? ($fmax,$fmin) : ($fmin,$fmax);
+	    $orient = $elts[1];
+	    $fmin = $fmin-1;
+		
+	    $name = $elts[3];
+	    $seq = $seqref;
+	    $annotations .= $elts[5];
+	}
+	else{
+	    #Custom simple space delim text: name seq fmin fmax orient polyid geneid [annotation words...]
+	    my @annots;
+	    ($name,$seq,$fmin,$fmax,$orient,$polyid,$geneid,@annots) = split(/\s+/,$line);
+	    $annotations .= join (' ',@annots);
+	    #Allow for 0,1 orient
+	    if($orient =~ /\d/){
+		if($orient > 0){
+		    $orient = '+';
+		}
+		else{
+		    $orient = '-';
+		}
+	    } 
+	    die "Bad orient $orient\n" if($orient ne '-' && $orient ne '+');
+	}
+	if(length($name)>0){
+	    die "Unsupported $fmax>=$fmin. $line" if($fmax<=$fmin);
+	    if($fmin<0){
+		print STDERR "Illegal fmin $fmin for $seq,$fmin,$fmax,$fmax-$fmin,$orient,$polyid,$geneid\n";
+		$fmin=0;
+		next;
+	    }
+	    die "Bad orient $orient. $line" if($orient ne '+' && $orient ne '-');
+	    my $i=0; #suffix counter used to make a repeated feature name unique
+	    while(exists $features->{$name}){
+		print "#Duplicate named feature $name. Renaming to ${name}_".($i+1)."\n"; #report the name actually assigned below
+		$name=$name.'_'.++$i;
+	    }
+	    if(!defined $featlist || exists $featlist->{$name}){
+		$features->{$name} = [$seq,$fmin,$fmax,$fmax-$fmin,$orient,$polyid,$geneid];
+		
+		my($org) = ($seq =~ /([^\.]+)/);
+		$allseqs->{$org}++;
+		#[7]-[10] reserved for start,stop codon info
+		$features->{$name}->[11] = $annotations;
+	    }
+	}
+    }
+}
+
+my @sortedallseqs = sort {$a cmp $b} (keys %$allseqs);
+for(my $i=0;$i<scalar(@sortedallseqs);$i++){
+    $seqindex->{$sortedallseqs[$i]}=$i;
+}
+
+#Save a list of clusters
+my $clusters = {};
+#Current cluster id, a unique identifier for a cluster
+my $cluster_id = 0;
+#Count of clusters that pass cutoffs
+my $validcluster = 0;
+
+#All genes are categorized into one of three categories
+#mapped   - aligned to other genes in the set above cutoffs
+#unmapped - aligned to other genes in the set but none above cutoffs
+#nomatches- not aligned to any other genes in the input set
+#List of mapped,unmapped,nohit genes
+my $mapped = {};
+my $unmapped = {};
+my $deleted = {};
+my $nomatches = {};
+my $dups = {};
+my $neworfcount = 0;
+my $adjustedorfs = 0;
+
+#List of newly called ORFs
+my $neworfs = {};
+#and the annotated ORFs they replace
+my $subsumed = {};
+
+#Map of feature => organism
+my $feat2organism = {};
+
+my $db;
+
+
+if(-f "$ARGV[1]"){
+    print STDERR "Using FASTA file $ARGV[1]. Debug level: $debug\n";
+    #Faster to read everything into RAM: $db maps sequence id => Bio::Seq object
+    #$db = Bio::DB::Fasta->new($ARGV[1],'-reindex'=>1); 
+    my @ids;# = $db->ids();
+    my $istream = Bio::SeqIO->new(-file => $ARGV[1],
+				  -format => 'Fasta');
+    while ( my $seq = $istream->next_seq()){
+	push @ids, $seq->id();
+	$db->{$seq->id()} = $seq;
+	print "#Storing ",$seq->id(),"\n" if($verbose);
+    }
+    print "#Parsed FASTA sequences for ",join(',',@ids),"\n";
+}
+else{
+    print STDERR "No FASTA file provided. Reporting alternative start codons but not calling ORFs\n";
+}
+
+
+#The mapping algorithm builds clusters of aligned genes in a greedy
+#fashion, starting with the longest feature in the input set and
+#mapping all aligned features that pass cutoffs. In the case where
+#features are genes and the alignments are orthologous regions, such
+#as those identified by whole genome alignments(WGA), the clusters
+#represent orthologous genes.
+
+#Sort query genes by length in decreasing order, longest to
+#shortest. In doing so, all aligned genes that cover the query gene
+#above cutoffs are considered putative orthologs to the query. And the
+#query gene is always the longest member of the cluster. The reported
+#%id and %cov are relative to the query
+
+foreach my $query (sort {$features->{$b}->[3] <=> $features->{$a}->[3]} #Sort on length, decreasing order
+		   keys %$features){                                    #Over all features
+    print "#Processing $query ",`date` if($verbose);
+    #As the algorithm progresses, features are mapped and removed from consideration
+    #Consider genes that remain unmapped or 
+    #remain covered by <= cutoff% of length in alignments already considered
+    if(!exists $mapped->{$query} && !exists $deleted->{$query}){
+
+	#Start a new cluster based on the query gene.  Set a new
+	#cluster id; each cluster can also be identified by the query
+	#gene ($query)
+	$cluster_id++;
+	
+	my($mappedorgs,$mappedgenes,$unmappedorgs,$unmappedgenes) = &buildCluster($atree,$query);
+	print "#MAPPED Num_orgs:",scalar(keys %$mappedorgs)," Num_genes:",scalar(keys %$mappedgenes)," UNMAPPED Num_orgs:",scalar(keys %$unmappedorgs)," Num_genes:",scalar(keys %$unmappedgenes),"\n" if($verbose);
+	die "Less than 2 mapped sequences" if(scalar(keys %$mappedgenes)>1 && scalar(keys %$mappedorgs)<=0);
+	die "No mapped genes" if(scalar(keys%$mappedgenes)<1);
+
+	#Mark inconsistencies in the cluster and save start,stop codon
+	#positions of annotated genes only
+	#Codon aligned, annotated frequency is also saved as 
+	#'start','stop',=>seqname
+	#'pairs'
+	#=>
+	# 'gfreq' -aligned genomic freq 
+	# 'afreq' -annotated freq
+	# 'len' - average length 
+	#
+	my($feat_attrs,$cluster_attrs,$codons) = &annotateCluster($atree,$mappedgenes,$mappedorgs);
+
+	my $seq_attrs = {};
+	my $new_orfs = {};
+	
+	#Look for unannotated ORFs in remaining aligned seqs using other annotated start codons
+	#This can also recall orfs in the unmapped set
+	#if($dofindneworfs && !$options{'skipneworfs'}){
+	#$new_orfs = &findnewORFs($db,$atree,$mappedorgs,$mappedgenes,$codons);	    
+	#}
+	
+	if((scalar(keys %$mappedgenes)>1 && scalar(keys %$mappedorgs)>1)){
+	    print "#Cluster WGA$cluster_id codon_pairs:",scalar(keys %{$codons->{'pairs'}}),"\n" if($verbose);
+	
+	    #We have a good cluster, save it
+	    #Save the cov,pid in master list of mapped genes
+
+	    my $totallen=0;
+	    my $maxlen=0;
+	    foreach my $feat_name (keys %$mappedgenes){
+		die "Feature $feat_name already mapped" if(exists $mapped->{$feat_name});
+		$mapped->{$feat_name}->{'cov'}=$mappedgenes->{$feat_name}->{'cov'}/$features->{$feat_name}->[3];
+		$mapped->{$feat_name}->{'pid'}=$mappedgenes->{$feat_name}->{'pid'}/$mappedgenes->{$feat_name}->{'len'};
+		$mapped->{$feat_name}->{'cluster_id'}=$cluster_id;
+		$totallen += $features->{$feat_name}->[3];
+		$maxlen = ($features->{$feat_name}->[3] > $maxlen) ? $features->{$feat_name}->[3] : $maxlen;
+		delete $unmapped->{$feat_name};
+		die if(exists $unmappedgenes->{$feat_name});
+	    }
+	    my $avglen=$totallen/(scalar keys %$mappedgenes);
+	    my $classesstr;
+	    my $classesallstr;
+
+	    #Save alternative ORFs
+	    my $altcodons = {};
+
+	    if(!defined $options{'reportedits'} || $options{'reportedits'} > 0){
+		#Save aligned and annotated codon frequency
+		foreach my $p (keys %{$codons->{'pairs'}}){
+		    print "#Analyzing codon pair $p\n" if($verbose);
+		    my($startcodon,$stopcodon) = split(/:/,$p);
+		    foreach my $seqname (keys %$mappedorgs,keys %$unmappedorgs){
+			print "#Sequence $seqname\n" if($verbose);
+			#if this is the annotated pair
+			if(exists $codons->{'pairs'}->{$p}->{'orgs'}->{$seqname} && $codons->{'pairs'}->{$p}->{'orgs'}->{$seqname}->[3]==1){
+			    #Do nothing, already annotated
+			    $codons->{'pairs'}->{$p}->{'features'}->{$seqname} = $mappedorgs->{$seqname}->{'features'};
+			    my($fmin,$fmax,$orient) = &findCoords($atree,$seqname,$startcodon,$stopcodon);
+			    my $isorf = &isORF($db,$seqname,$fmin,$fmax,$orient);
+			    if($isorf<=0){
+				print "#BAD ORF $seqname,$fmin,$fmax ",join(',',keys %{$mappedorgs->{$seqname}->{'features'}}),"\n" if($verbose);
+				foreach my $feat (keys %{$mappedorgs->{$seqname}->{'features'}}){
+				    $feat_attrs->{$feat}->{'CX'}++;
+				}
+			    }
+			    print "#annotated\n" if($verbose);
+			}
+			else{
+			    print "#checking\n" if($verbose);
+			    #check if this is an ORF in $seqname
+			    my($fmin,$fmax,$orient) = &findCoords($atree,$seqname,$startcodon,$stopcodon);
+			    if(defined $fmin && defined $fmax && defined $orient && $fmax>$fmin){
+				die "$atree,$seqname,$startcodon,$stopcodon" if(! defined $fmin || ! defined $fmax);
+				my $isorf = &isORF($db,$seqname,$fmin,$fmax,$orient);
+				if($isorf>0){
+				    print "#isORF true\n" if($verbose);
+				    #There is an ORF on $seqname over this interval
+				    if(exists $unmappedorgs->{$seqname}){
+					die if(exists $mappedorgs->{$seqname});
+					#genome is aligned but no ORFs above cutoffs
+					if(exists $unmappedorgs->{$seqname}->{'features'}){
+					    #the region is annotated with an ORF that matches below cutoffs
+					    #requires a new ORF that is different than currently annotated
+					    $codons->{'pairs'}->{$p}->{'orgs'}->{$seqname} = [$fmin,$fmax,$orient,-1];
+					    $codons->{'pairs'}->{$p}->{'features'}->{$seqname} = $unmappedorgs->{$seqname}->{'features'};	
+					}
+					else{
+					    #the region is not annotated
+					    #Requires a new ORF in an unannotated region
+					    my $olapgenes = &getFeaturesByInterval($atree,$seqname,$fmin,$fmax,$orient);
+					    my $nummapped=0;
+					    if(scalar(keys %$olapgenes)>0){
+						print "#Found ",scalar(keys %$olapgenes)," in region $seqname,$fmin,$fmax with no mapped,unmapped\n" if($debug);
+						foreach my $gene (keys %$olapgenes){
+						    if(exists $mapped->{$gene}){
+							$nummapped++;
+						    }
+						}
+					    }
+					    if(scalar (keys %$olapgenes)==0){
+                                                #the region is not annotated
+						#Requires a new ORF in an unannotated region
+						$codons->{'pairs'}->{$p}->{'orgs'}->{$seqname} = [$fmin,$fmax,$orient,-2];
+						print "#neworf $p $seqname ",$fmax-$fmin,"\n" if($verbose);
+					    }
+					    elsif($nummapped==0){
+						#the region is annotated
+						#Requires a alt ORF in an unannotated region
+						$codons->{'pairs'}->{$p}->{'orgs'}->{$seqname} = [$fmin,$fmax,$orient,-1];
+						$codons->{'pairs'}->{$p}->{'features'}->{$seqname} = $unmappedorgs->{$seqname}->{'features'};
+					    }
+					}
+				    }
+				    else{
+					#the region is aligned with an annotated ORF above cutoffs 
+					#requires a new ORF that is different than currently annotated
+					die if(! exists $mappedorgs->{$seqname});
+					$codons->{'pairs'}->{$p}->{'orgs'}->{$seqname} = [$fmin,$fmax,$orient,0];
+					if(exists $mappedorgs->{$seqname}){
+					    $codons->{'pairs'}->{$p}->{'features'}->{$seqname} = $mappedorgs->{$seqname}->{'features'};	
+					    print "#altorf\n" if($verbose);
+					}
+					else{
+					    print "#altorf, prev did not pass cutoffs\n" if($verbose);
+					}
+				    }
+				}
+				elsif($isorf==0){
+				    print "#isORF false\n" if($verbose);
+				    if(! defined $options{'skipframeshifts'}){
+					if(($fmax-$fmin)<$MAXORFLEN && ($fmax-$fmin)>$MINORFLEN){
+					    #See if we can call an ORF over this region with frameshifts
+					    my $feat_name;
+					    my $annotatedstop;
+					    my $annotatedstart;
+					    if(exists $mappedorgs->{$seqname} && scalar(keys %{$mappedorgs->{$seqname}->{'features'}}) == 1){
+						$feat_name = [keys %{$mappedorgs->{$seqname}->{'features'}}]->[0];
+						$annotatedstop = $features->{$feat_name}->[9] . $CODON_DELIM . $features->{$feat_name}->[10];
+						$annotatedstart = $features->{$feat_name}->[7] . $CODON_DELIM . $features->{$feat_name}->[8];
+					    }
+					    #Look for possible frameshifts if there are either
+					    if(exists $unmappedorgs->{$seqname}                             #a) No annotations aligned in this region above cutoffs
+					       || 
+					       (exists $mappedorgs->{$seqname}
+						&&
+						(scalar(keys %{$mappedorgs->{$seqname}->{'features'}}) > 1 	#b) Multiple annotated ORFs in this region
+						 
+						 ||
+						 $stopcodon ne $annotatedstop 			        #c) Single annotated ORF with a different stop codon
+						 ||
+						 $startcodon ne $annotatedstart
+						 )
+						)){
+						die "$seqname found in both mapped and unmapped org lists" if(exists $unmappedorgs->{$seqname} && exists $mappedorgs->{$seqname});
+						print "#Considering FS on $seqname for pair $startcodon,$stopcodon annotated:$annotatedstart,$annotatedstop $fmin,$fmax,$orient\n" if($verbose);
+						print "#Considering FS $stopcodon ne $annotatedstop for $feat_name on $seqname\n" if($debug && exists $mappedorgs->{$seqname});
+						#Find most similar sequence that has this ORF
+						my($nearestseq) = &findNearestNeighbor($atree,$seqname,$mappedorgs,$fmin,$fmax);
+						print "#Using $nearestseq as nearest neighbor to $seqname\n" if($verbose);
+						#Look for frameshifting mutations in $seqname
+						my($fs,$netfs) = &reportFrameShifts($atree,$db,$seqname,$nearestseq,$startcodon,$stopcodon);
+						if(ref $fs){
+						    my $isorf = &isORF($db,$seqname,$fmin,$fmax,$orient,$fs);
+						    print "#Possible ORF with frameshift indels:",scalar(@$fs)," net:$netfs isorf:$isorf\n" if($verbose);
+						    if($isorf>0){
+							if($isorf==2 || abs($netfs) < $FS_THRESHOLD){
+							    foreach my $fs (sort {$a->[0] <=> $b->[0]} @$fs){
+								if($verbose){
+								    print "#FS ",join(',',@$fs)," $netfs $isorf\n";
+								}
+							    }
+							    print "#Adding frameshift net:",scalar(@$fs)," $netfs\n" if($debug);
+							    if(exists $mappedorgs->{$seqname}){
+								$codons->{'pairs'}->{$p}->{'features'}->{$seqname} = $mappedorgs->{$seqname}->{'features'};
+								$codons->{'pairs'}->{$p}->{'orgs'}->{$seqname} = [$fmin,$fmax,$orient,0,$fs];
+							    }
+							    else{
+								my $olapgenes = &getFeaturesByInterval($atree,$seqname,$fmin,$fmax,$orient);
+								
+								if(scalar(keys %$olapgenes)>0){
+								    $codons->{'pairs'}->{$p}->{'orgs'}->{$seqname} = [$fmin,$fmax,$orient,0,$fs];
+								}
+								else{
+								    print "#Neworf in frameshifted region $seqname,$fmin,$fmax,$orient\n" if($debug);
+								    $codons->{'pairs'}->{$p}->{'orgs'}->{$seqname} = [$fmin,$fmax,$orient,-2,$fs];
+								}
+							    }
+							}
+						    }
+						}
+					    }
+					}
+					else{
+					    print "#Skipping frameshifts check on $seqname range too big or small $fmin-$fmax\n" if($verbose);
+					}
+				    }
+				}
+				elsif($isorf == -1){
+				    #TODO, check point mutation of start, stop codon
+				}
+			    }
+			}
+		    }
+		}
+		foreach my $p (keys %{$codons->{'pairs'}}){
+		    my $fscount=0;
+		    my $orgcount = scalar(keys %{$codons->{'pairs'}->{$p}->{'orgs'}});
+		    foreach my $org (keys %{$codons->{'pairs'}->{$p}->{'orgs'}}){
+			if(ref $codons->{'pairs'}->{$p}->{'orgs'}->{$org}->[4]){
+			    $fscount++;
+			}
+		    }
+		    foreach my $org (keys %{$codons->{'pairs'}->{$p}->{'orgs'}}){
+			#Skip frameshift if more occurs in more than $FS_FRACTIONGENOME
+			if(ref $codons->{'pairs'}->{$p}->{'orgs'}->{$org}->[4]){
+			    if(1){#$fscount/$orgcount <= $FS_FRACTIONGENOME){
+				$codons->{'pairs'}->{$p}->{'fsvars'}->{$org}=$codons->{'pairs'}->{$p}->{'orgs'}->{$org}->[4];
+				print "#FS ",join(',',@{$codons->{'pairs'}->{$p}->{'orgs'}->{$org}->[4]}),"\n" if($debug);
+			    }
+			    else{
+				next;
+			    }
+			}
+			print "#CODONPAIR ",join(',',@{$codons->{'pairs'}->{$p}->{'orgs'}->{$org}}),"\n" if($debug);
+			
+			$codons->{'pairs'}->{$p}->{'gfreq'}++;
+			$codons->{'pairs'}->{$p}->{'afreq'}++ if($codons->{'pairs'}->{$p}->{'orgs'}->{$org}->[3]==1); #inc only if annotated
+			$codons->{'pairs'}->{$p}->{'length'}+=($codons->{'pairs'}->{$p}->{'orgs'}->{$org}->[1] - $codons->{'pairs'}->{$p}->{'orgs'}->{$org}->[0]);
+			#-2 encodes a new orf, no prior annotation on this $org
+			if($codons->{'pairs'}->{$p}->{'orgs'}->{$org}->[3] == -2){
+			    $codons->{'pairs'}->{$p}->{'neworfs'}->{$org}->{'fmin'} = $codons->{'pairs'}->{$p}->{'orgs'}->{$org}->[0];
+			    $codons->{'pairs'}->{$p}->{'neworfs'}->{$org}->{'fmax'} = $codons->{'pairs'}->{$p}->{'orgs'}->{$org}->[1];
+			    $codons->{'pairs'}->{$p}->{'neworfs'}->{$org}->{'orient'} = $codons->{'pairs'}->{$p}->{'orgs'}->{$org}->[2];
+			}
+		    }
+		    $codons->{'pairs'}->{$p}->{'len'} = $codons->{'pairs'}->{$p}->{'length'}/$codons->{'pairs'}->{$p}->{'gfreq'} if($codons->{'pairs'}->{$p}->{'gfreq'} > 0);
+		}
+		
+		$classesstr = join(';',sort {$a cmp $b} keys %{$cluster_attrs});
+		#Suggest edits for inconsistently annotated clusters
+		if($doconsistencychecks){
+		    #Choose N best start,stop pairs according to sortkeys
+		    my @bestcodonpair = sort {
+			if($codons->{'pairs'}->{$a}->{$sortkeys[0]} eq $codons->{'pairs'}->{$b}->{$sortkeys[0]}){
+			    if($codons->{'pairs'}->{$a}->{$sortkeys[1]} eq $codons->{'pairs'}->{$b}->{$sortkeys[1]}){
+				#sort on tertiary sortkey, eg length
+				$codons->{'pairs'}->{$b}->{$sortkeys[2]} <=> $codons->{'pairs'}->{$a}->{$sortkeys[2]};
+			    }
+			    else{
+				#sort on secondary sortkey, eg annotated frequency
+				$codons->{'pairs'}->{$b}->{$sortkeys[1]} <=> $codons->{'pairs'}->{$a}->{$sortkeys[1]};
+			    }
+			}
+			else{
+			    #sort on primary sortkey, eg. aligned frequency of start codon in the genome
+			    $codons->{'pairs'}->{$b}->{'gfreq'} <=> $codons->{'pairs'}->{$a}->{'gfreq'};
+			}
+		    } (keys %{$codons->{'pairs'}});
+		    if(scalar(@bestcodonpair)>0){
+			open EFILE, ">$options{'prefix'}cluster$cluster_id.edits.out";
+			for(my $i=0;$i<scalar(@bestcodonpair);$i++){
+			    my $bestcodon = $bestcodonpair[$i];
+			    if($codons->{'pairs'}->{$bestcodon}->{'gfreq'} > 1){
+				my $codonlength = ($codons->{'pairs'}->{$bestcodon}->{'length'}/$codons->{'pairs'}->{$bestcodon}->{'gfreq'});
+				
+				my $deltafracmax = ($maxlen-$codonlength)/$maxlen;
+				if($deltafracmax < $ORFLEN_MAXDELTA && $deltafracmax > (-1)*$ORFLEN_MAXDELTA){
+				    
+				    print EFILE ">CLUSTER_$cluster_id $bestcodon\n";
+				    
+				    foreach my $org (keys %{$codons->{'pairs'}->{$bestcodon}->{'orgs'}}){
+					#Check if there are existing annotations
+					if(scalar(keys %{$codons->{'pairs'}->{$bestcodon}->{'features'}->{$org}})>0){
+					    foreach my $feat_name (keys %{$codons->{'pairs'}->{$bestcodon}->{'features'}->{$org}}){
+						my $pred_feat = $codons->{'pairs'}->{$bestcodon}->{'orgs'}->{$org};
+						#Check if codon pair results in an alternative ORF
+						if($pred_feat->[0] ne $features->{$feat_name}->[1] || 
+						   $pred_feat->[1] ne $features->{$feat_name}->[2]){
+						    my $fs = (defined $codons->{'pairs'}->{$bestcodon}->{'orgs'}->{$org}->[4]) ? "F" : "";
+						    #if($fs eq 'F'){
+							#die if(! exists $codons->{'pairs'}->{$bestcodon}->{'fsvars'}->{$org});
+							#my @fsruns = @{$codons->{'pairs'}->{$bestcodon}->{'fsvars'}->{$org}};
+							#foreach my $r (@fsruns){
+							#    $fs .= print "[$r->[0]-$r->[1] $r->[2]:$r->[3]] $r->[4]";
+							#}
+						    #}
+						    my $olapgenes = &getFeaturesByInterval($atree,$org,$pred_feat->[0],$pred_feat->[1],$pred_feat->[2]);
+						    my $olaps;
+						    foreach my $feat (keys %$olapgenes){
+							if($feat ne $feat_name && ! exists $codons->{'pairs'}->{$bestcodon}->{'features'}->{$org}->{$feat}){
+							    print "#Overlapping gene found on $org ",join(",",@{$olapgenes->{$feat}}),"\n" if($verbose);
+							    $olaps->{$feat} = $olapgenes->{$feat};
+							    $codons->{'pairs'}->{$bestcodon}->{'olaps'}->{$org}++
+							}
+						    }
+						    $altcodons->{$bestcodon}->{'orgs'}->{$org}->{'fmin'}=$pred_feat->[0];
+						    $altcodons->{$bestcodon}->{'orgs'}->{$org}->{'fmax'}=$pred_feat->[1];
+						    $altcodons->{$bestcodon}->{'orgs'}->{$org}->{'orient'}=$pred_feat->[2];
+						    $altcodons->{$bestcodon}->{'orgs'}->{$org}->{'fs'}=$fs;
+						    $altcodons->{$bestcodon}->{'orgs'}->{$org}->{'olaps'}=$olaps;
+						    my $sdist;
+						    my $sameframe=1;
+						    if($pred_feat->[2] eq $features->{$feat_name}->[4]){
+							$sameframe=1;
+						    }
+						    else{
+							$sameframe=0;
+						    }
+						    if($pred_feat->[2] eq '+'){
+							$sdist = $pred_feat->[0]-$features->{$feat_name}->[1];
+						    }
+						    elsif($pred_feat->[2] eq '-'){
+							$sdist = $pred_feat->[1]-$features->{$feat_name}->[2];
+						    }
+						    print EFILE "$feat_name\t$org\t$pred_feat->[0]\t$pred_feat->[1]\t",($pred_feat->[1] - $pred_feat->[0]),"\t$pred_feat->[2]\t$sameframe\t$sdist\t$fs\t";
+						    my @olaplist = keys %$olaps;
+						    for(my $i=0;$i<scalar(@olaplist);$i++){
+							print EFILE "$olaplist[$i]($olaps->{$olaplist[$i]}->[3];$olaps->{$olaplist[$i]}->[6] bp;";
+							die "Bad feat $olaplist[$i]" if(! exists $features->{$olaplist[$i]} && scalar(keys %$featlist)==0);
+							printf EFILE "%.1f)",($olaps->{$olaplist[$i]}->[6]/$features->{$olaplist[$i]}->[3]) if(exists $features->{$olaplist[$i]});
+
+							print EFILE "," if($i<scalar(@olaplist)-1);
+						    }
+						    print EFILE "\n";
+						}
+					    }
+					}
+					else{
+					    #Report an annotation in a region with annotations below cutoffs or neworfs
+					    my $pred_feat = $codons->{'pairs'}->{$bestcodon}->{'orgs'}->{$org};
+					    my $fs = (defined $codons->{'pairs'}->{$bestcodon}->{'orgs'}->{$org}->[4]) ? "F" : "";
+					    my $olapgenes = &getFeaturesByInterval($atree,$org,$pred_feat->[0],$pred_feat->[1],$pred_feat->[2]);
+					    
+					    my $feat_name;
+					    if(scalar(keys %$olapgenes)>0){
+						$feat_name = "ALTORF_C$cluster_id";
+					    }
+					    else{
+						#NEWORF due to frame
+						if(! exists $codons->{'pairs'}->{$bestcodon}->{'neworfs'}->{$org}){
+						    print STDERR "Unexpected neworf NEWORF_C$cluster_id: ",join(',',@$pred_feat),"\n";
+						    $codons->{'pairs'}->{$bestcodon}->{'neworfs'}->{$org}->{'fmin'} = $pred_feat->[0];
+						    $codons->{'pairs'}->{$bestcodon}->{'neworfs'}->{$org}->{'fmax'} = $pred_feat->[1];
+						    $codons->{'pairs'}->{$bestcodon}->{'neworfs'}->{$org}->{'orient'} = $pred_feat->[2];
+						}
+						$feat_name = "NEWORF_C$cluster_id";
+					    }
+					    my $sdist;
+					    my $sameframe=1;
+					    if(exists $features->{[keys %$olapgenes]->[0]}){
+						if($pred_feat->[2] eq $features->{[keys %$olapgenes]->[0]}->[4]){
+						    $sameframe=1;
+						}
+						else{
+						    $sameframe=0;
+						}
+						if($pred_feat->[2] eq '+'){
+						    $sdist = $pred_feat->[0]-$features->{[keys %$olapgenes]->[0]}->[1];
+						}
+						elsif($pred_feat->[2] eq '-'){
+						    $sdist = $pred_feat->[1]-$features->{[keys %$olapgenes]->[0]}->[2];
+						}
+					    }
+					    #name,org,fmin,fmax,len,orient,sameframe,startdist,fs,overlaps
+					    print EFILE "$feat_name\t$org\t$pred_feat->[0]\t$pred_feat->[1]\t",($pred_feat->[1] - $pred_feat->[0]),"\t$pred_feat->[2]\t$sameframe\t$sdist\t$fs\t";
+					    
+					    my @olaplist = keys %$olapgenes;
+					    for(my $i=0;$i<scalar(@olaplist);$i++){
+						print EFILE "$olaplist[$i]($olapgenes->{$olaplist[$i]}->[3];$olapgenes->{$olaplist[$i]}->[6] bp;";
+						die if(! exists $features->{$olaplist[$i]} && scalar(keys %$featlist)==0);
+						printf EFILE "%.1f)",($olapgenes->{$olaplist[$i]}->[6]/$features->{$olaplist[$i]}->[3]) if(exists $features->{$olaplist[$i]});
+						print EFILE "," if($i<scalar(@olaplist)-1);
+						$codons->{'pairs'}->{$bestcodon}->{'olapgenes'}->{$org}++;
+					    }
+					    print EFILE "\n";
+					    if(exists $codons->{'pairs'}->{$bestcodon}->{'neworfs'}->{$org} && scalar(keys %$olapgenes)>0){
+						foreach my $gene (keys %$olapgenes){
+						    if(exists $mapped->{$gene}){
+							print STDERR "#Neworfs marked in region on $org with genes already mapped into clusters ",join(',',@{$olapgenes->{$gene}}),"\n";
+						    }
+						    else{
+							print STDERR "#Neworfs marked in region on $org with other annotations ",join(',',@{$olapgenes->{$gene}}),"\n";
+						    }
+						}
+					    }
+					    $altcodons->{$bestcodon}->{'orgs'}->{$org}->{'fmin'}=$pred_feat->[0];
+					    $altcodons->{$bestcodon}->{'orgs'}->{$org}->{'fmax'}=$pred_feat->[1];
+					    $altcodons->{$bestcodon}->{'orgs'}->{$org}->{'orient'}=$pred_feat->[2];
+					    $altcodons->{$bestcodon}->{'orgs'}->{$org}->{'fs'}=$fs;
+					    $altcodons->{$bestcodon}->{'orgs'}->{$org}->{'olaps'}=$olapgenes;
+
+					}
+				    }
+				    
+
+				    $altcodons->{$bestcodon}->{'name'}="ALT$i";
+				    $altcodons->{$bestcodon}->{'gfreq'}=$codons->{'pairs'}->{$bestcodon}->{'gfreq'};
+				    $altcodons->{$bestcodon}->{'afreq'}=$codons->{'pairs'}->{$bestcodon}->{'afreq'};
+				    $altcodons->{$bestcodon}->{'len'}=$codons->{'pairs'}->{$bestcodon}->{'len'};
+				    $altcodons->{$bestcodon}->{'neworfs'}=$codons->{'pairs'}->{$bestcodon}->{'neworfs'};
+				    $altcodons->{$bestcodon}->{'fs'}=$codons->{'pairs'}->{$bestcodon}->{'fsvars'};
+
+			    
+				    my $newclassesstr = $codons->{'pairs'}->{$bestcodon}->{'cluster_attrs'};
+				    print "#CODON $bestcodon $codons->{'pairs'}->{$bestcodon}->{'gfreq'} max_annotated_len:$maxlen ";
+				    if($debug){
+					print "delta_len_max:$deltafracmax\n"; 
+				    }
+				    else{
+					print "\n";
+				    }
+				    
+				    #EDITTBL cluster_id, codon, genome_freq, annotated_freq, len, neworfs, overlaps, comments
+				    print "#EDITTBL\tC$cluster_id\t$bestcodon\t$codons->{'pairs'}->{$bestcodon}->{'gfreq'}";
+				    
+				    if(scalar(keys%{$codons->{'pairs'}->{$bestcodon}->{'neworfs'}}) > 0){
+					print "(N:",scalar(keys%{$codons->{'pairs'}->{$bestcodon}->{'neworfs'}}),")";
+				    }
+				    if(scalar(keys %{$codons->{'pairs'}->{$bestcodon}->{'fsvars'}}) > 0){
+					print "(F:",scalar(keys%{$codons->{'pairs'}->{$bestcodon}->{'fsvars'}}),")";
+				    }
+				    print "\t$codons->{'pairs'}->{$bestcodon}->{'afreq'}\t$codons->{'pairs'}->{$bestcodon}->{'len'}\t";
+				    print scalar(keys %{$codons->{'pairs'}->{$bestcodon}->{'olaps'}}),"\t";
+				    if(scalar(keys %{$codons->{'pairs'}->{$bestcodon}->{'olaps'}})){
+					print "#OVERLAP ";
+					$altcodons->{$bestcodon}->{'isoverlap'}=1;
+				    }
+				    if($codonlength eq $maxlen){
+					print "#MAXLENEDIT ";
+					$altcodons->{$bestcodon}->{'maxlen'}=1;
+				    }
+				    if($codons->{'pairs'}->{$bestcodon}->{'gfreq'} eq (scalar(keys %$mappedorgs)+scalar(keys %$unmappedorgs))){
+					print "#FCONSISTENT ";
+					$altcodons->{$bestcodon}->{'fcon'}=1;
+				    }
+				    print "\n";
+				    #Collapse all indels into runs
+					foreach my $org (keys %{$codons->{'pairs'}->{$bestcodon}->{'fsvars'}}){
+					
+					# my @coords = sort {$a->[0] <=> $b->[0]} (@{$codons->{'pairs'}->{$bestcodon}->{'fsvars'}->{$org}});
+# 					my @runs;
+# 					my $indelstr1;
+# 					my $indelstr2;
+# 					my $last;
+# 					my $start;
+# 					my $end;
+# 					for(my $i=0;$i<@coords;$i++){
+# 					    #print join(',',@{$coords[$i]}),"\n";
+# 					    if($i==0){
+# 						$start=$coords[$i]->[0];
+# 					    }
+# 					    elsif(abs($last+1 - $coords[$i]->[0]) > 1){
+# 						push @runs,[$start,$last,$indelstr1,$indelstr2];
+# 						$indelstr1="";
+# 						$indelstr2="";
+# 						$start=$coords[$i]->[0];
+# 					    }
+# 					    $last=$coords[$i]->[0];
+# 					    $indelstr1.=$coords[$i]->[1];
+# 					    $indelstr2.=$coords[$i]->[2];
+# 					}
+# 					push @runs,[$start,$last,$indelstr1,$indelstr2];
+# 					#Remove runs that are multiple of 3
+# 					my @fsruns;
+# 					foreach my $r (@runs){
+# 					    die if(length($r->[2]) != length($r->[3]));
+# 					    if(length($r->[2])%3!=0){
+# 						push @fsruns,$r;
+# 					    }
+# 					}
+					my @fsruns = @{$codons->{'pairs'}->{$bestcodon}->{'fsvars'}->{$org}};
+					print "#FS $org ";
+					foreach my $r (@fsruns){
+					    print "[$r->[0]-$r->[1] $r->[2]:$r->[3]] $r->[4]";
+					}
+					print "\n";
+				    }
+
+				    foreach my $org (keys %{$codons->{'pairs'}->{$bestcodon}->{'neworfs'}}){
+					my $fmin = $codons->{'pairs'}->{$bestcodon}->{'neworfs'}->{$org}->{'fmin'};
+					my $fmax = $codons->{'pairs'}->{$bestcodon}->{'neworfs'}->{$org}->{'fmax'};
+					my $orient = $codons->{'pairs'}->{$bestcodon}->{'neworfs'}->{$org}->{'orient'};
+					my $olapgenes = &getFeaturesByInterval($atree,$org,$fmin,$fmax,$orient);
+					if(scalar (keys %$olapgenes)>0){
+					    foreach my $feat (keys %$olapgenes){
+						print STDERR "Unexpected genes found ",join(",",@{$olapgenes->{$feat}}),"\n";
+					    }
+					    #die;
+					}
+					print "#NEWORF $org $fmin,$fmax,",($fmax-$fmin),",$orient\n";
+					$new_orfs->{$org}++;
+				    }
+				}
+			    }
+			    else{
+				#print STDERR "#WARNING Codon $bestcodon has 0 frequency\n";
+			    }
+			}
+			close EFILE;
+		    }
+		}
+	    }
+	    #Print cluster
+	    $classesallstr = join(';',sort {$a cmp $b} keys %{$cluster_attrs});
+	    $classes_all->{$classesallstr}->{'ngenes'} += scalar(keys %$mappedgenes);
+	    $classes_all->{$classesallstr}->{'nclusters'}++;
+	    $classes_all->{$classesallstr}->{'new_orfs'}+= scalar(keys %$new_orfs);
+
+	    &reportCluster($query,$mappedorgs,$mappedgenes,$unmappedorgs,$unmappedgenes,$feat_attrs,$cluster_attrs,$seq_attrs,$new_orfs);
+
+	    $classesstr = join(';',sort {$a cmp $b} keys %{$cluster_attrs});
+	    $clusters->{$cluster_id}->{'alts'} = $altcodons;
+	    $clusters->{$cluster_id}->{'codons'} = $codons->{'pairs'};
+
+	    $classes_sum->{$classesstr}->{'ngenes'} +=scalar(keys %$mappedgenes);
+	    $classes_sum->{$classesstr}->{'nclusters'}++;
+	    $classes_sum->{$classesstr}->{'new_orfs'}+= scalar(keys %$new_orfs);
+	    $neworfcount+=scalar(keys %$new_orfs);
+
+	    $validcluster++;
+
+	    print "#VALID\tCLUSTER_$cluster_id\tNum_organisms=",scalar(keys %$mappedorgs)+1,
+	    "\tNum_genes=",scalar(keys %$mappedgenes),"\n" if($debug);;
+
+	    #For unmapped genes, save the best overlapping alignment
+	    foreach my $feat_name (keys %$unmappedgenes){
+		if(!exists $mapped->{$feat_name}){
+		    if(exists $unmapped->{$feat_name} #first alignment encountered
+		       || $unmappedgenes->{$feat_name}->{'cov'} > $unmapped->{$feat_name}->{'cov'}){ #better coverage
+			$unmapped->{$feat_name}->{'cov'} = $unmappedgenes->{$feat_name}->{'cov'}/$features->{$feat_name}->[3]; #%coverage over gene length
+			if($unmappedgenes->{$feat_name}->{'len'}){
+			    $unmapped->{$feat_name}->{'pid'} = $unmappedgenes->{$feat_name}->{'pid'}/$unmappedgenes->{$feat_name}->{'len'}; #%id over aligned length
+			}
+			else{
+			    $unmapped->{$feat_name}->{'pid'} = 0;
+			}
+			$unmapped->{$feat_name}->{'len'} = $unmappedgenes->{$feat_name}->{'len'};
+			$unmapped->{$feat_name}->{'WGA_cluster'} = $cluster_id;
+		    }
+		    else{
+			die if(exists $unmappedgenes->{$feat_name} && !exists $unmapped->{$feat_name});
+		    }
+		} 
+	    }
+	}	
+	else{
+	    die "Feature $query not mapped but marked so" if(exists $mapped->{$query});
+	    #Cluster is a singleton, skip it or print for debugging
+	    print "#SKIPPED\t$query\tWGA$cluster_id\tNum_organisms=",scalar(keys %$mappedorgs),
+	    "\tNum_genes=",scalar(keys %$unmappedgenes),"\n" if($debug);
+	    #This cluster was skipped because it does not pass coverage cutoffs
+	    #Optionally print
+	    if($printskipped){
+		#print "#$query\tWGA$cluster_id\t$currorg\tcov:",$qcov/($fmax-$fmin),"\tid:1\tspan:$fmin-$fmax\tlen:",$fmax-$fmin,"\n";
+		foreach my $organism (sort {$a cmp $b} keys %$unmappedorgs){
+		    if(ref $unmappedorgs->{$organism} && exists $unmappedorgs->{$organism}->{'features'}){
+			my($start,$end) = &getspan($unmappedgenes,keys %{$unmappedorgs->{$organism}->{'features'}});
+			my @ogenes = sort {$features->{$a}->[1] <=> $features->{$b}->[1]} (keys %{$unmappedorgs->{$organism}->{'features'}});
+			my @ocovs = map {sprintf("%.2f",$unmappedgenes->{$_}->{'cov'}/$features->{$_}->[3])} (@ogenes);
+			my @oids  = map {sprintf("%.2f",$unmappedgenes->{$_}->{'pid'}/$unmappedgenes->{$_}->{'len'})} (@ogenes);
+			
+			print "#",join(',',@ogenes),
+			"\tWGA$cluster_id",
+			"\t$organism",
+			"\tcov:",join(',',@ocovs),
+			"\tid:",join(',',@oids),
+			"\tspan:$start-$end len:",$end-$start,
+			"\n" if($debug);
+		    }
+		}
+	    }
+	}
+
+	#foreach my $organism (keys %$new_orfs){
+	#my $orfidx=0;
+	#foreach my $alt (@{$new_orfs->{$organism}}){
+	#$neworfcount++;
+	#}
+	#}
+
+
+	if($autocorrect){
+	    #Auto-correct cluster
+	    my @neworfs;
+	    foreach my $organism (sort {$a cmp $b} keys %$mappedorgs){
+		my @ogenes = sort {$features->{$a}->[1] <=> $features->{$b}->[1]} (keys %{$mappedorgs->{$organism}});
+		my $classes;
+		my $longestorf=0;		
+		my $longestpairc=0;		
+		foreach my $gene (@ogenes){
+		    if(exists $feat_attrs->{$gene}){
+			foreach my $c (sort {$a cmp $b} keys %{$feat_attrs->{$gene}}){
+			    $classes->{$c}++;
+			}
+		    }
+		    $longestorf = ($features->{$gene}->[3] > $longestorf) ? $features->{$gene}->[3] : $longestorf;
+		    $longestpairc = ($feat_attrs->{$gene}->{'pairfreq'} > $longestpairc) ? $feat_attrs->{$gene}->{'pairfreq'} : $longestpairc;
+		}
+		##
+		
+		#my @attrs = sort {$a cmp $b} keys %$classes;
+		if(exists  $seq_attrs->{$organism}){
+		    #Report alternative start sites if they result in a longer ORF
+		    my @alts;
+		    my $orfidx=0;
+		    foreach my $alt (@{$seq_attrs->{$organism}}){
+			#Report alternative starts or possible frameshifts
+			if($alt =~ /alt_start/){
+			    print "#$alt\n" if($debug);;
+			    #Only report if results in a longer ORF
+			    my($astart,$aend,$aorient,$alen) = ($alt =~ /alt_start=(\d+)-(\d+),orient\:([^,]+),len\:(\d+)/);
+			    my($apairfreq) = ($alt =~ /pairfreq:(\d+)/);
+			    print "#alt $astart,$aend,$aorient,$alen\n" if($debug);;
+			    print STDERR "BAD $alt" if(!$astart || !$aend || !$aorient || !$alen);
+			    if(!$longer_altstarts || $alen>$longestorf){
+				if(!$moreconsistent_altstarts || $apairfreq>$longestpairc){
+				    push @alts,["ALTSTARTgene$organism$orfidx",$astart,$aend,$aend-$astart,$aorient];
+				    $orfidx++;
+				}
+			    }
+			    else{
+				print "#Skipping $alt $alen<$longestorf\n" if($debug);;
+			    }
+			}
+		    }
+		    #Report frameshifts if they result in a longer ORF
+		    #This should also include alt start, frameshift pairs if they result in a longer ORF
+		    foreach my $alt (@{$seq_attrs->{$organism}}){
+			#Report alternative starts or possible frameshifts
+			if($alt =~ /alt_fs/){
+			    print "#$alt\n" if($debug);;
+			    #Only report if results in a longer ORF
+			    my($astart,$aend,$aorient,$alen) = ($alt =~ /alt_fs=(\d+)-(\d+),orient\:([^,]+),len\:(\d+)/);
+			    print "#alt $astart,$aend,$aorient,$alen\n" if($debug);;
+			    die "$alt" if(!$astart || !$aend || !$aorient || !$alen);
+			    if(!$longer_altstarts || $alen>$longestorf){
+				push @alts,["ALTFSgene$organism$orfidx",$astart,$aend,$aend-$astart,$aorient];
+				$orfidx++;
+			    }
+			    else{
+				print "#Skipping $alt $alen<$longestorf\n" if($debug);;
+			    }
+			}
+		    }
+		    
+		    
+		    #Replace $ogenes
+		    if(scalar(@alts)>0){
+			my @sortedalts = sort {$b->[3] <=> $a->[3]} @alts;
+			my $neworf = $sortedalts[0];
+			print "#Num genes ",scalar(keys %$mappedgenes),"\n" if($debug);;
+			if($autocorrect){
+			    foreach my $gene (@ogenes){
+				$deleted->{$gene}++;
+				delete $mappedgenes->{$gene};
+				delete $mappedorgs->{$organism}->{$gene};
+				print "#Possible deleting $gene\n" if($debug);;
+			    }
+			}
+			my $featlen = $neworf->[3];
+			
+			#if($neworfcov/$featlen >= $coverage_cutoff  && #%coverage over matching gene length
+			#  $neworfpid/$alnlen >= $pid_cutoff){ #%id over aligned length onl
+			$features->{$neworf->[0]} = [$organism,$neworf->[1],$neworf->[2],$neworf->[3],$neworf->[4]];
+			$mappedgenes->{$neworf->[0]}->{'fmin'} = $neworf->[1];
+			$mappedgenes->{$neworf->[0]}->{'fmax'} = $neworf->[2];
+			$mappedgenes->{$neworf->[0]}->{'len'} = $neworf->[3];
+			$mappedgenes->{$neworf->[0]}->{'relorient'} = $neworf->[4];
+			$mappedorgs->{$organism}->{'features'}->{$neworf->[0]}++;
+			$mappedorgs->{$organism}->{'qcov'} = '?';
+			#Add new gene
+			print "#Adding ",join(',',@$neworf),"\n" if($debug);;
+			push @neworfs,$neworf->[0];
+			$adjustedorfs++;
+			print "#Num genes ",scalar(keys %$mappedgenes),"\n" if($debug);
+		    }
+		}
+	    }
+	    if(scalar(keys %$mappedgenes)>1 && scalar(keys %$mappedorgs)>1){
+		my($feat_attrs,$cluster_attrs,$codons) = &annotateCluster($atree,$mappedgenes,$mappedorgs);
+		my $classesstr = join(';',sort {$a cmp $b} keys %{$cluster_attrs});
+		$newclasses_sum->{$classesstr}->{'ngenes'} +=scalar(keys %$mappedgenes);
+		$newclasses_sum->{$classesstr}->{'nclusters'}++;
+	    }
+	    foreach my $neworf (@neworfs){
+		delete $mappedgenes->{$neworf};
+		delete $features->{$neworf};
+	    }
+	}
+    }
+}
+
+
+
+#Report how many clusters passed the cutoffs
+print '#NUM CLUSTERS '.$validcluster."\n";
+
+#Classify the features that never made it into a cluster as singletons:
+#1)not found in any alignment   : !exists mapped && !exists unmapped
+#2)aligned but below the cutoffs: !exists mapped &&  exists unmapped
+#When a duplications file was provided they are marked accordingly
+($nomatches,$dups) = &findSingletons($atree,$mapped,$unmapped,$subsumed,$datree);
+
+#Accumulators for the summary statistics
+#Mapped features: coverage sum, identity sum, feature count
+my($avgcov,$avgid,$mappedgenescount) = (0,0,0);
+
+#Unmapped features: feature count, coverage sum, identity sum, duplicates
+my($unmappedgenescount,$avgunmappedcov,$avgunmappedid,$unmappeddups) = (0,0,0,0);
+
+#Features without any hit, the duplicates among them, and new ORFs
+my($nohit,$nohitdupcount,$neworfscount) = (0,0,0);
+
+foreach my $feat_name (keys %$features){
+    my $fmin = $features->{$feat_name}->[1];
+    my $fmax = $features->{$feat_name}->[2];
+    if(exists $mapped->{$feat_name}){
+	die if(exists $unmapped->{$feat_name});
+	die if(exists $nomatches->{$feat_name});
+	if($mapped->{$feat_name}->{'cov'}>1){
+	    print STDERR "Bad cov ",$mapped->{$feat_name}->{'cov'},"\n" if($verbose);
+	    #$mapped->{$feat_name}->{'cov'}=1;
+	}
+	if($mapped->{$feat_name}->{'pid'}>1){
+	    print STDERR "Bad id ",$mapped->{$feat_name}->{'pid'},"\n" if($verbose);
+	    #$mapped->{$feat_name}->{'pid'}=1;
+	}
+	$avgcov+=$mapped->{$feat_name}->{'cov'};
+	$avgid+=$mapped->{$feat_name}->{'pid'};
+	$mappedgenescount++;
+    }
+    elsif(exists $unmapped->{$feat_name}){
+	die if(exists $mapped->{$feat_name});
+	die if(exists $nomatches->{$feat_name});	
+	if($unmapped->{$feat_name}->{'cov'}>1){
+	    print STDERR "Bad cov ",$unmapped->{$feat_name}->{'cov'},"\n" if($verbose);
+	    #$unmapped->{$feat_name}->{'cov'}=1;
+	}
+	if($unmapped->{$feat_name}->{'pid'}>1){
+	    print STDERR "Bad id ",$unmapped->{$feat_name}->{'pid'},"\n" if($verbose);
+	    #$unmapped->{$feat_name}->{'pid'}=1;
+	}
+	
+	$avgunmappedcov+=$unmapped->{$feat_name}->{'cov'};
+	$avgunmappedid+=$unmapped->{$feat_name}->{'pid'};
+	$unmappedgenescount++;
+	if(exists $dups->{$feat_name}){
+	    $unmappeddups++;
+	}
+    }
+    elsif(exists $dups->{$feat_name}){
+	$nohitdupcount++;
+    }
+    elsif(exists $nomatches->{$feat_name}){ 
+	$nohit++;
+    }
+    else{
+	#Genes should be categorized in mapped,unmapped,singletons
+	die if(exists $mapped->{$feat_name});
+	die if(exists $unmapped->{$feat_name});
+	die if(exists $nomatches->{$feat_name});	
+	#The rest are either deleted or newly called ORFs that are discarded
+	die if(!exists $subsumed->{$feat_name} && !exists $neworfs->{$feat_name});
+    }
+}
+
+#Sanity check: the number of features counted as NOHIT should equal the size of the nomatches lookup
+print STDERR "#Mismatch between NOHIT=$nohit and nomatches lookup",scalar(keys %$nomatches),"\n" if($nohit != scalar(keys %$nomatches));
+   
+#Print summary stats
+print "\n\n\n";
+print "Class legend\n";
+print "C{S,E}1 - consistent start,stop\n";
+print "C{S,E}2 - inconsistent start,stop. More than one annotated in a group\n";
+print "C{S,E}3 - unaligned start,stop\n";
+print "C{S,E}4 - invalid start,stop according to translation table\n";
+print "C{S,E}0 - start,stop at/off contig boundary\n";
+print "CM1     - multiple gene fragments. possible interrupted genes\n";
+print "CX      - invalid translation\n";
+#print "CS/E2.1 - there is only one annotated gene for each genome, but not all genomes use the same start/stop\n";
+#print "CS/E2.2 - the start/stop of some genes fall in a gapped region of the alignment\n";
+
+#Class tables are listed from the most to the least populated class (by gene count)
+print "Summary classes\n";
+foreach my $cstr (sort {$classes_sum->{$b}->{'ngenes'} <=> $classes_sum->{$a}->{'ngenes'}} (keys %$classes_sum)){
+    print "$cstr: num_genes:$classes_sum->{$cstr}->{'ngenes'} num_clusters:$classes_sum->{$cstr}->{'nclusters'}\n";
+}
+
+print "Complete classes\n";
+foreach my $cstr (sort {$classes_all->{$b}->{'ngenes'} <=> $classes_all->{$a}->{'ngenes'}} (keys %$classes_all)){
+    print "$cstr: num_genes:$classes_all->{$cstr}->{'ngenes'} num_clusters:$classes_all->{$cstr}->{'nclusters'}\n";
+}
+print "Number of clusters containing aligned features\n";
+print "CLUSTERS:$validcluster\n";
+print "Number aligned features mapped into clusters\n";
+#Averages are only printed when the denominator is non-zero
+print "MAPPED:$mappedgenescount AVGCOV:",$avgcov/$mappedgenescount," AVGID:",$avgid/$mappedgenescount,"\n" if($mappedgenescount);
+print "Number features with an overlapping alignment but are not mapped into clusters\n";
+#Both averages are taken over the unmapped features; NUMDUPS is the number of
+#unmapped features that are also flagged as duplications
+print "UNMAPPED:$unmappedgenescount AVGCOV:",$avgunmappedcov/$unmappedgenescount," AVGID:",$avgunmappedid/$unmappedgenescount," NUMDUPS:$unmappeddups\n" if($unmappedgenescount);
+if(exists $options{'duplications'}){
+    print "Number of features with no mapping and marked as duplications\n";
+    print "DUPS:$nohitdupcount\n";
+}
+print "Number of features with no overlapping alignment\n";
+print "NOHIT:$nohit\n";
+print "Number of missing annotations\n";
+#NOTE(review): this prints $neworfcount while the tally declared next to $nohit is
+#spelled $neworfscount - confirm which counter is intended
+print "MISSORF:$neworfcount\n";
+
+close $cfh;
+close $ctfh;
+close $ctfh2;
+
+&printExtJS($clusters);
+
+exit(0);
+
+#############################
+# Subroutines
+#############################
+#Primary method of obtaining mapped annotation from an alignment
+#Build a cluster of aligned features/genes based on a single query
+#gene, $query
+#TODO: Confirm qcov,qpid,cov,pid are calculated correctly. Correct for overlapping alignments
+sub buildCluster{
+    #Build one cluster of aligned features around a single query feature.
+    #
+    #Arguments
+    #  $atree - alignment tree object; map(), intersect() and getAlignment() are called on it
+    #  $query - name of the query feature, a key of the file-level $features lookup
+    #
+    #Returns the list ($mappedorgs,$mappedgenes,$unmappedorgs,$unmappedgenes)
+    #  $mappedgenes/$unmappedgenes - feature name => {fmin,fmax,cov,pid,len,relorient,frame,frameinqry,relqrysdist}
+    #                                for features that pass/fail the cutoffs; the query itself is always
+    #                                stored in $mappedgenes together with its list of alignments
+    #  $mappedorgs/$unmappedorgs   - sequence name => {features=>{name=>count},qcov=>...}
+    #
+    #Reads the file-level $features, $mapped, $deleted, $cluster_id and cutoff settings
+    #and records feature => sequence names in the file-level $feat2organism.
+    #Dies on inconsistent orientation, coordinates or identity totals.
+    my ($atree,$query) = @_;
+
+    #Attributes of the query
+    my $qseqname = $features->{$query}->[0];
+    $feat2organism->{$query} = $qseqname;
+    my $qcurrorg = '?';
+    my $qcov = 0;
+    my $qpid = 0;
+    my $qalnfmin = undef;
+    my $qalnfmax = undef;
+    my $qfmin = $features->{$query}->[1];
+    my $qfmax = $features->{$query}->[2];
+    my $qfeatlen = $qfmax-$qfmin;
+    my $qrelorient = 0;
+
+    my $qorient = $features->{$query}->[4];
+    
+    print "#MAPFEATURE Mapping $query $qseqname:$qfmin-$qfmax len:",$qfmax-$qfmin,"\n" if($debug);;
+    
+    #AlignmentTree::map() 
+    #returns [0=alignment_name,1=seqid,2=align_start,3=align_stop,4=align_cov,5=feature_name,6=seqid,7=feature_cov,8=feature_pid]
+
+    #NOTE(review): this result is overwritten below before it is read; the call is
+    #kept in case map() has side effects - confirm and drop it if it does not
+    my @isect = $atree->map($qseqname,$qfmin,$qfmax);
+    
+    #List of alignments that comprise the current cluster
+    my $goodalignments = {};
+    #List of seqs that overlap query in the cluster
+    my $allseqs = {};
+
+    #List of organism_ids in the current cluster 
+    my $mappedorgs = {}; #passes cutoffs
+    my $unmappedorgs = {}; #do not pass cutoffs
+    
+    #List of genes in the current cluster
+    my $mappedgenes = {}; #passes cutoffs
+    my $unmappedgenes = {}; #do not pass cutoffs
+    
+    #Contains list of annotations that are overlapping in an alignment
+    my $alnfeats = {};
+    my $alnorgs = {};
+
+    my $valid=0;
+    
+    my $nisect;
+    ($nisect,$allseqs,$goodalignments) = &getAlignedFeatures($atree,$qseqname,$query,$qfmin,$qfmax,'gene');
+    
+
+    if($verbose){
+	print "#QUERY=$query len=$features->{$query}->[3]";
+	print " coords=$qfmin-$qfmax len=$features->{$query}->[3] strand=$features->{$query}->[4]";
+	print " Num_alignments=",scalar(keys %$goodalignments);
+	print "\n";
+    }
+   
+    #Report annotated frame relative to query
+    #Intersect the 3 bases at the 5' end of the query (fmin side on '+', fmax side otherwise)
+    my $seqalnpos;
+    if($qorient eq '+'){
+	@isect = $atree->intersect($qseqname,$qfmin,$qfmin+3);
+    }
+    else{
+	@isect = $atree->intersect($qseqname,$qfmax-3,$qfmax);
+    }
+    foreach my $r (@isect){
+	my $feat_name = $r->[0];
+	my $seqname = $r->[1];
+	my $align_name = $r->[5];
+	#print "#Setting $seqname query frame: qorient $qorient, alnframe: $r->[7] $r->[2]-$r->[3]\n";
+	if(exists $goodalignments->{$feat_name} && $feat_name =~ /^WGA/){
+	    #Both query orientations store the same coordinate: $r->[3] when $r->[7] is '-', else $r->[2]
+	    if($qorient eq '+'){
+		if($r->[7] eq '-'){
+		    $seqalnpos->{$seqname}=$r->[3];
+		}
+		else{
+		    $seqalnpos->{$seqname}=$r->[2];
+		}
+	    }
+	    else{
+		if($r->[7] eq '-'){
+		    $seqalnpos->{$seqname}=$r->[3];
+		}
+		else{
+		    $seqalnpos->{$seqname}=$r->[2];
+		}
+	    }
+	}
+    }
+
+    foreach my $r ( sort { $features->{$b->[0]}->[3] <=> $features->{$a->[0]}->[3] } #sort on feature length
+		    @$nisect){
+	my $feat_name = $r->[0];
+	my $seqname = $r->[1];
+	my $align_name = $r->[5];
+	#Check if we want to consider this alignment
+	if(exists $goodalignments->{$align_name}){
+	    my($alnobj,$bv,$width) = $atree->getAlignment($align_name);
+	    $feat_name =~ s/gene\://;
+	    if(!exists $features->{$feat_name}){
+		print "#Bad feature found $feat_name. Not in input file. Skipping\n" if($debug);
+		next;
+	    }
+	    #Capture some stats on the matching genes
+	    #TODO the cov,pid stats assume non-overlapping alignments
+	    if($query ne $feat_name){
+		#Only report genes that have not been mapped
+		if(!exists $mapped->{$feat_name} && !exists $deleted->{$feat_name} && exists $features->{$feat_name}){
+		    print "#MAP:",join("\t",$cluster_id,@$r),"\n" if($debug);		
+		    die "Mismatching orientation for $feat_name. Mapping showing $r->[12]. Input reporting $features->{$feat_name}->[4]" if($r->[12] ne $features->{$feat_name}->[4]);
+		    die "fmax < fmin" if($r->[3]<$r->[2]);
+		    #Sum the coverage for each gene versus the query
+		    if(exists $alnfeats->{$feat_name}->{'fmin'}){
+			$alnfeats->{$feat_name}->{'fmin'}=($r->[2]<$alnfeats->{$feat_name}->{'fmin'}) ? $r->[2]: $alnfeats->{$feat_name}->{'fmin'};
+		    }
+		    else{
+			$alnfeats->{$feat_name}->{'fmin'}=$r->[2];
+		    }
+		    if(exists $alnfeats->{$feat_name}->{'fmax'}){
+			$alnfeats->{$feat_name}->{'fmax'}=($r->[3]>$alnfeats->{$feat_name}->{'fmax'}) ? $r->[3]: $alnfeats->{$feat_name}->{'fmax'};
+		    }
+		    else{
+			$alnfeats->{$feat_name}->{'fmax'}=$r->[3];
+		    }
+		    $alnfeats->{$feat_name}->{'cov'}+=$r->[7];
+		    $alnfeats->{$feat_name}->{'pid'}+=$r->[8];
+		    $alnfeats->{$feat_name}->{'len'}+=($r->[3]-$r->[2]);
+		    die "Bad pid $alnfeats->{$feat_name}->{'pid'} > $alnfeats->{$feat_name}->{'len'} from pid:$r->[8] len:($r->[3]-$r->[2]) ".($r->[3]-$r->[2]) if($alnfeats->{$feat_name}->{'pid'} > $alnfeats->{$feat_name}->{'len'});
+		    $alnfeats->{$feat_name}->{'relorient'} = $r->[11]; 
+		    $feat2organism->{$feat_name} = $r->[1];
+		    #num aligned residues $r->[8] indicates matches on query seq
+		    #   |NNNNNN---NNNNNNNNN| query 15 residues
+		    #      |NNNNNNNNN---NNNNNN| hit 15 residues - 9 matching , qcov=9/15, cov=9/15
+		    $alnfeats->{$feat_name}->{'qcov'}+=$r->[4];
+		    print "#$feat_name $align_name $r->[2]-$r->[3] len:",$r->[3]-$r->[2],
+		    ",$alnfeats->{$feat_name}->{'len'} cov:$r->[7],$alnfeats->{$feat_name}->{'cov'} id:$r->[8],$alnfeats->{$feat_name}->{'pid'} alnorient:$r->[10] featorient:$r->[11]\n" if($debug);
+		}
+		else{
+		    #This feature has already been mapped
+		    print "#Alternative mapping for $feat_name cov:$r->[4] pid:$r->[8] len:",$r->[3]-$r->[2]," matchingorient:$r->[10],$r->[11]\n" if($debug);;
+		}
+	    }
+	    else{
+		die if($feat_name ne $query);
+		die if($r->[10] ne $r->[11]);
+		#Capture some stats on the query
+		if(defined $qalnfmin){
+		    $qalnfmin = ($r->[2] < $qalnfmin) ? $r->[2] : $qalnfmin;
+		}
+		else{
+		    $qalnfmin = $r->[2];
+		}
+		if(defined $qalnfmax){
+		    $qalnfmax = ($r->[3] > $qalnfmax) ? $r->[3] : $qalnfmax;
+		}
+		else{
+		    $qalnfmax = $r->[3];
+		}
+		$qcov += $r->[7];
+		$qcurrorg = $r->[6];
+		$qpid += $r->[8];
+		$qrelorient = $r->[10];
+	    }
+	}
+    }
+
+    #The query is always recorded as a mapped gene; its 'cov' is the aligned span, not the summed $qcov
+    $mappedgenes->{$query}->{'fmin'} = $qalnfmin;
+    $mappedgenes->{$query}->{'fmax'} = $qalnfmax;
+    $mappedgenes->{$query}->{'cov'} = $qalnfmax-$qalnfmin;#$qcov;
+    $mappedgenes->{$query}->{'pid'} = $qpid;#TODO, not pid, rather %aln: allows mismatches but no gaps
+    $mappedgenes->{$query}->{'len'} = $qfeatlen;
+    $mappedgenes->{$query}->{'relorient'} = $qrelorient;
+    $mappedgenes->{$query}->{'alignments'} = [keys %$goodalignments];
+    $mappedorgs->{$qcurrorg}->{'features'}->{$query}++;
+    $mappedorgs->{$qcurrorg}->{'qcov'} = $qalnfmax-$qalnfmin;
+
+
+    #Set query coverage
+    foreach my $feat_name (keys %$alnfeats){
+	my $seqname = $features->{$feat_name}->[0];
+	my $fmin = $features->{$feat_name}->[1];
+	my $fmax = $features->{$feat_name}->[2];
+	my $orient = $features->{$feat_name}->[4];
+	my @isect;
+	my $qmatchstart;
+	if($orient eq '+'){
+	    @isect = $atree->intersect($seqname,$fmin,$fmin+3);
+	}
+	else{
+	    @isect = $atree->intersect($seqname,$fmax-3,$fmax);
+	}
+	my $alignedstart=0;
+	foreach my $r (@isect){
+	    #Shadows the outer $feat_name: inside this loop it holds $r->[0], the name
+	    #that is tested against the alignment list (and printed by the debug lines)
+	    my $feat_name = $r->[0];
+	    if(exists $goodalignments->{$feat_name} && $feat_name =~ /^WGA/){
+		if($r->[1] eq $qseqname){
+		    if($r->[7] eq '-'){
+			$qmatchstart=$r->[3];
+		    }
+		    else{
+			$qmatchstart=$r->[2];
+		    }
+		}
+		elsif($r->[1] eq $seqname){
+		    #The start counts as aligned only when all 3 bases are covered by the record
+		    if(($orient eq '+' && $fmin == $r->[2] && $fmin+3 == $r->[3]) || 
+		       ($orient eq '-' && $fmax-3 == $r->[2] && $fmax == $r->[3])){
+			$alignedstart=1;
+			print "#Aligned start $feat_name $seqname $orient $fmin-$fmax $r->[2]-$r->[3]\n" if($debug);
+		    }
+		    else{
+			print "#Unaligned start for $feat_name $seqname $fmin == $r->[2] && $fmax == $r->[3]\n" if($debug);
+		    }
+		}
+	    }
+	}
+	    
+	my $featlen = $fmax-$fmin;
+	if($alnfeats->{$feat_name}->{'cov'}/$featlen >= $coverage_cutoff  && #%coverage over matching gene length
+	   $alnfeats->{$feat_name}->{'pid'}/$alnfeats->{$feat_name}->{'len'} >= $pid_cutoff){ #%id over aligned length only
+	    print "Summing query coverage feat_name $feat_name $feat2organism->{$feat_name} = $alnfeats->{$feat_name}->{'qcov'}. Current total $alnorgs->{$feat2organism->{$feat_name}}->{'qcov'}\n" if($debug);
+	    $alnorgs->{$feat2organism->{$feat_name}}->{'qcov'} += $alnfeats->{$feat_name}->{'qcov'};
+	}
+
+	if(exists $seqalnpos->{$seqname} && $alignedstart){
+	    if($features->{$feat_name}->[4] eq '+' ){
+		#print "#Feat $feat_name $features->{$feat_name}->[4] $seqalnpos->{$seqname}-$fmin ",$seqalnpos->{$seqname}-$fmin," ",($seqalnpos->{$seqname}-$fmax)%3,"\n";
+		$alnfeats->{$feat_name}->{'relqrysdist'}=($seqalnpos->{$seqname}-$fmin);
+		$alnfeats->{$feat_name}->{'frame'}=($seqalnpos->{$seqname}-$fmin)%3;
+		$alnfeats->{$feat_name}->{'frameinqry'}=($qfmin-$qmatchstart)%3;
+		#print "#qframe $feat_name $alnfeats->{$feat_name}->{'frameinqry'}\n";
+	    }
+	    else{
+		#print "#Feat $feat_name $features->{$feat_name}->[4] $seqalnpos->{$seqname}-$fmax ",$seqalnpos->{$seqname}-$fmax," ",($seqalnpos->{$seqname}-$fmax)%3,"\n";
+		$alnfeats->{$feat_name}->{'relqrysdist'}=($seqalnpos->{$seqname}-$fmax);
+		$alnfeats->{$feat_name}->{'frame'}=($seqalnpos->{$seqname}-$fmax)%3;
+		$alnfeats->{$feat_name}->{'frameinqry'}=($qfmax-$qmatchstart)%3;
+		#print "#qframe $feat_name $alnfeats->{$feat_name}->{'frameinqry'}\n";
+	    }
+	}
+    }
+    
+    foreach my $feat_name (keys %$alnfeats){
+	#Check gene is part of input feature list [optional]
+	die "Bad gene $feat_name" if(! exists $features->{$feat_name});
+	die "Query gene should not map to itself" if($feat_name eq $query);
+	#die "Can't find $feat_name in organism lookup" if(!exists $feat2organism->{$feat_name});
+	#die "Bad organism $feat2organism->{$feat_name" if(!exists $alnorgs->{$feat2organism->{$feat_name}});
+	#Check gene has not already been mapped
+	die if(exists $mapped->{$feat_name});
+	#coverage cutoff and percent identity cutoff
+	my $fmin = $features->{$feat_name}->[1];
+	my $fmax = $features->{$feat_name}->[2];
+	my $featlen = $fmax-$fmin;
+	die if($featlen<1);
+	if($verbose){
+	    print STDERR "Bad query coverage $qcov > $qfeatlen for $feat_name $fmin-$fmax\n" if($qcov > $qfeatlen);
+	    print STDERR "Bad match coverage $alnfeats->{$feat_name}->{'cov'} > $featlen==$fmax-$fmin for $feat_name\n" if($alnfeats->{$feat_name}->{'cov'} > $featlen);
+	    print STDERR "Bad match pid $alnfeats->{$feat_name}->{'pid'} > $alnfeats->{$feat_name}->{'len'} for $feat_name\n" if($alnfeats->{$feat_name}->{'pid'} > $alnfeats->{$feat_name}->{'len'});
+	    print STDERR "#WARNING Bad len $alnfeats->{$feat_name}->{'len'} > ($features->{$feat_name}->[2]-$features->{$feat_name}->[1]) ".($features->{$feat_name}->[2]-$features->{$feat_name}->[1])." for $feat_name\n"
+		if($alnfeats->{$feat_name}->{'len'} > ($features->{$feat_name}->[2]-$features->{$feat_name}->[1]));
+	}
+	#
+	#Coverage and percent_id cutoffs are checked here in the following order 
+	#Check that coverage over shorter of query and hit
+	#query_coverage > coverage_cutoff || hit_coverage > coverage_cutoff && hit_pid > pid_cutoff
+	#query_coverage > coverage_cutoff && hit_coverage > coverage_cutoff && hit_pid > pid_cutoff
+	#
+	print "Cutoff check $feat_name $feat2organism->{$feat_name} $alnorgs->{$feat2organism->{$feat_name}}->{'qcov'},$qfeatlen qcov=",($alnorgs->{$feat2organism->{$feat_name}}->{'qcov'}/$qfeatlen)," >= $query_coverage_cutoff ",
+	$alnfeats->{$feat_name}->{'cov'}/$featlen ," >=  $coverage_cutoff ", 
+	$alnfeats->{$feat_name}->{'pid'}/$alnfeats->{$feat_name}->{'len'},">= $pid_cutoff\n" if($debug);
+
+	if(($query_coverage_cutoff==0 || ($alnorgs->{$feat2organism->{$feat_name}}->{'qcov'}/$qfeatlen >= $query_coverage_cutoff)) && #%coverage over query(longer feature in the comparison)
+	   $alnfeats->{$feat_name}->{'cov'}/$featlen >= $coverage_cutoff  && #%coverage over matching gene length (shorter feature in the comparison)
+	   $alnfeats->{$feat_name}->{'pid'}/$alnfeats->{$feat_name}->{'len'} >= $pid_cutoff){ #%id over aligned length only
+	    
+	    print "PASSED $feat_name\n" if($debug);
+	    
+	    #Check matching len is <= length of gene
+	    $mappedorgs->{$feat2organism->{$feat_name}}->{'features'}->{$feat_name}++;
+	    #print "WARNING query coverage > query length: $alnorgs->{$feat2organism->{$feat_name}}->{'qcov'} > $qfeatlen\n" 
+	    #if($alnorgs->{$feat2organism->{$feat_name}}->{'qcov'} > $qfeatlen);
+	    $mappedorgs->{$feat2organism->{$feat_name}}->{'qcov'} = $alnorgs->{$feat2organism->{$feat_name}}->{'qcov'};
+
+	    $mappedgenes->{$feat_name}->{'fmin'} = $alnfeats->{$feat_name}->{'fmin'};
+	    $mappedgenes->{$feat_name}->{'fmax'} = $alnfeats->{$feat_name}->{'fmax'};
+	    $mappedgenes->{$feat_name}->{'cov'} = $alnfeats->{$feat_name}->{'cov'};
+	    $mappedgenes->{$feat_name}->{'pid'} = $alnfeats->{$feat_name}->{'pid'};
+	    $mappedgenes->{$feat_name}->{'len'} = $alnfeats->{$feat_name}->{'len'};
+	    $mappedgenes->{$feat_name}->{'relorient'} = $alnfeats->{$feat_name}->{'relorient'};
+	    #print "FRAME $alnfeats->{$feat_name}->{'frame'}\n";
+	    #print "RELQRYSTARTDIST $alnfeats->{$feat_name}->{'relqrysdist'}\n";
+	    $mappedgenes->{$feat_name}->{'frame'} = $alnfeats->{$feat_name}->{'frame'};
+	    $mappedgenes->{$feat_name}->{'frameinqry'} = $alnfeats->{$feat_name}->{'frameinqry'};
+	    $mappedgenes->{$feat_name}->{'relqrysdist'} = $alnfeats->{$feat_name}->{'relqrysdist'};
+	}
+	else{
+	    print "BELOW $feat_name\n" if($debug);
+	    #Does not pass cutoffs	    
+	    $unmappedgenes->{$feat_name}->{'cov'} = $alnfeats->{$feat_name}->{'cov'};
+	    $unmappedgenes->{$feat_name}->{'fmin'} = $alnfeats->{$feat_name}->{'fmin'};
+	    $unmappedgenes->{$feat_name}->{'fmax'} = $alnfeats->{$feat_name}->{'fmax'};
+	    $unmappedgenes->{$feat_name}->{'pid'} = $alnfeats->{$feat_name}->{'pid'};
+	    $unmappedgenes->{$feat_name}->{'len'} = $alnfeats->{$feat_name}->{'len'};
+	    $unmappedgenes->{$feat_name}->{'relorient'} = $alnfeats->{$feat_name}->{'relorient'};
+	    $unmappedgenes->{$feat_name}->{'frame'} = $alnfeats->{$feat_name}->{'frame'};
+	    $unmappedgenes->{$feat_name}->{'frameinqry'} = $alnfeats->{$feat_name}->{'frameinqry'};
+	    #Same key as the one written in the query coverage loop ('relqrysdist')
+	    $unmappedgenes->{$feat_name}->{'relqrysdist'} = $alnfeats->{$feat_name}->{'relqrysdist'};
+	}
+    }
+    #Every overlapping sequence that contributed no passing feature is reported as unmapped
+    foreach my $seq (keys %$allseqs){
+	if(!exists $mappedorgs->{$seq}){
+	    #Does not pass cutoffs
+	    $unmappedorgs->{$seq} = {};
+	}
+    }
+    foreach my $feat_name (keys %{$unmappedgenes}){
+	if(!exists $mappedorgs->{$feat2organism->{$feat_name}}){
+	    die "ORG found in mapped list $feat2organism->{$feat_name} $feat_name query:$query queryorg:$qcurrorg" if(exists $mappedorgs->{$feat2organism->{$feat_name}});
+	    $unmappedorgs->{$feat2organism->{$feat_name}}->{'features'}->{$feat_name}++;
+	    $unmappedorgs->{$feat2organism->{$feat_name}}->{'qcov'} += $alnfeats->{$feat_name}->{'qcov'};
+	}
+    }
+    return($mappedorgs,$mappedgenes,$unmappedorgs,$unmappedgenes);
+}
+    
+
+
+#Classify consistency of annotations within a cluster
+#Returns start,stop codon positions of annotated genes only
+#
+#Clusters are assigned one or more classes based on consistent gene structures
+#Class CS1: All start codons in the cluster are aligned
+#Class CS2: There are multiple, inconsistent start codons in the cluster
+#Class CS3: One or more of the start codons are not aligned in the cluster
+#Class CS4: Invalid annotated start codon
+#Class CE1-3. Same as CS1-3 but for stop codons
+#Class CM1 : Multiple spanned features in the cluster
+sub annotateCluster{
+    #Classify how consistently the start and stop codons of the genes in one cluster are annotated.
+    #
+    #Arguments
+    #  $atree - alignment tree object, passed to findCodons() and used for the debug column checks
+    #  $genes - hash ref keyed by feature name; only the keys are used here
+    #  $orgs  - hash ref: sequence name => {features=>{feature name=>...}}
+    #
+    #Returns the list ($feat_attrs,$cluster_attrs,$codons)
+    #  $feat_attrs    - feature name => {attribute=>count}, e.g. CS0/CS3/CS4, CE0/CE3/CE4, startfreq=N, startcodon=TOKEN
+    #  $cluster_attrs - class name => count (CM1 holds [sequence name, feature count] instead)
+    #  $codons        - {starts,stops,pairs,featstops,featstarts} lookups of codon tokens,
+    #                   where a token is alignment column . $CODON_DELIM . alignment name
+    #
+    #Side effect: slots 7-10 of the file-level $features records are filled with the
+    #first two elements of the start and stop codon records returned by findCodons().
+    my($atree,$genes,$orgs) = @_;
+
+    my $cluster_attrs = {};
+    my $feat_attrs = {};
+
+    my $starts = {};
+    my $stops = {};
+    my $codonpairs = {};
+
+    my $alignedstartcount=0;
+    my $alignedstopcount=0;
+
+    my $seqstarts = {};
+    my $seqstops = {};
+    my $featstarts = {};
+    my $featstops = {};
+
+    foreach my $org (keys %$orgs){
+	if(scalar(keys %{$orgs->{$org}->{'features'}})>1){
+	    print "#Class CM1. Multiple genes spanning query. Count ",scalar(keys %{$orgs->{$org}->{'features'}}),"\n" if($debug);;
+	    $cluster_attrs->{'CM1'} = [$org,scalar(keys %{$orgs->{$org}->{'features'}})];
+	}
+    }
+    print "#Annotating cluster\n" if($debug);;
+    foreach my $feat_name (keys %$genes){
+	#Save relative position of start and stop codons in the
+        #alignment $align_name
+	die if(!exists $features->{$feat_name});
+	my ($seqname,$fmin,$fmax,$len,$orient) = @{$features->{$feat_name}};
+	my($startcodon,$stopcodon,$partial_start,$partial_stop,$bad_start,$bad_stop) = &findCodons($atree,
+												   $seqname,
+												   $fmin,
+												   $fmax,
+												   $orient,$feat_name);
+	if($verbose && !$bad_stop && !$bad_start){
+	    print "BAD ORF $seqname,$fmin,$fmax\n" if(&isORF($db,$seqname,$fmin,$fmax,$orient)<=0);
+	}
+
+
+	#A reference means the codon was located in an alignment; anything else is treated as unaligned
+	if(ref $startcodon){
+	    my($mcol,$align_name) = (@$startcodon);
+	    my $token = $mcol.$CODON_DELIM.$align_name;
+	    if($debug){
+		#Cross-check the reported column against a direct coordinate lookup
+		if($orient eq '+'){
+		    my @res= AlignmentTree::coordstocolumn($atree->{_alignments}->{$align_name}->[0],$seqname,$fmin,$fmin+3);
+		    die "$res[0] ne $mcol $seqname,$fmin,$fmin+3" if($res[0] ne $mcol);
+		}
+		else{
+		    my @res= AlignmentTree::coordstocolumn($atree->{_alignments}->{$align_name}->[0],$seqname,$fmax-3,$fmax);
+		    #Report the coordinates that were actually checked on this strand
+		    die "$res[0] ne $mcol $seqname,$fmax-3,$fmax" if($res[0] ne $mcol);
+		}
+	    }
+	    $starts->{$token}++;
+	    print "#Start codon $feat_name $startcodon->[0] $startcodon->[1] $startcodon->[2] $startcodon->[3] $orient\n" if($debug);;
+	    $alignedstartcount++;
+	    $features->{$feat_name}->[7] = $startcodon->[0];
+	    $features->{$feat_name}->[8] = $startcodon->[1];
+	    $seqstarts->{$seqname}->{$token}++;
+	    $featstarts->{$feat_name} = $token;
+	    if($partial_start){
+		$feat_attrs->{$feat_name}->{'CS0'}++; #start codon in PMARK spacer adjacent to contig boundary
+		$cluster_attrs->{'CS0'}++; 
+	    }
+	    if($debug){
+		$feat_attrs->{$feat_name}->{'startcol:'.$mcol}++;
+	    }
+	    if($bad_start){#$startcodon == -1){
+		$feat_attrs->{$feat_name}->{'CS4'}++; #invalid start
+		$cluster_attrs->{'CS4'}++; 
+	    }
+
+	}
+	else{
+	    $feat_attrs->{$feat_name}->{'CS3'}++;
+	    $cluster_attrs->{'CS3'}++; 
+	}
+	if(ref $stopcodon){
+	    my($mcol,$align_name) = (@$stopcodon);
+	    my $token = $mcol.$CODON_DELIM.$align_name;
+	    if($debug){
+		if($orient eq '+'){
+		    my @res= AlignmentTree::coordstocolumn($atree->{_alignments}->{$align_name}->[0],$seqname,$fmax-3,$fmax);
+		    die "$res[0] ne $mcol" if($res[0] ne $mcol);
+		}
+		else{
+		    my @res= AlignmentTree::coordstocolumn($atree->{_alignments}->{$align_name}->[0],$seqname,$fmin,$fmin+3);
+		    die "$res[0] ne $mcol" if($res[0] ne $mcol);
+		}
+	    }
+	    $stops->{$token}++;
+	    print "#Stop  codon $feat_name $stopcodon->[0] $stopcodon->[1] $stopcodon->[2] $stopcodon->[3] $orient\n" if($debug);;
+	    $alignedstopcount++;
+	    $features->{$feat_name}->[9] = $stopcodon->[0];
+	    $features->{$feat_name}->[10] = $stopcodon->[1];
+	    $seqstops->{$seqname}->{$token}++;
+	    $featstops->{$feat_name} = $token;
+	    if($partial_stop){
+		$feat_attrs->{$feat_name}->{'CE0'}++; #stop codon in PMARK spacer adjacent to contig boundary
+		$cluster_attrs->{'CE0'}++; 
+	    }
+	    if($debug){
+		$feat_attrs->{$feat_name}->{'stopcol:'.$mcol}++;
+	    }
+	    if($bad_stop){#$stopcodon == -1){
+		$feat_attrs->{$feat_name}->{'CE4'}++; #invalid stop
+		$cluster_attrs->{'CE4'}++; 
+	    }
+	}
+	else{
+	    $feat_attrs->{$feat_name}->{'CE3'}++;
+	    $cluster_attrs->{'CE3'}++; 
+	}
+	#Record a start/stop pair only for sequences that contribute exactly one feature
+	if(scalar(keys %{$orgs->{$seqname}->{'features'}})==1 && exists $featstarts->{$feat_name} && $featstops->{$feat_name}){
+	    #$codonpairs->{$featstarts->{$feat_name}.':'.$featstops->{$feat_name}}->{'gfreq'}++;
+	    #$codonpairs->{$featstarts->{$feat_name}.':'.$featstops->{$feat_name}}->{'afreq'}++;
+	    #$codonpairs->{$featstarts->{$feat_name}.':'.$featstops->{$feat_name}}->{'length'}+=$len;
+
+	    $codonpairs->{$featstarts->{$feat_name}.':'.$featstops->{$feat_name}}->{'orgs'}->{$seqname} = [$fmin,$fmax,$orient,1]; #[fmin,fmax,orient,is_annotated,fs_type]
+	    
+	}
+
+    }
+
+    if(scalar(keys %$starts)==1){
+	#There is only one annotated start
+	my @start = keys %$starts; 
+	if($starts->{$start[0]}==scalar(keys %$genes)){
+	    #and every gene has this annotated start
+	    print "#Class CS1. Consistent starts\n" if($debug);;
+	    $cluster_attrs->{'CS1'}++;
+	}
+	else{
+	    #some genes are missing this start codon but there are no others
+	    print "#Class CS3. Unaligned starts ",$starts->{$start[0]}, "==",scalar(keys %$genes),"\n" if($debug);;
+	    $cluster_attrs->{'CS2'}++; 
+	    $cluster_attrs->{'CS3'}++; 
+	}
+    }
+    else{
+	if($alignedstartcount == scalar(keys %$genes)){
+	    #there is one annotated start codon for each genome, but not all genomes use the same start
+	    print "#Class CS2. Inconsistent starts\n" if($debug);;
+	    $cluster_attrs->{'CS2'}++;
+	}
+	else{
+	    #there are multiple annotated start codons for genome
+	    #NOTE(review): the debug line says CS3 but only CS2 is counted here; CS3 was
+	    #already counted per feature above - confirm this is intended
+	    print "#Class CS3. Unaligned starts ",$alignedstartcount," == ",scalar(keys %$genes),"\n" if($debug);;
+	    $cluster_attrs->{'CS2'}++; 
+	}
+    }
+    if(scalar(keys %$stops)==1){
+	#There is only one annotated stop
+	my @stop = keys %$stops;
+	if($stops->{$stop[0]}==scalar(keys %$genes)){
+	    #and every gene is annotated with this stop
+	    print "#Class CE1. Consistent stops\n" if($debug);;
+	    $cluster_attrs->{'CE1'}++;
+	}
+	else{
+            #some genes are missing this stop codon but there are no others
+	    print "#Class CE3. Unaligned stops\n" if($debug);;
+	    $cluster_attrs->{'CE2'}++; 
+	    $cluster_attrs->{'CE3'}++; 
+	}
+    }
+    else{
+	if($alignedstopcount == scalar(keys %$genes)){
+	    #there is one annotated stop codon for each genome, but not all genomes use the same stop
+	    print "#Class CE2. Inconsistent stops\n" if($debug);;
+	    $cluster_attrs->{'CE2'}++;
+	}
+	else{
+	    #there are multiple annotated stop codons for genome
+	    print "#Class CE3. Unaligned stops\n" if($debug);;
+	    $cluster_attrs->{'CE2'}++; 
+	}
+    }
+    
+    #Save frequency of annotated starts, stops
+    foreach my $feat_name (keys %$genes){
+	#$feat_attrs->{$feat_name}->{'pairfreq='.$codonpairs->{"$featstarts->{$feat_name}"."$featstops->{$feat_name}"}}++;
+	if(exists $featstarts->{$feat_name}){
+	    $feat_attrs->{$feat_name}->{'startfreq='.$starts->{$featstarts->{$feat_name}}}++;
+	    $feat_attrs->{$feat_name}->{'startcodon='.$featstarts->{$feat_name}}++;
+	}
+	if(exists $featstops->{$feat_name}){
+	    $feat_attrs->{$feat_name}->{'stopfreq='.$stops->{$featstops->{$feat_name}}}++;
+	    $feat_attrs->{$feat_name}->{'stopcodon='.$featstops->{$feat_name}}++;
+	}
+    }
+    return ($feat_attrs,$cluster_attrs,{'starts'=>$seqstarts,'stops'=>$seqstops,'pairs'=>$codonpairs,'featstops'=>$featstops,'featstarts'=>$featstarts});
+}
+
+###################################
+#Classify singletons and unannotated regions
+#
+#Singletons consist of all annotated ORFs that do not map into an existing cluster above cutoffs
+#Singletons are classified into the following classes
+#Class SLTN1: there are no alignments that overlap the singleton. apparently true singleton
+#Class SLTN2: there are overlapping alignments, annotated ORF start can be modified to pass cutoffs into an existing cluster
+#Class SLTN3: there are overlapping alignments, annotated ORF stop can be modified to pass cutoffs into an existing cluster
+#Class SLTN4: there are overlapping alignments and unannotated ORFs can be mapped above cutoffs
+#Class SLTN5: there are overlapping alignments, but no overlapping ORFs above cutoffs
+sub annotateSingletons{
+    #Classify a feature that was not placed in any cluster.
+    #
+    #Arguments
+    #  $atree     - alignment tree object; intersect() is called on it
+    #  $seqname   - sequence the feature is annotated on
+    #  $feat_name - name of the feature being classified
+    #  $fmin,$fmax - feature coordinates on $seqname
+    #
+    #Returns an array ref of class strings. Only two classes are produced here:
+    #  "classSLTN1"            - nothing intersects the feature
+    #  "classSLTN5 Num_alns:N" - something intersects; N counts the distinct alignments
+    #                            whose record is named 'gene:'.$feat_name
+    #Reads the file-level $aligntoken, which is passed through to intersect().
+    my($atree,$seqname,$feat_name,$fmin,$fmax) = @_;
+    my @classes;
+    my @isect = $atree->intersect($seqname,$fmin,$fmax,$aligntoken);
+    my $goodalignments = {};
+    foreach my $r (@isect){
+	#Use a distinct name for the record so the query name in $feat_name stays visible;
+	#comparing a shadowing variable against 'gene:' plus itself can never match
+	my $hit_name = $r->[0];
+	my $align_name = $r->[5];
+	#Only consider WGA alignments (alignment name in $align_name) that span query (gene name in $feat_name)
+	#NOTE(review): assumes intersect() records carry the 'gene:'-prefixed feature name in slot 0 - confirm
+	if($hit_name eq 'gene:'.$feat_name){
+	    $goodalignments->{$align_name}++;
+	}
+    }
+    if(scalar (@isect)==0){
+	push @classes,"classSLTN1";
+    }
+    else{
+	push @classes,"classSLTN5 Num_alns:".scalar(keys %$goodalignments);
+    }
+    return \@classes;
+}
+
+#Check if fmin-fmax,orient on seqname is a valid ORF
+sub isORF(){
+    my($db,$seqname,$fmin,$fmax,$orient,$fs) = @_;
+    #hack to avoid some bioperl warnings that i cannot turn off
+    Bio::Root::Root::verbose(0);
+    *TEMP = \*STDERR;
+    open(FOO, ">/dev/null");
+    my $seqlen = ($fmax-$fmin);
+    if((! defined $fs && $seqlen%3!=0) || $seqlen > $MAXORFLEN || $seqlen < $MINORFLEN){
+	print "#Bad ORF length $seqname $fmin-$fmax ",$seqlen," ",$seqlen%3,"\n" if($verbose);
+	return 0;
+    }
+    #my $seqobj = $db->get_Seq_by_id($seqname);
+    my $seqobj = $db->{$seqname};
+    die "Bad coordinates $fmin-$fmax @_" if($fmin >= $fmax);
+    my $codon_table = Bio::Tools::CodonTable->new(-id=>11);
+    if($seqobj){
+	if($orient eq '+'){
+	    #die "Bad coordinates $fmax extends past end of sequence" if($fmax >= $seqobj->length());
+
+	    my $newobj;
+	    my $fsadj=0;
+	    my $pmark=0;
+	    my $adj=0;
+	    if($fs){
+		my $newobjs = $seqobj->trunc($fmin+1,$fmax);
+		my $gseq = $newobjs->seq();
+		#print "Seq size ",length($gseq)," ",$newobjs->length(),"\n";
+		#Check for PMARK spacer
+		#my $encoding = 'C'x$newobjs->length();
+		print "GSEQPRE:$gseq\n" if($debug);
+		foreach my $f (sort {$b->[0] <=> $a->[0]} @$fs){
+		    #print "SIZE ",scalar(@$fs),"\n";
+		    foreach my $start (sort {$b <=> $a} @{$f->[5]}){
+			if($start>=$fmin && $start<=$fmax){
+			    #print "SAM $seqname,$fsadj $fmin-$fmax $start $f->[0] $f->[1] $f->[2] $f->[3] $f->[4] $adj\n";
+			    #$fsadj+=$f->[4];
+			    #die if(($start-$fmin) >= length($encoding));
+
+			    die if(($start-$fmin) < 0);
+			    
+			    if($f->[4] == 1){
+				#substr($encoding,$start-$fmin+$adj,1,'F');
+				substr($gseq,$start-$fmin,1) = '';
+			    }
+			    elsif($f->[4] == -1){
+				#substr($encoding,$start-$fmin+1+$adj,0) = 'B';
+				substr($gseq,$start-$fmin+1,0) = 'N';
+				$adj++;
+			    }
+			    elsif($f->[4] == 0){
+				#substr($encoding,$start-$fmin,1,'G');
+			    }
+			}
+		    }
+		}
+
+		if($newobjs->seq() =~ /$PMARK_SPACER/){
+		    my $sloc = $-[0];
+		    print "FOUND PMARK+ $sloc ",substr($newobjs->seq(),$sloc,36),"\n" if($verbose);
+		#    substr($encoding,$sloc,36,'G'x36);
+		    $pmark=1;
+		}
+		$newobj = new Bio::Seq(-seq=>$gseq);
+		#$newobj = $newobj->revcom();
+		print "GSEQPOST:$gseq\n" if($debug);
+		#print "GSEQobj:",$newobj->seq(),"\n";
+		#print "#Encoding $encoding\n" if($debug);
+		#return 0 if(($seqlen+$fsadj)%3!=0);
+		# if(0){
+# 		    $newobjs->verbose(0);
+# 		    Bio::Root::Root::verbose(0);
+# 		    eval{
+# 			$newobj = new Bio::Seq::EncodedSeq(-seq=>$newobjs->seq(),
+# 							   -encoding=>$encoding,
+# 							   -verbose=>0,
+# 							   );
+			
+			
+# 		    }
+# 		    or do{
+# 			print "ERROR: ",$@,"\n" if($verbose);
+# 			print "$seqname $fmin,$fmax,$orient PMARK=$pmark ",$newobjs->seq(),"\n",$encoding,"\n" if($verbose);
+# 			return 0;
+			
+# 		    };
+# 		}
+	    }
+	    else{
+		$newobj = $seqobj->trunc($fmin+1,$fmax);
+	    }
+	    
+	    die if($verbose && $newobj->length() > $MAXORFLEN);
+	    die if($verbose && $newobj->length() < $MINORFLEN);
+	    
+	    if(1){#if($codon_table->is_start_codon($newobj->subseq(1,3)) && 
+		      #($codon_table->is_ter_codon($newobj->subseq($newobj->length()-3+1,$newobj->length())))){
+		      #*STDERR = *FOO;
+		my $protein_seq_obj;
+		eval{
+		    if(0 && $fs){
+			#print "Using FS\n";
+			$protein_seq_obj = $newobj->cds()->translate(
+								     -codontable_id =>11,
+								     #-orf=>1,
+								     -complete => 1,
+								     -throw => 1,
+								     -verbose => 0
+								     );
+		    }
+		    else{
+			$protein_seq_obj = $newobj->translate(-codontable_id =>11,
+							      -complete => 1,
+							      -throw => 1
+							      );
+		    }
+		}
+		or do {
+		    print "ERROR translate: ",$@,"\n" if($verbose);
+		    print "ERROR translate $seqname $fmin,$fmax,$orient ",$newobj->seq(),"\n" if($verbose);
+		    return 0;
+		};
+		#*STDERR = *TEMP;
+		return 0 if(!$protein_seq_obj);
+		if($protein_seq_obj->length()>0){# +1 == ($seqlen+$fsadj)/3){
+		    die if($protein_seq_obj->seq() =~ /\*/);
+		    return 1+$pmark;
+		}
+		else{
+		    print "#Unexpected sequence length ",$protein_seq_obj->length()," expecting ",($seqlen+$fsadj)/3," from ORF $seqname $fmin-$fmax $orient ",$protein_seq_obj->seq(),"\n" if($verbose);
+		}
+	    }
+	    else{
+		#print "Possible alternative ORF on $seqname $fmin-$fmax,$orient has invalid start:",$newobj->subseq(1,3)," ",$codon_table->is_start_codon($newobj->subseq(1,3))," or stop:",$newobj->subseq($newobj->length()-3+1,$newobj->length())," ",$codon_table->is_ter_codon($newobj->subseq($newobj->length()-3+1,$newobj->length())),"\n" if($verbose);
+		return -1;
+	    }
+	}
+	else{
+	    die if($orient ne '-');
+	    my $newobj;
+	    my $fsadj=0;
+	    my $pmark=0;
+	    my $adj=0;
+	    my $encoding;
+	    if($fs){
+		my $newobjs = $seqobj->trunc($fmin+1,$fmax);
+		my $gseq = $newobjs->seq();
+		#$encoding = 'C'x$newobjs->length();
+		print "GSEQPRE:$gseq\n" if($debug);
+		#print "FS ",join(',',@$fs),"\n";
+		#print $newobjs->seq(),"\n";
+ 		foreach my $f (sort {$b->[0] <=> $a->[0]} @$fs){
+		    #print "SIZE ",scalar(@$fs),"\n";
+		    foreach my $start (sort {$b <=> $a} @{$f->[5]}){
+			if($start>=$fmin && $start<=$fmax){
+			    #print "SAM $seqname,$fsadj $fmin-$fmax $start $f->[4] $adj\n";
+			    #$fsadj+=(-1*($f->[4]*(scalar(@{$f->[5]}))));
+			    if($f->[4] == 1){
+				#substr($encoding,$fmax - $start-1,1,'F');
+				substr($gseq,$start-$fmin,1) = '';
+				$adj++;
+			    }
+			    elsif($f->[4] == -1){
+				#substr($encoding,$fmax - $start,1,'B');
+				#substr($encoding,$fmax- $start,0) = 'B';
+				substr($gseq,$start-$fmin+1,0) = 'N';
+			    }
+			    elsif($f->[4] == 0){
+				#substr($encoding,$fmax - $start-1,1,'I');
+			    }		
+			    die if(($fmax-$start) < 0);
+			    #die if(($fmax-$start) >= length($encoding));
+			}
+		    }
+		}
+		eval{
+		    $newobjs = $newobjs->revcom();
+		};
+		if($newobjs->seq() =~ /$PMARK_SPACER/){
+		    my $sloc = $-[0];
+		    print "FOUND PMARK- $sloc ",substr($newobjs->seq(),$sloc,length($PMARK_SPACER)),"\n" if($verbose);
+		    #substr($encoding,$sloc,length($PMARK_SPACER),'F'x36);
+		    $pmark=1;
+		}
+		$newobj = new Bio::Seq(-seq=>$gseq);
+		$newobj = $newobj->revcom();
+		print "GSEQPOST:$gseq\n" if($debug);
+		#print "GSEQobj:",$newobj->seq(),"\n";
+		
+		#print "$seqname $seqlen+$fsadj $fmin,$fmax,$orient PMARK=$pmark ",$newobjs->seq(),"\n",$encoding,"\n";
+		print "#Encoding $encoding\n" if($debug);
+		#return 0 if(($seqlen+$fsadj)%3!=0);
+		#return 0 if(length($encoding) != $newobjs->length());
+		#die if(length($encoding) != $newobjs->length());
+
+		# if(0){
+# 		    eval{
+# 			$newobj = new Bio::Seq::EncodedSeq(-seq=>$newobjs->seq(),
+# 							   -encoding=>$encoding,
+# 							   -verbose=>0);
+# 		    }
+# 		    or do{
+# 			print "ERROR encoding: ",$@,"\n" if($verbose);
+# 			print "$seqname $fmin,$fmax,$orient PMARK=$pmark ",$newobjs->seq(),"\n",$encoding,"\n" if($verbose);
+# 			return 0;
+# 		    };
+# 		}
+	    }
+	    else{
+		$newobj = $seqobj->trunc($fmin+1,$fmax);
+		eval{
+		    $newobj = $newobj->revcom();
+		};
+	    }
+	    #print "NEW ",$newobj->seq(),"\n";
+	    die if($verbose && $newobj->length() > $MAXORFLEN);
+	    die if($verbose && $newobj->length() < $MINORFLEN);
+	    #Check if valid start codon
+	    if(1){#$codon_table->is_start_codon($newobj->subseq(1,3)) && ($codon_table->is_ter_codon($newobj->subseq($newobj->length()-3+1,$newobj->length())))){
+		*STDERR=*FOO;
+		my $protein_seq_obj;
+		eval{
+		    if(0 && $fs){
+			#print "Using FS\n";
+			$newobj->verbose(0);
+			$protein_seq_obj = $newobj->cds()->translate(
+								     -codontable_id =>11,
+								     #-orf=>1,
+								     -complete => 1,
+								     -throw => 1,
+								     -verbose => 0
+								     );
+		    }
+		    else{
+			#print "PRETRANS ",$newobj->seq(),"\n";
+			$protein_seq_obj = $newobj->translate(-codontable_id =>11,
+							      -complete => 1,
+							      -throw => 1
+							      );
+		    }
+		}
+		or do {
+		    print "ERROR translate: ",$@,"\n" if($verbose);
+		    print "ERROR translate $seqname $fmin,$fmax,$orient ",$newobj->seq(),"\n" if($verbose);
+		    return 0;
+		};
+		*STDERR=*TEMP;
+
+		return 0 if(!$protein_seq_obj);
+		#print $protein_seq_obj->length()," ",$newobj->length()," ",$fsadj,"\n";
+		if($protein_seq_obj->length() >0){#== ($newobj->length()+$fsadj)/3){
+		    die if($protein_seq_obj->seq() =~ /\*/);
+		    return 1+$pmark;
+		}
+		else{
+		    print "#Unexpected sequence length ",$protein_seq_obj->length()," expecting ",($newobj->length()+$fsadj)/3," from ORF $seqname $fmin-$fmax $orient ",$protein_seq_obj->seq(),"\n" if($verbose);
+		    for(my $i=0;$i<$protein_seq_obj->length();$i++){
+			print $protein_seq_obj->subseq($i+1,$i+1)," ",$newobj->subseq($i*3+1,$i*3+3),"\n";
+		    }
+		}
+	    }
+	    else{
+		#print "Possible alternative ORF on $seqname $fmin-$fmax,$orient has invalid start:",$newobj->subseq(1,3)," ",$codon_table->is_start_codon($newobj->subseq(1,3))," or stop:",$newobj->subseq($newobj->length()-3+1,$newobj->length())," ",$codon_table->is_ter_codon($newobj->subseq($newobj->length()-3+1,$newobj->length()))," ",$newobj->seq(),"\n" if($verbose);
+		return -1;
+	    }
+	}
+    }
+    close FOO;
+    return 0;
+}
+
+
+#
+#Print members and attributes for a cluster
+#$query is the longest member of a cluster
+#
+#Arguments
+# $query         name of the query feature; looked up in $features and $mappedgenes
+# $mappedorgs    hashref keyed by organism; 'features' (hashref of gene names)
+#                and 'qcov' are read
+# $mappedgenes   hashref keyed by gene name; 'cov','pid','frame','frameinqry',
+#                'relqrysdist' and 'alignments' are read here, 'fmin'/'fmax' via getspan
+# $unmappedorgs,$unmappedgenes  same layout, reported only when $verbose is set
+# $feat_attrs    hashref gene name => hashref of class labels
+# $cluster_attrs hashref of cluster class labels; the CS3,CE3,CS4,CE4,CS0,CE0 keys
+#                are moved into the causes= field and DELETED from the caller's hash
+# $seq_attrs     accepted but not used by this sub
+# $new_orfs      hashref keyed by organism, consulted for the #ALIGNED_NEWORFS note
+#
+#Writes to STDOUT and to the $cogfh,$cfh,$ctfh,$ctfh2 handles, stores a summary in
+#$clusters->{$cluster_id}, and dies if $mappedgenes is empty.
+sub reportCluster{
+    my($query,$mappedorgs,$mappedgenes,$unmappedorgs,$unmappedgenes,$feat_attrs,$cluster_attrs,$seq_attrs,$new_orfs) = @_;
+    if(scalar(keys %$mappedgenes)>0){
+	print $cogfh "COG = $cluster_id, size ",scalar(keys %$mappedgenes), ", connections = 0, perfect = 0;\n";
+	print $cogfh "\t$features->{$query}->[5]\n";
+	foreach my $organism (sort {$a cmp $b} keys %$mappedorgs){
+	    foreach my $gene (sort {$features->{$a}->[1] <=> $features->{$b}->[1]} (keys %{$mappedorgs->{$organism}->{'features'}})){
+		if($gene ne $query){
+		    print $cogfh "\t$features->{$gene}->[5]\n";
+		}
+	    }
+	}
+	my @posscauses = ('CS3','CE3','CS4','CE4','CS0','CE0');
+	#Start from '' so the length() test below never sees undef
+	my $causesstr = '';
+	foreach my $p (@posscauses){
+	    if(exists $cluster_attrs->{$p}){
+		$causesstr .= "$p;";
+		delete $cluster_attrs->{$p};
+	    }
+	}
+	my $classesstr = join(';',sort {$a cmp $b} keys %{$cluster_attrs});
+	print ">CLUSTER_$cluster_id num_seqs=",scalar(keys %$mappedorgs)," num_genes=",scalar(keys %$mappedgenes);
+	if(exists $mappedgenes->{$query}->{'alignments'}){
+	    print " classes=$classesstr query=$query ";
+	    if(length($causesstr)>0){
+		print " causes=$causesstr ";
+	    }
+	    print " num_alignments=",scalar(@{$mappedgenes->{$query}->{'alignments'}})," alignments=",join(',',@{$mappedgenes->{$query}->{'alignments'}}) if($debug);
+	}
+	print "\n";
+	print $cfh "CLUSTER_$cluster_id (",scalar(keys %$mappedgenes)," features,",scalar(keys %$mappedorgs)," genomes, classes=$classesstr, query=$query): ";
+	print $ctfh "C_$cluster_id\t";
+	print $ctfh2 "C_$cluster_id\t";
+	$clusters->{$cluster_id}->{'num_feats'} = scalar(keys %$mappedgenes);
+	$clusters->{$cluster_id}->{'num_genomes'} = scalar(keys %$mappedorgs);
+	$clusters->{$cluster_id}->{'num_alignments'} = scalar(@{$mappedgenes->{$query}->{'alignments'}});
+	$clusters->{$cluster_id}->{'classes'} = $classesstr;
+
+	my $qfmin = $features->{$query}->[1];
+	my $qfmax = $features->{$query}->[2];
+	my $qseqname = $features->{$query}->[0];
+	my @mappedfeats;
+	my $outtable = [];
+	foreach my $organism (sort {$a cmp $b} keys %$mappedorgs){
+	    my($start,$end) = &getspan($mappedgenes,keys %{$mappedorgs->{$organism}->{'features'}});
+	    my @ogenes = sort {$features->{$a}->[1] <=> $features->{$b}->[1]} (keys %{$mappedorgs->{$organism}->{'features'}});
+	    my @ocovs;   #cov over gene length, one entry per gene
+	    my @oids;    #pid over gene length, one entry per gene
+	    my @orients;
+	    my @names;
+	    my @frames;
+	    my @qframes;
+	    my @sdist;
+
+	    my $classes;
+
+	    foreach my $gene (@ogenes){
+		if(exists $feat_attrs->{$gene}){
+		    foreach my $c (sort {$a cmp $b} keys %{$feat_attrs->{$gene}}){
+			$classes->{$c}++;
+		    }
+		}
+
+		if($features->{$gene}->[3]>0){
+		    push @ocovs,(sprintf("%.2f",$mappedgenes->{$gene}->{'cov'}/$features->{$gene}->[3]));
+		    #The pid= column previously repeated the 'cov' value; report 'pid'
+		    push @oids,(sprintf("%.2f",$mappedgenes->{$gene}->{'pid'}/$features->{$gene}->[3]));
+		}
+		push @orients,"$features->{$gene}->[4]";
+		#Print frame and sdist for inconsistent clusters only
+		if(!(exists $cluster_attrs->{'CS1'} && exists $cluster_attrs->{'CE1'})){
+		    if($mappedgenes->{$gene}->{'relqrysdist'}>0 || $verbose==1){
+			push @frames,"altframeqry=$mappedgenes->{$gene}->{'frame'}";
+			push @qframes,"frameinqry=$mappedgenes->{$gene}->{'frameinqry'}";
+			push @sdist,"sdist=$mappedgenes->{$gene}->{'relqrysdist'}" if(scalar(@ogenes)==1);
+		    }
+		}
+		if(defined $features->{$gene}->[11]){
+		    push @names,"product=$features->{$gene}->[11]";
+		}
+	    }
+
+            #Print attributes
+	    my @attrs = sort {$a cmp $b} keys %$classes;
+	    push @attrs,@frames,@qframes,@sdist;
+
+	    #Brief cluster output
+	    print $cfh join(',',@ogenes),"($organism) ";
+	    #Table rows are indexed by the organism name up to the first '.'
+	    my($realorg) = ($organism =~ /([^\.]+)/);
+	    $outtable->[$seqindex->{$realorg}]->[0] = join(',',@ogenes);
+	    $outtable->[$seqindex->{$realorg}]->[1] = $start;
+	    $outtable->[$seqindex->{$realorg}]->[2] = $end;
+	    $outtable->[$seqindex->{$realorg}]->[3] = join(',',@orients);
+
+	    #Detailed output
+	    print join(',',@ogenes),
+	    "\tC$cluster_id",
+	    "\t$organism",
+	    "\tcov=",join(',',@ocovs),
+	    "\tpid=",join(',',@oids),
+	    "\tqcov=",sprintf("%.2f",$mappedorgs->{$organism}->{'qcov'}/($qfmax-$qfmin)),
+	    "\t$start-$end",
+	    "\t",join(',',@orients),
+	    "\t",$end-$start,
+	    "\t",join(';',@attrs,@names),
+	    "\n";
+
+	    $clusters->{$cluster_id}->{'orgs'}->{$organism}->{'genes'} = \@ogenes;
+	    $clusters->{$cluster_id}->{'orgs'}->{$organism}->{'cov'} = \@ocovs;
+	    $clusters->{$cluster_id}->{'orgs'}->{$organism}->{'pid'} = \@oids;
+	    $clusters->{$cluster_id}->{'orgs'}->{$organism}->{'frame'} = \@frames;
+	    $clusters->{$cluster_id}->{'orgs'}->{$organism}->{'frameinqry'} = \@qframes;
+	    $clusters->{$cluster_id}->{'orgs'}->{$organism}->{'sdist'} = \@sdist;
+	    $clusters->{$cluster_id}->{'orgs'}->{$organism}->{'fmin'} = $start;
+	    $clusters->{$cluster_id}->{'orgs'}->{$organism}->{'fmax'} = $end;
+	    $clusters->{$cluster_id}->{'orgs'}->{$organism}->{'len'} = $end-$start;
+	    $clusters->{$cluster_id}->{'orgs'}->{$organism}->{'orient'} = \@orients;
+	    $clusters->{$cluster_id}->{'orgs'}->{$organism}->{'desc'} = join(';',@names);
+
+	}
+	if($verbose){
+	    foreach my $organism (sort {$a cmp $b} keys %$unmappedorgs){
+		if(! exists $unmappedorgs->{$organism}->{'features'}){
+		    if(exists $new_orfs->{$organism}){
+			print "#ALIGNED_NEWORFS $organism aligned with unannotated matching ORFs\n";
+		    }
+		    else{
+			#No annotated features on this org
+			print "#ALIGNED_NOORFS $organism aligned with no matching ORFs, possibly in a gapped region of the alignment\n";
+		    }
+		}
+		else{
+		    #print annotated features
+		    my($start,$end) = &getspan($unmappedgenes,keys %{$unmappedorgs->{$organism}->{'features'}});
+		    my @ogenes = sort {$features->{$a}->[1] <=> $features->{$b}->[1]} (keys %{$unmappedorgs->{$organism}->{'features'}});
+
+		    my @ocovs;
+		    my @oids;
+		    my @orients;
+		    my @names;
+		    my $classes;
+
+		    foreach my $gene (@ogenes){
+			if(exists $feat_attrs->{$gene}){
+			    foreach my $c (sort {$a cmp $b} keys %{$feat_attrs->{$gene}}){
+				$classes->{$c}++;
+			    }
+			}
+
+			if($features->{$gene}->[3]>0){
+			    push @ocovs,(sprintf("%.2f",$unmappedgenes->{$gene}->{'cov'}/$features->{$gene}->[3]));
+			    #Same fix as for mapped genes: pid= is computed from 'pid'
+			    push @oids,(sprintf("%.2f",$unmappedgenes->{$gene}->{'pid'}/$features->{$gene}->[3]));
+			}
+			push @orients,"$features->{$gene}->[4]";
+
+			if(defined $features->{$gene}->[11]){
+			    push @names,"product=$features->{$gene}->[11]";
+			}
+		    }
+		    #Print attributes
+		    my @attrs = sort {$a cmp $b} keys %$classes;
+		    print "#ALIGNED_BELOW_CUTOFFS\t";
+		    print join(',',@ogenes),
+		    "\tC$cluster_id",
+		    "\t$organism",
+		    "\tcov=",join(',',@ocovs),
+		    "\tpid=",join(',',@oids),
+		    "\tqcov=",sprintf("%.2f",$unmappedorgs->{$organism}->{'qcov'}/($qfmax-$qfmin)),
+		    "\t$start-$end",
+		    "\t",join(',',@orients),
+		    "\t",$end-$start,
+		    "\t",join(';',@attrs,@names),
+		    "\n";
+		}
+	    }
+	}
+	    ##
+	    #Report ORFs that are conserved and aligned but not annotated
+# 	    foreach my $organism (keys %$new_orfs){
+# 		my $orfidx=0;
+# 		foreach my $alt (@{$new_orfs->{$organism}}){
+# 		    die if(exists $mappedorgs->{$organism});
+# 		    my($astart,$aend) = ($alt =~ /alt_start=(\d+)-(\d+)/);
+# 		    my($len) = ($alt =~ /len:(\d+)/);
+# 		    my($orient) = ($alt =~ /orient:([\+\-])/);
+# 		    die "Mismatching lengths $len != $aend - $astart" if($len != ($aend-$astart));
+# 		    #Check that this ORF is longer than genes that are already annotated on $organism in this region
+# 		    my @unmappedlist;
+# 		    foreach my $feat_name (keys %$unmappedgenes){
+# 			if($feat2organism->{$feat_name} eq $organism){
+# 			    push @unmappedlist,[$feat_name,$features->{$feat_name}->[3]];
+# 			}
+# 		    }
+# 		    my @longestunmapped = sort {$b->[1] <=> $a->[1]} @unmappedlist;
+# 		    if(scalar (%$unmappedgenes) ==0 || $len > $longestunmapped[0]){
+# 			push @mappedfeats,[[[$organism,$astart,$aend,'+',($aend-$astart).'M']],"NEWORF$organism$orfidx",'gene'];
+# 			print "NEWORF$organism$orfidx",
+# 			"\tWGA$cluster_id",
+# 			"\t$organism",
+# 			"\tcov=",
+# 			"\tpid=",
+# 			"\t$astart-$aend",
+# 			"\t",$aend-$astart,
+# 			"\t",join(';',@{$new_orfs->{$organism}}),
+# 			"\n";
+# 			$orfidx++;
+# 		    }
+# 		}
+# 	     }
+
+	#One tab-separated column per entry of @sortedallseqs; the last one ends the row
+	for(my $i=0;$i<scalar(@sortedallseqs);$i++)
+	{
+	    print $ctfh $outtable->[$i]->[0];
+	    print $ctfh2 "$outtable->[$i]->[0] $outtable->[$i]->[1] $outtable->[$i]->[2] $outtable->[$i]->[3]";
+	    if($i!=$#sortedallseqs){
+		print $ctfh "\t";
+		print $ctfh2 "\t";
+	    }
+	    else{
+		print $ctfh "\n";
+		print $ctfh2 "\n";
+	    }
+	}
+
+	if($printalignments){
+	    print "#Printing query $query $qseqname,$qfmin,$qfmax\n" if($debug);
+	    my $alnfile = "$options{'prefix'}cluster_${cluster_id}.aln.out";
+	    open(my $outfh,'+>',$alnfile) or die "Can't open $alnfile for writing: $!";
+
+	    my @isect = $atree->map($qseqname,$qfmin,$qfmax);
+	    #Print all features overlapping the alignment window.
+	    #This may include addl features than those in the cluster
+	    my $printedfeats = {};
+	    foreach my $feat (@isect){
+		my $feat_name = $feat->[0];
+		$feat_name =~ s/gene\://;
+		$printedfeats->{$feat_name}++;
+	    }
+	    foreach my $feat_name (keys %$printedfeats){
+		my $fmin = $features->{$feat_name}->[1];
+		my $fmax = $features->{$feat_name}->[2];
+		my $seqname = $features->{$feat_name}->[0];
+		my $orient = $features->{$feat_name}->[4];
+		if(exists $mappedgenes->{$feat_name}){
+		    push @mappedfeats,[[[$seqname,$fmin,$fmax,$orient,($fmax-$fmin).'M']],'gene:'.$feat_name,'gene'];
+		}
+		else{
+		    #print "#WARNING Expected gene $feat_name in unmapped list: ".join(',',keys %$unmappedgenes)."\n" if(!exists $unmappedgenes->{$feat_name});
+		    if(exists $features->{$feat_name} && $features->{$feat_name}->[3]){
+			my $cov = sprintf("c%.1f,i%.1f ",$unmappedgenes->{$feat_name}->{'cov'}/$features->{$feat_name}->[3],
+					  $unmappedgenes->{$feat_name}->{'pid'}/$features->{$feat_name}->[3]);
+			push @mappedfeats,[[[$seqname,$fmin,$fmax,$orient,($fmax-$fmin).'M']]," $cov *gene:".$feat_name.":$orient",'gene'];
+		    }
+		}
+	    }
+
+	    #Sort all alignments that span query gene
+	    my @qryalns;
+	    #TODO fix for reverse strand
+	    foreach my $align_name (@{$mappedgenes->{$query}->{'alignments'}}){
+		die "No organism recorded for query $query" if(!exists $feat2organism->{$query} || length($feat2organism->{$query})==0);
+		my $alni = $atree->getAlignedInterval($align_name,$feat2organism->{$query});
+		if($alni){
+		    print "#QRYALN $align_name $feat2organism->{$query} $alni->[0] $alni->[1] $alni->[2]\n" if($debug);
+		    push @qryalns,[$align_name,$alni->[1]];
+		}
+	    }
+	    #Position within the sorted alignments; the first and last are padded by
+	    #up to 20 positions when the alignment contains the extra range
+	    my $aidx = 0;
+	    foreach my $al (sort {$a->[1] <=> $b->[1]} @qryalns){
+		my($align_name) = @$al;
+		#Check that new range is still within $alignment
+		print "#Checking the $qseqname,$qfmin,$qfmax,$align_name is within range\n" if($debug);
+		my @isect = $atree->intersect($qseqname,$qfmin,$qfmax,$align_name);
+		my $printfmin;
+		my $printfmax;
+		foreach my $aln (@isect){
+		    if($aln->[1] eq $qseqname && $aln->[0] eq $align_name){
+			#print join(',',@$aln),"\n";
+			$printfmin = $aln->[2];
+			if($aidx==0){
+			    if($atree->contains($align_name,$qseqname,$printfmin-20,$printfmin)){
+				$printfmin -= 20;
+			    }
+			}
+			$printfmax = $aln->[3];
+			if($aidx==(scalar(@qryalns)-1)){
+			    if($atree->contains($align_name,$qseqname,$printfmax,$printfmax+20)){
+				$printfmax += 20;
+			    }
+			}
+			print "#Resetting print range to $printfmin-$printfmax from $qfmin-$qfmax\n" if($debug);
+		    }
+		}
+		if(defined $printfmin && defined $printfmax){
+		    print $outfh "CLUSTER_$cluster_id ALIGNMENT:$align_name\n";
+		    my($colstart,$colend) = AlignmentTree::coordstocolumn($atree->{_alignments}->{$align_name}->[0],$qseqname,$printfmin,$printfmax,1);
+		    $atree->printAlignment($outfh,$align_name,$colstart,$colend,$db,$qseqname,\@mappedfeats,$htmlout);
+		}
+		else{
+		    die "No interval for $qseqname,$qfmin,$qfmax on alignment $align_name";
+		}
+		$aidx++;
+	    }
+	    close $outfh;
+	    #Echo the alignment file to STDOUT without passing the file name through a shell
+	    open(my $alnfh,'<',$alnfile) or die "Can't open $alnfile for reading: $!";
+	    while(my $line = <$alnfh>){
+		print $line;
+	    }
+	    close $alnfh;
+
+	}
+	print "\n";
+	print $cfh "\n";
+    }
+    else{
+	#No genes in cluster
+	die "No genes in cluster $cluster_id";
+    }
+}
+
+
+#
+#Collect the "gene" entries that $atree->intersect reports for $org:$fmin-$fmax.
+#Returns a hashref keyed by feature name (leading "gene:" removed) whose values are
+#[hit[1],hit[2],hit[3],hit[7],name,name,hit[4],hit[5]], or undef when nothing intersects.
+#$orient is accepted but not used.
+sub getFeaturesByInterval{
+    my($atree,$org,$fmin,$fmax,$orient) = @_;
+    my $feats;
+    for my $hit ($atree->intersect($org,$fmin,$fmax,"gene")){
+	#Work on a copy of the name so the tree's own record is left untouched
+	(my $name = $hit->[0]) =~ s/^gene://;
+	my @record = (
+	    $hit->[1],
+	    $hit->[2],
+	    $hit->[3],
+	    $hit->[7],
+	    $name,
+	    $name,
+	    $hit->[4],
+	    $hit->[5],
+	);
+	$feats->{$name} = \@record;
+    }
+    return $feats;
+}
+
+#Returns (\@features,\%seqs,\%alignments) that overlap query
+# \@features    entries from $atree->map($seqname,$fmin,$fmax) whose name, once the
+#               leading "$type:" is removed, is a key of $features
+# \%seqs        names of the sequences present in any alignment spanning the query
+# \%alignments  names of the alignments in which "$type:$query" itself is mapped
+#NOTE: the prefix is stripped in place, so the records returned by
+#$atree->map are modified.
+sub getAlignedFeatures{
+    my($atree,$seqname,$query,$fmin,$fmax,$type) = @_;
+    #Aligned seqs
+    my $seqs = {};
+    #Alignments
+    my $alignments = {};
+
+    #Parse overlapping genes
+    my @isect = $atree->map($seqname,$fmin,$fmax);
+    #First screen all overlapping alignments to ensure that they
+    #include the query gene
+    foreach my $r (@isect){
+	my $feat_name = $r->[0];
+	my $align_name = $r->[5];
+	#Only consider WGA alignments (alignment name in $align_name) that span query (gene name in $query)
+	if($feat_name eq $type.':'.$query){
+	    print "#Mapped $query $align_name\n" if($debug);
+	    $alignments->{$align_name}++;
+	}
+    }
+
+    #Capture all seqs in this alignment
+    foreach my $align_name (keys %$alignments){
+	my $alignedseqs  = $atree->{_alignments}->{$align_name}->[0];
+	foreach my $seq (@$alignedseqs){
+	    die "Sequence name in alignment $align_name is a reference" if(ref $seq->[0]);
+	    $seqs->{$seq->[0]}++;
+	    #TODO capture stats on alignment
+	}
+    }
+
+    #Transform feat_name, stripping leading type:
+    my @nisect;
+    foreach my $r (@isect) {
+	#Anchor and quote the prefix so only a literal leading "$type:" is removed
+	$r->[0] =~ s/^\Q$type\E://;
+	if(! exists $features->{$r->[0]} && !defined $featlist){
+	    print STDERR "Unknown feature $r->[0]\n";
+	}
+	print "#SAM$r->[0] ",(exists $features->{$r->[0]}),"\n" if($debug);
+	push @nisect,$r if(exists $features->{$r->[0]});
+    }
+
+    return (\@nisect,$seqs,$alignments);
+}
+
+#Report every feature in $features that is not a key of $mapped.
+# $atree     alignment tree passed to annotateSingletons/buildCluster/annotateCluster
+# $mapped    hashref of feature names already placed in a cluster (skipped here)
+# $unmapped  hashref feature name => {'len','cov','pid','WGA_cluster'}; these are
+#            printed as #SINGLETON lines with their best cluster
+# $subsumed  hashref of feature names printed as #DELETED
+# $datree    optional second tree; when defined it is queried for other features
+#            aligned to the same region, which are printed as "#DUP matches"
+#Returns ($singletons,$dups) hashrefs keyed by feature name. Only features that
+#are neither in $unmapped nor in $subsumed are counted in either hash.
+sub findSingletons{
+    my($atree,$mapped,$unmapped,$subsumed,$datree) = @_;
+    my $singletons = {};
+    my $dups = {};
+    foreach my $feat_name (keys %$features){
+	#Mapped ORF, not a singleton
+	next if(exists $mapped->{$feat_name});
+
+	my $fmin = $features->{$feat_name}->[1];
+	my $fmax = $features->{$feat_name}->[2];
+	my $seqname = $features->{$feat_name}->[0];
+	my $classes = &annotateSingletons($atree,$features->{$feat_name}->[0],$feat_name,$fmin,$fmax);
+
+	my $dupfeats;
+	if(defined $datree){
+	    my($nisect,$allseqs,$goodalignments) = &getAlignedFeatures($datree,$seqname,$feat_name,$fmin,$fmax,'gene');
+	    foreach my $r (
+			   sort { $features->{$b->[0]}->[3] <=> $features->{$a->[0]}->[3] } #sort on feature length
+			   @$nisect){
+		my $dfeat_name = $r->[0];
+		my $align_name = $r->[5];
+		#Check if we want to consider this alignment
+		if(exists $goodalignments->{$align_name}){
+		    $dfeat_name =~ s/gene\://;
+		    if(!exists $features->{$dfeat_name}){
+			print STDERR "#Bad feature found $dfeat_name. Not in input file. Skipping\n";
+			next;
+		    }
+		    if($dfeat_name ne $feat_name){
+			$dupfeats->{$feat_name}->{$dfeat_name}->{'cov'} += $r->[7];
+			$dupfeats->{$feat_name}->{$dfeat_name}->{'pid'} += $r->[8];
+			$dupfeats->{$feat_name}->{$dfeat_name}->{'len'} += ($r->[3]-$r->[2]);
+		    }
+		}
+	    }
+	}
+
+	if(exists $unmapped->{$feat_name}){
+	    my $query=$feat_name;
+	    my($mappedorgs,$mappedgenes,$unmappedorgs,$unmappedgenes) = &buildCluster($atree,$query);
+	    my($feat_attrs,$cluster_attrs,$codons) = &annotateCluster($atree,$mappedgenes,$mappedorgs);
+	    my $new_orfs = &findnewORFs($db,$atree,$mappedorgs,$mappedgenes,$codons);
+	    #A cluster is only reported (and $cluster_id advanced) when new ORFs were found
+	    if(scalar(keys %$new_orfs)){
+		my $seq_attrs = {};
+		&reportCluster($query,$mappedorgs,$mappedgenes,$unmappedorgs,$unmappedgenes,$feat_attrs,$cluster_attrs,$seq_attrs,$new_orfs);
+		$cluster_id++;
+	    }
+	    my $featlen = $fmax-$fmin;
+	    my $mappedlen = $unmapped->{$feat_name}->{'len'};
+	    if($featlen <= 0){
+		print STDERR "#Bad featlen for feature $feat_name $fmax-$fmin\n";
+	    }
+	    if($mappedlen <= 0){
+		print STDERR "#Bad coverage for feature $feat_name Coverage:$unmapped->{$feat_name}->{'len'}\n";
+		$mappedlen=1;
+	    }
+	    print "#SINGLETON $feat_name len:$features->{$feat_name}->[3]\tbest_cluster:C$unmapped->{$feat_name}->{'WGA_cluster'}\tcov:";
+	    printf("%.2f",$unmapped->{$feat_name}->{'cov'});
+	    print " pid:";
+	    printf("%.2f",$unmapped->{$feat_name}->{'pid'});
+	    printf(" lenbp:%f ",$mappedlen);
+	    #The join was previously evaluated in void context and its result
+	    #discarded; print the classes as the other #SINGLETON line does
+	    print join(' ',@$classes);
+	    if(defined $features->{$feat_name}->[11]){
+		print " product=$features->{$feat_name}->[11]";
+	    }
+	    if(scalar(keys %{$dupfeats->{$feat_name}}) > 0){
+		foreach my $dfeat_name (sort {$dupfeats->{$feat_name}->{$b}->{'pid'} <=> $dupfeats->{$feat_name}->{$a}->{'pid'}} keys %{$dupfeats->{$feat_name}}){
+		    print " #DUP matches:$dfeat_name(pid:";
+		    printf("%.2f",($dupfeats->{$feat_name}->{$dfeat_name}->{'pid'}/$features->{$feat_name}->[3]));
+		    print ",cov:";
+		    printf("%.2f",($dupfeats->{$feat_name}->{$dfeat_name}->{'cov'}/$features->{$feat_name}->[3]));
+		    print ") ";
+		}
+	    }
+	    print "\n";
+	}
+	elsif(exists $subsumed->{$feat_name}){
+	    print "#DELETED $feat_name\n";
+	}
+	else{
+	    print "#SINGLETON $feat_name len:$features->{$feat_name}->[3] ",join(' ',@$classes);
+	    if(defined $features->{$feat_name}->[11]){
+		print " product=$features->{$feat_name}->[11]";
+	    }
+	    if(scalar(keys %{$dupfeats->{$feat_name}}) > 0){
+		foreach my $dfeat_name (keys %{$dupfeats->{$feat_name}}){
+		    print " #DUP matches:$dfeat_name(pid:";
+		    printf("%.2f",($dupfeats->{$feat_name}->{$dfeat_name}->{'pid'}/$features->{$feat_name}->[3]));
+		    print ",cov:";
+		    printf("%.2f",($dupfeats->{$feat_name}->{$dfeat_name}->{'cov'}/$features->{$feat_name}->[3]));
+		    print ") ";
+		}
+		$dups->{$feat_name}++;
+	    }
+	    else{
+		$singletons->{$feat_name}++;
+	    }
+	    print "\n";
+	}
+    }
+
+    return ($singletons,$dups);
+}
+
+
+###############################
+#General utility funcs
+#Smallest and largest coordinate over a set of genes.
+# first argument   hashref gene name => {'fmin'=>..,'fmax'=>..}
+# remaining args   gene names to include
+#Returns (lowest,highest) of all fmin/fmax values; dies if a gene name is not a
+#key of the hashref.
+sub getspan{
+    my($spans,@genes) = @_;
+    my @bounds;
+    for my $id (@genes){
+	exists $spans->{$id} or die;
+	push @bounds,$spans->{$id}->{'fmin'};
+	push @bounds,$spans->{$id}->{'fmax'};
+    }
+    #Numeric ascending order: first element is the minimum, last the maximum
+    my @ordered = sort {$a <=> $b} @bounds;
+    my $lowest = $ordered[0];
+    my $highest = $ordered[$#ordered];
+    return ($lowest,$highest);
+}
+
+
+#Convert aligned start/stop codon positions back to coordinates on $seqname.
+# $startcodon,$stopcodon  strings that split on $CODON_DELIM_REGEX into
+#                         (alignment column, alignment name)
+#Returns ($start_s,$stop_e,'+') when the start codon lies before the stop codon,
+#otherwise ($stop_s,$start_e,'-'). Returns undef when $seqname is not found on
+#the start or the stop alignment.
+#NOTE: a codon whose three columns map to a single coordinate is treated as
+#aligned to a gap and its coordinates are set to undef before the comparison.
+sub findCoords{
+    my($atree,$seqname,$startcodon,$stopcodon) = @_;
+
+    my($startcol,$aln_s) = split(/$CODON_DELIM_REGEX/,$startcodon);
+    #find corresponding stop
+    my($stopcol,$aln_e) = split(/$CODON_DELIM_REGEX/,$stopcodon);
+
+    my $si = &getAlignment($atree,$aln_s,$seqname);
+    my $ei = &getAlignment($atree,$aln_e,$seqname);
+    if(!$si){
+	print "Can't find alignment $aln_s on $seqname from $startcodon\n" if($debug);
+	return undef;
+    }
+    my($start_s,$start_e) = AlignmentTree::columntocoords($si,$startcol,$startcol+2);
+    if($start_s == $start_e){
+	#aligned to a gap
+	$start_s = undef;
+	$start_e = undef;
+    }
+    if(!$ei){
+	#Report the stop codon's alignment; this message used to name the start's
+	print "Can't find alignment $aln_e on $seqname from $stopcodon\n" if($debug);
+	return undef;
+    }
+    my($stop_s,$stop_e) = AlignmentTree::columntocoords($ei,$stopcol,$stopcol+2);
+    if($stop_s == $stop_e){
+	#aligned to a gap
+	$stop_s = undef;
+	$stop_e = undef;
+    }
+    if($start_s<$stop_s){
+	#forward strand 5'start -----> 3'stop
+	return ($start_s,$stop_e,'+');
+    }
+    #reverse strand
+    #3'stop <-- 5'start
+    return ($stop_s,$start_e,'-');
+}
+
+#Returns aligned location of start and stop codons
+#
+# $fmin,$fmax  0-based, end-exclusive feature coordinates on $seqname
+#              (bioperl subseq calls below add 1 to $fmin)
+# $orient      '+' or '-'; anything else dies
+# $fname       feature name, used in messages only
+#
+#Returns ($startcodon,$stopcodon,$is_partial_start,$is_partial_stop,$is_bad_start,$is_bad_stop)
+# $startcodon,$stopcodon  whatever getAlignedCols returns for the codon's 3 bases
+# $is_partial_*           1 when $PMARK_SPACER occurs within length($PMARK_SPACER)
+#                         of that end of the feature
+# $is_bad_*               1 when the annotated codon is not a start/stop codon in
+#                         codon table 11
+#Returns an empty list when $seqname is not in $db or a '+' strand coordinate is
+#out of range, and 0 when the '-' strand codon lookup throws.
+sub findCodons{
+    my($atree,$seqname,$fmin,$fmax,$orient,$fname) = @_;
+    my $codon_table = Bio::Tools::CodonTable->new(-id=>11);
+    my $seqobj = $db->{$seqname};
+    if(!$seqobj){
+	print "Can't find seqname: $seqname\n";
+	return;
+    }
+    my $seqlen = $seqobj->length();
+    my $spacerlen = length($PMARK_SPACER);
+    my $startcodon=undef;
+    my $stopcodon=undef;
+    my $is_partial_start=0;
+    my $is_partial_stop=0;
+    my $is_bad_start=0;
+    my $is_bad_stop=0;
+    my $aln_orient=undef;
+    if($orient eq '+'){
+	#Each message now reports the condition that actually failed
+	if($fmin+1<=0){print STDERR "Bad start parameter $fmin+1<=0 $seqname,$fmin,$fmax,$orient,$fname\n";return}
+	if($fmax-3+1<=0){print STDERR "Bad end parameter $fmax-3+1<=0 $seqname,$fmin,$fmax,$orient,$fname\n";return}
+	if($fmin+3>$seqlen){print STDERR "Bad start parameter $fmin+3>$seqlen $seqname,$fmin,$fmax,$orient,$fname\n";return}
+	if($fmax>$seqlen){print STDERR "Bad end parameter $fmax>$seqlen $seqname,$fmin,$fmax,$orient,$fname\n";return}
+
+	if(!$codon_table->is_start_codon($seqobj->subseq($fmin+1,$fmin+3))){ #bioperl is 1-base coordinates
+	    print "#Bad start codon $fname,$seqname,$fmin,$fmax,$orient codon $fmin+1,$fmin+2+1 ",$seqobj->subseq($fmin+1,$fmin+3)," aln_orient:$aln_orient\n" if($verbose || $debug);
+	    $is_bad_start=1;
+	}
+	#Find start codon + strand (looked up whether or not the codon is valid)
+	$startcodon = &getAlignedCols($atree,$seqname,$fmin,$fmin+3);
+
+	if(!$codon_table->is_ter_codon($seqobj->subseq($fmax-3+1,$fmax))){
+	    print "#Bad stop $fname,$seqname,$fmin,$fmax,$orient codon $fmax-3+1,$fmax ",$seqobj->subseq($fmax-3+1,$fmax)," aln_orient:$aln_orient\n" if($verbose || $debug);
+	    $is_bad_stop=1;
+	}
+	#Find stop codon + strand
+	$stopcodon = &getAlignedCols($atree,$seqname,$fmax-3,$fmax);
+
+	#Check if in pmark spacer adjacent to contig boundary
+	if($fmin-$spacerlen>0 && $fmin+$spacerlen <= $seqlen){
+	    my $startregion = $seqobj->subseq($fmin-$spacerlen,$fmin+$spacerlen);
+	    if($startregion =~ /$PMARK_SPACER/){
+		$is_partial_start=1;
+	    }
+	}
+	if($fmax-$spacerlen>0 && $fmax+$spacerlen <= $seqlen){
+	    my $stopregion = $seqobj->subseq($fmax-$spacerlen,$fmax+$spacerlen);
+	    if($stopregion =~ /$PMARK_SPACER/){
+		$is_partial_stop=1;
+	    }
+	}
+
+    }
+    else{
+	die "Bad orient $orient" if($orient ne '-');
+	#On the - strand the start codon sits at $fmax and the stop codon at $fmin.
+	#These only warn; an out-of-range subseq below is caught by the eval.
+	print STDERR "Bad start parameter $fmax-3+1<=0 $seqname,$fmin,$fmax,$orient,$fname\n" if($fmax-3+1<=0);
+	print STDERR "Bad end parameter $fmin+1<=0 $seqname,$fmin,$fmax,$orient,$fname\n" if($fmin+1<=0);
+	print STDERR "Bad start parameter $fmax>$seqlen $seqname,$fmin,$fmax,$orient,$fname\n" if($fmax>$seqlen);
+	print STDERR "Bad end parameter $fmin+3>$seqlen $seqname,$fmin,$fmax,$orient,$fname\n" if($fmin+3>$seqlen);
+	eval{
+	    my $startseq = revcom($seqobj->subseq($fmax-3+1,$fmax))->seq();
+	    if(!$codon_table->is_start_codon($startseq)){
+		print "#Bad start codon $fname,$seqname,$fmin,$fmax,$orient codon $fmax-3+1,$fmax ",$startseq," aln_orient:$aln_orient\n" if($verbose || $debug);
+		#Was set to 0 here, so bad - strand starts were never flagged
+		$is_bad_start=1;
+	    }
+	    #Find start codon on - strand
+	    $startcodon = &getAlignedCols($atree,$seqname,$fmax-3,$fmax);
+
+	    my $stopseq = revcom($seqobj->subseq($fmin+1,$fmin+3))->seq();
+	    if(!$codon_table->is_ter_codon($stopseq)){
+		print "#Bad stop codon $fname,$seqname,$fmin,$fmax,$orient codon $fmin+1,$fmin+3 ",$stopseq," aln_orient:$aln_orient\n" if($verbose || $debug);
+		$is_bad_stop=1;
+	    }
+	    #Find stop codon on - strand
+	    $stopcodon = &getAlignedCols($atree,$seqname,$fmin,$fmin+3);
+
+	    #Make the block's value true so "or do" runs only when something died,
+	    #not when getAlignedCols returned a false value
+	    1;
+	} or do{
+	    warn $@ if($verbose);
+	    print STDERR "ERROR invalid start,stop codons or invalid translation. $seqname,$fmin,$fmax,$orient,$fname\n";
+	    return 0;
+	};
+	#Check if in pmark spacer adjacent to contig boundary
+	if($fmax-$spacerlen>0 && $fmax+$spacerlen <= $seqlen){
+	    my $startregion = $seqobj->subseq($fmax-$spacerlen,$fmax+$spacerlen);
+	    if($startregion =~ /$PMARK_SPACER/){
+		$is_partial_start=1;
+	    }
+	}
+	if($fmin-$spacerlen>0 && $fmin+$spacerlen <= $seqlen){
+	    my $stopregion = $seqobj->subseq($fmin-$spacerlen,$fmin+$spacerlen);
+	    if($stopregion =~ /$PMARK_SPACER/){
+		$is_partial_stop=1;
+	    }
+	}
+    }
+    return ($startcodon,$stopcodon,$is_partial_start,$is_partial_stop,$is_bad_start,$is_bad_stop);
+}
+
+
+#Look up the row for $seqname in alignment $align_name.
+#Rows are taken from $atree->{_alignments}->{$align_name}->[0]; the first row
+#whose element 0 equals $seqname is returned, otherwise undef.
+sub getAlignment{
+    my($atree,$align_name,$seqname) = @_;
+    my $rows = $atree->{_alignments}->{$align_name}->[0];
+    for my $row (@$rows){
+	next if($row->[0] ne $seqname);
+	return $row;
+    }
+    if($debug){
+	print "#Can't find $seqname on alignment $align_name\n";
+    }
+    return undef;
+}
+
+#Report indels between $seq and $refseq in columns [$startcol,$endcol] of
+#alignment $aln
+#Inputs
+#  $atree            - alignment tree; must provide getAlignmentMatrix()
+#  $db               - passed through to getAlignmentMatrix()
+#  $aln              - alignment name
+#  $seq              - query sequence name
+#  $startcol,$endcol - inclusive column range; dies if $endcol<$startcol
+#  $refseq           - reference sequence name. The original notes call it
+#                      optional (falling back to the most frequent allele),
+#                      but that branch currently dies, see below
+#Returns
+#array ref of [coord,refchar,qrychar,column,frame], ordered by column
+#frame is 1 when the reference has a gap ('-') and the query does not,
+#-1 when the query has the gap. Point mutations (frame 0) are not reported
+sub reportVariants{
+    my($atree,$db,$aln,$seq,$startcol,$endcol,$refseq) = @_;
+    my $skipgapcheck=0;
+    my $GAPWINDOW=10;
+    die if($endcol<$startcol);
+    print "#Analyzing codon position $startcol in alignment $aln seq $seq \n" if($debug);
+
+    print "#Retrieving alignment matrix for $startcol-$endcol for alignment $aln \n" if($debug);
+
+    my ($mmatrix,$seqmatrix,$names) = $atree->getAlignmentMatrix($aln,$startcol,$endcol,$db,$refseq,$seq);
+    print "#Expecting width ",($endcol-$startcol+1)," row count ",scalar(@$mmatrix)," ",scalar(@$names),"\n" if($debug);
+    #Offsets (relative to $startcol) of columns that contain a variant
+    my $results = {};
+    my @edits;
+
+    my $qryidx;
+    #For optional reference seq
+    my $refidx=-1;
+
+    #Locate the matrix rows of the query and of the reference
+    #NOTE(review): if $refseq is not among @$names, $refidx stays -1 and the
+    #last row is silently used as reference; likewise an unknown $seq leaves
+    #$qryidx undef (row 0). Confirm whether callers can trigger this
+    for(my $i=0;$i<@$mmatrix;$i++){
+	if($names->[$i] eq $seq){
+	    $qryidx = $i;
+	}
+	if(defined $refseq && $names->[$i] eq $refseq){
+	    $refidx = $i;
+	}
+    }
+    #Matrix cols start at 0
+    for(my $j=0;$j<($endcol-$startcol+1);$j++){
+	if(defined $refseq){
+	    if(uc(substr($mmatrix->[$refidx],$j,1)) ne uc(substr($mmatrix->[$qryidx],$j,1))){
+		#print uc(substr($mmatrix->[$refidx],$j,1))," ", uc(substr($mmatrix->[$qryidx],$j,1)),"\n";
+		$results->{$j}++;
+	    }
+	}
+	else{
+	    for(my $i=0;$i<@$mmatrix;$i++){
+		if(substr($mmatrix->[$i],$j,1) ne '.'){
+		    if($skipgapcheck || substr($mmatrix->[$i],$j,$GAPWINDOW) =~ /\./ ){ #gap < GAPWINDOW
+			#column $i has multiple characters, gaps or mutations
+			print "#MUT $i $j ",substr($mmatrix->[$i],$j,1)," $names->[$i] $seq\n" if($debug);
+			$results->{$j}++;
+		    }
+		}
+	    }
+	}
+    }
+    my $alni = &getAlignment($atree,$aln,$seq);
+    #The keys are numeric column offsets, so they must be sorted numerically.
+    #A string sort would visit column 10 before column 2 and emit the edits
+    #of a multi-column indel out of order
+    foreach my $r (sort {$a <=> $b} keys %$results){
+	my $reloffset = $startcol+$r;
+	my $freqchar = {};
+	my $refchar;
+	my $qrychar;
+	if(defined $refseq){
+	    $qrychar = substr($mmatrix->[$qryidx],$r,1);
+	    $refchar = substr($mmatrix->[$refidx],$r,1);
+	}
+	else{
+	    #Tally the characters seen in this column over all rows
+	    for(my $i=0;$i<@$mmatrix;$i++){
+		
+		my $char;
+		#TODO this is slow, improve perf
+		if(substr($mmatrix->[$i],$r,1) eq '-'){
+		    #gap
+		    $char = substr($mmatrix->[$i],$r,1);
+		    #die "Unexpected char $i $r $seqmatrix->[$i]->[$r] $mmatrix->[$i]->[$r]" if(defined $seqmatrix->[$i]->[$r]);
+		}
+		else{
+		    #retrieve base
+		    $char = substr($seqmatrix->[$i],$r,1);
+		}
+		die "Bad char '$char'" if(length($char)!=1);
+		$freqchar->{$char}++;
+	    }
+	}
+
+
+
+	my $fstype = 0;
+
+	if(defined $refseq){
+	    if(uc($refchar) ne uc($qrychar)){
+		if($refchar eq '-'){
+		    $fstype=1;
+		}
+		elsif($qrychar eq '-'){
+		    $fstype=-1;
+		}
+		else{
+		    $fstype=0;
+		}
+		#Ignore point mutations for now
+		if($fstype!=0){
+		    my($fsstart,$fsend) = AlignmentTree::columntocoords($alni,$reloffset,$reloffset);		
+		    if(1){#$fsstart != $fsend){
+			#$freqchar is only filled in the no-reference branch, so the freq fields print empty here
+			print "#ALT col:$reloffset coord:$fsstart-$fsend base:$refchar freq:$freqchar->{$refchar} $seq:$qrychar $freqchar->{$qrychar} fstype:$fstype\n" if($debug);
+			push @edits,[$fsstart,$refchar,$qrychar,$reloffset,$fstype];
+			#if(scalar(@edits)>$FS_THRESHOLD){
+			#return \@edits;
+			#}
+		    }
+		}
+	    }
+	}
+	else{
+	    #The no-reference mode is disabled: everything below this die is unreachable
+	    die;
+	    #report most frequent character
+	    my @sortedchars = sort {$b <=> $a} (keys %$freqchar);
+	    #retrieve coordinate on $seq for reloffset
+	    foreach my $base (@sortedchars){
+		if(uc($base) ne uc($qrychar)
+		   #&& $freqchar->{$base}>=$freqchar->{$qrychar}	    #only consider bases that occur more frequently than 
+		   #&& $freqchar->{$base}>=scalar(@$mmatrix)/2){  	    #optionally also in majority of sequences
+		   ){
+		    if($base eq '-'){
+			$fstype=1;
+		    }
+		    elsif($qrychar eq '-'){
+			$fstype=-1;
+		    }
+		    else{
+			$fstype=0;
+		    }
+		    my($fsstart,$fsend) = AlignmentTree::columntocoords($alni,$reloffset,$reloffset);		    
+		    print "#ALT col:$reloffset coord:$fsstart-$fsend base:$base freq:$freqchar->{$base} $seq:$qrychar $freqchar->{$qrychar} fstype:$fstype\n" if($debug);
+		    push @edits,[$fsstart,$base,$qrychar,$reloffset,$fstype];
+		}
+		else{
+		    #last;#can shortcircuit, only consider more frequent bases
+		}
+	    }
+	}
+    }
+    return \@edits;
+}
+
+#Map the range [$fmin,$fmax] on $seqname to alignment columns
+#Inputs
+#getAlignedCols(atree,seq,fmin,fmax)
+#Only intersect() hits on $seqname whose start/end (elements [2],[3]) equal
+#$fmin/$fmax exactly are used; if several qualify the last one wins
+#Dies on a hit that names an unknown alignment or whose elements [6] and
+#[7] (orientations) differ
+#Returns [start_colnum,alignment_name,end_colnum,matching_bits], or undef
+#when no hit qualifies
+sub getAlignedCols{
+    my($atree,$seqname,$fmin,$fmax) = @_;
+    my $ret;
+    foreach my $hit ($atree->intersect($seqname,$fmin,$fmax,$aligntoken)){
+	#Skip hits that belong to other sequences
+	next if($hit->[1] ne $seqname);
+	my($align_name,$hit_start,$hit_end) = ($hit->[0],$hit->[2],$hit->[3]);
+	if(!exists $atree->{_alignments}->{$align_name}){
+	    die "Bad alignment name $align_name";
+	}
+	if($hit->[7] ne $hit->[6]){
+	    die "Mis-mathed orient $hit->[6] ne $hit->[7]";
+	}
+	my $rows = $atree->{_alignments}->{$align_name}->[0];
+	#Require an exact coordinate match, partial overlaps are ignored
+	next if($hit_start != $fmin || $fmax != $hit_end);
+	if(defined $ret && $debug){
+	    print "#WARNING Overlapping aligned region found for $seqname,$fmin,$fmax. $align_name and $ret->[1]\n";
+	}
+	my @cols = AlignmentTree::coordstocolumn($rows,$seqname,$fmin,$fmax,1);
+	$ret = [$cols[0],$align_name,$cols[1],$cols[2]];
+    }
+    return $ret;
+}
+
+################
+#DEPRECATED CODE
+##############################
+#
+#callORF()
+#Attempts to call an ORF using the start codon specified by [start-end]
+#Start,end should be codon coordinates relative to the + strand. start<end
+#and end-start must be 3, otherwise the sub dies
+
+#Tries the requested strand first and falls back to the opposite strand
+#Leading strand 5'->3' increasing coordinates [start-firstStop] 
+#Lagging strand 5'->3' decreasing coordinates [end-firstStop]
+#At most $MAXORFLEN bases beyond the codon are translated
+
+#Will only call ORF if start,end,orient corresponds to an actual start
+#codon according to codon table 11
+
+#$fs is an array reference of signed locations of frameshifts relative to the sequence start
+#eg. +10 is a forward frameshift 10 bp downstream from translation start
+#    -9 is a backward frameshift 9 bp downstream from translation start
+#$fs is only honoured when the first attempt is on the + strand
+
+#Returns (protein_sequence,strand_used), or undef when $seqobj is false or
+#neither strand starts with a valid start codon
+sub callORF{
+    my($seqobj,$codon_start,$codon_end,$orient,$fs) = @_;
+    die "Bad start codon $seqobj:$codon_start-$codon_end $orient" if($codon_end < $codon_start || $codon_end - $codon_start != 3);
+    my $codon_table = Bio::Tools::CodonTable->new(-id=>11);
+    if($seqobj){
+	if($orient eq '+'){
+            my $seqlen = ($seqobj->length()>$MAXORFLEN) ? $codon_start+$MAXORFLEN : $seqobj->length(); 
+	    #Never ask trunc() for bases past the end of the sequence; a start
+	    #codon within $MAXORFLEN of the end would otherwise raise an exception
+	    $seqlen = $seqobj->length() if($seqlen > $seqobj->length());
+	    my $newobjs = $seqobj->trunc($codon_start+1,$seqlen);
+	    #One encoding character per base: C=coding, F/B=forward/backward frameshift
+	    my $encoding = 'C'x$newobjs->length();
+
+	    foreach my $fs_loc (@$fs){
+		if(defined $fs_loc){
+		    if($fs_loc>0){
+			print "Encoding a forward frameshift at $fs_loc in ORF of length ",$newobjs->length(),"\n";
+			#a forward frameshift
+			#substr($encoding,$fs_loc,1) = 'F';
+			substr($encoding,$fs_loc,1,'F');
+		    }
+		    else{
+			#a backward frameshift
+			print "Encoding a reverse frameshift at $fs_loc in ORF of length ",$newobjs->length(),"\n";
+			#substr($encoding,($fs_loc*-1),1) = 'B';
+			substr($encoding,($fs_loc*-1),1,'B');
+		    }
+		}
+	    }
+	    die if(length($encoding)!=$newobjs->length());
+	    my $newobj = new Bio::Seq::EncodedSeq(-seq=>$newobjs->seq(),
+						  -encoding=>$encoding);
+
+
+	    #Check if valid start codon
+	    if($codon_table->is_start_codon($newobj->subseq(1,3))){
+		my $protein_seq_obj = $newobj->translate(-orf => 1,
+							 -codontable_id =>11);
+		return ($protein_seq_obj->seq(),$orient);
+	    }
+	    else{
+		#Not a start codon on '+': retry on the reverse complement ending at $codon_end
+		print "#callORF trying '-' $seqobj,$codon_start,$codon_end,$orient Bad start codon ",$newobj->subseq(1,3) if($debug);;
+		my $seqlen = ($codon_end>$MAXORFLEN) ? $codon_end-$MAXORFLEN : 1;
+		my $newobj = $seqobj->trunc($seqlen,$codon_end);
+		#A failing revcom() is ignored, the forward-strand object is then tested below
+		eval{
+		    $newobj = $newobj->revcom();
+		};
+		#print " REV:",$codon_table->is_start_codon($newobj->subseq(1,3))," ",$newobj->subseq(1,3),"\n";
+		if($codon_table->is_start_codon($newobj->subseq(1,3))){
+		    my $protein_seq_obj = $newobj->translate(-orf => 1,
+							     -codontable_id =>11);
+		    
+		    return ($protein_seq_obj->seq(),'-');
+		}
+		else{
+		    print "#WARNING: Skipping callORF $seqobj,$codon_start,$codon_end,$orient. '",$newobj->subseq(1,3),"' is not a valid start codon\n" if($debug);
+		}
+	    }		
+	}
+	else{
+	    die if($orient ne '-');
+            my $seqlen = ($codon_end>$MAXORFLEN) ? $codon_end-$MAXORFLEN : 1;
+	    my $newobj = $seqobj->trunc($seqlen,$codon_end);
+	    #A failing revcom() is ignored, the forward-strand object is then tested below
+	    eval{
+		$newobj = $newobj->revcom();
+	    };
+	    #Check if valid start codon
+	    if($codon_table->is_start_codon($newobj->subseq(1,3))){
+		my $protein_seq_obj = $newobj->translate(-orf => 1,
+							 -codontable_id =>11);
+		
+		return ($protein_seq_obj->seq(),$orient);
+	    }
+	    else{
+		#Not a start codon on '-': retry on the forward strand from $codon_start
+		print "#callORF trying '+' $seqobj,$codon_start,$codon_end,$orient Bad start codon ",$newobj->subseq(1,3) if($debug);;
+		my $seqlen = ($seqobj->length()>$MAXORFLEN) ? $codon_start+$MAXORFLEN : $seqobj->length(); 
+		#Same clamp as for the '+' strand above
+		$seqlen = $seqobj->length() if($seqlen > $seqobj->length());
+		my $newobj = $seqobj->trunc($codon_start+1,$seqlen);
+		if($codon_table->is_start_codon($newobj->subseq(1,3))){
+		    my $protein_seq_obj = $newobj->translate(-orf => 1,
+							     -codontable_id =>11);
+		    
+		    return ($protein_seq_obj->seq(),'+');
+		}
+		else{
+		    print "WARNING: Skipping callORF $seqobj,$codon_start,$codon_end,$orient. '",$newobj->subseq(1,3),"' is not a valid start codon\n";
+		}
+	    }
+	}
+	    
+    }
+    else{
+	print "#ERROR invalid seq obj $seqobj\n" if($debug);;
+    }
+    return undef;
+}
+
+#Collect alternative start sites
+#
+#Method:
+#Every start codon found anywhere in $codons->{'starts'} that is not
+#annotated on a sequence of @$seqs is projected onto that sequence. When
+#the three codon columns map to an ungapped interval, callORF() is tried
+#there and findCodons() locates the aligned start/stop of the new ORF
+#Inputs
+#  $db        - hash ref seqname => sequence object; without it no ORF is called
+#  $codons    - must contain 'starts' and 'stops', each seqname => codon => count
+#  $seqs      - array ref of sequence names to examine
+#  $seq_attrs - accepted but not used
+#$atree is not a parameter, it is taken from the enclosing scope
+#Returns
+#array ref of [seqname,fmin,fmax,orient,startcodon,stopcodon] for every ORF
+#longer than $MINORFLEN with both codons aligned, undef if there is none
+sub checkStarts{
+    my ($db,$codons,$seqs,$seq_attrs) = @_;
+
+    #Sum the per-sequence counts into one codon=>freq table per codon kind
+    my %freq = ('starts' => {}, 'stops' => {});
+    foreach my $kind ('starts','stops'){
+	die if(!exists $codons->{$kind});
+	foreach my $name (keys %{$codons->{$kind}}){
+	    foreach my $c (keys %{$codons->{$kind}->{$name}}){
+		$freq{$kind}->{$c} += $codons->{$kind}->{$name}->{$c};
+	    }
+	}
+    }
+
+    my $altorfs;
+    foreach my $seqname (@{$seqs}){
+	foreach my $codon (keys %{$freq{'starts'}}){
+	    #Only codons that are not yet annotated on this sequence are of interest
+	    next if(exists $codons->{'starts'}->{$seqname}->{$codon});
+	    print "#CODON $codon not annotated on $seqname\n" if($debug);
+
+	    #$codon is a tuple of aligned_column,alignment
+	    my($col,$aln) = split(/$CODON_DELIM_REGEX/,$codon);
+	    my $row = &getAlignment($atree,$aln,$seqname);
+	    if(!$row){
+		print "#can't find alignment $aln $seqname\n" if($debug);
+		next;
+	    }
+
+	    #Coordinates of the putative start codon; anything but a span of 3 means gaps
+	    my($start,$end) = AlignmentTree::columntocoords($row,$col,$col+2);
+	    if(abs($end-$start) != 3){
+		print "#alignment to codon contains gaps $start,$end\n" if($debug);
+		next;
+	    }
+	    if(!$db){
+		print "#WARNING. No FASTA file, cannot call new ORFs\n";
+		next;
+	    }
+
+	    my $orient = $row->[3];
+	    print "#Looking for ORF $start,$end,$orient on $seqname\n" if($debug);
+	    my $seqobj = $db->{$seqname};
+	    if(!$seqobj){
+		print "#WARNING. Sequence $seqname not found in FASTA file. Skipping calling new ORFs.\n";
+		next;
+	    }
+
+	    my ($neworf,$callorient) = &callORF($seqobj,$start,$end,$orient);
+	    unless(length($neworf)>$MINORFLEN){
+		print "Skipping short ORF ",length($neworf)," <$MINORFLEN $start,$end,$orient\n" if($debug);
+		next;
+	    }
+	    print "#Calling ORF on strand $callorient start coord = $start\n" if($debug);
+
+	    #The ORF extends from the codon in the direction of the called strand
+	    my $span = length($neworf)*3;
+	    my($fmin,$fmax) = ($callorient eq '+') ? ($start,$start+$span) : ($end-$span,$end);
+	    if(!$fmin || $fmin<0){
+		print STDERR "Bad ORF call on $seqname $start,$end converted to $fmin,$fmax\n";
+		next;
+	    }
+
+	    my($strc,$stpc) = &findCodons($atree,$seqname,$fmin,$fmax,$callorient);
+	    #Codon ids are column and alignment name joined by $CODON_DELIM
+	    my $startcodon;
+	    my $stopcodon;
+	    $startcodon = join($CODON_DELIM,$strc->[0],$strc->[1]) if(ref $strc);
+	    $stopcodon = join($CODON_DELIM,$stpc->[0],$stpc->[1]) if(ref $stpc);
+
+	    #Save start,stop pair
+	    if($startcodon && $stopcodon){
+		push @$altorfs,[$seqname,$fmin,$fmax,$callorient,$startcodon,$stopcodon];
+	    }
+	}
+    }
+    return $altorfs;
+}
+
+
+#Pick the best-ranked other sequence aligned to [$start,$end] of $seqname
+#Inputs
+#  $atree      - must provide map($seqname,$start,$end)
+#  $seqname    - query sequence; hits on this sequence itself are ignored
+#  $mappedseqs - hash ref; only hits whose sequence name is a key qualify
+#  $start,$end - region on $seqname
+#Returns the sequence name (element [1]) of the qualifying hit with the
+#largest element [8], or undef when there is none
+sub findNearestNeighbor{
+    my($atree,$seqname,$mappedseqs,$start,$end) = @_;
+    my @res = $atree->map($seqname,$start,$end);
+    #Largest element [8] first
+    #NOTE(review): [8] is presumably a numeric similarity score, confirm against AlignmentTree::map
+    my @sres = sort {$b->[8] <=> $a->[8]} @res;
+    foreach my $s (@sres){
+	if($s->[1] ne $seqname && exists $mappedseqs->{$s->[1]}){
+	    return $s->[1];
+	}
+    }
+    #Falling off the end of a foreach leaves the return value unspecified,
+    #so say explicitly that nothing was found
+    return undef;
+}
+
+#Report runs of indels in $seqname, relative to $nearestseq, between an
+#aligned start codon and an aligned stop codon
+#Inputs
+#  $atree                 - alignment tree
+#  $db                    - passed through to reportVariants()
+#  $seqname               - sequence to analyse
+#  $nearestseq            - reference sequence handed to reportVariants()
+#  $startcodon,$stopcodon - codon ids "column<delim>alignment"
+#Returns (\@runs,$netfs)
+#Each run is [first_coord,last_coord,ref_chars,qry_chars,frame,\@coords] and
+#groups variants of one frame type on adjacent coordinates. $netfs is the
+#sum of the frame values of all variants
+#Nothing is explicitly returned when start and stop are $MAXORFLEN or more apart
+sub reportFrameShifts{
+    my($atree,$db,$seqname,$nearestseq,$startcodon,$stopcodon) = @_;
+    #$codon is a tuple of alignment,aligned_column
+    my($startcol,$aln_s) = split(/$CODON_DELIM_REGEX/,$startcodon);
+    #find corresponding stop
+    my($stopcol,$aln_e) = split(/$CODON_DELIM_REGEX/,$stopcodon);
+    my $si = &getAlignment($atree,$aln_s,$seqname);
+    my $ei = &getAlignment($atree,$aln_e,$seqname);
+    my($startcoord) = AlignmentTree::columntocoords($si,$startcol,$startcol);
+    my($stopcoord) = AlignmentTree::columntocoords($ei,$stopcol,$stopcol);
+    #Make sure we have not traversed a rearrangement
+    if(abs($stopcoord-$startcoord)<$MAXORFLEN){
+	
+	my $fsvars = [];
+	my $netfs = 0;
+		
+	#TODO, relax to allow multiple spanning alignments
+	print "#Looking for frameshifts in $startcodon,$stopcodon $aln_s $aln_e $startcoord $stopcoord\n" if($debug);
+	#Alignments overlapping the gene, ordered in the direction of translation
+	my @sortedproj;
+	if($startcoord < $stopcoord){
+	    my @proj = $atree->intersect($seqname,$startcoord,$stopcoord,'WGA');
+	    @sortedproj = sort {$a->[2] <=> $b->[2]} @proj;
+	}
+	else{
+	    my @proj = $atree->intersect($seqname,$stopcoord,$startcoord,'WGA');
+	    @sortedproj = sort {$b->[3] <=> $a->[3]} @proj;
+	}
+	print "#Found ",scalar(@sortedproj)," alignments\n" if($debug);
+	foreach my $aln (@sortedproj){
+	    if($aln->[1] eq $seqname){
+		#Column range of this overlap; these lexicals shadow the codon columns above
+		my ($startcol,$stopcol) = AlignmentTree::coordstocolumn($atree->{_alignments}->{$aln->[0]}->[0],$seqname,$aln->[2],$aln->[3],1);
+		my $sv = &reportVariants($atree,$db,$aln->[0],$seqname,$startcol,$stopcol,$nearestseq);
+		foreach my $v (@$sv){
+		    if($v->[4] != 0){
+			print "#FSVAR $seqname ",join(',',@$v),"\n" if($debug);
+			push @$fsvars,$v;
+			$netfs += $v->[4];
+			#if(abs($netfs) > $FS_THRESHOLD){
+			    #Short circuit
+			    #return undef;
+			#}
+		    }
+		    else{
+			#reportVariants is expected to return indels only
+			die;
+		    }
+		}
+	    }
+	}		
+	#Merge variants on adjacent coordinates with the same frame type into runs
+	my @coords = sort {$a->[0] <=> $b->[0]} (@$fsvars);
+	my $pos = [];
+	my @runs;
+	my $indelstr1;
+	my $indelstr2;
+	my $last;
+	my $start;
+	my $lasttype;
+	my $end;
+	for(my $i=0;$i<@coords;$i++){
+	    if($i==0){
+		$start=$coords[$i]->[0];
+		$lasttype=$coords[$i]->[4];
+		$pos= [];
+	    }
+	    elsif(abs($last+1 - $coords[$i]->[0]) > 1 || $coords[$i]->[4] != $lasttype){
+		#Gap in the coordinates or change of type: close the current run
+		#print "Adding $start,$last,$indelstr1,$indelstr2,$lasttype ",scalar(@$pos),"\n";
+		push @runs,[$start,$last,$indelstr1,$indelstr2,$lasttype,$pos];
+		$indelstr1="";
+		$indelstr2="";
+		$start=$coords[$i]->[0];
+		$pos=[];
+	    }
+	    $last=$coords[$i]->[0];
+	    $lasttype=$coords[$i]->[4];
+	    $indelstr1.=$coords[$i]->[1];
+	    $indelstr2.=$coords[$i]->[2];
+	    push @$pos,$coords[$i]->[0];
+	}
+	#Close the final run. Test for variants rather than for a true $last,
+	#otherwise a run ending on coordinate 0 would be lost
+	if(@coords){
+	    #print "Adding_post $start,$last,$indelstr1,$indelstr2,$lasttype ",scalar(@$pos),"\n";
+	    push @runs,[$start,$last,$indelstr1,$indelstr2,$lasttype,$pos];
+	}
+	#Does any run contain the contig spacer sequence
+	my $ispmark=0;
+	foreach my $r (@runs){
+	    $ispmark = ($r->[2] =~ /$PMARK_SPACER/) ? 1 : 0;
+	    $ispmark = ($r->[3] =~ /$PMARK_SPACER/) ? 1 : 0 if(!$ispmark);
+	    last if($ispmark);
+	}
+
+
+	#Remove runs that are multiple of 3
+	#The length filter is commented out, so every run is currently kept
+	my @fsruns;
+	foreach my $r (@runs){
+	    die if(length($r->[2]) != length($r->[3]));
+	    if($ispmark==1){
+		push @fsruns,$r;
+	    }else{#if(length($r->[2])%3!=0 || (length($r->[2]) < $FSLEN_THRESHOLD)){
+		push @fsruns,$r;
+	    }
+	}
+	
+	if($verbose){
+	    print "#FS num_runs ",scalar(@fsruns),"\n";
+	    foreach my $r (@fsruns){
+		print "[$r->[0]-$r->[1] $r->[2]:$r->[3]] $r->[4] ",scalar(@{$r->[5]}),"\n";
+	    }
+	    print "\n";
+	}
+	return (\@fsruns,$netfs);
+    }
+}
+
+##############################
+#Report ORFs on aligned sequences that are unannotated
+#
+#Method:
+
+#For all aligned segments that do not contain any annotated ORF
+#Attempt to use annotated and aligned start codons from other genomes in the cluster
+#to call new ORFs
+#Inputs
+#  $db          - passed through to checkStarts()
+#  $atree       - alignment tree; $atree->{_alignments} is read
+#  $mappedorgs  - hash ref keyed by sequences that already have a mapped gene
+#  $mappedgenes - accepted but not used
+#  $codons      - hash ref with 'starts' => seqname => codon => count
+#Returns $seq_attrs, a hash ref that is handed to checkStarts()
+#NOTE(review): the list returned by checkStarts() is discarded here and
+#checkStarts() as visible in this file never fills $seq_attrs, so the
+#result is always empty. Confirm whether that is intended
+sub findnewORFs{
+    my($db,$atree,$mappedorgs,$mappedgenes,$codons) = @_;
+    #Consider all possible aligned starts in the cluster
+    my $allcodons = {};
+    foreach my $seq (keys %{$codons->{'starts'}}){
+	foreach my $codon (keys %{$codons->{'starts'}->{$seq}}){
+	    $allcodons->{$codon}++;
+	}
+    }
+    my $noorfseqs = {};
+    #Foreach codon, attempt to find ORFs if none annotated above cutoffs
+    print "#Total number of possible codons ",scalar(keys %$allcodons),"\n" if($debug);;
+    foreach my $codon (keys %$allcodons){
+	#Interpolate the delimiter regex. Escaping the sigil matched the literal
+	#text '$CODON_DELIM_REGEX', so the codon was never split and $aln was undef
+	my($col,$aln) = split(/$CODON_DELIM_REGEX/,$codon);
+	#Skip unknown alignments instead of autovivifying them in the tree
+	next if(!defined $aln || !exists $atree->{_alignments}->{$aln});
+	my $alignedseqs  = $atree->{_alignments}->{$aln}->[0]; #get seqs for $aln
+	foreach my $alnseq (@$alignedseqs){
+	    my $seq = $alnseq->[0];
+	    #Check if sequence already has a mapped gene
+	    if(! exists $mappedorgs->{$seq}){
+		print "#No ORFs on seq $seq\n" if($debug);;
+		$noorfseqs->{$seq}++;
+	    }
+	}
+    }
+    my $seq_attrs = {};
+    print "#Looking for new starts in new orfs\n" if($debug);;
+    &checkStarts($db,$codons,[keys %$noorfseqs],$seq_attrs,1);
+    return $seq_attrs;
+}
+
+sub printExtAlts{
+#Unimplemented stub: takes no arguments and does nothing
+#The notes below sketch the intended table of alternative ORFs
+#Longest row in Green
+#Frameshifts in Red
+#Start fully consistent
+#Alt,Num,Annotated,Length
+    
+
+
+}
+
+sub printExtAlignments{
+#Unimplemented stub: takes no arguments and does nothing
+#Everything below is design notes and JavaScript snippets for a future
+#Ext JS alignment view. The template snippet used to be uncommented, which
+#is not valid Perl, so it is kept as a comment like the other notes
+
+#Use CSS classes for each codon, highlight in color
+#Use CSS classes for each gene
+
+#getRowClass
+#grid.getView().getRowClass = function(record, index){
+#return (record.data.change<0.7 ? (record.data.change<0.5 ? (record.data.change<0.2 ? 'red-row' : 'green-row') : 'blue-row') : '');
+#};
+
+#    var fDataTpl = new Ext.XTemplate(
+#        '<tpl for=".">',
+#            '<div>',
+#                '<pre class="x-fixed">{element}</pre>',
+#            '</div>',
+#        '</tpl>'
+#    );
+
+#From http://www.sencha.com/blog/2010/07/13/a-side-by-side-diff-viewer-built-with-ext-js/
+#        // Obtain reference to HTML templates
+#        lineTpl = Ext.ux.CodeViewer.lineTpl,
+#        emptyLineTpl = Ext.ux.CodeViewer.emptyLineTpl,
+
+#        // Create a "pre" tag to hold the code
+#        pre = this.el.createChild({tag: 'pre'}),
+
+#        var el = lineTpl.append(pre, [i+1, this.highlightLine(lines[i])]);
+# Ext.fly(el).addClass('ux-codeViewer-modified');
+
+
+}
+
+
+
+
+#Write the Ext JS report for one cluster
+#Inputs
+#  $cluster_id - used in the output file names and in the page titles
+#  $clusterref - hash ref; the keys 'alts', 'orgs' and 'codons' are read
+#When $htmlout is set, creates <prefix>cluster_<id>.html and
+#<prefix>cluster_<id>.js, where <prefix> is $options{'prefix'}. The text of
+#<prefix>cluster_<id>.aln.out is embedded into the HTML page via cat
+#Dies if either output file cannot be opened
+sub printExtJSCluster{
+
+    my($cluster_id,$clusterref) = @_;
+    #List cluster members
+    my @clustergrid;
+
+#Longest row in Green
+#Frameshifts in Red
+#Start fully consistent
+#Alt,Num,Annotated,Length
+    #One row per alternative ORF; string cells carry their own JS quotes
+    my @altgrid;
+    foreach my $alt (keys %{$clusterref->{'alts'}}){
+	my $isfcon = (exists $clusterref->{'alts'}->{$alt}->{'fcon'}) ? 1 : 0;
+	my $ismax = (exists $clusterref->{'alts'}->{$alt}->{'maxlen'}) ? 1 : 0;
+	push @altgrid,["'".$clusterref->{'alts'}->{$alt}->{'name'}."'",
+		       $clusterref->{'alts'}->{$alt}->{'gfreq'},
+		       $clusterref->{'alts'}->{$alt}->{'afreq'},
+		       $clusterref->{'alts'}->{$alt}->{'len'},
+		       scalar(keys %{$clusterref->{'alts'}->{$alt}->{'neworfs'}}),
+		       scalar(keys %{$clusterref->{'alts'}->{$alt}->{'fs'}}),
+		       $isfcon,
+		       $ismax];
+		       
+    }
+    #One row per genome
+    foreach my $g (keys %{$clusterref->{'orgs'}}){
+	#NOTE(review): this list does not depend on $g, so every genome row gets the same value. Confirm intent
+	my @codoninfo;
+    	foreach my $alt (keys %{$clusterref->{'alts'}}){
+	    if(exists $clusterref->{'codons'}->{$alt}->{'features'}){
+		push @codoninfo,$clusterref->{'alts'}->{$alt}->{'name'};
+	    }
+	}
+	my $gref = $clusterref->{'orgs'}->{$g};
+	push @clustergrid,["'CLUSTER_".$cluster_id."'",
+			   "'".$g."'",
+			   "'".join(',',@{$gref->{'genes'}})."'",
+			   "'".join(',',@{$gref->{'cov'}})."'",
+			   "'".join(',',@{$gref->{'pid'}})."'",
+			   $gref->{'fmin'},
+			   $gref->{'fmax'},
+			   $gref->{'len'},
+			   "'".join(',',@{$gref->{'orient'}})."'",
+			   "'".join(',',@codoninfo)."'",
+			   "'".$gref->{'desc'}."'"
+			   ];
+    }
+    #List edits
+
+    #Show alignment
+
+    if($htmlout){
+	#Link to prev and next cluster    
+	my $jsfh;
+	my $htmlfh;
+	my $htmlrelpath = basename("$options{'prefix'}cluster_${cluster_id}.html");
+	my $jsrelpath = basename("$options{'prefix'}cluster_${cluster_id}.js");
+	#Fail loudly instead of printing to unopened handles
+	my $jspath = "$options{'prefix'}cluster_${cluster_id}.js";
+	my $htmlpath = "$options{'prefix'}cluster_${cluster_id}.html";
+	open($jsfh,'+>',$jspath) or die "Can't open $jspath for writing: $!";
+	open($htmlfh,'+>',$htmlpath) or die "Can't open $htmlpath for writing: $!";
+	
+	print $htmlfh <<_CLUSTERHTMLHEADER;
+	
+	<html>
+	    <head>
+	    <title>Cluster $cluster_id</title>
+	    <link rel="stylesheet" type="text/css" href="http://dev.sencha.com/deploy/dev/resources/css/ext-all.css" />
+	    <script type="text/javascript" src="http://dev.sencha.com/deploy/dev/adapter/ext/ext-base.js"></script>
+	    <script type="text/javascript" src="http://dev.sencha.com/deploy/dev/ext-all-debug.js"></script>
+	    </head>
+	    <body>
+	    <script type="text/javascript" src="$jsrelpath"></script>
+	    <div id="my-div" class="x-hidden">
+            <pre>
+
+_CLUSTERHTMLHEADER
+;
+	
+	
+	print $htmlfh `cat $options{'prefix'}cluster_${cluster_id}.aln.out`;
+	
+	print $htmlfh <<_CLUSTERHTMLFOOTER;
+
+	    </pre>
+	    
+</body>
+</html>
+
+_CLUSTERHTMLFOOTER
+;
+	
+
+    print $jsfh <<_CLUSTERJSHEADER;
+    function renderGeneURL(val){
+      return '<a href="javascript:document.getElementById(\\''+val+'\\').scrollIntoView(true);">'+val+'</a>';
+    }
+    Ext.onReady(function(){
+	
+	Ext.QuickTips.init();
+	
+	var xg = Ext.grid;
+		
+	var featstore = new Ext.data.ArrayStore({
+	  fields: [
+		   {name: 'cluster'},
+		   {name: 'genome'},
+		   {name: 'name'},
+		   {name: 'coverage', type: 'float'},
+		   {name: 'identity', type: 'float'},
+		   {name: 'fmin', type: 'float'},
+		   {name: 'fmax', type: 'float'},
+		   {name: 'length', type: 'float'},
+		   {name: 'strand'},
+		   {name: 'codon_pairs'},
+		   {name: 'desc'}
+		   ]
+	});
+	
+	var altstore = new Ext.data.ArrayStore({
+	  fields: [
+		   {name: 'name'},
+		   {name: 'gfreq'},
+		   {name: 'afreq'},
+		   {name: 'len'},
+		   {name: 'neworfs'},
+		   {name: 'fs'},
+		   {name: 'isfcon'},
+		   {name: 'ismax'},
+		   ]
+	});
+	
+	featstore.loadData(xg.clusterData);
+	altstore.loadData(xg.altData);
+
+	var alntext = new Ext.Panel({
+	    'id':'alntext',
+	    'title':'Alignment detail',
+	    'region':'south',
+	  split:true,
+	  height:300, 
+	  collapsible: true,
+	  autoScroll:true,
+	  contentEl:'my-div'
+	  });
+	
+	var featgrid = new xg.GridPanel({
+	  store: featstore,
+	  columns: [
+		    {id:'cluster',header: "Cluster", width: 70, sortable: true, dataIndex: 'cluster'},
+		    {header: "Feature", width: 100, sortable: true, dataIndex: 'name'},
+		    {header: "Genome", width: 100, sortable: true, dataIndex: 'genome'},
+		    {header: "fmin", width: 50, sortable: true, dataIndex: 'fmin'},
+		    {header: "fmax", width: 50, sortable: true, dataIndex: 'fmax'},
+		    {header: "strand", width: 30, sortable: true, dataIndex: 'strand'},
+		    {header: "Len", width: 50, sortable: true, dataIndex: 'length'},
+		    {header: "Coverage", width: 50, sortable: true, dataIndex: 'coverage'},
+		    {header: "Identity", width: 50, sortable: true, dataIndex: 'identity'},
+		    {header: "Alt ORFs", width: 200, sortable: true, dataIndex: 'codon_pairs'},
+		    {header: "Description", width: 500, sortable: true, dataIndex: 'desc'},
+		    ],	      
+	    viewConfig: {
+		    forceFit: true},
+		    frame: true,
+		    animCollapse: false,
+		    title: 'Cluster ${cluster_id} annotation summary',
+		    iconCls: 'icon-grid',
+		    fbar  : ['->', {
+		    text:'Save as text',
+				     handler : null
+			}],
+		    columnWidth: .6,
+		    flex:1
+		});
+
+	var editgrid = new xg.GridPanel({
+		store: altstore,
+		    columns: [
+			      {id:'name',header: "ORF", width: 70, sortable: true, dataIndex: 'name'},
+			      {header: "Aligned Freq", width: 50, sortable: true, dataIndex: 'gfreq'},
+			      {header: "Annotated Freq", width: 50, sortable: true, dataIndex: 'afreq'},
+			      {header: "Len", width: 50, sortable: true, dataIndex: 'len'},
+			      {header: "# Missing", width: 50, sortable: true, dataIndex: 'neworfs'},
+			      {header: "# FS", width: 50, sortable: true, dataIndex: 'fs'},
+			      {header: "isfcon", width: 50, sortable: true, dataIndex: 'isfcon'},
+			      {header: "ismax", width: 50, sortable: true, dataIndex: 'ismax'},
+			      ],	      
+		    split:true,
+		    frame: true,
+		    collapsible: true,
+		    animCollapse: false,
+		    title: 'Cluster ${cluster_id} edit summary',
+		    iconCls: 'icon-grid',
+		    columnWidth: .4,
+		    flex:1
+		    });
+	var viewport = new Ext.Viewport({
+	  layout:'border',
+	    //defaults: {autoScroll:true,height:500},
+	  items: [
+		  new Ext.Panel({
+		    layout:'fit',
+		    region:'center',
+		    items: [
+			    new Ext.Panel({
+			      layout:'hbox',
+			      layoutConfig: {
+				  align : 'stretch',
+				  pack  : 'start',
+			      },
+							    region:'center',
+			      items: [ featgrid,editgrid]
+			      })
+						]
+					    }),
+		  alntext
+		  ]
+	      });
+	viewport.doLayout();
+
+    });
+
+_CLUSTERJSHEADER
+    ;
+ 
+    #The grids are emitted as JavaScript array literals
+    print $jsfh "Ext.grid.clusterData = [";
+    foreach my $c (@clustergrid){
+	print $jsfh "[",join(',',@$c),"],\n";
+    }
+    print $jsfh "];\n";
+
+    print $jsfh "Ext.grid.altData = [";
+    foreach my $c (@altgrid){
+	print $jsfh "[",join(',',@$c),"],\n";
+    }
+    print $jsfh "];\n";
+
+    close $jsfh;
+    close $htmlfh;
+    }
+
+}
+
+#Write the top-level Ext JS report
+#Inputs
+#  $clusters - hash ref cluster_id => cluster; 'num_feats', 'num_genomes'
+#              and 'classes' of each cluster are listed in the summary grid
+#Calls printExtJSCluster() for every cluster, then creates
+#<prefix>index.html and <prefix>main.js, where <prefix> is $options{'prefix'}
+#Dies if either output file cannot be opened
+sub printExtJS{
+    my($clusters) = @_;
+    my @clustergrid;
+    
+    #Summary is cluster_id,#genomes,#genes,class,
+
+    foreach my $cluster_id (keys %$clusters){
+	push @clustergrid,[$cluster_id,$clusters->{$cluster_id}->{'num_feats'},$clusters->{$cluster_id}->{'num_genomes'},"'".$clusters->{$cluster_id}->{'classes'}."'"];
+	&printExtJSCluster($cluster_id,$clusters->{$cluster_id});
+    }
+
+    my $jsfh;
+    my $htmlfh;
+    my $jsrelpath = basename("$options{'prefix'}main.js");
+    my $relpath = basename("$options{'prefix'}");
+    #Fail loudly instead of printing to unopened handles
+    my $jspath = "$options{'prefix'}main.js";
+    my $htmlpath = "$options{'prefix'}index.html";
+    open($jsfh,'+>',$jspath) or die "Can't open $jspath for writing: $!";
+    open($htmlfh,'+>',$htmlpath) or die "Can't open $htmlpath for writing: $!";
+
+    print $htmlfh <<_HTMLHEADER;
+
+<html>
+<head>
+<title>Mugsy-Annotator Report</title>
+<link rel="stylesheet" type="text/css" href="http://dev.sencha.com/deploy/dev/resources/css/ext-all.css" />
+<script type="text/javascript" src="http://dev.sencha.com/deploy/dev/adapter/ext/ext-base.js"></script>
+<script type="text/javascript" src="http://dev.sencha.com/deploy/dev/ext-all-debug.js"></script>
+</head>
+<body>
+<script type="text/javascript" src="$jsrelpath"></script>
+
+</body>
+</html>
+
+_HTMLHEADER
+;
+    
+
+    print $jsfh <<_MAINJSHEADER;
+	
+
+    function renderClusterURL(val){
+	return '<a href="${relpath}cluster_'+val+'.html">CLUSTER_'+val+'</a>';
+    }
+
+    Ext.onReady(function(){
+
+	Ext.QuickTips.init();
+	
+	var xg = Ext.grid;
+	
+	
+	// shared reader
+	    var reader = new Ext.data.ArrayReader({}, [
+						       {name: 'cluster_id'},
+						       {name: 'num_feats'},
+						       {name: 'num_genomes'},
+						       {name: 'quality_class'}
+						       ]);
+	var store = new Ext.data.GroupingStore({
+	  reader: reader,
+	  data: xg.summaryData,
+	  sortInfo:{field: 'cluster_id', direction: "ASC"},
+	  groupField:'quality_class'
+        });
+	
+	var grid = new xg.GridPanel({
+	  store: store,
+	  columns: [
+		    {id:'Cluster',header: "Cluster", width: 10, sortable: true, dataIndex: 'cluster_id', renderer:renderClusterURL},
+		    {header: "Features", width: 10, sortable: true, dataIndex: 'num_feats'},
+		    {header: "Genomes", width: 10, sortable: true, dataIndex: 'num_genomes'},
+		    {header: "Class", width: 20, sortable: true, dataIndex: 'quality_class'},
+		    ],
+	      
+        view: new Ext.grid.GroupingView({
+            forceFit:true,
+            groupTextTpl: '{text} ({[values.rs.length]} {[values.rs.length > 1 ? "Items" : "Item"]})'
+        }),
+
+        frame:true,
+        width: 700,
+        height: 450,
+        collapsible: true,
+        animCollapse: false,
+        title: 'Annotation summary',
+        iconCls: 'icon-grid',
+        fbar  : ['->', {
+            text:'Clear Grouping',
+            iconCls: 'icon-clear-group',
+            handler : function(){
+                store.clearGrouping();
+            }
+        }],
+        renderTo: document.body
+    });
+});
+
+_MAINJSHEADER
+    ;
+
+
+    #The summary rows are emitted as a JavaScript array literal
+    print $jsfh "Ext.grid.summaryData = [";
+
+    foreach my $c (@clustergrid){
+	print $jsfh "[",join(',',@$c),"],\n";
+    }
+    print $jsfh "];\n";
+
+    close $jsfh;
+    close $htmlfh;
+
+}
+
diff --git a/mapping/mugsy-annotator b/mapping/mugsy-annotator
new file mode 100644
index 0000000..3cbab63
--- /dev/null
+++ b/mapping/mugsy-annotator
@@ -0,0 +1,48 @@
+#!/bin/sh
+#USAGE: mugsy-annotator allgenomes.fsa aln.maf *.gbk
+#
+#Features can be either a GFF3 file, a GBK genbank flat file, or 5 column text files ($featname $seqname $fmin $fmax $strand )
+#Generate a multi-FASTA file with all your genome sequences
+#cat genome1 ...genomeN > allgenomes.fsa
+
+PREFIX=/usr/local/projects/angiuoli/mugsy_trunk/mapping
+
+if [ ! -d "$PREFIX" ]
+then
+    echo "Cannot find installation directory $PREFIX. Edit the script to configure a valid directory"
+    exit 1
+fi
+
+if [ $# -lt 3 ]
+then
+    echo "USAGE: mugsy-annotator allgenomes.fsa aln.maf *.gbk"
+    exit 1
+fi
+
+IDXFILE=/tmp/$$.idx
+FSAFILE=$1
+
+echo "Building index for alignment $2" >&2
+"$PREFIX/mafindex.pl" "$IDXFILE" < "$2" > /tmp/$$.mafidx
+
+shift
+shift
+#Remaining arguments are the feature files; "$@" and "$gff" are quoted so names with spaces or glob characters survive
+for gff in "$@"
+  do
+  isgb=`head -1 "$gff" | grep "^LOCUS"`  #non-empty only when the first line starts with LOCUS (genbank flat file)
+  if [ "$isgb" != "" ]
+      then
+      echo "Converting file $gff to GFF" >&2
+      bp_genbank2gff3.pl --filter misc_feature -in stdin -out - < "$gff" | grep -v "# Input" >> /tmp/$$.gff  #run directly; the old backtick wrapper executed the (empty) output as a command
+  else
+      cat "$gff" >> /tmp/$$.gff
+  fi
+done
+echo "Building index for features" >&2
+"$PREFIX/featureindex.pl" "$IDXFILE" gff < /tmp/$$.gff > /tmp/$$.featidx
+echo "Mapping features" >&2
+echo "To print with aligment detail. Run $PREFIX/mapfeatures.pl --printalignments $IDXFILE $FSAFILE < /tmp/$$.gff" 
+echo "To print with html reports. Run $PREFIX/mapfeatures.pl --printhtml $IDXFILE $FSAFILE < /tmp/$$.gff" 
+"$PREFIX/mapfeatures.pl" "$IDXFILE" "$FSAFILE" < /tmp/$$.gff 
+
diff --git a/mapping/mugsyindex.pl b/mapping/mugsyindex.pl
new file mode 100755
index 0000000..09eb557
--- /dev/null
+++ b/mapping/mugsyindex.pl
@@ -0,0 +1,38 @@
+#!/usr/bin/perl
+#
+#./mugsyindex.pl index.file < mugsy.out
+#Adds MUGSY output to a MUGSY formatted index
+#Each block is saved as type 'synteny'
+
+use strict;
+use lib '/usr/local/projects/angiuoli/developer/sangiuoli/mugsy/trunk';
+use AlignmentTree;
+use Data::Dumper;
+
+my $atree = new AlignmentTree();
+if(-e $ARGV[0]){
+    $atree = AlignmentTree::deserialize($ARGV[0]);
+}
+else{
+
+}
+
+my $currscore;
+my $block = [];
+my $k=0;
+my $name;
+while(my $line=<STDIN>){
+    chomp $line;
+    if($line !~ /^[\s\#]/){
+	my @elts = split(/\s+/,$line);
+	if($name ne $elts[0]){
+	    $atree->insert($block,$name,"synteny") if(scalar @$block>0 && $name);
+	    $name = "$elts[0]";
+	    $block = [];
+	}
+	push @$block,[$elts[1],$elts[3],$elts[4],$elts[2]];
+    }
+}
+$atree->insert($block,$name,"synteny") if(scalar @$block>0 && $name);
+print STDERR "Writing index to $ARGV[0]\n";
+$atree->serialize($ARGV[0]);
diff --git a/mapping/mugsymapper b/mapping/mugsymapper
new file mode 100755
index 0000000..24cdd0b
--- /dev/null
+++ b/mapping/mugsymapper
@@ -0,0 +1,34 @@
+#!/bin/sh
+#USAGE: mugsymapper allgenomes.fsa aln.maf *.gbk
+#
+#Features can be either a GFF3 file, a GBK genbank flat file, or 5 column text files ($featname $seqname $fmin $fmax $strand )
+#Generate a multi-FASTA file with all your genome sequences
+#cat genome1 ...genomeN > allgenomes.fsa
+
+PREFIX=/usr/local/projects/angiuoli/mugsy_trunk/mapping
+
+IDXFILE=/tmp/$$.idx
+FSAFILE=$1
+
+echo "Building index for alignment $2" >&2
+"$PREFIX/mafindex.pl" "$IDXFILE" < "$2" > /tmp/$$.mafidx
+
+shift
+shift
+#Remaining arguments are the feature files; "$@" and "$gff" are quoted so names with spaces or glob characters survive
+for gff in "$@"
+  do
+  isgb=`head -1 "$gff" | grep "^LOCUS"`  #non-empty only when the first line starts with LOCUS (genbank flat file)
+  if [ "$isgb" != "" ]
+      then
+      echo "Converting file $gff to GFF" >&2
+      bp_genbank2gff3.pl --filter misc_feature -in stdin -out - < "$gff" | grep -v "# Input" >> /tmp/$$.gff  #run directly; the old backtick wrapper executed the (empty) output as a command
+  else
+      cat "$gff" >> /tmp/$$.gff
+  fi
+done
+
+"$PREFIX/featureindex.pl" "$IDXFILE" gff < /tmp/$$.gff > /tmp/$$.featidx
+echo "Mapping features" >&2
+"$PREFIX/mapfeatures.pl" "$IDXFILE" "$FSAFILE" < /tmp/$$.gff 
+
diff --git a/mapping/query.pl b/mapping/query.pl
new file mode 100644
index 0000000..4e6412c
--- /dev/null
+++ b/mapping/query.pl
@@ -0,0 +1,19 @@
+use strict;
+use AlignmentTree;
+use Storable qw(store retrieve);
+use Data::Dumper;
+
+#USAGE: perl query.pl index.file arg1 arg2 arg3 (the three arguments are passed unchanged to intersect(); presumably seqname fmin fmax - confirm against AlignmentTree)
+$Storable::Deparse = 1;
+$Storable::Eval = 1; #NOTE(review): retrieve() will eval code refs stored in the index; only load index files you trust
+
+#Stop with a clear message when the index is missing; previously $atree stayed
+#undef and the intersect() call below died with "Can't call method on an undefined value"
+die "Can't find index file $ARGV[0]\n" if(!defined $ARGV[0] || ! -e $ARGV[0]);
+my $atree = retrieve($ARGV[0]);
+
+my @results = $atree->intersect($ARGV[1],$ARGV[2],$ARGV[3]);
+foreach my $r (@results){
+    print "INTERSECT RESULT ",join(' ',@$r),"\n";
+}
+
diff --git a/mapping/reportvariants.pl b/mapping/reportvariants.pl
new file mode 100755
index 0000000..4d662b9
--- /dev/null
+++ b/mapping/reportvariants.pl
@@ -0,0 +1,118 @@
+#!/usr/bin/perl
+#./reportvariants.pl index fasta
+
+use strict;
+use Bio::Perl;
+use Bio::DB::Fasta;
+use Bio::Seq;
+use lib '/usr/local/projects/angiuoli/developer/sangiuoli/mugsy/trunk/mapping/';
+use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
+use Pod::Usage; #provides pod2usage(); it was called below without any visible import
+use AlignmentTree;
+my %options; #filled by GetOptions: gap_window, display_window, gaps_allowed, help
+my $results = GetOptions (\%options, 
+			  'gap_window|g=s',
+			  'display_window|d=s',
+			  'gaps_allowed|a=s',
+			  'help|h') || pod2usage(-verbose => 1); #'help' is tested on the next line, so it has to be registered here
+pod2usage(-verbose=>1) if($options{'help'});
+
+my $atree = AlignmentTree::deserialize($ARGV[0]);
+
+my $db = Bio::DB::Fasta->new($ARGV[1],'-reindex'=>1); 
+
+my $gapthreshold=0;
+if(exists $options{'gaps_allowed'}){
+    $gapthreshold = $options{'gaps_allowed'};
+}
+my $gap_window=5;
+if(exists $options{'gap_window'}){
+    $gap_window = $options{'gap_window'};
+}
+my $display_window=5;
+if(exists $options{'display_window'}){
+    $display_window = $options{'display_window'};
+}
+
+
+shift @ARGV;
+shift @ARGV;
+
+my $pwseqs = {};
+my $refname = shift @ARGV;
+foreach my $seq (@ARGV){
+    $pwseqs->{$seq}++;
+}
+
+open VFILE,"+>$$.pwvariants.out" or die "Can't open file pwvariants.out";
+open SFILE,"+>$$.snpvariants.out" or die "Can't open file snpvariants.out";
+foreach my $alnname (sort {$a cmp $b} keys %{$atree->{_alignments}}){
+    my($alnobj,$aln_bv,$align_width) = @{$atree->{_alignments}->{$alnname}};
+    my ($mmatrix,$seqmatrix,$names) = $atree->getAlignmentMatrix($alnname,1,$align_width,$db);
+    if(@$seqmatrix > 1){
+	#print STDERR "Checking alignment $alnname $align_width ",scalar(@$seqmatrix),"\n";
+	
+	my $ngaps;
+	my $nmismatches;
+	my $variants = {};
+	my $seqvariants = {};
+	my $refidx;
+	for(my $i=0;$i<@$seqmatrix;$i++){
+	    if($names->[$i] eq $refname){	
+		$refidx=$i;
+	    }
+	}
+#Matrix cols start at 0
+	for(my $j=0;$j<$align_width;$j++){
+	    my $b;
+	    my $refbp = lc(substr($seqmatrix->[$refidx],$j,1));
+	    for(my $i=0;$i<@$seqmatrix;$i++){
+		if($i ne $refidx){
+		    my $currbp = lc(substr($seqmatrix->[$i],$j,1));
+		    if($currbp ne $refbp && $currbp !~ /[yskrmwnw]/){
+			$variants->{$j}++;
+			$seqvariants->{$i}->{$j}++;
+		    }
+		}
+		#print "$b=$currbp " if($b ne '-' && $currbp ne '-');
+	    }
+	}
+	#print STDERR "variants ",scalar(keys %$variants),"\n";
+	foreach my $col (sort {$a <=> $b} keys %$variants){
+	    my $gaps=0;
+	    for(my $i=0;$i<@$seqmatrix;$i++){
+		my $start = $col - $gap_window;
+		$start = 0 if($start < 0);
+		my $end = $col + $gap_window;
+		$end = $align_width if($end > $align_width);
+		$gaps+= (substr($seqmatrix->[$i],$start,$end-$start+1) =~ tr/\-/\-/);
+	    }
+	    if($gaps<=$gapthreshold){
+		my $refc;
+		for(my $i=0;$i<@$seqmatrix;$i++){
+		    my $start = $col - $display_window;
+		    $start = 0 if($start < 0);
+		    my $end = $col + $display_window;
+		    $end = $align_width if($end > $align_width);
+		    my($alni) = $atree->getAlignedInterval($alnname,$names->[$i]);
+		    my $colstart = 1+$start;
+		    my $colend = $colstart;
+		    my($startc,$endc) = AlignmentTree::columntocoords($alni,$col+1,$col+1);
+		    $refc = $startc if($names->[$i] eq "$refname");
+		    #AlignmentTree::printAlignmentDebug($alnobj);
+		    printf("%10s %s\tcoords:%d-%d\n",$names->[$i],lc(substr($seqmatrix->[$i],$start,$end-$start+1)),$startc,$endc);
+#, substr($seqmatrix->[$i],$start,$end-$start),"\n";
+		    
+		    if($names->[0] eq "$refname" && exists $pwseqs->{$names->[$i]} && $seqvariants->{$i}->{$col}){
+			print SFILE "$names->[$i]\t$refname\t$refc\t",$refc+1,"\t",uc(substr($seqmatrix->[0],$col,1)),"\n";
+			print VFILE "$names->[$i]\t$refc\t",$refc+1,"\t",substr($seqmatrix->[0],$col,1),"/",substr($seqmatrix->[$i],$col,1),"\t$names->[$i]\t$startc-$endc\n";
+		    }
+		}
+		printf("%10s      ^     \n");
+		print "\n";
+	    }
+	}
+    }
+}
+close VFILE;
+close SFILE;
diff --git a/mapping/testitree.pl b/mapping/testitree.pl
new file mode 100755
index 0000000..58e2239
--- /dev/null
+++ b/mapping/testitree.pl
@@ -0,0 +1,327 @@
+#!/usr/bin/perl
+
+use strict;
+use IntervalTree;
+use AlignmentTree;
+use Data::Dumper qw(Dumper);
+
+#remove only using for revcom
+use Bio::Perl;
+use Bio::DB::Fasta;
+use Bio::Seq;
+use Bio::Tools::CodonTable;
+
+#Assumptions fmin<fmax,colstart<colend
+#Genome coordinate system is 0 start, interbase coordinates. Feature length = fmax-fmin
+#Alignment coordinate system is 1 start counting bases. Feature length is fmax-fmin+1.
+
+#Test cases
+#Sequence 1 ...AATTGGCCAA...
+#Sequence 2 ...AATTGGCCAA...
+#Sequence 3 ...AATTGGCCAA...
+
+#Alignment 1 S1,S2,S3 +,+,+
+#Alignment 2 +,-,+
+#Alignment 3 -,-,+
+
+#Test feature1 orient='+' fmin=102 fmax=107 'TTGGC'
+#Test feature2 orient='-' fmin=103 fmax=108 'GCCAA'
+#
+#+ Alignment, + annotation end5<end3 colorient '+' fmin -> coords increasing -> fmax
+#Eg. feature1
+#100 1 AATTGGCCAA 10 110 
+#100 1 AATTGGCCAA 10 110 
+#        TTGGC    
+#         GCCAA
+#col   123456789
+#query:fmin=102,fmax=107 strand +
+#result:colstart=3,coldend=7,revcomp=0
+
+#- Alignment, - annotation end3<end5 colorient '+' fmax -> coords decreasing -> fmin
+#Eg. feature2
+#110 1 TTGGCCAATT 10 100 
+#110 1 TTGGCCAATT 10 100 
+#         GCCAA
+#         CGGTT         
+#col   123456789
+#query:fmin=102,fmax=107 strand -
+#result:colstart=4,colend=8,revcomp=0
+
+#+ Alignment, - annotation end3<end5 colorient '-' fmin -> coords increasing -> fmax. revcom matching interval
+#Eg. feature2
+#100 1 AATTGGCCAA 10 110 
+#100 1 AATTGGCCAA 10 110 
+#        AACCG - reversed 107-102
+#col   123456789
+#query:fmin=102,fmax=107 strand - 
+#result:colstart=3,colend=7,revcomp=1
+
+#- Alignment, + annotation end5<end3 colorient '-' fmax -> coords decreasing -> fmin. revcom matching interval
+#120 1 TTGGCCAATT 10 100 
+#110 1 TTGGCCAATT 10 100 
+#         CGGTT - reversed 107-102
+#col   123456789
+#query:fmin=102,fmax=107 strand + 
+#result:colstart=4,colend=8,revcomp=1
+
+
+
+my @alignments = ([
+		   ['genome1',10,1000,'+','900M100X','g1'],
+		   ['genome2',100,900,'+','100X800M100X','g2'],
+		   ['genome3',350,1350,'+','1000M','g3'],
+		   ],
+		  [
+		   ['genome1',20,2000,'+','1820M180X','g1'],
+		   ['genome2',200,900,'+','180X700M1120X','g2'],
+		   ['genome3',450,2350,'+','100X1900M','g3'],
+		   ['genome4',450,2350,'+','100X1900M','g4']
+		   ]
+		  );
+
+my @alignqueries = (["genome1",1010,1020],
+		    ["genome2",500,720]
+		    );
+
+my @intervals = ([10,1000,1,'+'],
+		 [100,900,2,'+'],
+		 [350,10000,3,'+']);
+
+my @intqueries = ([1010,1020],
+	       [500,720]
+	       );
+
+my @filter = ('g1','g4');
+
+#
+#Test intervaltree
+my $tree = new IntervalTree(1,1000000);
+foreach my $i (@intervals){
+    $tree->insert(@$i);
+}
+
+#for(my $i=10000;$i>=0;$i--){
+#    print "$i\n" if($i%1000==0);
+#    $tree->insert($i,$i+1,$i);
+    #print Dumper($tree),"\n";
+#}
+
+#for(my $i=0;$i<10000;$i++){
+#    print "$i\n" if($i%1000==0);
+#    $tree->insert($i,$i+1,$i);
+#}
+
+foreach my $q (@intqueries){
+    print "QUERY ",join(' ',@$q),"\n";
+    my @results = $tree->intersect(@$q);
+    foreach my $r (@results){
+	print "RESULT $r\n";
+    }
+}
+
+#
+#Test alignment tree
+my $atree = new AlignmentTree();
+
+my $k=0;
+foreach my $a (@alignments){
+    $atree->insert($a,"MAUVE$k","MAUVE");
+    $k++;
+}
+print "Alignmenttree intersect queries\n";
+foreach my $q (@alignqueries){
+    print "QUERY ",join(' ',@$q),"\n";
+    my @results = $atree->intersect(@$q);
+    
+    foreach my $r (@results){
+	print "INTERSECT RESULT ",join(' ',@$r),"\n";
+    }
+    print "DONE\n";
+}
+print "Alignmenttree map()\n";
+foreach my $q (@alignqueries){
+    print "QUERY ",join(' ',@$q),"\n";
+    my @results = $atree->map(@$q);
+    
+    foreach my $r (@results){
+	print "MAP RESULT ",join(' ',@$r),"\n";
+    }
+}
+print "DONE\n";
+
+print "Adding filter ",join(',',@filter),"\n";
+$atree->filter(@filter);
+
+foreach my $q (@alignqueries){
+    print "QUERY ",join(' ',@$q),"\n";
+    my @results = $atree->intersect(@$q);
+    
+    foreach my $r (@results){
+	print "INTERSECT RESULT ",join(' ',@$r),"\n";
+    }
+}
+
+
+#TEST 1 +,+ alignment mapped features on opposing strands
+#
+#Test alignment tree
+
+open FILE,">/tmp/$$.testing" or die "Can't open file /tmp/$$.testing";
+print FILE <<_FASTAEND;
+>genome1
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+AATTGGCCAANNNN
+>genome2
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+AATTGGCCAANNNN
+_FASTAEND
+
+    ;
+close FILE;
+
+my $db = Bio::DB::Fasta->new("/tmp/$$.testing",'-reindex'=>1); 
+
+my $atree = new AlignmentTree();
+
+#0 1 2 3 4 5 6 7 8 9 10
+# A A T T G G C C A A
+
+my @alignments2 = ([
+		    ['genome1',100,110,'+','10M','g1'], #AATTGGCCAA
+		    ['genome2',100,110,'+','10M','g2']  #AAATTGGCCA
+		    ],
+		   [
+		    ['genome1',100,110,'+','5M1X5M','g1'], #AATTGGCCAA
+		    ['genome2',100,110,'+','5M1X5M','g2']  #AAATTGGCCA
+		    ]);
+
+my $expectedalns = ['AATTGGCCAA','AATTGGCCAA','AATTGGCCAA','AATTGGCCAA'];
+
+my @features = ([['genome1',102,107,'+','5M']], #ATTGG
+		[['genome2',102,107,'-','5M']]  #CCAAT
+		);
+my $expectedfeats = ['TTGGC','GCCAA','TTGGC','GCCAA'];
+
+
+my $k=0;
+my $alnidx=0;
+foreach my $a (@alignments2){
+    foreach my $f (@$a){
+	#convert from 0 base to 1 base
+	my $queryseq = $db->get_Seq_by_id($f->[0]);
+	my $queryseqsubstr = $queryseq->subseq($f->[1]+1,$f->[2]);
+	if($f->[3] eq '-'){
+	    $queryseqsubstr = revcom($queryseqsubstr)->seq();
+	}
+	else{
+	    $queryseqsubstr = $queryseq->subseq($f->[1]+1,$f->[2]);
+	}
+	if($queryseqsubstr =~ /N/){
+	    die "ERROR unexpected alignment sequence $k found $queryseqsubstr =~ /N/\n";
+	}
+	if($queryseqsubstr ne $expectedalns->[$k]){
+	    die "ERROR unexpected alignment sequence $k found $queryseqsubstr ne $expectedalns->[$k]\n";
+	}
+	$k++;
+    }
+    $atree->insert($a,"MAUVE$alnidx","MAUVE");
+    $alnidx++;
+}
+$k=0;
+$alnidx=0;
+foreach my $f (@features){
+    $atree->insert($f,'gene:'.$k,'gene');
+    #convert from 0 base to 1 base
+    my $queryseq = $db->get_Seq_by_id($f->[0]->[0]);
+    my $queryseqsubstr = $queryseq->subseq($f->[0]->[1]+1,$f->[0]->[2]);
+    if($f->[0]->[3] eq '-'){
+	print "REVCOM $queryseqsubstr\n";
+	$queryseqsubstr = revcom($queryseqsubstr)->seq();
+    }
+    if($queryseqsubstr =~ /N/){
+	die "ERROR unexpected sequence found $queryseqsubstr =~ /N/\n";
+    }
+    if($queryseqsubstr ne $expectedfeats->[$k]){
+	die "ERROR unexpected sequence found $queryseqsubstr ne $expectedfeats->[$k]\n";
+    }
+    print "QUERYSEQ $f->[0]->[1]-$f->[0]->[2] $f->[0]->[3] ",$queryseqsubstr,"\n";
+    $k++;
+}
+
+print "INTERSECT TEST1\n";
+$k=0;
+foreach my $f (@features){
+    my @results = $atree->intersect($f->[0]->[0],$f->[0]->[1],$f->[0]->[2],'gene');
+    die "More results than expected" if(scalar(@results)>1);
+    my $r = $results[0];
+    print "genome1 INTERSECT RESULT ",join(' ',@$r),"\n";
+
+    my $queryseq = $db->get_Seq_by_id($r->[1]);
+    my $queryseqsubstr = $queryseq->subseq($r->[2]+1,$r->[3]);
+    if($r->[6] eq '-'){
+	print "REVCOM $queryseqsubstr\n";
+	$queryseqsubstr = revcom($queryseqsubstr)->seq();
+    }
+    if($queryseqsubstr =~ /N/){
+	die "ERROR unexpected sequence found $queryseqsubstr =~ /N/ $r->[2]+1,$r->[3]\n";
+    }
+    if($queryseqsubstr ne $expectedfeats->[$k]){
+	die "ERROR unexpected sequence found $queryseqsubstr ne $expectedfeats->[$k] $r->[2]+1,$r->[3]\n";
+    }
+    $k++;
+
+    my @results1 = $atree->map($f->[0]->[0],$f->[0]->[1],$f->[0]->[2],'MAUVE');
+    foreach my $r (@results1){
+	print "MAP RESULT ",join(' ',@$r),"\n";
+    }
+}
+$k=0;
+my $alnidx=0;
+#For all alignments
+foreach my $a (@alignments2){
+    my($alnobj,$bv,$width) = $atree->getAlignment("MAUVE$alnidx");
+    my ($mmatrix,$seqmatrix,$names) = $atree->getAlignmentMatrix("MAUVE$alnidx",1,$width,$db);
+    #See if we can map features into alignment matrix
+    for(my $i=0;$i<@features;$i++){
+	my $f = $features[$i];
+	print "MATRIX MAUVE$alnidx ",join(',',@{$f->[0]}),"\n";
+	my $flen = $f->[0]->[2]-$f->[0]->[1];
+	die "Bad sequence $seqmatrix->[$i]" if(length ($seqmatrix->[$i])<1);
+	#Returned seq length == input length + 1
+	my($cs,$ce) = AlignmentTree::coordstocolumn($alnobj,$f->[0]->[0],$f->[0]->[1],$f->[0]->[2]);
+	my $queryseqsubstr = substr($seqmatrix->[$i],$cs-1,$ce-$cs+1);
+	$queryseqsubstr =~ s/\-//g;
+	if($features[$i]->[0]->[3] eq '-'){
+	    print "REVCOM $queryseqsubstr\n";
+	    $queryseqsubstr = revcom($queryseqsubstr)->seq();
+	}
+
+	if($queryseqsubstr =~ /N/){
+	    die "ERROR unexpected sequence found $queryseqsubstr =~ /N/\n";
+	}
+	if($queryseqsubstr ne $expectedfeats->[$k]){
+	    die "ERROR unexpected sequence $k found $queryseqsubstr ne $expectedfeats->[$k]\n";
+	}
+	else{
+	    print "Sequence $k $queryseqsubstr eq $expectedfeats->[$k] OK\n";
+	}
+	$k++;
+    }
+    $atree->printAlignment("MAUVE$alnidx",1,$width,$db);
+	
+    $alnidx++;
+}
+$alnidx=0;
+foreach my $a (@alignments2){
+    my($alnobj,$bv,$width) = $atree->getAlignment("MAUVE$alnidx");
+    foreach my $f (@features){
+	my @results1 = $atree->map($f->[0]->[0],$f->[0]->[1],$f->[0]->[2],'MAUVE');
+	foreach my $r (@results1){
+	    print "MAP RESULT ",join(' ',@$r),"\n";
+	}
+	$atree->printAlignment("MAUVE$alnidx",1,$width,$db,\@results1);
+    }
+    $alnidx++;
+}
diff --git a/mapping/xmfaindex.pl b/mapping/xmfaindex.pl
new file mode 100755
index 0000000..beab1f4
--- /dev/null
+++ b/mapping/xmfaindex.pl
@@ -0,0 +1,145 @@
+#!/usr/bin/perl
+#
+#./xmfaindex.pl mugsyindex [seqnames.file] < aln.xmfa
+#Adds an XMFA formatted file to a MUGSY formatted index
+#Each alignment is saved as type 'alignment'
+#
+use strict;
+use lib '/usr/local/projects/angiuoli/developer/sangiuoli/mugsy/trunk';
+use AlignmentTree;
+use Storable qw(store retrieve);
+use Data::Dumper;
+
+$Storable::Deparse = 1;
+$Storable::Eval = 1;
+
+my $atree = new AlignmentTree();
+if(-e $ARGV[0]){
+    $atree = AlignmentTree::deserialize($ARGV[0]);
+}
+else{
+
+}
+
+my $index=0;
+my $seqlookup = {};
+if(-e $ARGV[1]){
+    open FILE,"$ARGV[1]" or die "Can't open file $ARGV[1]";
+    while(my $line=<FILE>){
+	my($seq) = ($line =~ /\>?(\S+)/);
+	$seqlookup->{++$index} = $seq;
+    }
+    close FILE;
+}
+
+my $currscore;
+my $block = [];
+my $k=0;
+my $label=0;
+
+my $seqname;
+my $start;
+my $end;
+my $orient;
+my @seqinfo;
+while(my $line=<STDIN>){
+    if($line =~ /^=/){
+	if(defined $seqname && $start>0){
+	    my ($cigar,$len) = &get_cigar(join('',@seqinfo));
+	    die "Bad match length $len in cigar $cigar" if ($end-$start+1 != $len);
+	    #Convert alignment to zero start, interbase coordinates
+	    push @$block,[$seqname,$start-1,$end,$orient,$cigar];
+	    print "Adding aligned sequence $seqname $start-1,$end,$orient to alignment MAUVE_$label\n";
+	}
+	$atree->insert($block,"MAUVE_$label","alignment") if(scalar(@$block));
+	$label++;
+	$block=[];
+	$seqname=undef;
+	@seqinfo=();
+    }
+    elsif($line =~ /^>\s+(\d+)\:(\d+)-(\d+)\s+([\+\-])\s+(\S+)/){
+	if(defined $seqname && $start>0){
+	    my ($cigar,$len) = &get_cigar(join('',@seqinfo));
+	    die "Bad match length $len in cigar $cigar" if ($end-$start+1 != $len);
+	    push @$block,[$seqname,$start-1,$end,$orient,$cigar];
+	    print "Adding aligned sequence $seqname $start-1,$end,$orient to alignment MAUVE_$label\n";
+	}
+	my $seqid = $1;
+	$start = $2;
+	if($start>0){
+	    $end = $3;
+	    #XMFA format start always < end 
+	    die "Invalid coordinates $start-$end" if($start>$end);
+	    #Relative orientation of the alignment
+	    $orient = $4;
+	    my $file = $5;
+	    $seqname = $file;
+	    if(exists $seqlookup->{$seqid}){
+		$seqname = $seqlookup->{$seqid};
+	    }
+	    else{
+		#Hack for strep pneumo xmfa files
+		$seqname =~ s/\.fsa//g;
+	    }
+	}
+	@seqinfo=();
+    }
+    else{
+	if(defined $seqname){
+	    chomp $line;
+	    push @seqinfo,$line;
+	}
+    }
+}
+$atree->insert($block,"MAUVE_$label","alignment") if(scalar(@$block));
+print STDERR "Writing index to $ARGV[0]\n";
+$atree->serialize($ARGV[0]);
+
+#Run-length encode one gapped alignment row into a CIGAR-like string.
+#Called on the concatenated, chomped sequence lines of one XMFA entry.
+#  $seqs   - string of aligned characters; '-' is a gap column, any other
+#            character counts as a match column
+#Returns the list ($cigar,$matchlen):
+#  $cigar    - runs written as <count>M (match) or <count>X (gap),
+#              e.g. "AC--GT" gives "2M2X2M"
+#  $matchlen - number of non-gap characters (4 in the example above)
+#For an empty string $cigar is undef and $matchlen is 0.
+sub get_cigar{
+    my($seqs) = @_;
+    my $cigar;
+    my $matchlen=0;
+    my $runlen=0;
+    #0 - nothing read yet, 1 - inside a match run, 2 - inside a gap run
+    my $state=0;
+    my @columns = split(//,$seqs);
+    foreach my $char (@columns){
+	my $newstate = ($char eq '-') ? 2 : 1;
+	if($state!=0 && $newstate!=$state){
+	    #the run type changed: write out the run that just ended
+	    if($state==1){
+		#match run: counts towards the aligned length
+		$cigar .= $runlen."M";
+		$matchlen += $runlen;
+	    }
+	    else{
+		#gap run: adds nothing to the aligned length
+		$cigar .= $runlen."X";
+	    }
+	    $runlen=0;
+	}
+	#extend the current run with this character
+	$runlen++;
+	$state=$newstate;
+    }
+    #write out the final run, if any character was read
+    if($state==1){
+	#trailing match run
+	$cigar .= $runlen."M";
+	$matchlen += $runlen;
+    }
+    elsif($state==2){
+	#trailing gap run
+	$cigar .= $runlen."X";
+    }
+    return ($cigar,$matchlen);
+}
diff --git a/mugsy b/mugsy
new file mode 100755
index 0000000..b6dfd8c
--- /dev/null
+++ b/mugsy
@@ -0,0 +1,1013 @@
+#!/usr/bin/perl
+if(! -d $ENV{'MUGSY_INSTALL'}){
+    my $default_install = "/usr/local/projects/angiuoli/mugsy_trunk/";
+    if( -d $default_install){
+	$ENV{'MUGSY_INSTALL'} = $default_install;
+	print STDERR "MUGSY_INSTALL environment variable not set. Using $default_install\n";
+    }
+    else{
+	print STDERR "ERROR: MUGSY_INSTALL environment variable not set. Set using export MUGSY_INSTALL=/somepath/to/mugsy\n";
+    }    
+}
+
+=head1 NAME
+
+mugsy - a multiple whole genome aligner
+
+=head1 USAGE
+
+mugsy [-p output prefix] multifasta_genome1.fsa multifasta_genome2.fsa ... multifasta_genomeN.fsa
+
+=head1 SYNOPSIS
+
+Mugsy is a multiple whole genome aligner. Mugsy uses Nucmer for pairwise
+alignment, a custom graph based segmentation procedure for identifying
+LCBs (synchain-mugsy), and a segment-based progressive multiple
+alignment strategy from Seqan::TCoffee. Mugsy accepts draft genomes in
+the form of multi-FASTA files. Mugsy does not require a reference
+genome and is robust in the presence of large scale genome flux and
+genome rearrangements. Mugsy performs best on closely related genomes
+and has been used to align several dozen bacterial genomes.
+
+Mugsy outputs a series of alignments in MAF format. 
+
+See http://mugsy.sf.net for more information
+
+=head1 INPUT
+
+Input is one or more (multi)FASTA files, one per genome. Each file
+should contain all the sequences for a single organism/species. The
+filename is used as the genome name.
+
+Limitations on FASTA input:
+ input FASTA headers must not contain ':' or '-'
+ ambiguity characters are converted to N in output
+
+Common options:
+
+    -p|prefix       prefix for output files
+
+    --directory directory used to store output and temporary
+      files. Must be an absolute path
+
+    -d|--distance   maximum distance along a single sequence (bp) for
+     chaining anchors into locally colinear blocks (LCBs).  This is
+     used by the segmentation step synchain-mugsy. Default is 1000bp.
+
+    -c|--minlength minimum span of an aligned region in a colinear
+     block (bp). This is used by the segmentation step
+     synchain-mugsy. Default is 30bp.
+
+    -duplications 1 - Detect and report duplications. 0 - Skip. Default is 0.
+
+Other options:
+
+    -nucmeropts options passed through to the Nucmer
+     package. Eg. -nucmeropts "-l 15" sets the minimum MUM length in
+     NUCmer to 15. See the Nucmer documentation at
+     http://mummer.sf.net for more information.  Default is -l 15.
+
+    -allownestedlcbs. Default=false. Places each multi-genome anchor
+     in exactly one LCB; the longest spanning LCB
+
+    -plot output genome dot plots in GNUplot format. Overlays LCBS
+     onto pairwise plots from mummerplot. Display of draft genomes in
+     these plots is not supported.
+
+    -fullsearch Run a complete all pairs Nucmer search with each
+     sequence as a reference and query (n^2-1 total searches). Default
+     is one direction only (n^2-1/2 searches).
+
+    -refine run a second iteration of Mugsy on each LCB to refine the
+     alignment using either Mugsy (--refine mugsy), FSA (--refine
+     fsa), Pecan (--refine pecan), MLAGAN (--refine mlagan). Requires
+     that the necessary tools are in your path:  
+     fsa: fsa
+     pecan: muscle,exonerate, in the path. classpath set for bp.pecan.Pecan.
+     mlagan: mlagan.sh
+
+
+    -debug           debug level. > 2 verbose
+
+
+=head1 OUTPUT
+
+Primary output is MAF format.
+
+Utilities for parsing MAF are available at the UCSC genome browser and
+in the multiz,TBA toolkit. GMAJ is a popular visualization tool for MAF.
+
+=head1 MORE INFO
+
+This script is a wrapper that invokes an all-against-all Nucmer search
+and the mugsy aligner.  The two primary components of the aligner
+can also be run independently
+
+1) mugsyWGA
+
+Generates a whole genome alignment (WGA) from a library of pairwise
+alignments in XMFA format. Implemented with the refined segment graph
+and progressive consistency-based alignment procedure described in
+Seqan::TCoffee (Rausch et al 2008).  Invokes synchain-mugsy to segment
+the input genomes into alignable regions.
+
+2) synchain-mugsy
+
+Derives a segmentation of genome anchors that fulfill --distance and
+--minlength criteria.  Anchors can be any oriented features that span
+two or more of the input genomes.  The output is a set of locally
+colinear blocks (LCBs)
+
+=head1 Using Mugsy with other aligners 
+
+Mugsy supports realignment of LCBs using FSA,Pecan, MLAGAN. For FSA, make sure FSA is in your PATH and run with --refine fsa
+    
+=head1 For more information
+
+http://www.sf.net/mugsy.
+
+AUTHOR:
+Sam Angiuoli 
+angiuoli at cs.umd.edu
+2009
+
+=cut
+
+use strict;
+use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
+use File::Basename;
+use Pod::Usage;
+use POSIX;
+
+#Only needed for TBA evaluation
+#eval{
+#    require TreeParse;
+#};
+#if (! $@){
+#    TreeParse->import();
+#}
+
+my %options;
+my $results = GetOptions (\%options, 
+    'prefix|p=s',
+    'directory=s',
+    'distance|d=s',
+    'minlength|c=s',
+    'fullsearch',
+    'tree|t=s',
+    'treefile|f=s',
+    'skipsearch',
+    'skiprefine',
+    'allownestedlcbs',
+    'refine:s',
+    'colinear',
+    'skipunique',
+    'duplications=s',
+    'keeptmpfiles',
+    'keepsearchfiles',
+    'tba|s',
+    'mugsywga|s',
+    'nucmeropts|o=s',
+    'plot',
+    'nofilter|n',
+    'translated|s',
+    'debug=s',
+    'log=s',
+    'help|h',	
+    'fasta_file_list=s'	
+    ) || pod2usage(-verbose => 3);
+
+pod2usage(-verbose=>3) if($options{'help'});
+
+$options{'debug'} = 0 if(!defined $options{'debug'});
+
+###EXTERNAL PROGS####
+my $mugsyinstall = $ENV{'MUGSY_INSTALL'};
+
+##Customized version of Nucmer with maf conversion utilities and
+#delta-filter -b for reporting duplications
+my $nucmerinstall = "$ENV{'MUGSY_INSTALL'}/MUMmer3.20";
+
+##
+#Mugsy aligner
+my $mugsywgacmd = "$mugsyinstall/mugsyWGA";
+
+##
+#Nucmer package
+my $nucmercmd = "$nucmerinstall/nucmer";
+my $promercmd = "$nucmerinstall/promer";
+my $searchcmd = $options{'translated'} ? $promercmd : $nucmercmd;
+my $deltafiltcmd = "$nucmerinstall/delta-filter";
+my $deltadupscmd = "$mugsyinstall/delta-dups.sh";
+#/usr/local/projects/angiuoli/developer/sangiuoli/mugsy/trunk/delta-dups.sh";
+my $mummerplotcmd = "$nucmerinstall/mummerplot";
+my $delta2mafcmd = "$nucmerinstall/delta2maf";
+
+##
+#Mugsy utils
+my $maf2fastacmd = "$mugsyinstall/maf2fasta.pl";
+my $labelblockscmd = "$mugsyinstall/labelblocks.pl";
+my $fixnamescmd = "$mugsyinstall/fixMAFnames.pl";
+
+##
+#MAF utils and TBA 
+#This wrapper includes support to use TBA with Nucmer for evaluation purposes
+my $muscleinstall = "/usr/local/projects/angiuoli/developer/sangiuoli/muscle/trunk";
+my $multizinstall = "/usr/local/projects/angiuoli/developer/sangiuoli/multiz-tba/trunk";
+if(-d $multizinstall){
+    $ENV{'PATH'} = "$ENV{'PATH'}:$multizinstall/";
+}
+my $tbacmd = "$multizinstall/tba";
+my $singlecovcmd = "$multizinstall/single_cov2";
+my $mafsortcmd = "$multizinstall/maf_sort";
+#Customized version MUSCLE v3.7 or later was required to quickly build guide tree
+my $musclecmd = "$muscleinstall/muscle";
+
+#characters like . cannot be included in the output prefix -prefix
+#because of assumptions made by some downstream parsers.
+#Check for problem characters in -prefix and report errors
+my $problemchars = "\.\?\-";
+#FASTA headers must not contain
+my $fastaproblemchars = "\:\-";
+
+if(defined $options{'directory'}){
+    if(! -d "$options{'directory'}"){
+	die "-directory must be a directory";
+    }
+    elsif($options{'directory'} !~ /\/$/){
+	$options{'directory'} .= "/";
+    }
+}
+else{
+    $options{'directory'} = "/tmp/";
+}
+
+my $absprefix =  $options{'directory'};
+if(!defined $options{'prefix'}){
+    $absprefix .= "tmp";
+}
+else{
+    if($options{'prefix'} =~ /([$problemchars])/){
+	die "Character '$1' found in --prefix=$options{'prefix'}.  Please choose another --prefix that excludes '$1'.\n";
+    }
+    $absprefix .= $options{'prefix'};
+}
+
+my $logfh;
+if(lc($options{'log'}) eq 'stderr'){
+    $logfh=*STDERR;
+}
+elsif($options{'log'}){
+    open $logfh,"+>$options{'log'}";
+}else{
+    open $logfh,"+>$options{'prefix'}.mugsy.log";
+}
+
+####################################################                                                                                             
+## We need to append the filenames present in the tag                                                                                                
+## so modifying the code so that it would take input                                                                                                           
+## fasta files either from the command line or from                                                                                                             
+## the tagged dataset.                                                                                                                                          
+## Modified by: Mahesh Vangala                                                                                                                                  
+#####################################################                                                                                                           
+my @inputseqfiles = ();
+unless($options{'fasta_file_list'}) {
+    @inputseqfiles = @ARGV;
+} else {
+    getFastaFilesPath($options{'fasta_file_list'}, \@inputseqfiles);
+}
+#Attempt to detect and convert genbank files                                                                                                                    
+for(my $i=0;$i<@inputseqfiles;$i++){
+    if(`head -1 $inputseqfiles[$i]` =~ /^LOCUS\s+/){
+        print STDERR "Attempting to convert $inputseqfiles[$i] to FASTA\n";
+        my $bname = basename($inputseqfiles[$i]);
+        print `bp_seqconvert.pl --from genbank --to fasta < $inputseqfiles[$i] > $options{'directory'}/$bname.fsa`;
+        $inputseqfiles[$i]="$options{'directory'}/$bname.fsa";
+    }
+}
+print $logfh "Processing FASTA files ",join(',',@inputseqfiles),"\n" if($options{'debug'}); #write to $logfh, the handle opened above; the bareword LOG is never opened in the visible script
+
+pod2usage(-verbose=>3, -message => "Need to specify valid input fasta file") if(! scalar(@inputseqfiles));
+
+#Skipsearch automatically sets keepsearchfiles so
+#that search output is preserved
+if($options{'skipsearch'}){
+    $options{'keepsearchfiles'}=1;
+}
+
+#Set defaults for --distance and --minlength
+$options{'distance'} = (defined $options{'distance'}) ? $options{'distance'} : 1000;
+die "--distance must be an integer. Passed $options{'distance'}" if($options{'distance'} =~ /\D/);
+$options{'minlength'} = (defined $options{'minlength'}) ? $options{'minlength'} : 30;
+
+if(!defined $options{'nucmeropts'}){
+    #Added -l 15 to make defaults comparable with Mauve defaults
+    $options{'nucmeropts'} = " -l 15";
+}
+#NOTE(review): this tests the key 'collinear' while runsearch and
+#runMugsywga test 'colinear' -- confirm which spelling the option parser
+#actually sets
+if(defined $options{'collinear'}){
+    #Append only the extra flag; the previous code appended the whole
+    #option string to itself, duplicating every nucmer option
+    $options{'nucmeropts'} .= " -maxmatch";
+}
+die "Cannot pass both --refine and --skiprefine" if(exists $options{'refine'} && exists $options{'skiprefine'});
+if(exists $options{'refine'}){
+    #Refinement reads the intermediate files, so keep them
+    $options{'keeptmpfiles'}=1;
+}
+
+#Refinement is skipped unless explicitly requested
+if(!exists $options{'refine'} && !exists $options{'skiprefine'}){
+    $options{'skiprefine'}='true';
+}
+die "Cannot pass both --tba and --mugsywga" if(exists $options{'mugsywga'} && $options{'tba'});
+
+#Multiple alignment method; mugsywga unless --tba is passed
+my $method;
+if(exists $options{'tba'}){
+    $method="tba";
+}
+elsif($options{'mugsywga'}){
+    $method="mugsywga";
+}
+else{
+    $method="mugsywga";
+}
+
+#TODO remove this
+if(defined $options{'plot'}){
+    $options{'keeptmpfiles'}=1;
+}
+
+#Set default options for reporting duplications
+#if(! exists $options{'duplications'}){
+#    $options{'duplications'}=1;
+#}
+
+#Set of per-species FASTA files written below (path => number of entries)
+my $seqfiles = {};
+#species name => list of [full header, TBA-style header, length, FASTA path]
+my $genome2seqslookup = {};
+#species name => length of the last sequence read for that species
+my $seqlengthlookup = {};
+
+#my $allfastafiles = {};
+
+#Characters replaced by '_' in species names and plain headers
+my $cleanregex = '[\-]';
+
+#Cleanup directory: printFASTA appends, so remove any per-species file
+#that is already present in the output directory
+foreach my $seqfile (@inputseqfiles){
+    if(! -e $seqfile){
+        die "Invalid input file. Can't find $seqfile\n";
+    }
+    #default species name will be basename of the file
+    #upto the first dot
+    my $fname = basename($seqfile);
+    $fname =~ s/\.[^.]+//g;
+    $fname =~ s/$cleanregex/_/g;
+    my $speciesname = $fname;
+    unlink "$options{'directory'}/$speciesname" if(-e "$options{'directory'}/$speciesname");
+}
+#Aggregate all sequences for a lineage and concatenate together
+foreach my $seqfile (@inputseqfiles){
+    my $fname = basename($seqfile);
+    $fname =~ s/\.[^.]+//g;
+    $fname =~ s/$cleanregex/_/g;
+    my $speciesname = $fname;
+    print STDERR "Parsing sequences for $speciesname ";
+    my $header;
+    my $seqlen=0;
+    my @seqs;
+    #Three-argument open so the file name is never interpreted as a mode or pipe
+    open FILE,'<',$seqfile or die "Can't open file $seqfile: $!";
+    while(my $line=<FILE>){
+        if($line =~ /^>/){
+            #A new header: flush the previous record, if it had any sequence
+            if($seqlen>0){
+                &printFASTA("$options{'directory'}/$speciesname","$speciesname:$header:1:+:$seqlen",\@seqs);
+                $genome2seqslookup->{$speciesname} = [] if (!exists $genome2seqslookup->{$speciesname});
+                my $tbaheader;
+                if($speciesname eq $header){
+                    $tbaheader = "$speciesname";
+                }
+                else{
+                    $tbaheader = "$speciesname.$header";
+                }
+                push @{$genome2seqslookup->{$speciesname}},["$speciesname:$header:1:+:$seqlen",$tbaheader,$seqlen,"$options{'directory'}/$speciesname"];
+                $seqfiles->{"$options{'directory'}/$speciesname"}++;
+            }
+            $seqlen=0;
+            @seqs=();
+            $header='';
+            chomp $line;
+            if($line =~ /^>([^:]+):([^:]+):/){
+                #multiz,tba formatted headers
+                #species name specified, override filename
+                $speciesname = $1;
+                $header = $2;
+                print $logfh "Parsing FASTA entry header:$header speciesname:$speciesname\n" if($options{'debug'});
+            }
+            elsif($line =~ /gi\|\d+\|\w+\|([^.]+)\|\S+/){
+                #special handling of ncbi formatted headers
+                #just pull accession
+                $header = $1;
+                print $logfh "Parsing FASTA entry header:$header speciesname:$speciesname\n" if($options{'debug'});
+            }
+            elsif($line =~ /^>(\S+)/){
+                #plain ole header
+                $header = $1;
+                $header =~ s/$cleanregex/_/g;
+                print $logfh "Parsing FASTA entry header:$header speciesname:$speciesname\n" if($options{'debug'});
+            }
+            else{
+                die "Can't parse FASTA header for $seqfile";
+            }
+        }
+        else{
+            #Sequence line: drop all whitespace and accumulate
+            $line =~ s/\s//g;
+            $seqlen += length($line);
+            push @seqs,$line;
+        }
+    }
+    #Flush the last record of the file
+    if($seqlen){
+        #NOTE(review): only the length of the last sequence is stored per
+        #species; this value is later used to order genomes -- confirm a
+        #cumulative length is not what was intended
+        $seqlengthlookup->{$speciesname} = $seqlen;
+        &printFASTA("$options{'directory'}/$speciesname","$speciesname:$header:1:+:$seqlen",\@seqs);
+        $genome2seqslookup->{$speciesname} = [] if (!exists $genome2seqslookup->{$speciesname});
+        my $tbaheader;
+        if($speciesname eq $header){
+            $tbaheader = "$speciesname";
+        }
+        else{
+            $tbaheader = "$speciesname.$header";
+        }
+        die "Cannot file FASTA file $options{'directory'}/$speciesname" if(! -e "$options{'directory'}/$speciesname");
+
+        push @{$genome2seqslookup->{$speciesname}},
+        ["$speciesname:$header:1:+:$seqlen",$tbaheader,$seqlen,"$options{'directory'}/$speciesname"];
+
+        $seqfiles->{"$options{'directory'}/$speciesname"}++;
+    }
+    close FILE;
+    #An input without any sequence has no lookup entry; report zero
+    #instead of dereferencing an undefined value
+    print STDERR " num_seqs:",scalar(@{$genome2seqslookup->{$speciesname} || []}),"\n";
+}
+
+#Genome names in the order in which they are searched
+my @genomenodes;
+my $treestring;
+
+#This wrapper supports running TBA for evaluation purposes using the same
+#Nucmer input as is passed to Mugsy
+if(exists $options{'tba'}){
+#TODO Muscle is used to build a guide tree from kmer counts.
+#The tree was used to test Nucmer+TBA but is not needed by Mugsy.
+#Removing this tree code will also remove need for muscle
+    if(defined $options{'tree'} || defined $options{'treefile'}){
+        $treestring = $options{'tree'};
+        #No usable tree file: write the tree passed via 'tree' to one
+        if(! defined $options{'treefile'} || ! -e $options{'treefile'}){
+            print $logfh "Writing tree file $absprefix.tree\n" if($options{'debug'});
+            open FILE,'+>',"$absprefix.tree" or die "Can't open file $absprefix.tree";
+            print FILE "$options{'tree'}\n";
+            close FILE;
+            $options{'treefile'}  = "$absprefix.tree";
+        }
+        else{
+            #An existing tree file takes precedence; join its lines into one string
+            open FILE,'<',$options{'treefile'} or die "Can't open treefile $options{'treefile'}";
+            my @treein=<FILE>;
+            close FILE;
+            chomp @treein;
+            $treestring = join('',@treein);
+            chomp $treestring;
+        }
+    }
+    else{
+        print $logfh "Estimating phylogenetic tree from sequence using muscle. Shared k-mer distance method and UPGMA\n" if($options{'debug'});
+        print $logfh "Starting tree estimation: ",`date`;
+        my @seqs = keys %$seqfiles;
+        $treestring = &getkmerdisttree(\@seqs,"$absprefix.tree");
+        die "Unable to generate tree using MUSCLE. Check input FASTA files for correctness" if(! -e "$absprefix.tree");
+        unlink "$absprefix.tree" if(! defined $options{'keeptmpfiles'});
+        print $logfh "Ending tree estimation with MUSCLE: ",`date`;
+    }
+
+    print $logfh "Processing tree $treestring\n" if($options{'debug'});
+
+    print $logfh "Using guide tree $treestring\n";
+    my ($treeio) = TreeParse->new();
+    my ($status) = $treeio->parseNHTree($treestring,1);
+    if($status != 0){
+        die "Failed to parse tree \"$treestring\", expecting Newick format\n";
+    }
+    my $tree = $treeio->getTree();
+    my @genomenodest= $tree->leaves_under($tree);
+    #Returns leaves of tree left->right.
+    foreach my $i (@genomenodest){
+        push @genomenodes,$i->attributes->{'nh_label'};
+    }
+}
+else{
+    #Without a tree, order genomes by decreasing recorded sequence length
+    foreach my $seqfile (sort {$seqlengthlookup->{basename($b)} <=> $seqlengthlookup->{basename($a)}} (keys %$seqfiles)){
+        my $speciesname = basename($seqfile);
+        die "Can't find species $speciesname" if($seqlengthlookup->{$speciesname}<=0);
+        $speciesname =~ s/\.[^.]+//g;
+        $speciesname =~ s/$cleanregex/_/g;
+        push @genomenodes,$speciesname;
+    }
+}
+print $logfh "Processing ",scalar(@genomenodes)," genomes\n" if($options{'debug'});
+
+########################################
+# Pairwise alignment steps
+#
+# Generate pairwise alignments
+# using the Nucmer packages
+# Input:  FASTA files of input sequences and guide tree
+# Output: Pairwise alignments in MAF format
+my $currdir = `pwd`;
+chomp $currdir;
+print $logfh "Current dir:'$currdir'\n" if($options{'debug'});
+
+print STDERR scalar(@genomenodes), " genomes\n";
+print STDERR "Starting Nucmer: ",`date`;
+
+my @maffiles;
+my @dupmaffiles; #maf files of duplicated regions
+
+chdir($options{'directory'}) or die "Can't change directory to $options{'directory'}: $!";
+
+#Each genome in turn is the reference; the genomes after it in
+#@genomenodes (or all others with 'fullsearch') are the queries
+for(my $i=0;$i<@genomenodes;$i++){
+    my $genomename1 = $genomenodes[$i];#$genomenodes[$i]->attributes->{'nh_label'};
+    print $logfh `date`;
+    unlink "$absprefix.$genomename1.queries.fsa" if(-e "$absprefix.$genomename1.queries.fsa");
+    my @queryfiles;
+    #Searches are performed uni-directional by default
+    my $start = (defined $options{'fullsearch'}) ? 0 : $i+1;
+    #for(my $j=0;$j<@nodes;$j++){
+    for(my $j=$start;$j<@genomenodes;$j++){
+        if($j!=$i){
+            my $genomename2 = $genomenodes[$j];#$genomenodes[$j]->attributes->{'nh_label'};
+            die "Unable to find sequences for genome $genomename2. Check FASTA file names or headers" if(! exists $genome2seqslookup->{$genomename2});
+            #print STDERR "$genome2seqslookup->{$genomename2}->[0]->[3]\n";
+            push @queryfiles,"$options{'directory'}/$genomename2";
+        }
+    }
+    if(@queryfiles>0){
+        #Concatenate all query genomes into a single multi-FASTA file
+        my $catcmd = "cat ".join(' ',@queryfiles)." > $absprefix.$genomename1.queries.fsa";
+        print $logfh "CMD:$catcmd\n" if($options{'debug'});
+        print $logfh `$catcmd`;
+
+        my $deltafile = "$absprefix.$genomename1.filt.delta";
+        my $origdeltafile = "$absprefix.$genomename1.delta";
+        my $clusterfile = "$absprefix.$genomename1.cluster";
+        $deltafile =~ s/\\-/-/g;
+        $clusterfile =~ s/\\-/-/g;
+        #Run NUCMER/PROMER
+        #TODO: consider forking child processes here to provide simple parallelization
+        #print STDERR "Looking for existing delta file $deltafile\n";
+        if(($options{'skipsearch'}) && -e "$deltafile"){
+            print STDERR "Using existing delta file $deltafile\n";
+            print $logfh `touch $deltafile`;
+        }
+        else{
+            print STDERR ".";
+            &runsearch("$options{'directory'}/$genomename1","$absprefix.$genomename1.queries.fsa",$genomename1);
+        }
+        push @maffiles,&generateMAF($deltafile,$genomename1);
+        if($options{'duplications'}){
+            #Duplications are extracted from the unfiltered delta file
+            my $dupscmd = "$deltadupscmd $origdeltafile > $absprefix.$genomename1.dups.maf";
+            print $logfh "CMD:$dupscmd\n" if($options{'debug'});
+            print $logfh `$dupscmd`;
+            push @dupmaffiles,"$absprefix.$genomename1.dups.maf";
+        }
+        unlink "$absprefix.$genomename1.queries.fsa" if(! defined $options{'keeptmpfiles'});
+        #Keep for --plot
+        if(! defined $options{'plot'} && ! defined $options{'keepsearchfiles'} && ! defined $options{'duplications'}){
+            unlink "$deltafile" if(! defined $options{'keeptmpfiles'});
+        }
+        unlink "$absprefix.$genomename1.delta" if(! defined $options{'keeptmpfiles'});
+        unlink "$clusterfile" if(! defined $options{'keeptmpfiles'});
+    }
+}
+
+print STDERR "\nFinished Nucmer ",`date`;
+
+
+########################################
+#
+# Progressive alignment steps
+# Produce multiple alignment blocks from pairwise input
+# Input:  Set of MAF files for each pairwise comparison
+# Output: Single MAF file containing all alignment blocks
+#
+my $mafoutput;
+my $pwfasta = "$absprefix.xmfa";
+my $allfsafile = "$absprefix.all.fsa";
+if($method eq "mugsywga"){
+    print STDERR "Starting MUGSYWGA: ",`date`;
+    my $pwdupsfasta = "$absprefix.dups.xmfa";
+    print $logfh `rm -f $allfsafile`;
+    #Collect every per-species FASTA file into one file with rewritten headers
+    foreach my $fsafile (keys %$seqfiles){
+        #HACK
+        #Temp fix for headers
+        my $perlcmd = q|perl -ne 'if(/^\>([^\s\:]+)\:([^\s\:]+)/){if($1 ne $2){ print ">$1.$2 $1\n";} elsif(defined $1 && defined $2){print ">$1.$2\n";}else {print ">$1\n";}}else{die if(/\>/);print $_}'|;
+        print $logfh "CMD:cat $fsafile | $perlcmd >> $allfsafile\n";
+        print $logfh `cat $fsafile | $perlcmd >> $allfsafile`;
+        unlink $fsafile if(! defined $options{'keeptmpfiles'});
+    }
+    #Convert the pairwise MAF files into a single XMFA file
+    unlink "$pwfasta";
+    foreach my $maf (@maffiles){
+        my $maf2fasta = "$maf2fastacmd < $maf >> $pwfasta";
+        print $logfh "CMD:$maf2fasta\n" if($options{'debug'});
+        print $logfh `$maf2fasta`;
+    }
+    if($options{'duplications'}){
+        unlink "$pwdupsfasta";
+        foreach my $maf (@dupmaffiles){
+            my $maf2fasta = "$maf2fastacmd < $maf >> $pwdupsfasta";
+            print $logfh "CMD:$maf2fasta\n" if($options{'debug'});
+            print $logfh `$maf2fasta`;
+            unlink $maf if(! defined $options{'keeptmpfiles'});
+        }
+    }
+    if(scalar(@maffiles)==0 || -z "$pwfasta"){
+        #No pairwise alignments: write an empty but well-formed MAF file
+        open FILE,'+>',"$absprefix.maf" or die "Can't open $absprefix.maf";
+        print FILE "##maf version=1 scoring=mugsy\n";
+        print FILE "##eof maf\n";
+        close FILE;
+        $mafoutput="$absprefix.maf";
+    }
+    else{
+        $mafoutput = &runMugsywga($allfsafile,$pwfasta,$pwdupsfasta,$options{'distance'},$options{'minlength'});
+        print STDERR "\nFinished MUGSYWGA: ",`date`;
+        unlink $allfsafile if(! defined $options{'keeptmpfiles'});
+    }
+}
+elsif($method eq "tba"){
+    print STDERR "Starting TBA: ",`date`;
+
+    #Munge tree format so that TBA is happy
+    #convert , to ' '
+    $treestring =~ s/,/ /g;
+    #remove distances
+    $treestring =~ s/\:-*\d+\.\d+e*-*\d*//g;
+    $treestring =~ s/\:-*\d+//g;
+    $treestring =~ s/\:-*\d+//g;
+    #remove ;
+    $treestring =~ s/\;//g;
+
+    die "No MAF files" if(scalar(@maffiles)==0);
+    $mafoutput = &runTBA(join(' ',@maffiles),$treestring);
+
+    print STDERR "Finished TBA: ",`date`;
+}
+else{
+    print STDERR "Unsupported multiple alignment method\n";
+}
+
+#Remove the pairwise MAF files unless asked to keep them
+foreach my $file (@maffiles){
+    if(! defined $options{'keepsearchfiles'}){
+        unlink $file if(! defined $options{'keeptmpfiles'});
+    }
+}
+chdir($currdir) or die;
+
+#Optional iterative refinement: rerun the aligner on its own output, then
+#annotate every alignment block of the refined MAF with its multiplicity
+if(defined $options{'refine'}){
+    if($method eq "mugsywga"){
+        print `mv $absprefix.maf $absprefix.maf.orig`;
+        print STDERR "Alignment completed. MAF output $absprefix.maf.orig\n";
+        print STDERR "Starting iterative refinement: ",`date`;
+        if($options{'refine'} eq ''){
+            $options{'refine'} = 'true';
+        }
+        my $refinecmd = "$mugsywgacmd --outfile $absprefix --seq $absprefix.all.fsa --aln $absprefix.xmfa --distance $options{'distance'} --minlength $options{'minlength'} --refine $options{'refine'} --infile $absprefix.maf.orig\n";
+        print $logfh "CMD:$refinecmd\n" if($options{'debug'});
+        my $ret = system($refinecmd);
+        #Report a failing exit status instead of ignoring it; the open
+        #below still decides whether processing can continue
+        print STDERR "WARNING: refinement command exited with status $ret\n" if($ret != 0);
+        print STDERR "\nFinished refinement: ",`date`;
+        #print `mv $absprefix.maf.refined $absprefix.maf`;
+        #Add mult to MAF file for easy parsing
+        #TODO make mult draft genome aware
+        open FILE,'<',"$absprefix.maf.refined" or die "Can't open refined MAF $absprefix.maf.refined";
+        open OUTFILE,'+>',"$absprefix.maf" or die "Can't open MAF $absprefix.maf";
+        #$mult counts the 's' lines of the block currently held in @buffer
+        my $mult=0;
+        my @buffer;
+        while(my $line=<FILE>){
+            if($line =~ /^a\s+/){
+                #Start of a new block: emit the buffered one first
+                if(scalar(@buffer)>0){
+                    if($buffer[0] =~ /^a\s+/){
+                        chomp $buffer[0];
+                        $buffer[0] .= " mult=$mult\n";
+                    }
+                    print OUTFILE @buffer;
+                }
+                $mult=0;
+                @buffer=();
+                push @buffer, $line;
+            }
+            else{
+                if($line =~ /^s\s+/){
+                    $mult++;
+                }
+                push @buffer,$line;
+            }
+        }
+        #Emit the last block; the buffer is empty when the input was empty
+        if(scalar(@buffer)>0 && $buffer[0] =~ /^a\s+/){
+            chomp $buffer[0];
+            $buffer[0] .= " mult=$mult\n";
+        }
+        print OUTFILE @buffer;
+        close FILE;
+        #Close explicitly so the output is flushed before the file is
+        #copied or read below
+        close OUTFILE or die "Can't close MAF $absprefix.maf: $!";
+    }
+}
+if(! defined $options{'directory'}){
+    print `cp $absprefix.maf .`;
+    print STDERR "Final output (MAF format): ./$options{'prefix'}.maf\n";
+}
+else{
+    print STDERR "Final output (MAF format): $absprefix.maf\n";
+}
+
+#TODO, create separate singletons and core MAF files
+#open MAFFILE, "$absprefix.maf" or die "Can't open maf file $absprefix.maf";
+#open SFILE, "+>$absprefix.singletons.maf" or die "Can't open file $absprefix.maf";
+#my $printsingle=0;
+#my $printcore=0;
+#while(my $line=<MAFFILE>){
+#    if($line =~ /^a/){
+#	if($line =~ /mult=(\d+)/){
+#	    if(defined $1){
+#		if(mult==1){
+#		    $printsingle=1;
+#		}
+#	    }
+#	}
+#    }
+#}
+#close MAFFILE;
+
+
+#Write one gnuplot script per genome, for every genome except the last one
+if($options{'plot'}){
+    foreach my $genomename (@genomenodes[0 .. $#genomenodes-1]){
+        #Neither value is ever assigned; both interpolate as empty strings
+        my ($mugsyresultsfile,$varoutput);
+        my $plottarget = "$absprefix.$genomename.plot.gp";
+        my $plotcmd = "cat $mafoutput | $mugsyinstall/plot.pl $absprefix $genomename $mugsyresultsfile $varoutput > $plottarget";
+        if($options{'debug'}){
+            print $logfh "CMD:$plotcmd\n";
+        }
+        print `$plotcmd`;
+        print STDERR "Alignment and synteny plot (gnuplot format): $plottarget\n";
+    }
+}
+#if(!exists $options{'refine'}){
+    #print $logfh "Iterative refinement realigns each region by running a second iteration of Mugsy and can sometimes improve the alignment. To run a second iteration and produce an output file $absprefix.refined.maf:\n $mugsywgacmd --outfile $absprefix --seq $allfsafile --aln $pwfasta --distance $options{'distance'} --minlength $options{'minlength'} --refine true --infile $absprefix.maf\n";
+#}
+
+print STDERR "Finished ",`date`;
+
+
+#####################
+#Utility functions
+#
+#Produce guide tree based on kmer distance between sequences
+#in the FASTA files listed in $seqfiles (array reference).
+#Write output in newick format to $outfile and return the tree as a
+#single-line string. Dies if $outfile cannot be read afterwards.
+sub getkmerdisttree{
+    my($seqfiles,$outfile) = @_;
+
+    #Temporary header/sequence files, in the order they are passed to cat
+    my @files;
+    foreach my $seqfile (@$seqfiles){
+        my $speciesname = basename($seqfile);
+        $speciesname =~ s/\.[^.]+//g;
+        #Each genome becomes one FASTA record: a header file followed by
+        #all of its sequence lines
+        print $logfh "Writing $absprefix$speciesname.header\n" if($options{'debug'});
+        print $logfh `echo ">$speciesname" > $absprefix$speciesname.header`;
+        #Pad lines shorter than 60 characters with N
+        my $fillfsacode = '$ns = "";$line = $_;chomp $line; if(length($line)<60){$ns = \'N\' x (60-length($line));} print $line,$ns,"\n"';
+        print $logfh `grep -h -v "^>" $seqfile | perl -ne '$fillfsacode' > $absprefix$speciesname.sequence`;
+        push @files,"$absprefix$speciesname.header";
+        push @files,"$absprefix$speciesname.sequence";
+    }
+    my $filestr = join(' ',@files);
+    unlink $outfile if(-e $outfile);
+    my $mcmd = "cat $filestr |  $musclecmd -clusteronly -in - -tree1 $outfile 2>&1";
+    print $logfh "CMD:$mcmd\n" if($options{'debug'});
+    print $logfh `$mcmd`;
+
+    #Start from an empty string so an empty tree file does not leave the
+    #result undefined
+    my $treestring = '';
+    open FILE,'<',$outfile or die "Can't open treefile $outfile";
+    while(my $line=<FILE>){
+        #Keep only the first token and the branch length of each line
+        $line =~ s/(^\S+)\s+.*\:(-*\d+\.\d+)/$1:$2/;
+        $treestring .= $line;
+    }
+    close FILE;
+    $treestring =~ s/\n//g;
+    foreach my $file (@files){
+        unlink $file if(! defined $options{'keeptmpfiles'});
+    }
+    return $treestring;
+}
+
+#
+#Align the sequences in $queryfile against $reffile with Nucmer.
+#All output files are named "$absprefix.$prefix.<suffix>"; the alignments
+#to use downstream end up in "$absprefix.$prefix.filt.delta"
+sub runsearch{
+    my($reffile,$queryfile,$prefix) = @_;
+    my $outbase = "$absprefix.$prefix";
+    my $debug = $options{'debug'};
+
+    #Step 1: NUCMER, with stderr folded into the log
+    my $nucmercmd = "$searchcmd $reffile $queryfile -p $outbase $options{'nucmeropts'} 2>&1";
+    print $logfh "CMD:$nucmercmd\n" if($debug);
+    print $logfh `$nucmercmd`;
+
+    #Step 2: produce the .filt.delta file
+    my $deltacmd;
+    if(defined $options{'nofilter'}){
+        #Filtering disabled: the raw delta file is used unchanged
+        $deltacmd = "cp $outbase.delta $outbase.filt.delta";
+    }
+    else{
+        #delta-filter chains hits and excludes spurious matches
+        #-1 specifies intersection of LIS chaining of hits wrt ref and query; includes rearrangements but ignores duplications
+        #-m specifies union of LIS chaining of hits wrt ref and query; includes rearrangments and duplications
+        my $chainingopt = $options{'colinear'} ? '-m' : "-1";
+        $deltacmd = "$deltafiltcmd $chainingopt $outbase.delta > $outbase.filt.delta";
+    }
+    print $logfh "CMD:$deltacmd\n" if($debug);
+    print $logfh `$deltacmd`;
+}
+
+#
+#Convert the filtered delta file for genome $prefix to MAF.
+#  $deltafile - delta file that must exist; only its existence is checked,
+#               the conversion itself reads "$absprefix.$prefix.filt.delta"
+#  $prefix    - genome name used in the intermediate file names
+#Returns the list of MAF files to align: "$absprefix.$prefix.maf" for
+#mugsywga, or one "<genome1>.<genome2>.sing.maf" per split file for tba.
+#Dies if $deltafile is missing or a split file name cannot be parsed.
+sub generateMAF{
+    my($deltafile,$prefix) = @_;
+    die "Nucmer search failed. Can't find delta file $deltafile" if(! -e $deltafile);
+    #Convert delta to MAF using $delta2mafcmd
+    #TODO: Reduce IO bottlenecks in this step. A lot of wasted time here.
+    #1)Write MAF files directly as part of nucmer to limit IO bottlenecks reading and writing files
+    #2)Also merge $fixnamescmd and $mafsortcmd into code that directly dumps MAF
+    #3)Support direct output of pairwise MAF from a multi-way comparison to make TBA happy
+    #if($options{'skipsearch'} && -e "$absprefix.$prefix.orig.maf" && -e "$absprefix.$prefix.maf"){
+    if($options{'skipsearch'} && -e "$absprefix.$prefix.maf"){
+	#Reuse the MAF file from a previous run
+	print STDERR "Using existing MAF file $absprefix.$prefix.maf\n";
+	print $logfh `touch $absprefix.$prefix.maf`;
+    }
+    else{
+	my $mafcmd;
+	if($method eq "tba"){
+	    #Only the TBA pipeline sorts the MAF
+	    $mafcmd = "$delta2mafcmd $absprefix.$prefix.filt.delta | $fixnamescmd | $mafsortcmd /dev/stdin $prefix 1> $absprefix.$prefix.maf ";
+	}
+	else{
+	    $mafcmd = "$delta2mafcmd $absprefix.$prefix.filt.delta | $fixnamescmd 1> $absprefix.$prefix.maf ";
+	    #Sort is not necessary?
+	    #$mafcmd = "$delta2mafcmd $absprefix.$prefix.filt.delta | $fixnamescmd | $mafsortcmd /dev/stdin $prefix 1> $absprefix.$prefix.maf ";
+	}
+	print $logfh "CMD:$mafcmd\n" if($options{'debug'});
+	print $logfh `$mafcmd`;
+    }
+    
+    #MAF files handed back to the caller
+    my @mafprocessed;
+
+    #This wrapper supports TBA for evaluation purposes
+    #TBA requires splitting the MAF
+    if($method eq "tba"){
+	#Create species specific MAF files for TBA
+	#Make sure absprefix ends in a '_' This is used for parsing
+	my $splitmafcmd = "cat $absprefix.$prefix.maf | $mugsyinstall/splitmaf.pl $absprefix"."_";
+	print $logfh "CMD:$splitmafcmd\n" if($options{'debug'});
+	#The output lines of the split command are used as file names below.
+	#This lexical is local to the sub and hides the script-level @maffiles
+	my @maffiles = `$splitmafcmd`;
+
+	foreach my $file (@maffiles){
+	    print $logfh "Processing MAF file $file\n";
+	    chomp $file;
+	    #Genome names are the two dot-separated fields after the last '_'
+	    my($genomename1,$genomename2) = ($file =~ /_([^\.\/]+)\.([^\.\/]+)\.maf/);
+	    die "Can't parse names from $file" if(!defined $genomename1 || !defined $genomename2);
+	    #Note: TBA is picky about the input file names
+	    #MUST be of the form "$genomename1.$genomename2.sing.maf"
+	    #Can't use $absprefix for now
+	    #I've only been able to get singlecov to work with alignments
+	    #that map one-to-one for some reason. It appears that
+	    #singlecov does not handle multiple sequences per
+	    #genome and will lead to removal of all regions that
+	    #match multiple sequences. Besides, if delta-filter -1 is used then I think
+	    #is singlecov redundant?
+	    my $singcmd;
+	    if(defined $options{'nofilter'}){
+		print STDERR "WARNING:singlecov removes regions that match multiple sequences and may trim aligned regions. Run without -nofilter to keep all best alignments between pairs of sequences\n";
+		$singcmd = "$singlecovcmd $file > $genomename1.$genomename2.sing.maf";
+	    }
+	    else{
+		#Alignments were already filtered, so the file is only renamed
+		$singcmd = "mv $file $genomename1.$genomename2.sing.maf";
+	    }
+	    #Escape '|' so the shell does not treat it as a pipe
+	    $singcmd =~ s/\|/\\|/g;
+	    print $logfh "CMD:$singcmd\n" if($options{'debug'});
+	    if($options{'skipsearch'} && -e "$genomename1.$genomename2.sing.maf"){
+		print $logfh `touch $genomename1.$genomename2.sing.maf`;
+	    }
+	    else{
+		print $logfh `$singcmd`;
+	    }
+	    #NOTE(review): the returned name ends in a space, so a later
+	    #unlink of this list entry would not match the file -- confirm intent
+	    push @mafprocessed,"$genomename1.$genomename2.sing.maf ";
+	}
+	if(! defined $options{'keepsearchfiles'}){
+	    unlink "$absprefix.$prefix.maf" if(! defined $options{'keeptmpfiles'});
+	}
+    }
+    else{#For mugsywga and others, pass through
+	push @mafprocessed,"$absprefix.$prefix.maf";
+    }
+    return @mafprocessed;
+}
+
+#
+# Run the Mugsy whole genome aligner.
+#   $fsafile     - FASTA file with all input sequences (--seq)
+#   $pwfasta     - pairwise alignments (--aln)
+#   $pwdupsfasta - pairwise alignments of duplications, passed only when
+#                  the 'duplications' option is set
+#   $distance, $minlength - passed through as --distance / --minlength
+# Returns the name of the resulting MAF file; dies if the aligner fails.
+sub runMugsywga{
+    my($fsafile,$pwfasta,$pwdupsfasta,$distance,$minlength) = @_;
+    my $keeptmp = defined $options{'keeptmpfiles'};
+    my $stdoutlog = "$absprefix.mugsywga.out";
+    my $stderrlog = "$absprefix.mugsywga.stderr";
+
+    #Optional arguments; each stays an empty string when not requested
+    my($colinearopt,$uniqueopt,$dupsopt,$nestedlcbs) = ("","--unique true","","");
+    $colinearopt = "--refine colinear" if(exists $options{'colinear'});
+    $uniqueopt = "" if(defined $options{'skipunique'});
+    $dupsopt = ",$pwdupsfasta --duplications true " if($options{'duplications'});
+    $nestedlcbs = "--allownestedlcbs true" if(exists $options{'allownestedlcbs'});
+
+    #Run MUGSYWGA, capturing stdout and stderr in separate files
+    my $alignercmd = join(' ',
+                          $mugsywgacmd,
+                          '--outfile',$absprefix,
+                          '--seq',$fsafile,
+                          '--aln',"$pwfasta$dupsopt",
+                          '--distance',$distance,
+                          '--minlength',$minlength,
+                          $colinearopt,$uniqueopt,$nestedlcbs,
+                          '>',$stdoutlog,
+                          '2>',$stderrlog);
+    print $logfh "CMD:$alignercmd\n";# if($options{'debug'});
+    system($alignercmd) == 0 or die "system $alignercmd failed: $?:$!";
+
+    #Cleanup and return; the captured output is kept when debugging
+    if(!$options{'debug'} && !$keeptmp){
+        unlink $stdoutlog;
+        unlink $stderrlog;
+    }
+    if(!$keeptmp){
+        unlink "$pwfasta";
+        unlink "$pwdupsfasta" if(defined $options{'duplications'});
+    }
+    return "$absprefix.maf";
+}
+#
+# Run TBA on the MAF files named in $maffiles (one space separated
+# string) using the guide tree $treestring, then label the blocks of the
+# result. Returns the name of the labelled MAF file; dies if TBA fails.
+sub runTBA{
+    my($maffiles,$treestring) = @_;
+    my $tbaresult = "$absprefix.maf";
+    my $labelled = "$tbaresult.labelled";
+
+    #Run TBA straight up
+    my $tbainvocation = join(' ',
+                             $tbacmd,
+                             "\"$treestring\"",
+                             $maffiles,
+                             $tbaresult,
+                             '1>',"$absprefix.tba.out",
+                             '2>',"$absprefix.tba.stderr");
+    if($options{'debug'}){
+        print $logfh "CMD:$tbainvocation\n";
+    }
+    system($tbainvocation) == 0 or die "system $tbainvocation failed: $?:$!";
+    chdir($currdir) or die;
+
+    #Add block labels to MAF output in the form label=# These are
+    #used in post-processing as a unique identifier to keep track of
+    #blocks
+    my $labelinvocation = "cat $absprefix.maf | $labelblockscmd > $labelled";
+    if($options{'debug'}){
+        print $logfh "CMD:$labelinvocation\n";
+    }
+    print $logfh `$labelinvocation`;
+
+    return "$labelled";
+}
+
+#Append one FASTA record to the file $fname.
+#  $fname  - output file; created if missing, otherwise appended to
+#  $header - header text, written after a leading '>'
+#  $seqs   - array reference of sequence lines, written one per line
+#Dies if the file cannot be opened or closed.
+sub printFASTA{
+    my($fname,$header,$seqs) = @_;
+
+    print $logfh "Writing file $fname\n" if($options{'debug'});
+
+    #Lexical handle and three-argument open: the file name can never be
+    #interpreted as an open mode
+    open(my $fastafh,'>>',$fname) or die "Can't open file $fname: $!\n";
+    #A check of $header against $fastaproblemchars used to sit here, but
+    #its body was commented out; the header is written unchanged
+    print $fastafh ">$header\n";
+    foreach my $s (@$seqs){
+        print $fastafh $s,"\n";
+    }
+    #Buffered write errors only surface when the handle is closed
+    close($fastafh) or die "Can't close file $fname: $!\n";
+}
+
+##################################
+# Read a list of FASTA file paths, one per line, from the file $tag.
+# Leading and trailing whitespace is removed from every line. Each path
+# that exists and is readable is appended to the array referenced by
+# $refArray; blank lines are ignored and any other entry is reported
+# on STDERR and skipped.
+# Dies if $tag cannot be opened.
+# @author - Mahesh Vangala
+#################################
+sub getFastaFilesPath {
+        my ($tag,$refArray) = @_;
+        # Lexical handle and three-argument open: $tag is only ever
+        # treated as a file name
+        open(my $listfh, '<', $tag) or die "Error in opening the file, $tag, $!\n";
+        while(my $file = <$listfh>) {
+                $file =~ s/^\s+//;
+                $file =~ s/\s+$//;
+                next if($file eq '');
+                if(-e $file && -r $file) {
+                        push @$refArray, $file;
+                }
+                else {
+                        # Tell the user instead of silently dropping the entry
+                        print STDERR "WARNING: skipping missing or unreadable file $file listed in $tag\n";
+                }
+        }
+        close $listfh;
+}
+
+__END__
+
+
+
+
+
diff --git a/mugsy-seqan/projects/library/apps/Makefile b/mugsy-seqan/projects/library/apps/Makefile
new file mode 100644
index 0000000..4764fb5
--- /dev/null
+++ b/mugsy-seqan/projects/library/apps/Makefile
@@ -0,0 +1,56 @@
+# Builds the SeqAn applications listed in TARGETS. The first rule in this
+# file (mugsy) is the default goal.
+SEQAN_BASE = ..
+
+# Link against runtime library on Linux systems
+OS_NAME=$(shell uname)
+ifeq ($(OS_NAME),Linux)
+  LDFLAGS += -lrt
+endif
+
+tbb_root?=../extra/tbb
+
+#check, if tbb_root is not absolute path (the filter keeps only /* paths)
+# NOTE(review): CWD and SLASH are not set anywhere in this file; confirm they
+# are provided by the environment or by the calling build.
+ifeq ($(filter /% $(SLASH)%, $(subst :, ,$(tbb_root)) ),)
+    # also changes related variables like work_dir
+    override tbb_root := $(CWD)$(SLASH)..
+    export TBB21_INSTALL_DIR := $(tbb_root)
+endif
+
+# explicitly compile for a 32 or 64 bit platform
+#CXXFLAGS += -m32
+#CXXFLAGS += -m64
+
+CXXFLAGS += -I$(SEQAN_BASE)
+CXXFLAGS += -O9 
+#-march=nocona -mfpmath=sse -msse2
+#CXXFLAGS += -O0 -g
+CXXFLAGS += -pedantic -W -Wall
+CXXFLAGS += -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 ${CXXEXTRAFLAGS}
+
+TARGETS = dfi/dfi seqan_tcoffee/seqan_tcoffee seqcons/seqcons razers/paramChooser razers/razers pair_align/pair_align micro_razers/micro_razers tree_recon/tree_recon mugsy/mugsy
+mugsy: mugsy/mugsy
+
+# These targets are command names, not files. Declaring them phony keeps the
+# application directories of the same name from being mistaken for them.
+.PHONY: all clean check_seqan_base mugsy dfi razers micro_razers seqan_tcoffee seqcons pair_align tree_recon
+
+all:           check_seqan_base $(TARGETS)
+dfi:           check_seqan_base dfi/dfi 
+razers:        check_seqan_base razers/razers razers/paramChooser
+micro_razers:  check_seqan_base micro_razers/micro_razers
+seqan_tcoffee: check_seqan_base seqan_tcoffee/seqan_tcoffee 
+seqcons:       check_seqan_base seqcons/seqcons
+pair_align:    check_seqan_base pair_align/pair_align
+tree_recon:    check_seqan_base tree_recon/tree_recon
+
+check_seqan_base:
+	@if [ ! -d "$(SEQAN_BASE)/seqan" ]; then \
+		echo "The directory $(SEQAN_BASE)/seqan could not be found!"; \
+		exit 1; \
+	fi
+
+clean:
+	rm -f $(TARGETS) $(TARGETS:=.o)
diff --git a/mugsy-seqan/projects/library/apps/mugsy/mugsy.cpp b/mugsy-seqan/projects/library/apps/mugsy/mugsy.cpp
new file mode 100644
index 0000000..64fe0f7
--- /dev/null
+++ b/mugsy-seqan/projects/library/apps/mugsy/mugsy.cpp
@@ -0,0 +1,6035 @@
+
+//This code has mixed conventions because it combines different
+//original sources. I've done my best to use the Seqan conventions
+//where I can, but there is quite a jumble between STL and Seqan data
+//structures
+
+#define SEQAN_PROFILE // enables the SEQAN_PROTIMESTART(__myProfileTime) line placed after the includes
+//#define SEQAN_PROFILE2 //more verbose. SEQAN_PROFILE must also be defined
+//#define SEQAN_TEST
+#define NDEBUG //define this to disable assert statements; it is defined ahead of every #include in this file
+//#define KEEPCHAINTMP
+
+#define TIMING // enables the two file-scope time_t variables below
+#ifdef TIMING
+#include <time.h>
+time_t now; // NOTE(review): presumably the current timestamp used for progress timing; usage is not visible here -- confirm
+time_t lasttime; // NOTE(review): presumably the previous timestamp; confirm against the code that reports timings
+#endif
+
+//#define DEBUGGING //SVA custom debugging
+//#define DEBUGGING_GRAPH //SVA custom debugging
+//#define DEBUGGING2 //SVA verbose custom debugging
+
+//There is some overhead to capturing scoring info
+//Undef to turn off reporting of SP scores
+//#define SCORING 
+
+
+#include <seqan/basic.h>
+#include <seqan/graph_msa.h>
+#include <seqan/graph_types.h>
+#include <seqan/graph_align.h>
+#include <seqan/modifier.h>
+#include <seqan/refinement.h>
+
+#include "rna_alphabet.h"
+#include <seqan/modifier.h>
+#include <seqan/misc/misc_cmdparser.h>
+
+//#include "sangiuoli/mummer/trunk/MUMmer3.20/src/tigr/delta.hh"
+
+#include <sstream>
+#include <fstream>
+#include <vector>
+#include <set>
+#include <queue>
+#include <list>
+#include <bitset>
+#include <algorithm>
+
+#include <cstdlib>
+#include <errno.h>
+//#include <stdio.h>
+//#include <stdlib.h>
+#include <libgen.h>
+
+#include <boost/graph/iteration_macros.hpp>
+#include <boost/graph/adjacency_list.hpp>
+#include <boost/graph/filtered_graph.hpp>
+#include <boost/graph/graph_utility.hpp>
+#include <boost/graph/connected_components.hpp>
+#include <boost/graph/strong_components.hpp>
+#include <boost/graph/topological_sort.hpp>
+#include <boost/tokenizer.hpp>
+#include <boost/lexical_cast.hpp>
+#include <boost/graph/dijkstra_shortest_paths.hpp>
+#include <boost/graph/graph_traits.hpp>
+#include <boost/graph/properties.hpp>
+
+#include <boost/graph/edmonds_karp_max_flow.hpp>
+#include <boost/graph/kolmogorov_max_flow.hpp>
+#include <boost/graph/push_relabel_max_flow.hpp>
+
+#include <boost/graph/adjacency_list.hpp>
+#include <boost/graph/read_dimacs.hpp>
+#include <boost/graph/graph_utility.hpp>
+
+#ifdef SEQAN_PROFILE
+SEQAN_PROTIMESTART(__myProfileTime); // Profiling
+#endif
+
+//Example transform in multiz-tba/trunk/transformcoords.cpp
+#include "transformcoords.h"
+
+using namespace seqan;
+using namespace std;
+
+// Plain aggregate of four unsigned values describing one sequence span.
+// NOTE(review): the per-field notes are inferred from the field names only;
+// confirm them against the code that fills this struct.
+struct s_offset
+{
+  unsigned int offset;   // presumably the start offset of the span
+  unsigned int spanlen;  // presumably the length of the span
+  unsigned int seqlen;   // presumably the length of the whole sequence
+  unsigned int orient;   // presumably an orientation/strand flag
+};
+
+// Plain aggregate: a pair of ints plus a block number.
+// NOTE(review): first/second look like interval end points belonging to
+// block `blocknum`; confirm against the code that uses this struct.
+struct iloc
+{
+  int first;
+  int second;
+  int blocknum;
+};
+
+
+// Bundle of alignment statistics; it is the return type of
+// alignmentEvaluationCustom().
+// NOTE(review): the per-field notes are inferred from the field names only;
+// confirm them against the code that fills this struct.
+struct s_score
+{
+  // scalar counters
+  unsigned int numGapEx;    // presumably number of gap extensions
+  unsigned int numGap;      // presumably number of gaps
+  unsigned int numPairs;    // presumably number of aligned pairs
+  unsigned int numIdents;   // presumably number of identical pairs
+  unsigned int alignLen;    // presumably alignment length in columns
+  unsigned int totalLen;    // presumably summed length of the sequences
+  unsigned int alignScore;  // presumably the alignment score
+  unsigned int seqCount;    // presumably number of sequences
+
+  // per-entry counters
+  String<unsigned int> colCount;
+  String<unsigned int> pairCount;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+namespace SEQAN_NAMESPACE_MAIN
+{
+
+  // Ordering functor for fragment strings: a string sorts before another
+  // one when it is strictly longer (longest first).
+  struct vectorsizecmp
+  {
+    bool operator()(String<Fragment<> > const & lhs,
+		    String<Fragment<> > const & rhs) const
+    {
+      return length(lhs) > length(rhs);
+    }
+  };
+
+  // Ordering functor over integer keys: key i sorts before key j when the
+  // value stored for i in the referenced map is greater than the value
+  // stored for j (largest value first).
+  // Both keys must be present in the map; this is only checked by assert.
+  // The map is referenced, not copied, and must outlive the functor.
+  template <typename TMap>
+  class lcblencmp
+  {
+  public:
+    lcblencmp(TMap & m) : myMap(&m) {}
+
+    bool operator()(const int i, const int j) const
+    {
+      assert(myMap != NULL);
+      typename TMap::const_iterator lhs = myMap->find(i);
+      typename TMap::const_iterator rhs = myMap->find(j);
+      assert(lhs != myMap->end());
+      assert(rhs != myMap->end());
+      return lhs->second > rhs->second;
+    }
+
+    TMap *myMap;
+  };
+  
+  // Ordering functor over vertex descriptors of one graph: a vertex sorts
+  // before another one when its degree is strictly larger.
+  // The graph is referenced, not copied, and must outlive the functor.
+  template<typename TGraph>
+  class vertexdegreecmp
+  {
+  public:
+    vertexdegreecmp(TGraph & g) : myGraph(&g) {}
+
+    template<typename TVertexDescriptor>
+    bool operator()(const TVertexDescriptor v1, const TVertexDescriptor v2) const
+    {
+      TGraph & graph = *myGraph;
+      return degree(graph, v1) > degree(graph, v2);
+    }
+
+    TGraph * myGraph;
+  };
+  
+  // Ordering functor over edge descriptors: an edge sorts before another
+  // one when the score stored for it in the referenced map is smaller.
+  // Both edges must be present in the map (not checked).
+  // The map is referenced, not copied, and must outlive the functor.
+  template<typename TPosScores>
+  class edgeposscorecmp
+  {
+  public:
+    edgeposscorecmp(TPosScores & p) : posscores(&p) {}
+
+    template<typename TEdgeDescriptor>
+    bool operator()(const TEdgeDescriptor &e1, const TEdgeDescriptor &e2) const
+    {
+      typename TPosScores::const_iterator lhs = posscores->find(e1);
+      typename TPosScores::const_iterator rhs = posscores->find(e2);
+      return lhs->second < rhs->second;
+    }
+
+    TPosScores * posscores;
+  };
+  
+  // Lexicographic ordering functor for pair-like values: compares .first,
+  // and falls back to .second when the .first members are equal.
+  // (An older variant that decided ties on e2.second alone, treating false
+  // as "interval close", was disabled in favour of the plain comparison.)
+  template<typename TPos>
+  class poscmp
+  {
+  public:
+    poscmp() {}
+
+    bool operator()(const TPos &e1, const TPos &e2) const
+    {
+      if(!(e1.first==e2.first)){
+	return e1.first < e2.first;
+      }
+      return e1.second < e2.second;
+    }
+  };
+
+  // Ordering functor over vertex descriptors of one graph: a vertex sorts
+  // before another one when its fragmentBegin() is smaller.
+  // The graph is referenced, not copied, and must outlive the functor.
+  template<typename TGraph>
+  class vertexposcmp
+  {
+  public:
+    vertexposcmp(TGraph & g) : myGraph(&g) {}
+
+    template<typename TVertexDescriptor>
+    bool operator()(const TVertexDescriptor v1, const TVertexDescriptor v2) const
+    {
+      TGraph & graph = *myGraph;
+      return fragmentBegin(graph, v1) < fragmentBegin(graph, v2);
+    }
+
+    TGraph * myGraph;
+  };
+
+  // Ordering functor over edge descriptors, ascending.
+  // Primary key: abs(cargo(edge)), the consistency score.
+  // Secondary key (used only for ties, and only when a non-empty score map
+  // was supplied): the adjacency score stored for the edge in that map.
+  // A default-constructed functor has no score map and therefore orders by
+  // abs(cargo) alone.
+  template<typename TScoreMap>
+  class edgescorecmp
+  {
+  public:
+    edgescorecmp() : myScoreMap(NULL) {}
+    edgescorecmp(TScoreMap * s) : myScoreMap(s) {}
+
+    template<typename TEdgeDescriptor>
+    bool operator()(const TEdgeDescriptor e1, const TEdgeDescriptor e2) const
+    {
+      const bool haveScores = (myScoreMap!=NULL) && (myScoreMap->size()>0);
+      if(haveScores){
+	//With a score map in use every compared edge must have an entry
+	assert(myScoreMap->find(e1)!=myScoreMap->end());
+	assert(myScoreMap->find(e2)!=myScoreMap->end());
+	if(abs(cargo(e1)) == abs(cargo(e2))){
+	  //Tie on consistency score: decide on adjacency score
+	  return myScoreMap->find(e1)->second < myScoreMap->find(e2)->second;
+	}
+      }
+      //Consistency score decides
+      return abs(cargo(e1))<abs(cargo(e2));
+    }
+
+    TScoreMap * myScoreMap;
+  };
+
+  // Ordering functor for pointers to blocks that all lie on one sequence:
+  // a block sorts before another one when its begCoord is smaller.
+  // currentSeq records the sequence the functor was built for; that both
+  // blocks belong to it is only checked by assert.
+  template<typename TBlock, typename TSize=unsigned int>
+  class blockorder
+  {
+  public:
+    blockorder(TSize s) : currentSeq(s) {}
+
+    bool operator()(const TBlock * s1, const TBlock * s2) const
+    {
+      assert(s1->currentSeq == currentSeq);
+      assert(s2->currentSeq == currentSeq);
+      return s1->begCoord < s2->begCoord;
+    }
+
+    TSize currentSeq;
+  };
+
+  // A block: the coordinate span covered on one sequence (currentSeq) by a
+  // set of graph vertices (currV), tagged with a component id (c) and an
+  // orientation character.
+  template<typename TComponent = unsigned int,
+	   typename TSize = unsigned int,
+	   typename TVertexDescriptor = unsigned int,
+	   typename TPos = unsigned int>
+  class SVABlock
+  {
+  public:
+    // Default construction leaves all scalar members uninitialized;
+    // currV starts out empty.
+    SVABlock()
+    {}
+
+    // Member-wise copy.
+    SVABlock(const SVABlock & other)
+      :begCoord(other.begCoord),
+       endCoord(other.endCoord),
+       orient(other.orient),
+       c(other.c),
+       currentSeq(other.currentSeq),
+       currV(other.currV)
+    {}
+
+    // Block of component inc on sequence s, spanning b..e with orientation
+    // o, and containing the single vertex v.
+    SVABlock(TComponent inc, TSize s, TPos b, TPos e, char o, TVertexDescriptor v)
+      :begCoord(b),
+       endCoord(e),
+       orient(o),
+       c(inc),
+       currentSeq(s)
+    {
+      currV.push_back(v);
+    }
+
+    // Appends vertex v of graph g to the block and widens the span so that
+    // it covers fragmentBegin(g,v) .. fragmentBegin(g,v)+fragmentLength(g,v).
+    // v must lie on currentSeq (only checked by assert).
+    template<typename TGraph>
+    void addVertex(TGraph & g, TVertexDescriptor v){
+      assert(sequenceId(g,v)==currentSeq);
+      if(begCoord>fragmentBegin(g,v)){
+	begCoord=fragmentBegin(g,v);
+      }
+      if(endCoord<fragmentBegin(g,v)+fragmentLength(g,v)){
+	endCoord=fragmentBegin(g,v)+fragmentLength(g,v);
+      }
+      currV.push_back(v);
+    }
+
+    TPos begCoord;                        // smallest begin coordinate seen
+    TPos endCoord;                        // largest begin+length seen by addVertex
+    char orient;                          //'+' or '-', consider changing to false, true to be consistent with fragment.reversed
+    TComponent c;                         // component id given at construction
+    TSize currentSeq;                     // sequence the block lies on
+    std::vector<TVertexDescriptor> currV; // vertices that make up the block
+  };
+}
+// Reports the memory footprint of the calling process by parsing
+// /proc/self/stat.
+//
+// vm_usage      out: the vsize field divided by 1024
+// resident_set  out: the rss field multiplied by the page size in KB
+//
+// Both outputs are set to 0.0 when /proc/self/stat cannot be opened or does
+// not contain the expected fields (e.g. on systems without a Linux-style
+// /proc). Previously the two fields were read from uninitialized variables
+// in that case.
+// NOTE(review): the whitespace-delimited parse assumes the process name
+// (second field) contains no blanks.
+void process_mem_usage(double& vm_usage, double& resident_set)
+{
+   using std::ios_base;
+   using std::ifstream;
+   using std::string;
+
+   vm_usage     = 0.0;
+   resident_set = 0.0;
+
+   // 'file' stat seems to give the most reliable results
+   //
+   ifstream stat_stream("/proc/self/stat",ios_base::in);
+   if(!stat_stream){
+     // no /proc on this system: report zeros
+     return;
+   }
+
+   // dummy vars for leading entries in stat that we don't care about
+   //
+   string pid, comm, state, ppid, pgrp, session, tty_nr;
+   string tpgid, flags, minflt, cminflt, majflt, cmajflt;
+   string utime, stime, cutime, cstime, priority, nice;
+   string O, itrealvalue, starttime;
+
+   // the two fields we want
+   //
+   unsigned long vsize = 0;
+   long rss = 0;
+
+   stat_stream >> pid >> comm >> state >> ppid >> pgrp >> session >> tty_nr
+               >> tpgid >> flags >> minflt >> cminflt >> majflt >> cmajflt
+               >> utime >> stime >> cutime >> cstime >> priority >> nice
+               >> O >> itrealvalue >> starttime >> vsize >> rss; // don't care about the rest
+
+   if(!stat_stream){
+     // short or malformed record: keep the zero defaults
+     return;
+   }
+
+   long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages
+   vm_usage     = vsize / 1024.0;
+   resident_set = rss * page_size_kb;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Connected components
+//////////////////////////////////////////////////////////////////////////////
+
+
+//////////////////////////////////////////////////////////////////////////////
+//Return the minimum distance separating two half-open intervals
+//[s1,e1) and [s2,e2), i.e. the size of the gap between them.
+//Overlapping, nested and directly adjacent intervals have distance 0.
+//
+//Each interval is expected to satisfy s<=e. The arguments are taken by
+//non-const reference but are never modified.
+//
+//Earlier revisions returned the distance between the two start
+//coordinates for disjoint intervals, which over-estimated the gap by the
+//length of the leading interval.
+template<typename TSize>
+inline unsigned int getIntervalDist(TSize &s1, TSize &e1, TSize &s2, TSize &e2){
+  if(s1==s2){
+    //Same start coordinate
+    return 0;
+  }
+  if(s1<s2){
+    //Interval 1 leads; the two overlap when interval 2 starts before e1
+    return (s2<e1) ? 0 : s2-e1;
+  }
+  //Interval 2 leads; the two overlap when interval 1 starts before e2
+  return (s1<e2) ? 0 : s1-e2;
+}
+
+//Smallest getIntervalDist() between vertex u and those vertices in the
+//iterator range [vmapiter.first, vmapiter.second) that lie on the same
+//sequence as u. The range elements are pairs whose .second is a vertex.
+//Returns the largest unsigned int when no vertex in the range shares u's
+//sequence (including when the range is empty).
+template<typename TGraph,
+	 typename TVertexDescriptor,
+	 typename TGenomeVertexMapIter>
+inline unsigned int distance(TGraph const& g,
+			     TVertexDescriptor const & u,
+			     std::pair<TGenomeVertexMapIter,TGenomeVertexMapIter> &vmapiter){
+  typedef unsigned int TSize;
+  TSize best = std::numeric_limits<TSize>::max();
+  TGenomeVertexMapIter cur = vmapiter.first;
+  while(cur != vmapiter.second){
+    if(sequenceId(g,u)==sequenceId(g,cur->second)){
+      //Span of u and span of the candidate, both as [begin, begin+length)
+      TSize uBeg = fragmentBegin(g,u);
+      TSize uEnd = uBeg+fragmentLength(g,u);
+      TSize otherBeg = fragmentBegin(g,cur->second);
+      TSize otherEnd = otherBeg+fragmentLength(g,cur->second);
+      TSize gap = getIntervalDist(uBeg,uEnd,otherBeg,otherEnd);
+      if(gap<best){
+	best = gap;
+      }
+    }
+    ++cur;
+  }
+  return best;
+}
+
+
+template<typename TSpec, 
+	 typename TVertexDescriptor, 
+	 typename TTokenMap, 
+	 typename TComponents, 
+	 typename TVal, 
+	 typename TGenomeVertexMap, 
+	 typename TNames,
+	 typename TSize>
+inline void
+_cc_visit_g_ranked(Graph<TSpec> const& g,
+		   TVertexDescriptor const u,
+		   TTokenMap& tokenMap,
+		   TComponents& components,
+		   TVal label, 
+		   TVal &maxlabel, //in/out: incremented here whenever a new component is opened
+		   std::vector<TGenomeVertexMap> & genomeMap,
+		   TNames &genomeNames,
+		   TSize &maxdist)
+{
+  /* (SEQAN_CHECKPOINT is left disabled here.)
+   *
+   * Recursive depth-first visit used by
+   * connected_components_by_genome_ranked_RECURSIVE().
+   *
+   * Does nothing when u is already marked in tokenMap. Otherwise u is
+   * marked, assigned a component label, recorded in genomeMap[label] under
+   * its genome id (genomeNames[sequenceId(g,u)]), and every out-edge of u is
+   * followed recursively, highest abs(cargo) first.
+   *
+   * The label is normally the one passed in. If genomeMap[label] already
+   * holds a vertex of the same genome and distance() from u to those
+   * vertices is greater than maxdist, a new component is opened instead:
+   * maxlabel is incremented, label is set to it (for u and for everything
+   * reached from u), and an empty map is appended to genomeMap. distance()
+   * returns the largest unsigned int when none of those vertices is on u's
+   * sequence, so that case also opens a new component.
+   *
+   * g            graph being traversed
+   * u            vertex to visit
+   * tokenMap     per-vertex visited flags, updated in place
+   * components   out: per-vertex component label
+   * label        component to extend (passed by value)
+   * maxlabel     in/out: highest label handed out so far
+   * genomeMap    one genome-id -> vertex multimap per label; must already
+   *              contain an entry at index label
+   * genomeNames  indexed by sequence id, yields the genome id
+   * maxdist      distance threshold for keeping same-genome vertices together
+   */
+	typedef typename Iterator<Graph<TSpec>, AdjacencyIterator>::Type TAdjacencyIterator; 
+	typedef typename EdgeDescriptor<Graph<TSpec> >::Type TEdgeDescriptor; 
+	typedef typename Iterator<Graph<TSpec>, OutEdgeIterator>::Type TOutEdgeIterator;
+	//Only vertices that have not been visited yet are processed
+	if(getProperty(tokenMap, u) == false){ 
+	  //TODO support for genomeidx in addition to sequenceid
+	  assert(sequenceId(g,u)<length(genomeNames));
+#ifdef DEBUGGING
+	  std::cout << "Connecting vertex " << u << " from sequence " << sequenceId(g,u) << std::endl;
+#endif
+	  assert(label<genomeMap.size());
+	  //Check for multiple copies of this genome in the current component
+	  TSize gname = genomeNames[sequenceId(g,u)];
+	  if(genomeMap[label].find(genomeNames[sequenceId(g,u)]) != genomeMap[label].end()){
+	    std::pair<typename TGenomeVertexMap::iterator,typename TGenomeVertexMap::iterator> vmapiter =genomeMap[label].equal_range(gname); 
+	    unsigned int dist = distance(g,u,vmapiter);
+#ifdef DEBUGGING
+	    std::cout << "Multiple copies found for genome " << gname
+		      << " seq " << sequenceId(g,u)  << " on vertex " << u << std::endl;
+	    std::cout << "Minimum distance to a copy: " << dist << std::endl;
+	    for(typename TGenomeVertexMap::iterator it=vmapiter.first;it!=vmapiter.second;it++){
+	      std::cout << "Copy " << it->second  << " seq_id:" << sequenceId(g,it->second) << std::endl;
+	      assert((TSize)genomeNames[sequenceId(g,it->second)]==gname);
+	      assert(dist>=0);
+	      assert(maxdist>0);
+	      if(dist<=maxdist){
+		assert(sequenceId(g,it->second)==sequenceId(g,u));
+	      }
+	    }
+#endif
+	    //Start a new component when the closest copy is more than maxdist away
+	    if(dist>maxdist){
+	      //Increment and set maxlabel
+	      ++maxlabel;
+	      label=maxlabel;
+#ifdef DEBUGGING
+	      std::cout << "Starting new component " << label << " for " << u << std::endl;
+#endif
+	      genomeMap.push_back(TGenomeVertexMap());
+	      //Ensure genome is not present in the existing label
+	      assert(label<genomeMap.size());
+	      assert(label==genomeMap.size()-1);
+	      assert(genomeMap[label].find(gname) == genomeMap[label].end());
+	    }
+	  }
+#ifdef DEBUGGING
+	  std::cout << "Component " << label << " V:" << u << " seq:" << sequenceId(g,u) << std::endl;
+#endif
+	  assert(label<genomeMap.size());
+	  //Add vertex,u to component,label
+	  genomeMap[label].insert(std::make_pair(gname,u));
+	  assignProperty(tokenMap, u, true);
+	  assignProperty(components, u, label);
+	  
+	  //Capture all edges for this vertex
+	  std::vector<TEdgeDescriptor> ccedges;
+	  for(TOutEdgeIterator itOut(g, u);!atEnd(itOut); ++itOut) {
+	    //TODO if(!visited) shortcut
+	    ccedges.push_back(*itOut);
+	  }
+	  assert(ccedges.size()==degree(g,u));
+	  //Sort edges on consistency and visit most consistent edges first in a greedy fashion
+	  sort(ccedges.begin(),ccedges.end(),edgescorecmp<std::map<TEdgeDescriptor,float> >()); // default-constructed comparator: ascending abs(cargo) only
+
+	  for(typename std::vector<TEdgeDescriptor>::reverse_iterator cit = ccedges.rbegin();cit!=ccedges.rend();++cit){ // reverse iteration = highest score first
+	    TVertexDescriptor s = getSource(*cit);
+	    TVertexDescriptor t = getTarget(*cit);
+	    assert(s==u || t==u);
+	    if(s!=u){ // u is the target of this edge: continue at the source
+	      assert(getProperty(tokenMap,t)==true);
+	      if (getProperty(tokenMap, s) == false) {
+#ifdef DEBUGGING
+		std::cout << " edge " << u << "-" << s;
+		std::cout << std::endl;
+#endif
+		_cc_visit_g_ranked(g, s, tokenMap, components, label, maxlabel,genomeMap,genomeNames,maxdist);
+	      }
+	    }
+	    else{
+	      if(t!=u){ // u is the source of this edge: continue at the target
+		assert(getProperty(tokenMap,s)==true);
+		if (getProperty(tokenMap, t) == false) {
+#ifdef DEBUGGING
+		  std::cout << " edge " << u << "-" << t;
+		  std::cout << std::endl;
+#endif
+		  _cc_visit_g_ranked(g, t, tokenMap, components, label, maxlabel,genomeMap,genomeNames,maxdist);
+		}
+	      }
+	      else{
+		assert(false); // an edge with s==t==u (self loop) is not expected
+	      }
+	    }
+	  }
+	}
+}
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+.Function.connected_components_by_genome_ranked_RECURSIVE:
+..cat:Graph
+..summary:Decomposes a graph into components, greedy on edge score, using the recursive visitor _cc_visit_g_ranked.
+..signature:connected_components_by_genome_ranked_RECURSIVE(g, components, genomeNames, maxdist)
+..param.g:In-parameter:The graph to decompose.
+..param.components:Out-parameter:A property map.
+...remarks:Each vertex is mapped to a component id. If two vertices share the same id they are in the same component.
+..param.genomeNames:In-parameter:Indexed by sequence id, yields the genome id of that sequence.
+..param.maxdist:In-parameter:Distance threshold handed to _cc_visit_g_ranked.
+...remarks:A vertex whose genome is already present in the component being built opens a new component when its distance to the closest such vertex is greater than maxdist.
+..remarks:All edges are sorted by abs(cargo) and processed from highest to lowest; every endpoint that is still unvisited seeds a visit with a fresh label. Vertices that remain unvisited afterwards (degree 0) each receive their own label.
+..returns: The number of labels handed out.
+*/
+
+template<typename TSpec, typename TComponents, typename TNames, typename TSize2>
+typename Size<Graph<TSpec> >::Type
+connected_components_by_genome_ranked_RECURSIVE(Graph<TSpec> const& g,
+						TComponents& components,
+						TNames &genomeNames,
+						TSize2 maxdist)
+{
+  //SEQAN_CHECKPOINT
+
+	typedef typename Size<Graph<TSpec> >::Type TSize;
+	typedef typename Iterator<Graph<TSpec>, EdgeIterator>::Type TEdgeIterator;
+	typedef typename Iterator<Graph<TSpec>, VertexIterator>::Type TVertexIterator;
+	typedef typename VertexDescriptor<Graph<TSpec> >::Type TVertexDescriptor;
+	typedef typename EdgeDescriptor<Graph<TSpec> >::Type TEdgeDescriptor;
+
+	typedef std::multimap<TSize,TVertexDescriptor> TGenomeVertexMap; // genome id -> vertices of that genome in one component
+
+	typedef typename Iterator<Graph<TSpec>, AdjacencyIterator>::Type TAdjacencyIterator; 
+	typedef typename Iterator<Graph<TSpec>, OutEdgeIterator>::Type TOutEdgeIterator;
+
+	clear(components);
+	resizeVertexMap(g,components);
+
+#ifdef DEBUGGING	
+	std::cout << "Calculating connected components on" << length(genomeNames) << "genomes" << std::endl;
+#endif
+
+	// Initialization: no vertex has been visited yet
+	String<bool> tokenMap;
+	fill(tokenMap, getIdUpperBound(_getVertexIdManager(g)), false);
+	
+	// Genome tracker: genomeMap[label] lists the vertices of component label by genome
+	std::vector<TGenomeVertexMap> genomeMap(1);
+
+
+	// Find connected components greedy on consistency score
+	TSize label = 0; // label handed to the next seed vertex
+	TSize maxlabel = label; // highest label in use; the visitor may raise it
+	TEdgeIterator itE(g);
+	std::vector<TEdgeDescriptor> ccedges;
+	std::set<TVertexDescriptor> visited; // not used below
+	for(;!atEnd(itE);goNext(itE)){
+	  ccedges.push_back(*itE);
+	}
+	//Sort on consistency score
+	sort(ccedges.begin(),ccedges.end(),edgescorecmp<std::map<TEdgeDescriptor,float> >()); // default-constructed comparator: ascending abs(cargo) only
+	TVertexDescriptor s,t;
+	//From most consistent edges in G to least, determine CC
+	for(typename std::vector<TEdgeDescriptor>::reverse_iterator cit = ccedges.rbegin();cit!=ccedges.rend();cit++){
+	  s = getSource(*cit);
+	  if (getProperty(tokenMap, s) == false) {
+#ifdef DEBUGGING
+	    std::cout << "Component" << maxlabel
+		      << std::endl;
+#endif
+	    //Capture all vertices connected to s
+	    _cc_visit_g_ranked(g, s, tokenMap, components, label, maxlabel, genomeMap,genomeNames,maxdist);
+	    ++maxlabel; // move past every label the visit used and prepare an empty map for the next seed
+	    label=maxlabel;
+	    genomeMap.push_back(TGenomeVertexMap());
+	  }
+	  t = getTarget(*cit);
+	  if (getProperty(tokenMap, t) == false) { // t was not reached by the visit started at s
+	    _cc_visit_g_ranked(g, t, tokenMap, components, label, maxlabel, genomeMap,genomeNames,maxdist);
+	    ++maxlabel;
+	    label=maxlabel;
+	    genomeMap.push_back(TGenomeVertexMap());
+	  }
+	}
+	//Capture all vertices with no edges degree==0
+	TVertexIterator it(g);
+	TVertexDescriptor u;
+	for(;!atEnd(it);goNext(it)) {
+	  u = getValue(it);
+	  if (getProperty(tokenMap, u) == false) {
+#ifdef DEBUGGING
+	    std::cout << "Component" << maxlabel
+		      << std::endl;
+#endif
+	    assert(degree(g,u)==0);
+	    _cc_visit_g_ranked(g, u, tokenMap, components, label, maxlabel, genomeMap,genomeNames,maxdist);
+	    ++maxlabel;
+	    label=maxlabel;
+	    genomeMap.push_back(TGenomeVertexMap());
+	  }
+	}
+	return label; // equals maxlabel here: one past the last label assigned
+}
+
+
+//connected_components_by_genome_ranked()
+//
+//Iterative (explicit stack) connected components, greedy on consistency
+//score, that limits how far apart same-genome anchors of one component are.
+//
+//Used to convert segment graph (V=genome segments on one genome) into
+//anchor graph (V=genome segments on multiple genomes)
+//
+//Runs a DFS seeded from the edges of g in descending abs(cargo) order. A
+//vertex joins the component of the vertex it was reached from, unless that
+//component already holds an anchor of the same genome and the closest such
+//anchor is too far away (see the maxdist test below); then it starts a new
+//component. Fills components (vertex -> label) and returns the label count.
+template<typename TSpec, typename TComponents, typename TNames, typename TSize2>
+typename Size<Graph<TSpec> >::Type
+connected_components_by_genome_ranked(Graph<TSpec> const& g,
+				      TComponents& components,
+				      TNames &genomeNames,
+				      TSize2 maxdist){
+  //SEQAN_CHECKPOINT
+  typedef typename Size<Graph<TSpec> >::Type TSize;
+  typedef typename Iterator<Graph<TSpec>, EdgeIterator>::Type TEdgeIterator;
+  typedef typename Iterator<Graph<TSpec>, VertexIterator>::Type TVertexIterator;
+  typedef typename VertexDescriptor<Graph<TSpec> >::Type TVertexDescriptor;
+  typedef typename EdgeDescriptor<Graph<TSpec> >::Type TEdgeDescriptor;
+  
+  typedef std::multimap<TSize,TVertexDescriptor> TGenomeVertexMap; // genome id -> vertices of that genome in one component
+  
+  typedef typename Iterator<Graph<TSpec>, AdjacencyIterator>::Type TAdjacencyIterator; 
+  typedef typename Iterator<Graph<TSpec>, OutEdgeIterator>::Type TOutEdgeIterator;
+  
+  clear(components);
+  resizeVertexMap(g,components);
+  
+#ifdef DEBUGGING	
+  std::cout << "Calculating connected components" << std::endl;
+#endif
+  
+  // Initialization: nothing visited, every component entry set to 0
+  String<bool> tokenMap;
+  fill(tokenMap, getIdUpperBound(_getVertexIdManager(g)), false);
+  fill(components,getIdUpperBound(_getVertexIdManager(g)), 0);
+  for(unsigned int i=0;i<getIdUpperBound(_getVertexIdManager(g));++i){
+    assignProperty(components,i,0);
+    assert(getProperty(components,i)==0);
+  }
+  // Genome tracker: starts with one map and gains one more for every new label
+  std::vector<TGenomeVertexMap> genomeMap(1);
+  
+  //TODO
+  //Initial CC with maxdist
+  //Save all nodes,edges > maxdist 
+  //Score edges by adjacency score
+  //Break edges < cutoff || keep only highest scoring node per genome
+  //Recompute CC
+  
+  // Connected components
+  TEdgeIterator itE(g);
+  std::vector<TEdgeDescriptor> ccedges;
+  int maxlabel=-1; // highest label assigned so far; -1 means none yet
+  for(;!atEnd(itE);goNext(itE)){
+    ccedges.push_back(*itE);
+  }
+  sort(ccedges.begin(),ccedges.end(),edgescorecmp<std::map<TEdgeDescriptor,float> >()); // default-constructed comparator: ascending abs(cargo) only
+  std::vector<std::pair<TVertexDescriptor,TVertexDescriptor> > stack; // DFS stack of (vertex to visit, vertex it was reached from)
+  //outer loop ensures we visit disconnected subgraphs, considering most consistent edges first
+  for(typename std::vector<TEdgeDescriptor>::reverse_iterator cit = ccedges.rbegin();cit!=ccedges.rend();cit++){
+    TVertexDescriptor s = getSource(*cit);
+    TVertexDescriptor t = getTarget(*cit);
+#ifdef DEBUGGING
+    std::cout << "Edge score:" << cargo(*cit) << std::endl;
+#endif
+    if (getProperty(tokenMap, s) == false){
+      if(getProperty(tokenMap,t) == false) {
+	stack.push_back(std::make_pair(s, t));
+	stack.push_back(std::make_pair(t, s)); // popped first: t sees an unvisited prev and opens a new component
+      }
+      else{
+	stack.push_back(std::make_pair(s, t));
+      }
+    }
+    else{
+      if(getProperty(tokenMap,t) == false) {
+	stack.push_back(std::make_pair(t, s));
+      }
+    }
+    while(!stack.empty()){
+      std::pair<TVertexDescriptor,TVertexDescriptor> & node = stack.back();
+      TVertexDescriptor u = node.first;
+      TVertexDescriptor prev = node.second;
+      assert(sequenceId(g,u)<length(genomeNames));
+      TSize gname = genomeNames[sequenceId(g,u)];
+      stack.pop_back(); // node refers to the removed element and must not be used after this line
+      if(getProperty(tokenMap,u)==false){
+	assert(getProperty(components,u)==0);
+#ifdef DEBUGGING
+	std::cout << "New node " << u << " " << getProperty(components,u) 
+		  << std::endl;
+#endif
+	//Encountered new node, assign label, track genome
+	assignProperty(tokenMap, u, true);
+	int label=-1;
+	if(getProperty(tokenMap,prev)==false){
+	  //Use new label
+	  ++maxlabel;
+#ifdef DEBUGGING
+	  std::cout << "Starting new component " << maxlabel << " for " << u << std::endl;
+#endif
+	  label=maxlabel;
+	  genomeMap.push_back(TGenomeVertexMap());
+	}
+	else{
+	  //there is already a label
+	  int prevlabel=getProperty(components,prev);
+	  if(genomeMap[prevlabel].find(gname) != genomeMap[prevlabel].end()){
+	    //there is already a genome, retrieve all the anchors in this genome to determine the anchors
+	    std::pair<typename TGenomeVertexMap::iterator,typename TGenomeVertexMap::iterator> vmapiter =genomeMap[prevlabel].equal_range(gname); 
+	    unsigned int dist = distance(g,u,vmapiter);
+#ifdef DEBUGGING
+	    std::cout << "Multiple copies found for genome " << gname
+		      << " seq " << sequenceId(g,u)  << " on vertex " << u << std::endl;
+	    std::cout << "Minimum distance to a copy: " << dist << std::endl;
+	    for(typename TGenomeVertexMap::iterator it=vmapiter.first;it!=vmapiter.second;it++){
+	      std::cout << "Copy " << it->second  << " seq_id:" << sequenceId(g,it->second) << std::endl;
+	      assert((TSize)genomeNames[sequenceId(g,it->second)]==gname);
+	      assert(dist>=0);
+	      if(dist<=maxdist){
+		assert(sequenceId(g,it->second)==sequenceId(g,u));
+	      }
+	    }
+#endif
+	    /* Start a new component when the closest same-genome anchor is
+	     * at least maxdist away.
+	     * NOTE(review): this test is dist>=maxdist, while the recursive
+	     * visitor _cc_visit_g_ranked tests dist>maxdist; confirm which
+	     * boundary is intended.
+	     */
+	    if(dist>=maxdist){
+	      //Increment and set maxlabel
+	      ++maxlabel;
+#ifdef DEBUGGING
+	      std::cout << "Starting new component " << maxlabel << " for " << u << std::endl;
+#endif
+	      label=maxlabel;
+	      genomeMap.push_back(TGenomeVertexMap());	      
+	      //Ensure genome is not present in the existing label
+	      assert((unsigned int)maxlabel<genomeMap.size());
+	      assert(genomeMap[maxlabel].find(gname) == genomeMap[maxlabel].end());
+	    }
+	    else{
+#ifdef DEBUGGING
+	      std::cout << "Adding to component " << getProperty(components,prev) << " for " << u << std::endl;
+#endif
+	      label=prevlabel;
+	    }
+	  }
+	  else{
+#ifdef DEBUGGING
+	      std::cout << "Adding to component " << getProperty(components,prev) << " for " << u << std::endl;
+#endif
+	      label=prevlabel; // genome not yet present in prev's component: join it
+	  }
+	}
+
+	assignProperty(components, u, label);
+#ifdef DEBUGGING
+	std::cout << "V:" << u << " component " << getProperty(components,u) << std::endl;
+#endif
+	//assert(getProperty(components,u)>=0);
+	assert(getProperty(components,u) < genomeMap.size());
+
+	genomeMap[label].insert(std::make_pair(gname,u));
+      
+	//Add all edges from u to ccedges (this local vector hides the outer ccedges)
+	std::vector<TEdgeDescriptor> ccedges;
+	//Edge iterator
+	TOutEdgeIterator itOut(g, u);
+	for(;!atEnd(itOut); ++itOut) {
+	  if(getProperty(tokenMap,getSource(*itOut)) || getProperty(tokenMap,getTarget(*itOut))){
+	    ccedges.push_back(*itOut);
+	  }
+	}
+	//assert(ccedges.size()==degree(g,u));
+	sort(ccedges.begin(),ccedges.end(),edgescorecmp<std::map<TEdgeDescriptor,float> >());
+	//traverse scores low->high so highest scores are last on the stack
+	for(typename std::vector<TEdgeDescriptor>::iterator cit = ccedges.begin();cit!=ccedges.end();cit++){
+	  TVertexDescriptor s = getSource(*cit);
+	  TVertexDescriptor t = getTarget(*cit);
+	  assert(s==u || t==u);
+	  if(s!=u){ // u is the target of this edge: schedule the source
+	    assert(getProperty(tokenMap,t)==true);
+	    if (getProperty(tokenMap, s) == false) {
+#ifdef DEBUGGING
+	      std::cout << " edge " << u << "-" << s;
+	      std::cout << std::endl;
+	      std::cout << "Edge score:" << cargo(*cit) << std::endl;
+#endif
+	      stack.push_back(std::make_pair(s,u));
+	    }
+	  }
+	  else{
+	    if(t!=u){ // u is the source of this edge: schedule the target
+	      assert(getProperty(tokenMap,s)==true);
+	      if (getProperty(tokenMap, t) == false) {
+#ifdef DEBUGGING
+		std::cout << " edge " << u << "-" << t;
+		std::cout << std::endl;
+		std::cout << "Edge score:" << cargo(*cit) << std::endl;
+#endif
+		stack.push_back(std::make_pair(t,u));
+	      }
+	    }
+	    else{
+	      assert(false); // an edge with s==t==u (self loop) is not expected
+	    }
+	  }
+	}
+      }
+    }
+  }
+  //Capture all vertices with no edges degree==0
+  TVertexIterator it(g);
+  TVertexDescriptor u;
+  for(;!atEnd(it);goNext(it)) {
+    u = getValue(it);
+    if (getProperty(tokenMap, u) == false) {
+#ifdef DEBUGGING
+      std::cout << "Component" << maxlabel
+		<< std::endl;
+#endif
+      assert(degree(g,u)==0);
+      ++maxlabel;
+      assignProperty(components, u, maxlabel); // tokenMap is not updated here; it is not read again
+      genomeMap.push_back(TGenomeVertexMap());
+      TSize gname = genomeNames[sequenceId(g,u)];
+      assert(sequenceId(g,u)<length(genomeNames));
+      genomeMap[getProperty(components,u)].insert(std::make_pair(gname,u));
+    }
+  }
+  return maxlabel+1; // labels run 0..maxlabel, so this is the number of components
+}
+
+template<typename TSpec, typename TComponents>
+typename Size<Graph<TSpec> >::Type
+connected_components_ranked(Graph<TSpec> const& g,
+				      TComponents& components){
+  /* (SEQAN_CHECKPOINT is left disabled here.)
+   *
+   * Labels the connected components of g with an explicit-stack DFS that is
+   * seeded from the edges of g in descending abs(cargo) order and that also
+   * follows the out-edges of each vertex highest abs(cargo) first.
+   * A vertex reached from an already visited vertex receives that vertex's
+   * label; otherwise it opens a new label. Vertices still unvisited after
+   * all edges were processed (degree 0) each receive their own label.
+   *
+   * g           graph to decompose
+   * components  out: property map, vertex -> component label (0-based)
+   * returns     the number of labels assigned
+   */
+  typedef typename Size<Graph<TSpec> >::Type TSize;
+  typedef typename Iterator<Graph<TSpec>, EdgeIterator>::Type TEdgeIterator;
+  typedef typename Iterator<Graph<TSpec>, VertexIterator>::Type TVertexIterator;
+  typedef typename VertexDescriptor<Graph<TSpec> >::Type TVertexDescriptor;
+  typedef typename EdgeDescriptor<Graph<TSpec> >::Type TEdgeDescriptor;
+  
+  typedef typename Iterator<Graph<TSpec>, AdjacencyIterator>::Type TAdjacencyIterator; 
+  typedef typename Iterator<Graph<TSpec>, OutEdgeIterator>::Type TOutEdgeIterator;
+  
+  clear(components);
+  resizeVertexMap(g,components);
+  
+#ifdef DEBUGGING	
+  std::cout << "Calculating connected components" << std::endl;
+#endif
+  
+  // Initialization: nothing visited, every component entry set to 0
+  String<bool> tokenMap;
+  fill(tokenMap, getIdUpperBound(_getVertexIdManager(g)), false);
+  fill(components,getIdUpperBound(_getVertexIdManager(g)), 0);
+  for(unsigned int i=0;i<getIdUpperBound(_getVertexIdManager(g));++i){
+    assignProperty(components,i,0);
+    assert(getProperty(components,i)==0);
+  }
+  TEdgeIterator itE(g);
+  std::vector<TEdgeDescriptor> ccedges;
+  int maxlabel=-1; // highest label assigned so far; -1 means none yet
+  for(;!atEnd(itE);goNext(itE)){
+    ccedges.push_back(*itE);
+  }
+  sort(ccedges.begin(),ccedges.end(),edgescorecmp<std::map<TEdgeDescriptor,float> >()); // default-constructed comparator: ascending abs(cargo) only
+  std::vector<std::pair<TVertexDescriptor,TVertexDescriptor> > stack; // DFS stack of (vertex to visit, vertex it was reached from)
+  //outer loop ensures we visit disconnected subgraphs, considering most consistent edges first
+  for(typename std::vector<TEdgeDescriptor>::reverse_iterator cit = ccedges.rbegin();cit!=ccedges.rend();cit++){
+    TVertexDescriptor s = getSource(*cit);
+    TVertexDescriptor t = getTarget(*cit);
+    if (getProperty(tokenMap, s) == false){
+      if(getProperty(tokenMap,t) == false) {
+	stack.push_back(std::make_pair(s, t));
+	stack.push_back(std::make_pair(t, s)); // popped first: t sees an unvisited prev and opens a new component
+      }
+      else{
+	stack.push_back(std::make_pair(s, t));
+      }
+    }
+    else{
+      if(getProperty(tokenMap,t) == false) {
+	stack.push_back(std::make_pair(t, s));
+      }
+    }
+    while(!stack.empty()){
+      std::pair<TVertexDescriptor,TVertexDescriptor> & node = stack.back();
+      TVertexDescriptor u = node.first;
+      TVertexDescriptor prev = node.second;
+      stack.pop_back(); // node refers to the removed element and must not be used after this line
+      if(getProperty(tokenMap,u)==false){
+	assert(getProperty(components,u)==0);
+#ifdef DEBUGGING
+	std::cout << "New node " << u << " " << getProperty(components,u) 
+		  << std::endl;
+#endif
+	assignProperty(tokenMap, u, true);
+	int label=-1;
+	if(getProperty(tokenMap,prev)==false){
+	  //Use new label
+	  ++maxlabel;
+#ifdef DEBUGGING
+	  std::cout << "Starting new component " << maxlabel << " for " << u << std::endl;
+#endif
+	  label=maxlabel;
+	}
+	else{
+	  //there is already a label: inherit it from prev
+	  int prevlabel=getProperty(components,prev);
+#ifdef DEBUGGING
+	  std::cout << "Adding to component " << getProperty(components,prev) << " for " << u << std::endl;
+#endif
+	  label=prevlabel;
+	}
+
+	assignProperty(components, u, label);
+#ifdef DEBUGGING
+	std::cout << "V:" << u << " component " << getProperty(components,u) << std::endl;
+#endif
+	//Add all edges from u to ccedges (this local vector hides the outer ccedges)
+	std::vector<TEdgeDescriptor> ccedges;
+	//Edge iterator
+	TOutEdgeIterator itOut(g, u);
+	for(;!atEnd(itOut); ++itOut) {
+	  if(getProperty(tokenMap,getSource(*itOut)) || getProperty(tokenMap,getTarget(*itOut))){
+	    ccedges.push_back(*itOut);
+	  }
+	}
+	//assert(ccedges.size()==degree(g,u));
+	sort(ccedges.begin(),ccedges.end(),edgescorecmp<std::map<TEdgeDescriptor,float> >());
+	for(typename std::vector<TEdgeDescriptor>::iterator cit = ccedges.begin();cit!=ccedges.end();cit++){ // low->high, so the highest score ends up on top of the stack
+	  TVertexDescriptor s = getSource(*cit);
+	  TVertexDescriptor t = getTarget(*cit);
+	  assert(s==u || t==u);
+	  if(s!=u){ // u is the target of this edge: schedule the source
+	    assert(getProperty(tokenMap,t)==true);
+	    if (getProperty(tokenMap, s) == false) {
+#ifdef DEBUGGING
+	      std::cout << " edge " << u << "-" << s;
+	      std::cout << std::endl;
+#endif
+	      stack.push_back(std::make_pair(s,u));
+	    }
+	  }
+	  else{
+	    if(t!=u){ // u is the source of this edge: schedule the target
+	      assert(getProperty(tokenMap,s)==true);
+	      if (getProperty(tokenMap, t) == false) {
+#ifdef DEBUGGING
+		std::cout << " edge " << u << "-" << t;
+		std::cout << std::endl;
+#endif
+		stack.push_back(std::make_pair(t,u));
+	      }
+	    }
+	    else{
+	      assert(false); // an edge with s==t==u (self loop) is not expected
+	    }
+	  }
+	}
+      }
+    }
+  }
+  //Capture all vertices with no edges degree==0
+  TVertexIterator it(g);
+  TVertexDescriptor u;
+  for(;!atEnd(it);goNext(it)) {
+    u = getValue(it);
+    if (getProperty(tokenMap, u) == false) {
+#ifdef DEBUGGING
+      std::cout << "Component" << maxlabel
+		<< std::endl;
+#endif
+      assert(degree(g,u)==0);
+      ++maxlabel;
+      assignProperty(components, u, maxlabel); // tokenMap is not updated here; it is not read again
+    }
+  }
+  return maxlabel+1; // labels run 0..maxlabel, so this is the number of components
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+
+/**
+.Function.alignmentEvaluationCustom:
+..summary:Given a multiple alignment, this function calculates all kinds of alignment statistics.
+..cat:Graph
+..signature:
+alignmentEvaluationCustom(graph, score_type, gapExCount, gapCount, pairCount, pairIdent, numPairs, numIdentCols, len, totalLen)
+..param.graph:An alignment graph.
+...type:Spec.Alignment Graph
+..param.score_type:A score object.
+...type:Class.Score
+..param.gapExCount:Number of gap extensions.
+..param.gapCount:Number of gaps.
+..param.pairCount:Number of aligned pairs.
+..param.numPairs:Counter for each pair.
+..param.len:Alignment length.
+..returns:Score of the alignment.
+*/
+template<typename TStringSet, typename TCargo, typename TSpec, typename TScore, typename TSize> 
+//inline typename Value<TScore>::Type
+s_score 
+alignmentEvaluationCustom(Graph<Alignment<TStringSet, TCargo, TSpec> > const& g,
+		    TScore const& score_type,
+		    TSize& gapExCount,
+		    TSize& gapCount,
+		    TSize& pairCount,
+		    TSize& pairIdent,
+		    String<TSize>& numPairs,
+		    String<TSize>& numIdentCols,
+		    TSize& len,
+		    TSize& totalLen)
+{
+  //SEQAN_CHECKPOINT
+	typedef Graph<Alignment<TStringSet, TCargo, TSpec> > TGraph;
+	typedef typename Value<TScore>::Type TScoreValue;
+	typedef typename Value<typename Value<TStringSet>::Type>::Type TAlphabet;
+	TSize alphSize = ValueSize<TAlphabet>::VALUE;
+	s_score sscore;
+	// Initialization;
+	gapExCount = 0;
+	gapCount = 0;
+	pairCount = 0;
+	clear(numPairs);
+
+	// Convert the graph
+	String<char> mat;
+	convertAlignment(g, mat);
+	char gapChar = gapValue<char>();
+
+	TScoreValue gap = scoreGapExtend(score_type);
+	TScoreValue gapOpen = scoreGapOpen(score_type);
+	TSize nseq = length(stringSet(g));
+	len = length(mat) / nseq;
+
+	for(TSize i = 0; i<nseq; ++i) {
+	  totalLen += length(stringSet(g)[i]);
+	}
+
+	fill(numIdentCols, nseq+1, 0);
+	for(TSize j=0; j<=nseq; ++j) {
+	  assert(numIdentCols[j]==0);
+	}
+	char c;
+	for(TSize k=0;k<len; ++k) {
+	  TSize numIdents=0;
+	  for(TSize j=0; j<nseq; ++j) {
+	    if (value(mat, j*len+k) != gapChar) {
+	      if(numIdents==0){
+		 c = TAlphabet(value(mat, j*len+k));
+		 ++numIdents;
+	      }
+	      else{
+		if(TAlphabet(value(mat, j*len+k))==c){
+		  ++numIdents;
+		}
+		else{
+		  numIdents=0;
+		  break;
+		}
+	      }
+	    }
+	  }
+	  assert(numIdents<=nseq);
+	  numIdentCols[numIdents]++;
+	}
+
+	bool gapOpeni = false;
+	bool gapOpenj = false;
+	TScoreValue totalScore = 0;
+	fill(numPairs, alphSize * alphSize, 0);
+	for(TSize i = 0; i<nseq-1; ++i) {
+	  for(TSize j=i+1; j<nseq; ++j) {
+			for(TSize k=0;k<len; ++k) {
+				if (value(mat, i*len+k) != gapChar) {
+					if (value(mat, j*len + k) != gapChar) {
+						gapOpeni = false;
+						gapOpenj = false;
+						++pairCount;
+						if(TAlphabet(value(mat, i*len+k)) == TAlphabet(value(mat, j*len + k))){
+						  ++pairIdent;
+						}
+						TSize index1 = ordValue(TAlphabet(value(mat, i*len+k)));
+						TSize index2 = ordValue(TAlphabet(value(mat, j*len + k)));
+						value(numPairs, index1 * alphSize + index2) += 1;
+						totalScore += score(const_cast<TScore&>(score_type), TAlphabet(value(mat, i*len+k)), TAlphabet(value(mat, j*len + k)));
+					} else {
+						if (gapOpenj) {
+							++gapExCount;
+							totalScore += gap;
+						} else {
+							gapOpenj = true;
+							++gapCount;
+							totalScore += gapOpen;
+						}
+					}
+				} else if (value(mat, j*len + k) != gapChar) {
+						if (gapOpeni) {
+							++gapExCount;
+							totalScore += gap;
+						} else {
+							++gapCount;
+							gapOpeni = true;
+							totalScore += gapOpen;
+						}
+				}
+			}
+		}
+	}
+	sscore.alignScore = totalScore;
+	sscore.numGap = gapCount;
+	sscore.numGapEx = gapExCount;
+	sscore.numPairs = pairCount;
+	sscore.numIdents = pairIdent;
+	sscore.alignLen = len;
+	sscore.totalLen = totalLen;
+	sscore.colCount = numIdentCols;
+	sscore.seqCount = nseq;
+	assert(length(numIdentCols)==nseq+1);
+	sscore.pairCount = numPairs;
+	//return totalScore;
+	return sscore;
+}
+
+//doReadBlockFile()
+//Read set of LCBs
+//File format is
+//I seq1 orient start-end seq2 orient start-end ....
+//V v1 v2 v3 ;
+//An I line lists the sequences taking part in the LCBs that follow, each with
+//its orientation (+ or -) and its start-end range. A trailing ; is accepted.
+//A V line lists anchor ids; every ; closes one LCB. Anchor ids are expanded
+//to their member vertices through block2fragMap.
+//Any other line (an empty one included) discards the current I information.
+//start-end are only consulted when checkbounds is true: a vertex is then
+//kept only if its fragmentBegin lies within [start,end] of its sequence.
+//Vertices on sequences that the I line does not mention are always skipped.
+//
+//Output: one vector of vertices per LCB is appended to lcbs and
+//vertexOrientMap receives the orientation of every accepted vertex.
+
+
+template<typename TVertexDescriptor,
+         typename TNames,
+         typename TVertexOrientMap,
+         typename TVertexSeqMap,
+         typename TGraph>
+void doReadBlockFile(const std::string & filename,
+                     std::map<unsigned int, std::set<TVertexDescriptor> > & block2fragMap,
+                     std::vector<std::vector<TVertexDescriptor> > & lcbs,
+                     TNames &sequenceNames,
+                     TVertexOrientMap & vertexOrientMap,
+                     TVertexSeqMap & vertexSeqMap,
+                     TGraph & g,
+                     bool checkbounds){
+  typedef typename std::map<unsigned int, std::set<TVertexDescriptor> >::iterator TBlockMapIter;
+  std::ifstream file;
+  file.open(filename.c_str(), std::ios_base::in | std::ios_base::binary);
+  if(!file){
+    //Previously silent; the function then simply reports zero LCBs
+    std::cerr << "Could not open block file " << filename << std::endl;
+  }
+  std::string line;
+  //Anchor ids collected since the last ;
+  std::vector<unsigned int> currblock;
+  //Vertices accepted for the LCB being closed
+  std::vector<TVertexDescriptor> currlcb;
+  //Map sequence name -> char
+  std::map<std::string,char> sequenceOrientMap;
+  //Map sequence name -> (start,end) taken from the I line
+  std::map<std::string,std::pair<unsigned int, unsigned int> > sequenceCoordsMap;
+
+  unsigned int vertexcount = 0;
+  //Testing the result of getline itself avoids handling the last line twice
+  //when the file does not end with a newline, which duplicated the final LCB
+  while(std::getline(file,line)){
+    std::istringstream in(line);
+    std::string c;
+    in >> c;
+    if(c == "V"){
+      while(in>>c){
+        if(c != ";"){
+          currblock.push_back(atoi(c.c_str()));
+          continue;
+        }
+        //End of one LCB: expand its anchors into vertices
+        for(std::vector<unsigned int>::iterator it=currblock.begin();it!=currblock.end();it++){
+          TBlockMapIter fragit = block2fragMap.find(*it);
+          assert(fragit!=block2fragMap.end());
+          if(fragit==block2fragMap.end()){
+            //Unknown anchor id; nothing to add (only reachable with NDEBUG)
+            continue;
+          }
+          for(typename std::set<TVertexDescriptor>::iterator vit=fragit->second.begin();vit!=fragit->second.end();++vit){
+            assert(vertexSeqMap.find(*vit)!=vertexSeqMap.end());
+            std::string sname(toCString(sequenceNames[vertexSeqMap[*vit]]));
+            //Check if segment is reported as part of the LCB
+            //The LCB identification step may not report all sequences that are part of the anchor
+            if(sequenceOrientMap.find(sname) != sequenceOrientMap.end()){
+              if(!checkbounds
+                 || (fragmentBegin(g,*vit)>=sequenceCoordsMap[sname].first
+                     && fragmentBegin(g,*vit)<=sequenceCoordsMap[sname].second)){
+                vertexOrientMap[*vit] = sequenceOrientMap[sname];
+                //add node to block
+                currlcb.push_back(*vit);
+#ifdef DEBUGGING
+                std::cout << "Adding segment V:" << *vit << " from anchor:" << *it << std::endl;
+#endif
+              }
+              else{
+#ifdef DEBUGGING
+                std::cout << "Skipping out-of-bounds anchor segment " << *vit << " len:" << fragmentLength(g,*vit) << " from anchor " << *it << " on sequence " << sname << " " << sequenceOrientMap[sname]
+                          << " fragmentBegin:" << fragmentBegin(g,*vit) << " bounds:" << sequenceCoordsMap[sname].first << "-" << sequenceCoordsMap[sname].second << " " << vertexOrientMap[*vit] << std::endl;
+#endif
+              }
+            }
+            else{
+#ifdef DEBUGGING
+              std::cout << "Skipping anchor segment " << *vit << " len:" << fragmentLength(g,*vit) << " from anchor " << *it << " on sequence " << sname << std::endl;
+              for(std::map<std::string,char>::iterator sit=sequenceOrientMap.begin();sit!=sequenceOrientMap.end();++sit){
+                std::cerr << sit->first << " " << sit->second << std::endl;
+              }
+#endif
+            }
+          }
+        }
+        vertexcount = vertexcount + currlcb.size();
+        lcbs.push_back(currlcb);
+        currblock.clear();
+        currlcb.clear();
+      }
+      //Anchor ids that were not closed by a ; are dropped
+      currblock.clear();
+    }
+    else{
+      sequenceOrientMap.clear();
+      sequenceCoordsMap.clear();
+      if(c == "I"){
+        while(in>>c){
+          if(c != ";"){
+            //read sequence name
+            std::string seqname=c;
+            char orient;
+            std::string coords;
+            assert(seqname != ";");
+            //read orient
+            in >> orient;
+            assert(orient != ';');
+            assert(orient == '+' || orient == '-');
+            sequenceOrientMap[seqname] = orient;
+            //read coords
+            in >> coords;
+            std::string start;
+            std::string end;
+            std::istringstream coordsin(coords);
+            std::getline(coordsin, start, '-');
+            std::getline(coordsin, end, '-');
+            unsigned int startcoord = atoi(start.c_str());
+            unsigned int endcoord = atoi(end.c_str());
+            //Coordinates are stored as given; no conversion is applied for
+            //the - orientation
+            sequenceCoordsMap[seqname] = std::make_pair(startcoord,endcoord);
+          }
+        }
+      }
+    }
+  }
+  std::cerr << "Read " << lcbs.size() << " LCBs containing " << vertexcount << " segments " << std::endl;
+}
+//readBlockFile()
+//Load the LCBs stored in filename (see doReadBlockFile) and reorder them by
+//the summed fragment length of their vertices. The comparator lcblencmp is
+//defined elsewhere; the assertions below expect it to yield non-increasing
+//lengths.
+template<typename TVertexDescriptor,
+         typename TNames,
+         typename TVertexOrientMap,
+         typename TGraph>
+void readBlockFile(const std::string & filename,
+                   std::map<unsigned int, std::set<TVertexDescriptor> > & block2fragMap,
+                   std::vector<std::vector<unsigned int> > & lcbs,
+                   TNames &sequenceNames,
+                   TVertexOrientMap & vertexOrientMap,
+                   TGraph & g,
+                   bool checkbounds=false){
+  typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
+
+  //Vertex -> id of the sequence it belongs to
+  std::map<TVertexDescriptor,unsigned> vertexSeqMap;
+  for(TVertexIterator vIt(g);!atEnd(vIt);goNext(vIt)) {
+    vertexSeqMap[*vIt] = sequenceId(g,*vIt);
+  }
+
+  doReadBlockFile(filename,block2fragMap,lcbs,sequenceNames,vertexOrientMap,vertexSeqMap,g,checkbounds);
+  std::cerr << "Sorting LCBs by length" << std::endl;
+
+  //LCB index -> summed fragment length, plus the list of indices to sort
+  std::map<int,int> lcbidxlenmap;
+  std::vector<int> lcbsidx;
+  const unsigned int numLcbs = lcbs.size();
+  for(unsigned int i=0;i<numLcbs;++i){
+    const std::vector<unsigned int> & members = lcbs[i];
+    int totalsize=0;
+    for(std::vector<unsigned int>::size_type m=0;m<members.size();++m){
+      totalsize = totalsize + fragmentLength(g,members[m]);
+    }
+    assert(totalsize!=0);
+    lcbidxlenmap[i] = totalsize;
+    lcbsidx.push_back(i);
+  }
+  assert(lcbidxlenmap.size()==lcbs.size());
+  assert(lcbsidx.size()==lcbs.size());
+
+  sort(lcbsidx.begin(),lcbsidx.end(),lcblencmp<std::map<int,int> >(lcbidxlenmap));
+
+  //Rebuild the LCB list in sorted order
+  std::vector<std::vector<unsigned int> > reordered;
+  for(std::vector<int>::size_type k=0;k<lcbsidx.size();++k){
+    const int cur = lcbsidx[k];
+    assert((unsigned int)cur<lcbs.size());
+    reordered.push_back(lcbs[cur]);
+    if(k>0){
+#ifdef DEBUGGING
+      std::cout << "LCB: " << cur << " " << lcbidxlenmap[cur] << " <= " << lcbidxlenmap[lcbsidx[k-1]] << std::endl;
+#endif
+      assert(lcbidxlenmap[cur]<=lcbidxlenmap[lcbsidx[k-1]]);
+    }
+  }
+  lcbs.swap(reordered);
+}
+/*
+template<typename TSeqs>
+void do_segmentation_MERCATOR(){
+  std::fstream strm;
+  //(2)Support for Mercator
+  //G.chroms
+  std::string genomestr;
+  for(unsigned int i=0;i<nSeq;i++){
+    std::ostringstream currfilename;
+    currfilename << "G" << i;
+    genomestr += currfilename.str();
+    genomestr += " ";
+    currfilename << ".chroms";
+    strm.open(currfilename.str().c_str(), std::ios_base::out | std::ios_base::trunc);
+    strm << "S" << i << "\t" //seqname
+	 << length(seqSet[i]) << std::endl; //chromLength
+    strm.close();
+  }
+  for(unsigned int i=0;i<nSeq;i++){
+    std::ostringstream currfilename;
+    currfilename << "G" << i << ".anchors";
+    strm.open(currfilename.str().c_str(), std::ios_base::out | std::ios_base::trunc);
+    typename std::vector<TBlock>::const_iterator bit = blocks.begin();
+    for(bit = blocks.begin();
+	bit!=blocks.end();
+	bit++){
+      if(bit->currentSeq==i){
+	//mercator format
+	strm << bit->currentSeq << "." << bit->c << "\t" //anchorname
+	     << "S" << bit->currentSeq << "\t" //seqname
+	     << bit->orient << "\t" //strand
+	     <<bit->begCoord << " " << bit->endCoord << "\t" //startCoord endCoord 0-based half open interval [start, end)
+	     << 1 << std::endl; //isCoding
+      }
+    }
+    strm.close();
+  }  
+  //G1-G2.hits
+  std::map<std::pair<unsigned int,unsigned int>,std::set<int> > hitMap;
+  std::map<std::pair<unsigned int,unsigned int>,std::set<int> >::iterator hmit;
+  bool inserted;
+  
+  bit2 = blocksbycomponent.begin();
+  for(; bit2!= blocksbycomponent.end();bit2++){//all cc
+    std::vector<TBlock> currblocks = bit2->second;
+    typename std::vector<TBlock>::iterator it1,it2;
+    for(it1=currblocks.begin();it1!=currblocks.end();it1++){
+      for(it2=currblocks.begin();it2!=currblocks.end();it2++){
+	if(it1!=it2){
+	  std::pair<unsigned int,unsigned int> key;
+	  if(it1->currentSeq<it2->currentSeq){
+	    key = std::make_pair(it1->currentSeq,it2->currentSeq);
+	  }
+	  else{
+	    key = std::make_pair(it2->currentSeq,it1->currentSeq);
+	  }
+	  
+	  std::pair<std::map<std::pair<unsigned int,unsigned int>,std::set<int> >::iterator,bool> s 
+	    = hitMap.insert(std::make_pair(key,std::set<int>()));
+	  hmit = s.first;
+	  inserted = s.second;
+	  hmit->second.insert(it1->c);
+	}
+      }
+    }
+  }
+  for(hmit=hitMap.begin();hmit!=hitMap.end();hmit++){
+    std::ostringstream currfilename;
+    currfilename << "G" << hmit->first.first << "-" << "G" << hmit->first.second << ".hits";
+    strm.open(currfilename.str().c_str(), std::ios_base::out | std::ios_base::trunc);
+    for(std::set<int>::iterator it=hmit->second.begin();it!=hmit->second.end();it++){
+      strm << hmit->first.first  << "." << *it << "\t"  //anchorName1
+	   << hmit->first.second << "." << *it << "\t" //anchorName2
+	   << 1 << "\t"
+	   << 1 << std::endl;
+    }
+    strm.close();
+  }
+}
+*/
+
+//do_segmentation_ENREDO()
+//Derive LCBs by running the external Enredo program on the anchor blocks.
+//(1) Write <outfile>enredo.idx (sequence index -> name) and
+//    <outfile>enredo.anchors (one line per block)
+//(2) Sort the anchors file with sort(1)
+//(3) Run $MUGSY_INSTALL/enredo piped through $MUGSY_INSTALL/enredo2mugsy.pl
+//(4) Read the resulting block file into lcbs (bounds are checked)
+//Enredo options come from the ENREDO_OPTS environment variable when it is
+//set and non-empty, otherwise from minlen and distance.
+//The process exits with status 1 if MUGSY_INSTALL is not set or if one of
+//the shell commands fails.
+//NOTE(review): the file names are pasted unquoted into shell command lines;
+//an outfile prefix containing spaces or shell metacharacters will break them.
+template<typename TBlock,
+         typename TVertexDescriptor,
+         typename TMSAOptions,
+         typename TSeqs,
+         typename TNames,
+         typename TGenomeNames,
+         typename TVertexOrientMap,
+         typename TGraph>
+void do_segmentation_ENREDO(std::vector<TBlock> & blocks,
+                            std::vector<std::vector<unsigned int> > & lcbs,
+                            std::map<unsigned int, std::set<TVertexDescriptor> > & block2fragMap,
+                            std::string distance,
+                            std::string minlen,
+                            TMSAOptions const &msaOpt,
+                            TSeqs & seqSet,
+                            TNames & sequenceNames,
+                            TGenomeNames & genomeNames,
+                            TVertexOrientMap & vertexOrientMap,
+                            TGraph & g){
+  /*
+    From Enredo README
+  The input file contains the result of mapping a set of anchors onto several
+  genomes. Anchors are expected to be sorted by organism, chromosome and
+  position. Each line should correspond to an anchor and each line contains 6
+  values separated by tabs. The six values are: the anchor name (a string),
+  the species name (a string), the chromosome name (a string), the start
+  position (an integer value), the end position (an integer value), the strand
+  (either + or -) and the score (a real value). Here is an example:
+
+  A1      Spcs1   X       53      85      +       123
+  B1      Spcs1   X       458     498     +       11
+  C1      Spcs1   X       3601    3639    +       434
+  B1      Spcs1   X       5480    5520    +       1
+  D1      Spcs1   X       6479    6510    +       41
+  A       Spcs1   Y       1379    4410    +       1567
+  E       Spcs1   Y       5879    5910    +       311
+  E       Spcs1   Y       6479    6510    +       217
+  D       Spcs1   Y       6567    6593    +       135
+  */
+  //Check the environment first: building a std::string from the NULL
+  //pointer that getenv returns for an unset variable is undefined behaviour
+  const char * installenv = std::getenv("MUGSY_INSTALL");
+  if(installenv==NULL || installenv[0]=='\0'){
+    std::cerr << "MUGSY_INSTALL environment variable is not set" << std::endl;
+    exit(1);
+  }
+  std::string mugsyinstall(installenv);
+
+  std::fstream strm;   //index file: sequence index -> sequence name
+  std::fstream strm2;  //anchors file
+  String<char> pf = msaOpt.outfile;
+  char * pfilename = toCString(pf);
+  std::string projfilename(pfilename);
+  projfilename = projfilename + "enredo.anchors";
+  std::string idxfilename(pfilename);
+  idxfilename = idxfilename + "enredo.idx";
+  std::cerr << "Writing ENREDO anchors to " << projfilename.c_str() << std::endl;
+  strm.open(idxfilename.c_str(), std::ios_base::out | std::ios_base::trunc);
+  strm2.open(projfilename.c_str(), std::ios_base::out | std::ios_base::trunc);
+  //(1)enredo anchors file
+  //G.anchors
+  unsigned int nseq = length(seqSet);
+  for(unsigned int i=0;i<nseq;i++){
+    strm << i << " " << sequenceNames[i] << std::endl;
+    //Collect the blocks lying on sequence i
+    std::vector<TBlock *> seqblocks;
+    for(typename std::vector<TBlock>::size_type b=0;b<blocks.size();++b){
+      if(blocks[b].currentSeq==i){
+        seqblocks.push_back(&(blocks[b]));
+      }
+    }
+
+    std::sort(seqblocks.begin(),seqblocks.end(),blockorder<TBlock,unsigned int>(i));
+    unsigned int blen = seqblocks.size();
+    for(unsigned int j=0;j<blen;j++){
+      strm2 << seqblocks[j]->c << "\t"
+            << i << "\t"
+            << genomeNames[i] << "\t"
+            //1-start base coordinates
+            << seqblocks[j]->begCoord+1 << "\t" << seqblocks[j]->endCoord << "\t"
+            << seqblocks[j]->orient << "\t"
+            << seqblocks[j]->endCoord - seqblocks[j]->begCoord
+            << std::endl;
+    }
+  }
+  strm.close();
+  strm2.close();
+  //(2)Sort anchors
+  std::string sortedprojfilename(projfilename+".sorted");
+  std::string sortcmd = "sort -k 2,3 -k 4n,4n < " + projfilename + " > " + sortedprojfilename;
+  int res = system(sortcmd.c_str());
+  if(res!=0){
+    perror("Could not run system command: ");
+    std::cerr << sortcmd.c_str() << std::endl
+              << "SYSTEM:" << res << std::endl;
+    exit(1);
+  }
+  //(3)Run Enredo
+  std::string cmd = mugsyinstall+"/enredo ";
+  std::string stdoutfilename(boost::lexical_cast<std::string>(getpid())+"lcbs.out");
+  std::string stderrfilename(boost::lexical_cast<std::string>(getpid())+"synchain-mugsy.out");
+  char * enredoenvopts = std::getenv("ENREDO_OPTS");
+  std::string enredoopts;
+  if(enredoenvopts==NULL || strlen(enredoenvopts)==0){
+    enredoopts = std::string(" --min-score 0 --max-ratio 0 ") + std::string(" --min-length ") + minlen + std::string(" --max-gap-length ") + distance + std::string(" --min-anchors 1 ");
+  }
+  else{
+    enredoopts = std::string(enredoenvopts);
+  }
+  cmd = cmd + enredoopts + " " + sortedprojfilename
+    + " | "+mugsyinstall+"/enredo2mugsy.pl "+idxfilename+" > "+stdoutfilename+" 2> "+stderrfilename;
+  assert(cmd.length()>0);
+  //#ifdef DEBUGGING
+  std::cerr << "Running " << cmd.c_str() << std::endl;
+  //#endif
+  res = system(cmd.c_str());
+  if(res!=0){
+    perror("Could not run system command: ");
+    std::cerr << cmd.c_str() << std::endl
+              << "SYSTEM:" << res << std::endl;
+    exit(1);
+  }
+  //(4) Read output file to obtain list of LCBs
+  readBlockFile(stdoutfilename,
+                block2fragMap,
+                lcbs,
+                sequenceNames,
+                vertexOrientMap,
+                g,
+                true); //must check bounds
+}
+
+//writeProjectionFile()
+//Project the blocks onto each sequence and write the neighbourhood
+//relation to projfilename (the file is truncated first).
+//For every sequence its blocks are sorted with blockorder and one line is
+//written per pair of consecutive blocks (n -> n+1):
+//  c1 c2 seqname dist genomename orient1 orient2 beg1 end1 beg2 end2
+//where dist is |end1 - beg2|. A sequence holding a single block yields one
+//line pairing that block with itself at distance 0; a sequence without
+//blocks yields nothing.
+template<typename TBlock,
+         typename TNames,
+         typename TGenomeNames>
+void writeProjectionFile(std::string projfilename,
+                         std::vector<TBlock> & blocks,
+                         TNames & sequenceNames,
+                         TGenomeNames & genomeNames){
+  std::fstream strm;
+  strm.open(projfilename.c_str(), std::ios_base::out | std::ios_base::trunc);
+  unsigned int nseq = length(sequenceNames);
+  assert(nseq==length(genomeNames));
+  unsigned int cdist=0;
+  //(1)Project blocks onto each sequence and write to a file
+  //Topological sort of the blocks over each sequence
+  //projecting block onto sequence and printing neighbors (n->n+1)
+  for(unsigned int i=0;i<nseq;i++){
+    //Blocks lying on sequence i
+    std::vector<TBlock *> seqblocks;
+    for(typename std::vector<TBlock>::size_type b=0;b<blocks.size();++b){
+      if(blocks[b].currentSeq==i){
+        seqblocks.push_back(&(blocks[b]));
+      }
+    }
+
+    std::sort(seqblocks.begin(),seqblocks.end(),blockorder<TBlock,unsigned int>(i));
+    unsigned int blen = seqblocks.size();
+    if(blen>1){
+      for(unsigned int j=0;j+1<blen;j++){
+        TBlock * cur = seqblocks[j];
+        TBlock * nxt = seqblocks[j+1];
+        assert(cur->currentSeq==nxt->currentSeq);
+        //Widen both coordinates before subtracting. Subtracting first and
+        //casting afterwards wraps around when the coordinates are unsigned
+        //and narrower than long, giving a huge distance whenever the next
+        //block begins after the current one ends.
+        cdist = std::abs((long int)cur->endCoord - (long int)nxt->begCoord);
+        strm << cur->c << " " << nxt->c << " " << sequenceNames[i] << " " << cdist << " " << genomeNames[i] << " "
+             << cur->orient << " " << nxt->orient << " "
+             << cur->begCoord << " " << cur->endCoord << " "
+             << nxt->begCoord << " " << nxt->endCoord
+             << std::endl;
+      }
+    }
+    else{
+      if(blen==1){
+        //Single block: written as its own neighbour at distance 0
+        strm << seqblocks[0]->c << " " << seqblocks[0]->c << " " << sequenceNames[i] << " " << 0 << " " << genomeNames[i] << " "
+             << seqblocks[0]->orient << " " << seqblocks[0]->orient << " "
+             << seqblocks[0]->begCoord << " " << seqblocks[0]->endCoord << " "
+             << seqblocks[0]->begCoord << " " << seqblocks[0]->endCoord
+             << std::endl;
+      }
+    }
+  }
+  strm.close();
+}
+
+//do_segmentation_MUGSY()
+//Derive LCBs by running the external synchain-mugsy program.
+//(1) Write the block projections to <outfile>projections.out
+//(2) Run $MUGSY_INSTALL/synchain-mugsy <distance> <minlen> <minlen> on it,
+//    with stdout and stderr redirected to files prefixed with the process id
+//(3) Read the produced block file into lcbs (no bounds check)
+//The three temporary files are removed afterwards unless KEEPCHAINTMP is
+//defined. The process exits with status 1 if MUGSY_INSTALL is not set or if
+//the command fails.
+template<typename TBlock,
+         typename TVertexDescriptor,
+         typename TMSAOptions,
+         typename TNames,
+         typename TGenomeNames,
+         typename TVertexOrientMap,
+         typename TGraph>
+void do_segmentation_MUGSY( std::vector<TBlock> & blocks,
+                            std::vector<std::vector<unsigned int> > & lcbs,
+                            std::map<unsigned int, std::set<TVertexDescriptor> > & block2fragMap,
+                            std::string distance,
+                            std::string minlen,
+                            TMSAOptions const &msaOpt,
+                            TNames & sequenceNames,
+                            TGenomeNames & genomeNames,
+                            TVertexOrientMap & vertexOrientMap,
+                            TGraph & g){
+
+  //Check the environment first: building a std::string from the NULL
+  //pointer that getenv returns for an unset variable is undefined behaviour
+  const char * installenv = std::getenv("MUGSY_INSTALL");
+  if(installenv==NULL || installenv[0]=='\0'){
+    std::cerr << "MUGSY_INSTALL environment variable is not set" << std::endl;
+    exit(1);
+  }
+  std::string mugsyinstall(installenv);
+
+  String<char> pf = msaOpt.outfile;
+  char * pfilename = toCString(pf);
+
+  //(1)Write projection file
+
+  std::string projfilename(pfilename);
+  projfilename = projfilename + "projections.out";
+  writeProjectionFile(projfilename,blocks,sequenceNames,genomeNames);
+  //(2)Run synchain-mugsy using the projection
+  std::string cmd = "cat "+projfilename+" | "+mugsyinstall+"/synchain-mugsy ";
+  std::string stdoutfilename(boost::lexical_cast<std::string>(getpid())+"lcbs.out");
+  std::string stderrfilename(boost::lexical_cast<std::string>(getpid())+"synchain-mugsy.out");
+  cmd = cmd + distance + " " + minlen + " "+ minlen
+    + " > "+stdoutfilename+" 2> "+stderrfilename;
+  assert(cmd.length()>0);
+  //#ifdef DEBUGGING
+  std::cerr << "Running " << cmd.c_str() << std::endl;
+  //#endif
+  #ifdef TIMING 
+  time(&now);
+  std::cerr << "TIME PRE-SYNCHAIN:" << lasttime << " " << now << " " << now-lasttime << std::endl;
+  lasttime=now;
+  #endif 
+  int res = system(cmd.c_str());
+  #ifdef TIMING 
+  time(&now);
+  std::cerr << "TIME SYNCHAIN:" << lasttime << " " << now << " " << now-lasttime << std::endl;
+  lasttime=now;
+  #endif
+  if(res!=0){
+    perror("Could not run system command: ");
+    std::cerr << cmd.c_str() << std::endl
+              << "SYSTEM:" << res << std::endl;
+    exit(1);
+  }
+  //(3) Read output file to obtain list of LCBs
+  readBlockFile(stdoutfilename,
+                block2fragMap,
+                lcbs,
+                sequenceNames,
+                vertexOrientMap,
+                g,
+                false); //No bounds check
+  //Only reached after a successful run: remove the temporary files
+#ifndef KEEPCHAINTMP
+  unlink(stdoutfilename.c_str());
+  unlink(stderrfilename.c_str());
+  unlink(projfilename.c_str());
+#endif
+  #ifdef TIMING 
+  time(&now);
+  std::cerr << "TIME POST-SYNCHAIN:" << lasttime << " " << now << " " << now-lasttime << std::endl;
+  lasttime=now;
+  #endif
+}
+
+
+//Build ungapped profiles from the segment graph
+//Output is array of TBlocks needed for mugsy-chaining
+//
+//Walks g.data_pvMap, the (seqId,position) -> vertex map of the graph, twice:
+//(1) count the map entries that fall into each component
+//(2) for every component with more than one entry create one TBlock per
+//    (component,sequence) in blocksbycomponent[component]; further vertices
+//    of the same (component,sequence) are added to that block via addVertex
+//componentVertexMap maps (component,sequence) to the address of its block.
+//Components with a single entry produce no block.
+//aintervals and sequenceNames are only referenced by the disabled code
+//below, so the final message always reports 0 vertices and 0 bp.
+template<typename TGraph,
+         typename TComponentMap,
+         typename TComponent,
+         typename TBlock,
+         typename TName,
+         typename TLoc,
+         typename TNames
+         >
+void convertCC2Blocks(TGraph &g,
+                      TComponentMap& component,
+                      std::map<std::pair<TComponent,TComponent>,TBlock *> & componentVertexMap,
+                      std::vector<std::vector<TBlock> > & blocksbycomponent,
+                      std::map<TName,std::vector<TLoc> >&aintervals,
+                      TNames & sequenceNames){
+
+  typedef typename Id<TGraph>::Type TIdType;
+  typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
+  typedef typename Size<TGraph>::Type TSize;
+  typedef std::pair<TIdType, TSize> TKey;
+  typedef std::map<TKey, TVertexDescriptor> TPosToVertexMap;
+  typedef typename Position<TGraph>::Type TPos;
+  typedef std::pair<TComponent,TComponent> TBlockKey;
+  typedef std::map<TBlockKey,TBlock *> TBlockPtrMap;
+  typedef typename std::vector<TBlock>::size_type TBlockIdx;
+
+  // data_pvMap is an STL Map to retrieve a vertex given SeqId, Position
+  // first.first == seqId
+  // first.second == pos
+  // second == VertexDescriptor
+  typename TPosToVertexMap::const_iterator it1 = g.data_pvMap.begin();
+  typename TPosToVertexMap::const_iterator it1End = g.data_pvMap.end();
+
+  //Number of data_pvMap entries per component
+  std::map<TComponent,int> seqsPerComponent;
+  //Position inside blocksbycomponent[c] of every block created by this call.
+  //Element addresses change whenever the vector reallocates, so blocks are
+  //reached through their index and the addresses handed out in
+  //componentVertexMap are refreshed once all insertions are done.
+  std::map<TBlockKey,TBlockIdx> createdIdx;
+
+  TPos begCoord,endCoord;
+  char orient='?';
+
+  int lostbp=0;
+  int numlostv=0;
+
+  //Track number of sequences per component
+  for(;it1!=it1End;++it1) {
+    TVertexDescriptor currV = it1->second;
+    if(currV != getNil<TVertexDescriptor>()){
+      assert(getProperty(component,currV)==component[currV]);
+      TComponent c = getProperty(component, currV);
+      assert(c < blocksbycomponent.size());
+      //operator[] starts a new counter at 0
+      ++seqsPerComponent[c];
+    }
+  }
+  for(it1 = g.data_pvMap.begin();it1!=it1End;++it1) {
+    TVertexDescriptor currV = it1->second;
+    if(currV != getNil<TVertexDescriptor>()){
+      assert(getProperty(component,currV)==component[currV]);
+      TComponent c = getProperty(component, currV);
+      assert(c < blocksbycomponent.size());
+      TSize currentSeq = sequenceId(g,currV);
+      if(seqsPerComponent[c] > 1){
+#ifdef DEBUGGING
+	std::cout << "Component " << c << " V:" << currV << " seq:" << currentSeq << " degree:" << degree(g,currV) << " coord:" << fragmentBegin(g,currV) << std::endl;
+#endif    
+        TBlockKey key(c,TComponent(currentSeq));
+        typename TBlockPtrMap::iterator fit = componentVertexMap.find(key);
+        if(fit==componentVertexMap.end()){
+          //First block for currentseq
+          begCoord = fragmentBegin(g,currV);
+          assert((int)begCoord>=0);
+          endCoord = begCoord+fragmentLength(g,currV);
+          orient='?';
+          blocksbycomponent[c].push_back(TBlock(c,currentSeq,begCoord,endCoord,orient,currV));
+          createdIdx[key] = blocksbycomponent[c].size()-1;
+          componentVertexMap[key] = &(blocksbycomponent[c].back());
+#ifdef DEBUGGING
+	  std::cout << "Adding component " << c << " seq:" << currentSeq << " coords" 
+		    << begCoord << "-" << endCoord << " o:" << orient << " V:" << currV << std::endl;
+#endif
+        }
+        else{
+          //Block already inserted
+          //Blocks created by this call are addressed through their index;
+          //entries that were already in the map on entry keep their pointer
+          typename std::map<TBlockKey,TBlockIdx>::iterator cit = createdIdx.find(key);
+          TBlock * blk = fit->second;
+          if(cit!=createdIdx.end()){
+            blk = &(blocksbycomponent[c][cit->second]);
+          }
+          blk->addVertex(g,currV);
+#ifdef DEBUGGING
+	  std::cout << "Adding vertex to component " << c << " seq:" << currentSeq 
+		    << " coords:" << begCoord << "-" << endCoord << " V:" << currV << std::endl;
+#endif
+        }
+      }
+      else{
+	//Repetitive sequence
+	/* Remove to improve reporting of unique sequences
+	if(degree(g,currV)>0){
+	  lostbp = lostbp + fragmentLength(g,currV);
+	  numlostv++;
+	  typename std::map<TName,std::vector<TLoc > >::iterator ait = aintervals.find(sequenceNames[currentSeq]);
+	  if(ait==aintervals.end()){      
+	    aintervals.insert(std::make_pair(sequenceNames[currentSeq],std::vector<TLoc >()));
+	  }
+	  ait = aintervals.find(sequenceNames[currentSeq]);
+	  assert(ait!=aintervals.end());
+	  TLoc t1,t2;
+	  t1.first = fragmentBegin(g,currV);
+	  t1.second = 1;
+	  t1.blocknum = 0;
+	  ait->second.push_back(t1);
+	  t2.first = t1.first+fragmentLength(g,currV);
+	  t2.second = -1;
+	  t2.blocknum = 0;
+	  ait->second.push_back(t2);
+	}
+	*/
+      }
+    }
+  }
+  //All insertions are done: hand out the final block addresses
+  for(typename std::map<TBlockKey,TBlockIdx>::iterator cit = createdIdx.begin();cit!=createdIdx.end();++cit){
+    componentVertexMap[cit->first] = &(blocksbycomponent[cit->first.first][cit->second]);
+  }
+  std::cerr << "Disconnected " << numlostv << " vertices marking " 
+	    << lostbp << " aligned bp" << std::endl;
+}
+
+
+//Assign orientation using greedy approach
+//Start assignment of edges with best consistency score.
+//Break ties with positional score
+//?TODO?: Break inconsistent edges
+//Output:Blocks:          std::vector<TBlock>
+//       vertexOrientMap: map vertex->orient
+template<typename TGraph, 
+	 typename TBlock, 
+	 typename TVertexOrientMap,
+	 typename TEdgeDescriptor>
+void assignBlockOrientation(TGraph &g,
+			    std::vector<std::vector<TBlock> > &blocksbycomponent, 
+			    std::vector<TBlock> &blocks,
+			    TVertexOrientMap &vertexOrientMap,
+			    std::map<TEdgeDescriptor,float> &posScores){
+  typedef unsigned int TSize;
+  typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
+  typename std::vector<std::vector<TBlock> >::iterator bit2 = blocksbycomponent.begin();
+
+  //Track number of conflicting orientation assignments
+  int conflicts=0;
+  std::set<TEdgeDescriptor> conflictEdges;
+  bool removeConflictingEdges=false;
+
+  for(; bit2!= blocksbycomponent.end();bit2++){//all cc
+    if(bit2->size()>0){
+      std::vector<TEdgeDescriptor> ccedges;
+      //std::vector<TBlock> * currblocks = bit2;
+      int unorientedSegments=bit2->size();
+#ifdef DEBUGGING
+      std::cout << "Examining block with " << unorientedSegments << " sequences" << std::endl;
+#endif
+      std::map<TSize,TBlock *> seqBlockMap;
+      //capture all edges in component
+      //For all segments on seqs i+1->numseqs{
+      for(unsigned int i=0;i<bit2->size();i++){
+	for(unsigned int j=i+1;j<bit2->size();j++){
+	  //all members of block
+	  for(typename std::vector<TVertexDescriptor>::iterator vit=bit2->at(i).currV.begin();vit!=bit2->at(i).currV.end();vit++){
+	    TVertexDescriptor currV0 = *vit;
+	    //assert(degree(g,currV0)>0);
+	    for(typename std::vector<TVertexDescriptor>::iterator vit1=bit2->at(j).currV.begin();vit1!=bit2->at(j).currV.end();vit1++){
+	      TVertexDescriptor currV = *vit1;
+	      //assert(degree(g,currV)>0);
+	      TEdgeDescriptor ed = findEdge(g,currV0,currV);
+	      if(ed!=0){
+		ccedges.push_back(ed);
+	      }
+	      else{
+		assert(findEdge(g,currV,currV0)==0);
+	      }
+	    }
+	  }
+	}
+	seqBlockMap.insert(std::make_pair(bit2->at(i).currentSeq,&bit2->at(i)));
+#ifdef DEBUGGING
+	std::cout << "Block " << i << " seq:" << bit2->at(i).currentSeq  << " vertices:" << bit2->at(i).currV.size() << std::endl;
+#endif
+#ifdef DEBUGGING
+	for(typename std::vector<TVertexDescriptor>::iterator vit=bit2->at(i).currV.begin();vit!=bit2->at(i).currV.end();vit++){
+	  TVertexDescriptor currV0 = *vit;
+	  std::cout << "V:"<<currV0 << std::endl;
+	}
+#endif
+      }
+      assert(seqBlockMap.size()==bit2->size());
+#ifdef DEBUGGING
+      std::cout << "Number of edges " << ccedges.size() << std::endl;
+#endif
+      //Sort edges in order of 
+      //(1) consistency
+      //(2) posscore
+      //This way the most consistent and syntenic edges should determine the 
+      //relative orientation of segments in the block
+      sort(ccedges.begin(),ccedges.end(),edgescorecmp<std::map<TEdgeDescriptor,float> >(&posScores));
+      //traverse edges in decreasing order ranked by consistency, posScores
+      typename std::vector<TEdgeDescriptor>::reverse_iterator eit=ccedges.rbegin();
+      TEdgeDescriptor ed = *eit;
+      TVertexDescriptor v1 = getSource(ed);
+      TVertexDescriptor v2 = getTarget(ed);
+      assert(seqBlockMap.find(sequenceId(g,v1))!=seqBlockMap.end());
+      assert(seqBlockMap.find(sequenceId(g,v2))!=seqBlockMap.end());
+      TBlock * blockv1 = seqBlockMap[sequenceId(g,v1)];
+      TBlock * blockv2 = seqBlockMap[sequenceId(g,v2)];
+      assert(blockv2->orient == '?');
+      assert(blockv1->orient == '?');
+      blockv1->orient = '+';
+      assert(cargo(ed)!=0);
+      if(cargo(ed)>0){
+#ifdef DEBUGGING
+	std::cout << " SAME ORIENT " << blockv1->orient << std::endl;
+#endif
+	blockv2->orient = blockv1->orient;
+      }
+      else{
+#ifdef DEBUGGING
+	std::cout << " OPPOSITE ORIENT OF " << blockv1->orient << std::endl;
+#endif
+	blockv2->orient = (blockv1->orient=='+'?'-':'+');
+      }
+#ifdef DEBUGGING
+      std::cout << "Examining edge " << " " << v1 << "-" << v2 
+		<< blockv1->orient << " " << blockv2->orient
+		<< std::endl;
+#endif
+      //check all currV in blockv1,blockv2 for consistency with this assignment
+      //First two blocks are oriented relative to each other
+      unorientedSegments-=2;
+      eit++;
+      //Propogate relative orientation through the component graph
+      while(unorientedSegments>0){
+#ifdef DEBUGGING
+	std::cout << "Num blocks unoriented " << unorientedSegments << std::endl;
+#endif
+	for(;eit!=ccedges.rend();eit++){
+	  ed = *eit;
+	  v1 = getSource(ed);
+	  v2 = getTarget(ed);
+	  assert(v1!=v2);
+	  assert(seqBlockMap.find(sequenceId(g,v1))!=seqBlockMap.end());
+	  assert(seqBlockMap.find(sequenceId(g,v2))!=seqBlockMap.end());
+	  blockv1 = seqBlockMap[sequenceId(g,v1)];
+	  blockv2 = seqBlockMap[sequenceId(g,v2)];
+#ifdef DEBUGGING
+	  std::cout << "Examining edge:" << " " << v1 << "-" << v2 
+		    << blockv1->orient << " " << blockv2->orient
+		    << std::endl;
+#endif
+	  if(blockv1->orient == '?'){
+	    if(blockv2->orient != '?'){
+	      //assignment
+	      unorientedSegments--;
+	      if(cargo(ed)>0){
+#ifdef DEBUGGING
+		std::cout << " SAME ORIENT " << blockv2->orient << std::endl;
+#endif
+		blockv1->orient = blockv2->orient;
+	      }
+	      else{
+#ifdef DEBUGGING
+		std::cout << " OPPOSITE ORIENT OF " << blockv2->orient << std::endl;
+#endif
+		blockv1->orient = (blockv2->orient=='+'?'-':'+');
+	      }
+	    }
+	    else{
+	      //no assignment
+	    }
+	  }
+	  else{
+	    if(blockv2->orient == '?'){
+	      if(blockv1->orient != '?'){
+		//assignment
+		unorientedSegments--;
+		if(cargo(ed)>0){
+#ifdef DEBUGGING
+		  std::cout << " SAME ORIENT " << blockv1->orient << std::endl;
+#endif
+		  blockv2->orient = blockv1->orient;
+		}
+		else{
+#ifdef DEBUGGING
+		  std::cout << " OPPOSITE ORIENT OF " << blockv1->orient << std::endl;
+#endif
+		  blockv2->orient = (blockv1->orient=='+'?'-':'+');
+		}
+	      }
+	      else{
+		//no assignment
+	      }
+	    }
+	    else{
+	      //already assigned
+	      //check
+	      assert(cargo(ed)!=0);
+	      if(cargo(ed)>0){
+		if(blockv1->orient!=blockv2->orient){
+#ifdef DEBUGGING
+		  std::cout << "Conflicting orientation. Edge:" << cargo(ed) 
+			    << " for vertices V1:" << v1 << "," << blockv1->orient 
+			    << " V2:" << v2 << "," << blockv2->orient << std::endl;
+#endif
+		  //TODO break edge?
+		  conflicts++;
+		  conflictEdges.insert(ed);
+		}
+	      }
+	      else{
+		if(blockv1->orient==blockv2->orient){
+#ifdef DEBUGGING
+		  std::cout << "Conflicting orientation. Edge:" << cargo(ed) 
+			    << " for vertices V1:" << v1 << "," << blockv1->orient 
+			    << " V2:" << v2 << "," << blockv2->orient << std::endl;
+#endif
+		  //TODO break edge?
+		  conflicts++;
+		  conflictEdges.insert(ed);
+		}
+	      }
+	    }
+	  }
+	}
+	//Start search again at beginning of list of edges
+	eit=ccedges.rbegin();
+      }
+    } //if 
+    //copy final output
+    blocks.insert(blocks.end(),bit2->begin(),bit2->end());
+    for(unsigned int i=0;i<bit2->size();i++){
+      //all members of block
+      for(typename std::vector<TVertexDescriptor>::iterator vit=bit2->at(i).currV.begin();vit!=bit2->at(i).currV.end();vit++){
+	vertexOrientMap[*vit] = bit2->at(i).orient;
+      }
+    }
+  } //for all CC
+
+  //Resolve conflicts if necessary
+  if(removeConflictingEdges){
+    for(typename std::set<TEdgeDescriptor>::iterator eit=conflictEdges.begin();eit!=conflictEdges.end();++eit){
+      removeEdge(g,*eit);
+    }
+  }
+  std::cerr << "Num conflicts: " << conflicts << " when assigning orientation" << std::endl;
+}
+
+
+//Build a fragment for the vertex pair vd1,vd2, honouring a per-sequence orientation.
+//
+// frag             output; overwritten with the newly constructed TFragment
+// g                graph queried through fragmentBegin()/fragmentLength()
+// vd1,vd2          the two vertices; the fragment length is always fragmentLength(g,vd1)
+// id1,id2          sequence ids stored in the fragment
+// vd1len,vd2len    sequence lengths used to mirror a begin coordinate for a '-' orient
+// offset1,offset2  subtracted from the respective begin coordinate
+// orient1,orient2  '+' or '-'; any other value trips an assert in debug builds
+//
+//The reversed flag handed to TFragment is true only for the mixed cases (-,+) and (+,-).
+template<typename TFragment,
+	 typename TGraph,
+	 typename TVertexDescriptor,
+	 typename TSize>
+void buildFrag(TFragment & frag,
+	       TGraph & g,
+	       TVertexDescriptor vd1,
+	       TVertexDescriptor vd2,
+	       TSize id1,
+	       int vd1len,
+	       unsigned offset1,
+	       char orient1,
+	       TSize id2,
+	       int vd2len,
+	       unsigned offset2,
+	       char orient2){
+  const bool firstIsMinus = (orient1 == '-');
+
+  if(firstIsMinus && orient2 == '+'){
+    //id1:- id2:+  -> mirror seq 1 only
+    frag = TFragment(id1,
+                     vd1len-(fragmentBegin(g,vd1)+fragmentLength(g,vd1))-offset1,
+                     id2,
+                     fragmentBegin(g,vd2)-offset2,
+                     fragmentLength(g,vd1),
+                     true);
+  }
+  else if(firstIsMinus){
+    //id1:- id2:-  -> mirror both
+    assert(orient2 == '-');
+    frag = TFragment(id1,
+                     vd1len-(fragmentBegin(g,vd1)+fragmentLength(g,vd1))-offset1,
+                     id2,
+                     vd2len-(fragmentBegin(g,vd2)+fragmentLength(g,vd2))-offset2,
+                     fragmentLength(g,vd1),
+                     false);
+  }
+  else if(orient2 == '-'){
+    //id1:+ id2:-  -> mirror seq 2 only
+    //(the mirrored coordinate is computed with the length of vd1, as the fragment length is)
+    assert(orient1 == '+');
+    frag = TFragment(id1,
+                     fragmentBegin(g,vd1)-offset1,
+                     id2,
+                     vd2len-(fragmentBegin(g,vd2)+fragmentLength(g,vd1))-offset2,
+                     fragmentLength(g,vd1),
+                     true);
+  }
+  else{
+    //id1:+ id2:+  -> no mirroring
+    assert(orient1 == '+');
+    assert(orient2 == '+');
+    frag = TFragment(id1,
+                     fragmentBegin(g,vd1)-offset1,
+                     id2,
+                     fragmentBegin(g,vd2)-offset2,
+                     fragmentLength(g,vd1),
+                     false);
+  }
+
+  //Sanity checks: the fragment must lie inside both sequences
+  assert(frag.begin1 >=0);
+  assert(frag.begin2 >=0);
+  assert(frag.begin1+frag.len <=vd1len);
+  assert(frag.begin2+frag.len <=vd2len);
+}
+	    
+//Turn every edge of the graph into one fragment plus one score.
+//Matches and fragment graph both have coordinates on the leading strand
+//
+// g            graph whose edges are walked with an EdgeIterator
+// seqSet       used to look up the length of the second sequence of reversed fragments
+// currmatches  output; one fragment is appended per edge
+// currscores   output; fragmentLength() of the edge's source vertex is appended per edge
+//
+//An edge with negative cargo is built with the second sequence oriented '-'.
+//For such (reversed) fragments begin2 is mirrored again afterwards using
+//length(seqSet[seqId2]).
+template<typename TGraph,
+	 typename TString,
+	 typename TSpec,
+	 typename TFragmentString,
+	 typename TScoreValues>
+void buildMatchesFromGraph(TGraph &g,
+			   StringSet<TString, TSpec> &seqSet,
+			   TFragmentString &currmatches,
+			   TScoreValues &currscores){
+  typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
+  typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
+  typedef typename Size<StringSet<TString, TSpec> >::Type TSize;
+  typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
+  typedef Fragment<> TFragment;
+
+  TVertexDescriptor nilVertex = getNil<TVertexDescriptor>();
+  TEdgeIterator edgeIt(g);
+  while(!atEnd(edgeIt)){
+    TVertexDescriptor srcV = getSource(*edgeIt);
+    TVertexDescriptor tgtV = getTarget(*edgeIt);
+    TSize srcSeq = sequenceId(g,srcV);
+    TSize tgtSeq = sequenceId(g,tgtV);
+    assert(srcV!=nilVertex);
+    assert(tgtV!=nilVertex);
+    assert(srcV!=tgtV);
+
+    //findEdge() implemented in graph_impl_undirected.h
+    //traced from data_align object in graph_impl_align.h
+    TEdgeDescriptor ed = findEdge(g,srcV,tgtV);
+    assert(ed);
+    //An edge is an alignment between two different sequences
+    assert(srcSeq!=tgtSeq);
+
+    int srcSeqLen = length(getValueById(stringSet(g), srcSeq));
+    int tgtSeqLen = length(getValueById(stringSet(g), tgtSeq));
+    //Negative cargo marks the second sequence as '-' oriented
+    char tgtOrient = (cargo(ed)<0) ? '-' : '+';
+
+    TFragment edgeFrag;
+    buildFrag(edgeFrag,g,srcV,tgtV,
+              srcSeq,srcSeqLen,0,'+',
+              tgtSeq,tgtSeqLen,0,tgtOrient);
+    assert(edgeFrag.begin1 >=0);
+    assert(edgeFrag.begin2 >=0);
+    assert(edgeFrag.begin1+edgeFrag.len <=length(getValueById(stringSet(g), srcSeq)));
+    assert(edgeFrag.begin2+edgeFrag.len <=length(getValueById(stringSet(g), tgtSeq)));
+    assert(edgeFrag.len==fragmentLength(g,srcV));
+    assert(edgeFrag.len==fragmentLength(g,tgtV));
+
+    if(edgeFrag.reversed){
+      //Mirror begin2 once more for reversed fragments
+      edgeFrag.begin2 = length(seqSet[edgeFrag.seqId2]) - (edgeFrag.begin2+edgeFrag.len);
+    }
+    appendValue(currmatches, edgeFrag);
+    appendValue(currscores, fragmentLength(g,srcV));
+
+    goNext(edgeIt);
+  }
+}
+
+//buildMatchesFromGraph()
+//Populate a TFragmentString(currmatches)
+//based on edges in TGraph(g) that connect vertices in TLCB(lit)
+//Only vertices with sequenceId in seqIdMap are considered
+//
+//Parameters:
+// g            graph queried with findEdge(), sequenceId(), fragmentBegin(), fragmentLength()
+// seqSet       only used to translate sequence ids to positions via idToPosition()
+// seqLenMap    indexed by sequence id; supplies the length used to mirror '-' coordinates
+// lit          container of vertex descriptors; every ordered pair (vit,vit2) is tested
+// offsets      keyed by sequence position; .orient and .offset are read for both vertices
+// seqIdMap     keyed by sequence position; the mapped value becomes the fragment's seq id
+// currmatches  output; one fragment is appended per accepted pair
+// currscores   output; fragmentLength(g,vd1) is appended per accepted pair
+//
+//Both (a,b) and (b,a) are visited, so a pair that findEdge() reports in both
+//directions is appended twice.
+
+//TODO: Current impl iterates over all pairs of vertices. Performance
+//can be improved using a BFS
+template<typename TGraph,
+	 typename TString,
+	 typename TSpec,
+	 typename TSeqLenMap,
+	 typename TLCB,
+	 typename TSize,
+	 typename TOffsets,
+	 typename TFragmentString,
+	 typename TScoreValues>
+void buildMatchesFromGraph(TGraph &g,
+			   StringSet<TString, TSpec> &seqSet,
+			   TSeqLenMap &seqLenMap,
+			   TLCB &lit,
+			   std::map<TSize,TOffsets> &offsets,
+			   std::map<TSize,TSize> &seqIdMap,
+			   TFragmentString &currmatches,
+			   TScoreValues &currscores){
+  typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
+  typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
+  typename TLCB::const_iterator vit;
+  typename TLCB::const_iterator vit2;
+  typename TLCB::const_iterator vit_end;
+  typename TLCB::const_iterator vit2_end;
+
+  typedef Fragment<> TFragment;
+
+  typedef typename Id<TGraph>::Type TId;
+  //nilVertex is only referenced from assert(), so it is not declared when NDEBUG is set
+#ifdef NDEBUG
+  ;
+#else
+  TVertexDescriptor nilVertex = getNil<TVertexDescriptor>();
+#endif
+  //if edge(vit,vit2) present in input graph
+  //then mark as present in output graph
+  //Add vertex from graph g to LCB graph
+  vit_end = lit.end();
+  vit2_end = lit.end();
+#ifdef DEBUGGING
+  std::cout << "Building matches from graph for LCB" << std::endl;
+#endif
+  TVertexDescriptor vd1,vd2;
+  TSize vd1seq,vd2seq;
+  TEdgeDescriptor ed;
+  //Outer loop: first vertex of the pair
+  for(vit = lit.begin();vit!=vit_end;vit++){
+    vd1 = *vit;
+    vd1seq = sequenceId(g,vd1);
+    //Inner loop: second vertex of the pair (all vertices, not just the ones after vit)
+    for(vit2 = lit.begin();vit2!=vit2_end;vit2++){
+      //TODO see if shortcircuit on id1!=id2 improves performance here
+      if(vit != vit2){
+	assert(vd1!=nilVertex);
+	//assert(degree(g,vd1)>0);
+	vd2 = *vit2;
+	vd2seq = sequenceId(g,vd2);
+	assert(vd2!=nilVertex);
+	//assert(degree(g,vd2)>0);
+	assert(vd1!=vd2);
+	//findEdge() implemented in graph_impl_undirected.h 
+	//traced from data_align object in graph_impl_align.h
+	ed = findEdge(g,vd1,vd2);
+	//NOTE(review): empty branch, has no effect
+	if(!ed){
+
+	}
+	//There is an alignment between vd1 and vd2
+	if(ed){
+	  assert(vd1seq!=vd2seq);
+	  //Positions of the two sequences in seqSet; these key seqIdMap and offsets
+	  TId id1 = idToPosition(seqSet, vd1seq);	  
+	  TId id2 = idToPosition(seqSet, vd2seq);
+	  assert(id1!=id2);
+	  if(seqIdMap.find(id1)!=seqIdMap.end() &&
+	     seqIdMap.find(id2)!=seqIdMap.end()){ //Check edge and if sequence was not trimmed out of LCB due to length
+	    assert(seqIdMap.find(id1)!=seqIdMap.end());
+	    assert(seqIdMap.find(id2)!=seqIdMap.end());
+	    
+	    //Lengths used below to mirror a coordinate when the orient is '-'
+	    int vd1len = seqLenMap[vd1seq];
+	    int vd2len = seqLenMap[vd2seq];
+	    //assert(vd1len == length(getValueById(stringSet(g), vd1seq)));
+	    //assert(vd2len == length(getValueById(stringSet(g), vd2seq)));
+	    //length(getValueById(stringSet(g), idToPosition(seqSet,vd1seq)))	    //length(getValueById(stringSet(g),id1));
+	    //length(getValueById(stringSet(g), idToPosition(seqSet,vd2seq))) 	    //length(getValueById(stringSet(g),id2));
+
+	    //NOTE(review): the debug output below indexes seqIdMap with the sequence id
+	    //(vd1seq/vd2seq) while the rest of the function uses the position (id1/id2).
+	    //seqIdMap is a std::map, so operator[] inserts a missing key; in DEBUGGING
+	    //builds this can add entries to seqIdMap if id and position differ.
+#ifdef DEBUGGING
+	    std::cout << " seqs:" 
+		      << seqIdMap[vd1seq] << ":" << offsets[id1].orient
+		      << " " 
+		      << seqIdMap[vd2seq] << ":" << offsets[id2].orient
+		      << " lengths:" << vd1len << " "  << vd2len
+		      << " coords:" 
+		      << fragmentBegin(g,vd1) << "-"  << fragmentBegin(g,vd1) + fragmentLength(g,vd1)
+		      << "," 
+		      << fragmentBegin(g,vd2) << "-"  << fragmentBegin(g,vd2) + fragmentLength(g,vd1)
+		      << " offset1 " << offsets[id1].offset
+		      << " offset2 " << offsets[id2].offset
+		      << " edge weight:" << cargo(ed)
+		      << std::endl;
+#endif
+	    
+
+	    //Four cases on the orientation of each sequence. A '-' orient mirrors the
+	    //begin coordinate against the sequence length; the stored offset is
+	    //subtracted in every case. The fragment length is always that of vd1.
+	    //NOTE(review): all four cases pass false as the last TFragment argument,
+	    //unlike buildFrag() which passes true for the mixed cases - confirm intended.
+	    if(offsets[id1].orient == '-'){
+	      if(offsets[id2].orient=='+'){
+		//id1:- id2:+
+		assert(offsets[id1].orient == '-');
+		assert(offsets[id2].orient == '+');
+		appendValue(currmatches, TFragment(seqIdMap[id1],
+						   vd1len-(fragmentBegin(g,vd1)+fragmentLength(g,vd1))-offsets[id1].offset,
+						   seqIdMap[id2],
+						   fragmentBegin(g,vd2)-offsets[id2].offset, 
+						   fragmentLength(g,vd1),
+						   false));
+	      }
+	      else{
+		//id1:- id2:-
+		assert(offsets[id1].orient == '-');
+		assert(offsets[id2].orient == '-');
+		appendValue(currmatches, TFragment(seqIdMap[id1],
+						   vd1len-(fragmentBegin(g,vd1)+fragmentLength(g,vd1))-offsets[id1].offset,
+						   seqIdMap[id2],
+						   vd2len-(fragmentBegin(g,vd2)+fragmentLength(g,vd2))-offsets[id2].offset, 
+						   fragmentLength(g,vd1),
+						   false));
+	      }
+	      appendValue(currscores, fragmentLength(g,vd1));
+	    }
+	    else{
+	      if(offsets[id2].orient == '-'){
+		//id1:+ id2:-
+		//NOTE(review): the mirrored vd2 coordinate uses fragmentLength(g,vd1) here,
+		//whereas the id1:- id2:- case uses fragmentLength(g,vd2)
+		assert(offsets[id1].orient == '+');
+		assert(offsets[id2].orient == '-');
+		appendValue(currmatches, TFragment(seqIdMap[id1],
+						   fragmentBegin(g,vd1)-offsets[id1].offset,
+						   seqIdMap[id2],
+						   vd2len-(fragmentBegin(g,vd2)+fragmentLength(g,vd1))-offsets[id2].offset, 
+						   fragmentLength(g,vd1),
+						   false));
+	      }
+	      else{
+		//id1:+ id2:+
+		assert(offsets[id1].orient == '+');
+		assert(offsets[id2].orient == '+');
+		appendValue(currmatches, TFragment(seqIdMap[id1],
+						   fragmentBegin(g,vd1)-offsets[id1].offset,
+						   seqIdMap[id2],
+						   fragmentBegin(g,vd2)-offsets[id2].offset, 
+						   fragmentLength(g,vd1),
+						   false));
+	      }
+	      appendValue(currscores, fragmentLength(g,vd1));
+	    }	    
+	  }
+	  else{
+	    //Ignore matches, one of the sequences was trimmed from the LCB, probably due to length
+#ifdef DEBUGGING
+	    if(seqIdMap.find(id1)==seqIdMap.end()){
+	      std::cout << "Ignoring match. Trimmed seq V" << vd1 << "-V" << vd2 << " S1" <<id1 << std::endl;
+	    }
+	    if(seqIdMap.find(id2)==seqIdMap.end()){
+	      std::cout << "Ignoring match. Trimmed seq V" << vd1 << "-V" << vd2 << " S2" <<id2 << std::endl;
+	    }
+#endif
+	  }
+	}
+	else{
+	  //Ignore matches
+	  //Segments vd1,vd2 are connected in the component
+	  //but not directly via an alignment edge
+	  //The DEBUGGING section below only runs asserts; its output is commented out
+#ifdef DEBUGGING
+	  TId id1 = idToPosition(seqSet, vd1seq);	  
+	  TId id2 = idToPosition(seqSet, vd2seq);
+	  if(id1 != id2
+	     && seqIdMap.find(id1)!=seqIdMap.end() 
+	     && seqIdMap.find(id2)!=seqIdMap.end()){
+	    assert(seqIdMap.find(id1)!=seqIdMap.end());
+	    assert(seqIdMap.find(id2)!=seqIdMap.end());
+	    /*
+	    std::cout << "Ignoring match. Indirect connections V" << vd1 << "-V" << vd2 << " S1" <<id1 << "-" << " S2" << id2 << std::endl;
+	    std::cout << "Ignored seqs:" 
+		      << seqIdMap[vd1seq] << ":" << offsets[id1].orient
+		      << " " 
+		      << seqIdMap[vd2seq] << ":" << offsets[id2].orient
+		      << " coords:" 
+		      << fragmentBegin(g,vd1) << "-"  << fragmentBegin(g,vd1) + fragmentLength(g,vd1)
+		      << "," 
+		      << fragmentBegin(g,vd2) << "-"  << fragmentBegin(g,vd2) + fragmentLength(g,vd1)
+		      << " offset1 " << offsets[id1].offset
+		      << " offset2 " << offsets[id2].offset
+		      << std::endl;
+	    */
+	  }
+#endif
+	}
+      }
+    }
+  }
+}
+
+
+//Extract the span [min,max) of sequence i into str and record its placement in offsets[i].
+//
+// i           key under which the result is stored in offsets
+// currorient  '+' keeps the span as is; otherwise ('-' is asserted) the span is
+//             reverse complemented and the stored coordinates are mirrored
+// min,max     span on the sequence (passed by value, the caller's copy is untouched)
+// str         output; receives the (possibly reverse complemented) subsequence
+// seqSet      source of the sequence data
+// seqIdMap    optional translation of i into an index of seqSet; i is used
+//             directly when it has no entry
+// offsets     output; orient, offset, spanlen and seqlen of entry i are written
+template<typename TSeqID,
+	 typename TString,
+	 typename TSpec,
+	 typename TMap,
+	 typename TOffsets>
+void setLCBProps(TSeqID i,
+		 char currorient,
+		 unsigned int min,
+		 unsigned int max,
+		 TString & str,
+		 StringSet<TString, TSpec> &seqSet,
+		 TMap & seqIdMap,
+		 TOffsets &offsets){
+  //Resolve the index into seqSet, going through seqIdMap when i is mapped
+  TSeqID seqidx = i;
+  if(seqIdMap.find(i)!=seqIdMap.end()){
+    seqidx = seqIdMap[i];
+  }
+
+  const bool forward = (currorient == '+');
+
+  if(!forward){
+    assert(currorient == '-');
+#ifdef DEBUGGING
+    std::cout << "REVERSING SEQUENCE " << i << " of length " << length(seqSet[seqidx]) 
+	      << " " << min << " - " << max << " " << std::endl;
+#endif
+    assert(min<max);
+    assert(int(min)>=0);
+    assert(max<=length(seqSet[seqidx]));
+  }
+
+  //Substring of the LCB on this sequence (leading strand at this point)
+  str = infix(seqSet[seqidx],min,max);
+
+  if(!forward){
+    //Reverse complement
+    convertInPlace(str, FunctorComplement<Dna5>());
+    reverseInPlace(str);
+    //Now relative to - strand
+    //Offsets assumed relative to matching strand
+    //Also MAF stores coordinates relative to matching strand
+    int tmpmin = min;
+    min = length(seqSet[seqidx]) - max;
+    max = length(seqSet[seqidx]) - tmpmin;
+  }
+
+  //Record orientation and coordinates for this sequence
+  offsets[i].orient = forward ? '+' : '-';
+  offsets[i].offset = min;
+  offsets[i].spanlen = max-min;
+  offsets[i].seqlen = length(seqSet[seqidx]);
+
+}
+
+
+//Determines orientation for sequence i in LCB lit
+//Uses the vertexOrientMap that was previously built using relative orientation
+//of most consistent matches in the original alignment graph
+//
+// g                graph queried with sequenceId(), fragmentBegin(), fragmentLength()
+// seqSet           used to translate a sequence id into a position (idToPosition)
+// lit              LCB, dereferenced with -> to iterate its vertices
+// i                position of the sequence of interest
+// currorient       in/out; when '?' on entry it takes the orient of the first matching vertex
+// min,max          in/out; widened to span every matching vertex (leading strand)
+// alnlen           in/out; incremented by the length of every matching vertex
+// vertexOrientMap  orientation per vertex
+//
+//Returns true when at least one vertex of lit lies on sequence i.
+template<typename TGraph,
+	 typename TString,
+	 typename TSpec,
+	 typename TLCB,
+	 typename SeqID,
+	 typename TVertexOrientMap>
+bool getLCBProps(TGraph &g,
+		 StringSet<TString, TSpec> &seqSet,
+		 TLCB &lit,
+		 SeqID i,
+		 char & currorient, 
+		 unsigned int &min,
+		 unsigned int &max,
+		 unsigned int &alnlen,
+		 TVertexOrientMap &vertexOrientMap){
+  typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
+  std::vector<unsigned int>::const_iterator vit;
+  std::vector<unsigned int>::const_iterator vit_end;
+  bool seqPresent=false;
+  //Hard-wired switch; the majority-rule code below is disabled while this is false
+  bool resetOrientMajorityRule=false;
+
+  if(resetOrientMajorityRule){
+    //Resolve conflicts in orientation, use a majority rule to assign block orientation
+    //TODO, determine what best to do with misoriented, conflicting blocks
+    int plusVotes=0;
+    int minusVotes=0;
+    vit_end = lit->end();
+    for(vit = lit->begin();vit!=vit_end;++vit){
+      TVertexDescriptor vd1 = *vit;
+      if(idToPosition(seqSet, sequenceId(g,vd1))!=i){
+        continue;
+      }
+      seqPresent=true;
+      if(vertexOrientMap[vd1] == '+'){
+        ++plusVotes;
+      }
+      else{
+        assert(vertexOrientMap[vd1] == '-');
+        ++minusVotes;
+      }
+    }
+    //Ties go to '+'
+    currorient = (plusVotes>=minusVotes) ? '+' : '-';
+  }
+
+  //Currently, this method will use the first encountered
+  //orientation for sequence $i in LCB $lit
+  vit_end = lit->end();
+  for(vit = lit->begin();vit!=vit_end;++vit){
+    TVertexDescriptor vd1 = *vit;
+    if(idToPosition(seqSet, sequenceId(g,vd1))!=i){
+      //Vertex belongs to another sequence
+      continue;
+    }
+    seqPresent=true;
+
+    //Determine orientation for the block
+    assert(vertexOrientMap.find(vd1) != vertexOrientMap.end());
+    assert(vertexOrientMap[vd1] != '?');
+    if(currorient == '?'){
+      if(resetOrientMajorityRule){
+        assert(false);//should be using majority rule code now
+      }
+      else{
+        //First vertex seen on this sequence decides
+        currorient = vertexOrientMap[vd1];
+      }
+    }
+    else if(vertexOrientMap[vd1] != currorient){
+      //All vertices in a block should have the same orientation; this one conflicts
+#ifdef DEBUGGING
+      std::cout << "Conflicting orientation on seq:" << i << " currorient:" << currorient << " V:" << vd1 << " expecting:" << vertexOrientMap[vd1] << std::endl;
+#endif
+      if(resetOrientMajorityRule){
+        vertexOrientMap[vd1] = currorient;
+      }
+      //otherwise the conflict is tolerated
+    }
+    assert(currorient != '?');
+#ifdef DEBUGGING
+    std::cout << "Determining orient for LCB " << " seq:" << i << " V:" << vd1 << " orient:" << currorient 
+	      << " block:" << std::endl;
+    std::cout << "Coords:" << fragmentBegin(g,vd1) << "-" << fragmentBegin(g,vd1)+fragmentLength(g,vd1) 
+	      << " min:" << min << " max:" << max << std::endl;
+#endif
+
+    //Min max are always on the leading strand here
+    assert((int)fragmentBegin(g,vd1)>=0);
+    alnlen += fragmentLength(g,vd1);
+    if(fragmentBegin(g,vd1)<min){
+      min = fragmentBegin(g,vd1);
+    }
+    if(fragmentBegin(g,vd1)+fragmentLength(g,vd1)>max){
+      max = fragmentBegin(g,vd1)+ fragmentLength(g,vd1);
+    }
+  }
+
+  return seqPresent;
+}
+
+
+//retrieveLCBSegments()
+//Populate TFragmentString(currmatches) for TLCB(currlcb) using TGraph(g)
+//
+//For every sequence position i in seqSetv that is present in the LCB with at
+//least MIN_FRAGMENT_SIZE aligned and spanned characters, this
+// - appends the spanned subsequence to currseqs and its name to currnameSet
+// - fills offsets[i] (through setLCBProps)
+// - adds the vertices of vseqs[i] that start inside the span to the working
+//   vertex set and to coveredSet
+//Finally the fragments between all collected vertices are appended to
+//currmatches/currscores (through buildMatchesFromGraph).
+//
+//lcbid is only used in DEBUGGING output.
+
+//TODO:retrieve LCBSegments from initial match set (TFragmentString matches)
+//rather than the alignment graph
+template<typename TGraph, 
+	 typename TString,
+	 typename TSpec,
+	 typename TString2,
+	 typename TSpec2,
+	 typename TMap,
+	 typename TVertexOrientMap, 
+	 typename TLCB, 
+	 typename TNames, 
+	 typename TSequence,
+	 typename TFragmentString, 
+	 typename TScoreValues,
+	 typename TSize,
+	 typename TOffsets,
+	 typename TCoveredSet,
+	 typename TSortedV>
+void retrieveLCBSegments(TGraph & g, 
+			 StringSet<TString, TSpec> &seqSetv,    //can include placeholder seqs, ie. no seq strings
+			 StringSet<TString2, TSpec2> &seqSetReal, //can include virtual seqs, must have strings
+			 TMap & seqIdxMap,                      //mapping between seqSetv->seqSetReal
+			 TVertexOrientMap &vertexOrientMap,
+			 TLCB &currlcb, 
+			 unsigned int lcbid,
+			 TNames & sequenceNames,
+			 StringSet<TSequence, Owner<> > & currseqs,
+			 TFragmentString &currmatches, 
+			 TScoreValues &currscores,
+			 TNames & currnameSet,
+			 std::map<TSize,TOffsets> &offsets,
+			 TCoveredSet & coveredSet,
+			 TSortedV & vseqs,
+			 unsigned int MIN_FRAGMENT_SIZE){
+  assert(MIN_FRAGMENT_SIZE>=1);
+  //No-op statement; presumably keeps lcbid "used" when DEBUGGING is off - TODO confirm
+  if(lcbid>0){};
+  //Vars
+  typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
+  typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
+  
+  //Need to trim seqSet to members and subsequences present in the current LCB
+  //Map to track old seqid to newseqid
+  //seqIdMap:  position i in seqSetv -> index of the sequence in currseqs
+  //seqLenMap: position i in seqSetv -> sequence length looked up in stringSet(g)
+  std::map<TSize,TSize> seqIdMap;
+  std::map<TSize,TSize> seqLenMap;
+
+  //Working set of vertices: the LCB's own vertices plus any spanned ones added below
+  std::set<TVertexDescriptor> currlcbset;
+
+  typename std::set<TVertexDescriptor>::iterator pos;
+
+  typedef typename Id<TGraph>::Type TId;
+
+  //vit is reused for both currlcb and vseqs[i] below
+  typename std::vector<TVertexDescriptor>::const_iterator vit;
+  typename std::vector<TVertexDescriptor>::const_iterator vit_end;
+  vit_end = currlcb->end();
+  for(vit = currlcb->begin();vit!=vit_end;vit++){
+    currlcbset.insert(*vit);
+  }
+  
+  //Determine sequences present in LCB
+  //and calculate spanning coords min,max
+  for(TSize i = 0; i<length(seqSetv); ++i) {
+    //Change to intmax
+    //min starts at the largest value and max at 0 so getLCBProps can narrow them
+    unsigned int min=std::numeric_limits<unsigned int>::max();
+    unsigned int max=0;
+    unsigned int alnlen=0;
+    char currorient = '?';
+    //
+    //Filter LCBs so that we only include sequences that span >
+    //MIN_FRAGMENT_SIZE
+    if(getLCBProps(g,seqSetv,currlcb,
+		   i,currorient,min,max,alnlen,
+		   vertexOrientMap)){
+      //Both the summed fragment length and the spanned length must reach the threshold
+      if(alnlen>=MIN_FRAGMENT_SIZE &&
+	 max-min>=MIN_FRAGMENT_SIZE){
+#ifdef DEBUGGING
+	assert(currorient!='?');
+	assert(max>0); 
+	assert((int)min>=0);
+	assert(min<max);
+	std::cout << "LCB:" << lcbid << " seq:" << i << " " 
+		  << min << "-" << max << " spanlen:" << max-min 
+		  << " alnlen:" << alnlen
+		  << " orient: " << currorient << std::endl;
+#endif
+	//nSeq is only referenced from assert(), so it is not declared when NDEBUG is set
+#ifdef NDEBUG
+	;
+#else
+	unsigned int nSeq = length(currseqs);
+#endif
+	//Subsequence of lcb on seq $i 
+	TString lcbseqstr;
+	setLCBProps(i,currorient,min,max,lcbseqstr,
+		    seqSetReal,
+		    seqIdxMap,
+		    offsets);
+	//Save association between current seq $i
+	//and position in $currseqs
+	assert(length(lcbseqstr)==max-min);
+	appendValue(currseqs,lcbseqstr);
+	appendValue(currnameSet,sequenceNames[i]);
+	assert(length(currnameSet)==length(currseqs));
+	
+	seqIdMap.insert(std::make_pair(i,length(currseqs)-1));
+
+	//Record the sequence length, translating i through seqIdxMap when it is mapped
+	if(seqIdxMap.find(i)!=seqIdxMap.end()){
+	  seqLenMap.insert(std::make_pair(i,length(getValueById(stringSet(g),idToPosition(seqSetReal, seqIdxMap[i])))));
+	}
+	else{
+	  seqLenMap.insert(std::make_pair(i,length(getValueById(stringSet(g),idToPosition(seqSetReal, i)))));
+	}
+
+	//Make sure that we have added one seq
+	assert(length(currseqs)==nSeq+1);
+
+	//Using sort vertices on seqs, capture any missing vertices that are spanned by the LCB
+	//TODO, use findVertex and index to avoid looking through all vertices
+	//
+	//TVertexDescriptor act_knot = findVertex(ali_g,seq_id,begin_pos);
+	if(i<vseqs.size()){
+	  //The early break below presumably relies on vseqs[i] being ordered by
+	  //fragmentBegin - TODO confirm against the caller
+	  for(vit = vseqs[i].begin();vit!=vseqs[i].end();++vit){
+	    //Check that vertex is not already aligned
+	    assert(sequenceId(g,*vit)==i);
+	    if(fragmentBegin(g,*vit)>=min){
+	      if(fragmentBegin(g,*vit)<max){
+		  //Vertex starts inside [min,max): treat it as part of this LCB
+		  currlcbset.insert(*vit);
+		  coveredSet.insert(*vit);
+	      }
+	      else{
+		//past max, we can stop looking
+		break;
+	      }
+	    }
+	  }
+	}
+	else{
+	  //Vseqs not populated
+	}
+      }
+      else{
+	//Seq fragment is too short to include in LCB
+      }
+    }
+    else{
+      //Seq not present in LCB
+      //This is ok
+    }
+    
+  }
+  assert(seqIdMap.size()==length(currseqs));
+#ifdef SEQAN_PROFILE2
+  std::cerr << "LCB Init of segments done: " << SEQAN_PROTIMEUPDATE(__myProfileTime) 
+	    << " seconds" << std::endl;
+#endif
+  
+  
+#ifdef SEQAN_PROFILE2
+  std::cerr << "LCB Building segments" << std::endl;
+#endif
+  
+  //All coordinates for fragments
+  //must be relative to the orientation determined previously
+  //offsets array is always relative to the matching strand
+  typedef Fragment<> TFragment;
+  
+  //Note: the widened vertex set (currlcbset) is passed, not the original currlcb
+  buildMatchesFromGraph(g,
+			seqSetv,
+			seqLenMap,
+			currlcbset,//currlcb,
+			offsets,
+			seqIdMap,
+			currmatches,
+			currscores);
+
+  //TODO double check that matches all match expected values in lcbseqstr here
+
+#ifdef SEQAN_PROFILE2
+    std::cerr << "LCB Building segments done: " << SEQAN_PROTIMEUPDATE(__myProfileTime) 
+	      << " seconds" << std::endl;
+#endif
+}
+
+//Convenience overload of retrieveLCBSegments() for a single sequence set.
+//The same set is passed as both sequence-set arguments of the full overload,
+//together with an empty index map.
+//
+//TODO retrieve LCBSegments from initial match set (TFragmentString matches)
+//Rather than the alignment graph
+template<typename TGraph, 
+	 typename TString,
+	 typename TSpec,
+	 typename TVertexOrientMap, 
+	 typename TLCB, 
+	 typename TNames, 
+	 typename TSequence,
+	 typename TFragmentString, 
+	 typename TScoreValues,
+	 typename TSize,
+	 typename TOffsets,
+	 typename TCoveredSet,
+	 typename TV>
+void retrieveLCBSegments(TGraph & g, 
+			 StringSet<TString, TSpec> &seqSet,
+			 TVertexOrientMap &vertexOrientMap,
+			 TLCB &currlcb, 
+			 unsigned int lcbid,
+			 TNames & sequenceNames,
+			 StringSet<TSequence, Owner<> > & currseqs,
+			 TFragmentString &currmatches, 
+			 TScoreValues &currscores,
+			 TNames & currnameSet,
+			 std::map<TSize,TOffsets> &offsets,
+			 TCoveredSet &coveredset,
+			 TV & vseqs,
+			 unsigned int MIN_FRAGMENT_SIZE){
+  //Empty map: no sequence index needs translating
+  std::map<TSize,TSize> noIndexTranslation;
+
+  retrieveLCBSegments(g,
+                      seqSet, seqSet, noIndexTranslation,
+                      vertexOrientMap,
+                      currlcb, lcbid,
+                      sequenceNames,
+                      currseqs, currmatches, currscores, currnameSet,
+                      offsets,
+                      coveredset,
+                      vseqs,
+                      MIN_FRAGMENT_SIZE);
+}
+
+//Read every alignment of maffile, rewrite the coordinates and names of each
+//component, and write the alignments to outstrm.
+//
+// maffile      path of the MAF file to read
+// outstrm      stream the transformed alignments are written to with mafWrite()
+// currnameSet  chromosome name (as parsed by parseSrcName) -> new source name
+// offsets      chromosome name -> value added to the component start
+// orients      chromosome name -> strand written to the component
+// seqlens      chromosome name -> source size written to the component
+//
+//offsets, orients and seqlens are accessed with operator[], so a missing name
+//yields (and inserts) a default value; only currnameSet is checked by assert.
+void transformMAF(const char * maffile, 
+		  FILE * outstrm, 
+		  std::map<std::string,std::string> &currnameSet, 
+		  std::map<std::string,unsigned int> &offsets, 
+		  std::map<std::string,char> &orients, 
+		  std::map<std::string,unsigned int> & seqlens){
+#ifdef DEBUGGING
+  std::cout << "Transforming maf file " << maffile << std::endl;
+#endif
+  struct mafFile *mf = mafOpen(maffile, 0);
+
+  //Read all alignments and chain them into a singly linked list headed by A
+  struct mafAli *a, *A, *last_a;
+  struct mafComp *c;
+  A = last_a = NULL;
+  while ((a = mafNext(mf)) != NULL) {
+    if ((c = a->components) == NULL)
+      assert(false);//fatal("empty maf entry");
+    if (last_a == NULL)
+      A = a;
+    else
+      last_a->next = a;
+    last_a = a;
+  }
+
+  if(A==NULL){
+#ifdef DEBUGGING
+    std::cout << "can't find any alignments" << std::endl;
+#endif
+  }
+
+  //Do transform (the loop is simply skipped when no alignment was read)
+  char chrName[200], species_name[200];
+  for (a = A; a != NULL; a = a->next) {
+    for(c=a->components; c!=NULL; c=c->next) {
+      //Update coordinates
+      parseSrcName(c->src, species_name, chrName);
+      const std::string key(chrName);
+      assert(currnameSet.find(key)!=currnameSet.end());
+      //From UCSC FAQ The start of the aligning
+      //region in the source sequence. This is a
+      //zero-based number. If the strand field is
+      //'-' then this is the start relative to the
+      //reverse-complemented source sequence.
+
+      //TODO 
+      //Confirm reverse alignments during refine are not handled properly
+      //Both strands are shifted the same way; only the two known strand
+      //characters are accepted
+      assert(c->strand == '+' || c->strand == '-');
+      c->start = c->start+offsets[key];
+      c->strand = orients[key];
+      //NOTE(review): src now points into the string owned by currnameSet and the
+      //previous src pointer is dropped; the map must outlive any use of this
+      //component - confirm who owns the original buffer.
+      c->src = (char *)currnameSet[key].c_str();
+      c->srcSize = seqlens[key];
+    }
+    mafWrite(outstrm, a);
+  }
+
+  //Release the reader on every path, including the "no alignments" one
+  mafFileFree(&mf);
+}
+
+//Run the mugsy executable found under $MUGSY_INSTALL on fastafiles as a
+//refinement pass, through the shell.
+//
+// outputdir  passed to mugsy as --directory
+// fastafiles appended verbatim to the command line
+// prefix     passed to mugsy as --prefix; also names the log file removed on success
+// outprefix  prefix of the files receiving mugsy's stdout and stderr
+// msaOpt     only msaOpt.distance is read (passed as --distance)
+//
+//On success the stdout/stderr capture files and the log are removed unless
+//DEBUGGING is defined. A non-zero exit status is reported on std::cerr and
+//trips an assert in debug builds.
+//
+//NOTE(review): the command is assembled by string concatenation and run with
+//system(); none of the arguments are quoted, so paths containing spaces or
+//shell metacharacters will not work as intended.
+template<typename TScore>
+void runIterativeMUGSY(std::string & outputdir,
+		       const std::string & fastafiles,
+		       std::string & prefix,
+		       const std::string & outprefix,
+		       MsaOptions<Dna5 , TScore> const& msaOpt){
+
+  const char * mugsyinstall = std::getenv("MUGSY_INSTALL");
+  if(mugsyinstall == NULL){
+    //Streaming a null char* into the command would be undefined behaviour
+    std::cerr << "MUGSY_INSTALL is not set; cannot run mugsy refinement" << std::endl;
+    assert(false);
+    return;
+  }
+
+  std::ostringstream refinecmd;
+  refinecmd << mugsyinstall
+	    << "/mugsy " 
+#ifdef DEBUGGING
+	    << "--debug 5 --log refine.log "
+#endif
+	    << " --distance " << msaOpt.distance
+	    << " --minlength 15"
+	    << " --nucmeropts \"-l 10 -c 15\"" //relax matchlen
+    //TODO consider removing this option during refine to allow for refinement of short blocks
+	    << " --skipunique --directory " << outputdir 
+	    << " --skiprefine --colinear "
+	    << " --prefix " << prefix
+	    << " " << fastafiles
+	    << " 1>"
+	    << outprefix << "mugsyrefine.stdout"
+	    << " 2>" << outprefix << "mugsyrefine.stderr";
+#ifdef DEBUGGING
+  std::cout << refinecmd.str() << std::endl;
+#endif
+  int ret = system(refinecmd.str().c_str());
+  if(ret!=0){
+    std::cerr << refinecmd.str() << std::endl 
+	      << "SYSTEM:" << ret << std::endl;
+  }
+  else{
+#ifndef DEBUGGING
+    //Best-effort cleanup of the capture files; failures to unlink are ignored.
+    //The locals avoid the names stdout/stderr, which <cstdio> may define as macros.
+    std::string stdoutFile(outprefix+"mugsyrefine.stdout");
+    std::string stderrFile(outprefix+"mugsyrefine.stderr");
+    std::string logFile(prefix+".mugsy.log");
+    unlink(stdoutFile.c_str());
+    unlink(stderrFile.c_str());
+    unlink(logFile.c_str());
+#endif
+  }
+  assert(ret==0);
+}
+
+//Refinement using Mugsy
+//Also support for fsa,pecan,lagan aligners. They must be in your path
+//TODO, save label,dups from original MAF
+//
+//Reads every alignment block from maffile. For each block with more than one
+//component, the ungapped text of every component is written to its own
+//temporary FASTA file in the directory of msaOpt.outfile and the aligner
+//selected by msaOpt.refine ("pecan", "mlagan", "fsa", anything else runs
+//Mugsy) is run on those files. If a non-empty MGREF<label>.maf exists
+//afterwards it is handed to transformMAF() together with the output stream
+//for <msaOpt.outfile>.maf.refined. Blocks with a single component are copied
+//to the output unchanged.
+//Exits with status 1 if the output file can't be opened.
+//NOTE(review): a block whose refined MAF is missing or empty is not written
+//to the output at all, and the pecan/mlagan branches redirect their output to
+//pecan.<pid>.mfa/lagan.<pid>.mfa rather than to that MAF - confirm intended
+template<typename TScore>
+void refineMSA(const char * maffile,
+	       MsaOptions<Dna5 , TScore> const& msaOpt){
+  FILE * strmmafrefined;
+
+  struct mafFile *mf;
+  mf = mafOpen(maffile, 0);
+  struct mafAli *a, *A, *last_a;
+  struct mafComp *c;
+  A = last_a = NULL;
+  //Read all blocks into a singly linked list headed by A
+  while ((a = mafNext(mf)) != NULL) {
+    if ((c = a->components) == NULL)
+      assert(false);//fatal("empty maf entry");
+    if (last_a == NULL)
+      A = a;
+    else
+      last_a->next = a;
+    last_a = a;
+  }
+  if(A==NULL){
+#ifdef DEBUGGING
+    std::cout << "can't find any alignments" << std::endl;
+#endif
+  }
+  else{
+    std::string outfile(msaOpt.outfile);
+
+    //dirname() may modify its argument, so hand it a writable, NUL terminated copy
+    std::vector<char> writable(outfile.size() + 1);
+    std::copy(outfile.begin(), outfile.end(), writable.begin());
+    std::string outputdir(dirname(&writable[0]));
+    if(outputdir[outputdir.length()-1] != '/'){
+      outputdir = outputdir + '/';
+    }
+
+    std::string refinedname(outfile+".maf.refined");
+    strmmafrefined = fopen(refinedname.c_str(),"w");
+    if(strmmafrefined==NULL){
+      //Nothing useful can be done without the output file
+      std::cerr << "Can't open " << refinedname << " for writing" << std::endl;
+      exit(1);
+    }
+    mafWriteStart(strmmafrefined, "mugsy_refined");
+    //Do transform
+    char chrName[200], species_name[200];
+    int lcbid=0;
+    int COL_WIDTH=60;
+    for (a = A; a != NULL; a = a->next) {
+      //Per block lookup tables keyed by the component's src name
+      std::map<std::string,unsigned int> curroffsets;
+      std::map<std::string,unsigned int> currspanlens;
+      std::map<std::string,unsigned int> currseqlens;
+      std::map<std::string,char> currorients;
+      std::map<std::string,std::string> currnameSetv;
+      
+      int ncol = a->textSize;
+      int i=0;
+      std::ostringstream tmpgraph;
+      tmpgraph << "MUGTMP" << getpid() << "_" << lcbid;
+
+      //One temporary FASTA file per component of the block
+      std::vector<std::string> fnames;
+      for(c=a->components; c!=NULL; c=c->next) {
+	std::fstream strmfsa;
+	std::string fname(tmpgraph.str());
+	fname = outputdir+fname + "_S"+boost::lexical_cast<std::string>(i) + ".fsa";
+	strmfsa.open(fname.c_str(), std::ios_base::out | std::ios_base::trunc);
+	fnames.push_back(fname);
+	parseSrcName(c->src, species_name, chrName);
+	//Write FASTA
+	if(msaOpt.refine=="fsa"){
+	  //Write XMFA style
+	  strmfsa << ">" << tmpgraph.str() << "_S" <<boost::lexical_cast<std::string>(i) 
+		  << "." << c->src << ":" << 1 << "-" << c->size << " " << c->strand << " " << c->size << std::endl;
+	}
+	else{
+	  strmfsa << ">" << c->src << std::endl ;
+	}
+	//Emit the aligned text without gap characters, COL_WIDTH bases per line
+	int col=0;
+	int j=0;
+	for (col = j = 0; j < ncol; ++j) {
+	  if(c->text[j]!='-'){
+	    strmfsa << c->text[j];
+	    ++col;
+	    if (col == COL_WIDTH) {
+	      strmfsa << std::endl;
+	      col = 0;
+	    }
+	  }
+	}
+	if (col != 0){
+	  strmfsa << std::endl;
+	}
+	std::string sname(c->src);
+	curroffsets[sname] = c->start;
+	currspanlens[sname] = c->size;
+	currseqlens[sname] = c->srcSize;
+	currorients[sname] = c->strand;
+	currnameSetv[sname] = sname;
+	++i;
+	strmfsa.close();
+      }
+      
+      //Require more than one sequence
+      if(fnames.size()>1){
+	std::ostringstream fastafiles;
+	for(int k=0;k<(int)fnames.size();k++){
+	  fastafiles << fnames[k] << " " ;
+	}
+	
+	//Output MAF file
+	std::string prefix("MGREF");
+	if(a->label>=0){
+	  prefix = prefix + boost::lexical_cast<std::string>(a->label);
+	}
+	//Named refinedmaf so that it does not shadow the maffile parameter
+	std::string refinedmaf(outputdir+"/"+prefix+".maf");
+	//Clean up old maf with same name
+	unlink(refinedmaf.c_str());
+	
+	//Run refinement 
+	//Support for other aligners is provided for evaluation
+	if(msaOpt.refine=="pecan"){
+	  //Support for pecan aligner
+	  //NOTE(review): /tmp/pecan.tree is a fixed, shared path
+	  std::ostringstream treecmd;
+	  treecmd << "cat " << fastafiles.str() << " | muscle -clusteronly -in - -tree1 /tmp/pecan.tree 1> /dev/null 2> /dev/null";
+	  int ret = system(treecmd.str().c_str());
+	  if(ret!=0){
+	    std::cerr << treecmd.str() << std::endl 
+		      << "SYSTEM:" << ret << std::endl;
+	  }
+	  
+	  std::ostringstream refinecmd;
+	  refinecmd << "java -cp pecan_v0.8.jar bp.pecan.Pecan -J exonerate -E `cat /tmp/pecan.tree | perl -ne 'chomp;print'` -F " << fastafiles.str() << " >> pecan." << getpid() << ".mfa";
+	  ret = system(refinecmd.str().c_str());
+	  if(ret!=0){
+	    std::cerr << refinecmd.str() << std::endl 
+		      << "SYSTEM:" << ret << std::endl;
+	  }
+	}
+	else if(msaOpt.refine == "mlagan"){
+	  std::ostringstream refinecmd;
+	  refinecmd << "mlagan.sh " << fastafiles.str() << " >> lagan."<< getpid() << ".mfa 2> /dev/null";
+	  int ret = system(refinecmd.str().c_str());
+	  if(ret!=0){
+	    std::cerr << refinecmd.str() << std::endl 
+		      << "SYSTEM:" << ret << std::endl;
+	  }
+	}
+	else if(msaOpt.refine == "fsa"){
+	  std::ostringstream refinecmd;
+	  //refinecmd << "fsa --anchored --maxram 15000 --fast --noindel2 --refinement 0 " << fastafiles.str() << " > fsa." << getpid() << ".mfa 2>> test.fsa.stderr";
+	  refinecmd << "fsa --fast --noindel2 --refinement 0 " << fastafiles.str() << " > fsa." << getpid() << ".mfa 2>> test.fsa.stderr";
+	  int ret = system(refinecmd.str().c_str());
+	  if(ret!=0){
+	    std::cerr << refinecmd.str() << std::endl 
+		      << "SYSTEM:" << ret << std::endl;
+	  }
+	  //Convert the XMFA written by fsa into the MAF expected below
+	  const char * mugsyinstall = std::getenv("MUGSY_INSTALL");
+	  if(mugsyinstall==NULL){
+	    //Inserting a NULL char* into an ostream is undefined behavior
+	    std::cerr << "MUGSY_INSTALL is not set, can't run xmfa2maf.pl" << std::endl;
+	  }
+	  else{
+	    std::ostringstream convertcmd;
+	    convertcmd << "echo '=' >> fsa." << getpid() << ".mfa;" 
+		       << mugsyinstall << "/xmfa2maf.pl < fsa." << getpid() << ".mfa > " << refinedmaf.c_str() << " 2>> test.maf.stderr";
+	    ret = system(convertcmd.str().c_str());
+	    if(ret!=0){
+	      std::cerr << convertcmd.str() << std::endl 
+			<< "SYSTEM:" << ret << std::endl;
+	    }
+	  }
+	}
+	else{
+	  runIterativeMUGSY(outputdir,fastafiles.str(),prefix,tmpgraph.str(),msaOpt);
+	}
+	std::cerr << ".";
+	//Need to clean up here to prevent huge proliferation of files
+#ifdef DEBUGGING
+	;
+#else
+	for(int k=0;k<(int)fnames.size();k++){
+	  unlink(fnames[k].c_str());
+	}
+#endif
+	//Library call added to multiz for parsing
+	FILE* intFileDescriptor ;
+	struct stat stat_FileStatistics ;
+	
+	intFileDescriptor = fopen(refinedmaf.c_str(), "r");
+	if(intFileDescriptor != NULL){
+	  //Treat a failed fstat like an empty file instead of reading an
+	  //uninitialized st_size
+	  unsigned long size = 0;
+	  if(fstat(fileno(intFileDescriptor), &stat_FileStatistics)==0){
+	    size = stat_FileStatistics.st_size ;
+	  }
+	  fclose(intFileDescriptor);
+	  if(size>0){
+	    assert(currnameSetv.size()==fnames.size()); 
+	    transformMAF(refinedmaf.c_str(),
+			 strmmafrefined,
+			 currnameSetv,
+			 curroffsets,
+			 currorients,
+			 currseqlens);
+#ifdef DEBUGGING
+	    ;
+#else
+	    unlink(refinedmaf.c_str());
+#endif
+	  }
+	  else{
+	    //Refined MAF file has zero length
+	  }
+	}
+	++lcbid;
+      }
+      else{
+	mafWrite(strmmafrefined, a);      
+      }
+      //mafAliFree(&a); 
+    }
+    //Flush and release the output stream, nothing else holds this handle
+    fclose(strmmafrefined);
+  }
+  //Release the input MAF handle now that no block is used anymore
+  mafFileFree(&mf);
+}
+	
+//Derive locally collinear blocks (LCBs) from the segment graph g and append
+//them to LCBs. Steps, in the order performed below:
+// 1. connected_components_by_genome_ranked() assigns a component to every
+//    vertex (using msaOpt.anchorwin) and convertCC2Blocks() fills
+//    blocksbycomponent from those components
+// 2. the fraction of sequence covered by vertices with degree>0 is printed
+//    to std::cerr
+// 3. assignBlockOrientation() fills the local blocks vector and
+//    vertexOrientMap, which must be empty on entry
+// 4. the vertices of each block are grouped by block id (block2fragMap)
+// 5. LCBs are produced by exactly one of
+//      msaOpt.segmentation=="none"     one LCB per block
+//      msaOpt.blockfile non-empty      readBlockFile()
+//      msaOpt.segmentation=="enredo"   do_segmentation_ENREDO()
+//      msaOpt.segmentation=="mercator" message only, no LCBs are added
+//      msaOpt.refine=="colinear"       a single LCB holding every vertex
+//      otherwise                       do_segmentation_MUGSY()
+//The adjacency score path (useadjscores) is hard wired off; turning it on
+//prints "Not implemented" and exits.
+template<typename TStringSet,
+	 typename TCargo,
+	 typename TSpec,
+	 typename TLCB,
+	 typename TStringSet1,
+	 typename TNames,
+	 typename TGenomeNames,
+	 typename TVertexOrientMap,
+	 typename TIntervals,
+	 typename TScore>
+void generateLCBs(Graph<Alignment<TStringSet, TCargo, TSpec> > &g,
+		  TLCB &LCBs,
+		  TStringSet1 &seqSet,
+		  TNames &sequenceNames,
+		  TGenomeNames &genomeNames,
+		  TVertexOrientMap &vertexOrientMap,
+		  TIntervals & aintervals,
+		  MsaOptions<Dna5 , TScore> const& msaOpt){
+  //Configurable options
+  bool useadjscores=false; //Generate and use adjacency scores  
+  bool bpanalysis=(msaOpt.segmentation=="none") ? false : true;    //Set to false to skip breakpoint analysis entirely, each CC in segment graph will be an LCB
+  //*******
+  //Retrieve initial set of alignment blocks(LCBs) from segment graph
+  //A block is a set of segments that are connected in the segment graph
+#ifdef SEQAN_PROFILE
+  std::cerr << "Converting segments to multi-genome anchors " 
+	    << length(seqSet) << " " 
+	    << length(sequenceNames) << " " 
+	    << length(genomeNames) << " " 
+	    << vertexOrientMap.size() << " " 
+	    << LCBs.size() << " " 
+	    << numVertices(g) << " " 
+	    << numEdges(g) << std::endl;
+#endif
+  typedef Dna5 TAlphabet;
+  typedef typename Value<TScore>::Type TScoreValue;
+  typedef typename Size<TStringSet>::Type TSize;
+  typedef typename Value<TStringSet1>::Type TString;
+  typedef typename Value<TNames>::Type TName;
+  //typedef Graph<Alignment<TStringSet, TSize> > TGraph;
+  //Using int to support negative edge scores
+  typedef Graph<Alignment<TStringSet, int> > TGraph;
+  typedef typename Id<TGraph>::Type TId; 
+  typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
+  typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
+  typedef typename EdgeType<TGraph>::Type TEdgeStump;
+  typedef typename Iterator<String<TEdgeStump*> const, Rooted>::Type TIterConst;
+  typedef typename Iterator<String<TEdgeStump*>, Rooted>::Type TIter;
+  
+  //
+  typedef std::map<unsigned int, unsigned int> TComponentLength;
+  
+  // Strongly Connected Components, topological sort, and length of each component
+  typedef String<unsigned int> TComponentMap;
+  typedef typename Value<TComponentMap>::Type TComponent;
+  typedef typename Position<TGraph>::Type TPos;
+  typedef SVABlock<TComponent,TSize,TVertexDescriptor,TPos> TBlock;
+  
+  TComponentMap component;
+  typedef typename Value<TComponentMap>::Type TComponent;
+  //Hold input blocks that will be used to generate LCBs
+  
+  std::map<std::pair<TComponent,TComponent>,TBlock *> componentVertexMap;
+  std::vector<std::vector<TBlock> > blocksbycomponent; 
+
+  TSize numComponents;
+
+  //Greedy algorithm for resolving conflicts in connecting segments
+  //into blocks. Conflicts arise when there are more than 2 segments
+  //connected from the same genome separated by < msaOpt.poscombinewindow
+
+  //Considering two methods
+  //(1)Connect using best positional score first.
+  //Derive positional score from an initial clustering
+  //Break gaps that violate constraints using a mincut
+  //(2)Connect using best consistency score
+  //Start new cluster whenever a repeat/dup is to be added
+
+  std::map<TEdgeDescriptor,float> posScores;
+  if(useadjscores){
+    //Adjacency scoring is optional, off by default
+    //(1) Using adjacency and consistency score
+    std::cerr << "Not implemented" << std::endl;
+    exit(1);
+    /*
+    numComponents = convertSegments2BlocksAdjacency(g,
+						    component,
+						    componentVertexMap,
+						    blocksbycomponent,
+						    seqSet,
+						    genomeNames,
+						    posScores,
+						    msaOpt,
+						    cuts);
+    convertCC2Blocks(g,
+		     component,
+		     componentVertexMap,
+		     blocksbycomponent,
+		     aintervals,
+		     sequenceNames);
+    */
+
+  }
+  else{
+    //(2) Using consistency score
+    std::cerr << "Greedy CC on consistency score " << std::endl;
+    //numComponents = connected_components_by_genome_ranked_RECURSIVE(g, component, genomeNames, 100);
+    //std::cerr << "numc ranked recur" << numComponents << std::endl;
+    //numComponents = connected_components(g,component);
+    //std::cerr << "numc reg" << numComponents << std::endl;
+ 
+    //Convert segment graph (V=genome segments on one genome) into
+    //anchor graph (V=genome segments on multiple genomes)
+    numComponents = connected_components_by_genome_ranked(g, component, genomeNames, msaOpt.anchorwin);
+    std::cerr << "Num components:" << numComponents << std::endl;
+    blocksbycomponent.resize(numComponents);
+    //Collapse CC into blocks
+    convertCC2Blocks(g,
+		     component,
+		     componentVertexMap,
+		     blocksbycomponent,
+		     aintervals,
+		     sequenceNames);
+#ifdef DEBUGGING
+    //Ensure there are not blocks with 2 seqs from the same genome
+    //Compares every pair of vertices taken from two different blocks of the
+    //same component
+    typename std::vector<std::vector<TBlock> >::iterator bit2 = blocksbycomponent.begin();
+    for(; bit2!= blocksbycomponent.end();bit2++){//all cc
+      if(bit2->size()>0){
+	for(unsigned int i=0;i<bit2->size();i++){
+	  for(unsigned int j=i+1;j<bit2->size();j++){
+	    for(typename std::vector<TVertexDescriptor>::iterator vit=bit2->at(i).currV.begin();vit!=bit2->at(i).currV.end();vit++){
+	      TVertexDescriptor currV0 = *vit;
+	      //assert(degree(g,currV0)>0);
+	      for(typename std::vector<TVertexDescriptor>::iterator vit1=bit2->at(j).currV.begin();vit1!=bit2->at(j).currV.end();vit1++){
+		TVertexDescriptor currV = *vit1;
+		if(sequenceId(g,currV0)!=sequenceId(g,currV)){
+		  std::cout << "V1:" << currV0 << " " << " V2:" << currV 
+			    << " seq: " << sequenceId(g,currV0) << " " << sequenceId(g,currV) 
+			    << " genome:" << genomeNames[sequenceId(g,currV0)] << "-" << genomeNames[sequenceId(g,currV)] << std::endl;
+		  assert(genomeNames[sequenceId(g,currV0)]!=genomeNames[sequenceId(g,currV)]);
+		}
+	      }
+	    }
+	  }
+	}
+      }
+    }
+#endif
+  }
+  
+  
+  //
+  //
+  //Report some stats after building blocks
+  //totalavaillen sums the fragment length of every vertex that still has an edge
+  unsigned totalavaillen=0;
+  //unsigned totalmatchingbp=0;
+  unsigned totalseqlen=0;
+  typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
+  TVertexIterator itV2(g);
+  for(;!atEnd(itV2);goNext(itV2)){
+    if(degree(g,*itV2)>0){
+      totalavaillen+=fragmentLength(g,*itV2);
+    }
+  }
+  //TVertexIterator itV(g);
+  //for(;!atEnd(itV);goNext(itV)){
+  //if(degree(g,*itV)>0){
+  //totalmatchingbp+=fragmentLength(g,*itV);
+  //}
+  //}
+
+  TSize seqSetLen = length(seqSet);
+  for(unsigned int i=0;i<seqSetLen;i++){
+   totalseqlen+=length(seqSet[i]);
+  }
+
+  //std::cerr << "Excluded unique, repeat/duplicated bp:" << totalmatchingbp-totalavaillen 
+  //<< " " 
+  //<< "=" << (float)(totalmatchingbp-totalavaillen)/totalseqlen << std::endl;
+  //NOTE(review): both totals are plain unsigned and the ratio is computed
+  //without checking totalseqlen for zero
+  std::cerr << "Percentage matching bp (not including matching repeats/dups):" 
+	    << totalavaillen << "/" << totalseqlen
+	    << "=" << (float)totalavaillen/totalseqlen << std::endl;
+  //assert(totalavaillen<=totalmatchingbp);
+
+#ifdef SEQAN_PROFILE
+  std::cerr << "Anchor conversion done: " << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+  std::cerr << "Num anchors: " << numComponents << std::endl;
+#endif	  
+  //*********
+  //Need to rescore because edges may have been removed
+  //invalidating edge pointers
+  posScores.clear();
+  if(useadjscores){
+    //Score for positional conservation
+    std::cerr << "Rescoring for positional conservation" << std::endl;
+    std::cerr << "Not implemented" << std::endl;
+    exit(1);
+    //scorePosCons(g,
+    //	 component,
+    //	 numComponents,
+    //	 posScores,
+    //	 msaOpt.posscorewindow);
+  }
+  //*********
+  //Assign orientation to LCBs
+#ifdef SEQAN_PROFILE
+  std::cerr << "Assigning orientation to " << blocksbycomponent.size() << " anchors" << std::endl;
+#endif
+#ifdef DEBUGGING_GRAPH
+  std::fstream rawstrm;
+  rawstrm.open("refinegraphpreorient.out", std::ios_base::out | std::ios_base::trunc);
+  write(rawstrm,g,seqSet,Raw());
+  rawstrm.close();
+#endif
+
+  //Array of blocks
+  std::vector<TBlock> blocks;
+  //Assign orientation to blocks
+  //+ reversed==false
+  //- reversed==true
+  //Save the orientation using vertexOrientMap 
+  //Need a map so we can lookup orientation for each vertex in a block
+  //std::map<TVertexDescriptor,char> vertexOrientMap;
+  assert(vertexOrientMap.size()==0); //expecting empty map to start
+  assignBlockOrientation(g,
+			 blocksbycomponent,
+			 blocks,
+			 vertexOrientMap,
+			 posScores); 
+  //The intermediate containers are not read again below, release them
+  posScores.clear();
+  blocksbycomponent.clear();
+  componentVertexMap.clear();
+  clear(component);
+  
+#ifdef SEQAN_PROFILE
+  std::cerr << "Orientation done: " << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+#endif
+  
+#ifdef SEQAN_PROFILE
+  std::cerr << "Building an orthology map using " << blocks.size() << " anchors" << std::endl;
+#endif
+  //Determine collinear runs, ie LCBs
+  //Save runs in the LCBs vector
+  //Each LCB is a set of TVertexDescriptors
+  //std::vector<std::vector<TVertexDescriptor> > LCBs;
+  
+  //Group the vertices of all blocks by the block's c member
+  std::map<unsigned int, std::set<TVertexDescriptor> > block2fragMap;
+  typename std::vector<TBlock>::const_iterator bit = blocks.begin();
+  for(bit = blocks.begin();
+      bit!=blocks.end();
+      bit++){
+#ifdef DEBUGGING
+    typename std::vector<TVertexDescriptor>::const_iterator dvit;
+    for(dvit = bit->currV.begin();dvit!=bit->currV.end();++dvit){ 
+      assert((*dvit)!=getNil<TVertexDescriptor>());
+    }
+#endif
+    //Both branches end up inserting currV into the set for bit->c, the
+    //first one creates the set beforehand
+    if(block2fragMap.find(bit->c) == block2fragMap.end()){
+      block2fragMap.insert(std::make_pair(bit->c,std::set<TVertexDescriptor>()));
+      block2fragMap[bit->c].insert(bit->currV.begin(),bit->currV.end());
+    }
+    else{
+      block2fragMap[bit->c].insert(bit->currV.begin(),bit->currV.end());
+    }
+  }
+
+  if(!bpanalysis){
+    //Assign each block as an LCB
+    //Useful for some simple testing 
+    typename std::map<unsigned int, std::set<TVertexDescriptor> >::iterator it;
+    for(it = block2fragMap.begin();it!=block2fragMap.end();it++){
+      std::vector<unsigned int> currlcb;
+      currlcb.insert(currlcb.end(),it->second.begin(),it->second.end());
+      LCBs.push_back(currlcb);
+    }
+  }
+  else{ 
+    //Read LCBs/blocks from input file if specified
+    if(msaOpt.blockfile.length()>0){
+      readBlockFile(msaOpt.blockfile,
+		    block2fragMap,
+		    LCBs,
+		    sequenceNames,
+		    vertexOrientMap,
+		    g,
+		    false);
+    }
+    else{
+      //Calculate LCBs using segmentation method
+      //Enredo, Mercator, or MUGSY
+      std::string diststr(msaOpt.distance);
+      std::string minlenstr(msaOpt.minlength);
+      if(msaOpt.segmentation == "enredo"){
+	do_segmentation_ENREDO(blocks,
+			       LCBs,
+			       block2fragMap,
+			       diststr,
+			       minlenstr,
+			       msaOpt,
+			       seqSet,
+			       sequenceNames,
+			       genomeNames,
+			       vertexOrientMap,
+			       g);
+      }
+      else{
+	if(msaOpt.segmentation == "mercator"){
+	  //TODO, fix this tested but now broken do_segmentation_MERCATOR()
+	  //Execution continues after the message, LCBs is left untouched
+	  std::cerr << "Mercator segmentation not implemented" << std::endl;
+	}
+	else{
+	  if(msaOpt.refine == "colinear"){
+	    //For refinement, assume all within a single LCB
+	    typename std::map<unsigned int, std::set<TVertexDescriptor> >::iterator it;
+	    std::vector<unsigned int> currlcb;
+	    for(it = block2fragMap.begin();it!=block2fragMap.end();it++){
+	      currlcb.insert(currlcb.end(),it->second.begin(),it->second.end());
+	    }
+	    LCBs.push_back(currlcb);
+	  }
+	  else{
+	    //Default method is our own
+	    do_segmentation_MUGSY(blocks,
+				  LCBs,
+				  block2fragMap,
+				  diststr,
+				  minlenstr,
+				  msaOpt,
+				  sequenceNames,
+				  genomeNames,
+				  vertexOrientMap,
+				  g);
+	  }
+	}
+      }
+    }
+  }
+  blocks.clear();
+#ifdef DEBUGGING
+  //Print out LCBs
+  for(typename std::vector<std::vector<unsigned int> >::const_iterator lit = LCBs.begin();lit!=LCBs.end();lit++){
+    std::vector<unsigned int>::const_iterator vit;
+    for(vit = lit->begin();vit!=lit->end();vit++){
+      std::cout << *vit << ",";
+      assert(*vit!=getNil<TVertexDescriptor>());//Null vertex
+    }
+    std::cout << std::endl;
+  }	
+#endif
+
+#ifdef SEQAN_PROFILE
+  std::cerr << "LCB identification done: " << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+#endif
+
+}
+
+
+//Fetch, or build and cache, the guide tree for the sequences listed in
+//curridset.
+//  seqSet        - all sequences, curridset holds indexes into it
+//  curridset     - ids of the sequences that take part in the current LCB
+//  seqguideTrees - cache keyed by the ids joined as "id:id:...:"
+//  currguideTree - receives the tree
+//A missing tree is built with upgmaTree() from a k-mer (k=3) similarity
+//matrix over the selected sequences, rescaled as (1-similarity)*100.
+template<typename TSequenceSet,
+	 typename TIds,
+	 typename TTreeMap,
+	 typename TDistanceValue>
+void getGuideTree(TSequenceSet &seqSet,
+		  TIds &curridset,
+		  TTreeMap &seqguideTrees,
+		  Graph<Tree<TDistanceValue> > &currguideTree){
+  typedef Dna5 TAlphabet;
+  typedef unsigned TSize;
+  typedef String<TAlphabet> TSequence;
+  typedef String<TDistanceValue> TDistanceMatrix;
+  typedef typename Iterator<TDistanceMatrix>::Type TMatrixIterator;
+
+  //This assumes the best estimate is obtained by kmers across whole genome
+  //Alternatively, build guide tree using subsequence of the LCBs for LCBs of length 
+  //greater than some cutoff to avoid needlessly building trees for extremely short LCBs
+
+  //Cache key, every id followed by ':'
+  std::ostringstream keystrm;
+  std::set<unsigned int>::iterator idit = curridset.begin();
+  while(idit!=curridset.end()){
+    keystrm << *idit << ":";
+    ++idit;
+  }
+  const std::string curridsetstring(keystrm.str());
+
+  typename TTreeMap::iterator cached = seqguideTrees.find(curridsetstring);
+  if(cached != seqguideTrees.end()){
+#ifdef DEBUGGING
+      std::cout << "Using existing guide tree for " << curridsetstring << std::endl;
+#endif
+    currguideTree = cached->second;
+  }
+  else{
+#ifdef DEBUGGING
+    std::cout << "Generating a new guide tree for " << curridsetstring << std::endl;
+#endif
+    //Copy genome string for sequences present in the current LCB
+    StringSet<TSequence, Owner<> > selectedSeqs;
+    for(TSize seqidx = 0; seqidx<length(seqSet); ++seqidx) {
+      if(curridset.find(seqidx) == curridset.end()){
+	continue;
+      }
+      appendValue(selectedSeqs,seqSet[seqidx]);
+    }
+
+    TDistanceMatrix currdistanceMatrix;
+    getKmerSimilarityMatrix(selectedSeqs, currdistanceMatrix, 3, TAlphabet());
+    // Similarity to distance conversion
+    TMatrixIterator cell = begin(currdistanceMatrix);
+    const TMatrixIterator lastcell = end(currdistanceMatrix);
+    while(cell != lastcell){
+      value(cell) = (1 - (*cell)) * 100;
+      ++cell;
+    }
+    upgmaTree(currdistanceMatrix, currguideTree, UpgmaMin());
+    //Save tree for the combination specified by curridset
+    seqguideTrees.insert(std::make_pair(curridsetstring,currguideTree));
+    clear(currdistanceMatrix);
+  }
+
+#ifdef SEQAN_PROFILE2
+    std::cout << "LCB Guide tree done: " << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+#endif
+}
+
+//Align the sequences of a single LCB.
+//Runs tripletLibraryExtension() on currG (the guide tree restricted overload
+//once there are 100 or more sequences), then progressiveAlignment() along
+//currguideTree into currgOut.
+//  lcbid - only used in profiling/debug output and debug file names
+//Returns an s_score; it is only assigned (from alignmentEvaluationCustom())
+//when SCORING is defined.
+template<typename TGraph,
+	 typename TGraph2,
+	 typename TSize,
+	 typename TStringSet,
+	 typename TGuideTree,
+	 typename TScore>
+s_score alignSingleLCB(TGraph &currG,
+			TGraph2 &currgOut,
+			TSize lcbid,
+			TStringSet &currseqs,
+			TGuideTree &currguideTree,
+			MsaOptions<Dna5 , TScore> const& msaOpt){
+  typedef typename Value<TScore>::Type TScoreValue;
+  typedef Dna5 TAlphabet;
+  //Inline iterative refinement is currently disabled, the flag is computed
+  //but nothing acts on it
+  const bool inlinerefine = (msaOpt.refine=="true");
+  (void)inlinerefine;
+  (void)lcbid; //otherwise only referenced in conditionally compiled sections
+
+  const TSize numSeqs = length(currseqs);
+  const TSize largeSetSize = 100;
+#ifdef SEQAN_PROFILE2
+  std::cout << "LCB Performing triplet extension " << lcbid << std::endl;
+#endif
+  if(numSeqs >= largeSetSize){
+    tripletLibraryExtension(currG, currguideTree, largeSetSize / 2);
+  }
+  else{
+    tripletLibraryExtension(currG);
+  }
+#ifdef SEQAN_PROFILE2
+  std::cout << "LCB Triplet extension done: " << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+#endif
+
+  //*******
+  //Alignment
+  //*******
+#ifdef SEQAN_PROFILE2
+  std::cout << "LCB Performing progressive alignment" << std::endl;
+#endif
+#ifdef DEBUGGING_GRAPH
+  //Dump the input graph of this LCB
+  std::string lcbgname("lcbgraph");
+  lcbgname += boost::lexical_cast<std::string>(lcbid);
+  lcbgname += ".out";
+  std::fstream rawstrm2(lcbgname.c_str(), std::ios_base::out | std::ios_base::trunc);
+  write(rawstrm2,currG,currseqs,Raw());
+  rawstrm2.close();
+#endif
+
+  //Perform the alignment
+  progressiveAlignment(currG, currguideTree, currgOut);
+
+#ifdef SEQAN_PROFILE2
+  std::cout << "LCB progressive alignment done: " << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+#endif
+
+#ifdef DEBUGGING_GRAPH
+  //Dump the aligned graph of this LCB
+  std::string alcbgname("alignedlcbgraph");
+  alcbgname += boost::lexical_cast<std::string>(lcbid);
+  alcbgname += ".out";
+  std::fstream rawstrm3(alcbgname.c_str(), std::ios_base::out | std::ios_base::trunc);
+  write(rawstrm3,currgOut,currseqs,Raw());
+  rawstrm3.close();
+#endif
+
+  s_score sscore;
+#ifdef SCORING
+  //Counters filled in by alignmentEvaluationCustom()
+  TSize numGapEx = 0;
+  TSize numGap = 0;
+  TSize numPairs = 0;
+  TSize numIdents = 0;
+  TSize alignLen = 0;
+  TSize totalLen = 0;
+  String<TSize> pairCount;
+  String<TSize> colCount;
+  sscore = alignmentEvaluationCustom(currgOut,
+				     msaOpt.sc,
+				     numGapEx,
+				     numGap,
+				     numPairs,
+				     numIdents,
+				     pairCount,
+				     colCount,
+				     alignLen,
+				     totalLen);
+#endif
+#ifdef DEBUGGING2
+  //Print the scoring information; this section reads the counters declared
+  //in the SCORING section above
+  TSize alphSize = ValueSize<TAlphabet>::VALUE;
+  TScoreValue gop = msaOpt.sc.data_gap_open;
+  TScoreValue gex = msaOpt.sc.data_gap_extend;
+  std::cout << "LCBID:" << lcbid << std::endl;
+  std::cout << "Scoring parameters:" << std::endl;
+  std::cout << "*Gap opening: " << gop << std::endl;
+  std::cout << "*Gap extension: " << gex << std::endl;
+  std::cout << "*Scoring matrix: " << std::endl;
+  std::cout << "   ";
+  for(TSize hdr = 0; hdr<alphSize; ++hdr){
+    std::cout << TAlphabet(hdr) << ',';
+  }
+  std::cout << std::endl;
+  for(TSize r = 0; r<alphSize; ++r) {
+    //Row label followed by the separators only, the scores are not printed
+    std::cout << TAlphabet(r) << ": ";
+    for(TSize k = 0; k<alphSize; ++k) {
+      //std::cout << score(scType, TAlphabet(r), TAlphabet(k));
+      if (k < alphSize - 1){
+	std::cout << ',';
+      }
+    }
+    std::cout << std::endl;
+  }
+  std::cout << std::endl;
+  std::cout << "Alignment Score: " << sscore.alignScore << std::endl;
+  std::cout << "Alignment Length: " << alignLen << std::endl;
+  std::cout << "#Match-Mismatch pairs: " <<numPairs << std::endl;
+  std::cout << "Score contribution by match-mismatch pairs: " << (sscore.alignScore - (((TScoreValue) numGap * gop) + ((TScoreValue) numGapEx * gex))) << std::endl;
+  std::cout << "#Gap extensions: " << numGapEx << std::endl;
+  std::cout << "Score contribution by gap extensions: " << ((TScoreValue) numGapEx * gex) << std::endl;
+  std::cout << "#Gap openings: " << numGap << std::endl;
+  std::cout << "Score contribution by gap openings: " << ((TScoreValue) numGap * gop) << std::endl;
+  std::cout << std::endl;
+  std::cout << "#Pairs: " << std::endl;
+  std::cout << "   ";
+  for(TSize hdr = 0; hdr<alphSize; ++hdr){
+    std::cout << TAlphabet(hdr) << ',';
+  }
+  std::cout << std::endl;
+  for(TSize r = 0; r<alphSize; ++r) {
+    std::cout << TAlphabet(r) << ": ";
+    for(TSize k = 0; k<alphSize; ++k) {
+      std::cout << value(pairCount, r * alphSize + k);
+      if (k < alphSize - 1){
+	std::cout << ',';
+      }
+    }
+    std::cout << std::endl;
+  }
+#endif
+  return sscore;
+}
+
+
+
+//Record the two end points of every aligned interval of a block in
+//aintervals, keyed by sequence name.
+//  currnameSet, curroffsets, currspanlens, currseqlens and currorients are
+//  indexed in parallel, entry i describes sequence i of the block
+//  lcbid is stored as blocknum on both end points
+//  dup   selects the event codes stored in .second, see the table below
+//For orientation '+' the end points are offset and offset+span; for any
+//other orientation they are seqlen-(offset+span) and seqlen-offset.
+template<typename TName,
+	 typename TLoc,
+	 typename TNames>
+void saveInterval(std::map<TName,std::vector<TLoc> >&aintervals,
+		  TNames &currnameSet,
+		  std::vector<unsigned int> &curroffsets,
+		  std::vector<unsigned int> &currspanlens,
+		  std::vector<unsigned int> &currseqlens,
+		  std::vector<char> &currorients,
+		  int lcbid,
+		  bool dup=false){
+  const int numNames = (int)length(currnameSet);
+  for(int idx=0; idx<numNames; ++idx){
+    TName seqname = currnameSet[idx];
+#ifdef DEBUGGING
+    std::cout << "Saving aligned intervals for " << seqname << std::endl;
+#endif
+    //operator[] creates the empty vector the first time a name is seen
+    std::vector<TLoc> &locs = aintervals[seqname];
+#ifdef DEBUGGING
+    std::cout << "Interval orient " << currorients[idx] << " offset" << curroffsets[idx] << " spans:" << currspanlens[idx] <<  " len: " << currseqlens[idx] << std::endl;
+#endif
+    const unsigned int offset = curroffsets[idx];
+    const unsigned int span = currspanlens[idx];
+    const unsigned int seqlen = currseqlens[idx];
+    const bool forward = (currorients[idx] == '+');
+    int fmin = forward ? offset : seqlen-(offset+span);
+    int fmax = forward ? offset+span : seqlen-offset;
+#ifdef DEBUGGING
+    std::cout << "Intervals for " << fmin << "-" << fmax <<  " " << currseqlens[idx] << std::endl;
+#endif
+    assert(fmin>=0);
+    assert(fmax<=(int)currseqlens[idx]);
+    //Event codes
+    //-1 - duplication end
+    //0 align end
+    //1 duplication start
+    //2 align start
+    TLoc startloc;
+    startloc.first = fmin;
+    startloc.second = dup ? 1 : 2;
+    startloc.blocknum = lcbid;
+    locs.push_back(startloc);
+
+    TLoc endloc;
+    endloc.first = fmax;
+    endloc.second = dup ? -1 : 0;
+    endloc.blocknum = lcbid;
+    locs.push_back(endloc);
+  }
+}
+
+template<typename TGraph,
+	 typename TLCBs,
+	 typename TStringSet1,
+	 typename TStringSet2,
+	 typename TNames,
+	 typename TGenomeNames,
+	 typename TVertexOrientMap,
+	 typename TStream1,
+	 typename TName,
+	 typename TLoc,
+	 typename TScore>
+std::vector<s_score> alignLCBs(TGraph &g,
+			       TLCBs &LCBs,
+			       TStringSet1 &seqSet,
+			       TStringSet2 &genomeSeqSet,
+			       TNames &sequenceNames,
+			       TGenomeNames &genomeNames,
+			       TVertexOrientMap &vertexOrientMap,
+			       TStream1 &strmmaf,
+			       std::map<TName,std::vector<TLoc> > &aintervals,
+			       MsaOptions<Dna5 , TScore> const &msaOpt){
+
+  typedef double TDistanceValue;
+  typedef unsigned TSize;
+  typedef Dna5 TAlphabet;
+  typedef String<TDistanceValue> TDistanceMatrix;
+  typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
+  typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
+  typedef typename Value<TScore>::Type TScoreValue;
+
+  std::vector<s_score> allscores;
+  TDistanceMatrix distanceMatrix;
+
+  Graph<Tree<TDistanceValue> > seqguideTree;
+  std::map<std::string, Graph<Tree<TDistanceValue> > > seqguideTrees;
+  
+  TSize nSeq = length(seqSet);
+  
+  TSize nGenomes=0;
+  for(TSize i=0;i<length(genomeNames);i++){
+    nGenomes = (genomeNames[i] > nGenomes) ? genomeNames[i] : nGenomes;
+  }
+  nGenomes = nGenomes+1;
+
+#ifdef SEQAN_PROFILE
+    std::cerr << "Saving interval tree marking location of duplications" << std::endl;
+#endif   
+  //Save interval tree of duplications
+  //Save interval trees
+  typedef IntervalAndCargo<int,TSize> TInterval;
+  typedef Graph<Directed<void,WithoutEdgeId> > TIGraph;
+  typedef IntervalTreeNode<TInterval> TNode;
+  typedef String<TNode> TPropertyMap;
+  String<String<TInterval> > dintervals;
+  resize(dintervals,length(seqSet));
+  String<TIGraph> dgs;
+  String<TPropertyMap> dpms;
+  for(int i=0;i<(int)length(seqSet);i++){
+    std::map<int,std::pair<int,int> > tmpintervals;
+    std::map<int,std::pair<int,int> >::iterator pos;
+    bool inserted=false;
+    typename std::map<TName,std::vector<TLoc> >::iterator ait=aintervals.find(sequenceNames[i]);
+    if(ait!=aintervals.end()){
+      for(typename std::vector<TLoc>::iterator pit = ait->second.begin();pit!=ait->second.end();pit++){
+	boost::tie(pos, inserted) = tmpintervals.insert(std::make_pair(pit->blocknum,std::make_pair(0,0)));
+	if(pit->second==1){
+	  pos->second.first = pit->first;
+	}
+	else{
+	  if(pit->second==-1){
+	    pos->second.second = pit->first;
+	  }
+	}
+      }
+    }
+    for(std::map<int,std::pair<int,int> >::iterator it = tmpintervals.begin();it!=tmpintervals.end();++it){
+      //std::cout << it->second.first << " " << it->second.second << " bnum:" << it->first << std::endl;
+      appendValue(dintervals[i],IntervalAndCargo<int,unsigned int>(it->second.first,it->second.second,it->first)); 
+    }
+  }
+  
+  resize(dgs,nSeq);
+  resize(dpms,nSeq);
+  for(unsigned int i=0;i<nSeq;i++){
+    unsigned center = length(seqSet[i])/2;
+    createIntervalTree(dgs[i],dpms[i],dintervals[i],center);
+    //intervals for sequence i are not needed anymore
+    clear(dintervals[i]);
+  }
+#ifdef SEQAN_PROFILE
+    std::cerr << "Saving interval tree done: " << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+#endif
+#ifdef SEQAN_PROFILE
+    std::cerr << "Sorting vertices on each seq" << std::endl;
+#endif   
+  //Sort vertices on seq 
+  std::vector<std::vector<TVertexDescriptor> > vseqs;
+  vseqs.resize(nSeq);
+  typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
+  TVertexIterator it(g);
+  for(;!atEnd(it);goNext(it)) {
+    TVertexDescriptor v = *it;
+    if(degree(g,v)>0){
+      assert(sequenceId(g,*it)<vseqs.size());
+      vseqs[sequenceId(g,v)].push_back(v);
+    }
+  }
+  for(unsigned int i=0;i<nSeq;i++){
+    //Sort vertices on seq 
+    sort(vseqs[i].begin(),vseqs[i].end(),vertexposcmp<TGraph>(g));
+  }
+#ifdef SEQAN_PROFILE
+    std::cerr << "Sorting vertices done: " << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+#endif
+  unsigned int lcbid=0;
+
+  //
+  //Loop over each LCB and align
+#ifdef SEQAN_PROFILE
+  std::cerr << "Aligning " << LCBs.size() << " LCBs" << std::endl;
+#endif
+
+  //For tracking if an anchor has been aligned in an LCB
+  std::set<TVertexDescriptor> coveredSet;
+
+  // TODO, for parallel mugsy, refactor into a sub-process and parallel loop
+  for(typename std::vector<std::vector<TVertexDescriptor> >::iterator lit = LCBs.begin();lit!=LCBs.end();lit++){
+#ifdef DEBUGGING
+    std::cout << "LCB:" << lcbid << " num_anchors:" << lit->size() << std::endl;	  
+#endif
+#ifdef SEQAN_PROFILE2
+    std::cout << "LCB of size " << lit->size() << std::endl;	  
+    std::cout << "LCB Initializing segments for LCB" << std::endl;
+#endif	
+#ifdef SEQAN_PROFILE
+    if(lcbid%1000==0){
+      std::cout << ".";
+    }
+#endif
+    
+    //Matches, scores, seqs, ids for current LCB
+    typedef String<Fragment<> > TFragmentString;
+    typedef typename Value<TScore>::Type TScoreValue;
+    typedef String<TScoreValue> TScoreValues;
+    typedef String<TAlphabet> TSequence;
+    TFragmentString currmatches;
+    TScoreValues currscores;
+    StringSet<TSequence, Owner<> > currseqs;
+    TNames currnameSet;
+    
+    //For tracking substrings 
+    //std::map<TSize,unsigned int> offsets;
+    //std::map<TSize,unsigned int> spanlens;
+    //std::map<TSize,unsigned int> seqlens;
+    //std::map<TSize,char> orients;
+    std::map<TSize,s_offset> offsets;
+    
+    //Copy links between set of vertices in LCB $lit
+    //from Graph $g and store in $currmatches,$currscores,$currseqs
+
+    std::vector<TVertexDescriptor> currlcb;
+    std::set<TVertexDescriptor> currlcbset;
+
+    //Unless allownestedlcbs, each anchor vertex can contribute to
+    //exactly one LCB; the longest LCB spanning the anchor
+    //If the vertex is reported in subsequent LCBs it will be skipped
+    if(msaOpt.allownestedlcbs == "true"){
+      //skip checks if anchor has already been aligned
+      //default for allownestedlcbs is false
+    }
+    else{
+      for(std::vector<unsigned int>::const_iterator vit = lit->begin();vit!=lit->end();++vit){
+	if(coveredSet.find(*vit)==coveredSet.end()){
+	  currlcb.push_back(*vit);
+	}
+      }
+      *lit=currlcb;
+    }
+    
+    retrieveLCBSegments(g,
+			seqSet,
+			vertexOrientMap,
+			lit,
+			lcbid,
+			sequenceNames,
+			currseqs,
+			currmatches,
+			currscores,
+			currnameSet,
+			offsets,
+			coveredSet,
+			vseqs,
+			boost::lexical_cast<int>(msaOpt.minlength));
+
+    
+    //TODO, Add matches for duplications and overlapping matches
+
+
+
+#ifdef DEBUGGING
+    std::cout << "LCB: " << lcbid 
+	      << " vertices:" << lit->size()
+	      << " seqset:" << length(seqSet)
+	      << " offsets:" << offsets.size() 
+	      << " currseqs:" << length(currseqs)
+	      << " sequenceNames:" << length(sequenceNames)
+	      << " currnameset:" << length(currnameSet)
+	      << std::endl;
+#endif
+    assert(length(sequenceNames)==length(seqSet));
+    assert(length(currnameSet)==length(currseqs));
+
+    if(length(currseqs)>1 && length(currmatches)>0){
+      assert(length(currmatches)>0);
+#ifdef SEQAN_PROFILE2
+      std::cout << "LCB Building alignment graph" << std::endl;
+#endif
+      //Build new graph from matches
+      
+      //Since LCBs contain no reversals
+      //All matches should be relative to the forward/leading strand only
+      //(non-reversed here)
+      TGraph currG(currseqs);
+      buildAlignmentGraph(currmatches, currscores, currG, FractionalScore());
+#ifdef DEBUGGING
+      std::cout << "Graph built V:"  << numVertices(currG) << " E:" << numEdges(currG) 
+		<< " number of seqs:" << length(currseqs)
+		<< " number of matches:" << length(currmatches) 
+		<< " number of scores:" << length(currscores)
+		<< std::endl;
+#endif
+
+      //Double check edge weights
+      typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
+      TEdgeIterator itE(currG);
+      //Undo Hack that stores reverse complement matches using
+      //negative edge weights
+      for(;!atEnd(itE);goNext(itE)){
+	if(cargo(value(itE))<0){
+	  cargo(value(itE)) = cargo(value(itE))*-1;
+	}
+      }
+
+      //TESTING Test code to detect additional matches between
+      //disconnected vertices in the LCB
+      bool hashnonmatches=false;
+      if(hashnonmatches){
+	std::map<std::string,std::vector<TVertexDescriptor> > vhash;
+	typename std::map<std::string,std::vector<TVertexDescriptor> >::iterator vhashpos;
+	bool inserted;
+	//Attempt to combine non-matching vertices
+	typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
+	std::cout << "Attempting to add addl edges" << std::endl;
+	int newedges=0;
+	TVertexIterator itV(currG);
+	for(;!atEnd(itV);goNext(itV)){
+	  if(degree(currG,*itV)==0){
+	    String<char> sseq = infix(currseqs[sequenceId(currG,*itV)],fragmentBegin(currG,*itV),fragmentBegin(currG,*itV)+fragmentLength(currG,*itV));
+	    char * c = toCString(sseq);
+	    std::string cstr(c);
+	    std::cout << sequenceId(currG,*itV) << " " << cstr << std::endl;
+	    boost::tie(vhashpos, inserted) = vhash.insert(std::make_pair(cstr,std::vector<TVertexDescriptor>()));
+	    vhashpos->second.push_back(*itV);
+	  }
+	}
+	for(typename std::map<std::string,std::vector<TVertexDescriptor> >::iterator hit=vhash.begin();hit!=vhash.end();++hit){
+	  for(typename std::vector<TVertexDescriptor>::iterator vit1=hit->second.begin();vit1!=hit->second.end();++vit1){
+	    for(typename std::vector<TVertexDescriptor>::iterator vit2=vit1+1;vit2!=hit->second.end();++vit2){
+	      assert(vit1!=vit2);
+	      addEdge(currG,*vit1,*vit2,1);
+	      newedges++;
+	    }
+	  }
+	}
+	std::cout << "Added " << newedges << " new edges" << std::endl;
+      }
+
+#ifdef DEBUGGING
+      TEdgeIterator itE2(currG);
+      for(;!atEnd(itE2);goNext(itE2)){
+	assert(cargo(value(itE2))>0);
+      }
+#endif
+      
+      //Map between LCB array (curr*) and segment graph array
+      std::vector<unsigned int> curroffsets;
+      std::vector<unsigned int> currspanlens;
+      std::vector<unsigned int> currseqlens;
+      std::vector<char> currorients;
+      assert(length(currseqs)==length(currnameSet));
+      currorients.resize(length(currnameSet));
+      currseqlens.resize(length(currnameSet));
+      currspanlens.resize(length(currnameSet));
+      curroffsets.resize(length(currnameSet));
+      String<int> relevant_segments;
+      std::set<unsigned int> currgenomes; //List of sequence ids
+#ifdef DEBUGGING_GRAPH
+      std::fstream strminfo;
+      std::string lcbgname = "lcbgraph"+boost::lexical_cast<std::string>(lcbid)+".info";
+      strminfo.open(lcbgname.c_str(), std::ios_base::out | std::ios_base::trunc);
+#endif
+      //TODO, refactor using a id map
+      for(TSize currrow = 0; currrow<length(currnameSet); ++currrow) {
+	for(TSize row = 0; row<length(sequenceNames); ++row) {
+	  if(currnameSet[currrow]==sequenceNames[row]){
+#ifdef DEBUGGING_GRAPH
+	    strminfo << "SeqId:" << currrow << " " << currnameSet[currrow] 
+		     << " origId:" << row 
+		     << " offset:" << offsets[row].offset 
+		     << " orient:" << offsets[row].orient << std::endl;
+#endif
+	    curroffsets[currrow] = offsets[row].offset;
+	    currspanlens[currrow] = offsets[row].spanlen;
+	    currseqlens[currrow] = offsets[row].seqlen;
+	    currorients[currrow] = offsets[row].orient;
+	    //See if we overlap a duplication
+	    findIntervals(dgs[row],dpms[row],offsets[row].offset,relevant_segments);
+	    findIntervals(dgs[row],dpms[row],offsets[row].offset+offsets[row].spanlen,relevant_segments);
+	    currgenomes.insert(genomeNames[row]);
+#ifdef DEBUGGGING
+	    std::cout << currnameSet[currrow] << " " << currorients[currrow] << std::endl;
+#endif
+	  }
+	}
+      }
+#ifdef DEBUGGING_GRAPH
+      strminfo.close();
+#endif
+      
+#ifdef SEQAN_PROFILE2
+      std::cout << "LCB Building alignment graph done: " << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+      std::cout << "LCB Scoring matches" << std::endl;
+#endif
+      //Use or build guide tree based on genome sequences
+      typedef double TDistanceValue;
+      Graph<Tree<TDistanceValue> > currguideTree;
+      assert(currgenomes.size()>0);
+      getGuideTree(genomeSeqSet,currgenomes,seqguideTrees,currguideTree);
+
+      TGraph currgOut(currseqs);
+      s_score sscores = alignSingleLCB(currG,
+				       currgOut,
+				       ++lcbid,
+				       currseqs,
+				       currguideTree,
+				       msaOpt);
+#ifdef SCORING
+      //Map to orig seq ids
+      //TODO, refactor using a id map
+      String<TSize> colCount;
+      resize(colCount,nSeq+1);
+      for(TSize i=0;i<nSeq;++i){
+	for(TSize j=0;j<length(currseqs);++j){
+	  if(sequenceNames[i]==currnameSet[j]){
+	    colCount[i+1] += sscores.colCount[j+1];
+	  }
+	}
+      }
+      sscores.colCount=colCount;
+      allscores.push_back(sscores);
+#endif
+
+      if(msaOpt.unique == "true"){
+	//save interval 
+	saveInterval(aintervals,
+		     currnameSet,
+		     curroffsets,
+		     currspanlens,
+		     currseqlens,
+		     currorients,
+		     lcbid);
+      }
+      if(strmmaf.is_open()){
+	//Optionally write output
+	//mafformat defined in refinement/graph_impl_align.h
+	std::ostringstream lcblabel;
+	lcblabel << " label=" << lcbid;
+
+	if(msaOpt.duplications == "true"){
+	  std::set<int> dblocks;
+	  for(unsigned int i=0;i<length(relevant_segments);i++){
+	    dblocks.insert(relevant_segments[i]);
+	  }
+	  for(std::set<int>::iterator dit=dblocks.begin();dit!=dblocks.end();++dit){
+	    //std::cout << "LCB:" <<lcbid << " overlaps duplicated block d" << *dit << std::endl;
+	    lcblabel << " dup=d" << *dit;
+	  }
+	}
+	write(strmmaf,currgOut,currnameSet,MafFormat(),curroffsets,currspanlens,currseqlens,currorients,lcblabel.str());
+	strmmaf.flush();
+      }
+    }
+  }
+#ifdef SEQAN_PROFILE
+    std::cerr << "Aligning LCBs done: " << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+#endif
+  return allscores;
+}
+
+//Aligns a prebuilt segment-match graph $g, LCB by LCB.
+//Steps, in order:
+// 1. consistency-score the existing edges with
+//    graphBasedTripletLibraryExtension(); the edge count must not change,
+//    which is asserted in debug builds
+// 2. generateLCBs() fills $LCBs and $vertexOrientMap
+// 3. alignLCBs() aligns every LCB; $strmmaf and $aintervals are passed
+//    through to it unchanged
+// 4. only when compiled with SCORING: sum the s_score records returned by
+//    alignLCBs() and write a summary to "<msaOpt.outfile>.scores"
+//Nothing is returned; results are delivered through the reference
+//parameters and the optional score file.
+template<typename TGraph, 
+	 typename TStringSet,
+	 typename TStringSet2,
+	 typename TNames, 
+	 typename TGenomeNames, 
+	 typename TScore, 
+	 typename TLCBs,
+	 typename TVMap,
+	 typename TStream,
+	 typename TIMap>
+void wholeGenomeAlignment(TGraph &g,
+			  TStringSet &seqSet,
+			  TStringSet2 &genomeSeqSet,
+			  TNames &sequenceNames,
+			  TGenomeNames &genomeNames,
+			  MsaOptions<Dna5, TScore> const& msaOpt,
+			  TLCBs &LCBs,
+			  TVMap &vertexOrientMap,
+			  TStream &strmmaf,
+			  TIMap &aintervals){
+  //Perform consistency scoring
+  //Do not add any edges to the graph
+  //Simple score existing match edges for consistency
+  
+  //extension preserves directionality of matches
+  //Edge weight > 0 same strand
+  //Edge weight < 0 opposite strand
+#ifdef SEQAN_PROFILE
+  std::cerr << "Performing consistency scoring for connected edges only" << std::endl;
+#endif
+#ifndef NDEBUG
+  //Only needed by the assert below
+  unsigned int nEdges = numEdges(g);
+#endif
+  //tripletLibraryExtensionCond(g,false);
+  //tripletLibraryExtension(g, genomeguideTree, threshold / 2);
+  graphBasedTripletLibraryExtension(g,false);
+  std::cerr << "Num edges after consistency scoring " << numEdges(g) << std::endl;
+
+  assert(nEdges==numEdges(g));
+#ifdef SEQAN_PROFILE
+  std::cerr << "Consistency scoring done: " << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+#endif
+
+#ifdef DEBUGGING
+  double vm, rss;
+  process_mem_usage(vm, rss);
+  std::cout << "VM: " << vm << "; RSS: " << rss << std::endl;
+#endif
+  
+  /*
+  if(msaOpt.filter){
+#ifdef SEQAN_PROFILE
+    std::cerr << "Filtering segment graph" << std::endl;
+#endif
+    filterSegmentGraph(g,seqSet,genomeNames,genomeguideTree);
+#ifdef SEQAN_PROFILE
+    std::cerr << "Filtering segment graph done: " 
+	      << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+#endif	
+  }
+  */
+  //*******
+  //Generate LCBs
+  //
+  //std::vector<std::vector<TVertexDescriptor> > LCBs;
+  //std::map<TVertexDescriptor,char> vertexOrientMap;
+  #ifdef TIMING 
+  time(&now);
+  std::cerr << "TIME ALIGNMENT_GRAPH:" << lasttime << " " << now << " " << now-lasttime << std::endl;
+  lasttime=now;
+  #endif 
+  generateLCBs(g,
+	       LCBs,
+	       seqSet,
+	       sequenceNames,
+	       genomeNames,
+	       vertexOrientMap,
+	       aintervals,
+	       msaOpt);
+
+
+#ifdef SEQAN_PROFILE
+  std::cerr << "Generating " << LCBs.size() << " alignments " << std::endl;
+#endif
+#ifdef DEBUGGING
+  process_mem_usage(vm, rss);
+  std::cout << "VM: " << vm << "; RSS: " << rss << std::endl;
+#endif
+  //Retrieve LCBs from the complete alignment graph $g
+  std::vector<s_score> allscores = alignLCBs(g,
+					     LCBs,
+					     seqSet,
+					     genomeSeqSet,
+					     sequenceNames,
+					     genomeNames,
+					     vertexOrientMap,
+					     strmmaf,
+					     aintervals,
+					     msaOpt);
+  #ifdef TIMING 
+  time(&now);
+  std::cerr << "TIME ALIGN_LCB:" << lasttime << " " << now << " " << now-lasttime << std::endl;
+  lasttime=now;
+  #endif 
+
+#ifdef SEQAN_PROFILE
+  std::cerr << std::endl 
+	    << "Finished aligning LCBs: " 
+	    << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+#endif 
+  
+#ifdef SCORING
+
+#ifdef SEQAN_PROFILE
+  std::cerr << "Calculating scores "<< std::endl;
+#endif
+
+  typedef unsigned int TSize;
+  typedef typename Value<TScore>::Type TScoreValue;
+  TSize numGapEx = 0;
+  TSize numGap = 0;
+  TSize numPairs = 0;
+  TSize numIdents = 0;
+  TSize alignLen = 0;
+  TSize totalLen = 0;
+  TSize lcbCount = 0;
+  unsigned int minLen = std::numeric_limits<unsigned int>::max();
+  unsigned int maxLen = 0;
+  String<TSize> colCount;
+  String<TSize> seqCount;
+  //NOTE(review): the total is accumulated in an unsigned TSize; if
+  //s_score::alignScore can be negative the printed sum wraps -- confirm
+  TSize alignScore=0;
+  TSize nSeq = length(seqSet);
+  TSize nGen = length(genomeNames);
+  fill(colCount,nSeq+1,0);
+  fill(seqCount,nSeq+1,0);
+  for(TSize i=0;i<nSeq;i++){
+    assert(colCount[i]==0);
+    assert(seqCount[i]==0);
+  }
+  //Sum the per-LCB records
+  for(std::vector<s_score>::iterator sit=allscores.begin();sit!=allscores.end();++sit){
+    TSize nSeqn = sit->seqCount;
+    //seqCount only has slots 0..nSeq
+    assert(nSeqn<=nSeq);
+    seqCount[nSeqn]++;
+    lcbCount++;
+    alignScore += sit->alignScore;
+    numGap += sit->numGap;
+    numGapEx += sit->numGapEx;
+    numPairs += sit->numPairs;
+    numIdents += sit->numIdents;
+    minLen = (sit->alignLen < minLen) ? sit->alignLen : minLen;
+    maxLen = (sit->alignLen > maxLen) ? sit->alignLen : maxLen;
+    alignLen += sit->alignLen;
+    totalLen += sit->totalLen;
+    //NOTE(review): only the first sit->seqCount entries are summed --
+    //confirm this matches how alignLCBs() fills s_score::colCount
+    for(TSize i=0;i<nSeqn;++i){
+      colCount[i+1] += sit->colCount[i+1];
+    }
+  }
+  
+  std::string outfile(msaOpt.outfile);
+  std::fstream strmscore;
+  strmscore.open(std::string(outfile+".scores").c_str(), std::ios_base::out | std::ios_base::trunc);
+  if(!strmscore.is_open()){
+    //Writes to a closed stream are silently dropped, so at least say so
+    std::cerr << "WARNING: Unable to open " << outfile << ".scores for writing" << std::endl;
+  }
+  TScoreValue gop = msaOpt.sc.data_gap_open;
+  TScoreValue gex = msaOpt.sc.data_gap_extend;
+  strmscore << "Num LCBs: " << lcbCount << std::endl;
+  //Without any LCB there is no average and minLen still holds its sentinel;
+  //report zeros instead of nan / UINT_MAX
+  float avgLen = (lcbCount>0) ? (float)alignLen/(float)lcbCount : 0.0f;
+  unsigned int shortestLen = (lcbCount>0) ? minLen : 0u;
+  strmscore << "Avg length: " << avgLen << "bp Range:" << shortestLen << "-" << maxLen << "bp " << std::endl;
+  strmscore << "Total scoring summary over all LCBs" << std::endl;
+  strmscore << "SP alignment Score: " << alignScore << std::endl;
+  strmscore << "Alignment Length: " << alignLen << std::endl;
+  strmscore << "Sum of sequence length: " << totalLen << std::endl;
+  strmscore << "#Match-Mismatch pairs: " << numPairs << std::endl;
+  strmscore << "#Match pairs: " << numIdents << std::endl;
+  strmscore << "Score contribution by match-mismatch pairs: " << (alignScore - (((TScoreValue) numGap * gop) + ((TScoreValue) numGapEx * gex))) << std::endl;
+  strmscore << "#Gap extensions: " << numGapEx << std::endl;
+  strmscore << "Score contribution by gap extensions: " << ((TScoreValue) numGapEx * gex) << std::endl;
+  strmscore << "#Gap openings: " << numGap << std::endl;
+  strmscore << "Score contribution by gap openings: " << ((TScoreValue) numGap * gop) << std::endl;
+  //strmscore << "Average percent identity: " << << std::endl;
+  //strmscore << "Average percent aligned: " << << std::endl;
+  if(nSeq!=nGen){
+    strmscore << "WARNING: Some of the following stats are not calculated correctly for incomplete genomes" << std::endl;
+  }
+  strmscore << "Count of columns with identical characters " << std::endl;
+  for(TSize i=1;i<=nSeq;++i){
+    strmscore << " " << i << ":" << colCount[i];
+  }
+  strmscore << std::endl;
+  strmscore << "Counts of seqs per LCB " << std::endl;
+  for(TSize i=1;i<=nSeq;++i){
+    strmscore << " " << i << ":" << seqCount[i];
+  }
+  strmscore << std::endl;
+  strmscore << "Lengths per seq" << std::endl;
+  //Kept unsigned throughout so the min/max comparisons never mix signs
+  TSize minseq = std::numeric_limits<TSize>::max();
+  TSize maxseq = 0;
+  TSize totalunaligned=0;
+  for(TSize i=0;i<nSeq;++i){
+    TSize seqLen = length(seqSet[i]);
+    minseq = (minseq < seqLen) ? minseq : seqLen;
+    maxseq = (maxseq > seqLen) ? maxseq : seqLen;
+    strmscore << sequenceNames[i] << " len: " << length(seqSet[i])  << " unaligned lcb,bp,%: " << " aligned lcb,bp,%: " << std::endl;
+    //totalunaligned+=
+  }
+  //TODO these stats only work for completed genomes
+#ifdef SCORING_NEW
+  //At least two sequences are needed to form a pair
+  if(length(sequenceNames)==length(genomeNames) && nSeq>=2){
+    //Binomial coefficient C(nSeq,2) = nSeq*(nSeq-1)/2, the number of
+    //sequence pairs per column. Computed directly in floating point;
+    //going through integer factorials overflows beyond 12 sequences.
+    double seqpairs = 0.5*(double)nSeq*(double)(nSeq-1);
+    double possiblematchpairsmin = seqpairs*(double)minseq;
+    double possiblematchpairsaln = seqpairs*(double)alignLen;
+    assert(possiblematchpairsmin>0);
+    assert(possiblematchpairsaln>0);
+    strmscore << "Estimate of average %id (using min seq len) " << (float)numIdents / (float)possiblematchpairsmin << std::endl;
+    strmscore << "Estimate of average %id (using aln len) " << (float)numIdents / (float)possiblematchpairsaln << std::endl;
+    strmscore << "Estimate of average %aln (using min seq len) " << (float)numPairs / (float)possiblematchpairsmin << std::endl;
+    strmscore << "Estimate of average %aln (using aln len) " << (float)numPairs / (float)possiblematchpairsaln << std::endl;
+    
+    strmscore << "Estimate of overall %id (maxseq/alignLen+unaligned) " << (float)maxseq / (float)(alignLen+totalunaligned) << std::endl;
+    strmscore << std::endl;
+  }
+#endif
+  strmscore.close();
+#ifdef SEQAN_PROFILE
+  std::cout << std::endl 
+	    << "Finished calculating scores " 
+	    << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+#endif 
+  
+#endif 
+
+}
+
+//Event type codes stored in TLoc::second (see printUniques below):
+//-1 duplication end
+// 0 alignment end
+// 1 duplication start
+// 2 alignment start
+
+//Writes the "a" header line of one unaligned block, labelled u<icount>,
+//and increments $icount. When $indup is non-zero the ids in $currdups are
+//appended as " dup=d<id>,d<id>,...".
+template<typename TStream>
+void printUniqueHeader(TStream &strmmaf,
+		       int &icount,
+		       int indup,
+		       std::vector<int> const &currdups){
+  strmmaf << "a score=0 label=u" << icount++ << " mult=1";
+  if(indup){
+    strmmaf << " dup=";
+    for(std::vector<int>::const_iterator it=currdups.begin();it!=currdups.end();++it){
+      if(it!=currdups.begin()){
+	strmmaf << ",";
+      }
+      strmmaf << "d" << *it;
+    }
+  }
+  strmmaf << std::endl;
+}
+
+//Writes to $strmmaf the stretches of every sequence in $seqSet that are not
+//covered by an alignment, as single-sequence "a"/"s" records.
+//
+//$aintervals maps a sequence name to a list of events; each event has a
+//coordinate (first), a type code (second: -1 duplication end, 0 alignment
+//end, 1 duplication start, 2 alignment start) and a blocknum. The list of
+//every sequence that is found is sorted in place with poscmp<TLoc>.
+//
+//For a sequence with events, the gap between the last closing event and
+//the next non-duplication opening event is printed whenever no alignment
+//is open, and the tail after the last closing event is printed at the end.
+//A sequence without any entry in $aintervals is printed whole.
+template<typename TStringSet,
+	 typename TNames,
+	 typename TName,
+	 typename TLoc,
+	 typename TStream>
+void printUniques(TStringSet &seqSet,
+		  TNames &sequenceNames,
+		  std::map<TName,std::vector<TLoc> >&aintervals,
+		  TStream &strmmaf){ 
+  int icount=0;
+  for(int i=0;i<(int)length(seqSet);i++){
+    typename std::map<TName,std::vector<TLoc> >::iterator ait=aintervals.find(sequenceNames[i]);
+    if(ait!=aintervals.end()){
+      sort(ait->second.begin(),ait->second.end(),poscmp<TLoc>());
+      int last=0;   //coordinate of the most recent closing event
+      int open=0;   //number of alignments currently open
+      int indup=0;  //number of duplications currently open
+      std::vector<int> currdups; //blocknums of the open duplications
+      //pair<coord,start_end>
+      strmmaf << std::endl;
+      //Guarded: an empty event list has no last element to inspect
+      assert(ait->second.empty() || ait->second.back().first<=(int)length(seqSet[i]));
+      for(typename std::vector<TLoc>::iterator pit = ait->second.begin();pit!=ait->second.end();pit++){
+	//Start of alignment
+	assert(pit->first>=0);
+	assert((unsigned)pit->first<=length(seqSet[i]));
+	if(pit->second>0){
+	  if(pit->second==1){//duplication start
+	    indup++;
+	    currdups.push_back(pit->blocknum);
+	  }
+	  else{
+#ifdef DEBUGGING
+	    std::cout << "OPEN:" << open << " type:" << pit->second << " seq: " << ait->first << " coord:" << pit->first << " last_close:" << last 
+		      << " spanlen: " << length(infix(seqSet[i],last,pit->first)) 
+		      << " == " <<  pit->first - last 
+		      << " indup:" << indup <<std::endl;
+#endif
+	    //Only print the gap when no alignment covers it and it is non-empty
+	    if(open==0 && pit->first-last>0){
+	      printUniqueHeader(strmmaf,icount,indup,currdups);
+	      assert((int)length(infix(seqSet[i],last,pit->first)) == pit->first - last);
+	      strmmaf << "s " << ait->first << " " << last << " " << pit->first - last << " + " 
+		      << length(seqSet[i]) << " " << infix(seqSet[i],last,pit->first)
+		      << std::endl
+		      << std::endl;
+	    }
+	    if(pit->second==2){
+	      open++;
+	    }
+	  }
+	}
+	else{
+	  if(pit->second==-1){//duplication stop
+	    indup--;
+	    //Close the duplication that is actually ending; duplications may
+	    //overlap, so it is not necessarily the most recently opened one
+	    bool closed=false;
+	    for(std::vector<int>::size_type d=currdups.size();d>0 && !closed;--d){
+	      if(currdups[d-1]==(int)pit->blocknum){
+		currdups.erase(currdups.begin()+(d-1));
+		closed=true;
+	      }
+	    }
+	    //No matching start seen: fall back to dropping the newest entry,
+	    //but never pop an empty vector
+	    if(!closed && !currdups.empty()){
+	      currdups.pop_back();
+	    }
+	  }
+	  if(pit->second==0){//End of alignment
+	    open--;
+	  }
+	  last=pit->first;
+#ifdef DEBUGGING
+	  std:: cout << "CLOSE: " << open << " type:" << pit->second << " coord:" << last << std::endl;
+#endif
+	}
+	if(last<0){
+	  last=0;
+	}
+	assert(open>=0);
+	assert(last<=(int)length(seqSet[i]));
+      }
+      //assert(open==0);
+      //Tail of the sequence after the last closing event
+      if(last && length(seqSet[i])-last>0 && open==0){
+#ifdef DEBUGGING
+	std::cout << "Printing to end of sequence " << last << "-" << length(seqSet[i]) << std::endl;
+#endif
+	printUniqueHeader(strmmaf,icount,indup,currdups);
+	assert(length(infix(seqSet[i],last,length(seqSet[i]))) == length(seqSet[i]) - last);
+	strmmaf << "s " << ait->first << " " << last << " " << length(seqSet[i]) - last << " + " 
+		<< length(seqSet[i]) << " " << infix(seqSet[i],last,length(seqSet[i])) 
+		<< std::endl
+		<< std::endl;
+      }
+    }
+    else{
+#ifdef DEBUGGING
+      std::cout << "No alignment on sequence " << sequenceNames[i] << std::endl;
+      std::cout << "Printing entire sequence" << std::endl;
+#endif
+      strmmaf << "a score=0 label=0 mult=1" << std::endl;
+      strmmaf << "s " << sequenceNames[i] << " 0 " << length(seqSet[i]) << " + " 
+	      << length(seqSet[i]) << " " << seqSet[i]
+	      << std::endl
+	      << std::endl;
+    }
+  }
+}
+
+template<typename TStringSet, 
+	 typename TCargo, 
+	 typename TSpec, 
+	 typename TStringSet1, 
+	 typename TNames, 
+	 typename TGenomeNames,
+	 typename TIntervals,
+	 typename TScore>
+void
+singlepass_wholeGenomeAlignment(Graph<Alignment<TStringSet, TCargo, TSpec> >& gAlign, 
+				TStringSet1& sequenceSet,
+				TNames& sequenceNames,
+				TGenomeNames& genomeNames,
+				TIntervals &aintervals,
+				MsaOptions<Dna5 , TScore> const& msaOpt)
+{
+  typedef Dna5 TAlphabet;
+  typedef typename Value<TScore>::Type TScoreValue;
+  typedef typename Size<TStringSet>::Type TSize;
+  typedef typename Value<TStringSet1>::Type TString;
+  typedef typename Value<TNames>::Type TName;
+  //typedef Graph<Alignment<TStringSet, TSize> > TGraph;
+  //Using int to support negative edge scores
+  typedef Graph<Alignment<TStringSet, int> > TGraph;
+  typedef typename Id<TGraph>::Type TId; 
+  typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
+  typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
+  
+  typedef double TDistanceValue;
+  
+#ifdef SEQAN_PROFILE
+  std::cerr << "Mugsy WGA" << std::endl;
+  std::cerr << "Reading sequences and alignments " << std::endl;
+  std::cerr << "--distance=" << msaOpt.distance << std::endl;
+  std::cerr << "--minlength=" << msaOpt.minlength << std::endl;
+#endif
+  // Initialize alignment object
+  clear(gAlign);
+  assignStringSet(gAlign, sequenceSet);
+  // Some alignment constants
+  TStringSet& seqSet = stringSet(gAlign);
+  TSize nSeq = length(seqSet);
+  TSize nGenomes=0;
+  for(TSize i=0;i<length(genomeNames);i++){
+    nGenomes = (genomeNames[i] > nGenomes) ? genomeNames[i] : nGenomes;
+  }
+  nGenomes = nGenomes+1;
+  std::cerr << "Number of genomes:" << nGenomes << std::endl;
+  std::cerr << "Number of sequences:" << nSeq << std::endl;
+  
+  // Containers for segment matches and corresponding scores 
+  typedef String<Fragment<> > TFragmentString;
+  TFragmentString matches;
+  typedef String<TScoreValue> TScoreValues;
+  TScoreValues scores;
+  
+  // Include segment matches from subalignments
+  if (!empty(msaOpt.alnfiles)) {
+    typedef typename Iterator<String<std::string>, Standard>::Type TIter;
+    TIter begIt = begin(msaOpt.alnfiles, Standard() );
+    //TIter begItEnd = end(msaOpt.alnfiles, Standard() );
+    //Only read first alignment file
+    //for(;begIt != begItEnd; goNext(begIt)) {
+#ifdef SEQAN_PROFILE
+    std::cerr << "*Alignment file XMFA format: " << (*begIt).c_str() << std::endl;
+#endif
+    std::ifstream strm_lib;
+    strm_lib.open((*begIt).c_str(), ::std::ios_base::in | ::std::ios_base::binary);
+    read(strm_lib, matches, scores, sequenceSet, sequenceNames, MultiFastaAlign());
+    strm_lib.close();
+    //  }
+  }
+  /*
+  //TODO, read mummer for defining MUMi
+  // Include MUMmer segment matches
+  if (!empty(msaOpt.mummerfiles)){
+#ifdef SEQAN_PROFILE
+    std::cout << "Parsing MUMmer segment matches:" << std::endl;
+#endif
+    String<char> mummerFiles = value(msaOpt.mummerfiles);
+    String<char> currentMumFile;
+    for(TSize i = 0; i<=length(mummerFiles); ++i) {
+      if ((i == length(mummerFiles) || (value(mummerFiles, i) == ','))) {		
+#ifdef SEQAN_PROFILE
+	std::cout << "*MUMmer file: " << currentMumFile << std::endl;
+#endif
+	std::stringstream input;
+	input << currentMumFile;
+	std::ifstream strm_lib;
+	strm_lib.open(input.str().c_str(), std::ios_base::in | std::ios_base::binary);
+	read(strm_lib, matches, scores, seqSet, sequenceNames, MummerLib());		
+	strm_lib.close();
+	clear(currentMumFile);
+      } else {
+	if ((value(mummerFiles, i) != ' ') && (value(mummerFiles, i) != '\t')) appendValue(currentMumFile, value(mummerFiles, i));
+      }
+    }
+#ifdef SEQAN_PROFILE
+    std::cout << "Parsing done: " << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+#endif
+  }
+  */
+#ifdef SEQAN_PROFILE
+  std::cerr << "Reading alignments done: " << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+#endif
+#ifdef DEBUGGING
+  double vm, rss;
+  process_mem_usage(vm, rss);
+  cout << "VM: " << vm << "; RSS: " << rss << endl;
+#endif
+  
+  //Build StringSet for each genome
+  TStringSet1 genomeSeqSet;
+  TSize seqSetLen = length(seqSet);
+  std::vector<std::vector<TSize> > genomeMap;
+  std::map<TSize,TSize> genomeLenMap;
+  genomeMap.resize(nGenomes);
+  for(TSize i=0;i<seqSetLen;++i){
+    genomeMap[genomeNames[i]].push_back(i);
+  }
+  TSize mapLen=genomeMap.size();
+  resize(genomeSeqSet,mapLen,Exact());
+  for(TSize i=0;i<mapLen;++i){
+    std::stringstream ss;
+    //Concatenate all sequences for the genome
+    for(typename std::vector<TSize>::iterator sit = genomeMap[i].begin();sit != genomeMap[i].end();++sit){
+      append(genomeSeqSet[i],seqSet[*sit]);
+    }
+  }
+  //Save sum of genome lengths for later
+  for(unsigned int i=0;i<length(genomeSeqSet);++i){
+    genomeLenMap[i]=length(genomeSeqSet[i]);
+  }
+#ifdef SEQAN_PROFILE
+  std::cerr << "Building guide trees" << std::endl;
+#endif
+  
+  /*
+  // Set-up a distance matrix
+  typedef String<TDistanceValue> TDistanceMatrix;
+  TDistanceMatrix distanceMatrix;
+  
+  clear(distanceMatrix);
+  //Calculate initial
+  //Guide tree over all genomes
+  typedef Graph<Tree<TDistanceValue> > TGuideTree;
+  TGuideTree genomeguideTree;
+  TSize ktup=3; //3mers
+  getKmerSimilarityMatrix(genomeSeqSet, distanceMatrix, ktup, TAlphabet());
+  // Similarity to distance conversion
+  typedef typename Value<TDistanceMatrix>::Type TValue;
+  typedef typename Iterator<TDistanceMatrix, Standard>::Type TMatrixIterator;
+  TMatrixIterator matIt = begin(distanceMatrix, Standard());
+  TMatrixIterator endMatIt = end(distanceMatrix, Standard());
+  for(;matIt != endMatIt;++matIt) 
+  *matIt = SEQAN_DISTANCE_UNITY - (*matIt);
+  if (msaOpt.build == 0) njTree(distanceMatrix, genomeguideTree);
+  else if (msaOpt.build == 1) upgmaTree(distanceMatrix, genomeguideTree, UpgmaMin());
+  else if (msaOpt.build == 2) upgmaTree(distanceMatrix, genomeguideTree, UpgmaMax());
+  else if (msaOpt.build == 3) upgmaTree(distanceMatrix, genomeguideTree, UpgmaAvg());
+  else if (msaOpt.build == 4) upgmaTree(distanceMatrix, genomeguideTree, UpgmaWeightAvg());
+  clear(distanceMatrix);
+  */
+#ifdef SEQAN_PROFILE
+  std::cerr << "Building guide trees done: " << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+#endif
+  //*******
+  //Build alignment graph
+  //
+  // Use these segment matches for the initial alignment graph
+ #ifdef SEQAN_PROFILE
+  std::cerr << "Building alignment graph from " << length(matches) << " matches" << std::endl;
+#endif
+  TGraph g(seqSet);
+  if (!msaOpt.rescore) buildAlignmentGraph(matches, scores, g, FractionalScore() );
+  else buildAlignmentGraph(matches, scores, g, msaOpt.sc, ReScore() );
+  //clear these here to save memory
+  clear(matches);
+  clear(scores); 
+#ifdef SEQAN_PROFILE
+  std::cerr << "Building alignment graph done: " << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+#endif
+#ifdef DEBUGGING
+  process_mem_usage(vm, rss);
+  cout << "VM: " << vm << "; RSS: " << rss << endl;
+#endif
+  
+  std::cerr << std::endl << "Refined alignment graph built. E: " << numEdges(g) << " V:" << numVertices(g) << std::endl;
+  
+  //Stats
+  typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
+  TVertexIterator itV(g);
+  unsigned totalmatchingbp=0;
+  unsigned totalseqlen=0;
+  for(;!atEnd(itV);goNext(itV)){
+    if(degree(g,*itV)>0){
+      totalmatchingbp+=fragmentLength(g,*itV);
+    }
+   }
+  for(unsigned int i=0;i<seqSetLen;i++){
+    totalseqlen+=length(seqSet[i]);
+  }
+  std::cerr << "Average fragment length: " 
+	    << (float)(totalmatchingbp/numVertices(g)) 
+	    << "bp" << std::endl;
+  std::cerr << "Percentage matching bp:" 
+	    << totalmatchingbp << "/" << totalseqlen  
+	    << "=" << (float)totalmatchingbp/totalseqlen 
+	    << std::endl;
+  //Calculate a distance measure similar to MUMi
+  //Print range
+  std::map<std::pair<int,int>,int > flengths;
+  std::map<std::pair<int,int>,int>::iterator pos;
+  typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
+  TEdgeIterator itE(g);
+  TVertexDescriptor source,target;
+  TEdgeDescriptor ed;
+  bool inserted=false;
+  TSize sgen,tgen;
+  for(;!atEnd(itE);goNext(itE)){
+    ed = *itE;
+    source = getSource(ed);
+    target = getTarget(ed);
+    sgen = genomeNames[sequenceId(g,source)];
+    tgen = genomeNames[sequenceId(g,target)];
+    if(sgen>tgen){
+      TSize tmp=sgen;
+      sgen=tgen;
+      tgen=tmp;
+    }
+    assert(fragmentLength(g,source)==fragmentLength(g,target));
+    //TODO, fix may count same vertex more than once
+    boost::tie(pos, inserted) = flengths.insert(std::make_pair(std::make_pair(sgen,tgen),fragmentLength(g,source)));
+    if(!inserted){
+      pos->second+=fragmentLength(g,source);
+    }
+  }
+  float minnia=2;
+  float maxnia=0;
+  float minnim=2;
+  float maxnim=0;
+  for(std::map<std::pair<int,int>,int>::iterator mit = flengths.begin();mit!=flengths.end();++mit){
+    float avgsize = (genomeLenMap[mit->first.first]+genomeLenMap[mit->first.second])/2;
+    float minsize = genomeLenMap[mit->first.first] < genomeLenMap[mit->first.second] ? genomeLenMap[mit->first.first] : genomeLenMap[mit->first.second];
+    assert(avgsize>0);
+    float nia = 1 - (mit->second/avgsize);
+    float nim = 1 - (mit->second/minsize);
+    minnia = (minnia < nia) ? minnia : nia;
+    maxnia = (maxnia > nia) ? maxnia : nia;
+    minnim = (minnim < nim) ? minnim : nim;
+    maxnim = (maxnim > nim) ? maxnim : nim;
+  }
+  flengths.clear();
+  //clear(genomeSeqSet);
+  std::cerr << "D=1-Lseq/Lavg min-max: " << minnia << "-" << maxnia << std::endl;
+  std::cerr << "D=1-Lseq/Lmin min-max: " << minnim << "-" << maxnim << std::endl;
+  //
+#ifdef DEBUGGING_GRAPH
+  std::fstream rawstrm;
+  rawstrm.open("origrefinegraph.out", std::ios_base::out | std::ios_base::trunc);
+  write(rawstrm,g,sequenceSet,Raw());
+  rawstrm.close();
+#endif
+  //*******
+  //
+  //TODO 
+  //testing partitioning
+  //This partitioning is just for testing.
+  //The refined graph is already partitioned.
+  //Ideally matches could be filtered and partitioned prior to building the large alignment graph
+  //Convert graph back to matches
+  /*
+  TFragmentString filtmatches;
+  std::vector<TFragmentString> matchSets;
+  if(doPartitioning){
+    typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
+    TEdgeIterator itE(g);
+    for(;!atEnd(itE);goNext(itE)){
+      TEdgeDescriptor ed = *itE;
+      TVertexDescriptor vd1 = getSource(ed);
+      TVertexDescriptor vd2 = getTarget(ed);
+      appendValue(filtmatches, Fragment<>(sequenceId(g,vd1), 
+					  fragmentBegin(g,vd1), 
+					  sequenceId(g,vd2),
+					  fragmentBegin(g,vd2), 
+					  fragmentLength(g,vd1), 
+					  ((int)(cargo(ed)<0)) ? true : false));
+    }
+    partitionSegments(seqSet,filtmatches,matchSets,msaOpt.partition);
+    for(typename std::vector<TFragmentString>::iterator it = matchSets.begin();
+	it!=matchSets.end();it++){
+      String<Fragment<> > matchset = *it;
+#ifdef DEBUGGING
+      std::cout << "Length of matchset " << length(matchset) << std::endl;
+#endif
+    }
+  }
+  else{
+    //Using full graph
+  }
+  */
+  std::fstream strmmaf;
+  std::string outfile(msaOpt.outfile);
+  strmmaf.open(std::string(outfile+".maf").c_str(), std::ios_base::out | std::ios_base::trunc);
+  _streamWrite(strmmaf,"##maf version=1 scoring=mugsy");
+  typedef String<unsigned int> TComponentMap;
+  typedef typename Value<TComponentMap>::Type TComponent;
+  typedef typename Position<TGraph>::Type TPos;
+  typedef SVABlock<TComponent,unsigned,TVertexDescriptor,unsigned> TBlock;
+  std::vector<std::vector<TVertexDescriptor> > lcbs;
+  std::map<TVertexDescriptor,char> vertexOrientMap;
+  
+  //LCBs are saved in lcbsp
+  //Optionally, can also store profiles and write directly to strmmaf
+  wholeGenomeAlignment(g,
+		       seqSet,
+		       genomeSeqSet,
+		       sequenceNames,
+		       genomeNames,
+		       msaOpt,
+		       lcbs,
+		       vertexOrientMap,
+		       strmmaf,
+		       aintervals);
+  //Close out MAF
+  strmmaf << std::endl;
+  
+  //loop over all profiles and print
+  
+  //Print all remaining unaligned sequences
+  //TODO broken in refactor, fix 
+  if(msaOpt.unique == "true"){
+    printUniques(seqSet,sequenceNames,aintervals,strmmaf);
+  }
+  
+  //Close output streams
+  strmmaf.close();
+
+#ifdef SEQAN_PROFILE
+  std::cerr << "Mugsy all done" << std::endl;
+#endif
+}
+
+
+//
+// findDuplications
+//
+// Detects duplicated regions and aligns the copies of each duplication.
+//
+// Outline, as implemented below:
+//  1. Read all pairwise segment matches from every file in msaOpt.alnfiles
+//     except the first one (XMFA / MultiFastaAlign format).
+//  2. Build a refined alignment graph g from the matches, so that the
+//     matches are reduced to non-overlapping segments.
+//  3. Label the connected components (CCs) of g.
+//  4. For each sequence, sort its vertices with vertexposcmp and chain
+//     matched segments into runs. A run is closed when the gap to the next
+//     matched segment exceeds POS_CMB or a CC label repeats inside the run;
+//     only runs spanning more than POS_CMB are kept.
+//  5. Build a graph of runs (runG) with an edge between any two runs joined
+//     by an edge of g. Each connected component of runG is one LCB.
+//  6. Relabel the kept vertices of g with their run index as sequence id,
+//     remove vertices that belong to no run, then align every LCB and write
+//     it to "<msaOpt.outfile>.dups.maf".
+//
+// Parameters:
+//  gAlign        - cleared and re-initialised with sequenceSet
+//  sequenceSet   - input sequences
+//  sequenceNames - name of each input sequence
+//  genomeNames   - per-sequence genome index (numeric; its maximum + 1 is
+//                  reported as the number of genomes)
+//  dupintervals  - output; passed to convertCC2Blocks and saveInterval
+//  msaOpt        - options; alnfiles, rescore, sc, outfile and minlength are
+//                  read here
+//
+// NOTE(review): the file must contain at least two entries in
+// msaOpt.alnfiles for any match to be read; with none, the function only
+// asserts (no-op under NDEBUG) - confirm callers guarantee this.
+template<typename TStringSet, 
+	 typename TCargo, 
+	 typename TSpec, 
+	 typename TStringSet1, 
+	 typename TNames, 
+	 typename TGenomeNames,
+	 typename TScore,
+	 typename TIntervals>
+inline void
+findDuplications(Graph<Alignment<TStringSet, TCargo, TSpec> >& gAlign, 
+		 TStringSet1& sequenceSet,
+		 TNames& sequenceNames,
+		 TGenomeNames& genomeNames,
+		 TIntervals& dupintervals,
+		 MsaOptions<Dna5 , TScore> const& msaOpt)
+{
+  typedef Dna5 TAlphabet;
+  typedef typename Value<TScore>::Type TScoreValue;
+  typedef typename Size<TStringSet>::Type TSize;
+  typedef typename Value<TStringSet1>::Type TString;
+  typedef typename Value<TNames>::Type TName;
+  //Using int to support negative edge scores
+  typedef Graph<Alignment<TStringSet, int> > TGraph;
+  typedef typename Id<TGraph>::Type TId; 
+  typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
+  typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
+  //
+  typedef std::map<unsigned int, unsigned int> TComponentLength;
+  
+  // Strongly Connected Components, topological sort, and length of each component
+  typedef String<unsigned int> TComponentMap;
+  typedef typename Value<TComponentMap>::Type TComponent;
+  typedef typename Position<TGraph>::Type TPos;
+  typedef SVABlock<TComponent,TSize,TVertexDescriptor,TPos> TBlock;
+  
+  typedef typename Value<TComponentMap>::Type TComponent;
+  typedef std::pair<TId, TSize> TKey;
+  typedef std::map<TKey, TVertexDescriptor> TPosToVertexMap;
+  typedef FragmentInfo<TId, TSize> TFragmentInfo;
+
+  typedef double TDistanceValue;
+#ifdef SEQAN_PROFILE
+  std::cerr << "Detecting duplications " << std::endl;
+#endif
+  // Initialize alignment object
+  clear(gAlign);
+  assignStringSet(gAlign, sequenceSet);
+  // Some alignment constants
+  TStringSet& seqSet = stringSet(gAlign);
+  TSize nSeq = length(seqSet);
+  // Number of genomes = largest genome index + 1
+  TSize nGenomes=0;
+  for(TSize i=0;i<length(genomeNames);i++){
+    nGenomes = (genomeNames[i] > nGenomes) ? genomeNames[i] : nGenomes;
+  }
+  nGenomes = nGenomes+1;
+  std::cerr << "Number of genomes:" << nGenomes << std::endl;
+  std::cerr << "Number of sequences:" << nSeq << std::endl;
+  
+  // Set-up a distance matrix
+  typedef String<TDistanceValue> TDistanceMatrix;
+  TDistanceMatrix distanceMatrix;
+  
+  // Containers for segment matches and corresponding scores 
+  typedef String<Fragment<> > TFragmentString;
+  TFragmentString matches;
+  typedef String<TScoreValue> TScoreValues;
+  TScoreValues scores;
+  	
+  // Include segment matches from subalignments
+  if (!empty(msaOpt.alnfiles)) {
+    typedef typename Iterator<String<std::string>, Standard>::Type TIter;
+    TIter begIt = begin(msaOpt.alnfiles, Standard() );
+    TIter begItEnd = end(msaOpt.alnfiles, Standard() );
+    goNext(begIt);//alignment XMFA is second alignment file passed
+    for(;begIt != begItEnd; goNext(begIt)) {
+#ifdef SEQAN_PROFILE
+      std::cerr << "*Alignment file XMFA format: " << (*begIt).c_str() << std::endl;
+#endif
+      std::ifstream strm_lib;
+      strm_lib.open((*begIt).c_str(), ::std::ios_base::in | ::std::ios_base::binary);
+      //defined in graph_align_tcoffee_io.h
+      read(strm_lib, matches, scores, sequenceSet, sequenceNames, MultiFastaAlign());
+      strm_lib.close();
+      //clear(alignmentFile);read(strm_lib, matches, scores, sequenceNames, FastaAlign());
+    }
+  }
+  else{
+    // Duplication detection needs alignment files as input
+    assert(false);
+  }
+#ifdef SEQAN_PROFILE
+    std::cerr << "Building alignment graph" << std::endl;
+#endif
+    TGraph g(seqSet);
+    //defined in graph_align_tcoffee_base.h
+    if (!msaOpt.rescore) buildAlignmentGraph(matches, scores, g, FractionalScore() );
+    else buildAlignmentGraph(matches, scores, g, msaOpt.sc, ReScore() );
+    //clear(matches);
+    //clear(scores); 
+#ifdef SEQAN_PROFILE
+    std::cerr << "Building alignment graph done: " << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+#endif
+    std::cerr << std::endl << "Refined alignment graph built. E: " << numEdges(g) << " V:" << numVertices(g) << std::endl;
+    //Stats
+    typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
+    TVertexIterator itV(g);
+    unsigned totalmatchingbp=0;
+    unsigned totalseqlen=0;
+    TSize seqSetLen = length(seqSet);
+    // Only vertices with at least one edge count as matching bases
+    for(;!atEnd(itV);goNext(itV)){
+      if(degree(g,*itV)>0){
+	totalmatchingbp+=fragmentLength(g,*itV);
+      }
+    }
+    for(unsigned int i=0;i<seqSetLen;i++){
+      totalseqlen+=length(seqSet[i]);
+    }
+    // Divide in floating point so the average keeps its fractional part,
+    // and skip the division entirely for an empty graph (an integer
+    // division by zero would otherwise abort the program here).
+    float avgfragmentlen = 0;
+    if(numVertices(g)>0){
+      avgfragmentlen = (float)totalmatchingbp/numVertices(g);
+    }
+    std::cerr << "Average fragment length: " 
+	      << avgfragmentlen 
+	      << "bp" << std::endl;
+    std::cerr << "Percentage matching bp:" 
+	      << totalmatchingbp << "/" << totalseqlen  
+	      << "=" << (float)totalmatchingbp/totalseqlen 
+	      << std::endl;
+    
+#ifdef DEBUGGING_GRAPH
+    std::fstream rawstrm;
+    rawstrm.open("origrefinegraph.out", std::ios_base::out | std::ios_base::trunc);
+    write(rawstrm,g,sequenceSet,Raw());
+    rawstrm.close();
+#endif
+    
+#ifdef DEBUGGING
+  std::cout << "Finding connected components " << std::endl;
+#endif
+
+#ifdef SEQAN_PROFILE
+    std::cerr << "Determining duplicated regions" << std::endl;
+#endif
+  
+  //Sequence set that will capture each copy of the duplication
+  TStringSet1 runSeqSet;
+    //StringSet<TString, TSpec> runSeqSet;
+  //Graph of runs: one vertex per run, one edge per pair of connected runs
+  Graph<Directed<> > runG;
+
+  // Connected Components
+  // Each CC represents an UNGAPPED set of aligned fragments/segments across sequences
+  // A CC is an ungapped block 
+  // A CC is also an LCB at this point of the algorithm but may be extended
+  TComponentMap componentall; 
+  std::map<std::pair<TComponent,TComponent>,TBlock *> componentVertexMap;
+  std::vector<std::vector<TBlock> > blocksbycomponent; 
+  
+  //TSize numComponents = connected_components(g, componentall);
+  //TSize numComponents = connected_components_by_genome_ranked(g, componentall, genomeNames,std::numeric_limits<unsigned int>::max());
+  TSize numComponents = connected_components_ranked(g, componentall);
+  std::cerr << "Determined " << numComponents << " component segments in graph of size " << numVertices(g) << std::endl;
+  assert(numComponents>0);
+  //std::cerr << "Calculating positional scores" <<std::endl;
+  //scorePosCons(g,componentall,numComponents,posScores,POS_ADJ);
+  //std::cerr << "Set positional scores" << std::endl;
+
+  //Identify runs
+  //POS_CMB is both the largest gap allowed between adjacent segments of a
+  //run and the minimum span a run must exceed to be kept
+  int POS_CMB = 100;
+
+  //Vertices grouped by sequence id and by CC label
+  std::map<TSize,std::vector<TVertexDescriptor> > componentSeqMap; 
+  std::map<TComponent,std::vector<TVertexDescriptor> > componentMap;
+  
+  typename TPosToVertexMap::const_iterator it2 = g.data_pvMap.begin();
+  typename TPosToVertexMap::const_iterator it2End = g.data_pvMap.end();
+  for(it2 = g.data_pvMap.begin();it2!=it2End;++it2) {
+    TVertexDescriptor currV = it2->second;
+    assert(getProperty(componentall,currV)==componentall[currV]);
+
+    TSize currentSeq = sequenceId(g,currV);
+
+    typename std::map<TSize,std::vector<TVertexDescriptor> >::iterator fit = componentSeqMap.find(currentSeq); 
+    if(fit==componentSeqMap.end()){
+      componentSeqMap[currentSeq] = std::vector<TVertexDescriptor>();
+    }
+    componentSeqMap[currentSeq].push_back(currV);
+    TComponent c = getProperty(componentall, currV);
+    if(componentMap.find(c)==componentMap.end()){
+      componentMap[c] = std::vector<TVertexDescriptor>();
+    }
+    componentMap[c].push_back(currV);
+  }
+  
+
+  //seqIdxMap: run index -> original sequence id
+  //runmap:    vertex    -> run index
+  //vrunmap:   run index -> vertices of that run
+  std::map<TSize,TSize> seqIdxMap;
+  std::map<TVertexDescriptor,TSize> runmap;
+  std::map<TSize,std::vector<TVertexDescriptor> > vrunmap;
+  int runcount=0;
+  for(typename std::map<TSize,std::vector<TVertexDescriptor> >::iterator it = componentSeqMap.begin();it!=componentSeqMap.end();++it){
+    std::set<std::pair<int,int> > runs;
+
+    TSize currentSeq = it->first;
+    //std::cout << "Examining sequence " << currentSeq
+    //      << " with num vertices:" << it->second.size() << std::endl;
+    //Sort vertices in G on sequence currentSeq
+    std::set<TSize> repeatCC;
+    std::vector<TVertexDescriptor> vlist;
+    int lastcoord=0;
+    int runstart=0;
+    int runend=0;
+
+    sort(it->second.begin(),it->second.end(),vertexposcmp<TGraph>(g));
+    // NOTE(review): a run is only recorded when a later matched segment
+    // breaks it; the run still open when this loop ends is never saved.
+    // Confirm whether dropping the trailing run of each sequence is intended.
+    for(typename std::vector<TVertexDescriptor>::iterator vit = it->second.begin();vit!=it->second.end();++vit){
+      TVertexDescriptor currV = *vit;
+      TComponent c = getProperty(componentall, *vit);
+      //Only consider segments that are part of matches
+      if(componentMap[c].size()>1){
+	if(lastcoord>0){
+	  int dist = fragmentBegin(g,*vit)-lastcoord;
+	  //Close the current run on a large gap or a repeated CC label
+	  if(dist>(int)POS_CMB 
+	     || repeatCC.find(c)!=repeatCC.end()){
+	    runend = lastcoord;
+	    if(runend - runstart > POS_CMB){
+	      //std::cout << runstart << " runstart " << runstart << " runend: " << runend << " len:" << runend - runstart << std::endl;
+	      runs.insert(std::make_pair(runstart,runend));
+	      for(unsigned i=0;i<vlist.size();++i){
+		runmap[vlist[i]] = runcount;
+		assert(sequenceId(g,vlist[i])==currentSeq);
+	      }
+	      if(vrunmap.find(runcount)==vrunmap.end()){
+		vrunmap[runcount] = std::vector<TVertexDescriptor>();
+	      }
+	      vrunmap[runcount].insert(vrunmap[runcount].end(),vlist.begin(),vlist.end());
+	      //The new vertex of runG stands for run number runcount
+	      addVertex(runG);
+	      seqIdxMap[runcount]=currentSeq;
+	      runcount++;
+	    }
+	    repeatCC.clear();
+	    vlist.clear();
+	    runstart = fragmentBegin(g,*vit);
+	  }
+	}
+	lastcoord = fragmentBegin(g,*vit)+fragmentLength(g,*vit);
+	//Remember the CC label so a repeat of it closes the run
+	repeatCC.insert(c); 
+	vlist.push_back(currV);
+	//std::cout << "Last coord:" << lastcoord << " component:" << c << " size:" << componentMap[c].size() << std::endl;
+      }
+    }
+    //assert(runcount+1==runs.size());
+        
+    //A run is a list of CC
+    //Create a new seqSet that contains all the runs
+    
+    //for(typename std::set<std::pair<int,int> >::iterator rit = runs.begin();rit != runs.end();++rit){
+      //std::cout << "Seq: " << currentSeq 
+      //	<< rit->first << "-"  << rit->second << " " 
+      //	<< rit->second - rit->first << std::endl;
+      //TString newseq=seqSet[i];
+      //For each run, create a new seqset
+      //appendValue(runSeqSet,seqSet[currentSeq]);
+      //addVertex(runG);
+    //}
+    //std::cout << "Current seq run count " << runcount  << " " << " . Total runs " << runs.size() << std::endl;
+
+  }
+  //
+  //Each run represents a copy of a duplicated region
+  //We will determine the copies that need to be aligned and store them in an LCB.
+  //Also stated, an LCB is a set of runs, where each run is a set of vertices in G(0).
+  //
+  //Determining the LCBs that represent duplications
+  //
+  //After the runs have been defined
+  //an LCB is simply a list of all vertices in G(0) reachable in the run; ie. all members of the CCs in that run
+  //
+  //
+  //To obtain the list of runs that comprise an LCB:
+  //Build a graph G(l) where each node is a runidx and an edge connects any two runidx that share a ccidx from G(0)
+  //To find the LCBs, simply determine the connected components in this graph G(l).  Each component corresponds to an LCB 
+  //and the list of vertices is obtained from G(0) using ccidx at each node
+
+  //foreach run in runs
+  //  addvertex(run,g.l)
+  //
+  //foreach v1 in cc
+  // foreach v2 in cc
+  //  if(runmap[v1]!=runmap[v2])
+  //   addedge(runv1,runv2,g.l)
+  //
+  //numcc = connected_components(rccmap,g.l)
+  //lcbs.resize(numcc)
+  //
+  //foreach runv (g.l)
+  // //save all vertices associated with
+  // lcbidx <- rccmap[runv]
+  // lcbs[lcbidx].push_back(runv)
+
+  
+  //Next, the vertices need to be updated to map to unique sequence ids for each run.
+  //foreach lcb (lcbs)
+  //  numruns <- lcbs[lcb].size()
+  //  Build new seq set
+  //  Build a new graph 
+  //  foreach run (lcbs[lcb])
+  //     foreach v (vmap[run])
+  //      seqIdMap[v] = runmap[v]
+  //      lcbv.push_back(v)
+
+
+  //At this point, runSeqSet should be populated
+  //graph G(0) should be updated to reference sequence ids in runSeqSet
+  //seqIdMap should be an identity map
+
+  //retrieveLCBSegments()
+  
+  //Probably not necessary
+  //foreach match (currmatches)
+  //  fragment(match,0).seq = sequenceid[fragment(match,0).seq]
+  //  fragment(match,1).seq = sequenceid[fragment(match,1).seq]
+
+  //buildAlignmentGraph()
+
+  TComponentMap runccmap;
+
+
+  //Add edges between any 2 runs that are connected in G(0)
+
+  //Edge iterator 
+  typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
+  TEdgeIterator itE1(g);
+  for(;!atEnd(itE1);++itE1){
+    TEdgeDescriptor ed = *itE1;
+    TVertexDescriptor source = getSource(ed);
+    TVertexDescriptor target = getTarget(ed);
+    //Edges with an endpoint outside every run are ignored
+    if(runmap.find(source)!=runmap.end()
+       && runmap.find(target)!=runmap.end()){
+      if(runmap[source]!=runmap[target]){
+	addEdge(runG,runmap[source],runmap[target]);
+      }
+      else{
+	//std::cout << runmap[source] << " " << runmap[target] << " s:" << sequenceId(g,source) << " " << sequenceId(g,target) <<std::endl;
+	assert(runmap[source]==runmap[target]);
+	assert(sequenceId(g,source)==sequenceId(g,target));
+      }
+    }
+  }
+
+  //Each connected component of the run graph is one LCB
+  int numcc = connected_components(runG,runccmap);
+  std::cerr << "Determined " << numcc << " LCBs from a graph of runs: " << numVertices(runG) << std::endl;
+
+  typedef std::vector<TVertexDescriptor> TLCB;
+  std::vector<std::vector<TVertexDescriptor> > runs;
+  std::vector<std::vector<TVertexDescriptor> > LCBs;
+  //List of runs in an LCB
+  runs.resize(numcc);
+  //List of vertices in an LCB
+  LCBs.resize(numcc);
+
+  for(unsigned i=0;i<numVertices(runG);i++){
+    int lcbidx = runccmap[i];
+    //std::cout << "LCB " << lcbidx << " contains run " << i << std::endl;
+    runs[lcbidx].push_back(i);
+  }
+  std::map<TSize,TSize> seqIdMap;
+  //std::cout << "LCBs " << runs.size() << std::endl;
+  for(unsigned j=0;j<runs.size();j++){
+    int lcbidx = j;
+    int numruns = runs[j].size();
+    //std::cout << "LCB " << j << " runs " << numruns << std::endl;
+    for(int k=0;k<numruns;k++){
+      int runidx = runs[j][k];
+      LCBs[lcbidx].insert(LCBs[lcbidx].end(),vrunmap[runidx].begin(),vrunmap[runidx].end());
+      for(unsigned i=0;i<vrunmap[runidx].size();i++){
+	TVertexDescriptor currV = vrunmap[runidx][i];
+	//std::cout << "CurrV: " << currV << " runidx: " << runidx << " " << " seq:" << sequenceId(g,currV) << std::endl;
+	//Overwrite the fragment's sequence id in place with its run index,
+	//so that each run is addressed as its own sequence from here on
+	g.data_fragment[currV].data_seq_id = runidx;
+	//std::cout << "CurrV: " << currV << " runidx: " << runidx << " " << " seq:" << sequenceId(g,currV) << std::endl;
+	assert(sequenceId(g,currV)==(unsigned)runidx);
+      }
+      seqIdMap[runidx] = runidx;
+    }
+  }
+
+  //Set vertex orientation
+  std::map<TVertexDescriptor,char> vertexOrientMap;
+  typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
+  TVertexIterator itV2(g);
+  for(;!atEnd(itV2);goNext(itV2)){
+    vertexOrientMap[*itV2] = '+';
+  }
+
+  //Trim graph and remove vertices that are not part of runs
+  //Candidates are collected first and removed after the iteration is done
+  typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
+  TVertexIterator itv(g);
+  std::vector<TVertexDescriptor> removeV;
+  for(;!atEnd(itv);goNext(itv)) {
+    TVertexDescriptor currV = *itv;
+    if(runmap.find(currV)==runmap.end()){
+      removeV.push_back(currV);
+    }
+  }
+  for(typename std::vector<TVertexDescriptor>::iterator vit = removeV.begin();vit!=removeV.end();++vit){
+    removeVertex(g,*vit);
+  }
+
+  //At this point, runSeqSet should be populated
+  //graph G(0) should be updated to reference sequence ids in runSeqSet
+  //seqIdMap should be an identity map
+  
+  resize(runSeqSet,numVertices(runG));
+  //Name every run "<original sequence name>_<run index>"
+  TNames sequenceRunsNames;
+  //resize(sequenceRunsNames,numVertices(runG));
+  for(unsigned int i=0;i<numVertices(runG);i++){
+    std::string name(toCString(sequenceNames[seqIdxMap[i]]));
+    std::string count(boost::lexical_cast<std::string>(i));
+    appendValue(sequenceRunsNames,name+"_"+count);
+  }
+  //For tracking substrings 
+  //std::map<TSize,unsigned int> offsets;
+  //std::map<TSize,unsigned int> spanlens;
+  //std::map<TSize,unsigned int> seqlens;
+  //std::map<TSize,char> orients;
+  typedef unsigned TSize2;
+  std::map<TSize2,s_offset> offsets;
+  
+  //Copy links between set of vertices in LCB $lit
+  //from Graph $g and store in $currmatches,$currscores,$currseqs
+  
+  blocksbycomponent.resize(numComponents);
+  convertCC2Blocks(g,
+		   componentall,
+		   componentVertexMap,
+		   blocksbycomponent,
+		   dupintervals,
+		   sequenceNames);
+#ifdef SEQAN_PROFILE
+    std::cerr << "Determining duplicated regions done: " << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+    std::cerr << "Aligning duplicated regions" << std::endl;
+#endif
+  unsigned int lcbid=0;
+  //One MAF file receives the alignments of all duplicated regions
+  std::fstream strmmaf;
+  std::string outfile(msaOpt.outfile);
+  strmmaf.open(std::string(outfile+".dups.maf").c_str(), std::ios_base::out | std::ios_base::trunc);
+  _streamWrite(strmmaf,"##maf version=1 scoring=mugsy");
+  //std::cout << "Iterating over LCBs" << std::endl;
+  for(typename std::vector<std::vector<TVertexDescriptor> >::iterator lit = LCBs.begin();lit!=LCBs.end();lit++){
+    //Matches, scores, seqs, ids for current LCB
+    typedef String<Fragment<> > TFragmentString;
+    typedef typename Value<TScore>::Type TScoreValue;
+    typedef String<TScoreValue> TScoreValues;
+    typedef String<TAlphabet> TSequence;
+    TFragmentString currmatches;
+    TScoreValues currscores;
+    StringSet<TSequence, Owner<> > currseqs;
+    std::set<unsigned int> curridset; 
+    TNames currnameSet;
+
+    std::set<TVertexDescriptor> coveredSet;
+    std::vector<std::vector<TVertexDescriptor> > vseqs;
+    //lcbid is incremented for every LCB, including those skipped below
+    retrieveLCBSegments(g,
+			runSeqSet,
+			seqSet,
+			seqIdxMap,
+			vertexOrientMap,
+			lit,
+			++lcbid,
+			sequenceRunsNames,
+			currseqs,
+			currmatches,
+			currscores,
+			currnameSet,
+			offsets,
+			coveredSet,
+			vseqs,
+			boost::lexical_cast<unsigned int>(msaOpt.minlength));
+
+    
+    
+    //std::cout << "Retrieving LCB segments for LCB " << lcbid << std::endl; 
+    //Only LCBs with at least two sequences and one match are aligned
+    if(length(currseqs)>1 && length(currmatches)>0){
+      TGraph currG(currseqs);
+      buildAlignmentGraph(currmatches, currscores, currG, FractionalScore());
+      //Double check edge weights
+      typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
+      TEdgeIterator itE(currG);
+      //Undo Hack that stores reverse complement matches using
+      //negative edge weights
+      for(;!atEnd(itE);goNext(itE)){
+	if(cargo(value(itE))<0){
+	  cargo(value(itE)) = cargo(value(itE))*-1;
+	}
+      }
+      
+      typedef double TDistanceValue;
+      Graph<Tree<TDistanceValue> > currguideTree;
+      Graph<Tree<TDistanceValue> > seqguideTree;
+      std::map<std::string, Graph<Tree<TDistanceValue> > > seqguideTrees;
+
+      //Build guide tree using current list of seqs
+      for(unsigned int i=0;i<length(currseqs);i++){
+	//std::cout << i << " " << length(currseqs[i])  << " " << ((curridset.find(i)!=curridset.end()) ? 1 : 0) << " " << currnameSet[i]<< std::endl;
+	curridset.insert(i);//force inclusion of this seq in building the guide tree
+      }
+      getGuideTree(currseqs,curridset,seqguideTrees,currguideTree);
+
+      typedef Fragment<> TFragment;
+      typedef String<TAlphabet> TSequence;
+      
+      std::cout << "Aligning LCB " << lcbid << " with " << length(currseqs) << std::endl;
+      assert(curridset.size()>0);
+      TGraph currgOut(currseqs);
+      s_score sscores = alignSingleLCB(currG,
+				       currgOut,
+				       lcbid,
+				       currseqs,
+				       currguideTree,
+				       msaOpt);
+      //Write MAF format
+      std::vector<unsigned int> curroffsets;
+      std::vector<unsigned int> currspanlens;
+      std::vector<unsigned int> currseqlens;
+      std::vector<char> currorients;
+      assert(length(currseqs)==length(currnameSet));
+      currorients.resize(length(currnameSet));
+      currseqlens.resize(length(currnameSet));
+      currspanlens.resize(length(currnameSet));
+      curroffsets.resize(length(currnameSet));
+      //TODO, refactor using a id map
+      //Look up each row's run by name to fetch its offsets, then restore
+      //the original sequence name for output
+      for(TSize currrow = 0; currrow<length(currnameSet); ++currrow) {
+	for(TSize row = 0; row<length(sequenceRunsNames); ++row) {
+	  if(currnameSet[currrow]==sequenceRunsNames[row]){
+	    curroffsets[currrow] = offsets[row].offset;
+	    currspanlens[currrow] = offsets[row].spanlen;
+	    currseqlens[currrow] = offsets[row].seqlen;
+	    currorients[currrow] = offsets[row].orient;
+	    //reset name
+	    currnameSet[currrow] = sequenceNames[seqIdxMap[row]];
+	  }
+	}
+      } 
+      saveInterval(dupintervals,
+		   currnameSet,
+		   curroffsets,
+		   currspanlens,
+		   currseqlens,
+		   currorients,
+		   lcbid,
+		   true);
+      //mafformat defined in refinement/graph_impl_align.h
+      write(strmmaf,currgOut,currnameSet,MafFormat(),curroffsets,currspanlens,currseqlens,currorients,"label=d"+boost::lexical_cast<std::string>(lcbid));
+      
+      strmmaf.flush();
+    }
+  }
+  strmmaf.close();
+  //Alignments of many duplicated regions tend to be fragmented on first pass
+  //Consider running refinement by default
+  //refineMSA(std::string(outfile+".dups.maf").c_str(),msaOpt);
+#ifdef DEBUGGING
+  //TODO
+  //Resolve repetitive clusters here
+  //(1)break edges with weak support from adjacent matches
+  //(2)determine mincut on repetitive clusters
+  typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
+
+  //NOTE(review): only the vertex labels are written; the dot graph is
+  //never terminated with "}" and the stream is not closed explicitly
+  std::fstream dotstrm;
+  dotstrm.open("refinegraphpos.dot", std::ios_base::out | std::ios_base::trunc);
+  dotstrm << "graph g{" << std::endl;
+  typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
+  TVertexIterator it(g);
+  for(;!atEnd(it);goNext(it)) {
+    dotstrm << *it << " [label=\"" << *it << " S" << sequenceId(g,*it) << ","<<fragmentBegin(g,*it) << ","<<fragmentLength(g,*it) << "\"];" << std::endl;
+  }
+#endif
+#ifdef SEQAN_PROFILE
+    std::cerr << "Aligning duplicated regions done: " << SEQAN_PROTIMEUPDATE(__myProfileTime) << " seconds" << std::endl;
+#endif
+}
+
+
+//////////////////////////////////////////////////////////////////////////////////
+
+// Registers the version banner on the command line parser.
+// The revision number is cut out of the version-control keyword string:
+// the digits start right after the 11-character prefix "$Revision: ".
+inline void
+_addVersion(CommandLineParser& parser) {
+	::std::string const revisionKeyword("$Revision: 4637 $");
+	::std::string const revisionNumber(revisionKeyword.substr(11, 4));
+	::std::string versionLine("Version 1.00 (10 Oct 2009) Revision: ");
+	versionLine += revisionNumber;
+	addVersionLine(parser, versionLine);
+}
+
+//////////////////////////////////////////////////////////////////////////////////
+
+// Splits a FASTA id line into its first two whitespace-delimited tokens.
+//
+// Returns true and fills seqname/genomename only when both tokens exist;
+// otherwise returns false and leaves the outputs untouched. Tokens may be
+// of any length: this replaces an unbounded sscanf("%s %s") into fixed
+// 100-byte buffers, which overflowed the stack on long identifiers.
+inline bool
+_splitSeqIdLine(std::string const& idline,
+		std::string& seqname,
+		std::string& genomename)
+{
+  // Same separator set that sscanf's %s skips in the "C" locale
+  static const char* const whitespace = " \t\n\v\f\r";
+  std::string::size_type firstBeg = idline.find_first_not_of(whitespace);
+  if(firstBeg == std::string::npos) return false;
+  std::string::size_type firstEnd = idline.find_first_of(whitespace, firstBeg);
+  if(firstEnd == std::string::npos) return false;
+  std::string::size_type secondBeg = idline.find_first_not_of(whitespace, firstEnd);
+  if(secondBeg == std::string::npos) return false;
+  std::string::size_type secondEnd = idline.find_first_of(whitespace, secondBeg);
+  seqname = idline.substr(firstBeg, firstEnd - firstBeg);
+  if(secondEnd == std::string::npos) genomename = idline.substr(secondBeg);
+  else genomename = idline.substr(secondBeg, secondEnd - secondBeg);
+  return true;
+}
+
+// Loads all sequences of a multi-sequence file.
+//
+// For every record the id line is examined: when it holds at least two
+// whitespace-separated tokens, the first becomes the sequence name and the
+// second the genome name; otherwise the whole id is used for both.
+// Sequences shorter than 3 characters (below the k-mer size) are dropped;
+// when any are dropped the containers are rebuilt without them.
+//
+// Parameters:
+//  sequences - output, the sequences that were kept
+//  fastaIDs  - output, one sequence name per kept sequence
+//  genomes   - output, one genome name per kept sequence; must be empty
+//              on entry
+//  fileName  - path of the file to read
+//
+// Returns false when the file cannot be opened or no sequence was kept.
+template <typename TSeqSet, typename TNameSet>
+bool _loadSequences(TSeqSet& sequences, 
+		    TNameSet& fastaIDs, 
+		    TNameSet& genomes,
+		    const char *fileName)
+{
+  assert(length(genomes)==0);
+	MultiFasta multiFasta;
+	if (!open(multiFasta.concat, fileName, OPEN_RDONLY)) return false;
+	AutoSeqFormat format;
+	guessFormat(multiFasta.concat, format);	
+	split(multiFasta, format);
+	unsigned seqCount = length(multiFasta);
+	resize(sequences, seqCount, Exact());
+	resize(fastaIDs, seqCount, Exact());
+	resize(genomes, seqCount, Exact());
+	unsigned skippedseqCount = 0;
+	// First pass: load everything and count the sequences that are too short
+	for(unsigned i = 0; i < seqCount; ++i) 
+	  {
+	    std::string idline;
+	    std::string seqname;
+	    std::string genomename;
+	    assignSeqId(idline, multiFasta[i], format);
+	    if(_splitSeqIdLine(idline,seqname,genomename)){
+	      fastaIDs[i]=seqname.c_str();
+	      genomes[i]=genomename.c_str();
+	    }
+	    else{
+		assignSeqId(fastaIDs[i], multiFasta[i], format);
+		assignSeqId(genomes[i], multiFasta[i],format);
+	    }
+	    assignSeq(sequences[i], multiFasta[i], format);
+	    //SVA check for bad inputs here < kmer size
+	    if(length(sequences[i])<3){
+	      skippedseqCount++;
+	    }
+	}
+	// Second pass, only when needed: rebuild the containers without the
+	// short sequences so that indices stay contiguous
+	if(skippedseqCount>0){
+	  clear(sequences);
+	  clear(fastaIDs);
+	  clear(genomes);
+	  seqCount = length(multiFasta)-skippedseqCount;
+	  //std::cerr << "Updated seqCount " << seqCount << ". Skipping" << skippedseqCount << std::endl;
+	  resize(sequences, seqCount, Exact());
+	  resize(fastaIDs, seqCount, Exact());
+	  resize(genomes, seqCount, Exact());
+	  unsigned sidx=0;
+	  String<char> testseq;
+	  unsigned oseqCount = length(multiFasta);
+	  for(unsigned i = 0; i < oseqCount; ++i) 
+	  {
+	    assignSeq(testseq, multiFasta[i], format);
+	    //SVA check for bad inputs here < kmer size
+	    if(length(testseq)>=3){
+	      std::string idline;
+	      std::string seqname;
+	      std::string genomename;
+	      assignSeqId(idline, multiFasta[i], format);
+	      if(_splitSeqIdLine(idline,seqname,genomename)){
+		fastaIDs[sidx]=seqname.c_str();
+		genomes[sidx]=genomename.c_str();
+	      }
+	      else{
+		assignSeqId(fastaIDs[sidx], multiFasta[i], format);
+		assignSeqId(genomes[sidx], multiFasta[i],format);
+	      }
+	      assignSeq(sequences[sidx], multiFasta[i], format);
+	      sidx++;
+	    }
+	    else{
+	      std::cerr << "Skipping sequence of length " << length(testseq) << std::endl;
+	    }
+	  }
+	  assert(sidx==seqCount);
+	}
+	return (seqCount > 0);
+}
+
+// Top-level driver: loads the sequences named by msaOpt.seqfile, maps genome
+// names to consecutive indices, optionally detects and aligns duplications
+// (msaOpt.duplications == "true"), runs the single-pass whole genome
+// alignment and, for output formats 0 (FASTA) and 1 (MSF), writes gAlign to
+// msaOpt.outfile.
+//
+// The function reports an error on std::cerr and returns early when no
+// sequence could be loaded or when the output file cannot be opened.
+template<typename TAlphabet, typename TScore>
+inline void
+customizedMsaAlignment(MsaOptions<TAlphabet, TScore> const& msaOpt) {
+	typedef String<TAlphabet> TSequence;
+	StringSet<TSequence, Owner<> > sequenceSet;
+	StringSet<String<char> > sequenceNames;
+	StringSet<String<char> > genomeNames;
+	// Without sequences there is nothing to align; continuing would run the
+	// whole pipeline on empty containers
+	if(!_loadSequences(sequenceSet, sequenceNames, genomeNames, msaOpt.seqfile.c_str())){
+	  std::cerr << "Unable to load any sequences from " << msaOpt.seqfile.c_str() << std::endl;
+	  return;
+	}
+	assert(length(sequenceNames)==length(sequenceSet));
+#ifdef DEBUGGING
+	for(unsigned int j = 0; j<length(sequenceNames); ++j) {
+	  std::cout << j << " " << sequenceNames[j] << std::endl;
+	  assert(value(sequenceNames,j)==sequenceNames[j]);
+	}
+#endif
+	// Alignment of the sequences
+	Graph<Alignment<StringSet<TSequence, Dependent<> >, void, WithoutEdgeId> > gAlign;
+	typedef unsigned int TSize;
+	TSize gidx=0;
+	std::map<String<char>,TSize> genomeIdx;
+	String<TSize> genomeIndices;
+	//Convert names to indices: genomes are numbered in order of first appearance
+	for(TSize i=0;i<length(genomeNames);++i){
+	  TSize cidx;
+	  if(genomeIdx.find(genomeNames[i])==genomeIdx.end()){
+	    genomeIdx[genomeNames[i]]=gidx;
+	    cidx=gidx;
+	    gidx++;
+	  }
+	  else{
+	    cidx=genomeIdx[genomeNames[i]];
+	  }
+	  appendValue(genomeIndices,cidx);
+	}
+	
+	// Calc MSA
+	 //Aligned intervals, used to determine remaining segments unaligned
+	typedef iloc TLoc;
+	std::map<String<char>,std::vector<TLoc> > aintervals;
+	if(msaOpt.duplications == "true"){
+	  findDuplications(gAlign, sequenceSet, sequenceNames, genomeIndices, aintervals, msaOpt);
+	}
+
+	//if(msaOpt.partition >= 2){
+	  //Testing new code
+	  //Prototype only
+	  //multipassprog_wholeGenomeAlignment(gAlign, sequenceSet, sequenceNames, genomeIndices, msaOpt);
+	  //std::cerr << "Bad partition parameter " << msaOpt.partition << std::endl;
+	  //exit(1);
+	//}
+	//else{
+	singlepass_wholeGenomeAlignment(gAlign, sequenceSet, sequenceNames, genomeIndices, aintervals, msaOpt);
+	//}
+		
+	// Alignment output
+	if (msaOpt.outputFormat == 0 || msaOpt.outputFormat == 1) {
+		FILE* strmWrite = fopen(msaOpt.outfile.c_str(), "w");
+		// fopen returns a null pointer on failure; writing to it or
+		// closing it would be undefined behaviour
+		if (!strmWrite) {
+			std::cerr << "Unable to open output file " << msaOpt.outfile.c_str() << std::endl;
+			return;
+		}
+		if (msaOpt.outputFormat == 0) {
+			write(strmWrite, gAlign, sequenceNames, FastaFormat());
+		} else {
+			write(strmWrite, gAlign, sequenceNames, MsfFormat());
+		}
+		fclose(strmWrite);
+	}
+
+}
+
+//////////////////////////////////////////////////////////////////////////////////
+
+// Generic fallback: score types without a single match value ignore the
+// request. The overload taking Score<int, Simple> stores the value instead.
+template<typename TAlphabet, typename TScore, typename TSc>
+inline void _setMatchScore(MsaOptions<TAlphabet, TScore>& /*msaOpt*/, TSc /*msc*/) {}
+
+//////////////////////////////////////////////////////////////////////////////////
+
+// Generic fallback: score types without a single mismatch value ignore the
+// request. The overload taking Score<int, Simple> stores the value instead.
+template<typename TAlphabet, typename TScore, typename TSc>
+inline void _setMismatchScore(MsaOptions<TAlphabet, TScore>& /*msaOpt*/, TSc /*mmsc*/) {}
+
+//////////////////////////////////////////////////////////////////////////////////
+
+// Simple-score overload: stores msc as the match score of msaOpt.sc.
+template<typename TAlphabet, typename TSc>
+inline void
+_setMatchScore(MsaOptions<TAlphabet, Score<int, Simple> >& msaOpt, TSc msc)
+{
+	Score<int, Simple>& simpleScore = msaOpt.sc;
+	simpleScore.data_match = msc;
+}
+
+//////////////////////////////////////////////////////////////////////////////////
+
+// Simple-score overload: stores mmsc as the mismatch score of msaOpt.sc.
+template<typename TAlphabet, typename TSc>
+inline void
+_setMismatchScore(MsaOptions<TAlphabet, Score<int, Simple> >& msaOpt, TSc mmsc)
+{
+	Score<int, Simple>& simpleScore = msaOpt.sc;
+	simpleScore.data_mismatch = mmsc;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////
+/**
+ * Score every alignment block of a MAF file and print a report to stdout.
+ *
+ * All blocks of cfgOpt.infile are read with mafNext() and chained through
+ * their `next` pointers. Each block is written to a temporary aligned-FASTA
+ * file (/tmp/MUGTMP<pid>_<n>.eval.fsa), read back once as sequences and once
+ * as matches, turned into an alignment graph and scored with
+ * alignmentEvaluation(). For each block the scoring parameters, the
+ * score/length/gap statistics and the residue pair counts are printed; the
+ * summed score and summed length are printed after the last block.
+ *
+ * cfgOpt  options object; only cfgOpt.infile is read here.
+ * scType  scoring scheme; data_gap_open, data_gap_extend and score() are used.
+ * Dna5    unnamed tag argument.
+ *
+ * When the file yields no block nothing is printed (except under DEBUGGING).
+ *
+ * NOTE(review): the temporary file name is predictable (pid plus counter
+ * under /tmp); confirm that is acceptable on hosts with a shared /tmp.
+ * NOTE(review): the mafAli list is never released; the original carried a
+ * commented-out freeing loop at the end of the per-block loop.
+ */
+template<typename TConfigOptions, typename TScore>
+inline void
+evaluateAlignment(TConfigOptions const& cfgOpt, TScore const& scType, Dna5) {
+  struct mafFile *mf;
+  mf = mafOpen(cfgOpt.infile.c_str(), 0);
+  struct mafAli *a, *A, *last_a;
+  struct mafComp *c;
+  A = last_a = NULL;
+  // Read every block up front and chain them into a list headed by A.
+  while ((a = mafNext(mf)) != NULL) {
+    if ((c = a->components) == NULL)
+      assert(false);//fatal("empty maf entry");
+    if (last_a == NULL)
+      A = a;
+    else
+      last_a->next = a;
+    last_a = a;
+  }
+  if(A==NULL){
+#ifdef DEBUGGING
+    std::cout << "can't find any alignments" << std::endl;
+#endif
+  }
+  else{
+    int lcbid=0;
+    char chrName[200], species_name[200];
+    int COL_WIDTH=60;
+    // Block scores are signed (gap penalties can make them negative), so the
+    // running total must be signed too; an unsigned total would wrap around.
+    long totalscore=0;
+    long unsigned int totallen=0;
+    for (a = A; a != NULL; a = a->next) {
+      int ncol = a->textSize;
+      std::ostringstream tmpgraph;
+      tmpgraph << "MUGTMP" << getpid() << "_" << ++lcbid;
+      std::fstream strmfsa;
+      std::string fname(tmpgraph.str());
+      fname = "/tmp/"+fname + ".eval.fsa";
+      strmfsa.open(fname.c_str(), std::ios_base::out | std::ios_base::trunc);
+      for(c=a->components; c!=NULL; c=c->next) {
+	parseSrcName(c->src, species_name, chrName);
+	//Write FASTA, COL_WIDTH alignment columns per line
+	strmfsa << ">" << c->src << std::endl ;
+	int col=0;
+	int j=0;
+	for (col = j = 0; j < ncol; ++j) {
+	  strmfsa << c->text[j];
+	  ++col;
+	  if (col == COL_WIDTH) {
+	    strmfsa << std::endl;
+	    col = 0;
+	  }
+	}
+	if (col != 0){
+	  strmfsa << std::endl;
+	}
+      }
+      strmfsa.close();
+
+      typedef typename Value<TScore>::Type TScoreValue;
+      typedef String<Dna5> TSequence;
+      typedef typename Size<TSequence>::Type TSize;
+      typedef String<char> TName;
+      StringSet<TSequence, Owner<> > origStrSet;
+      StringSet<TName> names;
+
+      // Read the sequences
+      std::fstream strm;
+      strm.open(fname.c_str(), std::ios_base::in | std::ios_base::binary);
+      read(strm,origStrSet,names,FastaAlign());
+      strm.close();
+
+      // Make a dependent StringSet
+      typedef StringSet<TSequence, Dependent<> > TDepSequenceSet;
+      TDepSequenceSet strSet(origStrSet);
+
+      // Read the alignment (same file, second pass), then drop the temporary file
+      typedef String<Fragment<> > TFragmentString;
+      String<TScoreValue> scores;
+      TFragmentString matches;
+      std::fstream strm_lib;
+      strm_lib.open(fname.c_str(), std::ios_base::in | std::ios_base::binary);
+      read(strm_lib,matches, scores, names, FastaAlign());
+      strm_lib.close();
+      unlink(fname.c_str());
+      // Build the alignment graph
+      typedef Graph<Alignment<TDepSequenceSet, TSize> > TGraph;
+      TGraph g(strSet);
+      buildAlignmentGraph(matches, g, FrequencyCounting() );
+
+      // Print the scoring information
+      TScoreValue gop = scType.data_gap_open;
+      TScoreValue gex = scType.data_gap_extend;
+      std::cout << "Scoring parameters:" << std::endl;
+      std::cout << "*Gap opening: " << gop << std::endl;
+      std::cout << "*Gap extension: " << gex << std::endl;
+      std::cout << "*Scoring matrix: " << std::endl;
+      TSize alphSize = ValueSize<Dna5>::VALUE;
+      std::cout << "   ";
+      for(TSize col = 0; col<alphSize; ++col) std::cout << Dna5(col) << ',';
+      std::cout << std::endl;
+      for(TSize row = 0; row<alphSize; ++row) {
+	for(TSize col = 0; col<alphSize; ++col) {
+	  if (col == 0) std::cout << Dna5(row) << ": ";
+	  std::cout << score(scType, Dna5(row), Dna5(col));
+	  if (col < alphSize - 1) std::cout << ',';
+	}
+	std::cout << std::endl;
+      }
+      std::cout << std::endl;
+
+      // Print the alignment information
+      TSize numGapEx = 0;
+      TSize numGap = 0;
+      TSize numPairs = 0;
+      TSize alignLen = 0;
+      String<TSize> pairCount;
+      TScoreValue alignScore = alignmentEvaluation(g, scType, numGapEx, numGap, numPairs, pairCount, alignLen);
+      totalscore+=alignScore;
+      totallen+=alignLen;
+      std::cout << "Alignment Score: " << alignScore << std::endl;
+      std::cout << "Alignment Length: " << alignLen << std::endl;
+      std::cout << "#Match-Mismatch pairs: " << numPairs << std::endl;
+      std::cout << "Score contribution by match-mismatch pairs: " << (alignScore - (((TScoreValue) numGap * gop) + ((TScoreValue) numGapEx * gex))) << std::endl;
+      std::cout << "#Gap extensions: " << numGapEx << std::endl;
+      std::cout << "Score contribution by gap extensions: " << ((TScoreValue) numGapEx * gex) << std::endl;
+      std::cout << "#Gap openings: " << numGap << std::endl;
+      std::cout << "Score contribution by gap openings: " << ((TScoreValue) numGap * gop) << std::endl;
+      std::cout << std::endl;
+      std::cout << "#Pairs: " << std::endl;
+      std::cout << "   ";
+      for(TSize col = 0; col<alphSize; ++col) std::cout << Dna5(col) << ',';
+      std::cout << std::endl;
+      for(TSize row = 0; row<alphSize; ++row) {
+	for(TSize col = 0; col<alphSize; ++col) {
+	  if (col == 0) std::cout << Dna5(row) << ": ";
+	  std::cout << value(pairCount, row * alphSize + col);
+	  if (col < alphSize - 1) std::cout << ',';
+	}
+	std::cout << std::endl;
+
+      }
+    }
+    mafFileFree(&mf);
+    std::cout << "Total alignment score: " << totalscore << std::endl;
+    std::cout << "Total alignment length: " << totallen << std::endl;
+  }
+}
+
+/**
+ * Copy the parsed command line into an MsaOptions object and run the
+ * selected mode.
+ *
+ * parser  parser whose options have already been parsed.
+ * scMat   scoring scheme used to initialise msaOpt.sc; gap open/extend and,
+ *         for simple scores, match/mismatch are then overridden from the
+ *         "gop", "gex", "msc" and "mmsc" options.
+ *
+ * Mode selection:
+ *  - "infile" set and "refine" non-empty and not "colinear": refineMSA().
+ *  - "infile" set otherwise: evaluateAlignment().
+ *  - "infile" not set: customizedMsaAlignment(); if "seq" is missing the
+ *    short help is printed and the process exits with status 0.
+ *
+ * Any "segmentation" value other than "none", "enredo" or "mercator" is
+ * replaced by "mugsy".
+ */
+template<typename TAlphabet, typename TScore>
+inline void
+_initMsaParams(CommandLineParser& parser, TScore& scMat) {
+
+	// Msa configuration
+	MsaOptions<TAlphabet, TScore> msaOpt;
+
+	// Set main options
+	getOptionValueLong(parser, "seq", msaOpt.seqfile);
+	getOptionValueLong(parser, "outfile", msaOpt.outfile);
+	// MUGSY specific options
+	getOptionValueLong(parser, "distance", msaOpt.distance);
+	getOptionValueLong(parser, "minlength", msaOpt.minlength);
+	getOptionValueLong(parser, "refine", msaOpt.refine);
+	getOptionValueLong(parser, "duplications", msaOpt.duplications);
+	getOptionValueLong(parser, "unique", msaOpt.unique);
+	getOptionValueLong(parser, "allownestedlcbs", msaOpt.allownestedlcbs);
+	getOptionValueLong(parser, "anchorwin", msaOpt.anchorwin);
+	getOptionValueLong(parser, "blockfile", msaOpt.blockfile);
+	getOptionValueLong(parser, "segmentation", msaOpt.segmentation);
+	if(msaOpt.segmentation != "none" && msaOpt.segmentation != "enredo" && msaOpt.segmentation != "mercator"){
+	  msaOpt.segmentation = "mugsy";
+	}
+	String<char> optionVal;
+	getOptionValueLong(parser, "format", optionVal);
+	if (optionVal == "maf") msaOpt.outputFormat = 0;
+	else if (optionVal == "msf") msaOpt.outputFormat = 1;
+
+	// Split the comma separated "aln" value into msaOpt.alnfiles.
+	// A trailing empty field is dropped; empty fields in the middle are kept.
+	unsigned int beg = 0;
+	::std::string tmpVal;
+	getOptionValueLong(parser, "aln", tmpVal);
+	for(unsigned int i = 0; i<tmpVal.length(); ++i) {
+		if (tmpVal[i] == ',') {
+			appendValue(msaOpt.alnfiles, tmpVal.substr(beg, i - beg));
+			beg = i + 1;
+		}
+	}
+	if (beg != tmpVal.length())
+		appendValue(msaOpt.alnfiles, tmpVal.substr(beg, tmpVal.length() - beg));
+
+	// Set scoring options
+	msaOpt.sc = scMat;
+	getOptionValueLong(parser, "gop", msaOpt.sc.data_gap_open);
+	getOptionValueLong(parser, "gex", msaOpt.sc.data_gap_extend);
+	int msc = 0;
+	getOptionValueLong(parser, "msc", msc);
+	_setMatchScore(msaOpt, msc);
+	int mmsc = 0;
+	getOptionValueLong(parser, "mmsc", mmsc);
+	_setMismatchScore(msaOpt, mmsc);
+
+	// Set guide tree options.
+	// The value must be fetched from "build"; comparing the leftover "format"
+	// value could never match and silently ignored the option.
+	getOptionValueLong(parser, "build", optionVal);
+	if (optionVal == "nj") msaOpt.build = 0;
+	else if (optionVal == "min") msaOpt.build = 1;
+	else if (optionVal == "max") msaOpt.build = 2;
+	else if (optionVal == "avg") msaOpt.build = 3;
+	else if (optionVal == "wavg") msaOpt.build = 4;
+
+	// Set alignment evaluation	options
+	getOptionValueLong(parser, "infile", msaOpt.infile);
+
+	// Check if any segment-match generation procedure is selected, otherwise set the default
+	if ((empty(msaOpt.alnfiles)) && (empty(msaOpt.method))) {
+		appendValue(msaOpt.method, 0);
+		appendValue(msaOpt.method, 1);
+	}
+
+	// Evaluation mode?
+	if (isSetLong(parser, "infile")) {
+	  if(length(msaOpt.refine) > 0 && msaOpt.refine != "colinear"){ //Refinement mode
+	    refineMSA(msaOpt.infile.c_str(),msaOpt);
+	  }
+	  else {
+	    evaluateAlignment(msaOpt, msaOpt.sc, Dna5() );
+	  }
+	} else { // or alignment mode?
+	  if (!isSetLong(parser, "seq")) {
+	    shortHelp(parser, std::cerr);	// print short help and exit
+	    exit(0);
+	  }
+	  customizedMsaAlignment(msaOpt);
+	}
+}
+
+
+
+// Choose the scoring scheme and hand over to _initMsaParams<Dna5>():
+// a matrix score loaded from the "matrix" option value when that option was
+// given on the command line, a default-constructed Score<int> otherwise.
+inline void
+_initScoreMatrix(CommandLineParser& parser, Dna5 const) {
+	String<char> matrixFile;
+	getOptionValueLong(parser, "matrix", matrixFile);
+
+	if (!isSetLong(parser, "matrix")) {
+		Score<int> simpleScore;
+		_initMsaParams<Dna5>(parser, simpleScore);
+		return;
+	}
+
+	Score<int, ScoreMatrix<> > matrixScore;
+	loadScoreMatrix(matrixScore, matrixFile);
+	_initMsaParams<Dna5>(parser, matrixScore);
+}
+
+
+
+/**
+ * Entry point of mugsyWGA.
+ *
+ * Registers all command line options, parses argv, verifies the installation
+ * (MUGSY_INSTALL must be set and contain synchain-mugsy) and then dispatches
+ * through _initScoreMatrix().
+ *
+ * Return value / exit status:
+ *  0  normal completion, no arguments (short help printed), or --help /
+ *     --version requested
+ *  1  command line could not be parsed, MUGSY_INSTALL unset or empty, or
+ *     synchain-mugsy missing
+ */
+int main(int argc, const char *argv[]){
+#ifdef TIMING 
+  time(&now);
+  lasttime=now;
+#endif
+  //////////////////////////////////////////////////////////////////////////////
+  // Command line parsing
+  //////////////////////////////////////////////////////////////////////////////
+  std::string versionstring = std::string("1.3");  
+  // Set the keys
+  CommandLineParser parser;
+  _addVersion(parser);
+  
+  addTitleLine(parser, "*************************************************");
+  addTitleLine(parser, "* mugsyWGA                                      *");
+  addTitleLine(parser, "* v"+versionstring+"                                          *");
+  addTitleLine(parser, "* Multiple whole genome aligner                 *");
+  addTitleLine(parser, "* using graph based LCB identification          *");
+  addTitleLine(parser, "* and Seqan::TCoffee                            *");
+  addTitleLine(parser, "*************************************************");
+
+  addUsageLine(parser, "-seq <multi-FASTA sequence file> -aln <Aligned pairwise FASTA library> [-distance <LCB chaining distance>] [-minlength <LCB minimum length>] [Other options]");
+
+  //Many config options lifted from seqan::tcoffee
+  addSection(parser, "Main Options:");
+  addOption(parser, addArgumentText(CommandLineOption("s", "seq", "multi-FASTA file with all input sequences. For draft genomes, FASTA headers should be in the form >seqname genomename.", OptionType::String), "<FASTA Sequence File>"));
+  addOption(parser, addArgumentText(CommandLineOption("al", "aln", "Library of pairwise alignments. Aligned multi-FASTA format (XMFA)", OptionType::String), "<File1>,<File2>,..."));
+  addOption(parser, addArgumentText(CommandLineOption("o", "outfile", "output filename prefix", (int)OptionType::String, "outfile"), "<Filename>"));
+  addOption(parser, addArgumentText(CommandLineOption("distance", "distance", "LCB chaining distance", (int)OptionType::String,"1000"), "<String>"));
+  addOption(parser, addArgumentText(CommandLineOption("minlength", "minlength", "Minimum LCB segment span", (int)OptionType::String,"100"), "<Int>"));
+  addOption(parser, addArgumentText(CommandLineOption("unique", "unique", "Report unique regions", OptionType::String,"true"), "[true|false]"));
+  addOption(parser, addArgumentText(CommandLineOption("duplications", "duplications", "Report duplications. Requires a second alignment file of pairwise duplications is passed to --aln. ", OptionType::String,"false"), "[true|false]"));
+  
+  addSection(parser, "Other Options:");
+
+  addOption(parser, addArgumentText(CommandLineOption("f", "format", "output format", (int)OptionType::String, "maf"), "[maf | msf]"));
+  addOption(parser, addArgumentText(CommandLineOption("anchorwin", "anchorwin", "bp window to consider for collapsing anchors", (int)OptionType::Int,0), "<Int>"));
+
+  //synchain-mugsy can return overlapping and nested synteny blocks with the extent determined by --distance
+  //allownestedlcbs=false ensures each multi-genome anchor contributes to exactly one LCB; the longest LCB spanning the anchor
+  //The LCBs are sorted by length in descending order. Each anchor is
+  //removed from the anchor graph as soon as it is aligned in an LCB.
+  addOption(parser, addArgumentText(CommandLineOption("allownestedlcbs", "allownestedlcbs", "allow anchors to contribute to multiple LCBs. Default=false", OptionType::String,"false"), "[true|false]"));
+
+  addOption(parser, addArgumentText(CommandLineOption("refine", "refine", "refinement method: mugsy,fsa,pecan,mlagan", OptionType::String), "<String>"));
+  //addOption(parser, addArgumentText(CommandLineOption("poscorewindow", "psw", "posscorewindow", (int)OptionType::Int,1000), "<Int>"));
+  //addOption(parser, addArgumentText(CommandLineOption("possharedcutoff", "pscut", "possharedcutoff", (int)OptionType::Double,(double)0.1), "<Int>"));
+
+  addOption(parser, addArgumentText(CommandLineOption("segmentation", "segmentation", "Segmentation method. mugsy,enredo,mercator", OptionType::String), "<String>"));
+  addOption(parser, addArgumentText(CommandLineOption("blockfile", "blockfile", "Bypass segmentation and use this output file from synchain-mugsy", OptionType::String), "<String>"));
+
+  addSection(parser, "Scoring Options:");
+  addOption(parser, addArgumentText(CommandLineOption("g", "gop", "gap open penalty", (int)OptionType::Int, -13), "<Int>"));
+  addOption(parser, addArgumentText(CommandLineOption("e", "gex", "gap extension penalty", (int)OptionType::Int, -1), "<Int>"));
+  addOption(parser, addArgumentText(CommandLineOption("ma", "matrix", "score matrix", (int)OptionType::String, "Blosum62"), "<Matrix file>. Ignored."));
+  addOption(parser, addArgumentText(CommandLineOption("ms", "msc", "match score", (int)OptionType::Int, 5), "<Int>"));
+  addOption(parser, addArgumentText(CommandLineOption("mm", "mmsc", "mismatch penalty", (int)OptionType::Int, -4), "<Int>"));
+
+  addSection(parser, "Guide Tree Options:");
+  //addOption(parser, addArgumentText(CommandLineOption("u", "usetree", "tree filename", OptionType::String), "<Newick guide tree>"));
+  addOption(parser, addArgumentText(CommandLineOption("b", "build", "tree building method for progressive aln", (int)OptionType::String, "nj"), "[nj, min, max, avg, wavg]"));
+  addHelpLine(parser, "nj = Neighbor-joining");
+  addHelpLine(parser, "min = UPGMA single linkage");
+  addHelpLine(parser, "max = UPGMA complete linkage");
+  addHelpLine(parser, "avg = UPGMA average linkage");
+  addHelpLine(parser, "wavg = UPGMA weighted average linkage");
+  addHelpLine(parser, "Neighbor-joining creates an");
+  addHelpLine(parser, "  unrooted tree. We root that tree");
+  addHelpLine(parser, "  at the last joined pair.");
+  // Alignment evaluation	
+  addSection(parser, "Alignment Evaluation Options:");
+  addOption(parser, addArgumentText(CommandLineOption("i", "infile", "alignment file", OptionType::String), "<FASTA alignment file>"));
+  
+  if (argc == 1)
+    {
+      shortHelp(parser, std::cerr);	// print short help and exit
+      return 0;
+    }
+
+  bool exitrun=false;
+  if (!parse(parser, argc, argv, ::std::cerr)) exitrun=true;
+  // A help or version request ends the run here. Previously the flag was only
+  // reset to false, so the program carried on (and a parse failure was masked).
+  if (isSetLong(parser, "help") || isSetLong(parser, "version")) return 0;
+
+  char * mugsyinstallstr = std::getenv("MUGSY_INSTALL");
+  if(mugsyinstallstr==NULL || strlen(mugsyinstallstr)==0){
+    std::cerr << "ERROR: Environment variable MUGSY_INSTALL must be set to the installation directory for mugsy" << std::endl;
+    exit(1);
+  }
+  assert(mugsyinstallstr != NULL);
+  std::string mugsyinstall = std::string(mugsyinstallstr);
+  assert(mugsyinstall.length()>0);
+#ifdef DEBUGGING
+  std::cerr << "Using MUGSY_INSTALL=" << mugsyinstall << std::endl;
+#endif
+  //Check for chaining executable
+  struct stat st;
+  if(stat(std::string(mugsyinstall+"/synchain-mugsy").c_str(),&st) != 0){
+    std::cerr << "ERROR: MUGSY_INSTALL/synchain-mugsy not found. check installation at MUGSY_INSTALL=" << mugsyinstall << std::endl;
+    exitrun=true;
+  }
+  
+  if(exitrun){
+    return 1;
+  }
+  // Initialize scoring matrices and run the selected mode
+  _initScoreMatrix(parser, Dna5());
+  return 0;
+}
+
diff --git a/mugsy-seqan/projects/library/apps/mugsy/rna_alphabet.h b/mugsy-seqan/projects/library/apps/mugsy/rna_alphabet.h
new file mode 100644
index 0000000..0f42883
--- /dev/null
+++ b/mugsy-seqan/projects/library/apps/mugsy/rna_alphabet.h
@@ -0,0 +1,305 @@
+/*==========================================================================
+               SeqAn - The Library for Sequence Analysis
+                         http://www.seqan.de 
+============================================================================
+Copyright (C) 2007
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+==========================================================================*/
+
+#ifndef SEQAN_HEADER_RNA_ALPHABET_H
+#define SEQAN_HEADER_RNA_ALPHABET_H
+
+namespace SEQAN_NAMESPACE_MAIN
+{
+
+//////////////////////////////////////////////////////////////////////////////
+// RNA5 Alphabet
+//////////////////////////////////////////////////////////////////////////////
+
+// Lookup table from an Rna5 value (0..4) to its letter: A, C, G, U, N.
+// Used by assign(Ascii&, Rna5 const&), indexed by the source's value member.
+template <typename T = void>
+struct _Translate_Table_Rna5_2_Ascii
+{
+	static char const VALUE[5];
+};
+template <typename T>
+char const _Translate_Table_Rna5_2_Ascii<T>::VALUE[5] = {'A', 'C', 'G', 'U', 'N'}; 
+
+//////////////////////////////////////////////////////////////////////////////
+
+// Lookup table from an Rna value (0..3) to its letter: A, C, G, U.
+// Used by assign(Ascii&, Rna const&), indexed by the source's value member.
+template <typename T = void>
+struct _Translate_Table_Rna_2_Ascii
+{
+	static char const VALUE[4];
+};
+template <typename T>
+char const _Translate_Table_Rna_2_Ascii<T>::VALUE[4] = {'A', 'C', 'G', 'U'}; 
+
+
+//////////////////////////////////////////////////////////////////////////////
+
+// Lookup table from a raw byte to an Rna5 value: bytes 0..4 map to
+// themselves, every other byte maps to 0. Used by assign(Rna5&, Byte).
+template <typename T = void>
+struct _Translate_Table_Byte_2_Rna5
+{
+	static char const VALUE[256];
+};
+template <typename T>
+char const _Translate_Table_Byte_2_Rna5<T>::VALUE[256] = 
+{
+	0,   1,   2,   3,   4,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //0
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //1
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //2
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //3
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //4
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //5
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //6
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //7
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //8
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //9
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //10
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //11
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //12
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //13
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //14
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0  //15
+};
+
+//////////////////////////////////////////////////////////////////////////////
+
+// Lookup table from a raw byte to an Rna value: bytes 0..3 map to
+// themselves, every other byte maps to 0. Used by assign(Rna&, Byte).
+template <typename T = void>
+struct _Translate_Table_Byte_2_Rna
+{
+	static char const VALUE[256];
+};
+template <typename T>
+char const _Translate_Table_Byte_2_Rna<T>::VALUE[256] = 
+{
+	0,   1,   2,   3,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //0
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //1
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //2
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //3
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //4
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //5
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //6
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //7
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //8
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //9
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //10
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //11
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //12
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //13
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //14
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0  //15
+};
+
+
+//////////////////////////////////////////////////////////////////////////////
+
+
+// Lookup table from a character code to an Rna5 value.
+// 'A'/'a' -> 0, 'C'/'c' -> 1, 'G'/'g' -> 2, 'U'/'u' -> 3; every other
+// character, including 'T'/'t' and 'N'/'n', maps to 4 (printed as N).
+// Each row covers 16 codes; the trailing //n is the high hex digit of the row.
+template <typename T = void>
+struct _Translate_Table_Ascii_2_Rna5
+{
+	static char const VALUE[256];
+};
+template <typename T>
+char const _Translate_Table_Ascii_2_Rna5<T>::VALUE[256] = 
+{
+	4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4, //0
+	4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4, //1
+	4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4, //2
+	4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4, //3
+
+	4,   0,   4,   1,   4,   4,   4,   2,   4,   4,   4,   4,   4,   4,   4,   4, //4
+//	 ,   A,   B,   C,   D,   E,   F,   G,   H,   I,   J,   K,   L,   M,   N,   O,
+
+	4,   4,   4,   4,   4,   3,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4, //5
+//	P,   Q,   R,   S,   T,   U,   V,   W,   X,   Y,   Z,    ,    ,    ,    ,    
+
+	4,   0,   4,   1,   4,   4,   4,   2,   4,   4,   4,   4,   4,   4,   4,   4, //6
+//   ,   a,   b,   c,   d,   e,   f,   g,   h,   i,   j,   k,   l,   m,   n,   o,
+
+	4,   4,   4,   4,   4,   3,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4, //7
+//  p,   q,   r,   s,   t,   u,   v,   w,   x,   y,   z,    ,    ,    ,    ,   
+
+	4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4, //8
+	4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4, //9
+	4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4, //10
+	4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4, //11
+	4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4, //12
+	4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4, //13
+	4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4, //14
+	4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4  //15
+};
+
+//////////////////////////////////////////////////////////////////////////////
+
+
+// Lookup table from a character code to an Rna value.
+// 'C'/'c' -> 1, 'G'/'g' -> 2, 'U'/'u' -> 3; every other character,
+// including 'A'/'a' and 'T'/'t', maps to 0 (printed as A).
+// Each row covers 16 codes; the trailing //n is the high hex digit of the row.
+template <typename T = void>
+struct _Translate_Table_Ascii_2_Rna
+{
+	static char const VALUE[256];
+};
+template <typename T>
+char const _Translate_Table_Ascii_2_Rna<T>::VALUE[256] = 
+{
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //0
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //1
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //2
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //3
+
+	0,   0,   0,   1,   0,   0,   0,   2,   0,   0,   0,   0,   0,   0,   0,   0, //4
+//	 ,   A,   B,   C,   D,   E,   F,   G,   H,   I,   J,   K,   L,   M,   N,   O,
+
+	0,   0,   0,   0,   0,   3,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //5
+//	P,   Q,   R,   S,   T,   U,   V,   W,   X,   Y,   Z,    ,    ,    ,    ,    
+
+	0,   0,   0,   1,   0,   0,   0,   2,   0,   0,   0,   0,   0,   0,   0,   0, //6
+//   ,   a,   b,   c,   d,   e,   f,   g,   h,   i,   j,   k,   l,   m,   n,   o,
+
+	0,   0,   0,   0,   0,   3,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //7
+//  p,   q,   r,   s,   t,   u,   v,   w,   x,   y,   z,    ,    ,    ,    ,   
+
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //8
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //9
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //10
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //11
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //12
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //13
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, //14
+	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0  //15
+};
+
+//////////////////////////////////////////////////////////////////////////////
+
+// Four-letter RNA alphabet (A, C, G, U) stored in an unsigned char.
+// _Rna is the tag that makes this SimpleType distinct from other alphabets.
+struct _Rna {};
+typedef SimpleType<unsigned char,_Rna> Rna;
+
+// Rna has 4 distinct values and is declared as needing 2 bits per value.
+template <> struct ValueSize< Rna > { enum { VALUE = 4 }; };
+template <> struct BitsPerValue< Rna > { enum { VALUE = 2 }; };
+
+//////////////////////////////////////////////////////////////////////////////
+
+// Five-letter RNA alphabet (A, C, G, U, N) stored in an unsigned char.
+// _Rna5 is the tag that makes this SimpleType distinct from other alphabets.
+struct _Rna5 {};
+typedef SimpleType<unsigned char, _Rna5> Rna5;
+
+// Rna5 has 5 distinct values and is declared as needing 3 bits per value.
+template <> struct ValueSize< Rna5 > { enum { VALUE = 5 }; };
+template <> struct BitsPerValue< Rna5 > { enum { VALUE = 3 }; };
+
+//////////////////////////////////////////////////////////////////////////////
+//Rna assignment
+//////////////////////////////////////////////////////////////////////////////
+
+// Rna -> Ascii: writes the letter that _Translate_Table_Rna_2_Ascii holds
+// for source.value into target.
+inline void assign(Ascii & target, Rna const & source)
+{
+	SEQAN_CHECKPOINT
+	char const letter = _Translate_Table_Rna_2_Ascii<>::VALUE[source.value];
+	target = letter;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+// Comparisons between Rna and Byte are carried out as Rna.
+template <>
+struct CompareType<Rna, Byte>
+{
+	typedef Rna Type;
+};
+
+// Byte -> Rna: stores the _Translate_Table_Byte_2_Rna entry for c_source.
+inline void assign(Rna & target, Byte c_source)
+{
+	SEQAN_CHECKPOINT
+	char const code = _Translate_Table_Byte_2_Rna<>::VALUE[c_source];
+	target.value = code;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+// Comparisons between Rna and Ascii are carried out as Rna.
+template <>
+struct CompareType<Rna, Ascii>
+{
+	typedef Rna Type;
+};
+
+// Ascii -> Rna: the character is cast to unsigned char before indexing
+// _Translate_Table_Ascii_2_Rna.
+inline void assign(Rna & target, Ascii c_source)
+{
+	SEQAN_CHECKPOINT
+	char const code = _Translate_Table_Ascii_2_Rna<>::VALUE[(unsigned char)c_source];
+	target.value = code;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+
+// Comparisons between Rna and Unicode are carried out as Rna.
+template <>
+struct CompareType<Rna, Unicode>
+{
+	typedef Rna Type;
+};
+
+// Unicode -> Rna: the character is cast to unsigned char and looked up
+// in the same table as Ascii input.
+inline void assign(Rna & target, Unicode c_source)
+{
+	SEQAN_CHECKPOINT
+	char const code = _Translate_Table_Ascii_2_Rna<>::VALUE[(unsigned char) c_source];
+	target.value = code;
+}
+
+//____________________________________________________________________________
+
+// Comparisons between Rna and Rna5 are carried out as Rna.
+template <>
+struct CompareType<Rna, Rna5>
+{
+	typedef Rna Type;
+};
+
+// Rna5 -> Rna: keeps only the two low bits of the source value, so
+// values 0..3 are unchanged and value 4 becomes 0.
+inline void assign(Rna & target, Rna5 const & c_source)
+{
+SEQAN_CHECKPOINT
+	unsigned char const lowTwoBits = c_source.value & 0x03;
+	target.value = lowTwoBits;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//Rna5 assignment
+//////////////////////////////////////////////////////////////////////////////
+
+// Rna5 -> Ascii: writes the letter that _Translate_Table_Rna5_2_Ascii holds
+// for source.value into target.
+inline void assign(Ascii & target, Rna5 const & source)
+{
+	SEQAN_CHECKPOINT
+	char const letter = _Translate_Table_Rna5_2_Ascii<>::VALUE[source.value];
+	target = letter;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+// Comparisons between Rna5 and Byte are carried out as Rna5.
+template <>
+struct CompareType<Rna5, Byte>
+{
+	typedef Rna5 Type;
+};
+
+// Byte -> Rna5: stores the _Translate_Table_Byte_2_Rna5 entry for c_source.
+inline void assign(Rna5 & target, Byte c_source)
+{
+	SEQAN_CHECKPOINT
+	char const code = _Translate_Table_Byte_2_Rna5<>::VALUE[c_source];
+	target.value = code;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+// Comparisons between Rna5 and Ascii are carried out as Rna5.
+template <>
+struct CompareType<Rna5, Ascii>
+{
+	typedef Rna5 Type;
+};
+
+// Ascii -> Rna5: the character is cast to unsigned char before indexing
+// _Translate_Table_Ascii_2_Rna5.
+inline void assign(Rna5 & target, Ascii c_source)
+{
+	SEQAN_CHECKPOINT
+	char const code = _Translate_Table_Ascii_2_Rna5<>::VALUE[(unsigned char)c_source];
+	target.value = code;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+
+// Comparisons between Rna5 and Unicode are carried out as Rna5.
+template <>
+struct CompareType<Rna5, Unicode>
+{
+	typedef Rna5 Type;
+};
+
+// Unicode -> Rna5: the character is cast to unsigned char and looked up
+// in the same table as Ascii input.
+inline void assign(Rna5 & target, Unicode c_source)
+{
+	SEQAN_CHECKPOINT
+	char const code = _Translate_Table_Ascii_2_Rna5<>::VALUE[(unsigned char) c_source];
+	target.value = code;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+// Comparisons between Rna5 and Rna are carried out as Rna, mirroring
+// CompareType<Rna, Rna5>. The previous typedef named Dna, a leftover from
+// the DNA alphabet this header was modelled on.
+template <>
+struct CompareType<Rna5, Rna> { typedef Rna Type; };
+
+// Rna -> Rna5: the value is copied unchanged.
+inline void assign(Rna5 & target, Rna const & c_source)
+{
+SEQAN_CHECKPOINT
+	target.value = c_source.value;
+}
+
+
+}// namespace SEQAN_NAMESPACE_MAIN
+
+#endif //#ifndef SEQAN_HEADER_...
diff --git a/mugsy-seqan/projects/library/apps/mugsy/transformcoords.h b/mugsy-seqan/projects/library/apps/mugsy/transformcoords.h
new file mode 100644
index 0000000..fc42939
--- /dev/null
+++ b/mugsy-seqan/projects/library/apps/mugsy/transformcoords.h
@@ -0,0 +1,36 @@
+struct mafAli
+/* A multiple alignment. */
+{
+  struct mafAli *next;	/* Link to another mafAli (presumably the next block in a list -- confirm against the reader). */
+  double score;	/* NOTE(review): presumably the score from the block's "a" line -- confirm. */
+  struct mafComp *components;	/* List of components of alignment */
+  int textSize;	 /* Size of text in each component. */
+  int chain_len;	/* NOTE(review): undocumented here; meaning must be confirmed in the code that fills it. */
+  int label;	/* NOTE(review): undocumented here; meaning must be confirmed in the code that fills it. */
+  char orient; /* Relative orientation of the reference */
+  
+};
+struct mafComp
+/* A component of a multiple alignment. */
+    {
+      struct mafComp *next;	/* Link to another component (mafAli.components points at a list of these). */
+      char *name;        /* Common name of sequence source. */
+      char *src;	 /* Name of sequence source.  */
+      char *text;        /* The sequence including dashes. */
+      char* contig;	/* NOTE(review): undocumented here; presumably the contig part of src -- confirm. */
+      int* mafPosMap;	/* NOTE(review): undocumented here; presumably a position map for this row -- confirm. */
+      int srcSize;       /* Size of sequence source.  */
+      int start;	 /* Start within sequence. Zero based. If strand is - is relative to src end. */
+      int size;	         /* Size in sequence (does not include dashes).  */
+      short nameID;	/* NOTE(review): undocumented here; presumably a numeric id for name -- confirm. */
+      char strand;       /* Strand of sequence.  Either + or -*/
+      char paralog;	/* NOTE(review): undocumented here; meaning must be confirmed in the code that fills it. */
+};
+
+extern "C" void parseSrcName(char* srcName, char* name, char* src);
+extern "C" struct mafFile *mafOpen(const char *fileName, int verbose);
+extern "C" struct mafAli *mafNext(struct mafFile *mafFile);
+extern "C" void mafWrite(FILE *f, struct mafAli *maf);
+extern "C" void mafWriteStart(FILE *f, char *scoring);
+extern "C" void mafFileFree(struct mafFile **pObj);
+extern "C" void mafAliFree(struct mafAli **pObj);
diff --git a/mugsyWGA b/mugsyWGA
new file mode 120000
index 0000000..6b72076
--- /dev/null
+++ b/mugsyWGA
@@ -0,0 +1 @@
+mugsy-seqan/projects/library/apps/mugsy/gcc/mugsy
\ No newline at end of file
diff --git a/mugsyenv.sh b/mugsyenv.sh
new file mode 100644
index 0000000..4a4dcb4
--- /dev/null
+++ b/mugsyenv.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+export MUGSY_INSTALL=/usr/local/projects/angiuoli/mugsy_trunk
+export PATH=$MUGSY_INSTALL:$MUGSY_INSTALL/mapping:$PATH
+export PERL5LIB=$MUGSY_INSTALL/perllibs
+#For testing TBA
+#export PATH=$PATH:$MUGSY_INSTALL/../../multiz-tba/trunk/
+
diff --git a/mumi.sh b/mumi.sh
new file mode 100755
index 0000000..0a43ee5
--- /dev/null
+++ b/mumi.sh
@@ -0,0 +1,140 @@
+#!/bin/bash 
+#
+# Compute the MUMi similarity value between two given complete genome sequences. If a genome sequence is
+# contained within a directory, all chromosomes of the genome sequences are merged before the genomes are
+# compared with each other.
+#
+# INPUT
+#   seq1  GenBank file or directory containing GenBank files for the same genome
+#   seq2  GenBank file or directory containing GenBank files for the same genome
+#   -p    optional prefix used for creation of temporary file names (default: "MUMi")
+#   -t    optional directory for storage of temporary files (default: "/tmp")
+#
+# syntax: mumi [-p prefix] [-t tmp_dir] seq1 seq2
+
+# process command line options
+prefix="MUMi"
+tmp_dir="/tmp"
+while getopts 'p:t:' option
+do
+  case ${option} in
+    p) prefix=${OPTARG};;
+    t) tmp_dir=`echo "${OPTARG}" | sed -e 's/\/*$//'`;;
+    ?) echo "Usage: mumi [-p prefix] [-t tmp_dir] seq1 seq2" >&2
+       exit 1;;
+  esac
+done
+let "numoptions = ${OPTIND}-1"
+shift ${numoptions}
+
+# process command line arguments: exactly two existing files or directories are required
+if [ $# -ne 2 ]
+then
+  echo "$0: invalid number of arguments" >&2
+  echo "Usage: mumi [-p prefix] [-t tmp_dir] seq1 seq2" >&2
+  exit 1
+fi
+if [ ! -f "$1" ] && [ ! -d "$1" ]   # quoted: a path with whitespace must be tested as one word
+then
+  echo "$0: illigal argument" >&2
+  echo "Usage: mumi [-p prefix] [-t tmp_dir] seq1 seq2" >&2
+  exit 1
+fi
+if [ ! -f "$2" ] && [ ! -d "$2" ]   # two tests joined by && instead of the obsolescent -a operator
+then
+  echo "$0: illigal argument" >&2
+  echo "Usage: mumi [-p prefix] [-t tmp_dir] seq1 seq2" >&2
+  exit 1
+fi
+seq1_file=`echo "$1" | sed -e 's/\/*$//'`                         # remove final slash from directory name
+seq2_file=`echo "$2" | sed -e 's/\/*$//'`
+seq1_name=`echo "${seq1_file}" | awk -F'/' '{print $NF}'`         # extract name as last part of file or directory name
+seq2_name=`echo "${seq2_file}" | awk -F'/' '{print $NF}'`
+seq1_fasta="${tmp_dir}/${prefix}_${seq1_name}.fasta"              # construct temporary FASTA file names
+seq2_fasta="${tmp_dir}/${prefix}_${seq2_name}.fasta"
+mumfile="${tmp_dir}/${prefix}_${seq1_name}_${seq2_name}.mummer"   # construct temporary file name to output results
+
+# convert GenBank files to (concatenated) files in FASTA format; the FASTA header line is written first
+# echo "converting GenBank files to (concatenated) FASTA files ..."
+echo ">${seq1_name}" > "${seq1_fasta}"
+if [ -d "${seq1_file}" ]
+then
+  for seqfile in `grep -H '^DEFINITION' "${seq1_file}"/*.gbk | grep -v 'plasmid' | sort | cut -d':' -f1`   # non-plasmid .gbk files only
+  do
+    seqret -sequence "${seqfile}" -sformat gb -osf fasta -stdout -auto 2> /dev/null | tail -n +2 >> "${seq1_fasta}"
+    while [ $? -ne 0 ]   # NOTE(review): $? is the status of the pipeline's last command (tail), not of seqret
+    do
+      seqret -sequence "${seqfile}" -sformat gb -osf fasta -stdout -auto 2> /dev/null | tail -n +2 >> "${seq1_fasta}"
+    done
+  done
+else
+  seqret -sequence "${seq1_file}" -sformat gb -osf fasta -stdout -auto 2> /dev/null | tail -n +2 >> "${seq1_fasta}"
+  while [ $? -ne 0 ]   # NOTE(review): $? is the status of the pipeline's last command (tail), not of seqret
+  do
+    seqret -sequence "${seq1_file}" -sformat gb -osf fasta -stdout -auto 2> /dev/null | tail -n +2 >> "${seq1_fasta}"
+  done
+fi
+
+echo ">${seq2_name}" > "${seq2_fasta}"   # FASTA header first; sequence lines are appended below
+if [ -d "${seq2_file}" ]
+then
+  for seqfile in `grep -H '^DEFINITION' "${seq2_file}"/*.gbk | grep -v 'plasmid' | sort | cut -d':' -f1`   # non-plasmid .gbk files only
+  do
+    seqret -sequence "${seqfile}" -sformat gb -osf fasta -stdout -auto 2> /dev/null | tail -n +2 >> "${seq2_fasta}"
+    while [ $? -ne 0 ]   # NOTE(review): $? is the status of the pipeline's last command (tail), not of seqret
+    do
+      seqret -sequence "${seqfile}" -sformat gb -osf fasta -stdout -auto 2> /dev/null | tail -n +2 >> "${seq2_fasta}"
+    done
+  done
+else
+  seqret -sequence "${seq2_file}" -sformat gb -osf fasta -stdout -auto 2> /dev/null | tail -n +2 >> "${seq2_fasta}"
+  while [ $? -ne 0 ]   # NOTE(review): $? is the status of the pipeline's last command (tail), not of seqret
+  do
+    seqret -sequence "${seq2_file}" -sformat gb -osf fasta -stdout -auto 2> /dev/null | tail -n +2 >> "${seq2_fasta}"
+  done
+fi
+
+# process sequences by mummer
+mummer -mum -b -c -l 19 ${seq1_fasta} ${seq2_fasta} > ${mumfile} 2> /dev/null
+
+# get sequence length
+seq1_len=`tail -n +2 ${seq1_fasta} | tr -d '\n\r ' | wc -c`
+seq2_len=`tail -n +2 ${seq2_fasta} | tr -d '\n\r ' | wc -c`
+
+# process mummer output
+# echo "processing mummer output ..."
+awk -v seq1_len=${seq1_len} -v seq2_len=${seq2_len} -v seq1_name=${seq1_name} -v seq2_name=${seq2_name} '
+# forward or reverse hit for second sequence
+/^>/ { if ($0 ~ /Reverse/) reverse=1; next }
+
+# mark positions covered by MUMs
+{
+  len+=$3
+  for(i=$1;i<$1+$3;++i) seq1[i-1]=1
+  if (reverse==1)
+    for(i=$2-$3+1;i<=$2;++i) seq2[i-1]=1
+  else
+    for(i=$2;i<$2+$3;++i) seq2[i-1]=1
+}
+
+# determine MUM-index
+END {
+  # compute MUM-coverages of both genomes
+  for(i=0;i<seq1_len;++i) seq1_cov+=seq1[i]
+  for(i=0;i<seq2_len;++i) seq2_cov+=seq2[i]
+
+  # compute different versions of MUMi similarity value
+  sim1=seq1_cov/seq1_len
+  sim2=seq2_cov/seq2_len
+  sim3=(seq1_cov + seq2_cov)/(seq1_len + seq2_len)
+  sim4=0.5*(sim1 + sim2)
+
+  # output results
+  printf("%s\t%s\t%d\t%d\t%d\t%d\t%10.8f\t%10.8f\t%10.8f\t%10.8f\n",seq1_name,seq2_name,seq1_len,seq2_len,seq1_cov,seq2_cov,sim1,sim2,sim3,sim4)
+}
+' ${mumfile}
+
+# remove temporary files
+rm -f ${seq1_fasta}
+rm -f ${seq2_fasta}
+rm -f ${mumfile}
diff --git a/mumi_fasta.sh b/mumi_fasta.sh
new file mode 100755
index 0000000..f99036b
--- /dev/null
+++ b/mumi_fasta.sh
@@ -0,0 +1,95 @@
+#!/bin/bash
+#
+# Compute the MUMi similarity value between two given complete genome sequences. 
+#
+# INPUT
+#   seq1  FASTA file for seq1. 
+#   seq2  FASTA file for seq2. 
+#   -p    optional prefix used for creation of temporary file names (default: "MUMi")
+#   -t    optional directory for storage of temporary files (default: "/tmp")
+#
+# syntax: mumi [-p prefix] [-t tmp_dir] seq1 seq2
+
+# process command line options
+prefix="MUMi"
+tmp_dir="/tmp"
+while getopts 'p:t:' option
+do
+  case ${option} in
+    p) prefix=${OPTARG};;
+    t) tmp_dir=`echo "${OPTARG}" | sed -e 's/\/*$//'`;;
+    ?) echo "Usage: mumi [-p prefix] [-t tmp_dir] seq1 seq2" >&2
+       exit 1;;
+  esac
+done
+let "numoptions = ${OPTIND}-1"
+shift ${numoptions}
+
+# process command line arguments: exactly two existing FASTA files are required
+if [ $# -ne 2 ]
+then
+  echo "$0: invalid number of arguments" >&2
+  echo "Usage: mumi [-p prefix] [-t tmp_dir] seq1 seq2" >&2
+  exit 1
+fi
+if [ ! -f "$1" ]   # directories are rejected: this script hands the argument directly to mummer and tail
+then
+  echo "$0: illigal argument" >&2
+  echo "Usage: mumi [-p prefix] [-t tmp_dir] seq1 seq2" >&2
+  exit 1
+fi
+if [ ! -f "$2" ]   # quoted so a path containing whitespace is tested as one word
+then
+  echo "$0: illigal argument" >&2
+  echo "Usage: mumi [-p prefix] [-t tmp_dir] seq1 seq2" >&2
+  exit 1
+fi
+
+seq1_fasta=`echo "$1" | sed -e 's/\/*$//'`                         # remove final slash from directory name
+seq2_fasta=`echo "$2" | sed -e 's/\/*$//'`
+seq1_name=`echo "${seq1_fasta}" | awk -F'/' '{print $NF}'`         # extract name as last part of file or directory name
+seq2_name=`echo "${seq2_fasta}" | awk -F'/' '{print $NF}'`
+mumfile="${tmp_dir}/${prefix}_${seq1_name}_${seq2_name}.mummer"   # construct temporary file name to output results
+
+# process sequences by mummer
+mummer -mum -b -c -l 19 ${seq1_fasta} ${seq2_fasta} > ${mumfile} 2> /dev/null
+
+# get sequence length
+seq1_len=`tail -n +2 ${seq1_fasta} | tr -d '\n\r ' | wc -c`
+seq2_len=`tail -n +2 ${seq2_fasta} | tr -d '\n\r ' | wc -c`
+
+# process mummer output
+# echo "processing mummer output ..."
+awk -v seq1_len=${seq1_len} -v seq2_len=${seq2_len} -v seq1_name=${seq1_name} -v seq2_name=${seq2_name} '
+# forward or reverse hit for second sequence
+/^>/ { if ($0 ~ /Reverse/) reverse=1; next }
+
+# mark positions covered by MUMs
+{
+  len+=$3
+  for(i=$1;i<$1+$3;++i) seq1[i-1]=1
+  if (reverse==1)
+    for(i=$2-$3+1;i<=$2;++i) seq2[i-1]=1
+  else
+    for(i=$2;i<$2+$3;++i) seq2[i-1]=1
+}
+
+# determine MUM-index
+END {
+  # compute MUM-coverages of both genomes
+  for(i=0;i<seq1_len;++i) seq1_cov+=seq1[i]
+  for(i=0;i<seq2_len;++i) seq2_cov+=seq2[i]
+
+  # compute different versions of MUMi similarity value
+  sim1=seq1_cov/seq1_len
+  sim2=seq2_cov/seq2_len
+  sim3=(seq1_cov + seq2_cov)/(seq1_len + seq2_len)
+  sim4=0.5*(sim1 + sim2)
+
+  # output results
+  printf("%s\t%s\t%d\t%d\t%d\t%d\t%10.8f\t%10.8f\t%10.8f\t%10.8f\n",seq1_name,seq2_name,seq1_len,seq2_len,seq1_cov,seq2_cov,sim1,sim2,sim3,sim4)
+}
+' ${mumfile}
+
+# remove temporary files
+rm -f ${mumfile}
diff --git a/plot.pl b/plot.pl
new file mode 100755
index 0000000..bd5d5d0
--- /dev/null
+++ b/plot.pl
@@ -0,0 +1,403 @@
+#!/usr/bin/perl
+
+#./plot.pl outputprefix reforganismname [synblocksfile [variantsfile [ps|x11]]] < input.maf
+#Eg. cat /tmp/testfilter.maf | ./plot.pl /tmp/testfilter genome2 mugsy.out > out.gp
+#cat /tmp/plasmidfilter.maf | ./plot.pl /tmp/plasmidfilter AF401292 mugsy.out > out.gp
+#gnuplot out.gp
+#
+#Requires delta files output by mugsy in outputprefix
+#
+
+use strict;
+
+my $terminal = ($ARGV[4] =~ /ps/) ? 'postscript' : 'X11';
+my($refname) = ($ARGV[1] =~ /^([^:.]+)/);
+my $delta = "$ARGV[0].$refname.filt.delta";
+print STDERR "Parsing $delta\n";
+
+die "Can't find delta file" if(!-e $delta);
+
+#
+#Need to add -R -Q support for specifying the order of draft sequences
+
+my $mummerplotcmd = "/usr/local/projects/angiuoli/developer/sangiuoli/mummer/trunk/MUMmer3.20/mummerplot -p $ARGV[0].$refname \"$delta\"";
+`$mummerplotcmd 1> /dev/null 2> /dev/null`;
+
+my $idlenlookup={};
+
+open FILE, "$ARGV[0].$refname.gp" or die "Can't open file $ARGV[0].$refname.gp";
+
+my $savelen;
+my @xseqs;
+my @yseqs;
+
+while (my $line=<FILE>){
+    if($line =~ /^set ytics/){
+	$savelen=2;
+    }
+    if($line =~ /^set xtics/){
+	$savelen=1;
+    }
+    if($line =~ /^set [xy]label "([^\"]+)"/){
+	if($1 eq "QRY" || $1 eq "REF"){
+
+	}
+	else{
+	    my $id = $1;
+	    $id =~ /([^\:\.]+)[\:\.]([^\:]+)/;
+	    if($1 eq $2){
+		$id = $1;
+	    }
+	    else{
+		$id = "$1.$2";
+	    }
+
+	    if($line =~ /xlabel/){
+		push @xseqs,[$id,0];
+	    }
+	    elsif($line =~ /ylabel/){
+		push @yseqs,[$id,0];
+	    }
+	}
+    }
+    if($savelen){
+	my($id,$len) = ($line =~ /\"\*?([^\"]+)\"\s+(\d+)\,/);
+	$id =~ /([^\:\.]+)[\:\.]([^\:]+)/;
+	if($1 eq $2){
+	    $id = $1;
+	}
+	else{
+	    $id = "$1.$2";
+	}
+	if(defined $len && $id ne ""){
+	    if($savelen==1){
+		push @xseqs,[$id,$len];
+	    }
+	    elsif($savelen==2){
+		push @yseqs,[$id,$len];
+	    }
+	}
+    }
+}
+
+my @seqs = (@xseqs,@yseqs);
+# Index the number stored with each [id,number] pair by sequence id; later code adds it to coordinates as an offset.
+foreach my $seq (@seqs){
+    my($id,$len) = ($seq->[0],$seq->[1]);
+    $idlenlookup->{$id} = $len;
+}
+
+close FILE;
+
+open FILE1,"+>$ARGV[0].$refname.maf.fplot" or die "Can't open plot $ARGV[0].$refname.maf.fplot";
+open FILE2,"+>$ARGV[0].$refname.maf.rplot" or die "Can't open plot $ARGV[0].$refname.maf.rplot";
+
+print FILE1 "0 0 0\n";
+print FILE1 "0 0 0\n";
+print FILE1 "\n\n";
+
+print FILE2 "0 0 0\n";
+print FILE2 "0 0 0\n";
+print FILE2 "\n\n";
+
+my @accs = `grep ">" $delta`;
+&maf2gp(\*FILE1,\*FILE2,$ARGV[1]);
+
+my $synfile = "$ARGV[0].$refname.syn.plot";
+my $reportgraphs = {};
+my @graphs;
+my $varreportgraphs = {};
+my @vargraphs;
+#Synteny blocks
+if($ARGV[2]){
+    open FILE,$ARGV[2] or die "Can't open output file $ARGV[2]";
+    my $currgraph;
+    my $currchain;
+    my $name;
+    while(my $line=<FILE>){
+	chomp $line;
+	if($line !~ /^[\s\#]/){
+	    my @elts = split(/\s+/,$line);
+	    if($name ne $elts[0]){
+		$name = "$elts[0]";
+	    }
+	    my $seq = $elts[1];
+	    my $start = $elts[3];
+	    my $end = $elts[4];
+	    $reportgraphs->{$name}->{$name}->{'seqs'}->{$seq}->{'start'} = $start;
+	    $reportgraphs->{$name}->{$name}->{'seqs'}->{$seq}->{'end'} = $end;
+	}
+    }
+
+    close FILE;
+    @graphs = keys %$reportgraphs;
+}
+
+#Variants
+if(defined $ARGV[3] && -e $ARGV[3]){
+    open FILE,$ARGV[3] or die "Can't open output file $ARGV[3]";
+    my $currgraph;
+    my $currchain;
+    my $name;
+    while(my $line=<FILE>){
+	chomp $line;
+	if($line !~ /^[\s\#]/){
+	    my @elts = split(/\s+/,$line);
+	    if($name ne $elts[0]){
+		$name = "$elts[0]";
+	    }
+	    my $seq = $elts[1];
+	    my $start = $elts[3];
+	    my $end = $elts[4];
+	    $varreportgraphs->{$name}->{$name}->{'seqs'}->{$seq}->{'start'} = $start;
+	    $varreportgraphs->{$name}->{$name}->{'seqs'}->{$seq}->{'end'} = $end;
+	}
+    }
+
+    close FILE;
+    @vargraphs = keys %$varreportgraphs;
+}
+
+my $first=1;
+my @outlabels;
+open FILE, "+>$ARGV[0].$refname.syn.plot" or die "Can't open plot $ARGV[0].$refname.syn.plot: $!";
+foreach my $graphfile (@graphs){
+    chomp $graphfile;
+    foreach my $chainname (keys %{$reportgraphs->{$graphfile}}){
+	# Write one closed rectangle (four edges) for every x/y sequence pair that both occur in this block.
+	foreach my $x (@xseqs){
+	    my $xacc = $x->[0];
+	    $xacc =~ s/[\.|]/_/g;
+	    if(exists $reportgraphs->{$graphfile}->{$chainname}->{'seqs'}->{$xacc}){
+		foreach my $y (@yseqs){
+		    my $yacc = $y->[0];
+		    $yacc =~ s/[\.|]/_/g;
+		    if(exists $reportgraphs->{$graphfile}->{$chainname}->{'seqs'}->{$yacc}){
+			die "Can't find length for $x->[0]" if(! exists $idlenlookup->{$x->[0]});
+			die "Can't find length for $y->[0]" if(! exists $idlenlookup->{$y->[0]});
+			# Plain '+' (not '+='): the stored coordinates must not be shifted again on the next pairing.
+			my $min0 = $reportgraphs->{$graphfile}->{$chainname}->{'seqs'}->{$xacc}->{'start'} + $idlenlookup->{$x->[0]};
+			my $min1 = $reportgraphs->{$graphfile}->{$chainname}->{'seqs'}->{$yacc}->{'start'} + $idlenlookup->{$y->[0]};
+			my $max0 = $reportgraphs->{$graphfile}->{$chainname}->{'seqs'}->{$xacc}->{'end'} + $idlenlookup->{$x->[0]};
+			my $max1 = $reportgraphs->{$graphfile}->{$chainname}->{'seqs'}->{$yacc}->{'end'} + $idlenlookup->{$y->[0]};
+			printf FILE ("%d %d %d #$graphfile\n",$min0,$min1,100);
+			printf FILE ("%d %d %d\n",$min0,$max1,100);
+			printf FILE ("\n");
+			printf FILE ("%d %d %d\n",$min0,$max1,100);
+			printf FILE ("%d %d %d\n",$max0,$max1,100);
+			printf FILE ("\n");
+			printf FILE ("%d %d %d\n",$max0,$max1,100);
+			printf FILE ("%d %d %d\n",$max0,$min1,100);
+			printf FILE ("\n");
+			printf FILE ("%d %d %d\n",$max0,$min1,100);
+			printf FILE ("%d %d %d\n",$min0,$min1,100);
+			printf FILE ("\n\n");
+#			push @outlabels,"set label \"$chainname\" at $min0,",$min1+(($max1-$min1)/2),"\n";
+			# Label the block at the horizontal middle of its lower edge.
+			push @outlabels,"set label \"$chainname\" at ",$min0+(($max0-$min0)/2),",",$min1,"\n";
+		    }
+		}
+	    }
+	}
+    }
+}
+close FILE;
+
+open FILE, "+>$ARGV[0].$refname.var.plot" or die "Can't open plot $ARGV[0].$refname.var.plot: $!";
+foreach my $vargraphfile (@vargraphs){
+    chomp $vargraphfile;
+    foreach my $chainname (keys %{$varreportgraphs->{$vargraphfile}}){
+	# Write one closed rectangle (four edges) for every x/y sequence pair that both occur in this block.
+	foreach my $x (@xseqs){
+	    my $xacc = $x->[0];
+	    $xacc =~ s/[\.|]/_/g;
+	    if(exists $varreportgraphs->{$vargraphfile}->{$chainname}->{'seqs'}->{$xacc}){
+		foreach my $y (@yseqs){
+		    my $yacc = $y->[0];
+		    $yacc =~ s/[\.|]/_/g;
+		    if(exists $varreportgraphs->{$vargraphfile}->{$chainname}->{'seqs'}->{$yacc}){
+			die "Can't find length for $x->[0]" if(! exists $idlenlookup->{$x->[0]});
+			die "Can't find length for $y->[0]" if(! exists $idlenlookup->{$y->[0]});
+			# Plain '+' (not '+='): the stored coordinates must not be shifted again on the next pairing.
+			my $min0 = $varreportgraphs->{$vargraphfile}->{$chainname}->{'seqs'}->{$xacc}->{'start'} + $idlenlookup->{$x->[0]};
+			my $min1 = $varreportgraphs->{$vargraphfile}->{$chainname}->{'seqs'}->{$yacc}->{'start'} + $idlenlookup->{$y->[0]};
+			my $max0 = $varreportgraphs->{$vargraphfile}->{$chainname}->{'seqs'}->{$xacc}->{'end'} + $idlenlookup->{$x->[0]};
+			my $max1 = $varreportgraphs->{$vargraphfile}->{$chainname}->{'seqs'}->{$yacc}->{'end'} + $idlenlookup->{$y->[0]};
+			printf FILE ("%d %d %d #$vargraphfile\n",$min0,$min1,100);
+			printf FILE ("%d %d %d\n",$min0,$max1,100);
+			printf FILE ("\n");
+			printf FILE ("%d %d %d\n",$min0,$max1,100);
+			printf FILE ("%d %d %d\n",$max0,$max1,100);
+			printf FILE ("\n");
+			printf FILE ("%d %d %d\n",$max0,$max1,100);
+			printf FILE ("%d %d %d\n",$max0,$min1,100);
+			printf FILE ("\n");
+			printf FILE ("%d %d %d\n",$max0,$min1,100);
+			printf FILE ("%d %d %d\n",$min0,$min1,100);
+			printf FILE ("\n\n");
+#			push @outlabels,"set label \"$chainname\" at $min0,",$min1+(($max1-$min1)/2),"\n";
+			# Label the block at the horizontal middle of its lower edge.
+			push @outlabels,"set label \"$chainname\" at ",$min0+(($max0-$min0)/2),",",$min1,"\n";
+		    }
+		}
+	    }
+	}
+    }
+}
+close FILE;
+
+open FILE, "$ARGV[0].$refname.gp" or die "Can't open file $ARGV[0].$refname.gp";
+# Copy the mummerplot script to STDOUT, injecting the collected labels and the extra data files into its plot command.
+my $inplot=0;   # 0 outside the plot command, otherwise counts its lines starting at the "plot" line
+while (my $line=<FILE>){
+    if($line =~ /^plot/){
+	print join('',@outlabels);   # all "set label" commands must precede the plot command
+
+	$inplot++;
+    }
+    elsif($inplot>0){
+	if($line =~ /ls\s+2\s+$/){
+	    chomp $line;
+	    $line .= ", \\\n";   # turn the "ls 2" entry into a continued line so more entries can follow
+	}
+	$inplot++;
+    }
+    print $line;
+    if($inplot==3){   # third line of the plot command has just been printed: append the MAF/synteny/variant series
+	print " \"$ARGV[0].$refname.maf.fplot\" title \"MAFFWD\" w lp ls 3, \\\n";
+	print " \"$ARGV[0].$refname.maf.rplot\" title \"MAFREV\" w lp ls 4, \\\n";
+	print " \"$ARGV[0].$refname.syn.plot\" title \"SYNBLOCKS\" w lp ls 5";
+	if(defined $ARGV[3]){
+	    print ", \\\n";
+	    print " \"$ARGV[0].$refname.var.plot\" title \"VARBLOCKS\" w lp ls 6 \n";
+	}
+	else{
+	    print "\n";
+	}
+
+	$inplot=0;
+    }
+
+
+}
+
+
+sub maf2gp{   # Read MAF from STDIN and pass the "s" lines of each alignment block to printblock.
+    my($fh1,$fh2,$refacc)=@_;   # two output handles handed through to printblock, and the reference accession
+    my $refline;
+    my $x = [];	
+    my $lcbnum=0;   # running block number, handed to printblock
+    $refacc =~ /([^\:\.]+)[\:\.]([^\:]+)/;   # split "a.b" / "a:b" style accessions
+    if($1 && $2 && $1 ne $2){
+	$refacc = "$1.$2";
+    }
+    else{
+	$refacc = $1;
+    }
+    print STDERR "Using accession: $refacc\n";
+    while(my $line=<STDIN>){
+	if($line =~ /^a\s+/){   # an "a" line starts a new block: flush the rows collected so far
+	    if(scalar(@$x)>0){
+		&printblock($fh1,$fh2,$x,$refacc,$lcbnum);
+		$lcbnum++;
+		$x = [];
+	    }
+	}
+	else{
+	    if($line =~ /^(s.+)\s+\S+/){   # keep the "s" line up to, but not including, its last whitespace-separated field
+		push @$x,$1;
+	    }
+	}
+    }
+    &printblock($fh1,$fh2,$x,$refacc,$lcbnum);   # flush the last block (called even when no rows were collected)
+}
+
+sub printblock{   # Write one plot segment per non-reference row of a block: same-strand pairs to $fh1, opposite-strand pairs to $fh2.
+    my($fh1, $fh2, $scores,$ref,$lcbnum) = @_;   # $scores: ref to the block's "s" lines; $ref: reference accession prefix
+    my($refa,$refb,$refe,$refo,$reflen);
+    my $hasref=0;
+    my $refacc;
+    foreach my $line (@$scores){   # first pass: locate the reference row and compute its offset coordinates
+	my($qry) = ($line =~ /s\s+(\S+)/);
+	$qry =~ /([^\:\.]+)[\:\.]([^\:]+)/;
+	if($1 && $2 && $1 ne $2){
+	    $qry = "$1.$2";
+	}
+	else{
+	    $qry = $1;
+	}
+	if($qry =~ /^\Q$ref\E/){   # \Q..\E: match the accession literally; an unescaped '.' would match any character
+	    $refacc = $qry;
+	    my $refoffset = $idlenlookup->{$refacc};
+	    ($refa,$refb,$refe,$refo,$reflen) = ($line =~ /s\s+(\S+)\s+(\d+)\s+(\d+)\s+([\+\-])\s+(\d+)/);
+	    $refe = $refb + $refe;   # the third field is a size, so end = start + size
+	    $refe += $refoffset;
+	    $refb += $refoffset;
+	    $hasref=1;
+	}
+    }
+    if($hasref==1){
+	foreach my $line (@$scores){   # second pass: every other row is plotted against the reference row
+	    my($qry) = ($line =~ /s\s+(\S+)/);
+	    $qry =~ /([^\:\.]+)[\:\.]([^\:]+)/;
+	    if($1 && $2 && $1 ne $2){
+		$qry = "$1.$2";
+	    }
+	    else{
+		$qry = $1;
+	    }
+	    if($qry ne $refacc){
+		my $qryoffset = $idlenlookup->{$qry};
+		#print STDERR "$qry $qryoffset\n";
+		if(defined $qryoffset){   # rows whose sequence has no entry in the lookup are skipped
+		    my($qrya,$qryb,$qrye,$qryo,$qrylen) = ($line =~ /s\s+(\S+)\s+(\d+)\s+(\d+)\s+([\+\-])\s+(\d+)/);
+		    $qrye = $qryb + $qrye;
+		    # $qryb already holds the start coordinate.
+		    if($refo eq '+' && $qryo eq '+'){
+			#print STDERR "$refa\t$refb\t$refe\t$refo\t$qrya\t$qryb\t$qrye\t$qryo\n";
+			$qrye += $qryoffset;
+			$qryb += $qryoffset;
+			print $fh1 "$refb $qryb 100\n";
+			print $fh1 "$refe $qrye 100\n\n\n";
+			push @outlabels,"set label \"$lcbnum\" at ",$refe+100,",",$qrye,"\n";
+		    }
+		    elsif($refo eq '+' && $qryo eq '-'){
+			$qrye = ($qrylen - $qrye);   # convert reverse-strand coordinates to forward-strand ones
+			$qryb = ($qrylen - $qryb);
+			#print STDERR "$refa\t$refb\t$refe\t$refo\t$qry\t$qryb\t$qrye\t$qryo\n";
+			$qrye += $qryoffset;
+			$qryb += $qryoffset;
+			print $fh2 "$refe $qrye 100\n";
+			print $fh2 "$refb $qryb 100\n\n\n";
+			push @outlabels,"set label \"$lcbnum\" at ",$refb+100,",",$qryb,"\n";
+			
+		    }
+		    elsif($refo eq '-' && $qryo eq '+'){
+			my $refec = $reflen - $refe;   # NOTE(review): $refe/$refb already include the reference offset here -- confirm intended
+			my $refbc = $reflen - $refb;
+			#print STDERR "$refa\t$refbc\t$refec\t$refo\t$qry\t$qryb\t$qrye\t$qryo\n";
+			$qrye += $qryoffset;
+			$qryb += $qryoffset;
+			print $fh2 "$refec $qrye 100\n";
+			print $fh2 "$refbc $qryb 100\n\n\n";
+			push @outlabels,"set label \"$lcbnum\" at ",$refbc+100,",",$qryb,"\n";
+		    }
+		    elsif($refo eq '-' && $qryo eq '-'){
+			my $refec = $reflen - $refe;
+			my $refbc = $reflen - $refb;
+			#print STDERR "$refa\t$refbc\t$refec\t$refo\t$qry\t$qryb\t$qrye\t$qryo\n";
+			$qrye = $qryoffset + ($qrylen - $qrye);
+			$qryb = $qryoffset + ($qrylen - $qryb);
+			print $fh1 "$refec $qrye 100\n";
+			print $fh1 "$refbc $qryb 100\n\n\n";
+			push @outlabels,"set label \"$lcbnum\" at ",$refbc+100,",",$qryb,"\n";
+		    }
+		    else{
+			die "Unexpected strand combination in block $lcbnum: $line";   # reached when a row could not be parsed
+		    }
+		    #print STDERR "\n";
+		}
+	    }
+	}
+    }
+}
diff --git a/splitmaf.pl b/splitmaf.pl
new file mode 100755
index 0000000..25a8524
--- /dev/null
+++ b/splitmaf.pl
@@ -0,0 +1,48 @@
+#!/usr/bin/perl
+
+#Accepts pairwise maf only
+#./splitmaf.pl outputprefix < input.maf
+
+
+
+my $qfiles = {};
+
+my @seqs;
+my @buffer;
+my $currscoreline;
+
+my $header = "##maf version=1 scoring=maf_project_simple\n";
+
+while(my $line=<STDIN>){
+    if($line =~ /^a/){
+	die "Only pairwise seqs accepted" if(scalar(@seqs)>2);
+	if(scalar(@seqs)>0){
+	    &writemaf(\@seqs,\@buffer);
+	}
+	$currscoreline=$line;
+	@seqs = ();
+	@buffer = ();
+    }
+    elsif($line =~ /^s\s+([^.\s]+)/){
+	push @seqs,$1;
+    }
+    push @buffer,$line;
+}
+if(scalar(@seqs)>0){
+    &writemaf(\@seqs,\@buffer);
+}
+
+
+sub writemaf{   # Append one buffered block to the .maf file of its sequence pair, creating the file (with header) on first use.
+    my($seqs,$buffer) = @_;   # $seqs: ref to the block's sequence names; $buffer: ref to the block's raw lines
+    die "Invalid seqs ids $seqs->[0] $seqs->[1]" if(!defined $seqs->[0] || !defined $seqs->[1]);
+    die "Only pairwise seqs accepted" if(scalar(@$seqs)>2);   # checked here so the last block of the input is covered too
+    if(! exists $qfiles->{$seqs->[0]}->{$seqs->[1]}){
+	open my $fh, "+>$ARGV[0]$seqs->[0].$seqs->[1].maf" or die "Can't open file $ARGV[0]$seqs->[0].$seqs->[1].maf: $!";
+	print $fh $header;
+	$qfiles->{$seqs->[0]}->{$seqs->[1]} = $fh;   # handle is cached and stays open for later blocks of the same pair
+	print "$ARGV[0]$seqs->[0].$seqs->[1].maf\n";   # report each newly created file name on STDOUT
+    }
+    my $fh = $qfiles->{$seqs->[0]}->{$seqs->[1]};
+    print $fh @$buffer;
+}
diff --git a/synchain-mugsy b/synchain-mugsy
new file mode 120000
index 0000000..8a89de6
--- /dev/null
+++ b/synchain-mugsy
@@ -0,0 +1 @@
+chaining/synchain-mugsy
\ No newline at end of file
diff --git a/util/mafgrep.pl b/util/mafgrep.pl
new file mode 100755
index 0000000..398274b
--- /dev/null
+++ b/util/mafgrep.pl
@@ -0,0 +1,55 @@
+#!/usr/bin/perl
+#Returns list of blocks that contain all sequences in the set seqid1...seqidn
+#./mafgrep.pl seqid1 seqid2 ... seqidn < out.maf
+
+use strict;
+
+my $format='maf';#or tab
+
+my %grepids = map { $_, 1 } @ARGV;
+print STDERR "Looking for ",scalar(keys %grepids),"\n";
+my $currscore;
+my $currorient;
+my $blockorient;
+my @allblocks;
+my $block = [];
+while(my $line=<STDIN>){
+    if($line =~ /^a\s+score=(\S+)/){
+	$currscore=$1;
+	push @allblocks,$block if(@$block);   # nothing to store before the first block has collected rows
+	$block=[];
+    }
+    elsif($line =~ /^s/){
+	my @elts = split(/\s+/,$line);
+	#0-score,1-blockorient(never set),2-accession,3-start,4-end,5-size,6-strand,7-raw line
+	push @$block,[$currscore,$currorient,$elts[1],$elts[2],$elts[2]+$elts[3],$elts[3],$elts[4],$line];
+    }
+}
+push @allblocks,$block if(@$block);   # store the final block before the count is reported
+print STDERR "Parsed ",scalar(@allblocks)," blocks\n";
+print "##maf version=12\n";
+foreach my $blocks (@allblocks){
+    #Lookup of all seqs in the block
+    my %seqs = map {$_->[2], 1} @$blocks;
+    #
+    my %results = map { $_, $grepids{$_} } grep { not exists $seqs{$_} } keys %grepids;
+    #print STDERR "Seqs ",join(',',sort keys %seqs)," ",scalar(@$block),"\n";
+    #print STDERR "Results ",join(',',sort keys %results),"\n";
+    #print STDERR "Grep ",join(',',sort keys %grepids),"\n";	#join(' ',keys %seqs)," | ",join(' ',keys %grepids),"\n";
+    if(scalar(keys %results)==0){
+	if($format eq 'maf'){
+	    print "a score=$blocks->[0]->[0]\n";
+	}
+	foreach my $bl (@$blocks){
+	    if($format eq 'maf'){
+		if(exists $grepids{$bl->[2]}){
+		    print "$bl->[7]";
+		}
+	    }
+	    else{
+		print "$bl->[2]\t$bl->[3]\t$bl->[4]\t$bl->[5]\t$bl->[6]\n";
+	    }
+	}
+	print "\n";
+    }
+}
diff --git a/util/mafstats.pl b/util/mafstats.pl
new file mode 100755
index 0000000..89cd308
--- /dev/null
+++ b/util/mafstats.pl
@@ -0,0 +1,600 @@
+#!/usr/bin/perl
+
+#Reports coverage
+
+#Unique DNA should be sum of blocks blocks with one seq and runs aligned to all gaps
+
+use strict;
+
+
+
+my $found=0;
+my $currscore;
+my $currorient;
+my $blockorient;
+my @allblocks;
+my $block = [];
+my $isdup=0;
+my $multiplealnblkcount=0;
+while(my $line=<STDIN>){
+    if($line =~ /^a/){
+	($currscore) = ($line =~ /score=(\S+)/);   # list assignment; the previous '=~' never stored the score
+	push @allblocks,$block if(scalar(@$block)>0);
+	$multiplealnblkcount++ if(scalar(@$block)>1);
+	$isdup = ($line =~ /dup=/) ? 1 : 0;   # assign the outer variable so the rows pushed below see this block's flag
+	$block=[];
+    }
+    elsif($line =~ /^s/){
+	#my @elts = split(/\s+/,$line);
+	#0-score,1-raw "s" line,2-dup flag of the enclosing block
+	chomp $line;
+	push @$block,[$currscore,$line,$isdup];
+    }
+}
+push @allblocks,$block if(scalar(@$block)>0);
+$multiplealnblkcount++ if(scalar(@$block)>1);   # the final block has no following "a" line to count it
+
+#Number of lcbs with N genomes
+my $lcbseqcount = [];
+#Frequency of alignment columns with N identical rows
+my $numIdentCols = [];
+#Freq of columns with no gaps
+my $numUngappedCols = [];
+#Freq columns with one seq and all gaps
+my $numGappedCols = [];
+
+#Number of bps in blocks containing N genomes
+my $lcbbpcount = [];
+my $lcbbpdistro = [];
+my $gapdistro = [];
+
+
+my $alnbpseqs = {};
+my $lcbseqs = {};
+
+
+
+my $totalscore=0;
+my $numgaps=0;
+my $numblocks=scalar(@allblocks);
+my $totallen=0;
+my $totalseqlen=0;
+my $smallestblks=0;
+my $smallerblks=0;
+my $smallerlen=0;
+my $smallestlen=0;
+my $nummaf=0;
+
+my $uniqcount=0;
+my $dupcount=0;
+
+my %minseq;
+my %maxseq;
+my %allseqs;
+
+print "Num_blocks:$numblocks\n";
+print "Num_multi_blocks:$multiplealnblkcount\n";
+my $lcbid=0;
+
+open AFILE,"+>aln.$ARGV[0].dat";
+foreach my $block (@allblocks){
+    my $issmaller=0;
+    my $issmallest=0;
+    #Min and max len of seqs in the LCB
+    my $minlen=-1;
+    my $maxlen=0;
+    die if(scalar(@$block) ==0);
+    my $nseq = scalar(@$block);
+    die if($nseq <= 0);
+    $lcbseqcount->[$nseq]++;
+    my @alntext;
+    my $isdup=0;
+    if($nseq>1){
+	foreach my $maf (@$block){
+	    if($maf->[2]){
+		$isdup=1;
+	    }
+	    my($seq,$beg,$len,$orient,$seqlen,$text) = ($maf->[1] =~ /s\s+(\S+)\s+(\d+)\s+(\d+)\s+([\+\-])\s+(\d+)\s+(\S+)/);
+	    die if($len<0);
+	    $text =~ s/\s+//g;
+	    if($text =~ /[^-]/){
+		push @alntext,$text;
+		if(exists $allseqs{$seq}){
+		    die if($seqlen != $allseqs{$seq});
+		}
+		else{
+		    $allseqs{$seq} = $seqlen;
+		}
+		if($minlen==-1){
+		    $minlen=$len;
+		}
+		else{
+		    $minlen = ($len<$minlen) ? $len:$minlen;
+		    die if($minlen<0);
+		}
+		$maxlen = ($len>$maxlen) ? $len:$maxlen;
+		my $cgaps = ($text =~ tr/\-/-/);
+		die if($cgaps<0);
+		$numgaps += $cgaps;
+		my($fmin,$fmax);
+		
+		if($orient eq '-'){
+		    $fmin = $seqlen-$beg-$len;
+		    $fmax = $seqlen-$beg;
+		}
+		else{
+		    $fmin = $beg;
+		    $fmax = $beg+$len;
+		}
+		die "$maf->[1]" if($fmin < 0 || $fmin > $seqlen);
+		die "$maf->[1]" if($fmax < 0 || $fmax > $seqlen);
+		$minseq{$seq} = ($minseq{$seq} < $fmin) ? $fmin : $minseq{$seq};
+		$maxseq{$seq} = ($maxseq{$seq} > $fmax) ? $fmax : $maxseq{$seq};
+
+		$lcbseqs->{$seq} = [] if(!ref $lcbseqs->{$seq});
+		push @{$lcbseqs->{$seq}},[$fmin,$fmax,$orient];
+
+		die "$maf->[1]" if($len<=0);
+		$totallen += $len;
+		$nummaf++;
+		if($len < 100){
+		    $issmallest=1;
+		    $smallestlen+=$len;
+		}
+		if($len < 1000){
+		    $issmaller=1;
+		    $smallerlen+=$len;
+		}
+	    }
+	    else{
+		print STDERR "All gap encountered but length $len > 0 $text\n" if($len != 0);
+		$nseq--;
+	    }
+	}
+    }
+    else{
+	my($seq,$beg,$len,$orient,$seqlen,$text) = ($block->[0]->[1] =~ /s\s+(\S+)\s+(\d+)\s+(\d+)\s+([\+\-])\s+(\d+)\s+(\S+)/);
+	$minlen=$len;
+    }
+    die if($minlen<0);
+    $smallerblks++ if($issmaller);
+    $smallestblks++ if($issmallest);
+
+    $lcbbpcount->[$nseq]+=$minlen;
+
+    if($nseq ==1){
+	if($isdup){
+	    $uniqcount +=$minlen;
+	}
+	else{
+	    $dupcount +=$minlen;
+	}
+    }
+	    
+    $lcbbpdistro->[$nseq] = [] if(!ref $lcbbpdistro->[$nseq]);
+    push @{$lcbbpdistro->[$nseq]},$minlen;
+
+    print STDERR "LCB: $lcbid maxlen:$maxlen\n";
+    $lcbid++;
+
+    if($nseq>1){
+	my $alnmatrix = &maf2matrix(\@alntext);
+	my($lcbtotalscore,$blklen) = &scorealn($alnmatrix,
+					       $numIdentCols,
+					       $numUngappedCols,
+					       $numGappedCols,
+					       $gapdistro,
+					       $alnbpseqs);
+	die "$lcbtotalscore,$blklen" if($blklen ==0);
+	$totalscore+=$lcbtotalscore;
+	my $alnlen = scalar(@{$alnmatrix->[0]});
+	my $nseq = scalar(@$alnmatrix);
+	for(my $k=0;$k<$alnlen;$k++){
+	    for(my $i=0;$i<$nseq;$i++){
+		print AFILE $alnmatrix->[$i]->[$k];
+	    }
+	    print AFILE "\n";
+	}
+    }
+}
+
+close AFILE;
+my $seqmatrix = &getCovered($lcbseqs);
+
+my $uniqbptotal=0;
+my $alignedlentotal=0;
+my $doublecovtotal=0;
+open MFILE,"+>bps.$ARGV[0].dat";
+foreach my $seq (sort {$a cmp $b} keys %allseqs){
+    $totalseqlen+=$allseqs{$seq};
+    my $alignedlen=0;
+    my $doublecov=0;
+    my $uniqbp=0;
+    for(my $i=0;$i<$allseqs{$seq};$i++){    
+	if($seqmatrix->{$seq}->[$i]){
+	    $alignedlen++;
+	    $alignedlentotal++;
+	    if($seqmatrix->{$seq}->[$i]>1){
+		$doublecovtotal+=$seqmatrix->{$seq}->[$i]-1;
+		$doublecov++;
+	    }
+	}
+	else{
+	    die if($seqmatrix->{$seq}->[$i]>0);
+	    $uniqbptotal++;
+	    $uniqbp++;
+	}
+	print MFILE "$seq $i $seqmatrix->{$seq}->[$i]\n";
+    }
+    print "$seq len:$allseqs{$seq} aln_cov:$alignedlen aln_cov_pct:",$alignedlen/$allseqs{$seq}," uniq:$uniqbp doublecov:$doublecov \n";
+}
+close MFILE;
+
+#Count of bases that are aligned to only gaps
+my $uniqaln=0;
+for(my $i=0;$i<scalar(@$numGappedCols);$i++){
+    $uniqaln+=$numGappedCols->[$i];
+}
+
+#Ratio helper: with an empty input (no blocks or no sequence) the denominators
+#below are zero; report 0 instead of dying with "Illegal division by zero"
+my $safe_ratio = sub {
+    my($num,$den) = @_;
+    return $den ? $num/$den : 0;
+};
+
+print "\n";
+#Summary #genomes,total len, avg block size
+print "max_genomes_aln:",scalar(@$numIdentCols)-1,"\n";
+print STDERR "Num ident cols size=",scalar(@$numIdentCols),"!= Numbpdistro=",scalar(@$lcbbpdistro),"\n" if(scalar(@$numIdentCols)!=scalar(@$lcbbpdistro));
+print "total_seq_len:",$totalseqlen,"\n";
+print "avg_block_len:",$safe_ratio->($totallen,$nummaf),"\n";
+print "num_lcbs:",$nummaf,"\n";
+print "double_covered:",$doublecovtotal,"\n";
+#Avg/total coverage, #bps aligned
+print "aln_cov:",$alignedlentotal," ",$totallen-$doublecovtotal,"\n";
+print "aln_cov_pct:",$safe_ratio->($alignedlentotal,$totalseqlen),"\n";
+print "not_cov:",$uniqbptotal,"\n";
+print "not_cov_pct:",$safe_ratio->($uniqbptotal,$totalseqlen),"\n";
+
+#Composition
+#aln = everything that is neither uncovered nor aligned only to gaps
+print "aln_bps:",($totalseqlen-$uniqbptotal-$uniqaln),"\n";
+print "aln_pct:",$safe_ratio->($totalseqlen-$uniqbptotal-$uniqaln,$totalseqlen),"\n";
+#core = last entry of the ungapped-column histogram, i.e. the ungapped columns
+#counted for the largest number of rows seen in any block
+print "core_bps:",$numUngappedCols->[scalar(@$numUngappedCols)-1],"\n";
+print "core_pct:",$safe_ratio->($numUngappedCols->[scalar(@$numUngappedCols)-1],$totalseqlen),"\n";
+print "uniq_bps:",$uniqbptotal+$uniqaln,"\n";
+print "uniq_pct:",$safe_ratio->($uniqbptotal+$uniqaln,$totalseqlen),"\n";
+
+print "\n";
+#Cross-check: entry 1 of $lcbbpcount is expected to equal the uncovered total computed above
+print "MISMATCH between uniqLCB len and calculated len\n" if( $lcbbpcount->[1] != $uniqbptotal);
+print "uniq_LCBlen:",$lcbbpcount->[1],"\n";
+print "uniq_cov:",$uniqbptotal,"\n";
+print "uniq_aln:",$uniqaln,"\n";
+print "uniq_dup:",$uniqcount,"\n";
+print "dup_bps:",$dupcount,"\n";
+
+print "blklt100bp:",$smallestblks,"\n";
+print "blklen:",$smallestlen,"\n";
+print "blklt1000bp:",$smallerblks,"\n";
+print "blklen:",$smallerlen,"\n";
+#Scoring
+print "num_gaps:",$numgaps,"\n";
+print "score:",$totalscore,"\n";
+
+
+#Print one frequency table: a title line, a tab-terminated row of indices and
+#a tab-terminated row holding the count stored at each index
+my $print_freq_table = sub {
+    my($title,$counts) = @_;
+    print "$title\n";
+    my @indices = (0 .. scalar(@$counts)-1);
+    print join('',map {"$_\t"} @indices),"\n";
+    print join('',map {"$counts->[$_]\t"} @indices),"\n";
+};
+
+$print_freq_table->('LCB seq count',$lcbseqcount);
+$print_freq_table->('LCB coverage bp count',$lcbbpcount);
+$print_freq_table->('Ident.Freq of identical alignment columns',$numIdentCols);
+$print_freq_table->('NoGaps.Freq of alignment columns with no gaps',$numUngappedCols);
+$print_freq_table->('AllGaps.Freq of alignment cols with one seq and all gaps',$numGappedCols);
+
+#Sorted lengths collected from entries 2 and up of $lcbbpdistro
+print "LCBs:";
+my @lcblens;
+foreach my $i (2 .. scalar(@$lcbbpdistro)-1){
+    my $lens = $lcbbpdistro->[$i];
+    push @lcblens,@$lens if(ref $lens);
+}
+print join(',',sort {$a <=> $b} @lcblens),"\n";
+
+#Same list restricted to the last entry of $lcbbpdistro
+print "LCBs core:";
+print join(',',sort {$a <=> $b} @{$lcbbpdistro->[scalar(@$lcbbpdistro)-1]}),"\n";
+
+#Sorted lengths of every gap run recorded by scorealn.
+#NOTE(review): unlike the two lists above, the values start on the line after
+#the "Gaps:" label - confirm whether a consumer depends on that layout
+print "Gaps:";
+my @gaplens;
+foreach my $runs (@$gapdistro){
+    push @gaplens,@$runs if(ref $runs);
+}
+print "\n",join(',',sort {$a <=> $b} @gaplens),"\n";
+
+print STDERR "Writing .dat files for R\n";
+
+#Data for R: numerically sorted values, one per line, no header row.
+#Dies if the file cannot be created or written.
+my $write_dat = sub {
+    my($file,$values) = @_;
+    open(my $fh,'>',$file) or die "Can't open file $file: $!";
+    print $fh join("\n",sort {$a <=> $b} @$values);
+    close($fh) or die "Can't write file $file: $!";
+};
+$write_dat->("lcbs.$ARGV[0].dat",\@lcblens);
+$write_dat->("corelcbs.$ARGV[0].dat",$lcbbpdistro->[scalar(@$lcbbpdistro)-1]);
+$write_dat->("gaps.$ARGV[0].dat",\@gaplens);
+
+#R script that draws one histogram per .dat file: [name, bar colour, title, x label]
+my @plots = (
+    ['lcbs',    'green','LCBs',     'LCB length (bp)'],
+    ['corelcbs','blue', 'Core LCBs','LCB length (bp)'],
+    ['gaps',    'red',  'Gaps',     'Gap length (bp)'],
+);
+my $rscript = "mafstats.$ARGV[0].r";
+open(my $rfh,'>',$rscript) or die "Can't open file $rscript: $!";
+foreach my $plot (@plots){
+    my($name) = @$plot;
+    #The .dat files have no header line. Without header=FALSE R would consume
+    #the first value as the column name and the X1 column used below would
+    #not reliably exist.
+    print $rfh "$name <- read.csv(file=\"$name.$ARGV[0].dat\", header=FALSE, col.names=\"X1\");\n";
+}
+foreach my $plot (@plots){
+    my($name,$color,$title,$xlab) = @$plot;
+    print $rfh "hist($name\$X1, col=\"$color\", main=\"$title\", xlab=\"$xlab\");\n";
+    print $rfh "dev.print(device=postscript, \"$name.$ARGV[0].eps\", onefile=FALSE, horizontal=FALSE);\n";
+}
+close($rfh) or die "Can't write file $rscript: $!";
+
+#Score one alignment block and accumulate column and gap statistics.
+#
+#Arguments
+# $matrix          - array ref of rows; each row is an array ref of single
+#                    characters, '-' marks a gap
+# $numIdentCols    - array ref updated in place: [n] counts columns whose n
+#                    non-gap characters are all the same (case-insensitive);
+#                    [0] counts columns with a mismatch or with no base at all
+# $numUngappedCols - array ref updated in place: [nseq] counts columns without
+#                    any gap, [0] counts all other columns
+# $numGappedCols   - array ref updated in place: [nseq-1] counts columns in
+#                    which exactly one row has a base
+# $gapaln          - array ref updated in place: [row] collects the length of
+#                    every run of gaps found in that row
+# $alnbpseqs       - accepted for interface compatibility, not used
+#
+#Returns ($totalscore,$alnlen): the sum-of-pairs score (+1 per aligned pair of
+#bases, -2 per gap open, -1 per gap extension) and the number of columns.
+#Dies if the rows do not all have the same length.
+sub scorealn{
+    my($matrix,$numIdentCols,$numUngappedCols,$numGappedCols,$gapaln,$alnbpseqs) = @_;
+    my $gapext = -1;
+    my $gapopen = -2;
+    my $gapopeni=0;
+    my $gapopenj=0;
+    my $totalscore=0;
+    my $alnlen = 0;
+    my $nseq = scalar(@$matrix);
+
+    #Loop over each pair of rows (i<j).
+    #NOTE(review): the gap-open flags are cleared only by an aligned pair of
+    #bases; they are not reset when a new pair of rows starts - confirm that
+    #this carry-over is intended before changing the score.
+    for(my $i=0;$i<$nseq;$i++){
+        if($alnlen!=0){
+            die if($alnlen != scalar(@{$matrix->[$i]}));
+        }
+        else{
+            $alnlen = scalar(@{$matrix->[$i]});
+        }
+        for(my $j=$i+1;$j<$nseq;$j++){
+            #Loop over each column
+            for(my $k=0;$k<$alnlen;$k++){
+                my $basei = ($matrix->[$i]->[$k] ne '-');
+                my $basej = ($matrix->[$j]->[$k] ne '-');
+                if($basei && $basej){
+                    $gapopeni=0;
+                    $gapopenj=0;
+                    $totalscore+=1;
+                }
+                elsif($basei){
+                    #gap in row j
+                    if($gapopenj){
+                        $totalscore+=$gapext;
+                    }
+                    else{
+                        $gapopenj=1;
+                        $totalscore+=$gapopen;
+                    }
+                }
+                elsif($basej){
+                    #gap in row i
+                    if($gapopeni){
+                        $totalscore+=$gapext;
+                    }
+                    else{
+                        $gapopeni=1;
+                        $totalscore+=$gapopen;
+                    }
+                }
+                #a column with a gap in both rows does not change the score
+            }
+        }
+    }
+
+    #Get number of identical columns, allowing for gaps but not mismatches
+    # S1 TTTTTTAAATTT
+    # S2 TT---TAAAA-A
+    # S3 TTTTTT--ATTT
+    #    332223223020
+    # $numIdentCols[0]=2 //at least one mismatch
+    # $numIdentCols[2]=6
+    # $numIdentCols[3]=3
+    for(my $k=0;$k<$alnlen;$k++){
+        my $numIdents=0;
+        my $c; #first base seen in this column, lower-cased
+        for(my $j=0;$j<$nseq;$j++){
+            next if($matrix->[$j]->[$k] eq '-');
+            if($numIdents==0){
+                $c = lc($matrix->[$j]->[$k]);
+                $numIdents++;
+            }
+            elsif(lc($matrix->[$j]->[$k]) eq $c){
+                $numIdents++;
+            }
+            else{
+                #mismatch: the whole column counts under index 0
+                $numIdents=0;
+                last;
+            }
+        }
+        $numIdentCols->[$numIdents]++;
+    }
+
+    #Get number of ungapped columns
+    # S1 TTTTTTAAATTT
+    # S2 TT---TAAAA-A
+    # S3 TTTTTT--ATTT
+    #    330003003303
+    # $numUngapped[0]=6
+    # $numUngapped[3]=6
+    for(my $k=0;$k<$alnlen;$k++){
+        my $numUngaps=0;
+        for(my $j=0;$j<$nseq;$j++){
+            if($matrix->[$j]->[$k] ne '-'){
+                $numUngaps++;
+            }
+            else{
+                $numUngaps=0;
+                last;
+            }
+        }
+        $numUngappedCols->[$numUngaps]++;
+    }
+
+    #Get number columns with one sequence and all gaps
+    for(my $k=0;$k<$alnlen;$k++){
+        my $numGaps=0;
+        for(my $j=0;$j<$nseq;$j++){
+            if($matrix->[$j]->[$k] eq '-'){
+                $numGaps++;
+            }
+        }
+        if($numGaps==$nseq){
+            #A column made only of gaps should not occur: dump the block for inspection
+            for(my $row=0;$row<$nseq;$row++){
+                for(my $col=0;$col<$alnlen;$col++){
+                    print STDERR "$matrix->[$row]->[$col]";
+                }
+                print STDERR "\n";
+            }
+            print STDERR "Column $k has all gaps\n";
+        }
+        if($numGaps>0 && $numGaps==$nseq-1){
+            $numGappedCols->[$numGaps]++;
+        }
+    }
+
+    #
+    # Save lengths of all runs of gaps
+    # Eg.
+    # S1 TTTTTTAAATTT
+    # S2 TT---TAAAT-T
+    # S3 TTTTTT--ATTT
+    #
+    #Results
+    #$gapaln[1]=[3,1]
+    #$gapaln[2]=[2]
+    for(my $j=0;$j<$nseq;$j++){
+        my $runlen=0; #length of the run of gaps currently open in row j
+        for(my $k=0;$k<$alnlen;$k++){
+            if($matrix->[$j]->[$k] eq '-'){
+                $runlen++;
+                next;
+            }
+            if($runlen){ #end of a run of gaps
+                $gapaln->[$j] = [] if(!ref $gapaln->[$j]);
+                push @{$gapaln->[$j]},$runlen;
+            }
+            $runlen=0;
+        }
+        #A run that reaches the end of the row is closed here, for every row.
+        #Doing this once after the row loop recorded it for the last row only.
+        if($runlen){
+            $gapaln->[$j] = [] if(!ref $gapaln->[$j]);
+            push @{$gapaln->[$j]},$runlen;
+        }
+    }
+    return ($totalscore,$alnlen);
+}
+
+#Turn the aligned text of every row of a block into a character matrix.
+# $mafs - array ref of strings, one per row
+#Returns an array ref that holds, for each input string in order, an array ref
+#of its individual characters. The number of rows is reported on STDERR.
+sub maf2matrix{
+    my($mafs) = @_;
+    print STDERR " with ",scalar(@$mafs)," seqs\n";
+    my @rows = map { [split(//,$_)] } @$mafs;
+    return \@rows;
+}
+
+
+
+#Count how many intervals cover each position of each sequence.
+# $blocksbyseq - hash ref: sequence name => array ref of [start,end] pairs;
+#                a pair covers the positions start .. end-1
+#Returns a hash ref: sequence name => array ref indexed by position that holds
+#the number of covering intervals (positions never covered stay undefined).
+#Every position reached a second or later time is reported on STDERR.
+sub getCovered{
+    my($blocksbyseq) = @_;
+    my %depth;
+
+    foreach my $seq (sort {$a cmp $b} keys %$blocksbyseq){
+        foreach my $interval (@{$blocksbyseq->{$seq}}){
+            my($from,$to) = ($interval->[0],$interval->[1]);
+            for(my $pos=$from;$pos<$to;$pos++){
+                my $seen = $depth{$seq}->[$pos];
+                print STDERR " $seq $pos doublecov $seen $from $to\n" if($seen>0);
+                $depth{$seq}->[$pos]++;
+            }
+        }
+    }
+    return \%depth;
+}
+	    
+
diff --git a/util/reportvariants.pl b/util/reportvariants.pl
new file mode 100755
index 0000000..4d662b9
--- /dev/null
+++ b/util/reportvariants.pl
@@ -0,0 +1,118 @@
+#!/usr/bin/perl
+#./reportvariants.pl index fasta
+
+use strict;
+use Bio::Perl;
+use Bio::DB::Fasta;
+use Bio::Seq;
+use lib '/usr/local/projects/angiuoli/developer/sangiuoli/mugsy/trunk/mapping/';
+use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
+use Pod::Usage;
+use AlignmentTree;
+
+#Command line: reportvariants.pl [options] index fasta refname [seqname ...]
+# --gap_window|-g     columns inspected for gaps on each side of a variant (default 5)
+# --display_window|-d columns printed on each side of a variant (default 5)
+# --gaps_allowed|-a   maximum number of gaps tolerated inside the gap window (default 0)
+# --help|-h           print usage and exit
+my %options;
+my $results = GetOptions (\%options,
+                          'gap_window|g=s',
+                          'display_window|d=s',
+                          'gaps_allowed|a=s',
+                          'help|h') || pod2usage(-verbose => 1);
+
+#'help' has to be declared above, otherwise this test can never be true
+pod2usage(-verbose=>1) if($options{'help'});
+
+#First argument: serialized alignment index; second argument: FASTA file
+my $atree = AlignmentTree::deserialize($ARGV[0]);
+
+my $db = Bio::DB::Fasta->new($ARGV[1],'-reindex'=>1);
+
+my $gapthreshold   = exists $options{'gaps_allowed'}   ? $options{'gaps_allowed'}   : 0;
+my $gap_window     = exists $options{'gap_window'}     ? $options{'gap_window'}     : 5;
+my $display_window = exists $options{'display_window'} ? $options{'display_window'} : 5;
+
+#Drop the index and FASTA arguments; the next one names the reference sequence
+shift @ARGV;
+shift @ARGV;
+
+#Every remaining argument is a sequence for which variants are written to the output files
+my $pwseqs = {};
+my $refname = shift @ARGV;
+foreach my $seq (@ARGV){
+    $pwseqs->{$seq}++;
+}
+
+#Output files are prefixed with the process id
+open VFILE,"+>$$.pwvariants.out" or die "Can't open file pwvariants.out";
+open SFILE,"+>$$.snpvariants.out" or die "Can't open file snpvariants.out";
+foreach my $alnname (sort {$a cmp $b} keys %{$atree->{_alignments}}){
+    my($alnobj,$aln_bv,$align_width) = @{$atree->{_alignments}->{$alnname}};
+    my ($mmatrix,$seqmatrix,$names) = $atree->getAlignmentMatrix($alnname,1,$align_width,$db);
+    next if(@$seqmatrix <= 1);
+
+    my $variants = {};    #column => number of rows that differ from the reference
+    my $seqvariants = {}; #row index => column => differs from the reference
+
+    #Row of the reference sequence.
+    #NOTE(review): when $refname is not part of this alignment $refidx stays
+    #undefined and row 0 ends up being used as the reference - confirm intent
+    my $refidx;
+    for(my $i=0;$i<@$seqmatrix;$i++){
+        if($names->[$i] eq $refname){
+            $refidx=$i;
+        }
+    }
+
+    #Matrix cols start at 0
+    for(my $j=0;$j<$align_width;$j++){
+        my $refbp = lc(substr($seqmatrix->[$refidx],$j,1));
+        for(my $i=0;$i<@$seqmatrix;$i++){
+            next if($i eq $refidx);
+            my $currbp = lc(substr($seqmatrix->[$i],$j,1));
+            #The characters in the class below are never reported as variants
+            if($currbp ne $refbp && $currbp !~ /[yskrmwnw]/){
+                $variants->{$j}++;
+                $seqvariants->{$i}->{$j}++;
+            }
+        }
+    }
+
+    foreach my $col (sort {$a <=> $b} keys %$variants){
+        #Count the gaps of all rows inside the window around the variant column
+        my $gstart = $col - $gap_window;
+        $gstart = 0 if($gstart < 0);
+        my $gend = $col + $gap_window;
+        $gend = $align_width if($gend > $align_width);
+        my $gaps=0;
+        for(my $i=0;$i<@$seqmatrix;$i++){
+            $gaps+= (substr($seqmatrix->[$i],$gstart,$gend-$gstart+1) =~ tr/\-/\-/);
+        }
+        next if($gaps>$gapthreshold);
+
+        #Window of columns shown for this variant
+        my $start = $col - $display_window;
+        $start = 0 if($start < 0);
+        my $end = $col + $display_window;
+        $end = $align_width if($end > $align_width);
+
+        my $refc;
+        for(my $i=0;$i<@$seqmatrix;$i++){
+            my($alni) = $atree->getAlignedInterval($alnname,$names->[$i]);
+            #Alignment columns are passed 1-based
+            my($startc,$endc) = AlignmentTree::columntocoords($alni,$col+1,$col+1);
+            $refc = $startc if($names->[$i] eq "$refname");
+            printf("%10s %s\tcoords:%d-%d\n",$names->[$i],lc(substr($seqmatrix->[$i],$start,$end-$start+1)),$startc,$endc);
+
+            #Files are written only when the reference is the first row, so
+            #$refc is already set when the other rows are reached
+            if($names->[0] eq "$refname" && exists $pwseqs->{$names->[$i]} && $seqvariants->{$i}->{$col}){
+                print SFILE "$names->[$i]\t$refname\t$refc\t",$refc+1,"\t",uc(substr($seqmatrix->[0],$col,1)),"\n";
+                print VFILE "$names->[$i]\t$refc\t",$refc+1,"\t",substr($seqmatrix->[0],$col,1),"/",substr($seqmatrix->[$i],$col,1),"\t$names->[$i]\t$startc-$endc\n";
+            }
+        }
+        #Marker under the variant column. The offset follows the real start
+        #of the window, so it stays aligned when the window is clipped at
+        #column 0 or when --display_window is not 5.
+        printf("%10s %s^     \n",'',' ' x ($col-$start));
+        print "\n";
+    }
+}
+close VFILE;
+close SFILE;
diff --git a/xmfa2maf.pl b/xmfa2maf.pl
new file mode 100755
index 0000000..b20389e
--- /dev/null
+++ b/xmfa2maf.pl
@@ -0,0 +1,116 @@
+#!/usr/bin/perl
+#Utility for converting output of Mauve XMFA to MAF format
+
+#USAGE: ./xmfa2maf seqs.len < aln.xmfa > aln.maf
+
+use strict;
+
+my $seqname;      #sequence whose rows are currently being collected
+my $start;        #1-based start taken from the current header line
+my $end;
+my $orient;
+my $seqinfo = []; #alignment text lines of the current sequence
+my %lens;         #sequence id => { len => ..., name => optional replacement name }
+my $blocks = [];  #finished sequences of the current alignment block
+my $usenum = $ARGV[1];
+my $idx=0;
+
+#Optional first argument: file with "name length [newname]" per line.
+#When a second argument is given the entries are keyed by line number
+#instead of by name.
+if($ARGV[0]){
+    open(my $lenfh,'<',$ARGV[0]) or die "Can't open file $ARGV[0] needed for sequence lengths: $!";
+    while(my $line=<$lenfh>){
+        chomp $line;
+        my($name,$len,$newname) = split(/\s+/,$line);
+        if($usenum){
+            $lens{++$idx}->{'len'} = $len;
+            if(length($newname)>0){
+                $lens{$idx}->{'name'} = $newname;
+            }
+        }
+        elsif($name){
+            $lens{$name}->{'len'} = $len;
+            if(length($newname)>0){
+                $lens{$name}->{'name'} = $newname;
+            }
+        }
+    }
+    close $lenfh;
+}
+
+#Print the block collected so far as one MAF alignment and reset the state
+my $flush_block = sub {
+    if(defined $seqname && $start>0){
+        #Convert alignment to zero start, interbase coordinates
+        push @$blocks,[$seqname,$start-1,$end,$orient,$seqinfo];
+    }
+    if(scalar(@$blocks)>0){
+        print "a score=1\n";
+        foreach my $l (@$blocks){
+            printMAF(@$l);
+        }
+        print "\n";
+    }
+    $seqname=undef;
+    $start=0;
+    $seqinfo=[];
+    $blocks = [];
+};
+
+print "##maf version=1 scoring=mauve\n";
+while(my $line=<STDIN>){
+    if($line =~ /^\s*=/){
+        #'=' terminates an alignment block
+        $flush_block->();
+    }
+    #Format >id1:start-end orient id2
+    elsif(($line =~ /^>\s+\S+\:/ &&
+           $line =~ /^>\s*(\S+)\:(\d+)-(\d+)\s+([\+\-])\s+(\S+)/)
+          ||
+          $line =~ /^>(\S+)\s+(\d+)\s+(\d+)\s+([\+\-])\s+(\S+)/){
+        #Copy the captures at once, before any other match can replace them
+        my($id1,$hstart,$hend,$strand,$id2) = ($1,$2,$3,$4,$5);
+        chomp $line;
+        if(defined $seqname && $start>0){
+            push @$blocks,[$seqname,$start-1,$end,$orient,$seqinfo];
+        }
+        my $seqid;
+        if(exists $lens{$id1}){
+            $seqid = $id1;
+        }
+        elsif(exists $lens{$id2}){
+            $seqid = $id2;
+        }
+        else{
+            #NOTE(review): for an id that is not in the length table the
+            #last header field is stored as its length - confirm that this
+            #field really holds a length for such input
+            $seqid = $id1;
+            $lens{$id1}->{'len'} = $id2;
+        }
+        $start = $hstart;
+        #Headers with a start of 0 are not collected (see the $start>0 tests)
+        if($start>0){
+            $end = $hend;
+            #XMFA format start always < end
+            die "Invalid coordinates $start-$end" if($start>$end);
+            #Relative orientation of the alignment
+            $orient = $strand;
+            $seqname = $seqid;
+            #Keep only the last component of an absolute path
+            $seqname =~ s/^\/.*\/(\S+)/$1/;
+        }
+        $seqinfo=[];
+    }
+    elsif($line !~ /\#/ && defined $seqname){
+        #Alignment text; lines containing '#' are skipped
+        chomp $line;
+        push @$seqinfo,$line if($line =~ /\S+/);
+    }
+}
+#Input that does not end with '=' still gets its last block written.
+#After a terminating '=' nothing is pending and this prints nothing.
+$flush_block->();
+
+#Print one MAF "s" line.
+# $id  - sequence id; must be a key of %lens
+# $s   - zero-based start on the forward strand
+# $e   - end on the forward strand, must be greater than $s
+# $o   - orientation, '+' or '-'
+# $str - array ref of alignment text pieces, joined without separator
+#For the '-' orientation the printed start is the sequence length minus $e.
+#Dies on an unknown id, on $e <= $s, on any other orientation and on a
+#negative printed start.
+sub printMAF{
+    my($id,$s,$e,$o,$str) = @_;
+    unless(exists $lens{$id}){
+        die "No length specified for seq $id in $ARGV[0]";
+    }
+    die "$e<$s" unless($s<$e);
+    die unless($o eq '+' || $o eq '-');
+
+    my $entry = $lens{$id};
+    my $seqlen = $entry->{'len'};
+    my $size = $e-$s;
+    if($o eq '-'){
+        $s = $seqlen-$e;
+    }
+    die "Bad coords $s $e $seqlen" if($s<0);
+
+    #Use the replacement name from the length table when there is one
+    my $label = exists $entry->{'name'} ? $entry->{'name'} : $id;
+    print "s $label $s $size $o $seqlen ",join('',@$str),"\n";
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/mugsy.git



More information about the debian-med-commit mailing list