[med-svn] [libmems] 01/02: Imported Upstream version 1.6.0+4725

Fri Apr 17 20:36:46 UTC 2015

This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository libmems.

commit d6a03948c5ba705cb1acc8c5fad540b5d87f78e5
Author: Andreas Tille <tille at debian.org>
Date:   Fri Apr 17 22:35:37 2015 +0200

    Imported Upstream version 1.6.0+4725
---
 AUTHORS                                 |    1 +
 COPYING                                 |  340 +++
 ChangeLog                               |    0
 INSTALL                                 |  167 ++
 Makefile.am                             |   14 +
 NEWS                                    |    0
 README                                  |    1 +
 TODO                                    |    1 +
 autogen.sh                              |    5 +
 configure.ac                            |  137 ++
 doxygen.am                              |  157 ++
 libMems-1.6.pc.in                       |   12 +
 libMems/AbstractGappedAlignment.h       |  109 +
 libMems/AbstractMatch.h                 |  392 +++
 libMems/Aligner.cpp                     | 2289 ++++++++++++++++++
 libMems/Aligner.h                       |  307 +++
 libMems/Backbone.cpp                    | 1203 ++++++++++
 libMems/Backbone.h                      |  240 ++
 libMems/ClustalInterface.cpp            |  576 +++++
 libMems/ClustalInterface.h              |  101 +
 libMems/CompactGappedAlignment.h        |  819 +++++++
 libMems/DNAFileSML.cpp                  |   68 +
 libMems/DNAFileSML.h                    |   66 +
 libMems/DNAMemorySML.cpp                |   48 +
 libMems/DNAMemorySML.h                  |   55 +
 libMems/DenseAbstractMatch.h            |  169 ++
 libMems/DistanceMatrix.h                |  327 +++
 libMems/FileSML.cpp                     |  679 ++++++
 libMems/FileSML.h                       |  135 ++
 libMems/Files.h                         |  213 ++
 libMems/GappedAligner.h                 |   73 +
 libMems/GappedAlignment.cpp             |   77 +
 libMems/GappedAlignment.h               |  283 +++
 libMems/GreedyBreakpointElimination.cpp |  994 ++++++++
 libMems/GreedyBreakpointElimination.h   |  873 +++++++
 libMems/HomologyHMM/algebras.cc         |   52 +
 libMems/HomologyHMM/algebras.h          |  558 +++++
 libMems/HomologyHMM/dptables.h          |  387 +++
 libMems/HomologyHMM/homology.cc         |  786 ++++++
 libMems/HomologyHMM/homology.h          |  188 ++
 libMems/HomologyHMM/homology.xml        |  217 ++
 libMems/HomologyHMM/homologymain.cc     |   65 +
 libMems/HomologyHMM/parameters.h        |  162 ++
 libMems/HybridAbstractMatch.h           |  315 +++
 libMems/Interval.cpp                    |   25 +
 libMems/Interval.h                      |  958 ++++++++
 libMems/IntervalList.cpp                |   25 +
 libMems/IntervalList.h                  |  842 +++++++
 libMems/Islands.cpp                     |  320 +++
 libMems/Islands.h                       |  417 ++++
 libMems/LCB.h                           |   70 +
 libMems/Makefile.am                     |   85 +
 libMems/MaskedMemHash.cpp               |   65 +
 libMems/MaskedMemHash.h                 |   44 +
 libMems/Match.h                         |   33 +
 libMems/MatchFinder.cpp                 |  444 ++++
 libMems/MatchFinder.h                   |  380 +++
 libMems/MatchHashEntry.cpp              |  203 ++
 libMems/MatchHashEntry.h                |  147 ++
 libMems/MatchList.cpp                   |   26 +
 libMems/MatchList.h                     |  668 ++++++
 libMems/MatchProjectionAdapter.h        |  142 ++
 libMems/Matrix.h                        |  174 ++
 libMems/MemHash.cpp                     |  330 +++
 libMems/MemHash.h                       |  208 ++
 libMems/Memory.h                        |   60 +
 libMems/MemorySML.cpp                   |   96 +
 libMems/MemorySML.h                     |   58 +
 libMems/MuscleInterface.cpp             | 1192 ++++++++++
 libMems/MuscleInterface.h               |  148 ++
 libMems/NumericMatrix.h                 |  164 ++
 libMems/PairwiseMatchAdapter.h          |  117 +
 libMems/PairwiseMatchFinder.cpp         |   73 +
 libMems/PairwiseMatchFinder.h           |   38 +
 libMems/ParallelMemHash.cpp             |  133 ++
 libMems/ParallelMemHash.h               |   75 +
 libMems/PhyloTree.cpp                   |    9 +
 libMems/PhyloTree.h                     |  378 +++
 libMems/ProgressiveAligner.cpp          | 3945 +++++++++++++++++++++++++++++++
 libMems/ProgressiveAligner.h            |  637 +++++
 libMems/RepeatHash.cpp                  |   64 +
 libMems/RepeatHash.h                    |   54 +
 libMems/RepeatMatch.cpp                 |   51 +
 libMems/RepeatMatch.h                   |   51 +
 libMems/RepeatMatchList.cpp             |  300 +++
 libMems/RepeatMatchList.h               |   66 +
 libMems/Scoring.h                       |  335 +++
 libMems/SeedMasks.h                     |  404 ++++
 libMems/SeedOccurrenceList.h            |  100 +
 libMems/SlotAllocator.cpp               |    5 +
 libMems/SlotAllocator.h                 |  170 ++
 libMems/SortedMerList.cpp               |  826 +++++++
 libMems/SortedMerList.h                 |  323 +++
 libMems/SparseAbstractMatch.h           |  250 ++
 libMems/SubstitutionMatrix.h            |  111 +
 libMems/SuperInterval.cpp               |  124 +
 libMems/SuperInterval.h                 |   81 +
 libMems/TreeUtilities.h                 |  138 ++
 libMems/UngappedLocalAlignment.h        |  227 ++
 libMems/configuration.h                 |   37 +
 libMems/dmSML/Makefile.am               |   22 +
 libMems/dmSML/aPOSIXaio.c               |  124 +
 libMems/dmSML/aPOSIXaio.h               |   18 +
 libMems/dmSML/alibc.c                   |   47 +
 libMems/dmSML/alibc.h                   |   15 +
 libMems/dmSML/alinuxaio.c               |  283 +++
 libMems/dmSML/alinuxaio.h               |   19 +
 libMems/dmSML/asyncio.c                 |  358 +++
 libMems/dmSML/asyncio.h                 |  166 ++
 libMems/dmSML/awin32aio.c               |  160 ++
 libMems/dmSML/awin32aio.h               |   18 +
 libMems/dmSML/buffer.c                  |  407 ++++
 libMems/dmSML/buffer.h                  |  203 ++
 libMems/dmSML/dmsort.c                  | 1942 +++++++++++++++
 libMems/dmSML/dmsort.h                  |  197 ++
 libMems/dmSML/sml.c                     |   55 +
 libMems/dmSML/sml.h                     |   79 +
 libMems/dmSML/sorting.c                 |  323 +++
 libMems/dmSML/sorting.h                 |   81 +
 libMems/dmSML/timing.c                  |  164 ++
 libMems/dmSML/timing.h                  |   24 +
 libMems/dmSML/util.c                    |  132 ++
 libMems/dmSML/util.h                    |   28 +
 libMems/gnAlignedSequences.cpp          | 1570 ++++++++++++
 libMems/gnAlignedSequences.h            |  401 ++++
 libMems/gnRAWSequence.h                 |  202 ++
 libMems/twister.c                       |  224 ++
 libMems/twister.h                       |   18 +
 m4/ac_cxx_namespaces.m4                 |   25 +
 m4/ax_openmp.m4                         |  104 +
 m4/ax_prog_doxygen.m4                   |  535 +++++
 m4/boost.m4                             | 1343 +++++++++++
 m4/pkg.m4                               |  156 ++
 projects/libMems.doxygen                |  212 ++
 projects/libMems.kdevprj                |  281 +++
 projects/libMems.sln                    |   20 +
 projects/libMems.vcproj                 | 1033 ++++++++
 137 files changed, 41096 insertions(+)

diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..7eb5af2
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1 @@
+Aaron Darling <darling(at)cs.wisc.edu>
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..d60c31a
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,340 @@
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+     59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+

+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+

+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+

+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+

+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+

+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year  name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..e69de29
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 0000000..02a4a07
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,167 @@
+Basic Installation
+==================
+
+   These are generic installation instructions.
+
+   The `configure' shell script attempts to guess correct values for
+various system-dependent variables used during compilation.  It uses
+those values to create a `Makefile' in each directory of the package.
+It may also create one or more `.h' files containing system-dependent
+definitions.  Finally, it creates a shell script `config.status' that
+you can run in the future to recreate the current configuration, a file
+`config.cache' that saves the results of its tests to speed up
+reconfiguring, and a file `config.log' containing compiler output
+(useful mainly for debugging `configure').
+
+   If you need to do unusual things to compile the package, please try
+to figure out how `configure' could check whether to do them, and mail
+diffs or instructions to the address given in the `README' so they can
+be considered for the next release.  If at some point `config.cache'
+contains results you don't want to keep, you may remove or edit it.
+
+   The file `configure.in' is used to create `configure' by a program
+called `autoconf'.  You only need `configure.in' if you want to change
+it or regenerate `configure' using a newer version of `autoconf'.
+
+The simplest way to compile this package is:
+
+  1. `cd' to the directory containing the package's source code and type
+     `./configure' to configure the package for your system.  If you're
+     using `csh' on an old version of System V, you might need to type
+     `sh ./configure' instead to prevent `csh' from trying to execute
+     `configure' itself.
+
+     Running `configure' takes a while.  While running, it prints some
+     messages telling which features it is checking for.
+
+  2. Type `make' to compile the package.
+
+  3. Type `make install' to install the programs and any data files and
+     documentation.
+
+  4. You can remove the program binaries and object files from the
+     source code directory by typing `make clean'.  
+
+Compilers and Options
+=====================
+
+   Some systems require unusual options for compilation or linking that
+the `configure' script does not know about.  You can give `configure'
+initial values for variables by setting them in the environment.  Using
+a Bourne-compatible shell, you can do that on the command line like
+this:
+     CC=c89 CFLAGS=-O2 LIBS=-lposix ./configure
+
+Or on systems that have the `env' program, you can do it like this:
+     env CPPFLAGS=-I/usr/local/include LDFLAGS=-s ./configure
+
+Compiling For Multiple Architectures
+====================================
+
+   You can compile the package for more than one kind of computer at the
+same time, by placing the object files for each architecture in their
+own directory.  To do this, you must use a version of `make' that
+supports the `VPATH' variable, such as GNU `make'.  `cd' to the
+directory where you want the object files and executables to go and run
+the `configure' script.  `configure' automatically checks for the
+source code in the directory that `configure' is in and in `..'.
+
+   If you have to use a `make' that does not support the `VPATH'
+variable, you have to compile the package for one architecture at a time
+in the source code directory.  After you have installed the package for
+one architecture, use `make distclean' before reconfiguring for another
+architecture.
+
+Installation Names
+==================
+
+   By default, `make install' will install the package's files in
+`/usr/local/bin', `/usr/local/man', etc.  You can specify an
+installation prefix other than `/usr/local' by giving `configure' the
+option `--prefix=PATH'.
+
+   You can specify separate installation prefixes for
+architecture-specific files and architecture-independent files.  If you
+give `configure' the option `--exec-prefix=PATH', the package will use
+PATH as the prefix for installing programs and libraries.
+Documentation and other data files will still use the regular prefix.
+
+   If the package supports it, you can cause programs to be installed
+with an extra prefix or suffix on their names by giving `configure' the
+option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
+
+Optional Features
+=================
+
+   Some packages pay attention to `--enable-FEATURE' options to
+`configure', where FEATURE indicates an optional part of the package.
+They may also pay attention to `--with-PACKAGE' options, where PACKAGE
+is something like `gnu-as' or `x' (for the X Window System).  The
+`README' should mention any `--enable-' and `--with-' options that the
+package recognizes.
+
+   For packages that use the X Window System, `configure' can usually
+find the X include and library files automatically, but if it doesn't,
+you can use the `configure' options `--x-includes=DIR' and
+`--x-libraries=DIR' to specify their locations.
+
+Specifying the System Type
+==========================
+
+   There may be some features `configure' can not figure out
+automatically, but needs to determine by the type of host the package
+will run on.  Usually `configure' can figure that out, but if it prints
+a message saying it can not guess the host type, give it the
+`--host=TYPE' option.  TYPE can either be a short name for the system
+type, such as `sun4', or a canonical name with three fields:
+     CPU-COMPANY-SYSTEM
+
+See the file `config.sub' for the possible values of each field.  If
+`config.sub' isn't included in this package, then this package doesn't
+need to know the host type.
+
+   If you are building compiler tools for cross-compiling, you can also
+use the `--target=TYPE' option to select the type of system they will
+produce code for and the `--build=TYPE' option to select the type of
+system on which you are compiling the package.
+
+Sharing Defaults
+================
+
+   If you want to set default values for `configure' scripts to share,
+you can create a site shell script called `config.site' that gives
+default values for variables like `CC', `cache_file', and `prefix'.
+`configure' looks for `PREFIX/share/config.site' if it exists, then
+`PREFIX/etc/config.site' if it exists.  Or, you can set the
+`CONFIG_SITE' environment variable to the location of the site script.
+A warning: not all `configure' scripts look for a site script.
+
+Operation Controls
+==================
+
+   `configure' recognizes the following options to control how it
+operates.
+
+`--cache-file=FILE'
+     Use and save the results of the tests in FILE instead of
+     `./config.cache'.  Set FILE to `/dev/null' to disable caching, for
+     debugging `configure'.
+
+`--help'
+     Print a summary of the options to `configure', and exit.
+
+`--quiet'
+`--silent'
+`-q'
+     Do not print messages saying which checks are being made.
+
+`--srcdir=DIR'
+     Look for the package's source code in directory DIR.  Usually
+     `configure' can determine that directory automatically.
+
+`--version'
+     Print the version of Autoconf used to generate the `configure'
+     script, and exit.
+
+`configure' also accepts some other, not widely useful, options.
+
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 0000000..19b47bc
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,14 @@
+## Top-level automake input: pulls in the Doxygen rules, ships the project
+## files, installs the pkg-config file and recurses into libMems/.
+ACLOCAL_AMFLAGS = -I m4
+include doxygen.am
+MOSTLYCLEANFILES = $(DX_CLEANFILES)
+
+EXTRA_DIST = \
+projects/libMems.doxygen \
+projects/libMems.sln \
+projects/libMems.vcproj
+
+## The .pc file name carries the API version substituted by configure
+## (GENERIC_API_VERSION is AC_SUBST'ed in configure.ac).
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = libMems-@GENERIC_API_VERSION@.pc
+
+SUBDIRS = libMems 
+
diff --git a/NEWS b/NEWS
new file mode 100644
index 0000000..e69de29
diff --git a/README b/README
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/README
@@ -0,0 +1 @@
+
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/TODO
@@ -0,0 +1 @@
+
diff --git a/autogen.sh b/autogen.sh
new file mode 100755
index 0000000..fc76ff6
--- /dev/null
+++ b/autogen.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+mkdir -p config
+autoreconf --force --install -I config  -I m4
+echo "Now run ./configure --with-boost=</path/to/boost> --prefix=$HOME && make install"
+echo "Add --disable-shared to the configure line if building on Mac OS X"
diff --git a/configure.ac b/configure.ac
new file mode 100644
index 0000000..39159dc
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,137 @@
+dnl Process this file with autoconf to produce a configure script.
+AC_PREREQ([2.59])
+AC_INIT(libMems/Match.h)
+AC_CONFIG_AUX_DIR(config)
+AC_CONFIG_MACRO_DIR([m4])
+
+dnl -----------------------------------------------
+dnl Package name and version number (user defined)
+dnl -----------------------------------------------
+
+GENERIC_LIBRARY_NAME=libMems
+
+#release versioning
+GENERIC_MAJOR_VERSION=1
+GENERIC_MINOR_VERSION=6
+GENERIC_MICRO_VERSION=0
+
+#API version (often = GENERIC_MAJOR_VERSION.GENERIC_MINOR_VERSION)
+GENERIC_API_VERSION=1.6
+AC_SUBST(GENERIC_API_VERSION)
+
+#shared library versioning
+GENERIC_LIBRARY_VERSION=1:0:0
+#                       | | |
+#                +------+ | +---+
+#                |        |     |
+#             current:revision:age
+#                |        |     |
+#                |        |     +- increment if interfaces have been added
+#                |        |        set to zero if interfaces have been removed
+#                                  or changed
+#                |        +- increment if source code has changed
+#                |           set to zero if current is incremented
+#                +- increment if interfaces have been added, removed or changed
+
+
+
+dnl --------------------------------
+dnl Package name and version number
+dnl --------------------------------
+
+AC_SUBST(GENERIC_LIBRARY_VERSION)
+
+PACKAGE=$GENERIC_LIBRARY_NAME
+AC_SUBST(GENERIC_LIBRARY_NAME)
+
+GENERIC_VERSION=$GENERIC_MAJOR_VERSION.$GENERIC_MINOR_VERSION.$GENERIC_MICRO_VERSION
+GENERIC_RELEASE=$GENERIC_MAJOR_VERSION.$GENERIC_MINOR_VERSION
+AC_SUBST(GENERIC_RELEASE)
+AC_SUBST(GENERIC_VERSION)
+
+VERSION=$GENERIC_VERSION
+
+AM_INIT_AUTOMAKE($PACKAGE, $VERSION, no-define)
+
+dnl Override default O2
+CFLAGS=${CFLAGS-""}
+CXXFLAGS=${CXXFLAGS-""}
+
+AC_PREFIX_DEFAULT(/usr/local)
+
+dnl Checks for programs.
+AC_PROG_CXX
+AC_PROG_INSTALL
+AC_PROG_LN_S 
+AM_PROG_LIBTOOL
+AC_SYS_LARGEFILE
+
+dnl Checks for header files.
+AC_HEADER_STDC
+
+dnl Check what compiler we're using
+dnl The expansions are quoted so that an empty or multi-word CC/CXX value
+dnl cannot break the test, and the portable "=" operator is used because
+dnl "==" is not accepted by every /bin/sh implementation of test.
+AM_CONDITIONAL(ICC, test "x$CXX" = "xicc")
+EXTRA_CXX_FLAGS=""
+if test "x$CC" = "xgcc"; then
+	EXTRA_CXX_FLAGS="-Wno-deprecated"
+fi
+AC_SUBST(EXTRA_CXX_FLAGS)
+
+dnl Allow debugging compilation
+AC_ARG_ENABLE(debug,
+[  --enable-debug    Turn on debugging],
+[case "${enableval}" in
+  yes) debug=true ;;
+  no)  debug=false ;;
+  *) AC_MSG_ERROR(bad value ${enableval} for --enable-debug) ;;
+esac],[debug=false])
+AM_CONDITIONAL(DEBUG, test x$debug = xtrue)
+
+dnl Get location of Boost
+BOOST_REQUIRE(1.34.0)
+AC_CXX_NAMESPACES
+BOOST_FILESYSTEM
+BOOST_PROGRAM_OPTIONS
+BOOST_IOSTREAMS
+
+dnl Get location of libGenome Headers
+PKG_CHECK_MODULES(DEPS, libGenome-1.3 >= 1.3.1  libMUSCLE-3.7 >= 1.0.0)
+AC_SUBST(DEPS_CFLAGS)
+
+dnl Check for OpenMP
+#AX_OPENMP()
+AC_SUBST(OPENMP_CFLAGS)
+dnl OpenMP checker only defines for C when compiling both C and C++
+OPENMP_CXXFLAGS=$OPENMP_CFLAGS
+AC_SUBST(OPENMP_CXXFLAGS)
+
+dnl ensure portability for OS X with these checks
+AC_CHECK_HEADERS(sys/types.h)
+AC_CHECK_HEADERS(sys/aio.h)
+AC_CHECK_HEADERS(aio.h)
+AC_CHECK_HEADERS(features.h)
+
+dnl certain parts of the library need async io and threads
+AC_CHECK_LIB(pthread, pthread_mutex_unlock)
+AC_CHECK_LIB(rt, aio_write)
+AC_SUBST(DEPS_LIBS)
+
+dnl Make doxygen docs
+DX_INIT_DOXYGEN( "libMems", "projects/libMems.doxygen", "doc" )
+
+AM_CONFIG_HEADER(config.h)
+
+dnl Checks for typedefs, structures, and compiler characteristics.
+AC_C_CONST
+AC_C_INLINE
+dnl AC_C_BIGENDIAN
+AC_HEADER_TIME
+
+dnl Checks for library functions.
+AC_PROG_GCC_TRADITIONAL
+
+dnl SAVE_LIBRARY_VERSION
+AC_SUBST(LIBTOOL_VERSION_INFO)
+
+AC_OUTPUT(Makefile libMems/Makefile libMems-1.6.pc )
+#doc/html/Makefile  doc/man/Makefile  doc/man/man3/Makefile)
diff --git a/doxygen.am b/doxygen.am
new file mode 100644
index 0000000..5046add
--- /dev/null
+++ b/doxygen.am
@@ -0,0 +1,157 @@
+## --------------------------------- ##
+## Format-independent Doxygen rules. ##
+## --------------------------------- ##
+
+if DX_COND_doc
+
+## ------------------------------- ##
+## Rules specific for HTML output. ##
+## ------------------------------- ##
+
+if DX_COND_html
+
+DX_CLEAN_HTML = @DX_DOCDIR@/html
+
+endif DX_COND_html
+
+## ------------------------------ ##
+## Rules specific for CHM output. ##
+## ------------------------------ ##
+
+if DX_COND_chm
+
+## Output removed by the clean rules when CHM generation is enabled.
+DX_CLEAN_CHM = @DX_DOCDIR@/chm
+
+if DX_COND_chi
+
+## Separate CHI index file, named after the package.
+DX_CLEAN_CHI = @DX_DOCDIR@/@PACKAGE@.chi
+
+endif DX_COND_chi
+
+endif DX_COND_chm
+
+## ------------------------------ ##
+## Rules specific for MAN output. ##
+## ------------------------------ ##
+
+if DX_COND_man
+
+DX_CLEAN_MAN = @DX_DOCDIR@/man
+
+endif DX_COND_man
+
+## ------------------------------ ##
+## Rules specific for RTF output. ##
+## ------------------------------ ##
+
+if DX_COND_rtf
+
+DX_CLEAN_RTF = @DX_DOCDIR@/rtf
+
+endif DX_COND_rtf
+
+## ------------------------------ ##
+## Rules specific for XML output. ##
+## ------------------------------ ##
+
+if DX_COND_xml
+
+DX_CLEAN_XML = @DX_DOCDIR@/xml
+
+endif DX_COND_xml
+
+## ----------------------------- ##
+## Rules specific for PS output. ##
+## ----------------------------- ##
+
+if DX_COND_ps
+
+DX_CLEAN_PS = @DX_DOCDIR@/@PACKAGE@.ps
+
+DX_PS_GOAL = doxygen-ps
+
+## Convenience target for the PostScript reference manual.
+doxygen-ps: @DX_DOCDIR@/@PACKAGE@.ps
+
+## Runs LaTeX, builds the index, runs LaTeX again, then repeats LaTeX (at most
+## 5 more times) while refman.log still requests a rerun, and finally converts
+## the DVI to PostScript.  The index is built with $(DX_MAKEINDEX), the same
+## variable the PDF rule uses.
+@DX_DOCDIR@/@PACKAGE@.ps: @DX_DOCDIR@/@PACKAGE@.tag
+	cd @DX_DOCDIR@/latex; \
+	rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out; \
+	$(DX_LATEX) refman.tex; \
+	$(DX_MAKEINDEX) refman.idx; \
+	$(DX_LATEX) refman.tex; \
+	countdown=5; \
+	while $(DX_EGREP) 'Rerun (LaTeX|to get cross-references right)' \
+	                  refman.log > /dev/null 2>&1 \
+	   && test $$countdown -gt 0; do \
+	    $(DX_LATEX) refman.tex; \
+	    countdown=`expr $$countdown - 1`; \
+	done; \
+	$(DX_DVIPS) -o ../@PACKAGE@.ps refman.dvi
+
+endif DX_COND_ps
+
+## ------------------------------ ##
+## Rules specific for PDF output. ##
+## ------------------------------ ##
+
+if DX_COND_pdf
+
+DX_CLEAN_PDF = @DX_DOCDIR@/@PACKAGE@.pdf
+
+DX_PDF_GOAL = doxygen-pdf
+
+## Convenience target for the PDF reference manual.
+doxygen-pdf: @DX_DOCDIR@/@PACKAGE@.pdf
+
+## Runs pdfLaTeX, builds the index, runs pdfLaTeX again, then repeats pdfLaTeX
+## (at most 5 more times) while refman.log still requests a rerun, and finally
+## moves refman.pdf next to the latex directory under the package name.
+@DX_DOCDIR@/@PACKAGE@.pdf: @DX_DOCDIR@/@PACKAGE@.tag
+	cd @DX_DOCDIR@/latex; \
+	rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out; \
+	$(DX_PDFLATEX) refman.tex; \
+	$(DX_MAKEINDEX) refman.idx; \
+	$(DX_PDFLATEX) refman.tex; \
+	countdown=5; \
+	while $(DX_EGREP) 'Rerun (LaTeX|to get cross-references right)' \
+	                  refman.log > /dev/null 2>&1 \
+	   && test $$countdown -gt 0; do \
+	    $(DX_PDFLATEX) refman.tex; \
+	    countdown=`expr $$countdown - 1`; \
+	done; \
+	mv refman.pdf ../@PACKAGE@.pdf
+
+endif DX_COND_pdf
+
+## ------------------------------------------------- ##
+## Rules specific for LaTeX (shared for PS and PDF). ##
+## ------------------------------------------------- ##
+
+if DX_COND_latex
+
+DX_CLEAN_LATEX = @DX_DOCDIR@/latex
+
+endif DX_COND_latex
+
+.PHONY: doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL)
+
+.INTERMEDIATE: doxygen-run $(DX_PS_GOAL) $(DX_PDF_GOAL)
+
+## doxygen-run only regenerates the tag file; doxygen-doc additionally builds
+## whichever of the PS/PDF goals are enabled.
+doxygen-run: @DX_DOCDIR@/@PACKAGE@.tag
+
+doxygen-doc: doxygen-run $(DX_PS_GOAL) $(DX_PDF_GOAL)
+
+## The documentation directory is wiped before doxygen runs, so every run
+## starts from an empty output tree.
+@DX_DOCDIR@/@PACKAGE@.tag: $(DX_CONFIG) $(pkginclude_HEADERS)
+	rm -rf @DX_DOCDIR@
+	$(DX_ENV) $(DX_DOXYGEN) $(srcdir)/$(DX_CONFIG)
+
+## NOTE(review): the bare "-r" entry is presumably there so that the rm
+## command built from this list also removes the directories that follow
+## it -- confirm against the clean rules automake generates.
+DX_CLEANFILES = \
+    @DX_DOCDIR@/@PACKAGE@.tag \
+    -r \
+    $(DX_CLEAN_HTML) \
+    $(DX_CLEAN_CHM) \
+    $(DX_CLEAN_CHI) \
+    $(DX_CLEAN_MAN) \
+    $(DX_CLEAN_RTF) \
+    $(DX_CLEAN_XML) \
+    $(DX_CLEAN_PS) \
+    $(DX_CLEAN_PDF) \
+    $(DX_CLEAN_LATEX)
+
+endif DX_COND_doc
+
diff --git a/libMems-1.6.pc.in b/libMems-1.6.pc.in
new file mode 100644
index 0000000..b7186e0
--- /dev/null
+++ b/libMems-1.6.pc.in
@@ -0,0 +1,12 @@
+# pkg-config template for libMems; every @NAME@ token is substituted by configure.
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: libMems 
+Description: c++ library supporting DNA sequence and genome alignment 
+Version: @VERSION@
+Requires: libGenome-1.3 libMUSCLE-3.7
+Libs: -L${libdir} @OPENMP_CXXFLAGS@ -lMems-@GENERIC_API_VERSION@ @BOOST_SYSTEM_LDFLAGS@ @BOOST_SYSTEM_LIBS@ @BOOST_FILESYSTEM_LIBS@ @BOOST_PROGRAM_OPTIONS_LIBS@ @BOOST_IOSTREAMS_LIBS@ @LIBS@ 
+Cflags: -I${includedir}/@GENERIC_LIBRARY_NAME@-@GENERIC_API_VERSION@ @BOOST_CPPFLAGS@ @OPENMP_CXXFLAGS@ @EXTRA_CXX_FLAGS@ 
+
diff --git a/libMems/AbstractGappedAlignment.h b/libMems/AbstractGappedAlignment.h
new file mode 100644
index 0000000..91b42bf
--- /dev/null
+++ b/libMems/AbstractGappedAlignment.h
@@ -0,0 +1,109 @@
+/*******************************************************************************
+ * $Id: AbstractGappedAlignment.h,v 1.12 2004/04/19 23:10:50 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef __AbstractGappedAlignment_h__
+#define __AbstractGappedAlignment_h__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/AbstractMatch.h"
+#include "libGenome/gnFilter.h"
+
+namespace mems {
+
+/**
+ * Layer on top of the match implementation AbstractMatchImpl that stores one
+ * length per sequence plus the total alignment length in columns.
+ */
+template<class AbstractMatchImpl>
+class AbstractGappedAlignment : public AbstractMatchImpl
+{
+public:
+	/** Creates an alignment whose alignment length is 0 */
+	AbstractGappedAlignment();
+	/** Creates an alignment with seq_count per-sequence lengths, all 0, and the given alignment length */
+	AbstractGappedAlignment( uint seq_count, gnSeqI align_length );
+	
+	/**
+	 * Sets the alignment 
+	 * @param seq_align	should be in row/column format, e.g. one string per sequence (row)
+	 */
+	virtual void SetAlignment( const std::vector< std::string >& seq_align ) = 0;
+
+	// Inherited methods from AbstractMatch:
+	/** Returns the stored length for sequence seqI, or the alignment length when seqI is UINT_MAX (the default) */
+	gnSeqI Length( uint seqI = UINT_MAX ) const; 
+	/** Stores len as the length for sequence seqI; seqI is not range-checked */
+	virtual void SetLength( gnSeqI len, uint seqI ) { length[ seqI ] = len; }
+
+	/** Returns the alignment length in columns */
+	gnSeqI AlignmentLength() const {return align_length;}
+	/** Sets the alignment length in columns */
+	void SetAlignmentLength(gnSeqI len){ align_length = len; }
+
+protected:
+	// for use by derived classes in order to swap contents
+	void swap( AbstractGappedAlignment* other );	
+private:
+	std::vector< gnSeqI > length;	// one entry per sequence, indexed by seqI
+	gnSeqI align_length;	// total number of alignment columns
+};
+
+
+/** Default constructor: no per-sequence lengths and an alignment length of 0 */
+template<class AbstractMatchImpl>
+AbstractGappedAlignment<AbstractMatchImpl>::AbstractGappedAlignment()
+	: AbstractMatchImpl(), align_length( 0 )
+{
+}
+
+/**
+ * Creates an alignment over seq_count sequences.
+ * Every per-sequence length starts at 0; the alignment length is set to align_length.
+ */
+template<class AbstractMatchImpl>
+AbstractGappedAlignment<AbstractMatchImpl>::AbstractGappedAlignment( uint seq_count, gnSeqI align_length )
+	: AbstractMatchImpl( seq_count ),
+	  length( seq_count, 0 ),
+	  align_length( align_length )
+{
+}
+
+/** Exchanges the per-sequence lengths, the alignment length and the base-class state with other */
+template<class AbstractMatchImpl>
+void AbstractGappedAlignment<AbstractMatchImpl>::swap( AbstractGappedAlignment* other )
+{
+	length.swap( other->length );
+	std::swap( align_length, other->align_length );
+	// the base class swaps its own members
+	AbstractMatchImpl::swap( other );
+}
+
+/**
+ * @param seqI	sequence index, or UINT_MAX to ask for the whole alignment
+ * @return	the stored length of sequence seqI, or the alignment length for UINT_MAX
+ */
+template<class AbstractMatchImpl>
+gnSeqI AbstractGappedAlignment<AbstractMatchImpl>::Length( uint seqI ) const 
+{
+	if( seqI != UINT_MAX )
+		return length[ seqI ];
+	return align_length;
+}
+
+//template<class AbstractGappedAlignmentImpl>
+void GetAlignment( const AbstractMatch& ga, const std::vector< genome::gnSequence* >& seq_table, std::vector<std::string>& alignment );
+
+/**
+ * Renders a match as text, one string per row of its alignment matrix.
+ * Each row starts as all '-' characters; for rows whose LeftEnd is not NO_MATCH
+ * the matched region is read from seq_table (reverse-filtered with the DNA
+ * complement filter when the orientation is reverse) and its characters are
+ * written, in order, into the columns whose bit is set in the alignment matrix.
+ * @param ga		the match to render
+ * @param seq_table	sequences indexed like the rows of the match
+ * @param alignment	output, overwritten with the rendered rows
+ */
+//template<class AbstractGappedAlignmentImpl>
+inline
+void GetAlignment( const AbstractMatch& ga, const std::vector< genome::gnSequence* >& seq_table, std::vector<std::string>& alignment )
+{
+	std::vector< bitset_t > aln_mat;
+	ga.GetAlignment( aln_mat );
+	const genome::gnFilter* comp_filter = genome::gnFilter::DNAComplementFilter();
+	alignment = std::vector<std::string>( aln_mat.size() );
+	for( std::size_t rowI = 0; rowI < alignment.size(); ++rowI )
+	{
+		std::string& row = alignment[rowI];
+		row = std::string( aln_mat[0].size(), '-' );
+		if( ga.LeftEnd(rowI) != NO_MATCH )
+		{
+			std::string bases;
+			seq_table[rowI]->ToString( bases, ga.Length(rowI), ga.LeftEnd(rowI) );
+			if( ga.Orientation(rowI) == AbstractMatch::reverse )
+				comp_filter->ReverseFilter(bases);
+			// walk the columns, consuming one base for every set bit
+			std::size_t baseI = 0;
+			for( std::size_t colI = 0; colI < row.size(); ++colI )
+			{
+				if( aln_mat[rowI][colI] )
+				{
+					row[colI] = bases[baseI];
+					++baseI;
+				}
+			}
+		}
+	}
+}
+
+}
+
+#endif // __AbstractGappedAlignment_h__
+
diff --git a/libMems/AbstractMatch.h b/libMems/AbstractMatch.h
new file mode 100644
index 0000000..79d94d4
--- /dev/null
+++ b/libMems/AbstractMatch.h
@@ -0,0 +1,392 @@
+/*******************************************************************************
+ * $Id: AbstractMatch.h,v 1.8 2004/02/27 23:08:55 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef __AbstractMatch_h__
+#define __AbstractMatch_h__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnClone.h"
+#include <vector>
+#include <algorithm>
+#include <boost/type_traits/remove_pointer.hpp>
+#include <boost/type_traits/add_pointer.hpp>
+#include <boost/dynamic_bitset.hpp>
+#include <libMems/SlotAllocator.h>
+#include <libMems/configuration.h>
+
+namespace mems {
+
+static const gnSeqI NO_MATCH = 0;
+
+
+#ifdef WIN32 
+/** define this to force all matches to use boost allocators instead of new/delete */
+//#define _USE_BOOST_MATCH_ALLOCATOR
+//typedef boost::dynamic_bitset<unsigned, boost::pool_allocator<unsigned> > bitset_t;
+
+// slot allocator turns out to have the fastest new/free implementation for single object allocations
+#define _USE_SLOT_ALLOCATOR
+#else
+#define _USE_SLOT_ALLOCATOR
+#endif
+typedef boost::dynamic_bitset<> bitset_t;
+
+#ifdef _USE_SLOT_ALLOCATOR
+#include "libMems/SlotAllocator.h"
+#elif defined(_USE_BOOST_MATCH_ALLOCATOR)
+#include <boost/pool/pool_alloc.hpp>
+#endif
+
+/**
+ * Allocates storage for a T with the allocator selected at compile time and
+ * copy-constructs t into it.
+ * @param t	the object to copy
+ * @return	pointer to the newly constructed copy
+ */
+template< typename T >
+T* m_allocateAndCopy( const T& t )
+{
+#ifdef _USE_SLOT_ALLOCATOR
+	// take raw storage from the slot allocator and construct the copy in place
+	T* slot = SlotAllocator<T>::GetSlotAllocator().Allocate();
+	return new(slot) T(t);
+#elif defined(_USE_BOOST_MATCH_ALLOCATOR)
+	boost::fast_pool_allocator< T > pool;
+	T* copy = boost::fast_pool_allocator< T >::allocate();
+	pool.construct(copy, t);
+	return copy;
+#else
+	return new T(t);
+#endif
+}
+
+/**
+ * Releases an object through the allocator selected at compile time
+ * (slot allocator, boost pool allocator, or plain delete).
+ * @param t	the object to release
+ */
+template< typename T >
+void m_free( T* t )
+{
+#ifdef _USE_SLOT_ALLOCATOR
+	SlotAllocator<T>::GetSlotAllocator().Free(t);
+#elif defined(_USE_BOOST_MATCH_ALLOCATOR)
+	boost::fast_pool_allocator< T > pool;
+	pool.destroy(t);
+	boost::fast_pool_allocator< T >::deallocate(t);
+#else
+	delete t;
+#endif
+}
+
+/**
+ * AbstractMatch is a pure virtual base class that defines an interface for 
+ * both gapped and ungapped alignments among several sequences or several regions
+ * of the same sequence 
+ */
+class AbstractMatch : public genome::gnClone {
+public:
+	
+	enum orientation {
+		forward,	/**< the alignment is on the forward strand */
+		reverse,	/**< alignment on the reverse strand */
+		undefined	/**< there is no alignment on either strand */
+	};
+
+	/** creates a copy of this; NOTE(review): presumably allocated via m_allocateAndCopy() (slot allocator by default) -- confirm in implementations */
+	virtual AbstractMatch* Copy() const = 0;
+
+	/** frees storage used by this object; NOTE(review): presumably the counterpart of Copy(), via m_free() -- confirm in implementations */
+	virtual void Free() = 0;
+	
+	/** Returns the length of this match in sequence @param seqI */
+	virtual gnSeqI Length( uint seqI ) const = 0;
+
+	/** Sets the length of this match in sequence @param seqI to @param len */
+	virtual void SetLength( gnSeqI len, uint seqI ) = 0;
+
+	/** Deprecated:  use LeftEnd() and Orientation() instead.
+	 * Returns the start coordinate of this match in sequence @param startI */
+	virtual int64 Start(uint startI) const = 0;
+
+	/** Deprecated: use SetLeftEnd() and SetOrientation instead
+	 * Sets the start in sequence @param seqI of this match to @param start */
+	virtual void SetStart(uint seqI, int64 start) = 0;
+
+	/** Deprecated: use LeftEnd() instead
+	 * Returns the start coordinate of this match in sequence @param seqI */
+	int64 operator[](uint seqI) const{return Start(seqI);}	// this is a synonym for Start()
+
+	/** Deprecated: use RightEnd() instead
+	 * Returns Start(seqI) + Length(seqI) - 1 when Start(seqI) is positive, otherwise Start(seqI) */
+	virtual int64 End(uint seqI) const;
+
+	/** Returns the left end coordinate of this match at the seqI'th matching position/sequence */
+	virtual gnSeqI LeftEnd(uint seqI) const = 0;
+
+	/** Returns the right-end coordinate of this match at the seqI'th matching position/sequence 
+	    (equal to LeftEnd(seqI) + Length(seqI) - 1) */
+	virtual gnSeqI RightEnd(uint seqI) const{ return LeftEnd(seqI) + Length( seqI ) - 1; };
+
+	/** Returns the orientation of this match at the seqI'th matching position/sequence, 
+	 *  one of the values of the orientation enum
+	 */
+	virtual orientation Orientation(uint seqI) const = 0;
+
+	/** sets the left end coordinate of this match in the seqI'th matching position/sequence */
+	virtual void SetLeftEnd(uint seqI, gnSeqI start) = 0;
+
+	/** sets the relative orientation of this match in the seqI'th matching position/sequence */
+	virtual void SetOrientation(uint seqI, orientation o) = 0;
+
+	/** Shift the left-end coordinates in forward oriented positions by a given amount */
+	virtual void MoveStart(int64 move_amount) = 0;
+	/** Shift the left-end coordinates  in reverse oriented positions by a given amount */
+	virtual void MoveEnd(int64 move_amount) = 0;
+
+	/** Returns the multiplicity of the match.  e.g. the number of sequences this match occurs in */
+	virtual uint Multiplicity() const = 0;
+
+	/** Returns the number of sequences in the alignment which contains this match */
+	virtual uint SeqCount() const = 0;
+
+	/** Returns the index of the first sequence this match occurs in */
+	virtual uint FirstStart() const = 0;
+	
+	/** Returns the total length of this alignment in columns */
+	virtual gnSeqI AlignmentLength() const = 0;
+
+	/** Inverts the coordinates of this match */
+	virtual void Invert() = 0;
+	
+	//warning:  none of the following do bounds checking.
+	/** 
+	 * Deprecated:  Use CropLeft and CropRight instead
+	 * Removes the first <code>crop_amount</code> base pairs from the beginning of the match.
+	 */
+	virtual void CropStart(gnSeqI crop_amount) = 0;
+	/** 
+	 * Deprecated:  Use CropLeft and CropRight instead
+	 * Removes the last <code>crop_amount</code> base pairs from the end of the match.
+	 */
+	virtual void CropEnd(gnSeqI crop_amount) = 0;
+
+	/**
+	 * Crop this match from the left
+	 * Removes the first <code>crop_amount</code> positions from the left side of the match.
+	 */
+	virtual void CropLeft(gnSeqI crop_amount, uint seqI) = 0;
+	/**
+	 * Crop this match from the right
+	 * Removes the last <code>crop_amount</code> positions from the right side of the match.
+	 */
+	virtual void CropRight(gnSeqI crop_amount, uint seqI) = 0;
+	
+//	virtual AbstractMatch* Split( gnSeqI before_column ) = 0;
+
+	/**
+	 * Gets a copy of the alignment as an array of dynamic_bitsets
+	 */
+	virtual void GetAlignment( std::vector< bitset_t >& align_matrix ) const = 0;
+
+	/** Given an alignment column index, this function returns the corresponding sequence coordinates
+	 *  and whether each sequence is aligned in that column 
+	 *  If a given sequence is not represented in the requested column, the position returned 
+	 *  in pos should be that of the first nucleotide to the left of the requested column.  If no
+	 *  nucleotides exist to the left of the requested column, then a NO_MATCH is returned in pos
+	 *  for that sequence.
+	 */
+	virtual void GetColumn( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column ) const = 0;
+
+//	gnSeqI SeqPosToColumn( uint seq, int64 pos) const = 0;
+	/** returns true if the given row,column of the alignment has a gap character */
+	virtual bool IsGap( uint seq, gnSeqI col ) const = 0;
+	/** Returns the id of the i-th defined sequence in this match */ 
+	virtual uint UsedSeq( uint seqI ) const = 0;
+};
+
+/**
+ * Deprecated end-coordinate accessor.
+ * @return	Start(endI) + Length(endI) - 1 when Start(endI) is positive,
+ *		otherwise Start(endI) unchanged
+ */
+inline
+int64 AbstractMatch::End(uint endI) const
+{
+	const int64 start = Start(endI);
+	if( start <= 0 )
+		return start;
+	return start + Length(endI) - 1;
+}
+
+
+/**
+ * Comparison functor for matches passed by reference.
+ * Matches are ordered first by max( FirstStart(), m_seq ); when that is equal
+ * they are compared sequence by sequence, starting at m_seq, on the left end
+ * (forward orientation) or right end (any other orientation).  Sequences where
+ * either coordinate is NO_MATCH, or where both coordinates are equal, are skipped.
+ */
+template< typename MatchType >
+class AbstractMatchStartComparator {
+public:
+	/** @param seq	index of the first sequence to compare on */
+	AbstractMatchStartComparator( unsigned seq = 0 ){
+		m_seq = seq;
+	}
+	AbstractMatchStartComparator( const AbstractMatchStartComparator& msc ){
+		m_seq = msc.m_seq;
+	}
+	AbstractMatchStartComparator<MatchType>& operator=( const AbstractMatchStartComparator<MatchType>& msc )
+	{
+		m_seq = msc.m_seq;
+		// a value-returning function must not flow off its end (undefined behavior)
+		return *this;
+	}
+	// TODO??  make this do a wraparound comparison if all is equal?
+	/** @return	true when a orders before b */
+	boolean operator()(const MatchType& a, const MatchType& b) const{
+		// compare a against b; subtracting a's value from itself would always give 0
+		int start_diff = std::max( a.FirstStart(), m_seq ) - std::max( b.FirstStart(), m_seq );
+		if(start_diff == 0){
+			// only sequences present in both matches can be compared
+			uint m_count = a.SeqCount();
+			m_count = m_count <= b.SeqCount() ? m_count : b.SeqCount();
+			for(uint seqI = m_seq; seqI < m_count; seqI++){
+				gnSeqI a_start = a.Orientation(seqI) == AbstractMatch::forward ? a.LeftEnd( seqI ) : a.RightEnd( seqI );
+				gnSeqI b_start = b.Orientation(seqI) == AbstractMatch::forward ? b.LeftEnd( seqI ) : b.RightEnd( seqI );
+				if(a_start == NO_MATCH || b_start == NO_MATCH)
+					continue;
+				else if(a_start == b_start)
+					continue;
+				else
+					return a_start < b_start;
+			}
+		}
+		return start_diff < 0;
+	}
+private:
+	unsigned m_seq;	// first sequence index considered by the comparison
+};
+
+/**
+ * Comparison functor for matches passed by reference that looks at the
+ * left-end coordinate in a single sequence, m_seq.
+ */
+template< typename MatchType >
+class AbstractMatchSingleStartComparator {
+public:
+	/** @param seq	index of the sequence to compare on */
+	AbstractMatchSingleStartComparator( unsigned seq = 0 ){
+		m_seq = seq;
+	}
+	AbstractMatchSingleStartComparator( const AbstractMatchSingleStartComparator& msc ){
+		m_seq = msc.m_seq;
+	}
+	AbstractMatchSingleStartComparator<MatchType>& operator=( const AbstractMatchSingleStartComparator<MatchType>& msc )
+	{
+		m_seq = msc.m_seq;
+		// a value-returning function must not flow off its end (undefined behavior)
+		return *this;
+	}
+	/**
+	 * Compare on only one sequence.  Undefined matches are less than defined matches
+	 * @return	true when a orders before b; false when both are undefined in m_seq
+	 */
+	boolean operator()(const MatchType& a, const MatchType& b) const{
+		int64 a_start = a.LeftEnd( m_seq ), b_start = b.LeftEnd( m_seq );
+		if( a_start == NO_MATCH || b_start == NO_MATCH ){
+			// reaching here with b defined means a is the undefined one
+			if( b_start != NO_MATCH )
+				return true;
+			return false;
+		}
+
+		return a_start < b_start;
+	}
+private:
+	unsigned m_seq;	// sequence index the comparison is made on
+};
+
+
+
+/**
+ * Comparison functor for matches passed by pointer.
+ * Matches are ordered first by max( FirstStart(), m_seq ); when that is equal
+ * they are compared sequence by sequence, starting at m_seq, on the left end
+ * (forward orientation) or right end (any other orientation).  Sequences where
+ * either coordinate is NO_MATCH, or where both coordinates are equal, are skipped.
+ */
+template< typename MatchType >
+class MatchStartComparator {
+public:
+	/** @param seq	index of the first sequence to compare on */
+	MatchStartComparator( unsigned seq = 0 ){
+		m_seq = seq;
+	}
+	MatchStartComparator( const MatchStartComparator& msc ){
+		m_seq = msc.m_seq;
+	}
+	MatchStartComparator<MatchType>& operator=( const MatchStartComparator<MatchType>& msc )
+	{
+		m_seq = msc.m_seq;
+		// a value-returning function must not flow off its end (undefined behavior)
+		return *this;
+	}
+	// TODO??  make this do a wraparound comparison if all is equal?
+	/** @return	true when *a orders before *b */
+	boolean operator()(const MatchType* a, const MatchType* b) const{
+		// compare a against b; subtracting a's value from itself would always give 0
+		int start_diff = std::max( a->FirstStart(), m_seq ) - std::max( b->FirstStart(), m_seq );
+		if(start_diff == 0){
+			// only sequences present in both matches can be compared
+			uint m_count = a->SeqCount();
+			m_count = m_count <= b->SeqCount() ? m_count : b->SeqCount();
+			for(uint seqI = m_seq; seqI < m_count; seqI++){
+				gnSeqI a_start = a->Orientation(seqI) == AbstractMatch::forward ? a->LeftEnd( seqI ) : a->RightEnd( seqI );
+				gnSeqI b_start = b->Orientation(seqI) == AbstractMatch::forward ? b->LeftEnd( seqI ) : b->RightEnd( seqI );
+				if(a_start == NO_MATCH || b_start == NO_MATCH)
+					continue;
+				else if(a_start == b_start)
+					continue;
+				else
+					return a_start < b_start;
+			}
+		}
+		return start_diff < 0;
+	}
+private:
+	unsigned m_seq;	// first sequence index considered by the comparison
+};
+
+/**
+ * Comparison functor for matches passed by pointer that looks at the
+ * left-end coordinate in a single sequence, m_seq.
+ */
+template< typename MatchType >
+class SingleStartComparator {
+public:
+	/** @param seq	index of the sequence to compare on */
+	SingleStartComparator( unsigned seq = 0 ){
+		m_seq = seq;
+	}
+	SingleStartComparator( const SingleStartComparator& msc ){
+		m_seq = msc.m_seq;
+	}
+	SingleStartComparator<MatchType>& operator=( const SingleStartComparator<MatchType>& msc )
+	{
+		m_seq = msc.m_seq;
+		// a value-returning function must not flow off its end (undefined behavior)
+		return *this;
+	}
+	/**
+	 * Compare on only one sequence.  Undefined matches are less than defined matches
+	 * @return	true when *a orders before *b; false when both are undefined in m_seq
+	 */
+	boolean operator()(const MatchType* a, const MatchType* b) const{
+		int64 a_start = a->LeftEnd( m_seq ), b_start = b->LeftEnd( m_seq );
+		if( a_start == NO_MATCH || b_start == NO_MATCH ){
+			// reaching here with b defined means a is the undefined one
+			if( b_start != NO_MATCH )
+				return true;
+			return false;
+		}
+
+		return a_start < b_start;
+	}
+private:
+	unsigned m_seq;	// sequence index the comparison is made on
+};
+
+
+/**
+ * Single-sequence start comparator with two call operators: one taking
+ * boost::add_pointer<MatchType>::type arguments, which dereferences them and
+ * forwards to the other, and one taking boost::remove_pointer<MatchType>::type
+ * references, which compares left-end coordinates in sequence m_seq.
+ */
+template< typename MatchType >
+class SSC {
+public:
+	/** @param seq	index of the sequence to compare on */
+	SSC( unsigned seq = 0 ){
+		m_seq = seq;
+	}
+	SSC( const SSC<MatchType>& msc ){
+		m_seq = msc.m_seq;
+	}
+	SSC<MatchType>& operator=( const SSC<MatchType>& msc )
+	{
+		m_seq = msc.m_seq;
+		// a value-returning function must not flow off its end (undefined behavior)
+		return *this;
+	}
+	/** Pointer form: dereferences both arguments and forwards to the reference overload */
+	boolean operator()( const typename boost::add_pointer<MatchType>::type& a, 
+		const typename boost::add_pointer<MatchType>::type& b) const
+	{
+		return operator()(*a,*b);
+	}
+	/**
+	 * Compare on only one sequence.  Undefined matches are less than defined matches
+	 * @return	true when a orders before b; false when both are undefined in m_seq
+	 */
+	boolean operator()(const typename boost::remove_pointer<MatchType>::type& a, 
+		const typename boost::remove_pointer<MatchType>::type& b) const{
+		int64 a_start = a.LeftEnd( m_seq ), b_start = b.LeftEnd( m_seq );
+		if( a_start == NO_MATCH || b_start == NO_MATCH ){
+			// reaching here with b defined means a is the undefined one
+			if( b_start != NO_MATCH )
+				return true;
+			return false;
+		}
+
+		return a_start < b_start;
+	}
+private:
+	unsigned m_seq;	// sequence index the comparison is made on
+};
+
+}
+
+#endif // __AbstractMatch_h__
diff --git a/libMems/Aligner.cpp b/libMems/Aligner.cpp
new file mode 100644
index 0000000..2fa6ee2
--- /dev/null
+++ b/libMems/Aligner.cpp
@@ -0,0 +1,2289 @@
+/*******************************************************************************
+ * $Id: Aligner.cpp,v 1.47 2004/04/19 23:10:30 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#include "libMems/Aligner.h"
+#include "libMems/Islands.h"
+#include "libMems/DNAFileSML.h"
+#include "libMems/MuscleInterface.h"	// it's the default gapped aligner
+#include "libGenome/gnRAWSource.h"
+#include "libMems/DistanceMatrix.h"
+#include "libMems/Files.h"
+
+#include <map>
+#include <fstream>	// for debugging
+#include <sstream>
+#include <stack>
+#include <algorithm>
+#include <limits>
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+
+boolean validateLCB( MatchList& lcb );
+void validateRangeIntersections( vector< MatchList >& lcb_list  );
+bool debug_shite = false;
+
+/**
+ * Debugging aid: checks that the matches of one LCB appear in non-decreasing
+ * Start() order in every sequence.  Matches undefined in a sequence are
+ * ignored for that sequence.
+ * @param lcb  the matches of a single LCB, in the order to be verified
+ * @return true if no out-of-order pair was seen (an empty list is valid)
+ */
+boolean validateLCB( MatchList& lcb ){
+	if( lcb.size() == 0 )
+		return true;
+
+	const uint n_seqs = (*lcb.begin())->SeqCount();
+	boolean out_of_order = false;
+	for( uint seqI = 0; seqI < n_seqs; seqI++ ){
+		// 0 doubles as "no previous coordinate seen yet"
+		int64 prev_coord = 0;
+		for( vector< Match* >::iterator cur = lcb.begin(); cur != lcb.end(); cur++ ){
+			const int64 cur_coord = (*cur)->Start( seqI );
+			if( cur_coord == NO_MATCH )
+				continue;
+			if( prev_coord != 0 && cur_coord < prev_coord )
+				out_of_order = true;
+			prev_coord = cur_coord;
+		}
+	}
+	return !out_of_order;
+}
+
+/**
+ * Delete overlapping regions in favor of the larger match.
+ * This code isn't perfect, it can delete too many base pairs in some cases
+ *
+ * For each sequence in turn the list is sorted on that sequence and scanned
+ * for pairs whose ranges overlap there.  The "losing" match of a pair (lower
+ * multiplicity, or equal multiplicity and not longer) is either cropped or
+ * removed entirely.  A copy of the loser, with this sequence set to undefined,
+ * is appended to the list when it still has multiplicity > 1 and length > 0.
+ * @param ml  list of matches, modified in place (entries may be freed, cropped or added)
+ */
+void EliminateOverlaps( MatchList& ml ){
+	if( ml.size() < 2 )
+		return;
+	vector< Match* > result_matches;
+	uint seq_count = ml[0]->SeqCount();
+	for( uint seqI = 0; seqI < seq_count; seqI++ ){
+		SingleStartComparator<AbstractMatch> msc( seqI );
+		sort( ml.begin(), ml.end(), msc );
+		int64 matchI = 0;
+		int64 nextI = 0;
+		int64 deleted_count = 0;	// number of entries set to NULL during this pass
+		vector< Match* > new_matches;	// leftover pieces, appended once the pass is done
+
+		// scan forward to first defined match
+		for(; matchI != ml.size(); matchI++ )
+			if( ml[ matchI ]->Start( seqI ) != NO_MATCH )
+				break;
+
+		for(; matchI < ml.size(); matchI++ ){
+			// NULL entries are matches already deleted earlier in this pass
+			if( ml[ matchI ] == NULL )
+				continue;
+			
+			for( nextI = matchI + 1; nextI < ml.size(); nextI++ ){
+				if( ml[ nextI ] == NULL )
+					continue;
+
+				boolean deleted_matchI = false;
+				// check for overlaps
+				int64 startI = ml[ matchI ]->Start( seqI );
+				int64 lenI = ml[ matchI ]->Length();
+				int64 startJ = ml[ nextI ]->Start( seqI );
+//				int64 diff =  absolut( startJ ) - absolut( startI ) - lenI;
+				// distance from the end of matchI to the start of nextI;
+				// a negative value means the two overlap by -diff positions
+				int64 diff =  absolut( startJ ) - absolut( startI ) - lenI;
+
+				if( diff < 0 ){
+					diff = -diff;
+					Match* new_match;
+					// delete bases from the smaller match
+//					if( ml[ nextI ]->Length() * ml[ nextI ]->Multiplicity() >= 
+//						lenI * ml[ matchI ]->Multiplicity() ){
+					if( ( ml[ nextI ]->Multiplicity() > ml[ matchI ]->Multiplicity() ) ||
+ 						( ml[ nextI ]->Multiplicity() == ml[ matchI ]->Multiplicity() && ml[ nextI ]->Length() > ml[ matchI ]->Length() ) ){
+						// ml[ matchI ] is the smaller match
+						new_match = ml[matchI]->Copy();
+						// erase base pairs from new_match
+						if( diff >= lenI ){
+							// the overlap covers all of matchI: drop it completely
+//							cerr << "Deleting " << **mem_iter << " at the hands of\n" << **next_iter << endl;
+							ml[ matchI ]->Free();
+							ml[ matchI ] = NULL;
+							// the outer loop's increment brings matchI back to this
+							// (now NULL) slot, which is then skipped
+							matchI--;
+							deleted_matchI = true;
+							deleted_count++;
+						}else{
+							// which end gets cropped depends on the sign of the start coordinate
+							if( startI > 0 ){
+								ml[ matchI ]->CropEnd( diff );
+								new_match->CropStart( new_match->Length() - diff );
+							}else{
+								ml[ matchI ]->CropStart( diff );
+								new_match->CropEnd( new_match->Length() - diff );
+							}
+						}
+					}else{
+						// ml[ nextI ] is the smaller match
+						new_match = ml[nextI]->Copy();
+						// erase base pairs from new_match
+						if( diff >= ml[ nextI ]->Length() ){
+							// the overlap covers all of nextI: drop it completely
+//							cerr << "Deleting " << **next_iter << " at the hands of\n" << **mem_iter << endl;
+							ml[ nextI ]->Free();
+							ml[ nextI ] = NULL;
+							deleted_count++;
+						}else{
+							if( startJ > 0 ){
+								ml[ nextI ]->CropStart( diff );
+								new_match->CropEnd( new_match->Length() - diff );
+							}else{
+								ml[ nextI ]->CropEnd( diff );
+								new_match->CropStart( new_match->Length() - diff );
+							}
+						}
+
+					}
+					// the copy is made undefined in this sequence and kept only
+					// if it still spans more than one sequence and is non-empty
+					new_match->SetStart( seqI, 0 );
+					if( new_match->Multiplicity() > 1 && new_match->Length() > 0 )
+						new_matches.push_back( new_match );
+					else
+					{
+						new_match->Free();
+						new_match = NULL;
+					}
+					// matchI no longer exists, so stop comparing against it
+					if( deleted_matchI )
+						break;
+				}else
+					break;	// there are no more overlaps
+			}
+//			if( nextI > 1 )
+//				cerr << "There were " << nextI << " overlaps\n";
+//			if( nextI > config_value_2 )
+//				__asm(nop);
+		}
+
+		// compact the list by dropping the NULL placeholders
+		if( deleted_count > 0 ){
+			result_matches.reserve( ml.size() - deleted_count );
+			for( int64 copyI = 0; copyI < ml.size(); copyI++ ){
+				if( ml[ copyI ] != NULL )
+					result_matches.push_back( ml[ copyI ] );
+			}
+			ml.clear();
+			ml.insert( ml.end(), result_matches.begin(), result_matches.end() );
+		}
+		// the leftover pieces take part in the passes over the remaining sequences
+		ml.insert( ml.end(), new_matches.begin(), new_matches.end() );
+		result_matches.clear();
+		new_matches.clear();
+	}
+		
+}
+
+
+/** Default value used for min_recursive_gap_length by the constructor below */
+const gnSeqI default_min_r_gap_size = 200;
+/**
+ * Construct an aligner for the given number of sequences, using the
+ * MuscleInterface singleton as the gapped aligner.
+ * NOTE(review): LCB_minimum_density and LCB_minimum_range are not initialised
+ * here although the copy constructor and operator= read them -- confirm they
+ * are always set before an Aligner is copied.
+ * @param seq_count  number of sequences to be aligned
+ */
+Aligner::Aligner( uint seq_count ) :
+debug(false),
+seq_count(seq_count),
+min_recursive_gap_length(default_min_r_gap_size),
+collinear_genomes(false),
+gal(&(MuscleInterface::getMuscleInterface())),
+permutation_weight(-1),
+cur_min_coverage(-1),
+max_extension_iters(4)
+{}
+
+/**
+ * Copy constructor.  Copies every member listed below; gap_mh is deliberately
+ * left out (its initialiser is commented out), so it is default-constructed.
+ * NOTE(review): operator= does copy gap_mh -- confirm the difference is intended.
+ */
+Aligner::Aligner( const Aligner& al ) :
+//gap_mh( al.gap_mh ),
+nway_mh( al.nway_mh ),
+seq_count( al.seq_count ),
+debug( al.debug),
+LCB_minimum_density( al.LCB_minimum_density),
+LCB_minimum_range( al.LCB_minimum_range ),
+cur_min_coverage( al.cur_min_coverage),
+min_recursive_gap_length( al.min_recursive_gap_length ),
+collinear_genomes( al.collinear_genomes ),
+gal( al.gal ),	// the gapped aligner pointer is shared, not cloned
+permutation_weight( al.permutation_weight ),
+permutation_filename( al.permutation_filename ),
+max_extension_iters( al.max_extension_iters )
+{}
+
+/**
+ * Member-wise assignment.  Unlike the copy constructor this also copies gap_mh.
+ * The gapped aligner pointer (gal) is copied as a pointer, so both objects
+ * refer to the same GappedAligner afterwards.
+ * @return reference to this object
+ */
+Aligner& Aligner::operator=( const Aligner& al )
+{
+	gap_mh = al.gap_mh;
+	nway_mh = al.nway_mh;
+	seq_count = al.seq_count;
+	debug = al.debug;
+	
+	LCB_minimum_density = al.LCB_minimum_density;
+	LCB_minimum_range = al.LCB_minimum_range;
+	
+	cur_min_coverage = al.cur_min_coverage;
+	min_recursive_gap_length = al.min_recursive_gap_length;
+	collinear_genomes = al.collinear_genomes;
+
+	gal = al.gal;
+
+	permutation_weight = al.permutation_weight;
+	permutation_filename = al.permutation_filename;
+
+	max_extension_iters = al.max_extension_iters;
+
+	return *this;
+}
+
+/** Store a new value for min_recursive_gap_length. */
+void Aligner::SetMinRecursionGapLength( gnSeqI min_r_gap )
+{
+	this->min_recursive_gap_length = min_r_gap;
+}
+
+/**
+ * Select the gapped aligner to use.  Only the address of the argument is
+ * kept, so the referenced object must outlive this Aligner.
+ */
+void Aligner::SetGappedAligner( GappedAligner& gal )
+{
+	// the parameter shadows the member of the same name, hence this->
+	this->gal = &gal;
+}
+
+/** Forward the maximum alignment length to the currently selected gapped aligner. */
+void Aligner::SetMaxGappedAlignmentLength( gnSeqI len )
+{
+	this->gal->SetMaxAlignmentLength( len );
+}
+
+
+/**
+ * Extend start_label across a run of consecutive labels that are all present
+ * in no_match_labels.  Nothing is returned; the result is written back
+ * through start_label.
+ *  - forward:  start_label becomes the last label of the run that begins at
+ *              start_label + 1 (unchanged if start_label + 1 is not in the set)
+ *  - backward: start_label becomes the smallest label L such that L - 1 is not
+ *              in the set, scanning down from start_label
+ * NOTE(review): in the backward case, if every label below start_label down to
+ * 0 is in the set, start_label is left unchanged rather than set to 0 --
+ * confirm this is intended.
+ */
+void scanLabels( set< uint >& no_match_labels, uint& start_label, boolean forward ){
+	if( forward ){
+		// walk up until the first label that is missing from the set
+		uint probe = start_label + 1;
+		while( no_match_labels.find( probe ) != no_match_labels.end() )
+			probe++;
+		start_label = probe - 1;
+		return;
+	}
+
+	// walk down until the label just below the current one is missing
+	for( uint probe = start_label; probe > 0; probe-- ){
+		if( no_match_labels.find( probe - 1 ) == no_match_labels.end() ){
+			start_label = probe;
+			return;
+		}
+	}
+}
+
+/**
+ * Test whether m1 may precede m2 in every sequence where both are defined.
+ * For each such sequence the two start coordinates must have the same sign
+ * and m1's start must not exceed m2's.
+ * @return true if no sequence violates the condition
+ */
+boolean checkCollinearity( Match* m1, Match* m2 ){
+	for( uint seqI = 0; seqI < m1->SeqCount(); seqI++ ){
+		const int64 first_start = m1->Start( seqI );
+		const int64 second_start = m2->Start( seqI );
+
+		// sequences where either match is undefined impose no constraint
+		if( first_start == NO_MATCH || second_start == NO_MATCH )
+			continue;
+
+		const bool both_forward = first_start > 0 && second_start > 0;
+		const bool both_reverse = first_start < 0 && second_start < 0;
+		if( !( both_forward || both_reverse ) )
+			return false;
+		if( first_start > second_start )
+			return false;
+	}
+	return true;
+}
+
+/**
+ * Choose a position in pair_list, at or after list_iter, for new_match.
+ * Candidates run from list_iter up to the first entry whose absolute start in
+ * sort_seq lies before list_iter's or after new_match's start.  Each candidate
+ * is scored on the sequences after sort_seq, and list_iter is moved to the
+ * first candidate with the highest positive score.  If no candidate scores
+ * above zero, list_iter is left unchanged.
+ * @param pair_list  the ordered list being built
+ * @param list_iter  in: first candidate position; out: chosen position
+ * @param new_match  the match to be placed
+ * @param sort_seq   sequence on which the candidates are currently ordered
+ */
+void scanFit( list< LabeledMem >& pair_list, list< LabeledMem >::iterator& list_iter, Match* new_match, uint sort_seq ){
+
+	list< LabeledMem >::iterator cur_iter = list_iter;
+	list< LabeledMem >::iterator last_iter = list_iter;
+//	int64 initial_start = absolut( list_iter->mem->Start( sort_seq ) );
+	int64 initial_start = absolut( list_iter->mem->Start( sort_seq ) );
+
+	// count the candidates; entries undefined in sort_seq are counted and skipped over
+	uint match_count = 0;
+	for(; last_iter != pair_list.end(); ++last_iter ){
+		if( last_iter->mem->Start( sort_seq ) == NO_MATCH ){
+			++match_count;
+			continue;
+		}
+//		if( absolut( last_iter->mem->Start( sort_seq ) ) < initial_start ||
+//			absolut( last_iter->mem->Start( sort_seq ) ) > new_match->Start( sort_seq ) )
+		if( absolut( last_iter->mem->Start( sort_seq ) ) < initial_start ||
+			absolut( last_iter->mem->Start( sort_seq ) ) > new_match->Start( sort_seq ) )
+			break;
+		++match_count;
+	}
+	// score_vector[ s ][ c ] compares candidate c with new_match in sequence sort_seq + 1 + s
+	vector< vector< int > > score_vector;
+	score_vector.reserve( new_match->SeqCount() - sort_seq - 1 );
+	for( uint seqI = sort_seq + 1; seqI < new_match->SeqCount(); ++seqI ){
+		vector< int > sv;
+		score_vector.push_back( sv );
+		score_vector[ score_vector.size() - 1 ].reserve( match_count );
+	}
+	uint matchI = 0;
+	for(; cur_iter != last_iter; ++cur_iter ){
+		
+		for( uint seqI = sort_seq + 1; seqI < new_match->SeqCount(); ++seqI ){
+			int64 p_start = cur_iter->mem->Start( seqI );
+			int64 m_start = new_match->Start( seqI );
+			// compare absolute coordinates, ignoring orientation
+			p_start = p_start < 0 ? -p_start : p_start;
+			m_start = m_start < 0 ? -m_start : m_start;
+			// 0: either side undefined, 1: candidate starts before new_match, -1: otherwise
+			if( m_start == NO_MATCH ){
+				score_vector[ seqI - sort_seq - 1 ].push_back( 0 );
+			}else if( p_start == NO_MATCH ){
+				score_vector[ seqI - sort_seq - 1 ].push_back( 0 );
+			}else if( p_start < m_start ){
+				score_vector[ seqI - sort_seq - 1 ].push_back( 1 );
+			}else
+				score_vector[ seqI - sort_seq - 1 ].push_back( -1 );
+		}
+	}
+	// one accumulated score per candidate, all starting at zero
+	vector< int > scores;
+	scores.reserve( match_count );
+	for( matchI = match_count; matchI > 0; matchI-- )
+		scores.push_back( 0 );
+	for( uint seqI = 0; seqI < new_match->SeqCount() - sort_seq - 1; ++seqI ){
+		// Walk the candidates from last to first.  Non-negative entries earn a
+		// point until a 1 is seen; counting then stops until the next -1.
+		boolean redefined = false;
+		for( matchI = match_count; matchI > 0; matchI-- ){
+			if( !redefined ){
+				if( score_vector[ seqI ][ matchI - 1 ] >= 0 ){
+					if( score_vector[ seqI ][ matchI - 1 ] == 1 )
+						redefined = true;
+					++scores[ matchI - 1 ];
+				}
+			}else{
+				if( score_vector[ seqI ][ matchI - 1 ] == -1 )
+					redefined = false;
+			}
+		}
+	}
+	// find the first highest scoring match
+	cur_iter = list_iter;
+	int max_score = 0;
+	for( matchI = 0; matchI < match_count; ++matchI ){
+		// strict comparison keeps the earliest candidate among equal scores
+		if( scores[ matchI ] > max_score ){
+			max_score = scores[ matchI ];
+			list_iter = cur_iter;
+		}
+		++cur_iter;
+	}
+}
+
+/**
+ * Aaron's subset LCB algorithm.  
+ *
+ * Reorders mlist into a single labelled ordering and reports, in breakpoints,
+ * the labels (indices into the reordered mlist) at which a block ends.
+ * The function works in three phases:
+ *  1. matches defined in sequence 0 form the initial ordered list; matches
+ *     that are undefined there are inserted next to their neighbour in the
+ *     first later sequence in which they are defined,
+ *  2. the resulting list is numbered and written back to mlist,
+ *  3. for every sequence after the first, the labelled matches are sorted on
+ *     that sequence and a breakpoint is recorded wherever consecutive matches
+ *     do not carry adjacent labels (after skipping labels of matches that are
+ *     undefined in that sequence).
+ * @param mlist        in: the matches; out: the same matches in label order
+ * @param breakpoints  out: set of block-end labels (cleared on entry)
+ */
+void AaronsLCB( MatchList& mlist, set<uint>& breakpoints ){
+	breakpoints.clear(); // make sure this is empty
+	if( mlist.size() == 0 )
+		return;
+	// can only look for breakpoints if there is more than one match!!
+	if( mlist.size() == 1 ){
+		breakpoints.insert( 0 );
+		return;
+	}
+	uint seq_count = mlist[0]->SeqCount();
+
+	SingleStartComparator<AbstractMatch> msc( 0 );
+	sort( mlist.begin(), mlist.end(), msc );
+	vector<Match*>::iterator mem_iter = mlist.begin();
+	list<LabeledMem> pair_list;
+	
+	map<uint, Match*> debug_label_map;
+	boolean debugging = false;
+	
+	
+	list< PlacementMatch > placement_list;
+	
+	// Phase 1a: split the matches into those defined in sequence 0 (pair_list)
+	// and those that still need a position (placement_list, iter == end())
+	for(; mem_iter != mlist.end(); ++mem_iter ){
+		if( (*mem_iter)->Start( 0 ) != NO_MATCH ){		
+			// add this one to the list.
+			LabeledMem lm;
+			lm.mem = *mem_iter;
+			lm.label = 0;
+			pair_list.push_back( lm );
+		}else{
+			PlacementMatch pm;
+			pm.mem = *mem_iter;
+			pm.iter = pair_list.end();
+			placement_list.push_back( pm );
+		}
+	}
+	LabeledMemComparator lmc( 0 );
+	pair_list.sort( lmc );
+	// the already placed matches also enter placement_list, each remembering
+	// its position in pair_list
+	list< LabeledMem >::iterator pair_iter = pair_list.begin();
+	for(; pair_iter != pair_list.end(); ++pair_iter ){
+		PlacementMatch pm;
+		pm.mem = pair_iter->mem;
+		pm.iter = pair_iter;
+		placement_list.push_back( pm );
+	}
+	
+	// place all the subset matches from each sequence in the correct place in pair_list.
+	for( uint seqI = 1; seqI < seq_count; ++seqI ){
+		PlacementMatchComparator pmc( seqI );
+		placement_list.sort( pmc );
+		list< PlacementMatch >::iterator placement_prev;
+		list< PlacementMatch >::iterator placement_iter = placement_list.begin();
+		// the first entry has no predecessor: if it is unplaced but defined
+		// in this sequence it goes to the front of pair_list
+		if( placement_iter->iter == pair_list.end() &&
+			placement_iter->mem->Start( seqI ) != NO_MATCH ){
+			LabeledMem lm;
+			lm.mem = placement_iter->mem;
+			lm.label = 0;
+			pair_list.insert( pair_list.begin(), lm );
+			placement_iter->iter = pair_list.begin();
+		}
+
+		for( ++placement_iter; placement_iter != placement_list.end(); ++placement_iter ){
+			placement_prev = placement_iter;
+			placement_prev--;
+			
+			// already has a position in pair_list
+			if( placement_iter->iter != pair_list.end() )
+				continue;
+			
+			// cannot be placed using this sequence
+			if( placement_iter->mem->Start( seqI ) == NO_MATCH )
+				continue;
+			
+			// start from the position of the preceding match in this sequence
+			list< LabeledMem >::iterator insert_iter = placement_prev->iter;
+			if( insert_iter == pair_list.end() || placement_prev->mem->Start( seqI ) == NO_MATCH )
+				insert_iter = pair_list.begin();
+			else{
+				if( insert_iter->mem->Start( seqI ) < 0 ){
+					// invert if necessary and insert before
+					if( placement_iter->mem->Start( seqI ) > 0 )
+						placement_iter->mem->Invert();
+					if( !checkCollinearity( placement_iter->mem, insert_iter->mem ) ){
+						// not collinear even after inverting: undo the inversion
+						// and look for the best fitting position instead
+						placement_iter->mem->Invert();
+						scanFit( pair_list, insert_iter, placement_iter->mem, seqI );
+						++insert_iter;
+					}
+				}else{
+					// insert in the earliest place this match fits with surrounding matches
+					scanFit( pair_list, insert_iter, placement_iter->mem, seqI );
+					++insert_iter;
+				}
+			}
+			
+			LabeledMem lm;
+			lm.mem = placement_iter->mem;
+			lm.label = 0;
+			pair_list.insert( insert_iter, lm );
+			// list::insert places the element before insert_iter, so step
+			// back one to remember the position of the new element
+			placement_iter->iter = insert_iter;
+			placement_iter->iter--;
+		}
+	}
+	boolean debug_labels = false;
+	ofstream debug_label_file;
+	if( debug_labels )
+		debug_label_file.open( "label_debug.txt" );
+	// number the LabeledMems in the pair_list
+	// (Phase 2: mlist is rebuilt in label order, so a label is also an index into mlist)
+	uint cur_label = 0;
+	mlist.clear();
+	vector< LabeledMem > pair_vec;
+	pair_vec.reserve( pair_list.size() );
+	mlist.reserve( pair_list.size() );
+	for( pair_iter = pair_list.begin(); pair_iter != pair_list.end(); ++pair_iter ){
+		pair_iter->label = cur_label++;
+		mlist.push_back( pair_iter->mem );
+		pair_vec.push_back( *pair_iter );
+		if( debug_labels ){
+			debug_label_map.insert( map<uint, Match*>::value_type( pair_iter->label, pair_iter->mem ) );
+			debug_label_file << pair_iter->label << '\t' << (*pair_iter->mem) << endl;
+		}
+	}
+	if( debug_labels )
+		debug_label_file.close();
+	
+	breakpoints.clear();
+	pair_list.clear();
+	// Phase 3: look for breakpoints in every sequence after the first
+	vector< LabeledMem >::iterator pair_vec_iter;
+	for( uint seqI = 1; seqI < seq_count; seqI++ ){
+		// sort the list on the current genome
+		LabeledMemComparator lmc( seqI );
+		sort( pair_vec.begin(), pair_vec.end(), lmc );
+		// labels of matches found to be undefined in this sequence
+		set< uint > no_match_labels;
+
+		// debugging code
+/*		stringstream debug_filename;
+		debug_filename << "label_sort_" << seqI << ".txt";
+		ofstream debug_file( debug_filename.str().c_str() );
+		for( uint pairI = 0; pairI < pair_vec.size(); pairI++ ){
+			debug_file << pair_vec[ pairI ].label << *pair_vec[ pairI ].mem << endl;
+		}
+		debug_file.close();
+*/		// end debugging code
+		
+		pair_vec_iter = pair_vec.begin();
+		uint block_start = pair_vec_iter->label;
+		uint break_label = 0;
+		// examine each consecutive pair (pair_prev, pair_vec_iter) in this sequence's order
+		for( ++pair_vec_iter; pair_vec_iter != pair_vec.end(); ++pair_vec_iter ){
+			vector<LabeledMem>::iterator pair_prev = pair_vec_iter;
+			pair_prev--;
+			break_label = 0;
+			uint scan_label = 0;
+			if( pair_prev->mem->Start( seqI ) == NO_MATCH ){
+				// the previous match is undefined here: remember its label and
+				// restart the current block at the present match
+				no_match_labels.insert( set< uint >::value_type( pair_prev->label ) );
+				// get the correct block start
+				if( pair_vec_iter->mem->Start( seqI ) < 0 ){
+					block_start = pair_vec_iter->label;
+					scanLabels( no_match_labels, block_start, true );
+				}else if( pair_vec_iter->mem->Start( seqI ) > 0 ){
+					block_start = pair_vec_iter->label;
+					scanLabels( no_match_labels, block_start, false );
+				}
+				
+				continue;
+			}
+
+			if( pair_prev->mem->Start( seqI ) < 0 ){
+				// this block would break at its start
+				break_label = block_start;
+			}else{
+				// this block would break at its end
+				break_label = pair_prev->label;
+				scanLabels( no_match_labels, break_label, true );
+			}
+			if( pair_vec_iter->mem->Start( seqI ) < 0 ){
+				// scan forward to the beginning of new block
+				scan_label = pair_vec_iter->label;
+				scanLabels( no_match_labels, scan_label, true );
+			}else{
+				// scan back to the beginning of new block
+				scan_label = pair_vec_iter->label;
+				scanLabels( no_match_labels, scan_label, false );
+			}
+
+			if( pair_vec_iter->mem->Start( seqI ) < 0 &&
+				pair_prev->mem->Start( seqI ) < 0 ){
+				// both reverse: labels must decrease by one to stay in the same block
+				if( scan_label + 1 == pair_prev->label )
+					continue;
+				if( debugging ){
+					map< uint, Match* >::const_iterator debug_iter = debug_label_map.find( pair_vec_iter->label );
+					while( debug_iter->first <= pair_prev->label ){
+						cout << debug_iter->first << '\t' << *(debug_iter->second) << endl;
+						++debug_iter;
+					}
+				}
+			}else
+			if( pair_vec_iter->mem->Start( seqI ) > 0 &&
+				pair_prev->mem->Start( seqI ) > 0 ){
+				
+				// both forward: labels must increase by one to stay in the same block
+				if( scan_label - 1 == pair_prev->label )
+					continue;
+				if( debugging ){
+					map< uint, Match* >::const_iterator debug_iter = debug_label_map.find( pair_prev->label );
+					while( debug_iter->first <= pair_vec_iter->label ){
+						cout << debug_iter->first << '\t' << *(debug_iter->second) << endl;
+						++debug_iter;
+					}
+				}
+			}
+			// check if the missing matches are in the set of non-matches
+
+			// since it didn't meet any of the above
+			// criteria it's a breakpoint.  insert the label of the end of the current block
+			// note that if it's a reverse complement block, the end label is really the start label
+			breakpoints.insert( break_label );
+			block_start = scan_label;
+		}
+
+		// insert the correct block ending
+		if( pair_vec_iter != pair_vec.begin() ){
+			// step back from end() to the last match in this sequence's order
+			pair_vec_iter--;
+			
+			if( pair_vec_iter->mem->Start( seqI ) < 0 ){
+				break_label = block_start;
+			}else{
+				break_label = pair_vec_iter->label;
+				scanLabels( no_match_labels, break_label, true );
+			}
+			breakpoints.insert( break_label );
+		}
+	}
+}
+
+/**
+ * Set output parameters for permutation matrices
+ * @param permutation_filename  stored in the member of the same name
+ * @param permutation_weight    stored in the member of the same name
+ */
+void Aligner::SetPermutationOutput( std::string& permutation_filename, int64 permutation_weight )
+{
+	// both parameters shadow the members they are copied into, hence this->
+	this->permutation_filename = permutation_filename;
+	this->permutation_weight = permutation_weight;
+}
+
+
+/**
+ * Compute the coverage of an LCB: the sum over its matches of
+ * Length() * Multiplicity().  When the list carries sequence data, one is
+ * subtracted for every 'n'/'N' found under a forward-oriented hit.
+ * NOTE(review): Ns under reverse-oriented hits (Start < 0) are not subtracted;
+ * this mirrors the original behaviour -- confirm it is intended.
+ * @param lcb       the matches of one LCB
+ * @param coverage  out: the computed coverage
+ */
+void GetLCBCoverage( MatchList& lcb, uint64& coverage ){
+	coverage = 0;
+	// whether sequence data is available does not change during the scan
+	const bool have_sequences = lcb.seq_table.size() > 0;
+	vector< Match* >::iterator match_iter = lcb.begin();
+	for( ; match_iter != lcb.end(); ++match_iter ){
+		coverage += (*match_iter)->Length() * (*match_iter)->Multiplicity();
+
+		// if we have sequence information then
+		// subtract the coverage for any position that contains an N
+		if( !have_sequences )
+			continue;
+
+		const gnSeqI length = (*match_iter)->Length();
+		for( uint seqI = 0; seqI < (*match_iter)->SeqCount(); ++seqI )
+		{
+			// Only forward hits ever reduce the coverage, so undefined (0) and
+			// reverse (< 0) hits are skipped before the bases are extracted.
+			const int64 start = (*match_iter)->Start(seqI);
+			if( start <= 0 )
+				continue;
+			const gnSeqI lend = start;
+			string match_seq = lcb.seq_table[seqI]->ToString(length, lend);
+			for( size_t s = 0; s < match_seq.size(); ++s )
+				if( match_seq[s] == 'n' || match_seq[s] == 'N' )
+					coverage--;
+		}
+	}
+}
+
+
+/**
+ * Convenience overload: represents every LCB by an Interval built from its
+ * first and (if it has more than one match) last match, then delegates to the
+ * IntervalList overload.  Every entry of lcb_list is expected to be non-empty.
+ */
+void computeLCBAdjacencies_v2( vector<MatchList>& lcb_list, vector< int64 >& weights, vector< LCB >& adjacencies ){
+	IntervalList iv_list;
+	for( uint lcbI = 0; lcbI < lcb_list.size(); ++lcbI ){
+		MatchList& cur_lcb = lcb_list[ lcbI ];
+
+		// the outermost matches are enough to describe the LCB's extent
+		vector<AbstractMatch*> end_matches;
+		end_matches.push_back( cur_lcb.front() );
+		if( cur_lcb.size() > 1 )
+			end_matches.push_back( cur_lcb.back() );
+
+		Interval iv( end_matches.begin(), end_matches.end() );
+		iv_list.push_back( iv );
+	}
+	computeLCBAdjacencies_v2( iv_list, weights, adjacencies );
+}
+
+const uint NO_ADJACENCY = (std::numeric_limits<uint>::max)();
+
+/**
+ *  Redesign to be more intuitive.  left_adjacency is always left, regardless of LCB orientation
+ *
+ *  Fills adjacencies with one LCB record per interval: signed left/right end
+ *  coordinates per sequence (negative for reverse orientation, NO_MATCH when
+ *  undefined), the weight, and the ids of the neighbouring LCBs to the left
+ *  and right in each sequence (NO_ADJACENCY at either extreme).  On return the
+ *  records are ordered by lcb_id.
+ *  @param iv_list      one interval per LCB
+ *  @param weights      weight of each LCB, indexed like iv_list
+ *  @param adjacencies  out: the adjacency records (cleared first)
+ */
+void computeLCBAdjacencies_v2( IntervalList& iv_list, vector< int64 >& weights, vector< LCB >& adjacencies ){
+	adjacencies.clear(); // start with no LCB adjacencies
+	if( iv_list.size() == 0 )
+		return;	// there aren't any LCBs so there aren't any adjacencies!
+
+	const uint seq_count = iv_list[0].SeqCount();
+	adjacencies.resize(iv_list.size());
+
+	// first pass: coordinates, id and weight of every LCB
+	for( uint lcbI = 0; lcbI < iv_list.size(); ++lcbI ){
+		LCB& cur = adjacencies[lcbI];
+		cur.lcb_id = lcbI;
+		cur.weight = weights[ lcbI ];
+		cur.to_be_deleted = false;
+		cur.left_end.resize(seq_count);
+		cur.right_end.resize(seq_count);
+		cur.left_adjacency.resize(seq_count);
+		cur.right_adjacency.resize(seq_count);
+
+		for( uint seqI = 0; seqI < seq_count; seqI++ ){
+			// support "ragged edges" on the ends of LCBs
+			int64 left_coord = iv_list[lcbI].LeftEnd(seqI);
+			int64 right_coord = NO_MATCH;
+			if( left_coord != NO_MATCH )
+			{
+				const bool is_forward = iv_list[lcbI].Orientation(seqI) == AbstractMatch::forward;
+				right_coord = iv_list[lcbI].RightEnd(seqI)+1;
+				// reverse-oriented intervals are stored with negated coordinates
+				if( !is_forward ){
+					left_coord = -left_coord;
+					right_coord = -right_coord;
+				}
+			}
+
+			cur.left_end[seqI] = left_coord;
+			cur.right_end[seqI] = right_coord;
+			cur.left_adjacency[seqI] = NO_ADJACENCY;
+			cur.right_adjacency[seqI] = NO_ADJACENCY;
+		}
+	}
+
+	// second pass: neighbours in each sequence's left-to-right order
+	const uint lastI = iv_list.size() - 1;
+	for( uint seqI = 0; seqI < seq_count; seqI++ ){
+		LCBLeftComparator llc( seqI );
+		sort( adjacencies.begin(), adjacencies.end(), llc );
+
+		// interior LCBs have a neighbour on both sides
+		for( uint midI = 1; midI < lastI; midI++ ){
+			adjacencies[ midI ].left_adjacency[ seqI ] = adjacencies[ midI - 1 ].lcb_id;
+			adjacencies[ midI ].right_adjacency[ seqI ] = adjacencies[ midI + 1 ].lcb_id;
+		}
+
+		// the outermost LCBs have nothing beyond them
+		adjacencies[ 0 ].left_adjacency[ seqI ] = NO_ADJACENCY;
+		adjacencies[ lastI ].right_adjacency[ seqI ] = NO_ADJACENCY;
+		// with more than one LCB the outermost ones still have an inner neighbour
+		if( lastI > 0 ){
+			adjacencies[ 0 ].right_adjacency[ seqI ] = adjacencies[ 1 ].lcb_id;
+			adjacencies[ lastI ].left_adjacency[ seqI ] = adjacencies[ lastI - 1 ].lcb_id;
+		}
+	}
+
+	// restore lcb_id order so that adjacencies[ id ] is the LCB with that id
+	LCBIDComparator lic;
+	sort( adjacencies.begin(), adjacencies.end(), lic );
+	
+}
+
+
+/**
+ * Follow left adjacencies in sequence seqI, starting at left_recurseI, until
+ * an LCB with weight >= min_weight is reached or the index becomes -1.
+ * The result is written back through left_recurseI.
+ */
+void scanLeft( int& left_recurseI, vector< LCB >& adjacencies, int min_weight, int seqI ){
+	for(;;){
+		if( left_recurseI == -1 )
+			return;	// ran off the left end
+		if( !( adjacencies[ left_recurseI ].weight < min_weight ) )
+			return;	// heavy enough, stop here
+		left_recurseI = adjacencies[ left_recurseI ].left_adjacency[ seqI ];
+	}
+}
+/**
+ * Follow right adjacencies in sequence seqI, starting at right_recurseI, until
+ * an LCB with weight >= min_weight is reached or the index becomes -1.
+ * The result is written back through right_recurseI.
+ */
+void scanRight( int& right_recurseI, vector< LCB >& adjacencies, int min_weight, int seqI ){
+	for(;;){
+		if( right_recurseI == -1 )
+			return;	// ran off the right end
+		if( !( adjacencies[ right_recurseI ].weight < min_weight ) )
+			return;	// heavy enough, stop here
+		right_recurseI = adjacencies[ right_recurseI ].right_adjacency[ seqI ];
+	}
+}
+
+
+
+/** iv_regions -- lists of intervening regions between LCBs in each sequence
+  * start positions organized as iv_regions[ seqI ][ lcbI * 2 ]
+  * end positions organized as iv_regions[ seqI ][ lcbI * 2 + 1 ] 
+  *
+  * For every sequence the LCBs are walked from left to right along their
+  * right_adjacency links and the region between each consecutive pair is
+  * recorded.  With entire_genome set, the regions before the first LCB and
+  * after the last one are considered as well.
+  * @param adjacencies    LCB adjacency records, indexed by lcb id
+  * @param seq_table      the sequences; used for their count and lengths
+  * @param iv_regions     out: start/end pairs per sequence (cleared first)
+  * @param entire_genome  also include the regions outside the outermost LCBs
+ */
+void CreateGapSearchList( vector< LCB >& adjacencies, const vector< gnSequence* >& seq_table, vector< vector< int64 > >& iv_regions, boolean entire_genome ) 
+{
+	iv_regions.clear();
+	if( adjacencies.size() == 0 )
+		return;		// there aren't any intervening LCB regions!
+	if( adjacencies.size() == 1 && !entire_genome )
+		return; 	// there aren't any interveniing LCB regions in the local area
+	boolean debug_lcb_extension = false;	/**< enables debugging output */
+	const uint seq_count = seq_table.size();
+
+	uint seqI = 0;
+	int lcbI = 0;
+	iv_regions = vector< vector< int64 > >( seq_count );
+
+	// extract a gnSequence containing only the intervening regions
+	for( seqI = 0; seqI < seq_count; seqI++ ){
+
+		// find the first LCB in this sequence
+		// NOTE(review): if no LCB has a left adjacency of -1, lcbI ends up equal
+		// to adjacencies.size() and is used as an index below when
+		// entire_genome is false -- confirm that cannot happen.
+		for( lcbI = 0; lcbI < adjacencies.size(); lcbI++ ){
+			if( adjacencies[ lcbI ].left_adjacency[ seqI ] == -1 )
+				break;
+		}
+		// start concatenating the intervening regions
+		// scan right
+		// lcbI is the LCB on the left of the current gap (-1: start of sequence),
+		// right_recurseI the LCB on its right (-1: end of sequence)
+		int right_recurseI = lcbI;
+		lcbI = -1;
+		if( !entire_genome && right_recurseI != -1 ){
+			// skip the region in front of the first LCB
+			lcbI = right_recurseI;
+			right_recurseI = adjacencies[ lcbI ].right_adjacency[ seqI ];
+		}
+		gnSeqI seq_len = 0;
+		while( (lcbI != -1 || right_recurseI != -1 ) && right_recurseI < (int)adjacencies.size() ){
+			int64 l_end = lcbI == -1 ? 1 : adjacencies[ lcbI ].right_end[ seqI ];
+			int64 r_end = right_recurseI == -1 ? seq_table[ seqI ]->length() : adjacencies[ right_recurseI ].left_end[ seqI ];
+
+			// break out if outside the last LCB and not searching the entire genome
+			if( !entire_genome && right_recurseI == -1 )
+				break;
+
+			// LCB ends may be stored negated; only the magnitude matters here
+			l_end = absolut( l_end );
+			r_end = absolut( r_end );
+			
+			// an inverted range is reported and collapsed to an empty region,
+			// except for the "nothing left after the last LCB" case handled below
+			if( l_end > r_end && !( r_end + 1 == l_end && right_recurseI == -1 ) ){
+				std::cerr << "Overlapping LCBs.  lcbI " << lcbI << " right_recurseI " << right_recurseI << endl;
+				std::cerr << "lend: " << l_end << " rend: " << r_end << endl;
+				l_end = r_end;
+				
+			}
+			
+			// advance to the next gap before deciding whether to record this one
+			lcbI = right_recurseI;
+			if( right_recurseI != -1 )
+				right_recurseI = adjacencies[ right_recurseI ].right_adjacency[ seqI ];
+			if( r_end + 1 == l_end && right_recurseI == -1 )
+				continue;	// we're at the right end and there's nothing to add
+			seq_len += r_end - l_end;
+			iv_regions[ seqI ].push_back( l_end );
+			iv_regions[ seqI ].push_back( r_end );
+		}
+		if( debug_lcb_extension )
+			std::cerr << "seqI " << seqI << " seq_len: " << seq_len << endl;
+	}
+
+}
+
+/**
+ * Search the intervening regions between LCBs for additional matches.
+ * The regions of each sequence are concatenated into a temporary sequence, a
+ * sorted mer list is built for it (in memory, falling back to file-based
+ * lists if that fails), matches are found with nway_mh, their coordinates are
+ * transposed back to the original sequences, and the filtered result is
+ * appended to new_matches.
+ * @param new_matches  in: provides seq_table; out: receives the matches found
+ * @param iv_regions   start/end pairs per sequence, as built by CreateGapSearchList
+ * @param nway_mh      match finder; cleared before use
+ */
+void SearchLCBGaps( MatchList& new_matches, const std::vector< std::vector< int64 > >& iv_regions, MaskedMemHash& nway_mh ) {
+	if( iv_regions.size() == 0 )
+		return;		// there aren't any intervening LCB regions!
+	size_t sI = 0;
+	for( ; sI < iv_regions.size(); sI++ )
+		if( iv_regions[sI].size() > 0 )
+			break;
+	if( sI == iv_regions.size() )
+		return;		// there aren't any intervening LCB regions!
+
+	boolean debug_lcb_extension = false;	/**< enables debugging output */
+
+	const uint seq_count = new_matches.seq_table.size();
+	if( seq_count == 0 )
+		return;		// no sequences; also avoids dividing by zero below
+	uint seqI = 0;
+	MatchList gap_list;
+	gap_list.seq_table = vector< gnSequence* >( seq_count );	/**< intervening regions of sequences */
+	gap_list.sml_table = vector< SortedMerList* >( seq_count );
+
+	// extract a gnSequence containing only the intervening regions
+	for( seqI = 0; seqI < seq_count; seqI++ ){
+		gap_list.seq_table[ seqI ] = new gnSequence();
+		gap_list.sml_table[ seqI ] = new DNAMemorySML();
+		gnSeqI seq_len = 0;
+		for( size_t ivI = 0; ivI < iv_regions[seqI].size(); ivI += 2 )
+		{
+			int64 l_end = iv_regions[seqI][ivI];
+			int64 r_end = iv_regions[seqI][ivI+1];
+			try{
+			if( debug_lcb_extension )
+				cerr << "Adding " << seqI << "\t" << l_end << "\t" << r_end << "\t(" << r_end - l_end << " bp)" << endl;
+			gap_list.seq_table[ seqI ]->append( new_matches.seq_table[ seqI ]->ToString(r_end - l_end, l_end ) );
+//			gap_list.seq_table[ seqI ]->append( new_matches.seq_table[ seqI ]->subseq( l_end, r_end - l_end ) );
+			}catch(...){
+				// keep going as before, but no longer hide the failure
+				cerr << "Warning: unable to extract region " << l_end << "\t" << r_end << " of seqI " << seqI << endl;
+			}
+			seq_len += r_end - l_end;
+		}
+		if( debug_lcb_extension )
+			cerr << "seqI " << seqI << " seq_len: " << seq_len << endl;
+	}
+	//
+	// search for MUMs in the intervening sequence regions
+	//
+
+	// calculate potential mer sizes for searches
+	gnSeqI total_iv_length = 0;
+	for( seqI = 0; seqI < seq_count; seqI++ ){
+		total_iv_length += gap_list.seq_table[ seqI ]->length();
+/*		cerr << "seqI: " << seqI << " length: " << gap_list.seq_table[ seqI ]->length();
+		cerr << "\n";
+*/
+	}
+	total_iv_length /= seq_count;
+
+	uint search_mer_size = getDefaultSeedWeight( total_iv_length );
+	if( search_mer_size < MIN_DNA_SEED_WEIGHT ){
+		// The seed size is too small to be significant.  Release the temporary
+		// tables first; the original returned here without freeing them.
+		for( seqI = 0; seqI < seq_count; seqI++ ){
+			gap_list.sml_table[ seqI ]->Clear();
+			delete gap_list.sml_table[ seqI ];
+			delete gap_list.seq_table[ seqI ];
+		}
+		return;
+	}
+	uint64 default_seed = getSeed( search_mer_size );
+	
+	//	Create sorted mer lists for the intervening gap region
+	vector< boost::filesystem::path > delete_files;
+	boolean create_succeeded = true;
+	for( seqI = 0; seqI < seq_count; seqI++ ){
+		gap_list.sml_table[ seqI ]->Clear();
+		try{
+			if( debug_lcb_extension )
+				cerr << "Creating memory SML for seqI " << seqI << endl;
+			gap_list.sml_table[ seqI ]->Create( *(gap_list.seq_table[ seqI ]), default_seed );
+		}catch(...){
+			create_succeeded = false;
+			break;
+		}
+	}
+	if( !create_succeeded ){	
+		// free memory consumed by any SMLs	
+		for( seqI = 0; seqI < seq_count; seqI++ ){
+			gap_list.sml_table[ seqI ]->Clear();
+			delete gap_list.sml_table[ seqI ];
+		}
+
+		for( seqI = 0; seqI < seq_count; seqI++ ){
+			cerr << "Creating dmSML for seqI " << seqI << endl;
+			// presumably we ran out of memory and couldn't use a MemorySML.	
+			// try using a FileSML with external sort
+			string concat_file = CreateTempFileName("seqconcat");
+
+			concat_file += ".raw";	// need .raw extension to tell stupid libGenome it's a raw file
+			gnRAWSource::Write( *(gap_list.seq_table[ seqI ]), concat_file.c_str() );
+			delete_files.push_back( concat_file );
+			delete gap_list.seq_table[ seqI ];	// make sure memory gets freed!
+			cerr << "Wrote raw sequence for seqI " << seqI << endl;
+			gap_list.seq_table[ seqI ] = new gnSequence();
+			gap_list.seq_table[ seqI ]->LoadSource( concat_file.c_str() );
+			cerr << "Loaded sequence " << seqI << gap_list.seq_table[ seqI ]->length() << "b.p.\n";
+			string sml_file = CreateTempFileName("dmsml");	
+			DNAFileSML* sml = new DNAFileSML( sml_file.c_str() );	
+			gap_list.sml_table[ seqI ] = sml;	
+			sml->dmCreate( *(gap_list.seq_table[ seqI ]), default_seed );	
+			delete_files.push_back( sml_file );
+			delete_files.push_back( sml_file + ".coords" );
+		}
+	}
+
+	//	Find all exact matches in the gap region
+	nway_mh.Clear();
+	nway_mh.FindMatches( gap_list );
+	gap_list.MultiplicityFilter( seq_count );
+//	nway_mh.GetMatchList( gap_list );
+
+	if( debug_lcb_extension ){
+		ofstream debug_extension_out( "new_extension_matches.txt" );
+		WriteList( gap_list, debug_extension_out );
+		debug_extension_out.close();
+	}
+
+	//	
+	// If an N mask was used, transpose MUMs back into the previous	
+	// sequence coordinates	
+	//	
+	// This has to happen while the SMLs are still alive: the original freed
+	// them first and then read getUsedCoordinates() from the deleted objects.
+	if( !create_succeeded ){
+		for( seqI = 0; seqI < seq_count; seqI++ )	
+			transposeMatches( gap_list, seqI, ((FileSML*)gap_list.sml_table[ seqI ])->getUsedCoordinates() );
+	}
+
+	// free memory used by SMLs!
+	for( seqI = 0; seqI < seq_count; seqI++ ){
+		gap_list.sml_table[ seqI ]->Clear();
+		delete gap_list.sml_table[ seqI ];
+	}
+
+	//
+	// Transpose MUMs back into their original sequence coordinates
+	//
+	for( seqI = 0; seqI < seq_count; seqI++ )
+		transposeMatches( gap_list, seqI, iv_regions[ seqI ] );
+
+	EliminateOverlaps( gap_list );
+	gap_list.MultiplicityFilter( seq_count );
+	// filter out matches that are too short
+	gap_list.LengthFilter( MIN_ANCHOR_LENGTH );
+
+	// free memory used by sequences!
+	for( seqI = 0; seqI < seq_count; seqI++ )
+		delete gap_list.seq_table[ seqI ];
+
+	// remove the temporary files created by the file-based fallback
+	for( size_t delI = 0; delI < delete_files.size(); delI++ )	
+		boost::filesystem::remove( delete_files[delI] );
+
+	new_matches.insert( new_matches.end(), gap_list.begin(), gap_list.end() );
+}
+
+
+
+/**
+ * Orders matches for transposeMatches(): first by the larger of FirstStart()
+ * and the chosen sequence index, then by absolute start coordinate in the
+ * first sequence (from the chosen one onwards) where both matches are defined
+ * and differ.
+ */
+class MatchLeftEndComparator {
+public:
+	/** @param seq  index of the first sequence to compare on */
+	MatchLeftEndComparator( unsigned seq = 0 ){
+		m_seq = seq;
+	}
+	// Takes a const reference: the previous non-const signature could not
+	// copy from const objects or temporaries.
+	MatchLeftEndComparator( const MatchLeftEndComparator& msc ){
+		m_seq = msc.m_seq;
+	}
+	// TODO??  make this do a wraparound comparison if all is equal?
+	/** @return true when a sorts strictly before b */
+	boolean operator()(const AbstractMatch* a, const AbstractMatch* b) const{
+		int32 start_diff = max( a->FirstStart(), m_seq ) - max( b->FirstStart(), m_seq );
+		if(start_diff == 0){
+			// only sequences present in both matches can be compared
+			uint32 m_count = a->SeqCount();
+			m_count = m_count <= b->SeqCount() ? m_count : b->SeqCount();
+			for(uint32 seqI = m_seq; seqI < m_count; seqI++){
+				int64 a_start = absolut( a->Start( seqI ) ), b_start = absolut( b->Start( seqI ) );
+				int64 diff = a_start - b_start;
+				// skip sequences where either match is undefined or both start at the same place
+				if(a_start == (int64)NO_MATCH || b_start == (int64)NO_MATCH)
+					continue;
+				else if(diff == 0)
+					continue;
+				else
+					return diff < 0;
+			}
+		}
+		return start_diff < 0;
+	}
+private:
+	unsigned m_seq;	/**< first sequence index used for the comparison */
+};
+
+/**
+ * Transposes the coordinates of matches in mlist to correspond to the original
+ * set of source sequence regions described by seq_regions, splitting matches if
+ * necessary.
+ *
+ * seq_regions is read as consecutive pairs: element 2k is used as the start of
+ * region k and (element 2k+1 - element 2k) as its length.  A match start in
+ * sequence seqI is treated as an offset into the concatenation of those
+ * regions, where offset 1 maps onto the start of region 0.  Negative starts
+ * (presumably reverse-strand matches -- confirm against Match) keep their sign
+ * after translation.
+ *
+ * Side effects: mlist is re-sorted with MatchLeftEndComparator( seqI ), match
+ * starts are rewritten in place, and every piece split off at a region
+ * boundary is a newly allocated Match appended to the end of mlist, so mlist
+ * is not in sorted order on return.  Matches whose start in seqI is 0 are
+ * left untouched.
+ *
+ * @param mlist			the matches to translate (modified in place)
+ * @param seqI			index of the sequence whose coordinates are translated
+ * @param seq_regions	flattened list of region boundaries; with fewer than
+ *						two entries the function returns without doing anything
+ */
+void transposeMatches( MatchList& mlist, uint seqI, const vector< int64 >& seq_regions ){
+	if( seq_regions.size() < 2 )	
+		return; // no work to be done here...
+
+	uint matchI = 0;
+	MatchLeftEndComparator msc( seqI );
+	sort( mlist.begin(), mlist.end(), msc );
+	uint regionI = 0;	// index into seq_regions of the current region's start
+	gnSeqI region_sum = seq_regions[ 1 ] - seq_regions[ 0 ];	// summed length of regions up to and including the current one
+	gnSeqI region_start_sum = 0;	// summed length of the regions before the current one
+	MatchList new_matches;	// pieces split off at region boundaries, appended to mlist at the end
+
+	for( ; matchI < mlist.size(); matchI++ ){
+		// find the translated start coordinate for this match
+		int64 trans_start = mlist[ matchI ]->Start( seqI );
+		int64 iv_orig_start = trans_start;	// untranslated start, advanced as pieces are split off
+		if( trans_start == 0 )	// presumably NO_MATCH, i.e. undefined in seqI -- TODO confirm
+			continue;
+		// advance to the region containing the start; regionI is never reset, which relies on the sort above
+		while( region_sum < absolut( trans_start ) && regionI + 2 < seq_regions.size() ){
+			regionI += 2;
+			region_start_sum = region_sum;
+			region_sum += seq_regions[ regionI + 1 ] - seq_regions[ regionI ];
+		}
+
+		if( trans_start < 0 )
+			trans_start = -seq_regions[ regionI ] - ( -trans_start - region_start_sum ) + 1;
+		else if( trans_start > 0 )
+			trans_start = seq_regions[ regionI ] + ( trans_start - region_start_sum ) - 1;
+
+		// last coordinate covered by the match, still in untranslated coordinates (read before SetStart below)
+		int64 trans_end = mlist[ matchI ]->Start( seqI );
+		trans_end += trans_end > 0 ? mlist[ matchI ]->Length() - 1: -(int64)(mlist[ matchI ]->Length()) + 1;
+		
+		mlist[ matchI ]->SetStart( seqI, trans_start );
+		
+		// the match may extend past the end of its region, in which case it is split once per boundary crossed
+		gnSeqI end_region_sum = region_sum;
+		gnSeqI end_prev_sum = region_start_sum;	// NOTE(review): written here and below but never read
+		uint end_regionI = regionI;
+		Match* cur_match = mlist[ matchI ];
+		while( end_region_sum < absolut( trans_end ) && end_regionI + 2 < seq_regions.size() ){
+			end_regionI += 2;
+
+			Match* left_match = new Match( *cur_match );	// becomes the piece that lies in the following region(s)
+			// clip off the part going to the other match
+			if( left_match->Start( seqI ) < 0 ){
+				cur_match->CropStart( absolut( iv_orig_start ) + left_match->Length() - end_region_sum - 1);
+				left_match->CropEnd( cur_match->Length() );
+			}else{
+				cur_match->CropEnd( absolut( iv_orig_start ) + left_match->Length() - end_region_sum - 1);
+				left_match->CropStart( cur_match->Length() );
+			}
+
+			// step the untranslated start past the piece that stays in cur_match
+			iv_orig_start += iv_orig_start > 0 ? cur_match->Length(): -(int64)cur_match->Length();
+
+			if( trans_start < 0 )
+				trans_start = -seq_regions[ end_regionI ] - ( -iv_orig_start - end_region_sum ) + 1;
+			else if( trans_start > 0 )
+				trans_start = seq_regions[ end_regionI ] + ( iv_orig_start - end_region_sum ) - 1;
+			
+			left_match->SetStart( seqI, trans_start );
+
+			cur_match = left_match;	// the new piece is the candidate for any further split
+			new_matches.push_back( left_match );
+
+			end_prev_sum = end_region_sum;
+			end_region_sum += seq_regions[ end_regionI + 1 ] - seq_regions[ end_regionI ];
+
+		}
+//		if( end_region_sum == absolut( trans_end ) )
+//			cerr << "Beware of a possible bug in transposeMatches()\n";
+	}
+	
+	// coordinates are translated; add the split-off pieces to the list
+	mlist.insert( mlist.end(), new_matches.begin(), new_matches.end() );
+}
+
+/**
+ * Splits a match list into one MatchList per locally collinear block (LCB).
+ *
+ * Each entry of breakpoints is the index in meml of the last match of a block;
+ * block k therefore spans the matches after the previous breakpoint up to and
+ * including breakpoint k.  For every block its coverage, as reported by
+ * GetLCBCoverage(), is recorded as the block weight.
+ *
+ * @param meml			the matches, in the order the breakpoints refer to
+ * @param breakpoints	indices of the last match in each block
+ * @param lcb_list		receives one MatchList per block (cleared first)
+ * @param weights		receives one weight per block (cleared first)
+ *
+ * When breakpoints is empty the function returns immediately and leaves
+ * lcb_list and weights untouched.
+ */
+void ComputeLCBs( MatchList& meml, set<uint>& breakpoints, vector<MatchList>& lcb_list, vector<int64>& weights ){
+
+	// there must be at least one end of a block defined
+	if( breakpoints.empty() )
+		return;
+		
+	lcb_list.clear();
+	weights.clear();
+	
+	// organize the LCBs into different MatchList instances
+
+	uint prev_break = 0;	// prev_break is the first match in the current block
+	// start from a copy of meml, presumably so that each block carries the
+	// same sequence tables as the input; the matches are replaced per block
+	MatchList lcb = meml;
+	for( set<uint>::iterator break_iter = breakpoints.begin(); break_iter != breakpoints.end(); ++break_iter ){
+		lcb.clear();
+		lcb.insert( lcb.begin(), meml.begin() + prev_break, meml.begin() + *break_iter + 1 );
+		prev_break = *break_iter + 1;
+		
+		// the weight of a block is its coverage; start from zero (as
+		// filterMatches() does) so the result never depends on stack garbage
+		uint64 coverage = 0;
+		GetLCBCoverage( lcb, coverage );
+		weights.push_back( coverage );
+
+		// every block is kept, no filtering happens at this stage
+		lcb_list.push_back( lcb );
+	}
+}
+
+/**
+ * Searches the sequence regions lying between two anchoring matches for
+ * additional matches and appends the acceptable ones to r_list.
+ *
+ * For every sequence the region between r_begin and r_end is copied into a
+ * temporary gnSequence that is paired with a DNAMemorySML.  If at least two
+ * regions reach min_recursive_gap_length, sorted mer lists are built with
+ * seed weights tried from the largest to the smallest and matches are
+ * collected through nway_mh (nway_only set) or gap_mh.  The resulting matches
+ * have overlaps eliminated, are filtered by MIN_ANCHOR_LENGTH (and by full
+ * multiplicity when nway_only is set), and are shifted from gap coordinates
+ * back into the coordinates of the sequences in r_list.
+ *
+ * @param r_list	supplies the sequences (seq_table) and receives new matches
+ * @param r_begin	match bounding the region on the left, may be NULL
+ * @param r_end		match bounding the region on the right, may be NULL
+ * @param nway_only	when true only matches spanning all sequences are sought
+ *
+ * Exceptions are caught and reported on cerr; nothing is rethrown.
+ * NOTE(review): the temporary sequences and SMLs are not released when an
+ * exception is thrown part way through.
+ */
+void Aligner::Recursion( MatchList& r_list, Match* r_begin, Match* r_end, boolean nway_only ){
+	try{
+	uint seqI = 0;
+	boolean create_ok = true;
+	// create gnSequences for each intervening region
+	// create a MatchList for the intervening region
+	MatchList gap_list;
+	
+	gap_list.seq_table.reserve( seq_count );
+	gap_list.sml_table.reserve( seq_count );
+	uint below_cutoff_count = 0;
+// 
+//	Get the sequence in the intervening gaps between these two matches
+//
+	for( seqI = 0; seqI < seq_count; seqI++ ){
+		int64 gap_end = 0;
+		int64 gap_start = 0;
+		getInterveningCoordinates( r_list.seq_table, r_begin, r_end, seqI, gap_start, gap_end );
+		if( (r_end && r_end->Start( seqI ) == NO_MATCH) ||
+			(r_begin && r_begin->Start( seqI ) == NO_MATCH )){
+			// one of the bounding matches is undefined here: use an empty placeholder
+			below_cutoff_count++;
+			cerr << "It's screwed up\n";
+			gap_list.seq_table.push_back( new gnSequence() );
+			gap_list.sml_table.push_back( new DNAMemorySML() );
+			continue;
+		}
+		if( gap_end < 0 && gap_start > 0 ){
+			create_ok = false;
+			cerr << "It's screwed up 2\n";
+			break; // bail out on directional inconsistency
+		}else if( gap_end > 0 && gap_start < 0 ){
+			// the mirror image of the case above; previously this branch
+			// repeated the first condition and could never be taken
+			cerr << "It's screwed up 3\n";
+			create_ok = false;
+			break;	// bail out on directional inconsistency
+		}
+		int64 diff = gap_end - gap_start;
+		diff = 0 < diff ? diff : 0;
+
+		if( gap_start == 0 )
+			cerr << "scheiss\n";
+
+		if( debug )
+			cout << r_list.seq_table[ seqI ]->length() << endl;
+
+		if( diff < min_recursive_gap_length )
+			below_cutoff_count++;
+		gnSequence* new_seq = new gnSequence( r_list.seq_table[ seqI ]->subseq( gap_start, diff ) );
+		gap_list.seq_table.push_back( new_seq );
+		gap_list.sml_table.push_back( new DNAMemorySML() );
+	}
+	
+	// only perform recursive anchoring if the gapped regions are long enough
+	// otherwise just let ClustalW do the work
+	// When the extraction loop bailed out, seq_table holds fewer than seq_count
+	// entries and no match would be accepted anyway, so skip the search.
+	if( create_ok && below_cutoff_count + 1 < seq_count ){
+		if( nway_only )
+			nway_mh.Clear();
+		else
+			gap_mh.get().Clear();
+
+		// maps seed weight -> sequence index, ordered by increasing weight
+		multimap< uint, uint > mer_sizes;
+		// calculate potential mer sizes for searches
+		for( seqI = 0; seqI < seq_count; seqI++ ){
+			uint search_mer_size = getDefaultSeedWeight( gap_list.seq_table[ seqI ]->length() );
+			mer_sizes.insert( multimap< uint, uint >::value_type( search_mer_size, seqI ) );
+		}
+		// walk the map backwards, i.e. from the largest seed weight down
+		multimap< uint, uint >::iterator mer_iter = mer_sizes.end();
+		mer_iter--;
+		vector< uint > search_seqs;	// sequences admitted to the search so far
+		while( mer_iter != mer_sizes.end() ){
+			uint prev_mer = mer_iter->first;
+			uint new_seqs = 0;
+			// admit every sequence sharing the current seed weight, and keep
+			// admitting until at least two sequences take part
+			while( true ){
+				if( mer_iter->first < MIN_DNA_SEED_WEIGHT )
+					break;
+				if( mer_iter->first == prev_mer || search_seqs.size() < 2 ){
+					search_seqs.push_back( mer_iter->second );
+					new_seqs++;
+					if( mer_iter == mer_sizes.begin() ){
+						mer_iter = mer_sizes.end();	// signify that the scan is complete
+						break;
+					}
+					prev_mer = mer_iter->first;
+					mer_iter--;
+				}else
+					break;
+			}
+
+			if( search_seqs.size() < 2 )
+				break;
+			// look for MUMs
+			
+			//
+			//	Create sorted mer lists for the intervening gap region
+			//
+
+			uint64 default_seed = getSeed( prev_mer );
+			if( prev_mer < MIN_DNA_SEED_WEIGHT )
+				break;
+			for( uint seqI = 0; seqI < gap_list.seq_table.size(); seqI++ ){
+				gap_list.sml_table[ seqI ]->Clear();
+				gap_list.sml_table[ seqI ]->Create( *(gap_list.seq_table[ seqI ]), default_seed );
+			}
+			//
+			//	Find all exact matches in the gap region
+			//
+			// cur_mems is a scratch list that carries gap_list's tables; the
+			// matches themselves are collected from the match finder below
+			MatchList cur_mems = gap_list;
+			cur_mems.clear();
+			if( nway_only ){
+				// no sense in searching for matches in subsets!!
+				if( search_seqs.size() < seq_count )
+					continue;
+				nway_mh.ClearSequences();
+				nway_mh.FindMatches( cur_mems );
+			}else{
+				gap_mh.get().ClearSequences();
+				gap_mh.get().FindMatches( cur_mems );
+			}
+			for( size_t mI = 0; mI < cur_mems.size(); ++mI )
+				cur_mems[mI]->Free();
+			cur_mems.clear();
+		}
+		if( nway_only )
+			nway_mh.GetMatchList( gap_list );
+		else
+			gap_mh.get().GetMatchList( gap_list );
+		
+
+		// delete overlaps/inclusions		
+		EliminateOverlaps( gap_list );
+		// mult. filter after EliminateOverlaps because e.o. may generate some subset matches
+		if( nway_only )
+			gap_list.MultiplicityFilter( seq_count );
+		
+		// for anchor accuracy, throw out any anchors that are shorter than the minimum
+		// anchor length after EliminateOverlaps()
+		gap_list.LengthFilter( MIN_ANCHOR_LENGTH );
+
+		if( gap_list.size() > 0 && create_ok ){
+
+			// move all the matches that were found, translating each start
+			// from gap coordinates to coordinates of the full sequence
+			vector< Match* >::iterator mum_iter = gap_list.begin();
+			for( ; mum_iter != gap_list.end(); ){
+				boolean add_ok = true;
+				for( uint seqI = 0; seqI < (*mum_iter)->SeqCount(); ++seqI ){
+					int64 gap_start;
+					if( (*mum_iter)->Start( seqI ) == NO_MATCH )
+						continue;
+					else if( (*mum_iter)->Start( seqI ) < 0 ){
+						gap_start = r_begin != NULL ? -r_begin->End( seqI ) : 0;
+						if( gap_start > 0 )
+							gap_start = r_end != NULL ? r_end->Start( seqI ) - r_end->Length() + 1 : 0;
+						else if( r_begin )
+							add_ok = false;
+						(*mum_iter)->SetStart( seqI, (*mum_iter)->Start( seqI ) + gap_start );
+					}else{
+						// insert them all before mem_iter
+						gap_start = r_begin != NULL ? r_begin->End( seqI ) : 0;
+						if( gap_start < 0 ){
+							gap_start = r_end != NULL ? r_end->Start( seqI ) - r_end->Length() + 1 : 0;
+							add_ok = false;
+						}
+						(*mum_iter)->SetStart( seqI, (*mum_iter)->Start( seqI ) + gap_start );
+					}
+				}
+				if( add_ok )
+					r_list.push_back( *mum_iter );
+				else{
+					// rejected: release the match and blank its slot
+					(*mum_iter)->Free();
+					(*mum_iter) = NULL;
+				}
+				++mum_iter;
+			}
+		}
+	}
+	// delete sequences and smls
+	for( uint seqI = 0; seqI < gap_list.seq_table.size(); ++seqI )
+		delete gap_list.seq_table[ seqI ];
+	for( uint seqI = 0; seqI < gap_list.sml_table.size(); ++seqI )
+		delete gap_list.sml_table[ seqI ];
+		
+	gap_list.seq_table.clear();
+	gap_list.sml_table.clear();
+	
+	}catch( gnException& gne ){
+		cerr << gne << endl;
+	}catch( exception& e ){
+		cerr << e.what() << endl;
+	}catch(...){
+		cerr << "When I say 'ohhh' you say 'shit'!\n";
+	}
+}
+
+/**
+ * Computes the gapped alignments between consecutive anchors of one LCB and
+ * stores anchors and alignments, interleaved in order, in iv.
+ *
+ * The region between mlist[i] and mlist[i+1] is aligned with gal for every i.
+ * When collinear_genomes is set, the region before the first anchor and the
+ * region after the last anchor are aligned as well.  Alignments for which
+ * gal->Align() reports failure are left out of the result.
+ *
+ * @param collinear_genomes	also align the regions outside the outermost anchors
+ * @param gal				the gapped aligner to use
+ * @param mlist				the anchors of the LCB, in order
+ * @param iv				receives the merged anchors and alignments
+ * @param apt				progress bookkeeping; cur_leftend is advanced by the
+ *							number of inter-anchor regions processed
+ *
+ * NOTE(review): GappedAlignment copies whose alignment fails are not released
+ * here; confirm the intended ownership before adding a Free()/delete.
+ */
+void AlignLCBInParallel( bool collinear_genomes, mems::GappedAligner* gal, MatchList& mlist, Interval& iv, AlnProgressTracker& apt )
+{
+	// check whether this function can do anything useful...
+	// An empty list is trivial in either mode: without this guard size()-1
+	// wraps around and mlist[0] / mlist.back() are read from an empty list.
+	if( mlist.size() == 0 || (!collinear_genomes && mlist.size() < 2) ){
+		iv.SetMatches( mlist );
+		return;
+	}
+	// number of regions between neighbouring anchors (kept as int so the loop
+	// below stays usable with the commented-out OpenMP pragma)
+	const int gap_count = (int)mlist.size() - 1;
+	// slot i holds the alignment between anchors i and i+1; the final slot
+	// (index mlist.size()) holds the alignment after the last anchor
+	vector<GappedAlignment*> gapped_alns(mlist.size()+1, NULL);
+	vector<int> success(gapped_alns.size(), 0);
+	gnSeqI progress_base = apt.cur_leftend;
+//#pragma omp parallel for
+	for( int mI = 0; mI < gap_count; mI++ )
+	{
+		// align the region between mI and mI+1
+		GappedAlignment ga(mlist.seq_table.size(),0);
+		gapped_alns[mI] = ga.Copy();
+
+		bool align_success = gal->Align( *(gapped_alns[mI]), mlist[mI], mlist[mI+1], mlist.seq_table );
+		if(align_success)
+			success[mI] = 1;
+		if(mI % 50 == 0 && mI > 0)
+		{
+			// update and print progress
+			int done = 0;
+			for( size_t i = 0; i < gapped_alns.size(); i++ )
+				if(gapped_alns[i] != NULL)
+					done++;
+//#pragma omp critical
+{
+			double cur_progress = ((double)(progress_base+done) / (double)apt.total_len)*100.0;
+			printProgress((uint)apt.prev_progress, (uint)cur_progress, cout);
+			apt.prev_progress = cur_progress;
+}
+		}
+	}
+	apt.cur_leftend += gap_count;
+
+	// merge the alignments and anchors back together
+	// worst case: leading alignment + n anchors + (n-1) gaps + trailing alignment
+	vector<AbstractMatch*> merged(mlist.size()*2 + 1);
+	size_t mlistI = 0;
+	size_t gappedI = 0;
+	bool turn = true;	// true: an anchor is due next, false: a gap alignment is due
+	size_t mJ = 0;
+
+	// check if genomes are collinear and get the start and end alignments if necessary
+	if(collinear_genomes)
+	{
+		GappedAlignment ga_tmp(mlist.seq_table.size(),0);
+		GappedAlignment* ga = ga_tmp.Copy();
+		bool align_success = gal->Align( *ga, NULL, mlist[0], mlist.seq_table );
+		if(align_success)
+			merged[mJ++] = ga;
+		gapped_alns[mlist.size()] = ga_tmp.Copy();
+		align_success = gal->Align( *(gapped_alns[mlist.size()]), mlist.back(), NULL, mlist.seq_table );
+		if(align_success)
+			success[mlist.size()] = 1;
+	}
+	for( ; mJ < merged.size() && mlistI < mlist.size();  )
+	{
+		if(turn)
+			merged[mJ++] = mlist[mlistI++];
+		else if(success[gappedI])
+			merged[mJ++] = gapped_alns[gappedI++];
+		else
+			gappedI++;	// failed alignment: skip it
+		turn = !turn;
+	}
+	// add the last alignment
+	if( success[mlist.size()]==1 )
+		merged[mJ++] = gapped_alns.back();
+	merged.resize(mJ);
+
+	iv.SetMatches(merged);
+}
+
+/**
+ * Computes the gapped alignments between the n-way anchors of one LCB and
+ * stores anchors and alignments, in order, in iv.
+ *
+ * The anchors in mlist are walked pairwise, considering only matches whose
+ * multiplicity equals seq_count.  The region between each pair is aligned
+ * with gal; successful alignments are placed between the anchors.  When
+ * collinear_genomes is set the regions before the first and after the last
+ * n-way anchor are aligned too.
+ *
+ * @param mlist	the anchors of the LCB; emptied while working and refilled
+ *				with the same matches before returning normally
+ * @param iv	receives the merged anchors and alignments
+ *
+ * Exceptions are caught and reported on cerr; nothing is rethrown.
+ * NOTE(review): if an exception is thrown, mlist is left empty and iv is not
+ * set.
+ */
+void Aligner::AlignLCB( MatchList& mlist, Interval& iv ){
+	// check whether this function can do anything useful...
+	if( !collinear_genomes && mlist.size() < 2 ){
+		iv.SetMatches( mlist );
+		return;
+	}
+
+	vector< AbstractMatch* > iv_matches;
+	boolean debug_recurse = false;
+	int64 config_value = 138500;
+	int print_interval = 50;
+	try{
+	list< Match* > match_list;
+	match_list.insert( match_list.end(), mlist.begin(), mlist.end() );
+	mlist.clear();
+	// copied after the clear: r_list has no matches but carries mlist's tables
+	MatchList r_list = mlist;
+
+	list< Match* >::iterator recurse_iter = match_list.begin();
+	list< Match* >::iterator recurse_prev = match_list.begin();
+	// scan ahead to the first n-way matches
+	while( recurse_prev != match_list.end() && (*recurse_prev)->Multiplicity() != seq_count )
+		++recurse_prev;
+
+	recurse_iter = recurse_prev;
+	if( !collinear_genomes ){
+		if( recurse_iter != match_list.end() )
+			++recurse_iter;
+		while( recurse_iter != match_list.end() && (*recurse_iter)->Multiplicity() != seq_count )
+			++recurse_iter;
+	}else
+		cout << "Assuming collinear genomes...\n";
+	
+	uint memI = 0;
+	while( true ){
+		// recurse_prev may be end() (e.g. no n-way match at all), so it must
+		// not be dereferenced unconditionally when debug forces the print
+		if( ( (memI >= print_interval && memI % print_interval == 0) || debug ) &&
+			recurse_prev != match_list.end() )
+			cout << "Number: " << memI << " match " << **recurse_prev << endl;
+		++memI;
+		if( debug_recurse ){
+			cout << "Recursing on " << endl;
+			if( recurse_prev != match_list.end() )
+				cout << **recurse_prev << " and " << endl;
+			if( recurse_iter != match_list.end() )
+				cout << **recurse_iter << endl;
+		}
+		
+		// no-op output; serves as a spot for a debugger breakpoint on one match
+		if( recurse_prev != match_list.end() && (*recurse_prev)->Start( 0 ) == config_value )
+			cout << "";
+		
+		// align the region between a pair of matches
+		r_list.clear();
+		GappedAlignment* cr = NULL;
+		boolean align_success = false;
+		
+		// a NULL end means "open" on that side (start or end of the LCB)
+		Match* r_lend = NULL;
+		Match* r_rend = NULL;
+		if( recurse_iter != recurse_prev )
+			r_lend = *recurse_prev;
+		if( recurse_iter != match_list.end() )
+			r_rend = *recurse_iter;
+
+		// attempt a clustalW alignment
+		cr = new GappedAlignment();
+		align_success = gal->Align( *cr, r_lend, r_rend, r_list.seq_table );
+
+		// add the gapped alignment to the Interval
+		if( r_lend != NULL )
+			iv_matches.push_back( r_lend );
+		if( align_success )
+			iv_matches.push_back( cr );
+		else
+			delete cr;	// not handed to the interval, so release it here
+
+		// scan ahead to the next pair of n-way matches
+		recurse_prev = recurse_iter;
+		if( recurse_iter != match_list.end() )
+			++recurse_iter;
+		while( recurse_iter != match_list.end() && (*recurse_iter)->Multiplicity() != seq_count )
+			++recurse_iter;
+
+		if( ( recurse_iter == match_list.end() && !collinear_genomes ) ||
+				( recurse_prev == match_list.end() && collinear_genomes ) )
+				break;
+	}
+	// get the last little bit at the end of the LCB.
+	list< Match* >::iterator iter = recurse_prev;
+	for( ; iter != recurse_iter; ++iter )
+		iv_matches.push_back(*iter);
+
+	mlist.insert( mlist.end(), match_list.begin(), match_list.end() );
+	iv.SetMatches(iv_matches); 
+
+	}catch( gnException& gne ){
+		cerr << gne << endl;
+	}catch(exception& e){
+		cerr << e.what();
+	}catch(...){
+		cerr << "matrix exception?\n";
+	}
+}
+
+/**
+ * Searches each region between neighbouring matches of an LCB once for new
+ * n-way matches and merges them into mlist.  No gapped alignment is done.
+ *
+ * Every (left, right) pair of neighbouring matches is looked up in
+ * search_cache; Recursion() is run only for pairs that are not cached.  A
+ * copy of every pair visited is appended to new_cache.  Matches found with
+ * multiplicity seq_count are inserted between the pair, all others are freed.
+ * Finally mlist is multiplicity filtered, overlaps are eliminated and it is
+ * multiplicity filtered again.
+ *
+ * @param mlist		the matches of the LCB, updated in place
+ * @param new_cache	receives copies of every pair examined in this call
+ * @param leftmost	also search the region before the first match
+ * @param rightmost	also search the region after the last match
+ *
+ * The cache lookup is a binary search and so assumes that search_cache is
+ * sorted by cache_comparator -- NOTE(review): confirm where the cache is
+ * built.  Exceptions are caught and reported on cerr; nothing is rethrown.
+ */
+void Aligner::SearchWithinLCB( MatchList& mlist, std::vector< search_cache_t >& new_cache, bool leftmost, bool rightmost){
+	// check whether this function can do anything useful...
+	if( !(leftmost || rightmost) && mlist.size() < 2 )
+		return;
+
+	boolean debug_recurse = false;
+	int print_interval = 50;
+
+	try{
+	list< Match* > match_list;
+	match_list.insert( match_list.end(), mlist.begin(), mlist.end() );
+	mlist.clear();
+	// copied after the clear: r_list has no matches but carries mlist's tables
+	MatchList r_list = mlist;
+
+	list< Match* >::iterator recurse_iter = match_list.begin();
+	list< Match* >::iterator recurse_prev = match_list.begin();
+	if( !leftmost && recurse_iter != match_list.end() )
+		++recurse_iter;
+	
+	uint memI = 0;
+	uint matchI = 0;
+	while( recurse_prev != match_list.end() ){
+		if( (memI >= print_interval && memI % print_interval == 0) || debug)
+			cout << "Number: " << memI << " match " << **recurse_prev << endl;
+		++memI;
+		if( debug_recurse ){
+			cout << "Recursing on " << endl;
+			if( recurse_prev != match_list.end() )
+				cout << **recurse_prev << " and " << endl;
+			if( recurse_iter != match_list.end() )
+				cout << **recurse_iter << endl;
+		}
+		
+		
+		// recurse on a pair of matches! 
+		// this function should locate all matches between the two iterators
+		// and add them to r_list		
+		r_list.clear();
+		Match* r_left = NULL;
+		Match* r_right = NULL;
+		if( recurse_iter == match_list.begin() && leftmost ){
+			// region before the first match
+			r_left = NULL;
+			r_right = *recurse_iter;
+		}else if( recurse_iter == match_list.end() && rightmost ){
+			// region after the last match
+			r_left = *recurse_prev;
+			r_right = NULL;
+		}else{
+			r_left = *recurse_prev;
+			r_right = *recurse_iter;
+		}
+		// check the cache to see whether this search has already been done!
+
+		// the cache stores its own copies so entries outlive the matches
+		search_cache_t cacheval = make_pair( r_left, r_right );
+		if( cacheval.first != NULL )
+			cacheval.first = cacheval.first->Copy();
+		if( cacheval.second != NULL )
+			cacheval.second = cacheval.second->Copy();
+		// lower_bound yields the first entry that is not less than cacheval,
+		// which is the only place an equivalent entry can be.  (upper_bound
+		// yields the first entry greater than cacheval, so the equivalence
+		// test below could never succeed and the cache never hit.)
+		std::vector< search_cache_t >::iterator cache_entry = std::lower_bound( search_cache.begin(), search_cache.end(), cacheval, cache_comparator );
+		if( cache_entry == search_cache.end() || 
+			(cache_comparator( cacheval, *cache_entry ) || cache_comparator( *cache_entry, cacheval )) )
+		{
+			// not in the cache: search this region
+			Recursion( r_list, r_left, r_right, true );
+		}
+		new_cache.push_back( cacheval );
+
+		if( debug_recurse ){
+			vector< Match* >::iterator r_iter = r_list.begin();
+			cout << "Found matches " << endl;
+			for(; r_iter != r_list.end(); ++r_iter )
+				cout << **r_iter << endl;
+		}
+
+		// insert any n-way matches into the match list
+		for( matchI = 0; matchI < r_list.size(); ++matchI ){
+			if( r_list[ matchI ]->Multiplicity() == seq_count ){
+				match_list.insert( recurse_iter, r_list[ matchI ] );
+			}else
+			{
+				r_list[matchI]->Free();
+				r_list[matchI] = NULL;
+			}
+		}
+
+		// move ahead to the next pair of n-way matches
+		recurse_prev = recurse_iter;
+		if( recurse_iter != match_list.end() )
+			++recurse_iter;
+		
+		// break early if we aren't assuming genome collinearity
+		if( !rightmost && recurse_iter == match_list.end() )
+			break;
+			
+	}
+
+	mlist.insert( mlist.begin(), match_list.begin(), match_list.end() );
+
+	}catch( gnException& gne ){
+		cerr << gne << endl;
+	}catch(exception& e){
+		cerr << e.what();
+	}catch(...){
+		cerr << "matrix exception?\n";
+	}
+
+	// Multiplicity Filter...
+	mlist.MultiplicityFilter( seq_count );
+	EliminateOverlaps( mlist );
+	// E.O. can create some matches of lower multiplicity
+	mlist.MultiplicityFilter( seq_count );
+}
+
+/**
+ * Debugging aid: re-derives the LCBs from copies of the given data and
+ * reports on cerr whenever the expected LCB count, the number of filtered
+ * LCBs and the number of breakpoints found by AaronsLCB() disagree.
+ * The arguments themselves are not modified.
+ *
+ * @param lcb_count		the number of LCBs the caller believes exist
+ * @param adjacencies	LCB adjacency records
+ * @param lcb_list		the matches of each LCB
+ * @param weights		the weight of each LCB
+ */
+void Aligner::consistencyCheck( uint lcb_count, vector< LCB >& adjacencies, vector< MatchList >& lcb_list, vector< int64 >& weights ){
+	// filterMatches() rewrites its arguments, so hand it private copies
+	vector< LCB > adj_copy( adjacencies );
+	vector< MatchList > lcb_copy( lcb_list );
+	vector< int64 > weight_copy( weights );
+	filterMatches( adj_copy, lcb_copy, weight_copy );
+
+	// flatten the filtered LCBs into a single list and recompute breakpoints
+	MatchList all_matches;
+	for( vector< MatchList >::iterator lcb_iter = lcb_copy.begin(); lcb_iter != lcb_copy.end(); ++lcb_iter )
+		all_matches.insert( all_matches.end(), lcb_iter->begin(), lcb_iter->end() );
+	set< uint > breakpoints;
+	AaronsLCB( all_matches, breakpoints );
+	
+	// the three counts should agree pairwise; report each pair that doesn't
+	const bool filtered_differs = lcb_count != lcb_copy.size();
+	const bool breaks_differ = lcb_count != breakpoints.size();
+	const bool derived_differ = lcb_copy.size() != breakpoints.size();
+	if( filtered_differs )
+		cerr << "lcb_count: " << lcb_count << "\ttmp_lcbs.size(): " << lcb_copy.size() << endl;
+	if( breaks_differ )
+		cerr << "lcb_count: " << lcb_count << "\tbreakpoints.size(): " << breakpoints.size() << endl;
+	if( derived_differ )
+		cerr << "tmp_lcbs.size(): " << lcb_copy.size() << "\tbreakpoints.size(): " << breakpoints.size() << endl;
+}
+
+
+/**
+ * Version 2 of this algorithm:
+ * each time two LCBs coalesce, repeatedly search their intervening region until
+ * either a single LCB exists or all LCBs meet the current minimum_weight.
+ * (NOTE(review): no searching of intervening regions is visible in this
+ * function; it only removes and merges LCBs.)
+ *
+ * What the code below does: while more than one LCB remains, it finds the
+ * lowest-weight LCB that is still its own representative (lcb_id equal to its
+ * index).  If that weight is below minimum_weight the LCB is removed by
+ * setting its lcb_id to -2 and its neighbours are linked to each other in
+ * every sequence.  Then, for each sequence, the former left and right
+ * neighbours are merged when they are adjacent in every sequence with a
+ * consistent orientation: the right one's lcb_id is pointed at the left one,
+ * the weights are summed and the left one takes over the outer end
+ * coordinates and adjacencies.
+ *
+ * Adjacency values are unsigned; -1 is used for "no neighbour" and -2 for
+ * "removed", both compared after conversion to unsigned.
+ *
+ * @param minimum_weight	LCBs lighter than this are eliminated
+ * @param adjacencies		adjacency records, updated in place
+ * @param weights			LCB weights, updated in place when LCBs merge
+ * @param status_out		optional stream for progress messages, may be NULL
+ * @returns		The weight of the minimum weight LCB that remains
+ *				(0 when adjacencies is empty)
+ */
+int64 greedyBreakpointElimination( gnSeqI minimum_weight, vector< LCB >& adjacencies, vector< int64 >& weights, ostream* status_out ){
+	// repeatedly remove the low weight LCBs until the minimum weight criteria is satisfied
+	uint lcbI = 0;
+	vector< uint > low_weight;	// NOTE(review): never used
+	bool have_weight = false;
+	gnSeqI min_weight = 0;
+	gnSeqI prev_min_weight = 0;
+	uint min_lcb = 0;
+	uint lcb_count = adjacencies.size();
+	boolean debug_bp_elimination = false;
+	uint current_lcbI = 0;	/**< tracks how many of the LCBs are above the min weight */
+
+	if( adjacencies.size() == 0 )
+		return 0;	// nothing can be done
+	uint seq_count = adjacencies[0].left_end.size();
+	
+	while( min_weight < minimum_weight ){
+		if( lcb_count == 1 )
+			break;	// if only a single LCB remains, don't remove it
+
+		// find the live LCB with the lowest weight; repeats once whenever the minimum changed
+		while(true){
+			have_weight = false;
+			min_weight = 0;
+			current_lcbI = 0;	// always scan the entire set
+
+			// start with current_lcbI since everything up to it has already been scanned
+			for( lcbI = current_lcbI; lcbI < weights.size(); lcbI++ ){
+				if( adjacencies[ lcbI ].lcb_id != lcbI ){
+					// this lcb has been removed or merged with another lcb
+					continue;
+				}
+				if( weights[ lcbI ] < min_weight || !have_weight ){
+					min_weight = weights[ lcbI ];
+					min_lcb = lcbI;
+					have_weight = true;
+					if( min_weight == prev_min_weight && current_lcbI > 0 )	// current_lcbI was just reset to 0, so this never fires
+						break;	// we've already found a minimum
+								// weight LCB, stop here to save some searching
+				}
+			}
+			lcbI = min_lcb;
+			have_weight = false;
+			// if the min weight changed then scan the entire set from the beginning
+			if( prev_min_weight != min_weight ){
+				if( status_out != NULL )
+					*status_out << "There are " << lcb_count << " LCBs with minimum weight " << min_weight << endl;
+
+				current_lcbI = 0;
+				prev_min_weight = min_weight;
+				continue;
+			}
+
+			// save time by skipping LCBs that have already been scanned
+			current_lcbI = min_lcb;
+			break;
+		}
+		
+//		consistencyCheck( lcb_count, adjacencies, lcb_list, weights );
+		if( min_weight >= minimum_weight )
+			break;
+
+		// actually remove the LCBs now
+		// (only remove a single LCB for now -- it's easier to calculate adjacencies)
+
+		// remove this LCB
+		adjacencies[ lcbI ].lcb_id = -2;	// -2 marks "removed"
+		
+		// update adjacencies
+		uint seqI;
+		uint left_adj;
+		uint right_adj;
+		for( seqI = 0; seqI < seq_count; seqI++ ){
+			left_adj = adjacencies[ lcbI ].left_adjacency[ seqI ];
+			right_adj = adjacencies[ lcbI ].right_adjacency[ seqI ];
+			if( debug_bp_elimination ){
+				if( left_adj == -2 || right_adj == -2 ){
+					cerr << "improper linking\n";
+				}
+				// for debugging, check for consistency:
+				if( left_adj != -1 && adjacencies[ left_adj ].right_adjacency[ seqI ] != lcbI )
+					cerr << "Mutiny on the bounty!\n";
+				// for debugging, check for consistency
+				if( right_adj == adjacencies.size() )
+					cerr << "Horrible Error -399a\n";
+				if( right_adj != -1 && adjacencies[ right_adj ].left_adjacency[ seqI ] != lcbI )
+					cerr << "Mutiny on the bounty!\n";
+			}
+			// splice the removed LCB out: its neighbours now point at each other
+			if( left_adj != -1 )
+				adjacencies[ left_adj ].right_adjacency[ seqI ] = right_adj;
+			if( right_adj != -1 && right_adj != adjacencies.size() )
+				adjacencies[ right_adj ].left_adjacency[ seqI ] = left_adj;
+			
+		}
+		// just deleted an lcb, drop the lcb count
+		lcb_count--;
+
+		// check for collapse
+		for( seqI = 0; seqI < seq_count; seqI++ ){
+			left_adj = adjacencies[ lcbI ].left_adjacency[ seqI ];	// the removed LCB's own links are still readable
+			right_adj = adjacencies[ lcbI ].right_adjacency[ seqI ];
+			if( left_adj == -1 || right_adj == -1 )
+				continue;	// can't collapse with a non-existent LCB!
+
+			if( debug_bp_elimination ){
+				if( right_adj == adjacencies.size() )
+					cerr << "Horrible Error -399a\n";
+				// check whether this LCB has already been merged
+				if( left_adj != adjacencies[ left_adj ].lcb_id ||
+					right_adj != adjacencies[ right_adj ].lcb_id ){
+					// because adjacency pointers are always updated to point to the 
+					// representative entry of an LCB, the lcb_id and the array index
+					// should always be identical
+					cerr << "improper linking\n";
+					continue;
+				}
+				if( left_adj == -2 || right_adj == -2 ){
+					cerr << "improper linking\n";
+				}
+			}
+
+			// check whether the two LCBs are adjacent in each sequence
+			boolean orientation = adjacencies[ left_adj ].left_end[ seqI ] > 0 ? true : false;
+			uint seqJ;
+			for( seqJ = 0; seqJ < seq_count; seqJ++ ){
+				boolean j_orientation = adjacencies[ left_adj ].left_end[ seqJ ] > 0;
+				if( j_orientation == orientation &&
+					adjacencies[ left_adj ].right_adjacency[ seqJ ] != right_adj )
+					break;
+				if( j_orientation != orientation &&
+					adjacencies[ left_adj ].left_adjacency[ seqJ ] != right_adj )
+					break;
+				// check that they are both in the same orientation
+				if( adjacencies[ right_adj ].left_end[ seqJ ] > 0 != j_orientation )
+					break;
+			}
+
+			if( seqJ != seq_count )	// the loop above stopped early: not adjacent everywhere
+				continue;
+			
+
+			// these two can be collapsed: right_adj is merged into left_adj
+			adjacencies[ right_adj ].lcb_id = left_adj;
+			if( adjacencies[ right_adj ].lcb_id == -1 ||
+				adjacencies[ right_adj ].lcb_id == -2 )
+				cerr << "Trouble in the eleventh circle\n";
+			weights[ left_adj ] += weights[ right_adj ];
+			// unlink right_adj from the adjacency list and
+			// update left and right ends of left_adj
+			for( seqJ = 0; seqJ < seq_count; seqJ++ ){
+				boolean j_orientation = adjacencies[ left_adj ].left_end[ seqJ ] > 0;
+				uint rr_adj = adjacencies[ right_adj ].right_adjacency[ seqJ ];
+				uint rl_adj = adjacencies[ right_adj ].left_adjacency[ seqJ ];
+				if( j_orientation == orientation ){
+					adjacencies[ left_adj ].right_end[ seqJ ] = adjacencies[ right_adj ].right_end[ seqJ ];
+					adjacencies[ left_adj ].right_adjacency[ seqJ ] = rr_adj;
+					if( rr_adj == adjacencies.size() )
+						cerr << "Horrible Error -399a\n";
+					if( rr_adj != -1 )
+						adjacencies[ rr_adj ].left_adjacency[ seqJ ] = left_adj;
+				}else{
+					adjacencies[ left_adj ].left_end[ seqJ ] = adjacencies[ right_adj ].left_end[ seqJ ];
+					adjacencies[ left_adj ].left_adjacency[ seqJ ] = rl_adj;
+					if( rl_adj == adjacencies.size() )
+						cerr << "Horrible Error -399a\n";
+					if( rl_adj != -1 )
+						adjacencies[ rl_adj ].right_adjacency[ seqJ ] = left_adj;
+				}
+				// redirect lcbI's links from the merged-away right_adj to left_adj
+				if( adjacencies[ lcbI ].left_adjacency[ seqJ ] == right_adj )
+					adjacencies[ lcbI ].left_adjacency[ seqJ ] = left_adj;
+				if( adjacencies[ lcbI ].right_adjacency[ seqJ ] == right_adj )
+					adjacencies[ lcbI ].right_adjacency[ seqJ ] = left_adj;
+
+
+			}
+			// just collapsed an lcb, decrement
+			lcb_count--;
+		}
+	}
+	return min_weight;
+}
+
+/**
+ * Orders LCBs (MatchLists) by comparing their first matches with a
+ * SingleStartComparator constructed with argument 0.  Both lists must be
+ * non-empty, since front() is taken of each.
+ */
+class LCBLeftEndComp
+{
+public:
+	LCBLeftEndComp()
+		: ssc(0)
+	{
+	}
+
+	/** @return the result of comparing the first match of a with that of b */
+	bool operator()( const MatchList& a, const MatchList& b )
+	{
+		const bool a_sorts_first = ssc(a.front(), b.front());
+		return a_sorts_first;
+	}
+
+protected:
+	SingleStartComparator<AbstractMatch> ssc;	// compares the two leading matches
+};
+
+/**
+ * Rebuilds the LCB match lists so that they reflect the removals and merges
+ * recorded in adjacencies.
+ *
+ * An LCB whose lcb_id equals its own index is kept.  An lcb_id of -1 or -2
+ * means the LCB was removed and its matches are dropped.  Any other lcb_id is
+ * followed until a representative (or a removed entry) is reached; every
+ * entry on that path is re-pointed at the result and the matches are added to
+ * the representative.  Afterwards the non-empty LCBs are written back to
+ * lcb_list, their coverage is recomputed into weights, matches and LCBs are
+ * sorted, and adjacencies is recomputed with computeLCBAdjacencies_v2().
+ *
+ * @param adjacencies	filtered adjacency records (in), recomputed (out)
+ * @param lcb_list		unfiltered LCBs (in), filtered LCBs (out)
+ * @param weights		replaced by the coverage of each remaining LCB
+ */
+void filterMatches( vector< LCB >& adjacencies, vector< MatchList >& lcb_list, vector< int64 >& weights ){
+	if( lcb_list.size() < 1 )
+		return;
+
+	// a list without matches that was copied from a real LCB, used as the
+	// starting value of every output slot
+	MatchList empty_lcb = lcb_list[ 0 ];
+	empty_lcb.clear();
+	vector< MatchList > merged_lcbs( lcb_list.size(), empty_lcb );
+
+	uint lcbI;
+	for( lcbI = 0; lcbI < adjacencies.size(); lcbI++ ){
+		MatchList& source = lcb_list[ lcbI ];
+
+		if( adjacencies[ lcbI ].lcb_id == lcbI ){
+			// its own representative: keep the matches where they are
+			merged_lcbs[ lcbI ].insert( merged_lcbs[ lcbI ].end(), source.begin(), source.end() );
+		}else if( adjacencies[ lcbI ].lcb_id == -1 ){
+			cerr << "weird";
+			// this one was removed
+		}else if( adjacencies[ lcbI ].lcb_id == -2 ){
+			// this one was removed
+		}else{
+			// this one points elsewhere
+			// follow the chain to its end, remembering every entry passed
+			vector< uint > path;
+			path.push_back( lcbI );
+			uint target = adjacencies[ lcbI ].lcb_id;
+			while( adjacencies[ target ].lcb_id != target ){
+				path.push_back( target );
+				target = adjacencies[ target ].lcb_id;
+				if( target == -1 || target == -2 )
+					break;	// this one points to an LCB that got deleted
+			}
+			// point every entry on the path directly at the final target
+			for( size_t pathI = 0; pathI < path.size(); pathI++ )
+				adjacencies[ path[ pathI ] ].lcb_id = target;
+
+			// add this LCB's matches to the target LCB.
+			const bool target_removed = ( target == -1 || target == -2 );
+			if( !target_removed )
+				merged_lcbs[ target ].insert( merged_lcbs[ target ].end(), source.begin(), source.end() );
+		}
+	}
+
+	// write the surviving LCBs back and recompute their weights
+	lcb_list.clear();
+	vector< int64 > fresh_weights;
+	for( lcbI = 0; lcbI < merged_lcbs.size(); lcbI++ ){
+		if( merged_lcbs[ lcbI ].size() == 0 )
+			continue;
+		lcb_list.push_back( merged_lcbs[ lcbI ] );
+		uint64 coverage = 0;
+		GetLCBCoverage( merged_lcbs[lcbI], coverage );
+		fresh_weights.push_back( coverage );
+	}
+
+	// sort the matches inside consolidated LCBs
+	MatchStartComparator<AbstractMatch> match_order( 0 );
+	for( lcbI = 0; lcbI < lcb_list.size(); lcbI++ ){
+		MatchList& cur_lcb = lcb_list[ lcbI ];
+		sort( cur_lcb.begin(), cur_lcb.end(), match_order );
+	}
+
+	// sort the LCBs themselves
+	LCBLeftEndComp lcb_order;
+	std::sort( lcb_list.begin(), lcb_list.end(), lcb_order );
+
+	// calculate the LCB adjacencies
+	weights = fresh_weights;
+	computeLCBAdjacencies_v2( lcb_list, weights, adjacencies );
+
+}
+
+/**
+ * Writes the order of the LCBs in every sequence to a text file.
+ *
+ * One line is written per sequence.  Starting at the first LCB whose left
+ * adjacency in that sequence is -1, the right adjacencies are followed and
+ * each LCB's lcb_id is written, tab separated, with a leading "-" when the
+ * LCB's left_end in that sequence is negative.
+ *
+ * @param adjacencies	the LCB adjacency records to write
+ * @param out_filename	the file to create; an error is printed to cerr and
+ *						nothing is written if it cannot be opened
+ */
+void Aligner::WritePermutation( vector< LCB >& adjacencies, std::string out_filename )
+{
+	ofstream permutation_out( out_filename.c_str() );
+	if( !permutation_out.is_open() )
+	{
+		cerr << "Error opening " << out_filename << endl;
+		return;
+	}
+	for( int seqI = 0; seqI < seq_count; seqI++ )
+	{
+		// find the left-most LCB in this genome
+		int left_lcb = 0;
+		while( left_lcb < adjacencies.size() )
+		{
+			const uint left_adj = adjacencies[left_lcb].left_adjacency[seqI];
+			if( left_adj == -1 )
+				break;
+			left_lcb++;
+		}
+
+		// write out lcb id's in order; the walk ends once the right
+		// adjacency no longer indexes a valid entry
+		uint lcbI = left_lcb;
+		while( lcbI < adjacencies.size() )
+		{
+			const bool is_first = ( lcbI == left_lcb );
+			if( !is_first )
+				permutation_out << '\t';
+			const bool is_negative = adjacencies[lcbI].left_end[seqI] < 0;
+			if( is_negative )
+				permutation_out << "-";
+			permutation_out << adjacencies[lcbI].lcb_id;
+			lcbI = adjacencies[lcbI].right_adjacency[seqI];
+		}
+		permutation_out << endl;
+	}
+}
+
+// Writes a '#'-prefixed header naming the left/right end columns of every sequence, followed by
+// one tab-separated row per interval; coordinates of reverse-oriented components get a '-' prefix.
+void WritePermutationCoordinates( IntervalList& perm_iv_list, std::string out_filename )
+{
+	ofstream perm_out( out_filename.c_str() );
+	if( !perm_out.is_open() ){
+		cerr << "Error opening \"" << out_filename << "\"\n";
+		return;
+	}
+	perm_out << "#";
+	for( size_t colI = 0; colI < perm_iv_list.seq_table.size(); ++colI ){
+		if( colI > 0 )
+			perm_out << '\t';
+		perm_out << "seq" << colI << "_leftend\tseq" << colI << "_rightend";
+	}
+	perm_out << endl;
+	for( size_t rowI = 0; rowI < perm_iv_list.size(); ++rowI ){
+		Interval& iv = perm_iv_list[rowI];
+		for( size_t colI = 0; colI < perm_iv_list.seq_table.size(); ++colI ){
+			const bool is_reverse = iv.Orientation(colI) == AbstractMatch::reverse;
+			if( colI > 0 )
+				perm_out << '\t';
+			if( is_reverse )
+				perm_out << '-';
+			perm_out << iv.LeftEnd(colI) << '\t';
+			if( is_reverse )
+				perm_out << '-';
+			perm_out << iv.RightEnd(colI);
+		}
+		perm_out << endl;
+	}
+}
+
+/**
+ * Performs steps 4 through 7 of the alignment: partitions the matches into LCBs, optionally
+ * searches for additional anchors between and within the LCBs, and applies greedy breakpoint
+ * elimination, repeating until the total LCB weight stops changing.
+ * @param mlist				The matches to work on; on return it holds the matches of the final LCBs
+ * @param minimum_weight	Final LCB weight threshold used for greedy breakpoint elimination
+ * @param LCB_list			Receives one MatchList per LCB
+ * @param entire_genome		Allows LCB extension, and turns on the recursive search if this->recursive is set
+ * @param status_out		Stream for progress messages, may be NULL
+ */
+void Aligner::RecursiveAnchorSearch( MatchList& mlist, gnSeqI minimum_weight, vector< MatchList >& LCB_list, boolean entire_genome, ostream* status_out ){
+//
+// Step 4) Identify regions of collinearity (LCBs) among the remaining n-way multi-MUMs
+//
+	uint lcbI;
+	set<uint> breakpoints;
+	vector< int64 > weights;
+	vector< LCB > adjacencies;
+	MatchList new_matches;
+	new_matches.seq_table = mlist.seq_table;
+	new_matches.seq_filename = mlist.seq_filename;
+
+	if( mlist.size() == 0 )
+		return;
+
+	AaronsLCB( mlist, breakpoints );
+	if( status_out )
+		*status_out << "The " << mlist.size() << " matches constitute " << breakpoints.size() << " breakpoints\n";
+	// organize the LCBs into different MatchList instances (inside of LCB_list)
+	ComputeLCBs( mlist, breakpoints, LCB_list, weights );
+	uint weightI;
+	for( weightI = 0; weightI < weights.size(); weightI++ )
+		if( weights[weightI] < cur_min_coverage || cur_min_coverage == -1 )
+			cur_min_coverage = weights[weightI];
+
+	computeLCBAdjacencies_v2( LCB_list, weights, adjacencies );
+
+	int cur_extension_round = 0;
+	int64 total_weight = 0;
+	int64 prev_total_weight = 0;
+	weightI = 0;
+	vector< vector< int64 > > prev_iv_regions;
+	do {
+		int64 extension_weight = total_weight;
+		int64 prev_extension_weight = total_weight;
+
+		// only search outside existing LCBs on the whole-genome scale to save time
+		if( entire_genome && extend_lcbs && total_weight != 0 &&
+			cur_extension_round < this->max_extension_iters )
+		{
+			cur_extension_round++;
+			if( status_out )
+				*status_out << "Performing LCB extension\n";
+			vector< vector< int64 > > cur_iv_regions;
+			CreateGapSearchList( adjacencies, new_matches.seq_table, cur_iv_regions, entire_genome );
+			// only do the search if there's something new to search
+			if( prev_iv_regions != cur_iv_regions )
+			{
+				int local_round = 0;
+				do {
+					local_round++;
+					// search the gaps between the LCBs to extend the ends of LCBs
+					new_matches.clear();
+					vector< vector< int64 > > new_iv_regions;
+					CreateGapSearchList( adjacencies, new_matches.seq_table, new_iv_regions, entire_genome );
+					SearchLCBGaps( new_matches, new_iv_regions, nway_mh );
+					mlist.insert( mlist.end(), new_matches.begin(), new_matches.end() );
+					
+					AaronsLCB( mlist, breakpoints );
+					ComputeLCBs( mlist, breakpoints, LCB_list, weights );
+					cur_min_coverage = weights.empty() ? -1 : *(std::min_element(weights.begin(), weights.end()));	// -1: no LCBs
+					computeLCBAdjacencies_v2( LCB_list, weights, adjacencies );
+
+					// calculate the new total LCB weight
+					prev_extension_weight = extension_weight;
+					extension_weight = 0;
+					for( weightI = 0; weightI < weights.size(); weightI++ )
+						extension_weight += weights[ weightI ];
+					if( status_out )
+						*status_out << "Previous weight: " << prev_extension_weight << " new weight: " << extension_weight << endl;
+					if( prev_extension_weight > extension_weight ){
+						cerr << "Error! Previous weight: " << prev_extension_weight << " new weight: " << extension_weight << endl;
+					}
+				}while( extension_weight > prev_extension_weight && local_round < this->max_extension_iters);
+			}
+			swap( prev_iv_regions, cur_iv_regions );
+		}
+		
+		// now search within LCBs
+		if( currently_recursing && total_weight != 0 ){
+			vector< search_cache_t > new_cache;
+			for( lcbI = 0; lcbI < LCB_list.size(); lcbI++ ){
+//				if( status_out )
+//					*status_out << "Searching in LCB: " << lcbI << endl;
+				int prev_size = LCB_list[ lcbI ].size();
+				bool leftmost = true;
+				for( int i = 0; leftmost && i < adjacencies[lcbI].left_adjacency.size(); i++ )
+					if(adjacencies[lcbI].left_adjacency[i] != NO_ADJACENCY)
+						leftmost = false;
+				bool rightmost = true;
+				for( int i = 0; rightmost && i < adjacencies[lcbI].right_adjacency.size(); i++ )
+					if(adjacencies[lcbI].right_adjacency[i] != NO_ADJACENCY)
+						rightmost = false;
+				SearchWithinLCB( LCB_list[ lcbI ], new_cache, leftmost, rightmost );
+//				if( status_out )
+//					*status_out << "Gained " << LCB_list[ lcbI ].size() - prev_size << " matches\n";
+
+			}
+
+			// delete the previous search cache
+			swap( search_cache, new_cache );
+			for( size_t mI = 0; mI < new_cache.size(); mI++ )
+			{
+				if( new_cache[mI].first != NULL )
+					new_cache[mI].first->Free();
+				if( new_cache[mI].second != NULL )
+					new_cache[mI].second->Free();
+			}
+			new_cache.clear();
+			std::sort( search_cache.begin(), search_cache.end(), cache_comparator );
+		}
+		
+		mlist.clear();
+		for( lcbI = 0; lcbI < LCB_list.size(); lcbI++ ){
+			mlist.insert( mlist.end(), LCB_list[ lcbI ].begin(), LCB_list[ lcbI ].end() );
+		}
+
+		if( currently_recursing && total_weight != 0 ){
+			// remove low weight LCBs, while searching coalesced regions
+			AaronsLCB( mlist, breakpoints );
+			ComputeLCBs( mlist, breakpoints, LCB_list, weights );
+			computeLCBAdjacencies_v2( LCB_list, weights, adjacencies );
+			cur_min_coverage = weights.empty() ? -1 : *(std::min_element(weights.begin(), weights.end()));
+		}
+
+
+//
+// Step 6) Use greedy breakpoint elimination to remove low-weight LCBs
+//
+		int64 cur_perm_weight = permutation_weight != -1 ? permutation_weight : minimum_weight;
+		do{
+			vector<double> m_weights(weights.size());
+			for( size_t wI = 0; wI < weights.size(); wI++ )
+				m_weights[wI] = (double)weights[wI];
+			SimpleBreakpointScorer sbs(adjacencies, cur_perm_weight, this->collinear_genomes);
+			if( status_out )
+				(*status_out) << "Performing greedy breakpoint elimination (this may take some time)\n";
+
+			greedyBreakpointElimination_v4(adjacencies, m_weights, sbs, NULL, false);
+//			cur_min_coverage = greedyBreakpointElimination( cur_perm_weight, adjacencies, weights, status_out );
+//			MatchList deleted_matches;
+			filterMatches( adjacencies, LCB_list, weights );
+			cur_min_coverage = weights.empty() ? -1 : *(std::min_element(weights.begin(), weights.end()));
+			
+			mlist.clear();
+			for( lcbI = 0; lcbI < LCB_list.size(); lcbI++ ){
+				mlist.insert( mlist.end(), LCB_list[ lcbI ].begin(), LCB_list[ lcbI ].end() );
+			}
+			if( status_out )
+				*status_out << "Greedy breakpoint elimination leaves " << mlist.size() << " matches constituting " << LCB_list.size() << " LCBs covering at least " << cur_min_coverage << "b.p.\n";
+			
+			if( permutation_weight != -1 ){
+				// construct a filename
+				stringstream cur_perm_filename;
+				cur_perm_filename << permutation_filename << "." << cur_perm_weight / seq_count;
+				// output the permutation
+				WritePermutation( adjacencies, cur_perm_filename.str() );
+
+				// also write out condensed interval data for the permutation
+				cur_perm_filename << ".lcbs";
+				IntervalList perm_iv_list;
+				perm_iv_list.seq_filename = mlist.seq_filename;
+				perm_iv_list.seq_table = mlist.seq_table;
+				for( int permI = 0; permI < LCB_list.size(); permI++ ){
+					vector< AbstractMatch* > perm_vector;
+					perm_vector.push_back( LCB_list[permI].front() );
+					if( LCB_list[permI].size() > 1 )
+						perm_vector.push_back( LCB_list[permI].back() );
+					Interval perm_iv(perm_vector.begin(), perm_vector.end());
+					perm_iv_list.push_back(perm_iv);
+				}
+				WritePermutationCoordinates( perm_iv_list, cur_perm_filename.str() );
+
+				// raise the threshold just past the lightest surviving LCB; when no LCB
+				// is left there is nothing more to eliminate, so jump to the final threshold
+				vector< int64 >::iterator min_w = std::min_element( weights.begin(), weights.end() );
+				cur_perm_weight = min_w != weights.end() ? *min_w + seq_count : (int64)minimum_weight;
+			}
+		}while( cur_perm_weight < minimum_weight );
+		// only enable recursive anchor search once we achieve
+		// the desired weight threshold once -- for speed's sake
+		if( recursive && entire_genome ){
+			currently_recursing = true;
+		}
+
+		// calculate the new total LCB weight
+		prev_total_weight = total_weight;
+		total_weight = 0;
+		for( weightI = 0; weightI < weights.size(); weightI++ )
+			total_weight += weights[ weightI ];
+		if( status_out )
+			*status_out << "Previous weight: " << prev_total_weight << " new weight: " << total_weight << endl;
+	// the weight can shrink--this isn't an error condition
+//		if( prev_total_weight > total_weight ){
+//			cerr << "Error! Previous weight: " << prev_total_weight << " new weight: " << total_weight << endl;
+			// write out the lcb lists
+//		}
+
+//
+// Step 7) Repeat 4, 5 and 6 until the total weight stabilizes
+//
+	}while( total_weight != prev_total_weight );
+
+	// free the cached matches, then empty the cache so no released pointers remain for a later call
+	for( size_t mI = 0; mI < search_cache.size(); mI++ ){
+		if( search_cache[mI].first != NULL )
+			search_cache[mI].first->Free();
+		if( search_cache[mI].second != NULL )
+			search_cache[mI].second->Free();
+	}
+	search_cache.clear();
+}
+
+/**
+ * Note: this algorithm differs from the one reported in the Mauve paper
+ *       The modifications should make the Mauve method more sensitive
+ * Given an initial set of multi-MUMs, the alignment is an 8 step process:
+ * 1) Eliminate overlaps among the multi-MUMs
+ * 2) Compute a phylogenetic guide tree using the multi-MUMs
+ * 3) Remove subset multi-MUMs
+ * 4) Identify regions of collinearity (LCBs) among the remaining n-way multi-MUMs
+ * 5) Perform recursive anchor search within and outside LCBs
+ *    5a) search outside until weight stabilizes
+ *    5b) search within LCBs
+ * 6) Use greedy breakpoint elimination to remove low-weight LCBs
+ *    6a) whenever two LCBs coalesce, search the intervening region for multi-MUMs
+ * 7) Repeat 4, 5 and 6 until the total weight stabilizes
+ * 8) Perform gapped alignment on each LCB
+ * When limited area DP and POA are integrated, step 8 will become step 5c
+ * 
+ */
+
+void Aligner::align( MatchList& mlist, IntervalList& interval_list, double LCB_minimum_density, double LCB_minimum_range, boolean recursive, boolean extend_lcbs, boolean gapped_alignment, string tree_filename ){
+	// remember the run parameters on the aligner
+	seq_count = mlist.seq_table.size();
+	this->LCB_minimum_density = LCB_minimum_density;
+	this->LCB_minimum_range = LCB_minimum_range;
+	this->recursive = recursive;
+	this->extend_lcbs = extend_lcbs;
+	this->gapped_alignment = gapped_alignment;
+	this->currently_recursing = false;
+
+	// a LCB_minimum_range of -1 is how the caller signals that all
+	// genomes are expected to be collinear
+	this->collinear_genomes = LCB_minimum_range == -1;
+	if( collinear_genomes )
+		cout << "\nAssuming collinear genomes...\n";
+
+	// the nway_mh mask has the low seq_count bits set,
+	// i.e. one bit per sequence
+	const uint64 all_seqs_mask = ( (uint64)1 << seq_count ) - 1;
+	nway_mh.SetMask( all_seqs_mask );
+
+	cout << "Starting with " << mlist.size() << " MUMs\n";
+
+//
+// Step 1) Eliminate overlaps among the multi-MUMs
+//
+	EliminateOverlaps( mlist );	// removes linked inclusions
+	cout << "Eliminating overlaps yields " << mlist.size() << " MUMs\n";
+
+//
+// Step 2) Compute a phylogenetic guide tree using the multi-MUMs
+//
+	// a tree is built for recursive alignment or when a tree file was requested
+	MuscleInterface& muscle = MuscleInterface::getMuscleInterface();
+	const bool want_guide_tree = recursive || !tree_filename.empty();
+	if( want_guide_tree ){
+		interval_list.seq_table = mlist.seq_table;
+		interval_list.seq_filename = mlist.seq_filename;
+		// compute a distance matrix from the matches
+		NumericMatrix< double > dist_mat;
+		DistanceMatrix( mlist, dist_mat );
+		// no file name supplied: the tree goes to a temporary file
+		if( tree_filename.empty() )
+			tree_filename = CreateTempFileName("guide_tree");
+		muscle.CreateTree( dist_mat, tree_filename );
+	}
+
+//
+// Step 3) Remove subset multi-MUMs
+//
+	mlist.MultiplicityFilter( seq_count );
+	cout << "Multiplicity filter gives " << mlist.size() << " MUMs\n";
+
+	if( mlist.size() == 0 )
+		return;	// no matches survived the filter
+
+//
+// Steps 4 through 7 are contained in RecursiveAnchorSearch
+//
+	vector< MatchList > LCB_list;
+	RecursiveAnchorSearch( mlist, (gnSeqI)LCB_minimum_range, LCB_list, true, &cout );
+
+
+//
+// Step 8) Perform gapped alignment on each LCB using the anchors
+//
+	const bool final_gapped = gapped_alignment && recursive;
+	if( final_gapped )
+		cout << "\nMaking final gapped alignment...\n";
+	interval_list.clear();
+	AlnProgressTracker progress;
+	progress.cur_leftend = 0;
+	progress.prev_progress = 0;
+	progress.total_len = 0;
+	// total_len sums (number of matches - 1) over all LCBs
+	for( uint lcbI = 0; lcbI < LCB_list.size(); lcbI++ )
+		progress.total_len += LCB_list[lcbI].size()-1;
+	for( uint lcbI = 0; lcbI < LCB_list.size(); lcbI++ ){
+		// append an empty interval, then fill it in place
+		Interval blank_iv;
+		interval_list.push_back( blank_iv );
+		Interval& iv = interval_list.back();
+		if( final_gapped ){
+			AlignLCBInParallel( collinear_genomes || (LCB_list.size()==1), gal, LCB_list[ lcbI ], iv, progress );
+		}else{
+			// no gapped alignment: the interval just takes the LCB's matches
+			iv.SetMatches( LCB_list[lcbI] );
+		}
+	}
+
+	// finally add any unaligned regions to the interval list
+	if( gapped_alignment )
+		addUnalignedIntervals( interval_list );
+}
+
+}	// namespace mems
+
diff --git a/libMems/Aligner.h b/libMems/Aligner.h
new file mode 100644
index 0000000..a4e2377
--- /dev/null
+++ b/libMems/Aligner.h
@@ -0,0 +1,307 @@
+/*******************************************************************************
+ * $Id: Aligner.h,v 1.23 2004/04/19 23:10:13 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _Aligner_h_
+#define _Aligner_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnSequence.h"
+#include "libMems/DNAMemorySML.h"
+#include "libMems/GappedAligner.h"
+#include "libMems/MatchList.h"
+#include "libMems/Interval.h"
+#include "libMems/IntervalList.h"
+#include "libMems/MemHash.h"
+#include "libMems/MaskedMemHash.h"
+#include <map>
+#include "libMems/NumericMatrix.h"
+#include "libMems/GreedyBreakpointElimination.h"
+#include <list>
+#include "libMems/LCB.h"
+#include "libMUSCLE/threadstorage.h"
+
+namespace mems {
+
+/**
+ * A mem labeled with a number.
+ * Used by LCB construction algorithm 
+ */
+class LabeledMem{
+public:
+	Match* mem;		/**< the match that carries the label */
+	uint label;		/**< the number this match is labeled with */
+};
+
+/**
+ * Compares Matches labeled with a number.
+ * Orders by absolute start coordinate in one sequence; a match whose start is NO_MATCH sorts first.
+ * Used by LCB construction algorithm
+ */
+class LabeledMemComparator {
+public:
+	LabeledMemComparator( uint seq ){
+		m_seq = seq;
+	}
+	// const reference: lets the comparator be copied from temporaries and const objects
+	LabeledMemComparator( const LabeledMemComparator& lmc ){
+		m_seq = lmc.m_seq;
+	}
+	boolean operator()(const LabeledMem& a, const LabeledMem& b) const{
+		int64 a_start = a.mem->Start( m_seq ), b_start = b.mem->Start( m_seq );
+		if( a_start == NO_MATCH || b_start == NO_MATCH ){
+			if( b_start != NO_MATCH )
+				return true;
+			return false;
+		}
+		// negative starts are compared by absolute value
+		if(a_start < 0)
+			a_start = -a_start;
+		if(b_start < 0)
+			b_start = -b_start;
+		int64 diff = a_start - b_start;
+		return diff < 0;
+	}
+protected:
+	uint m_seq;	/**< index of the sequence whose start coordinates are compared */
+private:
+	LabeledMemComparator();
+};
+
+/**
+ * A match with an associated list iterator.
+ * Used by LCB construction algorithm 
+ */
+class PlacementMatch{
+public:
+	Match* mem;		/**< the match */
+	std::list< LabeledMem >::iterator iter;	/**< the LabeledMem list iterator associated with the match */
+};
+
+/**
+ * Compares Matches.
+ * Orders by absolute start coordinate in one sequence; a match whose start is NO_MATCH sorts first.
+ * Used by LCB construction algorithm
+ */
+class PlacementMatchComparator {
+public:
+	PlacementMatchComparator( uint seq ){
+		m_seq = seq;
+	}
+	// const reference: lets the comparator be copied from temporaries and const objects
+	PlacementMatchComparator( const PlacementMatchComparator& lmc ){
+		m_seq = lmc.m_seq;
+	}
+	boolean operator()(const PlacementMatch& a, const PlacementMatch& b) const{
+		int64 a_start = a.mem->Start( m_seq ), b_start = b.mem->Start( m_seq );
+		if( a_start == NO_MATCH || b_start == NO_MATCH ){
+			if( b_start != NO_MATCH )
+				return true;
+			return false;
+		}
+		// negative starts are compared by absolute value
+		if(a_start < 0)
+			a_start = -a_start;
+		if(b_start < 0)
+			b_start = -b_start;
+
+		int64 diff = a_start - b_start;
+		return diff < 0;
+	}
+protected:
+	uint m_seq;	/**< index of the sequence whose start coordinates are compared */
+private:
+	PlacementMatchComparator();
+};
+
+
+/** a cache type to remember which intervals have already been searched */
+typedef std::pair< mems::Match*, mems::Match* > search_cache_t;
+
+
+/**
+ * Used to find locally colinear blocks (LCBs) and do recursive
+ * alignments on the blocks
+ * To create an alignment one need only use the align method.
+ * LCB lists are typically stored using the IntervalList class.  They can be
+ * read and written in interval format using that class.  For input and output
+ * of gapped alignments in other formats, see the gnAlignedSequences class.
+ * Other methods in this class are available for experimentation.
+ */
+class Aligner {
+public:
+	/** 
+	 * Constructs an aligner for the specified number of sequences.
+	 * @param seq_count 	The number of sequences that will be aligned with this Aligner
+	 */
+	Aligner( uint seq_count );
+	Aligner( const Aligner& al );
+	Aligner& operator=( const Aligner& al );
+
+	/**
+	 * Performs an alignment.  Takes a MatchList as input and outputs a list of LCBs as an IntervalList.
+	 * Several of the options can be used to filter out unlikely LCBs.  If the recursive option is
+	 * specified, the regions between matches in each LCB are searched for further homology; a full
+	 * gapped alignment is produced when gapped_alignment is set as well.
+	 * @param mlist					The MatchList to use as input for the alignment process
+	 * @param interval_list			The IntervalList that is created by the alignment process
+	 * @param LCB_minimum_density	The minimum density an LCB may have to be considered a valid block, between 0 and 1
+	 * @param LCB_minimum_range		A misnomer: really it's the minimum number of matching base pairs an LCB 
+	 *								must contain to be considered an LCB. Coverage is defined as 
+	 *								(length of match) * (# of matching sequences); -1 means all genomes are expected to be collinear
+	 * @param recursive 			Option for performing a recursive alignment.  If this is set to
+	 * 								true, all regions which have gaps will be searched for exact matches.
+	 * @param extend_lcbs		If true, attempt to extend the boundaries of LCBs by searching for 
+	 *                          additional matches between LCBs
+	 * @param gapped_alignment	If true, unaligned regions are added to the result and, when recursive is also set, each LCB gets a gapped alignment
+	 * @param tree_filename		The name of the output file to write the phylogenetic guide tree into.  If
+	 *                          an empty string is specified then a temporary file is created.  
+	 * @throws AlignerError 	may be thrown if an error occurs
+	 */
+	void align( MatchList& mlist, IntervalList& interval_list, double LCB_minimum_density, double LCB_minimum_range, boolean recursive, boolean extend_lcbs, boolean gapped_alignment, std::string tree_filename = "" );
+	
+	void Recursion( MatchList& r_list, Match* r_begin, Match* r_end, boolean nway_only = false );
+	void GetBestLCB( MatchList& r_list, MatchList& best_lcb );
+	void DoSomethingCool( MatchList& mlist, Interval& iv );
+	
+	/**
+	 * Set the minimum size of intervening region between two anchor matches that will
+	 * be considered for recursive anchor determination.  When the gaps between two anchors
+	 * are less than this cutoff value the region is handed off to the dynamic programming aligner
+	 * e.g. ClustalW 
+	 */
+	void SetMinRecursionGapLength( gnSeqI min_r_gap );
+
+	void SetMaxExtensionIterations( uint ext_iters ){ this->max_extension_iters = ext_iters; }
+
+	void SearchWithinLCB( MatchList& mlist, std::vector< search_cache_t >& new_cache, bool leftmost = false, bool rightmost = false );
+	void RecursiveAnchorSearch( MatchList& mlist, gnSeqI minimum_weight, std::vector< MatchList >& LCB_list, boolean entire_genome, std::ostream* status_out = NULL );	/**< steps 4 through 7 of the alignment process, called by align() */
+
+	void AlignLCB( MatchList& mlist, Interval& iv );
+	void SetGappedAligner( GappedAligner& gal );
+	/** forwards the request to whatever gapped aligner is being used */
+	void SetMaxGappedAlignmentLength( gnSeqI len );
+
+	/** Set output parameters for permutation matrices */
+	void SetPermutationOutput( std::string& permutation_filename, int64 permutation_weight );
+	void WritePermutation( std::vector< LCB >& adjacencies, std::string out_filename );	/**< writes one line of LCB ids per genome to out_filename */
+
+	void SetRecursive( bool value ){ this->recursive = value; }
+protected:
+	TLS<MemHash> gap_mh;			/**< Used during recursive alignment */
+	MaskedMemHash nway_mh;	/**< Used during recursive alignment to find nway matches only */
+	uint32 seq_count;		/**< The number of sequences this aligner is working with */
+	boolean debug;			/**< Flag for debugging output */
+	
+	double LCB_minimum_density;	/**< value given to the most recent align() call */
+	double LCB_minimum_range;	/**< value given to the most recent align() call; -1 means collinear genomes */
+
+	uint max_extension_iters;	/**< maximum number of attempts at LCB extension */
+	
+	int64 cur_min_coverage;	/**< Tracks the minimum weight of the least weight LCB */
+	
+	gnSeqI min_recursive_gap_length;	/**< Minimum size of gap regions that will be recursed on */
+
+	void consistencyCheck( uint lcb_count, std::vector< LCB >& adjacencies, std::vector< MatchList >& lcb_list, std::vector< int64 >& weights );
+	
+	boolean recursive;		/**< Set to true if a recursive anchor search/gapped alignment should be performed */
+	boolean extend_lcbs;	/**< Set to true if LCB extension should be attempted */
+	boolean gapped_alignment;	/**< Set to true to complete a gapped alignment */
+	boolean currently_recursing;	/**< True when the recursive search has begun */
+	boolean collinear_genomes;	/**< Set to true if all genomes are assumed to be collinear */
+	
+	GappedAligner* gal;	/**< the gapped aligner handed to AlignLCBInParallel by align() */
+
+	std::string permutation_filename;	/**< prefix of the permutation output file names; align appends "." and a weight */
+	int64 permutation_weight;	/**< starting weight for permutation output; -1 disables permutation output */
+
+	std::vector< search_cache_t > search_cache; /**< a list of recursive searches that have already been done */
+};
+
+/**
+ * Thrown if some error occurs during alignment
+ */
+CREATE_EXCEPTION( AlignerError );
+
+void transposeMatches( MatchList& mlist, uint seqI, const std::vector< int64 >& seq_regions );
+
+/**
+ * Deletes overlapping regions in a set of matches.  Always removes matching base pairs from the
+ * match covering fewer bases.  Coverage is defined as (length of match) * (# of matching sequences)
+ */
+void EliminateOverlaps( MatchList& ml );
+
+/**
+ * Function to determine the breakpoints in a set of matches. 
+ * Sorts the matches in mlist and returns the indices of breakpoints.
+ * This function attempts (sometimes unsuccessfully) to determine subset LCBs.  If a set of
+ * matches containing subset LCBs has been passed to it, the resulting breakpoint set may
+ * be incorrect.  You have been warned.
+ * @param mlist 		A list of matches to search for LCBs.
+ * @param breakpoints 	The indices of matches in the sorted match list that are at LCB boundaries
+ */
+void AaronsLCB( MatchList& mlist, std::set<uint>& breakpoints );
+
+
+void ComputeLCBs( MatchList& meml, std::set<uint>& breakpoints, std::vector<MatchList>& lcb_list, std::vector<int64>& weights );
+void computeLCBAdjacencies_v2( std::vector<MatchList>& lcb_list, std::vector< int64 >& weights, std::vector< LCB >& adjacencies );
+void computeLCBAdjacencies_v2( IntervalList& iv_list, std::vector< int64 >& weights, std::vector< LCB >& adjacencies );
+void scanLeft( int& left_recurseI, std::vector< LCB >& adjacencies, int min_weight, int seqI );
+void scanRight( int& right_recurseI, std::vector< LCB >& adjacencies, int min_weight, int seqI );
+void GetLCBCoverage( MatchList& lcb, uint64& coverage );
+
+int64 greedyBreakpointElimination( gnSeqI minimum_weight, std::vector< LCB >& adjacencies, std::vector< int64 >& weights, std::ostream* status_out = NULL );
+void filterMatches( std::vector< LCB >& adjacencies, std::vector< MatchList >& lcb_list, std::vector< int64 >& weights );
+
+void CreateGapSearchList( std::vector< LCB >& adjacencies, const std::vector< genome::gnSequence* >& seq_table, std::vector< std::vector< int64 > >& iv_regions, boolean entire_genome );
+void SearchLCBGaps( MatchList& new_matches, const std::vector< std::vector< int64 > >& iv_regions, MaskedMemHash& nway_mh );
+
+static const uint MIN_ANCHOR_LENGTH = 9;
+
+
+/**
+ * used for search cache lookups
+ * Orders search_cache_t pairs using a MatchStartComparator constructed with 0:
+ *  - if a.first is NULL, a precedes b exactly when b.first is not NULL
+ *  - otherwise a precedes b only when a.first precedes b.first and, in
+ *    addition, a.second precedes b.second (a NULL a.second precedes any
+ *    non-NULL b.second)
+ * NOTE(review): this requires both members to precede rather than comparing
+ * lexicographically -- confirm that it gives the ordering std::sort and the
+ * cache lookups expect
+ */
+class SearchCacheComparator
+{
+public:
+	SearchCacheComparator() : msc(0){};
+	bool operator()( const search_cache_t& a, const search_cache_t& b ) const
+	{
+		// first matches: a NULL in a precedes only a non-NULL in b
+		if( a.first == NULL )
+			return b.first != NULL;
+		if( b.first == NULL || !msc( a.first, b.first ) )
+			return false;
+		// a.first precedes b.first; the second matches must agree
+		if( a.second == NULL )
+			return b.second != NULL;
+		if( b.second == NULL || !msc( a.second, b.second ) )
+			return false;
+		return true;
+	}
+protected:
+	mems::MatchStartComparator<mems::Match> msc;
+};
+
+static SearchCacheComparator cache_comparator;
+
+
+}
+
+#endif // _Aligner_h_
diff --git a/libMems/Backbone.cpp b/libMems/Backbone.cpp
new file mode 100644
index 0000000..afa0ed8
--- /dev/null
+++ b/libMems/Backbone.cpp
@@ -0,0 +1,1203 @@
+/*******************************************************************************
+ * $Id: Backbone.cpp,v 1.12 2004/04/19 23:11:19 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/ProgressiveAligner.h"
+#include "libMems/Backbone.h"
+#include "libMems/Islands.h"
+#include "libMems/CompactGappedAlignment.h"
+
+#include <boost/graph/graph_traits.hpp>
+#include <boost/graph/adjacency_list.hpp>
+#include <boost/graph/topological_sort.hpp>
+#include <boost/graph/johnson_all_pairs_shortest.hpp>
+#include <boost/graph/depth_first_search.hpp>
+#include <boost/graph/undirected_dfs.hpp>
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+
+/**
+ * Builds a sorted list of breakpoint coordinates for one sequence.
+ * For every element of mvect that is defined in seq, LeftEnd(seq) and
+ * RightEnd(seq)+1 are recorded; elements whose LeftEnd(seq) equals NO_MATCH
+ * contribute nothing.
+ * @param mvect		A random-access container of pointers to match-like objects
+ * @param seq		The sequence whose coordinates are collected
+ * @param bp_list	Output; previous contents are discarded, result is sorted ascending
+ */
+template< typename MatchVector >
+void getBpList( MatchVector& mvect, uint seq, vector< gnSeqI >& bp_list )
+{
+	bp_list.clear();
+	const size_t match_count = mvect.size();
+	for( size_t matchI = 0; matchI != match_count; ++matchI )
+	{
+		// only matches with a component in seq have boundaries to report
+		if( mvect[matchI]->LeftEnd(seq) != NO_MATCH )
+		{
+			bp_list.push_back( mvect[matchI]->LeftEnd(seq) );
+			bp_list.push_back( mvect[matchI]->RightEnd(seq)+1 );
+		}
+	}
+	std::sort( bp_list.begin(), bp_list.end() );
+}
+
+/**
+ * Computes an index translation between two orderings of the same values.
+ * Both inputs are tagged with their positions and sorted by (value, position);
+ * the i-th entries of the two sorted lists are then paired up, so that
+ * map[ position in mv_from ] = position in mv_to.
+ * The code indexes the sorted mv_to list with every position of the sorted
+ * mv_from list, so mv_to must hold at least as many elements as mv_from.
+ * @param mv_from	The ordering to translate from
+ * @param mv_to		The ordering to translate to
+ * @param map		Output; resized to mv_from.size()
+ */
+template< typename MatchVector >
+void createMap( const MatchVector& mv_from, const MatchVector& mv_to, vector< size_t >& map )
+{
+	typedef typename MatchVector::value_type MatchPtr;
+	typedef pair< MatchPtr, size_t > tagged_t;
+
+	vector< tagged_t > from_tagged( mv_from.size() );
+	vector< tagged_t > to_tagged( mv_to.size() );
+	size_t pos = 0;
+	for( pos = 0; pos != mv_from.size(); ++pos )
+		from_tagged[pos] = tagged_t( mv_from[pos], pos );
+	for( pos = 0; pos != mv_to.size(); ++pos )
+		to_tagged[pos] = tagged_t( mv_to[pos], pos );
+
+	// after sorting, equal values sit at equal ranks in both lists
+	std::sort( from_tagged.begin(), from_tagged.end() );
+	std::sort( to_tagged.begin(), to_tagged.end() );
+
+	map.resize( from_tagged.size() );
+	for( size_t rank = 0; rank != from_tagged.size(); ++rank )
+	{
+		const size_t from_pos = from_tagged[rank].second;
+		map[from_pos] = to_tagged[rank].second;
+	}
+}
+
+/** An Interval pointer tagged with an index (collapseCollinear() stores the interval's position in its pointer table there) */
+typedef pair< size_t, Interval* > iv_tracker_t;
+
+/**
+ * Orders iv_tracker_t entries by handing their Interval pointers to a
+ * SingleStartComparator<Interval> constructed for a single sequence; the
+ * index member of the entries does not take part in the comparison.
+ */
+class IvTrackerComp
+{
+public:
+	/** @param seq	The sequence passed on to the underlying SingleStartComparator */
+	IvTrackerComp( uint seq ) : ssc( seq ) {}
+	bool operator()( const iv_tracker_t& lhs, const iv_tracker_t& rhs )
+	{
+		Interval* const lhs_iv = lhs.second;
+		Interval* const rhs_iv = rhs.second;
+		return ssc( lhs_iv, rhs_iv );
+	}
+private:
+	SingleStartComparator<Interval> ssc;
+};
+
+/** Direction codes understood by getNeighbor() */
+const int LEFT_NEIGHBOR = -1;
+const int RIGHT_NEIGHBOR = 1;
+/** A table holding one neighbor index per sequence */
+typedef vector< size_t > neighbor_t;
+
+/**
+ * Selects one of the two neighbor tables of an entry.
+ * Note that collapseCollinear() stores the preceding interval in entry.first
+ * and the following interval in entry.second, and calls this function with
+ * computed +1/-1 directions.
+ * @param entry		The pair of neighbor tables to choose from
+ * @param direction	RIGHT_NEIGHBOR selects entry.first, any other value selects entry.second
+ * @return	A reference to the selected table
+ */
+neighbor_t& getNeighbor( pair< neighbor_t, neighbor_t >& entry, int direction )
+{
+	return direction == RIGHT_NEIGHBOR ? entry.first : entry.second;
+}
+
+/**
+ * Merges intervals of iv_list that are each other's neighbor, with one
+ * consistent relative orientation, in every sequence where the interval being
+ * merged is defined.
+ *
+ * Outline:
+ *  1. collect pointers to the intervals with Multiplicity() >= 2
+ *  2. for every sequence, sort the intervals with IvTrackerComp and record for
+ *     each one the index of the preceding (.first) and following (.second)
+ *     interval, or NEIGHBOR_UNKNOWN if there is none
+ *  3. in two passes (d = -1 and d = +1) merge each interval nI into its
+ *     neighbor nayb when all defined sequences agree on that neighbor, then
+ *     relink the neighbor tables around the merged pair
+ *  4. rebuild the list from the intervals that still hold matches, swap it
+ *     into iv_list and call addUnalignedRegions() on it
+ *
+ * Intervals with Multiplicity() < 2 are not carried over into the rebuilt list.
+ * @param iv_list	The interval list to collapse; its contents are replaced
+ */
+void collapseCollinear( IntervalList& iv_list )
+{
+	if( iv_list.size() == 0 )
+		return;	// nothing to see here, move along...
+	const size_t seq_count = iv_list.seq_table.size();
+	std::vector< Interval* > iv_ptrs(iv_list.size());	// the intervals that take part in merging
+	size_t lilI = 0;	// number of entries of iv_ptrs filled so far
+	for( size_t i = 0; i < iv_list.size(); ++i )
+	{
+		// ignore unaligned regions
+		if( iv_list[i].Multiplicity() < 2 )
+			continue;
+		iv_ptrs[lilI++] = &iv_list[i];
+	}
+	iv_ptrs.resize(lilI);
+	const size_t NEIGHBOR_UNKNOWN = (std::numeric_limits<size_t>::max)();	// sentinel: no neighbor recorded
+	neighbor_t lefties_tmp( seq_count, NEIGHBOR_UNKNOWN );
+	pair< neighbor_t, neighbor_t > neighbor_pair( lefties_tmp, lefties_tmp );
+	vector< pair< neighbor_t, neighbor_t > > neighbor_list( iv_ptrs.size(), neighbor_pair );	// indexed like iv_ptrs
+	vector< iv_tracker_t > iv_tracker( iv_ptrs.size() );	// (index into iv_ptrs, interval) pairs, re-sorted per sequence
+	for( size_t i = 0; i < iv_ptrs.size(); ++i )
+	{
+		iv_tracker[i] = make_pair( i, iv_ptrs[i] );
+	}
+	for( size_t seqI = 0; seqI < seq_count; ++seqI )
+	{
+		IvTrackerComp ivc( seqI );
+		sort( iv_tracker.begin(), iv_tracker.end(), ivc );
+		size_t prev_i = NEIGHBOR_UNKNOWN;
+		size_t cur_i = NEIGHBOR_UNKNOWN;
+		for( size_t i = 0; i < iv_tracker.size(); ++i )
+		{
+			if( iv_tracker[i].second->LeftEnd(seqI) == NO_MATCH )
+				continue;
+			if( cur_i != NEIGHBOR_UNKNOWN )
+			{
+				neighbor_list[cur_i].first[seqI] = prev_i;	// interval preceding cur_i in seqI
+				neighbor_list[cur_i].second[seqI] = iv_tracker[i].first;	// interval following cur_i in seqI
+			}
+			prev_i = cur_i;
+			cur_i = iv_tracker[i].first;
+		}
+		// get the last one
+		if( cur_i != NEIGHBOR_UNKNOWN )
+		{
+			neighbor_list[cur_i].first[seqI] = prev_i;
+			neighbor_list[cur_i].second[seqI] = NEIGHBOR_UNKNOWN;
+		}
+	}
+
+	// now look for neighbor pair entries which can be merged
+	for( int d = -1; d < 2; d+= 2 )	// iterate over both directions
+	{
+		size_t unknown_count = 0;	// never read in this function
+		for( size_t nI = 0; nI < neighbor_list.size(); ++nI )
+		{
+			size_t nayb = NEIGHBOR_UNKNOWN;	// merge candidate shared by the sequences seen so far
+			size_t seqI = 0;
+			bool parity = false;	// true if nI and nayb have equal orientation in the sequences seen so far
+			size_t ct = 0;	// number of defined sequences counted while a candidate is known
+			for( ; seqI < seq_count; ++seqI )
+			{
+				if( iv_ptrs[nI]->Orientation(seqI) == AbstractMatch::undefined )
+					continue;
+				int orient = iv_ptrs[nI]->Orientation(seqI) == AbstractMatch::forward ? 1 : -1;
+
+				if( nayb == NEIGHBOR_UNKNOWN )
+				{
+					// getNeighbor() yields .first (preceding interval) for +1 and .second (following interval) for -1
+					nayb = getNeighbor( neighbor_list[nI], d * orient * -1 )[seqI];
+					if( nayb != NEIGHBOR_UNKNOWN )
+						parity = iv_ptrs[nI]->Orientation(seqI) == iv_ptrs[nayb]->Orientation(seqI);
+				}
+				else if( nayb != getNeighbor( neighbor_list[nI], d * orient * -1 )[seqI] )
+					break;	// this sequence has a different neighbor
+				else if( parity != (iv_ptrs[nI]->Orientation(seqI) == iv_ptrs[nayb]->Orientation(seqI)) )
+					break;	// relative orientation differs from the earlier sequences
+				if( nayb != NEIGHBOR_UNKNOWN )
+					ct++;
+			}
+			if( seqI < seq_count || ct < iv_ptrs[nI]->Multiplicity() )
+				continue;	// not collinear
+			if( nayb == NEIGHBOR_UNKNOWN )
+				continue;
+
+			// merge nI and nayb: the matches of nI are moved into nayb, leaving nI without matches
+			uint fs = iv_ptrs[nI]->FirstStart();
+			gnSeqI nI_lend_fs = iv_ptrs[nI]->LeftEnd(fs);
+			gnSeqI nayb_lend_fs = iv_ptrs[nayb]->LeftEnd(fs);
+			AbstractMatch::orientation o = iv_ptrs[nI]->Orientation(fs);
+			vector< AbstractMatch* > nI_matches;
+			iv_ptrs[nI]->StealMatches( nI_matches );
+			vector< AbstractMatch* > nayb_matches;
+			iv_ptrs[nayb]->StealMatches( nayb_matches );
+			if( !parity )
+			{
+				// opposite relative orientation: reverse the order of nI's matches and invert each of them
+				std::reverse( nI_matches.begin(), nI_matches.end() );
+				for( size_t i = 0; i < nI_matches.size(); ++i )
+					nI_matches[i]->Invert();
+				o = o == AbstractMatch::forward ? AbstractMatch::reverse : AbstractMatch::forward;
+			}
+			// append or prepend nI's matches depending on orientation and the left ends in sequence fs
+			if( (o == AbstractMatch::forward && nI_lend_fs > nayb_lend_fs) ||
+				(o == AbstractMatch::reverse && nI_lend_fs < nayb_lend_fs))
+				nayb_matches.insert( nayb_matches.end(), nI_matches.begin(), nI_matches.end() );
+			else
+				nayb_matches.insert( nayb_matches.begin(), nI_matches.begin(), nI_matches.end() );
+
+			iv_ptrs[nayb]->SetMatches( nayb_matches );
+
+			// update all pointers to point to nayb
+			seqI = 0;
+			for( ; seqI < seq_count; ++seqI )
+			{
+				if( getNeighbor( neighbor_list[nI], -1 )[seqI] == NEIGHBOR_UNKNOWN &&
+					getNeighbor( neighbor_list[nI], 1 )[seqI] == NEIGHBOR_UNKNOWN )
+					continue;	// nI has no neighbors recorded in this sequence
+				int orient = iv_ptrs[nayb]->Orientation(seqI) == AbstractMatch::forward ? 1 : -1;
+				size_t other_nayb = getNeighbor( neighbor_list[nI], d * orient * (parity ? 1 : -1) )[seqI];	// nI's neighbor on the side away from nayb
+				if( other_nayb != NEIGHBOR_UNKNOWN )
+				{
+					// other_nayb used to border nI; it now borders nayb
+					if( getNeighbor( neighbor_list[other_nayb], 1 )[seqI] == nI )
+						getNeighbor( neighbor_list[other_nayb], 1 )[seqI] = nayb;
+					else if( getNeighbor( neighbor_list[other_nayb], -1 )[seqI] == nI )
+						getNeighbor( neighbor_list[other_nayb], -1 )[seqI] = nayb;
+					else
+					{
+						cerr << "serious programmer error\n";
+						genome::breakHere();
+					}
+				}
+				// nayb's link to nI is redirected to other_nayb
+				if( getNeighbor( neighbor_list[nayb], 1 )[seqI] == nI )
+					getNeighbor( neighbor_list[nayb], 1 )[seqI] = other_nayb;
+				else if( getNeighbor( neighbor_list[nayb], -1 )[seqI] == nI )
+					getNeighbor( neighbor_list[nayb], -1 )[seqI] = other_nayb;
+				else
+				{
+					cerr << "inexcusable programmer error\n";
+					genome::breakHere();
+				}
+				neighbor_list[nI].first[seqI] = NEIGHBOR_UNKNOWN;	// nI no longer takes part in the neighbor graph
+				neighbor_list[nI].second[seqI] = NEIGHBOR_UNKNOWN;
+			}
+		}
+	}
+
+	IntervalList new_list;
+	new_list.seq_filename = iv_list.seq_filename;
+	new_list.seq_table = iv_list.seq_table;
+	new_list.resize( iv_ptrs.size() );
+	size_t newI = 0;
+	for( size_t ivI = 0; ivI < iv_ptrs.size(); ++ivI )
+	{
+		vector< AbstractMatch* > matches;
+		iv_ptrs[ivI]->StealMatches( matches );
+		if( matches.size() > 0 )	// intervals merged into a neighbor have no matches left
+			new_list[newI++].SetMatches( matches );
+	}
+	new_list.resize(newI);
+	swap( iv_list, new_list );
+	addUnalignedRegions(iv_list);
+}
+
+
+/**
+ * Debugging aid: reports every alignment column that consists only of gap
+ * characters ('-').  The alignment of each interval is obtained with
+ * mems::GetAlignment() and a message is written to cerr for each offending
+ * column; nothing is modified and nothing is returned.
+ * Intervals for which GetAlignment() produces no rows are skipped.
+ * @param iv_list	The intervals to check
+ */
+void checkForAllGapColumns( IntervalList& iv_list )
+{
+	// debug: sanity check whether there are all gap columns
+	for( size_t ivI = 0; ivI < iv_list.size(); ivI++ )
+	{
+		vector< string > aln;
+		mems::GetAlignment( iv_list[ivI], iv_list.seq_table, aln );
+		// without rows there are no columns to inspect, and aln[0] would be out of bounds
+		if( aln.empty() )
+			continue;
+		// the column count is taken from the first row
+		for( size_t colI = 0; colI < aln[0].size(); ++colI )
+		{
+			size_t rowI = 0;
+			for( ; rowI < aln.size(); ++rowI )
+				if( aln[rowI][colI] != '-' )
+					break;
+			// reaching the end of the rows means no row had a non-gap character
+			if( rowI == aln.size() )
+			{
+				cerr << "ERROR!  IV " << ivI << " COLUMN " << colI << " IS ALL GAPS!\n";
+			}
+		}
+	}
+}
+
+
+
+/**
+ * Copies the column ranges of an HSS array into a pairwise column table.
+ * hss_cols is resized to [seq_count][seq_count][iv_count], where seq_count and
+ * iv_count are the first and third extent of hss_array.  For every pair
+ * seqI < seqJ and every interval, each HSS contributes one
+ * (left_col, right_col) entry, in the same order as in hss_array.
+ * @param hss_array	The HSS lists, indexed [seqI][seqJ][interval]
+ * @param hss_cols	Output table, indexed the same way
+ */
+void translateToPairwiseGenomeHSS( const hss_array_t& hss_array, pairwise_genome_hss_t& hss_cols )
+{
+	uint seq_count = hss_array.shape()[0];
+	uint iv_count = hss_array.shape()[2];
+	hss_cols.resize( boost::extents[seq_count][seq_count][iv_count] );
+
+	// only the upper triangle (seqI < seqJ) of the pair table is filled
+	for( size_t seqI = 0; seqI < seq_count; ++seqI )
+	{
+		for( size_t seqJ = seqI+1; seqJ < seq_count; ++seqJ )
+		{
+			for( size_t ivI = 0; ivI < iv_count; ++ivI )
+			{
+				const hss_list_t& src_list = hss_array[seqI][seqJ][ivI];
+				vector< pair< size_t, size_t > >& dest_cols = hss_cols[seqI][seqJ][ivI];
+				const size_t hss_count = src_list.size();
+				dest_cols.resize( hss_count );
+				for( size_t hssI = 0; hssI != hss_count; ++hssI )
+				{
+					dest_cols[hssI].first = src_list[hssI].left_col;
+					dest_cols[hssI].second = src_list[hssI].right_col;
+				}
+			}
+		}
+	}
+}
+
+
+/**
+ * Computes a pooled base-composition ratio over all sequences of seq_table.
+ * Every character of every sequence is translated through
+ * SortedMerList::BasicDNATable() and tallied; the result is the share of
+ * codes 1 and 2 among codes 0..3 (presumably C and G versus A and T --
+ * confirm against the table definition).
+ * The table is assumed to map every character to a code in 0..3.
+ * @param seq_table	The sequences to scan
+ * @return	(count[1] + count[2]) / (count[0] + count[1] + count[2] + count[3]);
+ *		the division is 0/0 when no characters are counted
+ */
+double computeGC( std::vector< gnSequence* >& seq_table )
+{
+	const uint8* tab = SortedMerList::BasicDNATable();
+	size_t counts[4] = { 0, 0, 0, 0 };
+	for( size_t seqI = 0; seqI < seq_table.size(); seqI++ )
+	{
+		std::string seq;
+		seq_table[seqI]->ToString( seq );
+		for( size_t cI = 0; cI < seq.size(); cI++  )
+		{
+			// plain char may be signed; go through unsigned char so that
+			// characters above 127 cannot produce a negative table index
+			const unsigned char ch = static_cast< unsigned char >( seq[cI] );
+			counts[ tab[ ch ] ]++;
+		}
+	}
+	return double(counts[1]+counts[2]) / double(counts[1]+counts[2] + counts[0]+counts[3]);
+}
+
+
+/**
+ * Runs an HSS detector on every pairwise projection of an interval list and
+ * records the detected segments as column ranges of the n-way intervals.
+ *
+ * For each sequence pair seqI < seqJ the function
+ *  1. projects iv_list onto the pair and wraps each projected LCB in a
+ *     CompactGappedAlignment
+ *  2. splits those alignments at the cumulative contig lengths of both sequences
+ *  3. invokes the detector and converts its output with HssArrayToCga()
+ *  4. splits the resulting HSS at the boundaries of the n-way intervals in
+ *     seqI and in seqJ
+ *  5. walks the HSS and the n-way intervals in seqI order and appends, for
+ *     each HSS that falls inside an interval, its (left column, right column)
+ *     in that interval to hss_cols
+ *
+ * Within this function the pairwise objects use sequence indices 0 and 1
+ * (for seqI and seqJ), the n-way intervals use seqI and seqJ themselves.
+ *
+ * @param iv_list	The n-way intervals to project
+ * @param iv_ptrs	Pointers to the n-way alignments; this vector is re-sorted here
+ * @param iv_orig_ptrs	Expected to hold the same pointers as iv_ptrs in the order
+ *			that hss_cols is indexed in
+ * @param hss_cols	Output, indexed [seqI][seqJ][interval]; entries are appended
+ * @param detector	Invoked as (*detector)( alignments, seq_table, hss_array )
+ */
+void makeAllPairwiseGenomeHSS( IntervalList& iv_list, vector< CompactGappedAlignment<>* >& iv_ptrs, vector< CompactGappedAlignment<>* >& iv_orig_ptrs, pairwise_genome_hss_t& hss_cols, const HssDetector* detector )
+{
+	uint seq_count = iv_list.seq_table.size();
+	// make pairwise projections of intervals and find LCBs...
+	for( size_t seqI = 0; seqI < seq_count; ++seqI )
+	{
+		for( size_t seqJ = seqI+1; seqJ < seq_count; ++seqJ )
+		{
+			vector< uint > projection;
+			projection.push_back( seqI );
+			projection.push_back( seqJ );
+			vector< vector< MatchProjectionAdapter* > > LCB_list;
+			vector< LCB > projected_adjs;
+			projectIntervalList( iv_list, projection, LCB_list, projected_adjs );
+			// make intervals
+			IntervalList pair_ivs;
+			pair_ivs.seq_table.push_back( iv_list.seq_table[seqI] );
+			pair_ivs.seq_table.push_back( iv_list.seq_table[seqJ] );
+			pair_ivs.resize( LCB_list.size() );
+			for( size_t lcbI = 0; lcbI < LCB_list.size(); ++lcbI )
+				pair_ivs[lcbI].SetMatches( LCB_list[lcbI] );
+			LCB_list.clear();
+
+			// obtain storage through Copy() and construct the alignment in place
+			vector< CompactGappedAlignment<>* > pair_cgas( pair_ivs.size() );
+			for( size_t lcbI = 0; lcbI < pair_ivs.size(); ++lcbI )
+			{
+				CompactGappedAlignment<> tmp_cga;
+				pair_cgas[lcbI] = tmp_cga.Copy();
+				new (pair_cgas[lcbI])CompactGappedAlignment<>( pair_ivs[lcbI] );
+			}
+
+			// break up these alignments on contig and chromosome boundaries
+			for(int ssI=0; ssI<2; ssI++){
+				// contig_bounds holds the running total of the contig lengths
+				vector<gnSeqI> contig_bounds;
+				for( size_t cI=0; cI < pair_ivs.seq_table[ssI]->contigListSize(); cI++ ){
+					contig_bounds.push_back(pair_ivs.seq_table[ssI]->contigLength(cI));
+					if( cI > 0 )
+						contig_bounds[cI] += contig_bounds[cI-1];
+				}
+				GenericMatchSeqManipulator< CompactGappedAlignment<> > gmsm(ssI);
+				applyBreakpoints(contig_bounds, pair_cgas, gmsm);
+			}
+
+			vector< CompactGappedAlignment<>* > hss_list;
+			// now find islands
+			hss_array_t hss_array;
+			(*detector)( pair_cgas, pair_ivs.seq_table, hss_array );
+			HssArrayToCga(pair_cgas, pair_ivs.seq_table, hss_array, hss_list);
+
+			for( size_t cgaI = 0; cgaI < pair_cgas.size(); ++cgaI )
+				pair_cgas[cgaI]->Free();
+			pair_cgas.clear();
+
+			// now split up on iv boundaries
+			vector< gnSeqI > bp_list;
+			getBpList( iv_ptrs, seqI, bp_list );
+			GenericMatchSeqManipulator< CompactGappedAlignment<> > gmsm(0);
+			SingleStartComparator< CompactGappedAlignment<> > ssc(0);
+			std::sort(hss_list.begin(), hss_list.end(), ssc );
+			applyBreakpoints( bp_list, hss_list, gmsm );
+			// and again on seqJ
+			getBpList( iv_ptrs, seqJ, bp_list );
+			GenericMatchSeqManipulator< CompactGappedAlignment<> > gmsm1(1);
+			SingleStartComparator< CompactGappedAlignment<> > ssc1(1);
+			std::sort(hss_list.begin(), hss_list.end(), ssc1 );
+			applyBreakpoints( bp_list, hss_list, gmsm1 );
+
+			// now transform into interval-specific columns
+			std::sort(hss_list.begin(), hss_list.end(), ssc );
+
+			SingleStartComparator< CompactGappedAlignment<> > ivcomp(seqI);
+			std::sort( iv_ptrs.begin(), iv_ptrs.end(), ivcomp );
+			// iv_map translates a position in the sorted iv_ptrs into a position in iv_orig_ptrs
+			vector< size_t > iv_map;
+			createMap( iv_ptrs, iv_orig_ptrs, iv_map );
+			size_t ivI = 0;
+			// skip leading intervals that are undefined in seqI.  iv_ptrs holds the
+			// n-way intervals, so the sequence to test is seqI rather than 0
+			while( ivI < iv_ptrs.size() && iv_ptrs[ivI]->LeftEnd(seqI) == NO_MATCH )
+				++ivI;
+			for( size_t hssI = 0; hssI < hss_list.size(); ++hssI )
+			{
+				if( hss_list[hssI]->LeftEnd(0) == NO_MATCH || hss_list[hssI]->Length(0) == 0 )
+					continue;
+				if( ivI == iv_ptrs.size() )
+				{
+					cerr << "huh?\n";
+					cerr << hss_list[hssI]->LeftEnd(0) << endl;
+					cerr << hss_list[hssI]->RightEnd(0) << endl;
+					// back() must not be called on an empty vector
+					if( !iv_ptrs.empty() )
+					{
+						cerr << iv_ptrs.back()->LeftEnd(seqI) << endl;
+						cerr << iv_ptrs.back()->RightEnd(seqI) << endl;
+					}
+				}
+				// advance to the first interval that does not end before this HSS starts
+				while( ivI < iv_ptrs.size() &&
+					(iv_ptrs[ivI]->LeftEnd(seqI) == NO_MATCH ||
+					hss_list[hssI]->LeftEnd(0) > iv_ptrs[ivI]->RightEnd(seqI) ) )
+					++ivI;
+				if( ivI == iv_ptrs.size() )
+				{
+					cerr << "hssI fit!!\n";
+					genome::breakHere();
+					// ivI never decreases, so no remaining HSS can be placed either;
+					// stop here instead of indexing iv_ptrs past its end
+					break;
+				}
+				// check for containment in seqJ
+				if( iv_ptrs[ivI]->LeftEnd(seqJ) == NO_MATCH ||
+					iv_ptrs[ivI]->RightEnd(seqJ) < hss_list[hssI]->LeftEnd(1) ||
+					hss_list[hssI]->RightEnd(1) < iv_ptrs[ivI]->LeftEnd(seqJ) )
+					continue;	// this hss falls to an invalid range in seqJ
+
+				if( hss_list[hssI]->RightEnd(0) < iv_ptrs[ivI]->LeftEnd(seqI) )
+				{
+					cerr << "huh 2?\n";
+					cerr << hss_list[hssI]->LeftEnd(0) << endl;
+					cerr << hss_list[hssI]->RightEnd(0) << endl;
+					cerr << iv_ptrs[ivI]->LeftEnd(seqI) << endl;
+					cerr << iv_ptrs[ivI]->RightEnd(seqI) << endl;
+					// the loop header advances hssI; an additional increment here
+					// would silently skip the HSS that follows this one
+					continue;
+				}
+
+				vector< pair< size_t, size_t > >& cur_hss_cols = hss_cols[seqI][seqJ][iv_map[ivI]];
+
+				gnSeqI left_col = iv_ptrs[ivI]->SeqPosToColumn( seqI, hss_list[hssI]->LeftEnd(0) );
+				gnSeqI right_col = iv_ptrs[ivI]->SeqPosToColumn( seqI, hss_list[hssI]->RightEnd(0) );
+				if(left_col > right_col && iv_ptrs[ivI]->Orientation(seqI) == AbstractMatch::reverse )
+				{
+					swap(left_col, right_col);	// must have been a revcomp seq
+				}
+				else if(left_col > right_col)
+				{
+					cerr << "bad cols\n";
+					cerr << hss_list[hssI]->LeftEnd(0) << endl;
+					cerr << hss_list[hssI]->RightEnd(0) << endl;
+					cerr << iv_ptrs[ivI]->LeftEnd(seqI) << endl;
+					cerr << iv_ptrs[ivI]->RightEnd(seqI) << endl;
+					genome::breakHere();
+				}
+
+				// sanity check on the magnitude of the column numbers
+				if( left_col > 2000000000 || right_col > 2000000000 )
+				{
+					cerr << "huh 2?\n";
+					cerr << hss_list[hssI]->LeftEnd(0) << endl;
+					cerr << hss_list[hssI]->RightEnd(0) << endl;
+					cerr << iv_ptrs[ivI]->LeftEnd(seqI) << endl;
+					cerr << iv_ptrs[ivI]->RightEnd(seqI) << endl;
+					genome::breakHere();
+				}
+				cur_hss_cols.push_back( make_pair( left_col, right_col ) );
+			}
+			for( size_t hssI = 0; hssI < hss_list.size(); ++hssI )
+				hss_list[hssI]->Free();
+		}
+	}
+}
+
+/**
+ * Merges pairwise column-range predictions into n-way UngappedLocalAlignments.
+ *
+ * All ULA coordinates handled here are alignment columns + 1 within one
+ * interval.  For every interval and every sequence pair seqI < seqJ the
+ * pairwise ranges are turned into ULAs defined in seqI and seqJ, both the new
+ * ULAs and the ULAs accumulated so far are split at each other's boundaries,
+ * and the new ULAs are then folded into the accumulated ones: first by a scan
+ * in seqI order, then by a scan in seqJ order.  New ULAs that were not folded
+ * into an existing one are added to the accumulated list.
+ *
+ * Afterwards, sequences for which a ULA covers only gap columns are removed
+ * from it, and ULAs left with fewer than two sequences are freed.
+ *
+ * @param iv_orig_ptrs	The alignments whose columns the predictions refer to,
+ *			indexed like the third dimension of hss_cols
+ * @param hss_cols	Pairwise predictions, indexed [seqI][seqJ][interval], each a
+ *			list of (left column, right column) pairs
+ * @param ula_list	Output; resized to one ULA list per interval
+ */
+void mergePairwiseHomologyPredictions( vector< CompactGappedAlignment<>* >& iv_orig_ptrs, pairwise_genome_hss_t& hss_cols, vector< vector< ULA* > >& ula_list )
+{
+	uint seq_count = hss_cols.shape()[0];
+	uint iv_count = hss_cols.shape()[2];
+	//
+	// FINALLY!  ready to merge.  how to do it?
+	// make an empty list of UngappedLocalAlignments
+	// start with the first seq and create a ULA for every col
+	// range.  Then continue to the second seq, and when
+	// a col range overlaps a pre-existing ULA, create a new ULA
+	// for the intersected region and a smaller ULA for the non-intersected region
+	ula_list.resize( iv_count );
+	for( size_t ivI = 0; ivI < iv_count; ++ivI )
+	{
+		vector< ULA* >& iv_ulas = ula_list[ivI];
+		for( size_t seqI = 0; seqI < seq_count; ++seqI )
+		{
+			for( size_t seqJ = seqI+1; seqJ < seq_count; ++seqJ )
+			{
+				// one ULA per pairwise column range, defined in seqI and seqJ only
+				vector< pair< size_t, size_t > >& cur_hss_cols = hss_cols[seqI][seqJ][ivI];
+				vector< ULA* > cur_ulas( cur_hss_cols.size() );
+				ULA tmp_ula(seq_count);
+				for( size_t hssI = 0; hssI < cur_hss_cols.size(); ++hssI )
+				{
+					cur_ulas[hssI] = tmp_ula.Copy();
+					cur_ulas[hssI]->SetStart(seqI, cur_hss_cols[hssI].first+1);
+					cur_ulas[hssI]->SetStart(seqJ, cur_hss_cols[hssI].first+1);
+					cur_ulas[hssI]->SetLength( cur_hss_cols[hssI].second - cur_hss_cols[hssI].first + 1 );
+				}
+
+				// split both lists at each other's boundaries in seqI
+				vector< gnSeqI > iv_bp_list;
+				vector< gnSeqI > cur_bp_list;
+				SingleStartComparator<ULA> ulacompI(seqI);
+				std::sort( iv_ulas.begin(), iv_ulas.end(), ulacompI );
+				std::sort( cur_ulas.begin(), cur_ulas.end(), ulacompI );
+				getBpList( iv_ulas, seqI, iv_bp_list );
+				getBpList( cur_ulas, seqI, cur_bp_list );
+				GenericMatchSeqManipulator< ULA > gmsm(seqI);
+				applyBreakpoints( iv_bp_list, cur_ulas, gmsm );
+				applyBreakpoints( cur_bp_list, iv_ulas, gmsm );
+
+				// ...and in seqJ
+				SingleStartComparator<ULA> ulacompJ(seqJ);
+				std::sort( iv_ulas.begin(), iv_ulas.end(), ulacompJ );
+				std::sort( cur_ulas.begin(), cur_ulas.end(), ulacompJ );
+				getBpList( iv_ulas, seqJ, iv_bp_list );
+				getBpList( cur_ulas, seqJ, cur_bp_list );
+				GenericMatchSeqManipulator< ULA > gmsmJ(seqJ);
+				applyBreakpoints( iv_bp_list, cur_ulas, gmsmJ );
+				applyBreakpoints( cur_bp_list, iv_ulas, gmsmJ );
+
+				// do seqI a second time to propagate any breakpoints introduced by seqJ
+				std::sort( iv_ulas.begin(), iv_ulas.end(), ulacompI );
+				std::sort( cur_ulas.begin(), cur_ulas.end(), ulacompI );
+				getBpList( iv_ulas, seqI, iv_bp_list );
+				getBpList( cur_ulas, seqI, cur_bp_list );
+				applyBreakpoints( iv_bp_list, cur_ulas, gmsm );
+				applyBreakpoints( cur_bp_list, iv_ulas, gmsm );
+
+				std::sort( iv_ulas.begin(), iv_ulas.end(), ulacompI );
+				std::sort( cur_ulas.begin(), cur_ulas.end(), ulacompI );
+				// now that cur_ulas and iv_ulas are all broken up according to each other's boundaries
+				// we can simply scan along and add
+				size_t iv_ulas_size = iv_ulas.size();
+				size_t ivuI = 0;
+				size_t curuI = 0;
+				vector< ULA* > added_to( cur_ulas.size(), NULL );	// this tracks which of iv_ulas a cur_ula was added to
+				vector< ULA* > to_delete;
+				while( ivuI < iv_ulas_size && curuI < cur_ulas.size() )
+				{
+					if( iv_ulas[ivuI]->LeftEnd(seqI) == cur_ulas[curuI]->LeftEnd(seqI) )
+					{
+						if( added_to[curuI] == iv_ulas[ivuI] )
+						{
+							// do nothing
+						}else if( added_to[curuI] == NULL )
+						{
+							// first hit: the existing ULA gains the seqJ component
+							iv_ulas[ivuI]->SetLeftEnd(seqJ, cur_ulas[curuI]->LeftEnd(seqJ));
+							added_to[curuI] = iv_ulas[ivuI];
+						}else{
+							// already folded into another ULA: copy that ULA's
+							// components over and schedule it for deletion
+							ULA* merge = added_to[curuI];
+							for( size_t seqK = 0; seqK < seq_count; ++seqK )
+							{
+								if( merge->Start(seqK) == NO_MATCH )
+									continue;
+								iv_ulas[ivuI]->SetStart( seqK, merge->Start(seqK) );
+							}
+							to_delete.push_back( merge );
+						}
+						ivuI++;
+					}else if( iv_ulas[ivuI]->LeftEnd(seqI) < cur_ulas[curuI]->LeftEnd(seqI) )
+					{
+						ivuI++;
+					}else
+						curuI++;
+				}
+
+				// delete to_delete...
+				std::sort( to_delete.begin(), to_delete.end() );
+				vector< ULA* >::iterator last = std::unique( to_delete.begin(), to_delete.end() );
+				to_delete.erase( last, to_delete.end() );
+				vector< ULA* > new_iv_ulas( iv_ulas.size() - to_delete.size() );
+				std::sort( iv_ulas.begin(), iv_ulas.end() );
+				std::set_difference( iv_ulas.begin(), iv_ulas.end(), to_delete.begin(), to_delete.end(), new_iv_ulas.begin() );
+				swap( iv_ulas, new_iv_ulas );
+				for( size_t delI = 0; delI < to_delete.size(); ++delI )
+					to_delete[delI]->Free();
+
+				// added_to is indexed by the seqI-sorted order of cur_ulas; remember that order
+				vector< ULA* > orig_ula_order = cur_ulas;
+				// now do something similar for seqJ
+				std::sort( iv_ulas.begin(), iv_ulas.end(), ulacompJ );
+				std::sort( cur_ulas.begin(), cur_ulas.end(), ulacompJ );
+
+				// added_map translates a position in the seqJ-sorted cur_ulas into an added_to index
+				vector< size_t > added_map;
+				createMap( cur_ulas, orig_ula_order, added_map );
+
+				ivuI = 0;
+				curuI = 0;
+				to_delete.clear();
+				// iv_ulas may have shrunk when merged entries were removed above;
+				// scanning with the old element count would read past its end
+				iv_ulas_size = iv_ulas.size();
+				while( ivuI < iv_ulas_size && curuI < cur_ulas.size() )
+				{
+					if( iv_ulas[ivuI]->LeftEnd(seqJ) == cur_ulas[curuI]->LeftEnd(seqJ) )
+					{
+						if( added_to[added_map[curuI]] == iv_ulas[ivuI] )
+						{
+							// do nothing
+						}else if( added_to[added_map[curuI]] == NULL )
+						{
+							iv_ulas[ivuI]->SetLeftEnd(seqI, cur_ulas[curuI]->LeftEnd(seqI));
+							added_to[added_map[curuI]] = iv_ulas[ivuI];
+						}else{
+							ULA* merge = added_to[added_map[curuI]];
+							for( size_t seqK = 0; seqK < seq_count; ++seqK )
+							{
+								if( merge->Start(seqK) == NO_MATCH )
+									continue;
+								iv_ulas[ivuI]->SetStart( seqK, merge->Start(seqK) );
+							}
+							to_delete.push_back( merge );
+						}
+						ivuI++;
+					}else if( iv_ulas[ivuI]->LeftEnd(seqJ) < cur_ulas[curuI]->LeftEnd(seqJ) )
+					{
+						ivuI++;
+					}else
+					{
+						curuI++;
+					}
+				}
+
+				// anything with a null added_to entry needs to be added to iv_ulas
+				// everything else needs to get freed
+				// walk orig_ula_order, the order that added_to is indexed in, rather than
+				// re-sorting cur_ulas: std::sort gives no guarantee about the order of ties
+				for( curuI = 0; curuI < orig_ula_order.size(); ++curuI )
+				{
+					if( added_to[curuI] == NULL )
+						iv_ulas.push_back( orig_ula_order[curuI] );
+					else
+						orig_ula_order[curuI]->Free();
+				}
+				// delete to_delete...
+				std::sort( to_delete.begin(), to_delete.end() );
+				last = std::unique( to_delete.begin(), to_delete.end() );
+				to_delete.erase( last, to_delete.end() );
+				new_iv_ulas = vector< ULA* >( iv_ulas.size() - to_delete.size() );
+				std::sort( iv_ulas.begin(), iv_ulas.end() );
+				std::set_difference( iv_ulas.begin(), iv_ulas.end(), to_delete.begin(), to_delete.end(), new_iv_ulas.begin() );
+				swap( iv_ulas, new_iv_ulas );
+				for( size_t delI = 0; delI < to_delete.size(); ++delI )
+					to_delete[delI]->Free();
+			}
+		}
+	}
+
+	// Eliminate segments that have no representation in a genome
+	for( size_t ivI = 0; ivI < ula_list.size(); ++ivI )
+	{
+		for( size_t mI = 0; mI < ula_list[ivI].size(); ++mI )
+		{
+			size_t seqI = ula_list[ivI][mI]->FirstStart();
+			std::vector<gnSeqI> l_pos;
+			std::vector<bool> l_column;
+			std::vector<gnSeqI> r_pos;
+			std::vector<bool> r_column;
+			// ULA coordinates are columns + 1
+			gnSeqI left_col = ula_list[ivI][mI]->LeftEnd(seqI)-1;
+			gnSeqI right_col = ula_list[ivI][mI]->RightEnd(seqI)-1;
+			iv_orig_ptrs[ivI]->GetColumn(left_col, l_pos, l_column);
+			iv_orig_ptrs[ivI]->GetColumn(right_col, r_pos, r_column);
+			for( ; seqI < ula_list[ivI][mI]->SeqCount(); ++seqI )
+			{
+				if( ula_list[ivI][mI]->LeftEnd(seqI) == NO_MATCH )
+					continue;
+				// same position at both ends and a gap in both end columns
+				if( l_pos[seqI] == r_pos[seqI] && !l_column[seqI] && !r_column[seqI] )
+					ula_list[ivI][mI]->SetStart(seqI, NO_MATCH);	// no match in this col
+			}
+			if( ula_list[ivI][mI]->Multiplicity() < 2 )
+			{
+				ula_list[ivI][mI]->Free();
+				ula_list[ivI][mI] = NULL;
+			}
+		}
+		// clean out any NULL ptrs
+		std::vector< ULA* >::iterator last = std::remove( ula_list[ivI].begin(), ula_list[ivI].end(), (ULA*)NULL );
+		ula_list[ivI].erase( last, ula_list[ivI].end() );
+	}
+}
+/**
+ * Rebuilds each interval of iv_list from the column ranges listed in ula_list,
+ * so that alignment columns outside those ranges are no longer aligned.
+ *
+ * For every interval the listed ranges are copied out of iv_orig_ptrs[ivI],
+ * sequences that a ULA does not cover are set to NO_MATCH in the copy, and
+ * the copies are grouped into subsets that share no sequence.  Each subset is
+ * ordered with a topological sort; the first subset replaces the matches of
+ * iv_list[ivI], further subsets are appended to iv_list as new intervals.
+ * Intervals without any ULA are removed from iv_list.  All entries of
+ * iv_orig_ptrs are freed and set to NULL, and collapseCollinear() is applied
+ * to the resulting list.
+ *
+ * @param iv_list	The interval list to rewrite
+ * @param iv_orig_ptrs	Source alignments, indexed like ula_list; freed by this function
+ * @param ula_list	Per interval, the column ranges (stored as column + 1) to keep
+ */
+void unalignIslands( IntervalList& iv_list, vector< CompactGappedAlignment<>* >& iv_orig_ptrs, vector< vector< ULA* > >& ula_list )
+{
+	uint seq_count = iv_list.seq_table.size();
+	// unalign regions in the iv list that aren't contained in backbone
+	for( size_t ivI = 0; ivI < ula_list.size(); ++ivI )
+	{
+		vector< AbstractMatch* > new_matches(ula_list[ivI].size());	// one copied column range per ULA
+		for( size_t mI = 0; mI < ula_list[ivI].size(); ++mI )
+		{
+			size_t seqI = ula_list[ivI][mI]->FirstStart();
+			gnSeqI left_col = ula_list[ivI][mI]->LeftEnd(seqI)-1;	// ULA coordinates are columns + 1
+			CompactGappedAlignment<> tmp_cga;
+			CompactGappedAlignment<>* new_cga = tmp_cga.Copy();
+			iv_orig_ptrs[ivI]->copyRange(*new_cga, left_col, ula_list[ivI][mI]->Length(seqI));
+			for( seqI = 0; seqI < ula_list[ivI][mI]->SeqCount(); ++seqI )
+			{
+				if( ula_list[ivI][mI]->LeftEnd(seqI) == NO_MATCH )
+					new_cga->SetLeftEnd(seqI, NO_MATCH);	// sequence not covered by this ULA
+			}
+			new_cga->CondenseGapColumns();
+			new_matches[mI] = new_cga;
+		}
+		if( new_matches.size() > 0 )
+		{
+
+			vector< vector< AbstractMatch* > > disjoint_subsets;
+			{
+				// split into multiple intervals if some sequences are completely unaligned
+				// use a union-find structure to quickly figure out how many subgroups there are
+				vector< uint > seq_map( seq_count );	// parent links; a root maps to itself
+				for( size_t sI = 0; sI < seq_map.size(); ++sI )
+					seq_map[sI] = sI;
+				for( size_t mI = 0; mI < new_matches.size(); ++mI )
+				{
+					uint sI = new_matches[mI]->FirstStart();
+					uint map_to = seq_map[sI];
+					while( map_to != seq_map[map_to] )	// follow the links to the root
+						map_to = seq_map[map_to];
+					seq_map[sI] = map_to;
+					for( ++sI; sI < seq_count; ++sI )
+					{
+						if( new_matches[mI]->LeftEnd(sI) == NO_MATCH )
+							continue;
+						uint map_from = seq_map[sI];
+						while( map_from != seq_map[map_from] )
+							map_from = seq_map[map_from];
+						seq_map[map_from] = map_to;	// join the two groups
+					}
+				}
+				vector< vector< AbstractMatch* > > mapped_lists( seq_count );	// matches bucketed by the root of their first sequence
+				for( size_t mI = 0; mI < new_matches.size(); ++mI )
+				{
+					uint sI = new_matches[mI]->FirstStart();
+					uint map_to = seq_map[sI];
+					while( map_to != seq_map[map_to] )
+						map_to = seq_map[map_to];
+					mapped_lists[map_to].push_back( new_matches[mI] );
+				}
+				for( uint sI = 0; sI < seq_count; ++sI )
+				{
+					if( mapped_lists[sI].size() > 0 )
+						disjoint_subsets.push_back( mapped_lists[sI] );
+				}
+			}
+
+			for( size_t dI = 0; dI < disjoint_subsets.size(); ++dI )
+			{
+				vector< AbstractMatch* >& cur_d_matches = disjoint_subsets[dI];
+				vector< AbstractMatch* > orig_order = cur_d_matches;	// vertex i of the graph stands for orig_order[i]
+				// need to sort these.  use boost's topological sort.
+				vector< size_t > id_map;	// position in the sorted cur_d_matches -> position in orig_order
+				typedef boost::adjacency_list< boost::vecS, boost::vecS, boost::directedS, boost::property<boost::vertex_color_t, boost::default_color_type> > Graph;
+				typedef boost::graph_traits<Graph>::vertex_descriptor Vertex;
+				typedef std::pair< int, int > Pair;
+				vector< Pair > edges;
+				for( size_t seqI = 0; seqI < seq_count; ++seqI )
+				{
+					SingleStartComparator<AbstractMatch> ssc(seqI);
+					std::sort( cur_d_matches.begin(), cur_d_matches.end(), ssc );
+					createMap( cur_d_matches, orig_order, id_map );
+					int prev = -1;	// previous match defined in seqI, or -1
+					int first = -1;	// first match defined in seqI, or -1
+					bool reverse = false;	// taken from the sign of the first match's Start(seqI)
+					for( int mI = 0; mI < cur_d_matches.size(); ++mI )
+					{
+						if( cur_d_matches[mI]->LeftEnd(seqI) == NO_MATCH )
+							continue;
+						if( prev != -1 )
+						{
+							Pair edge( id_map[prev], id_map[mI] );	// consecutive matches in seqI
+							if( reverse )
+								swap( edge.first, edge.second );
+							edges.push_back(edge);
+						}else
+						{
+							reverse = cur_d_matches[mI]->Start(seqI) < 0;
+							first = mI;
+						}
+						prev = mI;
+					}
+					// link the final match of the chain to an extra vertex numbered cur_d_matches.size()
+					if( prev != -1 && !reverse )
+						edges.push_back( Pair( id_map[prev], cur_d_matches.size() ) );
+					else if( prev != -1 && reverse )
+						edges.push_back( Pair( id_map[first], cur_d_matches.size() ) );
+				}
+				std::sort( edges.begin(), edges.end() );
+				vector< Pair >::iterator ee_iter = std::unique( edges.begin(), edges.end() );
+				edges.erase( ee_iter, edges.end() );
+				Pair* edge_array = new Pair[edges.size()];
+				for( size_t eI = 0; eI < edges.size(); ++eI )
+					edge_array[eI] = edges[eI];
+				typedef boost::graph_traits<Graph>::vertices_size_type v_size_t;
+				Graph G(edge_array, edge_array + edges.size(), v_size_t(edges.size()));	// NOTE(review): the vertex count passed here is the number of edges -- confirm intended
+				typedef std::vector< Vertex > container;
+				container c;
+				topological_sort(G, std::back_inserter(c));
+				cur_d_matches.clear();
+				for ( container::reverse_iterator ii=c.rbegin(); ii!=c.rend(); ++ii)
+				{
+					if( *ii < orig_order.size() )	// skip vertices that do not stand for a match
+						cur_d_matches.push_back( orig_order[ *ii ] );
+				}
+				if( dI == 0 )
+					iv_list[ivI].SetMatches(cur_d_matches);
+				else
+				{
+					Interval new_iv( cur_d_matches.begin(), cur_d_matches.end() );
+					iv_list.push_back(new_iv);
+				}
+				delete[] edge_array;
+			}
+		}
+		else
+		{
+			iv_orig_ptrs[ivI]->Free();
+			iv_orig_ptrs[ivI] = NULL;	// marks iv_list[ivI] for removal below
+		}
+	}
+
+
+	// update iv_list to match the filtered iv_orig_ptrs
+	size_t givI = 0;	// next slot for an interval that is kept
+	for( size_t iI = 0; iI < iv_orig_ptrs.size(); ++iI )
+	{
+		if( iv_orig_ptrs[iI] != NULL )
+		{
+			swap( iv_list[givI], iv_list[iI] );
+			iv_orig_ptrs[iI]->Free();	// done with the CompactGappedAlignments
+			iv_orig_ptrs[iI] = NULL;
+			givI++;
+		}
+	}
+	// pick up any intervals that were split in half
+	for( size_t iI = iv_orig_ptrs.size(); iI < iv_list.size(); ++iI )
+		swap( iv_list[givI++], iv_list[iI] );
+	iv_list.erase( iv_list.begin()+givI, iv_list.end() );
+
+	// collapse any intervals that are trivially collinear
+	collapseCollinear( iv_list );
+}
+
+/**
+ * Derives a backbone list from the matches of an interval list.
+ * For every interval with Multiplicity() >= 2, each match with
+ * Multiplicity() >= 2 yields one ULA that spans the match's alignment columns
+ * (stored as column + 1) in every sequence where the match is defined.
+ * Consecutive ULAs that cover the same sequences and are directly adjacent in
+ * each of them are fused into the later one; emptied ULAs are freed.
+ * @param iv_list	The intervals to read
+ * @param ula_list	Output; resized to iv_list.size(), entries of intervals with
+ *			Multiplicity() < 2 are left as they were
+ */
+void createBackboneList( const IntervalList& iv_list, backbone_list_t& ula_list )
+{
+	ula_list.resize( iv_list.size() );
+	for( size_t ivI = 0; ivI < iv_list.size(); ++ivI )
+	{
+		if( iv_list[ivI].Multiplicity() < 2 )
+			continue;
+
+		vector< ULA* >& iv_ulas = ula_list[ivI];
+		const vector< AbstractMatch* >& matches = iv_list[ivI].GetMatches();
+
+		// col_end accumulates the alignment lengths of the matches seen so far
+		int64 col_end = 0;
+		int64 col_begin = 0;
+		for( size_t matchI = 0; matchI < matches.size(); ++matchI )
+		{
+			col_begin = col_end;
+			col_end += matches[matchI]->AlignmentLength();
+			if( matches[matchI]->Multiplicity() >= 2 )
+			{
+				ULA ula_proto(matches[matchI]->SeqCount());
+				ULA* new_ula = ula_proto.Copy();
+				for( size_t seqI = 0; seqI < matches[matchI]->SeqCount(); ++seqI )
+				{
+					if( matches[matchI]->LeftEnd(seqI) != NO_MATCH )
+						new_ula->SetLeftEnd( seqI, col_begin+1 );
+				}
+				new_ula->SetLength( col_end - col_begin );
+				iv_ulas.push_back(new_ula);
+			}
+		}
+
+		// merge neighbors that cover identical match components
+		for( size_t ulaI = 1; ulaI < iv_ulas.size(); ulaI++ )
+		{
+			ULA* const prev_ula = iv_ulas[ulaI-1];
+			ULA* const cur_ula = iv_ulas[ulaI];
+			size_t seqI = 0;
+			for( ; seqI < cur_ula->SeqCount(); ++seqI )
+			{
+				int64 prev_start = prev_ula->Start(seqI);
+				int64 cur_start = cur_ula->Start(seqI);
+				const bool prev_absent = prev_start == mems::NO_MATCH;
+				const bool cur_absent = cur_start == mems::NO_MATCH;
+				if( prev_absent && cur_absent )
+					continue;
+				if( prev_absent != cur_absent )
+					break;	// defined in only one of the two
+				int64 prev_right = prev_ula->RightEnd(seqI);
+				if( prev_right + 1 != cur_start )
+					break;	// must be adjacent to each other
+			}
+			if( seqI == cur_ula->SeqCount() )
+			{
+				// the earlier ULA is absorbed by the later one
+				cur_ula->ExtendStart( prev_ula->Length() );
+				prev_ula->SetLength(0);
+			}
+		}
+
+		// keep the ULAs that still have a length, free the absorbed ones
+		vector< ULA* > kept_ulas;
+		for( size_t ulaI = 0; ulaI < iv_ulas.size(); ulaI++ )
+		{
+			if( iv_ulas[ulaI]->Length() > 0 )
+				kept_ulas.push_back(iv_ulas[ulaI]);
+			else
+				iv_ulas[ulaI]->Free();
+		}
+		swap( iv_ulas, kept_ulas );
+	}
+}
+
+/**
+ * Detects backbone in a single match and applies it to a copy of that match.
+ * findHssHomologyHMM() is run on the match, its predictions are merged into
+ * n-way column ranges, the remaining columns are unaligned with
+ * unalignIslands(), and the backbone list is then recomputed from the
+ * resulting alignment with createBackboneList().
+ * @param m			The match to process
+ * @param seq_table		The sequences that m refers to
+ * @param result		Output; set to a newly allocated alignment, constructed from
+ *				the first resulting interval if there is one
+ * @param bb_list		Output; replaced by the recomputed backbone list
+ * @param hmm_params		Passed through to findHssHomologyHMM()
+ * @param left_homologous	Passed through to findHssHomologyHMM()
+ * @param right_homologous	Passed through to findHssHomologyHMM()
+ */
+void detectAndApplyBackbone( AbstractMatch* m, vector< gnSequence* >& seq_table, CompactGappedAlignment<>*& result, backbone_list_t& bb_list, const Params& hmm_params, boolean left_homologous, boolean right_homologous )
+{
+	vector< AbstractMatch* > single_match( 1, m );
+	uint seq_count = seq_table.size();
+
+	// indexed by seqI, seqJ, ivI, hssI (left col, right col)
+	pairwise_genome_hss_t hss_cols(boost::extents[seq_count][seq_count][1]);
+
+	// a CompactGappedAlignment is needed for its SeqPosToColumn;
+	// the copy made here is freed when unalignIslands() gets called
+	CompactGappedAlignment<> tmp_cga;
+	vector< CompactGappedAlignment<>* > iv_ptrs(1);
+	iv_ptrs[0] = tmp_cga.Copy();
+	new (iv_ptrs[0])CompactGappedAlignment<>( *m );
+	vector< CompactGappedAlignment<>* > iv_orig_ptrs(iv_ptrs);
+
+	hss_array_t island_array, hss_array;	// hss_array is not used below
+	findHssHomologyHMM( single_match, seq_table, island_array, hmm_params, left_homologous, right_homologous );
+	translateToPairwiseGenomeHSS( island_array, hss_cols );
+
+	// merge overlapping pairwise homology predictions into n-way predictions
+	backbone_list_t ula_list;
+	mergePairwiseHomologyPredictions( iv_orig_ptrs, hss_cols, ula_list );
+
+	// unalignIslands wants an IntervalList, so wrap a copy of the alignment in one
+	IntervalList iv_list;
+	iv_list.seq_table = seq_table;
+	iv_list.resize(1);
+	vector<AbstractMatch*> iv_matches(1, iv_orig_ptrs.front()->Copy() );
+	iv_list[0].SetMatches( iv_matches );
+	// unalign regions found to be non-homologous
+	unalignIslands( iv_list, iv_orig_ptrs, ula_list );
+
+	// free all ULAs and reconstruct them from the new alignment column coordinates
+	for( size_t listI = 0; listI < ula_list.size(); ++listI )
+	{
+		vector< ULA* >& old_ulas = ula_list[listI];
+		for( size_t ulaI = 0; ulaI < old_ulas.size(); ++ulaI )
+			old_ulas[ulaI]->Free();
+	}
+	ula_list.clear();
+	createBackboneList( iv_list, ula_list );
+
+	iv_orig_ptrs.clear();
+
+	bb_list.clear();
+	bb_list = ula_list;
+
+	// hand back a fresh alignment; it stays default-constructed if no interval is left
+	result = tmp_cga.Copy();
+	if( iv_list.size() > 0 )
+		new (result)CompactGappedAlignment<>( iv_list[0] );
+}
+
+
+
+void applyBackbone( IntervalList& iv_list, vector< CompactGappedAlignment<>* >& iv_orig_ptrs, backbone_list_t& bb_list )
+{
+	// step 1: strip the alignment out of the regions that were judged non-homologous
+	unalignIslands( iv_list, iv_orig_ptrs, bb_list );
+
+	// step 2: put the unaligned regions back in, otherwise the viewer throws a fit
+	addUnalignedRegions( iv_list );
+
+	// step 3: the old ULAs carry stale column coordinates, so release every one of them...
+	for( backbone_list_t::iterator iv_iter = bb_list.begin(); iv_iter != bb_list.end(); ++iv_iter )
+		for( vector< ULA* >::iterator ula_iter = iv_iter->begin(); ula_iter != iv_iter->end(); ++ula_iter )
+			(*ula_iter)->Free();
+	bb_list.clear();
+	// ...and rebuild the list from the updated intervals
+	createBackboneList( iv_list, bb_list );
+}
+
+void detectBackbone( IntervalList& iv_list, backbone_list_t& bb_list, const HssDetector* detector, vector< CompactGappedAlignment<>* >& iv_orig_ptrs )
+{
+	// trivially collinear intervals are fused before anything else is done
+	collapseCollinear( iv_list );
+
+	const uint n_seqs = iv_list.seq_table.size();
+	const size_t n_ivs = iv_list.size();
+	// for each genome pair and interval: the list of HSS (left col, right col) ranges
+	pairwise_genome_hss_t hss_cols(boost::extents[n_seqs][n_seqs][n_ivs]);
+
+	// a CompactGappedAlignment is needed for its SeqPosToColumn, so convert every interval
+	vector< CompactGappedAlignment<>* > iv_ptrs;
+	iv_ptrs.reserve( n_ivs );
+	for( size_t ivI = 0; ivI < n_ivs; ++ivI )
+	{
+		CompactGappedAlignment<> blank_cga;
+		CompactGappedAlignment<>* slot = blank_cga.Copy();
+		new (slot)CompactGappedAlignment<>( iv_list[ivI] );
+		iv_ptrs.push_back( slot );
+	}
+	iv_orig_ptrs = iv_ptrs;
+	makeAllPairwiseGenomeHSS( iv_list, iv_ptrs, iv_orig_ptrs, hss_cols, detector );
+	// fold the overlapping pairwise predictions together into n-way predictions
+	mergePairwiseHomologyPredictions( iv_orig_ptrs, hss_cols, bb_list );
+}
+
+
+// For every sequence, appends the space between consecutive backbone segments as a new
+// single-sequence entry when that space is at least min_length long.  FIXME: does not add begin and end segments!
+void addUniqueSegments( std::vector< bb_seqentry_t >& bb_seq_list, size_t min_length )
+{
+	if( bb_seq_list.size() == 0 )
+		return;
+	vector< bb_seqentry_t > new_segs;
+	uint seq_count = bb_seq_list[0].size();
+	// visit each sequence's segments in coordinate order and inspect the space between neighbors
+	for( size_t sI = 0; sI < seq_count; sI++ )
+	{
+		BbSeqEntrySorter bbs(sI);
+		std::sort( bb_seq_list.begin(), bb_seq_list.end(), bbs );
+		for( size_t bbI = 1; bbI < bb_seq_list.size(); bbI++ )
+		{
+			if( bb_seq_list[bbI][sI].first == 0 )
+				continue;	// this entry is not defined in sequence sI
+			int64 diff = genome::absolut(bb_seq_list[bbI][sI].first) - genome::absolut(bb_seq_list[bbI-1][sI].second);
+			if( diff > (int64)min_length )	// signed test: a negative diff means the neighbors overlap, which is not a gap
+			{
+				bb_seqentry_t newb( seq_count, make_pair( 0,0 ) );
+				newb[sI].first = genome::absolut(bb_seq_list[bbI-1][sI].second) + 1;
+				newb[sI].second = genome::absolut(bb_seq_list[bbI][sI].first) - 1;
+				new_segs.push_back( newb );
+			}
+		}
+	}
+	bb_seq_list.insert( bb_seq_list.end(), new_segs.begin(), new_segs.end() );
+}
+
+
+void mergeAdjacentSegments( std::vector< bb_seqentry_t >& bb_seq_list )
+{
+	if( bb_seq_list.size() == 0 )
+		return;
+	uint seq_count = bb_seq_list[0].size();
+	// one pass per sequence: sort by that sequence's coordinates, then fuse neighbors that abut in every sequence
+	for( size_t sI = 0; sI < seq_count; sI++ )
+	{
+		BbSeqEntrySorter bbs(sI);
+		std::sort( bb_seq_list.begin(), bb_seq_list.end(), bbs );
+		bitset_t merged;
+		merged.resize( bb_seq_list.size() );
+		for( size_t bbI = 1; bbI < bb_seq_list.size(); bbI++ )
+		{
+			if( bb_seq_list[bbI][sI].first == 0 )
+				continue;	// entry is not defined in sequence sI
+			size_t j = 0;
+			for( ; j < seq_count; j++ )
+			{
+				if( bb_seq_list[bbI][j].first == 0 ^ bb_seq_list[bbI-1][j].first == 0)	// == binds tighter than ^: exactly one of the two is undefined in j
+					break;
+				if( bb_seq_list[bbI][j].first == 0)
+					continue;	// undefined in both entries
+				int64 diff = 0;
+				if( bb_seq_list[bbI][j].first > 0 )
+					diff = bb_seq_list[bbI][j].first - bb_seq_list[bbI-1][j].second; 
+				else	// negative coordinates: compare the opposite pair of ends
+					diff = bb_seq_list[bbI][j].second - bb_seq_list[bbI-1][j].first;
+				if( diff != 1 )
+					break;	// not directly adjacent in sequence j
+			}
+			if(j == seq_count)
+			{	// they can be merged!
+				merged.set(bbI-1);	// entry bbI-1 is absorbed into entry bbI and removed below
+				for( j = 0; j < seq_count; j++ )
+					if( bb_seq_list[bbI][j].first > 0 )
+						bb_seq_list[bbI][j].first = bb_seq_list[bbI-1][j].first;
+					else
+						bb_seq_list[bbI][j].second = bb_seq_list[bbI-1][j].second;
+			}
+		}
+		// remove merged entries: survivors are swapped towards the front in order, the tail is erased
+		size_t cur = 0;
+		for( size_t bbI = 0; bbI < bb_seq_list.size(); bbI++ )
+			if( !merged.test( bbI ) )
+				swap( bb_seq_list[cur++], bb_seq_list[bbI] );
+		bb_seq_list.erase( bb_seq_list.begin() + cur, bb_seq_list.end() );
+	}
+}
+
+
+void detectBackbone( IntervalList& iv_list, backbone_list_t& bb_list, const HssDetector* detector )
+{
+	vector< CompactGappedAlignment<>* > iv_orig_ptrs;
+	detectBackbone( iv_list, bb_list, detector, iv_orig_ptrs );
+	// the per-interval alignments built by the overload above are not passed on to anyone here, so release them
+	for( size_t ivI = 0; ivI < iv_orig_ptrs.size(); ++ivI ){ iv_orig_ptrs[ivI]->Free(); }
+}
+
+void detectAndApplyBackbone( IntervalList& iv_list, backbone_list_t& bb_list, const Params& hmm_params )
+{
+	// automatic storage: the detector is destroyed even when detection or application throws
+	HomologyHmmDetector hmm_detector( hmm_params, true, true );
+	vector< CompactGappedAlignment<>* > iv_orig_ptrs;
+	detectBackbone( iv_list, bb_list, &hmm_detector, iv_orig_ptrs );
+	applyBackbone( iv_list, iv_orig_ptrs, bb_list );
+}
+
+
+void writeBackboneColumns( ostream& bb_out, backbone_list_t& bb_list )
+{
+	// One tab-separated row per backbone segment:
+	//   interval index, left end (as reported for sequence FirstStart()), length,
+	//   then the index of each sequence that takes part in the segment
+	for( size_t ivI = 0; ivI < bb_list.size(); ++ivI )
+	{
+		const vector< ULA* >& segments = bb_list[ivI];
+		for( size_t segI = 0; segI < segments.size(); ++segI )
+		{
+			ULA* seg = segments[segI];
+			const size_t first_seq = seg->FirstStart();
+			bb_out << ivI << '\t' << seg->LeftEnd(first_seq) << '\t' << seg->Length();
+			// from first_seq onwards, list every sequence that has a left end
+			for( size_t seqI = first_seq; seqI < seg->SeqCount(); ++seqI )
+				if( seg->LeftEnd(seqI) != NO_MATCH )
+					bb_out << '\t' << seqI;
+			bb_out << endl;
+		}
+	}
+}
+
+void writeBackboneSeqCoordinates( backbone_list_t& bb_list, IntervalList& iv_list, ostream& bb_out )
+{
+	if( bb_list.size() == 0 )
+		return;
+	// find seq_count: taken from the first segment of the first non-empty interval
+	uint seq_count = 0;
+	for( size_t bbI = 0; bbI < bb_list.size(); ++bbI )
+		if( bb_list[bbI].size() > 0 )
+		{
+			seq_count = bb_list[bbI].front()->SeqCount();
+			break;
+		}
+
+	// different format -- use real sequence coordinates...
+	// print a header line first
+	for( size_t seqI = 0; seqI < seq_count; ++seqI )
+	{
+		if( seqI > 0 )
+			bb_out << '\t';
+		bb_out << "seq_" << seqI << "_leftend\t";
+		bb_out << "seq_" << seqI << "_rightend";
+	}
+	bb_out << endl;
+	for( size_t ivI = 0; ivI < bb_list.size(); ++ivI )	// bb_list[ivI] is taken to describe iv_list[ivI]
+	{
+		// there seems to be a bug in the backbone creation code that causes the CGA that gets
+		// stuffed into the interval to have the wrong coordinates internally, while the interval
+		// maintains the correct coordinates.  work around it by converting the whole interval to a cga
+		CompactGappedAlignment<> iv_cga( iv_list[ivI] );
+		for( size_t mI = 0; mI < bb_list[ivI].size(); ++mI )
+		{
+			uint fs = bb_list[ivI][mI]->FirstStart();
+			// get the sequence positions out of the alignment
+			vector< gnSeqI > left_pos;
+			vector< bool > left_cols;
+			iv_cga.GetColumn( bb_list[ivI][mI]->LeftEnd(fs)-1, left_pos, left_cols );	// the stored column number minus one is what GetColumn() is given
+			vector< gnSeqI > right_pos;
+			vector< bool > right_cols;
+			iv_cga.GetColumn( bb_list[ivI][mI]->RightEnd(fs)-1, right_pos, right_cols );
+			for( size_t seqI = 0; seqI < bb_list[ivI][mI]->SeqCount(); ++seqI )
+			{
+				if( seqI > 0 )
+					bb_out << '\t';
+				if( bb_list[ivI][mI]->LeftEnd(seqI) == NO_MATCH )
+				{
+					bb_out << "0\t0";	// sequence takes no part in this segment
+					continue;
+				}else{
+					int64 leftI = left_pos[seqI];
+					int64 rightI = right_pos[seqI];
+					// NOTE(review): the two increments below look like they step off a gap column onto the next residue -- confirm against GetColumn()
+					if( iv_cga.Orientation(seqI) == AbstractMatch::forward && leftI != 0 && !left_cols[seqI] )
+						leftI++;
+					if( iv_cga.Orientation(seqI) == AbstractMatch::reverse && rightI != 0 && !right_cols[seqI] )
+						rightI++;
+					if( iv_cga.Orientation(seqI) == AbstractMatch::reverse )
+					{
+						swap( leftI, rightI );	// must be reverse complement
+					}
+					if( rightI + 1 == leftI )
+					{
+						bb_out << "0\t0";	// nothing left of the range after the adjustments: reported like an absent sequence
+						continue;
+					}
+					if( leftI > rightI )
+					{	// diagnostic only: the coordinates are still written below
+						cerr << "oh crahpey!\n";
+						cerr << "leftI: " << leftI << endl;
+						cerr << "rightI: " << rightI << endl;
+						cerr << "seqI: " << seqI << endl;
+						cerr << "ivI: " << ivI << endl;
+					}
+					if( leftI == 0 )
+						leftI = iv_cga.LeftEnd(seqI);
+					if( rightI == iv_cga.RightEnd(seqI)+1 )
+						rightI--;
+					if( iv_cga.Orientation(seqI) == AbstractMatch::reverse )
+					{	// reverse-orientation ranges are written with both coordinates negated
+						leftI *= -1;
+						rightI *= -1;
+					}
+					bb_out << leftI << '\t' << rightI;
+				}
+			}
+			bb_out << endl;
+		}
+	}
+}
+
+
+}  // namespace mems
+
diff --git a/libMems/Backbone.h b/libMems/Backbone.h
new file mode 100644
index 0000000..878eb8e
--- /dev/null
+++ b/libMems/Backbone.h
@@ -0,0 +1,240 @@
+/*******************************************************************************
+ * $Id: Backbone.h,v 1.7 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef __Backbone_h__
+#define __Backbone_h__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnSequence.h"
+#include "libMems/SubstitutionMatrix.h"
+#include "libMems/IntervalList.h"
+#include "libMems/NumericMatrix.h"
+#include "libMems/MatchList.h"
+#include "libMems/GappedAlignment.h"
+#include "libMems/CompactGappedAlignment.h"
+#include "libMems/Aligner.h"
+#include "libMems/Islands.h"
+#include <boost/multi_array.hpp>
+
+#include <sstream>
+#include <vector>
+
+namespace mems {
+
+typedef mems::UngappedLocalAlignment< mems::HybridAbstractMatch<> > ULA;
+typedef std::vector< std::vector< ULA* > > backbone_list_t;
+// indexed by seqI, seqJ, ivI, hssI (left col, right col)
+typedef boost::multi_array< std::vector< std::pair< size_t, size_t > >, 3 > pairwise_genome_hss_t;
+
+class HssDetector;
+
+/** compute the GC content of a set of sequences */
+double computeGC( std::vector< genome::gnSequence* >& seq_table );
+
+/**
+ * collapse Intervals that are trivially collinear with each other
+ */
+void collapseCollinear( IntervalList& iv_list );
+
+/**
+ * sanity checks for alignment columns that contain only gaps
+ */
+void checkForAllGapColumns( IntervalList& iv_list );
+
+/**
+ * Applies pairwise transitive homology statistics to detect backbone in a single collinear alignment
+ * Unaligns any regions found to be non-homologous, returns coordinates of the homologous segments in bb_list
+ * @param	m			The input match in which homology detection will be applied
+ * @param	seq_table	A sequence table with one gnSequence pointer per match component
+ * @param	result		(output) A newly allocated CompactGappedAlignment that contains the resulting alignment of 
+ *						homologous sequence.  It is the caller's responsibility to free the memory using AbstractMatch::Free()
+ * @param	bb_list		(output) A list of homologous segments among each component of the output match
+ * @param	left_homologous	Set to true if the detection code should assume that sequence beyond the left-most alignment
+ *							column is homologous sequence
+ * @param	right_homologous	Set to true if the detection code should assume that sequence beyond the right-most alignment
+ *							column is homologous sequence
+ */
+void detectAndApplyBackbone( AbstractMatch* m, std::vector< genome::gnSequence* >& seq_table, CompactGappedAlignment<>*& result, backbone_list_t& bb_list, const Params& hmm_params, boolean left_homologous = false, boolean right_homologous = false );
+
+/**
+ * Applies pairwise transitive homology statistics to detect backbone in a genome alignment
+ * Unaligns any regions found to be non-homologous, returns coordinates of the homologous segments in bb_list
+ */
+void detectAndApplyBackbone( IntervalList& iv_list, backbone_list_t& bb_list, const Params& hmm_params );
+
+/**
+ * Simply detects backbone using the particular algorithm implemented by HssDetector
+ */
+void detectBackbone( IntervalList& iv_list, backbone_list_t& bb_list, const HssDetector* detector );
+
+/**
+ * Writes a backbone column file.  This file type gets used by the Mauve GUI.
+ */
+void writeBackboneColumns( std::ostream& bb_out, backbone_list_t& bb_list );
+
+/**
+ * Writes a backbone sequence coordinate file.  This file type is easier to analyze with statistical packages.
+ */
+void writeBackboneSeqCoordinates( backbone_list_t& bb_list, IntervalList& iv_list, std::ostream& bb_out );
+
+class HssDetector
+{
+public:
+	typedef std::vector< CompactGappedAlignment<>* > MatchListType;
+	/** virtual, so that a concrete detector can be destroyed safely through an HssDetector pointer */
+	virtual ~HssDetector() {}
+	/** detection entry point; implementations report what they find in iv_list through hss_array */
+	virtual void operator() ( const MatchListType& iv_list, std::vector< genome::gnSequence* >& seq_table, hss_array_t& hss_array ) const = 0;
+};
+
+class HomologyHmmDetector : public HssDetector
+{
+public:
+	HomologyHmmDetector( const Params& hmm_params, bool left_homologous, bool right_homologous ) :
+		p(hmm_params), left(left_homologous), right(right_homologous) {}
+	virtual void operator() ( const MatchListType& iv_list, std::vector< genome::gnSequence* >& seq_table, hss_array_t& hss_array ) const
+	{	// forwards to findHssHomologyHMM() with the settings captured at construction
+		findHssHomologyHMM( iv_list, seq_table, hss_array, p, left, right );
+	}
+private:
+	const Params& p;	// stored by reference: the Params object given to the constructor must outlive this detector
+	bool left; 	// passed on as the left_homologous argument
+	bool right;	// passed on as the right_homologous argument
+};
+
+class BigGapsDetector : public HssDetector
+{
+public:
+	BigGapsDetector( size_t big_gap_size ) : m_big_gap_size(big_gap_size) {}
+	virtual void operator() ( const MatchListType& iv_list, std::vector< genome::gnSequence* >& seq_table, hss_array_t& hss_array ) const
+	{
+		hss_array_t big_gaps;
+		findBigGaps( iv_list, seq_table, big_gaps, m_big_gap_size );
+		// the caller is after the columns of the regions *without* big gaps, so convert
+		HssColsToIslandCols( iv_list, seq_table, big_gaps, hss_array );
+	}
+private:
+	size_t m_big_gap_size;
+};
+
+
+
+typedef std::vector< std::pair< int64, int64 > > bb_seqentry_t;
+typedef struct bb_entry_s
+{	// one backbone segment in two coordinate systems
+	bb_seqentry_t bb_seq;	// one (left, right) sequence-coordinate pair per sequence
+	ULA bb_cols;	// presumably the same segment in alignment-column coordinates -- confirm
+	size_t iv;	// presumably the index of the interval that contains the segment -- confirm
+} bb_entry_t;
+
+void addUniqueSegments( std::vector< bb_seqentry_t >& bb_seq_list, size_t min_length = 20 );
+void mergeAdjacentSegments( std::vector< bb_seqentry_t >& bb_seq_list );
+
+/** orders backbone entries by the absolute value of their left coordinate in one chosen sequence */
+class BbSeqEntrySorter
+{
+public:
+	BbSeqEntrySorter( size_t seqI ) : m_seq(seqI) {}
+	// const-qualified so that the comparator can also be invoked on a const object
+	bool operator()( const bb_seqentry_t& a, const bb_seqentry_t& b ) const
+	{ return genome::absolut(a[m_seq].first) < genome::absolut(b[m_seq].first); }
+	size_t m_seq;	// index of the sequence whose coordinates are compared
+};
+
+inline
+void printBbSeq( std::ostream& os, const bb_seqentry_t& bbseq )
+{	// writes the entry as tab-separated "(left, right)" pairs, one pair per sequence
+	for( bb_seqentry_t::const_iterator entry = bbseq.begin(); entry != bbseq.end(); ++entry )
+	{
+		if( entry != bbseq.begin() )
+			os << '\t';
+		os << "(" << entry->first << ", " << entry->second << ")";
+	}
+}
+
+inline
+void readBackboneSeqFile( std::istream& bbseq_input, std::vector< bb_seqentry_t >& backbone )
+{	// input: one header line, then one row of whitespace-separated (left, right) pairs per segment
+	std::string cur_line;
+	std::getline( bbseq_input, cur_line );	// read off the header line
+	while( std::getline( bbseq_input, cur_line ) )
+	{
+		bb_seqentry_t bb;
+		std::stringstream line_str( cur_line );
+		for( int64 lpos = 0; line_str >> lpos; )
+		{
+			int64 rpos = 0;	// stays 0 when a row ends on an unpaired left coordinate
+			line_str >> rpos;
+			bb.push_back( std::make_pair( lpos, rpos ) );
+		}
+		if( !bb.empty() )	// a blank or non-numeric line holds no segment; an empty entry would be indexed out of range later
+			backbone.push_back(bb);
+	}
+}
+
+inline
+void writeBackboneSeqFile( std::ostream& bbseq_out, std::vector< bb_seqentry_t >& backbone )
+{
+	if(backbone.size()==0)
+		return;	// can't write if there's no backbone!
+	// header line: a left/right column name pair per sequence, sized after the first entry
+	for( size_t seqI = 0; seqI < backbone[0].size(); seqI++ )
+	{
+		if( seqI > 0 )
+			bbseq_out << '\t';
+		std::stringstream ss;	// std:: qualified: this header must not depend on a using-directive in the including file
+		ss << "seq" << seqI;
+		bbseq_out << ss.str() << "_leftend\t" << ss.str() << "_rightend";
+	}
+	bbseq_out << std::endl;
+	for( size_t bbI = 0; bbI < backbone.size(); bbI++ )
+	{	// one row per backbone entry, coordinates written exactly as stored
+		for( size_t seqI = 0; seqI < backbone[bbI].size(); seqI++ )
+		{
+			if( seqI > 0 )
+				bbseq_out << '\t';
+			bbseq_out << backbone[bbI][seqI].first << '\t' << backbone[bbI][seqI].second;
+		}
+		bbseq_out << std::endl; }
+}
+
+inline
+void readBackboneColsFile( std::istream& bbcol_input, std::vector< std::pair< size_t, ULA > >& bb_list )
+{	// row format: interval index, left column, length, then the indices of the participating sequences
+	std::string cur_line;
+	while( std::getline( bbcol_input, cur_line ) )
+	{
+		std::stringstream ss( cur_line );
+		size_t ivI = 0;
+		size_t left_col = 0;
+		size_t len = 0;
+		// a row lacking the three leading fields (e.g. a blank line) is skipped rather than stored with unread values
+		if( !( ss >> ivI >> left_col >> len ) )
+			continue;
+		ULA tmp_ula;
+		gnSeqI bbseq;
+		while( ss >> bbseq )
+		{
+			tmp_ula.SetStart( bbseq, left_col );	// every listed sequence starts at the same column
+		}
+		tmp_ula.SetLength( len );
+		bb_list.push_back( std::make_pair( ivI, tmp_ula ) );
+	}
+}
+
+void makeAllPairwiseGenomeHSS( IntervalList& iv_list, std::vector< CompactGappedAlignment<>* >& iv_ptrs, std::vector< CompactGappedAlignment<>* >& iv_orig_ptrs, pairwise_genome_hss_t& hss_cols, const HssDetector* detector );
+void mergePairwiseHomologyPredictions( 	std::vector< CompactGappedAlignment<>* >& iv_orig_ptrs, pairwise_genome_hss_t& hss_cols, std::vector< std::vector< ULA* > >& ula_list );
+
+
+}
+
+#endif	// __Backbone_h__
+
diff --git a/libMems/ClustalInterface.cpp b/libMems/ClustalInterface.cpp
new file mode 100644
index 0000000..ad2b14a
--- /dev/null
+++ b/libMems/ClustalInterface.cpp
@@ -0,0 +1,576 @@
+/*******************************************************************************
+ * $Id: ClustalInterface.cpp,v 1.27 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/ClustalInterface.h"
+#include <sstream>
+#include "libGenome/gnFilter.h"
+
+#include <fstream>
+
+extern "C" {
+#include "libClustalW/clustalw.h"
+
+extern sint max_names;
+extern Boolean usemenu, dnaflag, explicit_dnaflag;
+extern Boolean interactive;
+extern char *seqname;
+extern sint nseqs;
+extern sint *seqlen_array;
+extern char **names,**titles;
+extern char **seq_array;
+//extern Boolean profile1_empty, profile2_empty;
+extern sint max_aln_length;
+//extern char *gap_penalty_mask, *sec_struct_mask;
+//extern sint struct_penalties;
+extern float    gap_open,      gap_extend;
+extern float  	dna_gap_open,  dna_gap_extend;
+//extern char *gap_penalty_mask1,*gap_penalty_mask2;
+//extern char *sec_struct_mask1,*sec_struct_mask2;
+//extern sint struct_penalties1,struct_penalties2;
+//extern char *ss_name1,*ss_name2;
+extern float    pw_go_penalty,      pw_ge_penalty;
+extern float  	dna_pw_go_penalty,  dna_pw_ge_penalty;
+//extern sint    wind_gap,ktup,window,signif;
+//extern sint    dna_wind_gap, dna_ktup, dna_window, dna_signif;
+
+extern Boolean 	output_clustal, output_nbrf, output_phylip, output_gcg, output_gde, output_nexus;
+extern FILE 	*clustal_outfile, *gcg_outfile, *nbrf_outfile, *phylip_outfile, *nexus_outfile;
+//extern char 	clustal_outname[FILENAMELEN+1], gcg_outname[FILENAMELEN+1];
+extern char* amino_acid_codes;
+extern sint max_aa;
+
+//extern short   blosum45mt[];
+//extern short   def_aa_xref[];
+extern sint gap_pos1;
+extern double** tmat;
+
+extern Boolean		use_endgaps;
+extern Boolean		endgappenalties;
+
+extern sint output_order;
+extern Boolean 	no_weights;
+
+}
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+/** 
+ * When performing progressive alignment, clustalW misaligns the first sequence for
+ * some reason.  define MISALIGNMENT_WORKAROUND to enable a workaround for this bug.
+ * The workaround adds an additional copy of the first sequence to each alignment
+ * then removes the misaligned copy of the first sequence.
+ */
+#define MISALIGNMENT_WORKAROUND
+
+lint get_aln_score(void);
+
+
+ClustalInterface& ClustalInterface::getClustalInterface()
+{
+        // function-local static: built on the first call, every call hands back that same object
+        static ClustalInterface the_interface;
+        return the_interface;
+}
+
+ClustalInterface::ClustalInterface(){
+	// some defaults can't hurt
+	max_alignment_length = 10000;
+	min_flank_size = 3;
+	clustal_score_cutoff = 0;
+	// everything from here to fill_chartab() configures ClustalW globals (declared extern "C" above), not members
+	// shut off end gaps...
+	use_endgaps = FALSE;
+	// enable end gap penalties
+	endgappenalties = TRUE;
+	// force same input/output order
+	output_order = INPUT;
+	no_weights = FALSE; // TRUE;
+
+	// ClustalW's own set-up routines
+	init_amenu();
+	init_interface();
+	init_matrix();
+
+	fill_chartab();
+	allocated_aln = false;
+	// the two assignments below repeat the ones made before the init_* calls -- presumably because those calls reset them; confirm before removing
+	// shut off end gaps...
+	use_endgaps = FALSE;
+	// enable end gap penalties
+	endgappenalties = TRUE;
+}
+
+ClustalInterface& ClustalInterface::operator=( const ClustalInterface& ci )
+{
+	GappedAligner::operator=( ci );	// base-class part first
+	this->min_flank_size = ci.min_flank_size;
+	this->clustal_score_cutoff = ci.clustal_score_cutoff;
+	this->distance_matrix = ci.distance_matrix;
+	this->allocated_aln = ci.allocated_aln;
+	return *this;
+}
+
+void ClustalInterface::SetDistanceMatrix( NumericMatrix< double >& distance_matrix, string& tree_filename ){
+	SetDistanceMatrix( distance_matrix, tree_filename, false );	// forwards to the three-argument overload with reread_tree = false
+}
+
+void ClustalInterface::SetDistanceMatrix( NumericMatrix< double >& distance_matrix, string& tree_filename, boolean reread_tree ){
+	char* phylip_name;
+	uint seqI, seqJ;
+#ifdef MISALIGNMENT_WORKAROUND
+	if( reread_tree == false ){
+		// build a matrix with one extra leading row and column that duplicate the first sequence, and process that one first
+		NumericMatrix< double > dist_plus_matrix( distance_matrix.cols() + 1, distance_matrix.cols() + 1 );
+		for( seqI = 0; seqI < dist_plus_matrix.cols(); seqI++ ){
+			for( seqJ = 0; seqJ < dist_plus_matrix.cols(); seqJ++ ){
+				double new_val = 0;
+				if( seqI == 0 ){
+					if( seqJ == 0 )
+						new_val = 0;
+					else
+						new_val = distance_matrix( seqI, seqJ - 1 );
+				}else{
+					if( seqJ == 0 )
+						new_val = distance_matrix( seqI - 1, seqJ );
+					else
+						new_val = distance_matrix( seqI - 1, seqJ - 1 );
+				}
+				dist_plus_matrix( seqI, seqJ ) = new_val;
+			}
+		}
+		SetDistanceMatrix( dist_plus_matrix, tree_filename, true );
+	}
+#else
+	reread_tree = true;
+#endif
+	if( reread_tree )
+		this->distance_matrix = distance_matrix;
+	free_aln( nseqs );
+	nseqs = distance_matrix.cols();
+	alloc_aln( nseqs );
+	allocated_aln = true;
+	// ClustalW's per-sequence arrays are indexed from 1
+	for( seqI = 1; seqI <= distance_matrix.cols(); seqI++ ){
+		ostringstream ss;
+		ss << "seq" << seqI;
+		int namelen = MAXNAMES < ss.str().size() ? MAXNAMES : ss.str().size();
+		strncpy( names[ seqI ], ss.str().c_str(), namelen);		/*    "   "  name   */
+		strncpy( titles[ seqI ], ss.str().c_str(), namelen);		/*    "   "  title  */
+		// NOTE(review): strncpy() writes no terminator here; this relies on alloc_aln() handing out zero-filled buffers -- confirm
+		alloc_seq( seqI, 1 );
+		// set max_names and max_aln_length
+		if( strlen( names[ seqI ] ) > max_names )
+			max_names = strlen( names[ seqI ] );
+	}
+	// copy phylo tree name
+	phylip_name = (char * ) ckalloc( tree_filename.length() + 1);
+	strcpy( phylip_name, tree_filename.c_str() );
+	// NOTE(review): values are copied unchanged here, whereas setGuideTree() and CallClustal() store 1 - distance_matrix -- confirm which is intended
+	// copy tmat entries
+	for( seqI = 0; seqI < nseqs; seqI++ )
+		for( uint seqJ = 0; seqJ < nseqs; seqJ++ )
+			tmat[ seqI + 1][ seqJ + 1 ] = distance_matrix( seqI, seqJ );
+	// write the guide tree to the requested file; the name buffer is released on the failure path as well
+	FILE* tree;
+	if((tree = open_explicit_file( phylip_name ))==NULL){ phylip_name = (char*)ckfree( phylip_name ); return; }
+	if (nseqs >= 2) {
+		guide_tree(tree,1,nseqs);
+	}
+
+
+// read the tree back in
+	if( reread_tree )
+		int status = read_tree(phylip_name, (sint)0, nseqs);
+	phylip_name = (char*)ckfree( phylip_name );
+	allocated_aln = false;
+
+}
+
+// tries to read in a guide tree from a particular file,
+// throws a string literal (const char*, not a std::exception) if it doesn't work out
+void ClustalInterface::setGuideTree( string& tree_filename, NumericMatrix< double >& dist_mat, uint seq_count ){
+#ifdef MISALIGNMENT_WORKAROUND
+	seq_count++;	// the workaround carries one additional copy of the first sequence
+#endif
+	distance_matrix = dist_mat;	// stored before the file check: the member is updated even when the throw below fires
+	// check whether the file exists
+	ifstream guide_file( tree_filename.c_str() );
+	if( guide_file.is_open() )
+		guide_file.close();	// success
+	else
+		throw( "Unable to open guide tree file" );
+
+	char* phylip_name;
+	uint seqI;
+	// replace ClustalW's alignment buffers with ones sized for seq_count sequences
+	free_aln( nseqs );
+	nseqs = seq_count;
+	alloc_aln( nseqs );
+	allocated_aln = true;
+	// ClustalW's per-sequence arrays are indexed from 1
+	for( seqI = 1; seqI <= seq_count; seqI++ ){
+		ostringstream ss;
+		ss << "seq" << seqI;
+		int namelen = MAXNAMES < ss.str().size() ? MAXNAMES : ss.str().size();
+		strncpy( names[ seqI ], ss.str().c_str(), namelen);		/*    "   "  name   */
+		strncpy( titles[ seqI ], ss.str().c_str(), namelen);		/*    "   "  title  */
+		// NOTE(review): strncpy() writes no terminator here; this relies on alloc_aln() handing out zero-filled buffers -- confirm
+		alloc_seq( seqI, 1 );
+		// set max_names and max_aln_length
+		if( strlen( names[ seqI ] ) > max_names )
+			max_names = strlen( names[ seqI ] );
+	}
+	// NOTE(review): the loop below reads distance_matrix up to index nseqs-1, so dist_mat must already include the workaround's extra sequence -- confirm
+	// copy tmat entries
+	for( seqI = 0; seqI < nseqs; seqI++ )
+		for( uint seqJ = 0; seqJ < nseqs; seqJ++ )
+			tmat[ seqI + 1][ seqJ + 1 ] = 1 - distance_matrix( seqI, seqJ );
+
+	// copy phylo tree name
+	phylip_name = (char * ) ckalloc( tree_filename.length() + 1);
+	strcpy( phylip_name, tree_filename.c_str() );
+	int success = read_tree(phylip_name, (sint)0, nseqs);
+	phylip_name = (char*)ckfree( phylip_name );
+	allocated_aln = false;
+	if( !success )
+		throw "Error loading guide tree\n";
+}
+
+boolean ClustalInterface::Align( GappedAlignment& cr, Match* r_begin, Match* r_end, vector< gnSequence* >& seq_table ){
+	boolean flank = false;	// NOTE(review): never used below
+	gnSeqI gap_size = 0;
+	boolean create_ok = true;
+	uint seq_count = seq_table.size();
+	uint seqI;
+	uint align_seqs = 0;
+//  Aligns the sequence lying between the matches r_begin and r_end (either may be NULL) with
+//  ClustalW and stores the alignment in cr.  Returns false when no alignment was produced.
+//  First pass: get the size of the largest intervening gap
+//  also do some sanity checking while we're at it.
+try{
+	for( seqI = 0; seqI < seq_count; seqI++ ){
+		int64 gap_start = 0;
+		int64 gap_end = 0;
+		create_ok = getInterveningCoordinates( seq_table, r_begin, r_end, seqI, gap_start, gap_end );
+		// skip this sequence if it's undefined
+		if( gap_start == NO_MATCH || gap_end == NO_MATCH )
+			continue;
+		if( !create_ok )
+			break;
+
+		int64 diff = gap_end - gap_start;
+		if( diff <= 0 ){
+			continue;	// can't align nothing
+		}
+		if( diff > max_alignment_length ){
+			cout << "gap from " << gap_start << " to " << gap_end << " is too big for ClustalW\n";
+			continue;	// can't align if it's too big
+		}
+		gap_size = diff < gap_size ? gap_size : diff;
+		align_seqs++;
+	}
+
+	if( align_seqs <= 1 )
+		create_ok = false;	// an alignment needs at least two sequences with something to align
+//
+//	Get the sequence in the intervening gaps between these two matches
+//  Include a flank of matching sequence on either side
+//
+	vector< string > seq_data;
+	vector< int64 > starts;
+	gnSeqI left_flank, right_flank;
+	const gnFilter* rc_filter = gnFilter::DNAComplementFilter();
+
+	if( create_ok ){
+//		left_flank = min( r_begin->Length(), max( gap_size, min_flank_size ) );
+//		right_flank = min( r_end->Length(), max( gap_size, min_flank_size ) );
+		left_flank = 0;
+		right_flank = 0;
+
+		for( seqI = 0; seqI < seq_count; seqI++ ){
+			// cheap hack to avoid mysterious clustalW misalignment
+#ifdef MISALIGNMENT_WORKAROUND
+			if( seqI == 1 )
+				seq_data.push_back( seq_data[ 0 ] );
+#endif
+			// skip this sequence if it's undefined
+			if( (r_end != NULL && r_end->Start( seqI ) == NO_MATCH ) ||
+				(r_begin != NULL && r_begin->Start( seqI ) == NO_MATCH) ){
+				starts.push_back( NO_MATCH );
+				seq_data.push_back( "" );
+				continue;
+			}
+
+			// determine the size of the gap
+			int64 gap_start = 0;
+			int64 gap_end = 0;
+			getInterveningCoordinates( seq_table, r_begin, r_end, seqI, gap_start, gap_end );
+
+			int64 diff = gap_end - gap_start;
+			if( diff <= 0 || diff > max_alignment_length ){
+				starts.push_back( NO_MATCH );
+				seq_data.push_back( "" );
+				continue;
+			}
+			// calculate flank size and extract sequence data
+			if( r_end == NULL || r_end->Start( seqI ) > 0 ){
+				starts.push_back( gap_start );
+				seq_data.push_back( seq_table[ seqI ]->ToString( left_flank + diff + right_flank, gap_start - left_flank ) );
+			}else{
+				// reverse complement the sequence data.
+				starts.push_back( -gap_start );
+				string cur_seq_data = seq_table[ seqI ]->ToString( left_flank + diff + right_flank, gap_start - right_flank );
+				rc_filter->ReverseFilter( cur_seq_data );
+				seq_data.push_back( cur_seq_data );
+			}
+		}
+	}
+
+	if( create_ok ){
+		if( !CallClustal( seq_data ) ){
+			cout << "Clustal was unable to align:\n";
+			if( r_begin != NULL ) cout << "Left match: " << *r_begin << endl;	// a NULL boundary match must not be dereferenced
+			if( r_end != NULL ) cout << "Right match: " << *r_end << endl;
+			return false;
+		}
+
+		// ensure that the flanks were successfully aligned
+		boolean good_alignment = true;	// NOTE(review): never set to false, so the branch below that tests it cannot be taken
+		gnSeqI flankI = 0;	// NOTE(review): never used below
+		gnSeqI align_length=0;
+		for( seqI = 1; seqI <= seq_count; seqI++ )	// NOTE(review): with the workaround ClustalW holds seq_count + 1 sequences; the last one is not inspected here
+			if( align_length < ( seqlen_array[seqI] < 0 ? 0 : (gnSeqI)seqlen_array[seqI] ))
+				align_length = seqlen_array[seqI];
+
+		if( !good_alignment ){
+			// just align without the flanking regions for now??
+			return false;
+		}else{
+			// now extract the alignment from clustal's global variables
+			cr = GappedAlignment( seq_count, align_length );
+			vector< string > align_array;
+			int64 last_residue = -1;	// tracks the right-most residue in the alignment
+			int64 first_residue = align_length + 2;	// tracks the left-most residue in the alignment
+#ifdef MISALIGNMENT_WORKAROUND
+			for( seqI = 2; seqI <= seq_count + 1; seqI++ ){
+#else
+			for( seqI = 1; seqI <= seq_count; seqI++ ){
+#endif
+				string new_seq = string( seqlen_array[ seqI ] - left_flank - right_flank, '-' );
+				uint new_seq_charI = 0;
+				uint cur_seq_len = 0;
+				for( uint charJ = left_flank + 1; charJ <= seqlen_array[ seqI ] - right_flank; charJ++ ){
+					char val = seq_array[ seqI ][ charJ ];
+					if( val >= 0 && val <= max_aa ){	// a residue code; anything else stays a '-' in new_seq
+						if( charJ > last_residue )
+							last_residue = charJ;
+						if( charJ < first_residue )
+							first_residue = charJ;
+						new_seq[ new_seq_charI ]= amino_acid_codes[ val ];
+						cur_seq_len++;
+					}
+					new_seq_charI++;
+				}
+				align_array.push_back( new_seq );
+//				cerr << "new_seq.size() is: " << new_seq.size() << endl;
+#ifdef MISALIGNMENT_WORKAROUND
+				cr.SetStart( seqI - 2, starts[ seqI - 2 ] );
+				cr.SetLength( cur_seq_len, seqI - 2 );
+#else
+				cr.SetStart( seqI - 1, starts[ seqI - 1 ] );
+				cr.SetLength( cur_seq_len, seqI - 1 );
+#endif
+			}
+			int64 end_gap_count = align_array[ 0 ].size() - (last_residue - left_flank);	// trailing columns that hold no residue in any sequence
+			if( last_residue != -1 && end_gap_count > 0 ){
+				for( seqI = 0; seqI < align_array.size(); seqI++ ){
+					align_array[ seqI ] = align_array[ seqI ].substr( 0, align_array[ seqI ].size() - end_gap_count );
+				}
+			}
+			int64 start_gap_count = left_flank + 1 - first_residue;
+			if( first_residue != align_length && start_gap_count > 0 ){
+				for( seqI = 0; seqI < align_array.size(); seqI++ ){
+					align_array[ seqI ] = align_array[ seqI ].substr( start_gap_count, align_array[ seqI ].size() - start_gap_count );
+				}
+			}
+			cr.SetAlignment( align_array );
+		}
+		return true;
+	}
+}catch(exception& e){
+	cerr << "At: " << __FILE__ << ":" << __LINE__ << endl;
+	cerr << e.what();
+}
+	return false;
+}
+
+
+/**
+ * Loads seq_table into ClustalW's global alignment buffers and runs a
+ * multiple alignment using the DNA gap penalties.
+ *
+ * When distance_matrix has one column per sequence, its entries are copied
+ * into tmat (as 1 - distance) and malign_nofiles() is called.  Otherwise
+ * pairalign() is run, guide_tree() is invoked on "tmp_tree.txt" (when there
+ * are at least two sequences) and malign() is called with that file name.
+ *
+ * @param seq_table	The sequences to align; characters are upper-cased before encoding
+ * @return true if the alignment call returned a value greater than 0; false
+ *         otherwise, or if the guide tree file could not be opened
+ */
+boolean ClustalInterface::CallClustal( vector< string >& seq_table ){
+	// only allocated (and used) when a guide tree has to be inferred
+	char* phylip_name = NULL;
+
+//	if( allocated_aln )
+		free_aln( nseqs );
+	alloc_aln( seq_table.size() );
+	allocated_aln = true;
+
+	if( distance_matrix.cols() == seq_table.size() ){
+		// copy tmat entries.  The loop bound is the new sequence count: nseqs
+		// still holds the value from the previous call until it is reassigned
+		// below, whereas the buffers were just reallocated by
+		// alloc_aln( seq_table.size() ).
+		for( uint seqI = 0; seqI < seq_table.size(); seqI++ )
+			for( uint seqJ = 0; seqJ < seq_table.size(); seqJ++ )
+				tmat[ seqI + 1][ seqJ + 1 ] = 1 - distance_matrix( seqI, seqJ );
+	}else{
+		// prepare to infer a phylo tree
+		phylip_name = (char * ) ckalloc( strlen( "tmp_tree.txt" ) + 1);
+		strcpy( phylip_name, "tmp_tree.txt" );
+	}
+
+	uint seqI;
+	max_aln_length = 0;
+	max_names = 0;
+	// clustal's per-sequence arrays are indexed from 1
+	for( seqI = 1; seqI <= seq_table.size(); seqI++ ){
+		seqlen_array[ seqI ] = seq_table[ seqI - 1 ].length();		/* store the length */
+		ostringstream ss;
+		ss << "seq" << seqI;
+		int namelen = ss.str().size();
+		names[ seqI ] = (char * ) ckalloc( namelen + 1 );
+		titles[ seqI ] = (char * ) ckalloc( namelen + 1 );
+		strcpy( names[ seqI ], ss.str().c_str());		/*    "   "  name   */
+		strcpy( titles[ seqI ], ss.str().c_str());		/*    "   "  title  */
+
+		// set max_names and max_aln_length
+		if( (int)strlen( names[ seqI ] ) > max_names )
+			max_names = strlen( names[ seqI ] );
+		if( seqlen_array[ seqI ] > max_aln_length )
+			max_aln_length = seqlen_array[ seqI ];
+	}
+
+	for( seqI = 1; seqI <= seq_table.size(); seqI++ ){
+
+		alloc_seq( seqI, max_aln_length );
+		// one extra leading character plus the terminating null
+		char* seq_char_array = new char[ seq_table[ seqI - 1 ].length() + 2];
+		uint copyI = 0;
+		string& dna_seq = seq_table[ seqI - 1 ];
+		for( ; copyI < dna_seq.length(); copyI++ )
+			seq_char_array[ copyI + 1 ] = toupper( dna_seq[ copyI ] );
+		seq_char_array[ 0 ] = '-';	// silly clustal ignores the first character.
+		seq_char_array[ copyI + 1 ] = 0;
+		n_encode( seq_char_array, seq_array[ seqI ], dna_seq.length() );
+		delete[] seq_char_array;
+	}
+	max_aln_length *= 2;
+
+/*	struct_penalties1 = struct_penalties2 = NONE;
+	if (sec_struct_mask1 != NULL) sec_struct_mask1=( char* )ckfree(sec_struct_mask1);
+	if (sec_struct_mask2 != NULL) sec_struct_mask2=( char* )ckfree(sec_struct_mask2);
+	if (gap_penalty_mask1 != NULL) gap_penalty_mask1=( char* )ckfree(gap_penalty_mask1);
+	if (gap_penalty_mask2 != NULL) gap_penalty_mask2=( char* )ckfree(gap_penalty_mask2);
+	if (ss_name1 != NULL) ss_name1=( char* )ckfree(ss_name1);
+	if (ss_name2 != NULL) ss_name2=( char* )ckfree(ss_name2);
+*/
+	nseqs = seq_table.size();
+	gap_open   = dna_gap_open;
+	gap_extend = dna_gap_extend;
+	pw_go_penalty  = dna_pw_go_penalty;
+	pw_ge_penalty  = dna_pw_ge_penalty;
+/*    ktup       = dna_ktup;
+    window     = dna_window;
+    signif     = dna_signif;
+    wind_gap   = dna_wind_gap;
+*/	dnaflag = TRUE;
+	output_clustal = FALSE;
+
+	int retval = 0;
+	if( distance_matrix.cols() == seq_table.size() ){
+//		char* dump_file = "clustalout.txt";
+//		output_clustal = TRUE;
+//		if((clustal_outfile = open_explicit_file( dump_file ))==NULL) return false;
+
+		retval = malign_nofiles( 0, false );
+//		create_alignment_output( 1, nseqs );
+//		fclose( clustal_outfile );	// this is done by the clustal output function
+
+	}else{
+		pairalign((sint)0,nseqs,(sint)0,nseqs);
+
+		FILE* tree;
+		if((tree = open_explicit_file( phylip_name ))==NULL){
+			// release the file name buffer before bailing out
+			phylip_name = (char*)ckfree( phylip_name );
+			return false;
+		}
+		if (nseqs >= 2) {
+			guide_tree(tree,1,nseqs);
+		}
+
+//		char* dump_file = "clustalout.txt";
+//		if((clustal_outfile = open_explicit_file( dump_file ))==NULL) return false;
+
+		retval = malign( 0, phylip_name );
+
+		phylip_name = (char*)ckfree( phylip_name );
+	//	fclose( clustal_outfile );	// this is done by the clustal output function
+	}
+
+	if( retval <= 0 )
+		return false;
+	return true;
+
+}
+
+/*
+lint get_aln_score(void)
+{
+  static short  *mat_xref, *matptr;
+  static sint maxres;
+  static sint  s1,s2,c1,c2;
+  static sint    ngaps;
+  static sint    i,l1,l2;
+  static lint    score;
+  static sint   matrix[NUMRES][NUMRES];
+
+
+  matptr = blosum45mt;
+  mat_xref = def_aa_xref;
+  maxres = get_matrix(matptr, mat_xref, matrix, TRUE, 100);
+  if (maxres == 0)
+    {
+       fprintf(stdout,"Error: matrix blosum30 not found\n");
+       return -1;
+    }
+
+  score=0;
+  for (s1=1;s1<=nseqs;s1++)
+   {
+    for (s2=1;s2<s1;s2++)
+      {
+
+        l1 = seqlen_array[s1];
+        l2 = seqlen_array[s2];
+        for (i=1;i<l1 && i<l2;i++)
+          {
+            c1 = seq_array[s1][i];
+            c2 = seq_array[s2][i];
+            if ((c1>=0) && (c1<=max_aa) && (c2>=0) && (c2<=max_aa))
+                score += matrix[c1][c2];
+          }
+
+        ngaps = count_gaps(s1, s2, l1);
+
+        score -= (int)(100 * gap_open * ngaps);
+
+      }
+   }
+
+  score /= 100;
+
+  return score;
+}
+*/
+
+}
diff --git a/libMems/ClustalInterface.h b/libMems/ClustalInterface.h
new file mode 100644
index 0000000..b0a6c1e
--- /dev/null
+++ b/libMems/ClustalInterface.h
@@ -0,0 +1,101 @@
+/*******************************************************************************
+ * $Id: ClustalInterface.h,v 1.12 2004/04/19 23:10:50 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _ClustalInterface_h_
+#define _ClustalInterface_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/NumericMatrix.h"
+#include "libGenome/gnFilter.h"
+#include "libGenome/gnSequence.h"
+#include "libMems/GappedAlignment.h"
+#include "libMems/GappedAligner.h"
+
+// attempt to auto-link the ClustalW library on windows
+#if defined(WIN64)&&defined(NDEBUG)&&!defined(FASTDEBUG)&&defined(_OPENMP)
+#pragma comment(lib, "ClustalW64omp.lib")
+#endif
+#if defined(WIN64)&&defined(FASTDEBUG)&&defined(_OPENMP)
+#pragma comment(lib, "ClustalW64fdomp.lib")
+#endif
+#if defined(WIN32)&&!defined(WIN64)&&defined(NDEBUG)&&!defined(FASTDEBUG)&&defined(_OPENMP)
+#pragma comment(lib, "ClustalWomp.lib")
+#endif
+#if defined(WIN32)&&!defined(WIN64)&&defined(FASTDEBUG)&&defined(_OPENMP)
+#pragma comment(lib, "ClustalWfdomp.lib")
+#endif
+#if defined(WIN64)&&defined(NDEBUG)&&!defined(FASTDEBUG)&&!defined(_OPENMP)
+#pragma comment(lib, "ClustalW64.lib")
+#endif
+#if defined(WIN64)&&defined(FASTDEBUG)&&!defined(_OPENMP)
+#pragma comment(lib, "ClustalW64fd.lib")
+#endif
+#if defined(WIN32)&&!defined(WIN64)&&defined(NDEBUG)&&!defined(FASTDEBUG)&&!defined(_OPENMP)
+#pragma comment(lib, "ClustalW.lib")
+#endif
+#if defined(WIN32)&&!defined(WIN64)&&defined(FASTDEBUG)&&!defined(_OPENMP)
+#pragma comment(lib, "ClustalWfd.lib")
+#endif
+
+
+namespace mems {
+
+/**
+ * A GappedAligner that performs alignments through the ClustalW library.
+ * Instances are obtained through getClustalInterface(); the constructors and
+ * the assignment operator are private.
+ */
+class ClustalInterface : public GappedAligner {
+public:
+	/**
+	 * Returns a reference to a usable ClustalInterface
+	 */
+	static ClustalInterface& getClustalInterface();
+	/**
+	 * Attempts to perform a multiple alignment using ClustalW between
+	 * <code>r_begin</code> and <code>r_end</code>
+	 */
+	boolean Align( GappedAlignment& cr, Match* r_begin, Match* r_end, std::vector< genome::gnSequence* >& seq_table );
+	/**
+	 * Set the distance matrix to use when computing alignments, writes the guide tree to the location
+	 * specified in <code>tree_filename</code>
+	 * @param distance_matrix An NxN distance matrix for the sequences
+	 * @param tree_filename The output file name for the guide tree
+	 */
+	void SetDistanceMatrix( NumericMatrix< double >& distance_matrix, std::string& tree_filename );
+	/**
+	 * Set the minimum flank size used to anchor alignments on the sequences
+	 */
+	void SetMinFlankSize( gnSeqI min_flank ){ min_flank_size = min_flank; }
+	
+	/**
+	 * Try using the guide tree in the file given by tree_filename.  Throws an
+	 * exception if the tree file couldn't be loaded
+	 * @param tree_filename		The path to the guide tree file
+	 * @param dist_mat			The distance matrix relating sequences
+	 * @param seq_count			The number of genomes in the guide tree file
+	 */
+	void setGuideTree( std::string& tree_filename, NumericMatrix< double >& dist_mat, uint seq_count );
+	
+	/** returns true if a guide tree has been loaded already */
+	boolean guideTreeLoaded() const { return distance_matrix.cols() > 0; };
+	
+	/**
+	 * Overload of SetDistanceMatrix taking an additional reread_tree flag.
+	 * NOTE(review): presumably reread_tree selects between re-reading an existing
+	 * tree file and writing a new one -- confirm against the implementation.
+	 */
+	void SetDistanceMatrix( NumericMatrix< double >& distance_matrix, std::string& tree_filename, boolean reread_tree );
+protected:
+	/**
+	 * Hands the given sequences to ClustalW and runs the multiple alignment.
+	 * @return true if ClustalW reported success
+	 */
+	boolean CallClustal( std::vector< std::string >& seq_table );
+	/** distance matrix relating the sequences; a non-zero column count is treated as "guide tree loaded" */
+	NumericMatrix< double > distance_matrix;
+	/** minimum flank size, set through SetMinFlankSize() */
+	gnSeqI min_flank_size;
+	// NOTE(review): not referenced in this header -- presumably a score threshold for accepting alignments; confirm
+	int clustal_score_cutoff;
+	/** set to true by CallClustal() once the ClustalW alignment buffers have been allocated */
+	bool allocated_aln;
+private:
+	// copying and default construction are restricted to the class itself
+	ClustalInterface( const ClustalInterface& ci ){ *this = ci; }
+	ClustalInterface& operator=( const ClustalInterface& ci );
+	ClustalInterface();
+};
+
+}
+
+#endif // _ClustalInterface_h_
diff --git a/libMems/CompactGappedAlignment.h b/libMems/CompactGappedAlignment.h
new file mode 100644
index 0000000..942d4aa
--- /dev/null
+++ b/libMems/CompactGappedAlignment.h
@@ -0,0 +1,819 @@
+/*******************************************************************************
+ * $Id: CompactGappedAlignment.h,v 1.12 2004/04/19 23:10:50 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef __CompactGappedAlignment_h__
+#define __CompactGappedAlignment_h__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnDebug.h"
+#include "libGenome/gnFilter.h"
+#include "libGenome/gnSequence.h"
+#include "libMems/SparseAbstractMatch.h"
+#include "libMems/HybridAbstractMatch.h"
+#include "libMems/AbstractGappedAlignment.h"
+#include "libMems/UngappedLocalAlignment.h"
+
+#include <algorithm>
+
+#ifdef WIN32
+#include "windows.h"
+#endif
+
+namespace mems {
+
+/**
+ * The CompactGappedAlignment stores a gapped alignment as a bit-vector.
+ * Rather than using one byte per aligned position, this class uses one bit, making
+ * it particularly space efficient.
+ */
+template< class BaseType = AbstractGappedAlignment< HybridAbstractMatch<> > >
+class CompactGappedAlignment : public BaseType
+{
+public:
+	/** Creates an empty alignment */
+	CompactGappedAlignment() : BaseType(){};
+	/** Creates an alignment with the given dimensions; the bit matrix is left empty */
+	CompactGappedAlignment( uint seq_count, gnSeqI align_length );
+	/** Creates an alignment from a copy of aln_mat, one bitset per sequence */
+	CompactGappedAlignment( std::vector< bitset_t >& aln_mat, gnSeqI alignment_length );
+	
+	/**
+	 * Creates an alignment from another match type.  The bit matrix is obtained
+	 * through m.GetAlignment() and the start/length of each sequence is copied
+	 * from m; sequences whose start is NO_MATCH get a length of 0.
+	 */
+	template< class MatchType >
+	CompactGappedAlignment( MatchType& m ) : 
+		BaseType( m.SeqCount(), m.AlignmentLength() ),
+		bcount( std::vector< std::vector< size_t > >( m.SeqCount() ) )
+	{
+		m.GetAlignment(align_matrix);
+
+		for( uint seqI = 0; seqI < this->SeqCount(); seqI++ )
+		{
+			this->SetStart(seqI, m.Start(seqI));
+			if( m.Start(seqI) != NO_MATCH )
+				this->SetLength(m.Length(seqI), seqI);
+			else
+				this->SetLength(0, seqI);
+		}
+
+		// build the bit-count index before validating, validate() consults it
+		this->create_bitcount();
+
+		if( !this->validate() )
+			std::cerr << "kahnstruct error\n";
+	}
+
+	/** Returns a copy of this alignment allocated with operator new */
+	CompactGappedAlignment* Clone() const { return new CompactGappedAlignment( *this ); }
+	/** Returns a copy of this alignment obtained from m_allocateAndCopy() */
+	CompactGappedAlignment* Copy() const;
+	/** Releases this alignment through m_free() */
+	virtual void Free();
+	
+	/** Sets the alignment from strings; any character other than '-' counts as an aligned position */
+	void SetAlignment( const std::vector< std::string >& seq_align );
+
+	/** Sets the alignment by taking over the contents of seq_align, which is left empty */
+	void SetAlignment( std::vector< bitset_t >& seq_align );
+
+	// Inherited methods from AbstractMatch:
+	virtual void Invert();
+	virtual void CropStart(gnSeqI crop_amount);
+	virtual void CropEnd(gnSeqI crop_amount);
+
+	virtual void CropLeft(gnSeqI crop_amount, uint seqI);
+	virtual void CropRight(gnSeqI crop_amount, uint seqI);
+
+	/** Copies the bit matrix of this alignment into align_matrix */
+	void GetAlignment( std::vector< bitset_t >& align_matrix ) const;
+
+	/** allows a peek at the data inside this alignment.  don't change it or the CompactGappedAlignment will become corrupt */
+	const std::vector< bitset_t >& GetAlignment() const{ return align_matrix; }
+
+//	friend void GetAlignment( const CompactGappedAlignment& ga, const std::vector< genome::gnSequence* >& seq_table, std::vector<std::string>& alignment );
+	
+	/** Reports, for every sequence, the sequence position associated with column col and whether the column holds an aligned character */
+	void GetColumn( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column ) const;
+
+	/** returns true if the given row,column of the alignment has a gap character */
+	virtual bool IsGap( uint seq, gnSeqI col ) const;
+	/** translate a cga to a new coordinate system */
+	void translate( CompactGappedAlignment& cga, uint cga_seq, uint my_seq, bool add_bits = true );
+
+	/** Consistency checks; both return true immediately unless debug_cga is set */
+	bool validate() const;
+	bool validate_bitcount() const;
+
+	void copyRange( CompactGappedAlignment& dest, gnSeqI left_column, gnSeqI length );
+	/** Returns the alignment column holding position pos of sequence seq */
+	gnSeqI SeqPosToColumn( uint seq, int64 pos);
+
+	/** Eliminates any columns that contain only gap characters */
+	void CondenseGapColumns();
+
+	/** Exchanges the contents of this alignment with other */
+	void swap( CompactGappedAlignment& other ){ swap(&other); }
+
+protected:
+	// for use by derived classes in order to swap contents
+	void swap( CompactGappedAlignment* other ){
+		std::swap( align_matrix, other->align_matrix );
+		std::swap( bcount, other->bcount );
+		BaseType::swap( other );
+	}
+
+	std::vector< bitset_t > align_matrix;		/**< aligned positions have true values, gaps are false */
+	std::vector< std::vector< size_t > > bcount;	/**< per sequence: entry k is the number of set bits in the first k * INDEX_INTERVAL columns */
+
+	/** Rebuilds bcount from align_matrix */
+	void create_bitcount();
+	/** Returns the column of the pos'th (1-based) set bit of bvec, using index to skip whole intervals */
+	gnSeqI SeqPosToColumn( gnSeqI pos, const bitset_t& bvec, const std::vector< size_t >& index ) const;
+
+};
+
+static bool debug_cga = false;
+
+/**
+ * Returns a copy of this alignment obtained from m_allocateAndCopy().
+ * NOTE(review): presumably the result must be released with Free() rather than delete -- confirm.
+ */
+template< class BaseType >
+CompactGappedAlignment<BaseType>* CompactGappedAlignment<BaseType>::Copy() const
+{
+	return m_allocateAndCopy( *this );
+}
+
+/** Releases this object by handing it to m_free(). */
+template< class BaseType >
+void CompactGappedAlignment<BaseType>::Free()
+{
+	m_free(this);
+}
+
+/**
+ * Debug-only consistency check of the bit matrix against the stored
+ * coordinates.  Returns true immediately unless debug_cga is set.
+ * Each failed check prints a diagnostic and calls genome::breakHere().
+ * @return true if every check (including validate_bitcount()) passed
+ */
+template< class BaseType >
+bool CompactGappedAlignment<BaseType>::validate() const
+{
+	if( !debug_cga )
+		return true;
+	bool good = true;
+	for( uint seqI = 0; seqI < this->SeqCount(); seqI++ )
+	{
+		// every row must span the full alignment length
+		if( this->AlignmentLength() != align_matrix[seqI].size() )
+		{
+			good = false;
+			std::cerr << "vanishing pig trick\n";
+			genome::breakHere();
+		}
+		gnSeqI count = align_matrix[seqI].count();
+		// a row with aligned characters must have a non-zero left end
+		if( count > 0 && this->LeftEnd(seqI) == 0 )
+		{
+			good = false;
+			std::cerr << "boner_McHoserknob\n";
+			genome::breakHere();
+		}
+		// an empty row must not have a left end
+		if( (count == 0 || this->Length(seqI) == 0) && this->LeftEnd(seqI) != 0 )
+		{
+			good = false;
+			std::cerr << "Length(" << seqI << "): " << this->Length(seqI) << std::endl;
+			std::cerr << "LeftEnd(seqI): " << this->LeftEnd(seqI) << std::endl;
+			std::cerr << "spumante explosion\n";
+			genome::breakHere();
+		}
+		// the number of set bits must equal the stored length; unlike the
+		// checks above this one returns immediately
+		if( count != this->Length(seqI) )
+		{
+			std::cerr << "seqI: " << seqI << " count: " << count << "  Length(seqI): " << this->Length(seqI) << std::endl;
+			std::cerr << "LeftEnd(seqI): " << this->LeftEnd(seqI) << std::endl;
+			std::cerr << "lendo mismatcho\n";
+			genome::breakHere();
+			return false;
+		}
+//		std::vector< std::vector< size_t > > tmp_bcount = bcount;
+//		create_bitcount();
+//		if( !tmp_bcount == bcount )
+//		{
+//			good = false;
+//			std::cerr << "bcount mismatch!!!\n";
+//		}
+//		bcount = tmp_bcount;
+
+	}
+	if( good )	// check for all gap cols
+	{
+/*	allow gap cols...
+		for( size_t colI = 0; colI < this->AlignmentLength(); ++colI )
+		{
+			bool aa = false;
+			for( uint seqI = 0; seqI < this->SeqCount(); seqI++ )
+				aa = aa || align_matrix[seqI].test(colI);
+			if( aa == false )
+			{
+				std::cerr << "gap col at " << colI << std::endl;
+				genome::breakHere();
+			}
+		}
+		*/
+	}
+	return 	validate_bitcount() && good;
+}
+
+
+/**
+ * Constructs an alignment from a copy of aln_mat (one bitset per sequence)
+ * and builds the bit-count index.  Sequence starts and lengths are not set here.
+ * @param aln_mat			The alignment bit matrix to copy
+ * @param alignment_length	The number of columns passed to the base class
+ */
+template< class BaseType >
+CompactGappedAlignment<BaseType>::CompactGappedAlignment( std::vector< bitset_t >& aln_mat, gnSeqI alignment_length ) :
+BaseType( aln_mat.size(), alignment_length ),
+align_matrix( aln_mat ),
+bcount( std::vector< std::vector< size_t > >( aln_mat.size() ) )
+{
+	this->create_bitcount();
+	// the result is ignored; validate_bitcount() only prints diagnostics when debug_cga is set
+	this->validate_bitcount();
+}
+
+/**
+ * Constructs an alignment with the given dimensions.  align_matrix and bcount
+ * are left empty until one of the SetAlignment() overloads is called.
+ */
+template< class BaseType >
+CompactGappedAlignment<BaseType>::CompactGappedAlignment( uint seq_count, gnSeqI align_length ) : 
+BaseType( seq_count, align_length )
+{}
+
+
+/**
+ * Sets the alignment from a set of gapped strings, one per sequence.
+ * Every character other than '-' is recorded as an aligned position.  The
+ * alignment length is taken from the first string.  An empty seq_align only
+ * sets the alignment length to 0.
+ * @param seq_align	The gapped sequence strings
+ */
+template< class BaseType >
+void CompactGappedAlignment<BaseType>::SetAlignment( const std::vector< std::string >& seq_align ){
+	if( seq_align.size() == 0 )
+	{
+		this->SetAlignmentLength(0);
+		return;
+	}
+	this->SetAlignmentLength(seq_align[0].size());
+	align_matrix = std::vector< bitset_t >( seq_align.size(), bitset_t( seq_align[0].size(), false ) );
+	bcount = std::vector< std::vector<size_t> >( seq_align.size() );
+	for( size_t seqI = 0; seqI < seq_align.size(); seqI++ )
+	{
+		for( size_t charI = 0; charI < seq_align[seqI].size(); charI++ )
+			if( seq_align[seqI][charI] != '-' )
+				align_matrix[seqI].set(charI);
+	}
+	// the index must be rebuilt whenever align_matrix changes
+	this->create_bitcount();
+}
+
+/**
+ * Sets the alignment by taking over the contents of seq_align.  On return
+ * seq_align is empty.  The alignment length is taken from the first row, or
+ * set to 0 when there are no rows.
+ * @param seq_align	The alignment bit matrix, one bitset per sequence
+ */
+template< class BaseType >
+void CompactGappedAlignment<BaseType>::SetAlignment( std::vector< bitset_t >& seq_align )
+{
+	// swap instead of copy; the previous contents of align_matrix are discarded below
+	std::swap( align_matrix, seq_align );
+	seq_align.clear();
+	if( align_matrix.size() > 0 )
+		this->SetAlignmentLength( align_matrix[0].size() );
+	else
+		this->SetAlignmentLength(0);
+	bcount = std::vector< std::vector<size_t> >(align_matrix.size());
+	this->create_bitcount();
+	this->validate_bitcount();
+}
+
+/**
+ * Copies the bit matrix of this alignment into the caller's vector.
+ * @param align_matrix	Output; note that the parameter shadows the member of the same name
+ */
+template< class BaseType >
+void CompactGappedAlignment<BaseType>::GetAlignment( std::vector< bitset_t >& align_matrix ) const
+{
+	align_matrix = this->align_matrix;
+}
+
+/**
+ * Returns true if the bit for row seq, column col is not set, i.e. the
+ * position is a gap.  No bounds checking is performed here.
+ */
+template< class BaseType >
+bool CompactGappedAlignment<BaseType>::IsGap( uint seq, gnSeqI col ) const
+{
+	return !align_matrix[seq][col];
+}
+
+static const unsigned INDEX_INTERVAL = 512;
+
+/**
+ * Rebuilds the bit-count index for every sequence.  After the call,
+ * bcount[seq][k] holds the number of set bits among the first
+ * k * INDEX_INTERVAL columns of align_matrix[seq]; only complete intervals
+ * receive an entry.
+ */
+template< class BaseType >
+void CompactGappedAlignment<BaseType>::create_bitcount()
+{
+	for( uint row = 0; row < this->SeqCount(); row++ )
+	{
+		const bitset_t& bits = align_matrix[row];
+		std::vector< size_t >& prefix = bcount[row];
+		prefix.clear();
+		prefix.push_back(0);
+
+		// walk the row one complete interval at a time, carrying the running total
+		size_t running_total = 0;
+		size_t block_begin = 0;
+		while( block_begin + INDEX_INTERVAL <= bits.size() )
+		{
+			const size_t block_end = block_begin + INDEX_INTERVAL;
+			for( size_t bitI = block_begin; bitI < block_end; ++bitI )
+			{
+				if( bits.test(bitI) )
+					++running_total;
+			}
+			prefix.push_back( running_total );
+			block_begin = block_end;
+		}
+	}
+}
+
+/**
+ * Debug-only sanity check of bcount against align_matrix.  Returns true
+ * immediately unless debug_cga is set; otherwise prints a diagnostic for
+ * every inconsistency found.
+ * @return true if no inconsistency was detected
+ */
+template< class BaseType >
+bool CompactGappedAlignment<BaseType>::validate_bitcount() const
+{
+	if( !debug_cga )
+		return true;
+	bool valid = true;	// innocent until proven guilty
+	for( uint seqI = 0; seqI < this->SeqCount(); seqI++ )
+	{
+		gnSeqI count = align_matrix[seqI].count();
+		// number of complete intervals in this row
+		size_t bc_len = align_matrix[seqI].size() / INDEX_INTERVAL;
+		if( count < INDEX_INTERVAL && bcount[seqI].size() == 0 )
+			continue;	// a-ok here
+		// NOTE(review): bcount[seqI].back() is evaluated below without checking that
+		// the index is non-empty when count >= INDEX_INTERVAL -- confirm that cannot happen
+		if( bc_len + 1 != bcount[seqI].size() && (bcount[seqI].back() % INDEX_INTERVAL != 0) )
+		{
+			std::cerr << "bitcount problem, bc_len + 1: " << bc_len + 1 << " and bcount[seqI].size(): " << bcount[seqI].size() << std::endl;
+			std::cerr << "count: " << count << " and bcount[seqI].back(): " << bcount[seqI].back() << std::endl;
+			valid = false;
+		}
+		// the set bits beyond the last indexed interval must fit in one interval
+		if( count - bcount[seqI].back() > INDEX_INTERVAL )
+		{
+			std::cerr << "bitcount problem, count: " << count << " and bcount[seqI].back(): " << bcount[seqI].back() << std::endl;
+			valid = false;
+		}
+	}
+	return valid;
+}
+
+/**
+ * Returns the alignment column that holds position pos of sequence seq.
+ * The sign of pos is ignored; it is first converted to a 1-based offset into
+ * the aligned part of the sequence, counted from the left end for forward
+ * matches and from the right end otherwise.
+ * @param seq	The sequence (row) index
+ * @param pos	The sequence coordinate
+ */
+template< class BaseType > 
+gnSeqI CompactGappedAlignment<BaseType>::SeqPosToColumn( uint seq, int64 pos )
+{
+	if( this->Orientation(seq) == AbstractMatch::forward )
+		pos = genome::absolut(pos) - this->LeftEnd(seq) + 1;
+	else
+		pos = this->RightEnd(seq)-genome::absolut(pos) + 1;	// is this right?
+	return SeqPosToColumn(pos, align_matrix[seq], bcount[seq]);
+}
+
+/**
+ * Returns the column of the pos'th set bit (counting from 1) in bvec.
+ * The index is used to skip whole INDEX_INTERVAL-sized blocks; the remaining
+ * set bits are stepped over one at a time.
+ * @param pos	1-based rank of the set bit to locate
+ * @param bvec	The alignment row to search
+ * @param index	The bit-count index for bvec, as built by create_bitcount()
+ * @return the column of the requested bit, or bitset_t::npos if pos is 0, the
+ *         index is empty, or bvec has fewer than pos set bits
+ */
+template< class BaseType > 
+gnSeqI CompactGappedAlignment<BaseType>::SeqPosToColumn( gnSeqI pos, const bitset_t& bvec, const std::vector< size_t >& index ) const
+{
+	std::vector<size_t>::const_iterator iter = std::lower_bound(index.begin(), index.end(), pos);
+	// lower_bound yields begin() when the index is empty or when pos is 0
+	// (create_bitcount() always stores 0 first); stepping back from begin()
+	// would be undefined, so report "not found" the same way find_next() does.
+	if( iter == index.begin() )
+		return bitset_t::npos;
+	--iter;
+	// *iter is the number of set bits that precede the block starting at col
+	size_t cur_pos = *iter;
+	size_t col = iter - index.begin();
+	col *= INDEX_INTERVAL;
+	if( col == 0 )
+		col = bvec.find_first();
+	else
+		col = bvec.find_next(col-1);
+	for( ++cur_pos; cur_pos < pos; ++cur_pos )
+		col = bvec.find_next(col);
+	return col;
+}
+
+/**
+ * Rewrites cga so that the coordinates of its row cga_seq are expressed as
+ * columns of row my_seq of this alignment.  cga's left end for cga_seq is set
+ * to the (1-based) column of this alignment that holds cga's original left
+ * end, and cga's bit matrix is rebuilt column by column.  Prints a diagnostic
+ * and calls genome::breakHere() when an inconsistency is detected.
+ *
+ * @param cga		The alignment to translate; modified in place
+ * @param cga_seq	The row of cga whose coordinates refer to my_seq
+ * @param my_seq	The row of this alignment that defines the new coordinate system
+ * @param add_bits	When true, columns that are gaps in my_seq between two of its
+ *					aligned positions are added to cga_seq as set bits and counted
+ *					in its length
+ */
+template< class BaseType >
+void CompactGappedAlignment<BaseType>::translate( CompactGappedAlignment& cga, uint cga_seq, uint my_seq, bool add_bits ) // const
+{
+	// NOTE(review): my_orient, my_lend, my_len, my_count, seqI and cur_bit below appear to be unused
+	AbstractMatch::orientation my_orient = this->Orientation(my_seq);
+
+	if( cga.Length(cga_seq)  > this->Length(my_seq) )
+	{
+		std::cerr << "Oh scheisskopf.  What are you trying to do to me??\n";
+		std::cerr << "cga.Length(" << cga_seq << "): " << cga.Length(cga_seq) << std::endl;
+		std::cerr << "Length(" << my_seq << "): " << this->Length(my_seq) << std::endl;
+		genome::breakHere();
+	}
+
+	// remembered only for the diagnostic printed at the end
+	gnSeqI prev_lend = cga.LeftEnd(cga_seq);
+	gnSeqI prev_len = cga.Length(cga_seq);
+	gnSeqI my_lend = this->LeftEnd(my_seq);
+	gnSeqI my_len = this->Length(my_seq);
+	gnSeqI my_count = 0;
+	uint seqI = 0;
+
+	// what assumptions should be made about cga?
+	// does it already have the correct left-end relative to this?
+	// no, it needs to have a left-end relative to the first aligned char in this
+	size_t cur_bit = 0;
+
+	// determine left_bit
+	size_t left_bit = this->SeqPosToColumn(cga.LeftEnd(cga_seq), align_matrix[my_seq], bcount[my_seq]);
+	// determine right_bit
+	size_t right_bit = this->SeqPosToColumn(cga.RightEnd(cga_seq), align_matrix[my_seq], bcount[my_seq]);
+	// a huge value means the bit search ran off the end of the row
+	if( right_bit > 4000000000u )
+	{
+		std::cerr << "cga doesn't fit\n";
+		std::cerr << "cga.RightEnd(cga_seq) " << cga.RightEnd(cga_seq) << std::endl;
+		std::cerr << "RightEnd(my_seq): " << this->RightEnd(my_seq) << std::endl;
+		std::cerr << "cga.LeftEnd(cga_seq) " << cga.LeftEnd(cga_seq) << std::endl;
+		std::cerr << "LeftEnd(my_seq): " << this->LeftEnd(my_seq) << std::endl;
+		std::cerr << "cga.AlignmentLength(): " << cga.AlignmentLength() << std::endl;
+		std::cerr << "AlignmentLength(): " << this->AlignmentLength() << std::endl;
+		genome::breakHere();
+	}
+	// make right_bit exclusive; a wrap to 0 means "not found", so use the end of the alignment
+	right_bit++;
+	if( right_bit == 0 )
+		right_bit = this->AlignmentLength();
+
+	cga.SetLeftEnd(cga_seq,left_bit+1);
+
+	// add on length of unaligned left and right sides
+	size_t cga_left = cga.align_matrix[cga_seq].find_first();
+
+	// number of columns in the translated alignment
+	size_t somesize = (right_bit - left_bit) - cga.Length(cga_seq) + cga.AlignmentLength();
+
+	size_t cga_bit = cga_left;	// current column in the old cga matrix
+	size_t my_bit = left_bit;	// current column in this alignment
+	size_t xlat_bit = cga_left;	// current column in the translated matrix
+	size_t added_bits = 0;	// set bits inserted into cga_seq
+	// copy in everything up to cga_left
+	std::vector< bitset_t > xrated( cga.SeqCount(), bitset_t( somesize, false ) );
+	for( size_t seqI = 0; seqI < xrated.size(); ++seqI )
+		for( size_t asdf = cga.align_matrix[seqI].find_first(); asdf < cga_left; asdf = cga.align_matrix[seqI].find_next(asdf) )
+			xrated[seqI].set(asdf);
+	
+	while(xlat_bit < somesize)
+	{
+		// assume that align_matrix[my_seq][my_bit] is set
+		if( !align_matrix[my_seq].test(my_bit) )
+		{
+			std::cerr << "ohhhhhhzheiss!\n";
+			genome::breakHere();
+		}
+		// copy the column in cga
+		for( size_t seqI = 0; seqI < xrated.size(); ++seqI )
+			xrated[seqI].set( xlat_bit, cga.align_matrix[seqI].test(cga_bit) );
+
+		++cga_bit;
+		++xlat_bit;
+
+		if( xlat_bit >= somesize )
+			break;
+
+		// TODO: should this condition be replaced by cropping xlat_bit + diff - 1 down to < somesize?
+		if( cga.align_matrix[cga_seq].test(cga_bit) )
+		{
+			// advance to the next aligned column of my_seq; diff - 1 is the
+			// number of gap columns of my_seq that were skipped
+			size_t next_bit = align_matrix[my_seq].find_next(my_bit);
+			if( next_bit > 4000000000u )
+				genome::breakHere();
+			size_t diff = next_bit - my_bit;
+			if( diff > 1 && add_bits )
+			{
+				if( xlat_bit + diff - 1 >= somesize )
+				{
+					std::cerr << "ERRRORRR porker!!\n";
+					genome::breakHere();
+				}
+				for( size_t i = xlat_bit; i < xlat_bit + diff - 1; ++i )
+					xrated[cga_seq].set(i);
+				added_bits += diff-1;
+			}
+			my_bit = next_bit;
+			xlat_bit += diff - 1;
+		}
+	}
+
+	// install the translated matrix and bring the bookkeeping in line with it
+	cga.align_matrix = xrated;
+	cga.create_bitcount();
+	cga.SetLength(cga.Length(cga_seq)+added_bits,cga_seq);
+	cga.SetAlignmentLength(somesize);
+	if( !cga.validate() )
+	{
+		std::cerr << "prev_lend: " << prev_lend << std::endl;
+		std::cerr << "prev_len: " << prev_len << std::endl;
+		std::cerr << "translate error\n";
+		genome::breakHere();
+	}
+}
+
+
+/**
+ * Reverses the column order of every row that has a match, rebuilds the
+ * bit-count index and then lets the base class invert its own state.
+ * Rows whose left end is NO_MATCH are left untouched.
+ */
+template< class BaseType >
+void CompactGappedAlignment<BaseType>::Invert(){
+	for(uint row = 0; row < this->SeqCount(); row++)
+	{
+		if( this->LeftEnd(row) != NO_MATCH )
+		{
+			bitset_t& original = align_matrix[row];
+			bitset_t mirrored(this->AlignmentLength());
+			// column i of the original lands in column (length - 1 - i) of the mirror
+			size_t target = this->AlignmentLength();
+			size_t source = 0;
+			while( source < original.size() )
+			{
+				--target;
+				mirrored.set( target, original.test(source) );
+				++source;
+			}
+			original.swap(mirrored);
+		}
+	}
+	this->create_bitcount();
+	BaseType::Invert();
+	const bool consistent = this->validate();
+	if( !consistent )
+	{
+		std::cerr << "invert error\n";
+	}
+}
+
+/**
+ * Removes the first crop_amount columns of the alignment and adjusts the
+ * start and length of every sequence accordingly.
+ * @param crop_amount	The number of columns to remove
+ * @throws SeqIndexOutOfBounds if crop_amount exceeds the alignment length
+ */
+template< class BaseType >
+void CompactGappedAlignment<BaseType>::CropStart(gnSeqI crop_amount){
+	if( crop_amount > this->AlignmentLength() )
+		Throw_gnEx( genome::SeqIndexOutOfBounds() );
+	if( crop_amount == 0 )
+		return;
+
+	// kept only for the diagnostic printed when validation fails
+	gnSeqI pre_alignlen = this->AlignmentLength();
+	gnSeqI pre_lend0 = this->LeftEnd(0);
+
+	// sequence positions at the last column being removed
+	std::vector<gnSeqI> pos;
+	std::vector<bool> column;
+	GetColumn( crop_amount-1, pos, column );
+
+	for( uint i=0; i < this->SeqCount(); i++ ){
+		if( this->LeftEnd(i) == NO_MATCH )
+		{
+			// nothing aligned in this row, only the row size changes
+			align_matrix[i].resize(this->AlignmentLength()-crop_amount);
+			align_matrix[i] = align_matrix[i];	// force reallocation on "optimized" windows builds
+			continue;
+		}
+
+		align_matrix[i] >>= crop_amount;	// why not shift left?  is this a bug in boost::dynamic_bitset?
+		align_matrix[i].resize(this->AlignmentLength()-crop_amount);
+		align_matrix[i] = align_matrix[i];	// force reallocation on "optimized" windows builds
+		// number of characters of sequence i that fall inside the removed columns
+		size_t char_count = this->Orientation(i) == AbstractMatch::forward ? pos[i] - this->LeftEnd(i) + 1 : this->RightEnd(i) - pos[i] + 1;
+
+		if( pos[i] > 0 && char_count > 0 )
+		{
+			this->SetLength(this->Length(i)-char_count, i);
+			if( this->Length(i) == 0 )
+				this->SetStart(i, NO_MATCH);
+			// only forward matches move their start when cropped from the left
+			if( this->Orientation(i) == AbstractMatch::forward )
+				this->SetStart(i, this->Start(i) + char_count);
+		}else if( pos[i] == 0 && this->Orientation(i) == AbstractMatch::reverse )
+		{
+			// this sequence was completely obliterated by the crop
+			this->SetLength(0, i);
+			this->SetStart(i, NO_MATCH);
+		}
+	}
+
+	this->SetAlignmentLength( this->AlignmentLength() - crop_amount );
+	this->create_bitcount();
+	if( !this->validate() )
+	{
+		std::cerr << "pre_lend0: " << pre_lend0 << std::endl;
+		std::cerr << "pre_alignlen: " << pre_alignlen << std::endl;
+		std::cerr << "CropStart error\n";
+	}
+}
+
+/**
+ * Removes the last crop_amount columns of the alignment and adjusts the
+ * start and length of every sequence accordingly.
+ * @param crop_amount	The number of columns to remove
+ * @throws SeqIndexOutOfBounds if crop_amount exceeds the alignment length
+ */
+template< class BaseType >
+void CompactGappedAlignment<BaseType>::CropEnd(gnSeqI crop_amount){
+	if( crop_amount > this->AlignmentLength() )
+		Throw_gnEx( genome::SeqIndexOutOfBounds() );
+	if( crop_amount == 0 )
+		return;
+
+	// sequence positions at the first column being removed
+	std::vector<gnSeqI> pos;
+	std::vector<bool> column;
+	this->GetColumn( this->AlignmentLength()-crop_amount, pos, column );
+
+	for( uint i=0; i < this->SeqCount(); i++ ){
+		align_matrix[i].resize( this->AlignmentLength() - crop_amount );
+		align_matrix[i] = align_matrix[i];	// force reallocation on "optimized" windows builds
+		if( this->LeftEnd(i) == NO_MATCH )
+			continue;
+		AbstractMatch::orientation orient = this->Orientation(i);
+		if( pos[i] > 0 )
+		{
+			// first step back to the last position that is kept, then turn it
+			// into the number of characters remaining in sequence i
+			gnSeqI char_count = pos[i] - (orient == AbstractMatch::forward ? (column[i] ? 1 : 0 ) : (column[i] ? 0 : 1 ) );
+			char_count = orient == AbstractMatch::forward ? char_count - this->LeftEnd(i) + 1 : this->RightEnd(i) - char_count;
+			if( char_count == 0 && align_matrix[i].count() > 0)
+			{
+				std::cerr << "orienatation: " << (orient == AbstractMatch::forward ? "forward\n" : (orient == AbstractMatch::reverse ? "reverse\n" : "undef\n"));
+				std::cerr << "lend: " << this->LeftEnd(i) << std::endl;
+				std::cerr << "length: " << this->Length(i) << std::endl;
+				std::cerr << "count: " << align_matrix[i].count() << std::endl;
+			}
+			gnSeqI deleted = this->Length(i) - char_count;
+			this->SetLength(char_count, i);
+			if( this->Length(i) == 0 )
+				this->SetStart(i, 0);
+			// a negative start is adjusted by the number of characters removed
+			if( this->Start(i) < 0 )
+				this->SetStart(i, this->Start(i)-deleted);
+		}else if( orient == AbstractMatch::forward ){
+			this->SetLength(0, i);
+			this->SetStart(i, 0);
+		}
+	}
+	this->SetAlignmentLength( this->AlignmentLength() - crop_amount );
+	this->create_bitcount();
+	if( !this->validate() )
+		std::cerr << "CropEnd error\n";
+}
+
+/**
+ * Crops the alignment so that crop_amount characters are removed from the
+ * left side of sequence seqI.  For forward matches this crops columns from
+ * the start of the alignment, otherwise from the end.  Diagnostics are
+ * printed if the resulting length of seqI is not what was expected.
+ * @param crop_amount	The number of characters of seqI to remove
+ * @param seqI			The sequence that defines the crop point
+ */
+template< class BaseType >
+void CompactGappedAlignment<BaseType>::CropLeft(gnSeqI crop_amount, uint seqI)
+{
+	if( crop_amount == 0 )
+		return;
+
+	gnSeqI pre_len = this->Length(seqI);
+	// count "crop_amount" characters into seqI and crop there
+	if( this->Orientation(seqI) == AbstractMatch::forward )
+	{
+		// + 1 turns the column of the last removed character into a column count
+		size_t left_col = this->SeqPosToColumn(crop_amount, align_matrix[seqI], bcount[seqI]) + 1;
+		this->CropStart(left_col);
+	}else{
+		size_t left_col = this->SeqPosToColumn(this->Length(seqI) - crop_amount + 1, align_matrix[seqI], bcount[seqI]);
+		// a huge value means the bit search ran off the end of the row
+		if( left_col > 4000000000u )
+		{
+			std::cerr << this->LeftEnd(seqI) << std::endl;
+			std::cerr << this->LeftEnd(0) << std::endl;
+			std::cerr << "bogus cropper cga\n";
+		}
+		this->CropEnd(this->AlignmentLength()-left_col);
+	}
+	if( this->Length(seqI) != pre_len - crop_amount )
+	{
+		std::cerr << this->LeftEnd(seqI) << std::endl;
+		std::cerr << this->LeftEnd(0) << std::endl;
+		std::cerr << "bad cropperLeftie\n";
+	}
+	if( !this->validate() )
+		std::cerr << "CropLeft error\n";
+}
+
+/**
+ * Crops the alignment so that crop_amount characters are removed from the
+ * right side of sequence seqI.  For forward matches this crops columns from
+ * the end of the alignment, otherwise from the start.  Diagnostics are
+ * printed if the resulting length of seqI is not what was expected.
+ * @param crop_amount	The number of characters of seqI to remove
+ * @param seqI			The sequence that defines the crop point
+ */
+template< class BaseType >
+void CompactGappedAlignment<BaseType>::CropRight(gnSeqI crop_amount, uint seqI)
+{
+	if( crop_amount == 0 )
+		return;
+
+	gnSeqI pre_len = this->Length(seqI);
+	// NOTE(review): pre_lend and pre_lend0 appear to be unused
+	gnSeqI pre_lend = this->LeftEnd(seqI);
+	gnSeqI pre_lend0 = this->LeftEnd(0);
+	if( this->Orientation(seqI) == AbstractMatch::forward )
+	{
+		// count "crop_amount" characters into seqI and crop there
+		size_t right_col = this->SeqPosToColumn(this->Length(seqI) - crop_amount + 1, align_matrix[seqI], bcount[seqI]);
+		this->CropEnd( this->AlignmentLength()-right_col );
+	}else
+	{
+		// + 1 turns the column of the last removed character into a column count
+		size_t right_col = this->SeqPosToColumn(crop_amount, align_matrix[seqI], bcount[seqI]) + 1;
+		// a huge value means the bit search ran off the end of the row
+		if( right_col > 4000000000u )
+		{
+			std::cerr << this->LeftEnd(seqI) << std::endl;
+			std::cerr << this->LeftEnd(0) << std::endl;
+			std::cerr << "bogus cropper cga\n";
+		}
+		this->CropStart( right_col );
+	}
+	if( this->Length(seqI) != pre_len - crop_amount )
+	{
+		std::cerr << this->LeftEnd(seqI) << std::endl;
+		std::cerr << this->LeftEnd(0) << std::endl;
+		std::cerr << "bad cropperight\n";
+	}
+	if( !this->validate() )
+		std::cerr << "CropRight error\n";
+}
+
+/**
+ * Describes one alignment column.
+ * @param col		The column to inspect; not bounds-checked here
+ * @param pos		Output, one entry per sequence, initialised to NO_MATCH.  For a
+ *					forward match it is set to LeftEnd + count - 1 when count > 0, for a
+ *					reverse match to RightEnd - count + 1 (unless all characters lie left
+ *					of a gap in this column), where count is the number of aligned
+ *					characters in columns 0..col
+ * @param column	Output, true for every sequence that has an aligned character in col
+ */
+template< class BaseType >
+void CompactGappedAlignment<BaseType>::GetColumn( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column ) const
+{
+	pos = std::vector<gnSeqI>(this->SeqCount(), NO_MATCH);
+	column = std::vector<bool>(this->SeqCount(), false);
+	for( uint seqI = 0; seqI < this->SeqCount(); seqI++ )
+	{
+		if( align_matrix[seqI][col] )
+			column[seqI] = true;
+
+		gnSeqI count = 0;
+		if( this->LeftEnd(seqI) != NO_MATCH )
+		{
+			// count the bits from the start of the enclosing interval up to and
+			// including col, then add the indexed total of all earlier intervals
+			size_t col_index = col / INDEX_INTERVAL;
+			for( size_t i = col_index * INDEX_INTERVAL; i <= col; i++ )
+				count += align_matrix[seqI].test(i);
+			count += bcount[seqI][col_index];
+		}
+
+		if( count > 0 && this->Orientation(seqI) == AbstractMatch::forward )
+			pos[seqI] = this->LeftEnd(seqI) + count - 1;
+		else if( this->Orientation(seqI) == AbstractMatch::reverse && !(count == this->Length(seqI) && !column[seqI]) )
+			pos[seqI] = this->RightEnd(seqI) - count + 1;
+	}
+}
+
+/**
+ * Copies the alignment columns [left_column, left_column + length) of this
+ * alignment into dest, replacing whatever dest held before.
+ * The start coordinates of dest are first copied from this alignment and then
+ * adjusted for the characters that fall left and right of the copied range;
+ * the lengths of dest are set to the number of characters actually copied.
+ * A sequence with no character in the range gets a start of NO_MATCH.
+ * @param dest         receives the copied range (overwritten)
+ * @param left_column  first alignment column to copy
+ * @param length       number of alignment columns to copy
+ * @throws genome::SeqIndexOutOfBounds when the range extends past AlignmentLength()
+ */
+template< class BaseType >
+void CompactGappedAlignment<BaseType>::copyRange( CompactGappedAlignment& dest, gnSeqI left_column, gnSeqI length )
+{
+	// written as two comparisons so that left_column + length cannot wrap around
+	if( left_column > this->AlignmentLength() || length > this->AlignmentLength() - left_column )
+		Throw_gnEx( genome::SeqIndexOutOfBounds() );
+//	if( length == 0 )
+//		return;
+
+	// first copy the coordinates
+	dest = CompactGappedAlignment(this->SeqCount(), length);
+	for( uint i=0; i < this->SeqCount(); i++ ){
+		dest.SetStart(i, this->Start(i));
+		if( this->Orientation(i) != AbstractMatch::undefined )
+			dest.SetLength(this->Length(i), i);
+	}
+	// then trim the coordinates appropriately
+
+	// remembered only for the diagnostic output at the end
+	gnSeqI pre_alignlen = this->AlignmentLength();
+	gnSeqI pre_lend0 = this->LeftEnd(0);
+
+	std::vector< bitset_t > dest_mat(this->SeqCount(), bitset_t(length));
+	std::vector<gnSeqI> pos;
+	std::vector<bool> column;
+	// left_cc[i]: number of characters of sequence i lying left of the copied range
+	std::vector<gnSeqI> left_cc(this->SeqCount(), 0);
+	if( left_column > 0 )
+	{
+		// look at the last column that is NOT copied on the left side
+		this->GetColumn( left_column-1, pos, column );
+		for( uint i=0; i < this->SeqCount(); i++ ){
+			if( this->LeftEnd(i) == NO_MATCH )
+				continue;
+
+			// only meaningful when pos[i] > 0, which is checked right below
+			size_t char_count = this->Orientation(i) == AbstractMatch::forward ? pos[i] - this->LeftEnd(i) + 1 : this->RightEnd(i) - pos[i] + 1;
+			if( pos[i] > 0 && char_count > 0 )
+			{
+				left_cc[i] = char_count;
+				// forward sequences start further right after the left trim
+				if( dest.Orientation(i) == AbstractMatch::forward )
+					dest.SetStart(i, dest.Start(i) + char_count);
+			}else if( pos[i] == 0 && dest.Orientation(i) == AbstractMatch::reverse )
+			{
+				// this sequence was completely obliterated by the crop
+				dest.SetStart(i, NO_MATCH);
+			}
+		}
+	}
+
+// now trim up the right side...
+	gnSeqI right_trim = this->AlignmentLength() - left_column - length;
+
+	if( right_trim > 0 )
+	{
+		// look at the first column that is NOT copied on the right side
+		this->GetColumn( this->AlignmentLength()-right_trim, pos, column );
+
+		for( uint i=0; i < this->SeqCount(); i++ ){
+			if( this->LeftEnd(i) == NO_MATCH )
+				continue;
+			AbstractMatch::orientation orient = this->Orientation(i);
+			if( pos[i] > 0 )
+			{
+				gnSeqI char_count = pos[i] - (orient == AbstractMatch::forward ? (column[i] ? 1 : 0 ) : (column[i] ? 0 : 1 ) );
+				char_count = orient == AbstractMatch::forward ? char_count - this->LeftEnd(i) + 1 : this->RightEnd(i) - char_count;
+				char_count -= left_cc[i];
+				gnSeqI deleted = this->Length(i) - char_count;
+				// only reverse-strand sequences (negative start) are shifted by the right trim
+				if( dest.Start(i) < 0 )
+					dest.SetStart(i, dest.Start(i)-deleted+left_cc[i]);	// fixme: is this off-by-one?
+			}else if( orient == AbstractMatch::forward ){
+				dest.SetStart(i, NO_MATCH);
+			}
+		}
+	}
+
+	// copy the bits of the requested columns and count the characters per sequence
+	for( size_t i = 0; i < dest_mat.size(); ++i )
+	{
+		size_t count = 0;
+		for( size_t j = 0; j < length; ++j )
+		{
+			if(align_matrix[i].test(j+left_column))
+			{
+				dest_mat[i].set(j, true);
+				++count;
+			}
+		}
+		dest.SetLength(count, i);
+		if( count == 0 )
+			dest.SetStart(i, NO_MATCH);
+	}
+	dest.SetAlignment(dest_mat);
+
+	dest.create_bitcount();
+	if( !dest.validate() )
+	{
+		std::cerr << "pre_lend0: " << pre_lend0 << std::endl;
+		std::cerr << "pre_alignlen: " << pre_alignlen << std::endl;
+		// diagnostic now names this function (it used to say "CropStart")
+		std::cerr << "copyRange error\n";
+	}
+
+}
+
+/**
+ * Removes every alignment column in which no sequence contributes a
+ * character.  A column is kept when at least one sequence with a non-zero
+ * LeftEnd() has its bit set in that column.  Kept columns are moved left in
+ * place, the alignment length is reduced accordingly and the bit-count index
+ * is rebuilt.
+ */
+template< class BaseType >
+void CompactGappedAlignment<BaseType>::CondenseGapColumns()
+{
+	const size_t len = this->AlignmentLength();
+	size_t d = 0;	// destination index
+	for( size_t i = 0; i < len; ++i )
+	{
+		size_t seqI = 0;
+		// check whether this is a gap col
+		for( ; seqI < align_matrix.size(); ++seqI )
+			if( this->LeftEnd(seqI) != 0 && align_matrix[seqI].test(i) )
+				break;
+
+		// the scan ran off the end: nobody has a character here, drop the column
+		if( seqI == align_matrix.size() )
+			continue;
+
+		// copy if not a gap col (and i != d )
+		if( i != d )
+		{
+			for( seqI = 0; seqI < align_matrix.size(); ++seqI )
+				align_matrix[seqI].set( d, align_matrix[seqI].test(i)  );
+		}
+		d++;
+	}
+	this->SetAlignmentLength(d);
+	for( size_t seqI = 0; seqI < align_matrix.size(); ++seqI )
+	{
+		align_matrix[seqI].resize(d);
+		align_matrix[seqI] = align_matrix[seqI];	// force reallocation on "optimized" windows builds
+	}
+	this->create_bitcount();
+}
+
+
+}
+
+namespace std {
+/**
+ * Specialization of std::swap for the default CompactGappedAlignment
+ * instantiation; forwards to the member swap().
+ */
+template<>
+inline void swap( mems::CompactGappedAlignment<>& lhs, mems::CompactGappedAlignment<>& rhs )
+{
+	lhs.swap( rhs );
+}
+} // namespace std
+
+
+#endif // __CompactGappedAlignment_h__
+
diff --git a/libMems/DNAFileSML.cpp b/libMems/DNAFileSML.cpp
new file mode 100644
index 0000000..de91365
--- /dev/null
+++ b/libMems/DNAFileSML.cpp
@@ -0,0 +1,68 @@
+/*******************************************************************************
+ * $Id: DNAFileSML.cpp,v 1.4 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnSequence.h"
+#include "libGenome/gnFilter.h"
+#include "libMems/DNAFileSML.h"
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+/** Default constructor: builds the FileSML base and stamps the header with FormatVersion(). */
+DNAFileSML::DNAFileSML()
+	: FileSML()
+{
+	header.version = FormatVersion();
+}
+
+/**
+ * Records the file name, translation table and alphabet bit size, and stamps
+ * the header with FormatVersion().  The file itself is not opened here.
+ * @param fname       name of the file backing this SML
+ * @param table       translation table; UINT8_MAX bytes are copied from it
+ * @param alpha_bits  number of bits per character
+ */
+DNAFileSML::DNAFileSML(const string& fname, const uint8* table, const uint32 alpha_bits)
+{
+	filename = fname;
+	header.version = FormatVersion();
+	header.alphabet_bits = alpha_bits;
+	// NOTE(review): assumes both table and header.translation_table hold at
+	// least UINT8_MAX bytes -- confirm against the SMLHeader declaration
+	memcpy(header.translation_table, table, UINT8_MAX);
+}
+
+/** Assignment: all state lives in the FileSML base, so its assignment operator does the work. */
+DNAFileSML& DNAFileSML::operator=(const DNAFileSML& msa )
+{
+	FileSML::operator=( msa );
+	return *this;
+}
+
+/** @return a heap-allocated copy of this object; the caller owns the returned pointer */
+DNAFileSML* DNAFileSML::Clone() const
+{
+	DNAFileSML* duplicate = new DNAFileSML();
+	*duplicate = *this;
+	return duplicate;
+}
+
+/**
+ * Estimates the number of bytes needed to build an SML over a sequence.
+ * The estimate is the packed sequence (alphabet_bits per character), two
+ * further bytes per character, and one bmer per character.
+ * @param len  sequence length in characters
+ * @return estimated memory requirement in bytes
+ */
+uint64 DNAFileSML::GetNeededMemory(gnSeqI len){
+	// Widen once so every product below is computed in 64 bits; multiplying in
+	// gnSeqI width could wrap for long sequences if gnSeqI is a 32-bit type
+	// (its width is not visible from this file).
+	const uint64 len64 = len;
+	uint64 neededmem = (len64 * FileSML::header.alphabet_bits) / 8;
+	//forward and reverse copies of the sequence
+	neededmem += len64 * 2;
+	neededmem += sizeof(bmer) * len64;
+	return neededmem;
+}
+
+/** @return the number of alphabet_bits-wide characters that fit into 62 bits */
+uint32 DNAFileSML::CalculateMaxMerSize() const
+{
+	return 62 / header.alphabet_bits;
+}
+
+/** Returns the mer at the given position; forwards to GetDnaMer(). */
+uint64 DNAFileSML::GetMer(gnSeqI position) const
+{
+	return GetDnaMer( position );
+}
+
+/** Returns the seed mer at the given offset; forwards to GetDnaSeedMer(). */
+uint64 DNAFileSML::GetSeedMer( gnSeqI offset ) const
+{
+	return GetDnaSeedMer( offset );
+}
+
+/** Fills sml_array from seq; forwards to FillDnaSML(). */
+void DNAFileSML::FillSML(const gnSequence& seq, vector<bmer>& sml_array)
+{
+	FillDnaSML( seq, sml_array );
+}
+
+} // namespace mems
diff --git a/libMems/DNAFileSML.h b/libMems/DNAFileSML.h
new file mode 100644
index 0000000..f08514d
--- /dev/null
+++ b/libMems/DNAFileSML.h
@@ -0,0 +1,66 @@
+/*******************************************************************************
+ * $Id: DNAFileSML.h,v 1.6 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _DNAFileSML_h_
+#define _DNAFileSML_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/FileSML.h"
+
+namespace mems {
+
+/**
+ *  File-backed sorted mer list for DNA sequences.  Every virtual below
+ *  forwards to the DNA-specific helper of the same purpose (GetDnaMer,
+ *  GetDnaSeedMer, FillDnaSML); see DNAFileSML.cpp.
+ *  The seed pattern for DNA SMLs must be palindromic
+ */
+class DNAFileSML : public FileSML
+{
+public:
+	/** Creates an empty DNAFileSML whose header carries FormatVersion(). */
+	DNAFileSML();
+	
+	/** 
+	 *  Create a DNAFileSML bound to a file name.
+	 *  The constructor defined in DNAFileSML.cpp only records the file name,
+	 *  the translation table and the alphabet bit size and stamps the header
+	 *  version; it does not open or read the file itself.
+	 *  @param fname The name of the file backing this SML.
+	 *  @param table The array used to translate characters into binary code
+	 *  @param alpha_bits The number of bits each character consumes in binary
+	 */
+	DNAFileSML(const std::string& fname, const uint8* table = SortedMerList::BasicDNATable(), const uint32 alpha_bits = DNA_ALPHA_BITS);
+	// NOTE(review): declared here, but no definition appears in DNAFileSML.cpp -- confirm it is defined elsewhere
+	DNAFileSML(const SortedMerList& sa);
+	/** Assigns via FileSML::operator=. */
+	DNAFileSML& operator=(const DNAFileSML& msa );
+	
+	/** @return a heap-allocated copy of this object; the caller owns the pointer */
+	DNAFileSML* Clone() const;
+	
+	/** @return the mer at the given position (forwards to GetDnaMer) */
+	virtual uint64 GetMer(gnSeqI position) const;
+	
+	/** @return the on-disk format version handled by this class */
+	virtual uint32 FormatVersion();
+
+	/** @return the seed mer at the given offset (forwards to GetDnaSeedMer) */
+	virtual uint64 GetSeedMer( gnSeqI offset ) const;
+
+protected:
+	/** Fills sml_array from seq (forwards to FillDnaSML). */
+	virtual void FillSML(const genome::gnSequence& seq, std::vector<bmer>& sml_array);
+	/** @return 62 divided by the header's alphabet_bits */
+	virtual uint32 CalculateMaxMerSize() const;
+	/** @return an estimate, in bytes, of the memory needed for a sequence of len characters */
+	virtual uint64 GetNeededMemory(gnSeqI len);
+};
+
+// On-disk format history:
+//   version 3: original DNAFileSML format
+//   version 4: introduction of inexact seeds
+//   version 5: fix in header struct for 64-bit seed size
+/** @return the file format version handled by this class (currently 5) */
+inline uint32 DNAFileSML::FormatVersion()
+{
+	static uint32 current_version = 5;
+	return current_version;
+}
+
+}
+
+#endif   //_DNAFileSML_h_
diff --git a/libMems/DNAMemorySML.cpp b/libMems/DNAMemorySML.cpp
new file mode 100644
index 0000000..9ef9285
--- /dev/null
+++ b/libMems/DNAMemorySML.cpp
@@ -0,0 +1,48 @@
+/*******************************************************************************
+ * $Id: DNAMemorySML.cpp,v 1.3 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnSequence.h"
+#include "libMems/DNAMemorySML.h"
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+/**
+ * Creates an empty DNAMemorySML; both arguments go straight to the MemorySML base.
+ * @param table       translation table from characters to binary codes
+ * @param alpha_bits  number of bits per character
+ */
+DNAMemorySML::DNAMemorySML(const uint8* table, const uint32 alpha_bits)
+	: MemorySML( table, alpha_bits )
+{
+}
+
+/** Assignment: forwards to MemorySML::operator=. */
+DNAMemorySML& DNAMemorySML::operator=(const DNAMemorySML& msa )
+{
+	MemorySML::operator=( msa );
+	return *this;
+}
+
+/** @return a heap-allocated copy of this object; the caller owns the returned pointer */
+DNAMemorySML* DNAMemorySML::Clone() const
+{
+	DNAMemorySML* duplicate = new DNAMemorySML();
+	*duplicate = *this;
+	return duplicate;
+}
+
+/** Returns the mer at the given position; forwards to GetDnaMer(). */
+uint64 DNAMemorySML::GetMer(gnSeqI position) const
+{
+	return GetDnaMer( position );
+}
+
+/** Returns the seed mer at the given offset; forwards to GetDnaSeedMer(). */
+uint64 DNAMemorySML::GetSeedMer( gnSeqI offset ) const
+{
+	return GetDnaSeedMer( offset );
+}
+
+/** Fills sml_array from seq; forwards to FillDnaSML(). */
+void DNAMemorySML::FillSML(const gnSequence& seq, vector<bmer>& sml_array)
+{
+	FillDnaSML( seq, sml_array );
+}
+
+} // namespace mems
diff --git a/libMems/DNAMemorySML.h b/libMems/DNAMemorySML.h
new file mode 100644
index 0000000..e2558bb
--- /dev/null
+++ b/libMems/DNAMemorySML.h
@@ -0,0 +1,55 @@
+/*******************************************************************************
+ * $Id: DNAMemorySML.h,v 1.3 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _DNAMemorySML_h_
+#define _DNAMemorySML_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnSequence.h"
+#include "libMems/MemorySML.h"
+
+namespace mems {
+
+/** The DNAMemorySML is an implementation of sorted mer lists which creates and
+ *  stores the sorted mer list entirely in memory.  A DNAMemorySML consumes
+ *  roughly 32 + alpha_bits bits of memory per character in the sequences.
+ *  For unambiguous DNA sequences 4.25 bytes per base are required.
+ *  The seed pattern for DNA SMLs must be palindromic.
+ *  The virtuals below forward to the DNA-specific helpers (GetDnaMer,
+ *  GetDnaSeedMer, FillDnaSML); see DNAMemorySML.cpp.
+ */
+class DNAMemorySML : public MemorySML
+{
+public:
+	/** 
+	 *  Create an empty DNAMemorySML
+	 *  Creates an empty DNAMemorySML with the supplied translation
+	 *  table and alphabet bit size.  Defaults to DNA settings
+	 *  @param table The array used to translate characters into binary code
+	 *  @param alpha_bits The number of bits each character consumes in binary
+	 */
+	DNAMemorySML(const uint8* table = SortedMerList::BasicDNATable(), const uint32 alpha_bits = DNA_ALPHA_BITS);
+	// NOTE(review): the two constructors below are declared here, but no
+	// definition appears in DNAMemorySML.cpp -- confirm they are defined elsewhere
+	DNAMemorySML(const DNAMemorySML& msa);
+	DNAMemorySML(const SortedMerList& sa);
+	/** Assigns via MemorySML::operator=. */
+	DNAMemorySML& operator=(const DNAMemorySML& msa );
+	/** @return a heap-allocated copy of this object; the caller owns the pointer */
+	DNAMemorySML* Clone() const;
+	
+	
+	/** @return the mer at the given offset (forwards to GetDnaMer) */
+	virtual uint64 GetMer(gnSeqI offset) const;
+	/** @return the seed mer at the given offset (forwards to GetDnaSeedMer) */
+	virtual uint64 GetSeedMer( gnSeqI offset ) const;
+	
+protected:
+
+	/** Fills sml_array from seq (forwards to FillDnaSML). */
+	virtual void FillSML(const genome::gnSequence& seq, std::vector<bmer>& sml_array);
+
+};
+
+}
+
+#endif   //_DNAMemorySML_h_
diff --git a/libMems/DenseAbstractMatch.h b/libMems/DenseAbstractMatch.h
new file mode 100644
index 0000000..cddeeab
--- /dev/null
+++ b/libMems/DenseAbstractMatch.h
@@ -0,0 +1,169 @@
+/*******************************************************************************
+ * $Id: DenseAbstractMatch.h,v 1.8 2004/02/27 23:08:55 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef __DenseAbstractMatch_h__
+#define __DenseAbstractMatch_h__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnClone.h"
+#include "libMems/AbstractMatch.h"
+#include <limits>
+
+namespace mems {
+
+/**
+ * The DenseAbstractMatch implements the AbstractMatch interface in a way
+ * that is most efficient when Multiplicity and SeqCount are identical or 
+ * nearly so.  It stores all data inline in fixed-size arrays, affording it
+ * storage in a contiguous chunk of memory.
+ * @tparam MAX_SEQS  capacity of the inline arrays; sequence indices passed
+ *                   to the accessors are not range-checked against it
+ */
+template< unsigned int MAX_SEQS >
+class DenseAbstractMatch : public AbstractMatch 
+{
+public:
+	/** Creates a match with zero sequences and all left ends zeroed. */
+	DenseAbstractMatch();
+	/**
+	 * Creates a new AbstractMatch.
+	 * @param seq_count The total number of sequences in the alignment
+	 */
+	DenseAbstractMatch(const uint seq_count );
+	// use the compiler generated copy constructor, assignment operator, and destructor
+
+	virtual AbstractMatch* Clone() const = 0;
+	
+	// see AbstractMatch base class documentation for these functions
+
+	/** @return the left end of seqI, negated when the sequence is stored as reverse */
+	int64 Start(uint seqI) const{
+		int64 s = leftend[seqI];
+		return orient[seqI]? -s : s;
+	}
+	/**
+	 * Sets the left end from genome::absolut(startI) and marks the sequence as
+	 * reverse when startI is negative.
+	 */
+	void SetStart(uint seqI, int64 startI)
+	{
+		SetLeftEnd( seqI, genome::absolut(startI) );
+		orient[seqI] = startI < 0;
+	}
+	/** @return the number of sequences whose left end is not NO_MATCH */
+	uint Multiplicity() const{return m_multiplicity;}
+	/** @return the number of sequences this match was created for */
+	uint SeqCount() const{return m_seq_count;}
+	/** @return index of the first sequence with a defined left end, or the maximum uint value when there is none */
+	virtual uint FirstStart() const;
+	/** Flips the stored orientation flag of every slot. */
+	virtual void Invert();
+
+	virtual gnSeqI LeftEnd(uint seqI) const{ return leftend[seqI]; }
+	/** @return forward or reverse for a defined sequence, undefined otherwise */
+	virtual orientation Orientation(uint seqI) const;
+	/** Stores the left end and keeps m_multiplicity in step when a sequence becomes defined or undefined. */
+	virtual void SetLeftEnd(uint seqI, gnSeqI position)
+	{ 
+		if( position == NO_MATCH && leftend[seqI] != NO_MATCH )
+			--m_multiplicity;
+		else if( position != NO_MATCH && leftend[seqI] == NO_MATCH )
+			++m_multiplicity;
+		leftend[seqI]=position; 
+	}
+	/** Stores the orientation; anything other than reverse is stored as "not reverse". */
+	virtual void SetOrientation(uint seqI, orientation o){ orient[seqI]= (o == reverse); }
+	
+	/** Compares left ends and orientations over the first m_seq_count sequences. */
+	virtual boolean operator==( const DenseAbstractMatch& dam ) const;
+	
+	/** Adds move_amount to the left end of every defined forward sequence. */
+	void MoveStart(int64 move_amount);
+
+	/** Adds move_amount to the left end of every defined reverse sequence. */
+	void MoveEnd(int64 move_amount);
+
+	/** @return seqI unchanged */
+	virtual uint UsedSeq( uint seqI ) const {
+		return seqI;
+	}
+
+protected:
+
+	uint m_seq_count;	// number of sequences in use
+	gnSeqI leftend[ MAX_SEQS ];	// left end per sequence, NO_MATCH when undefined
+	bool orient[ MAX_SEQS ];	// true when the sequence is in reverse orientation
+	uint m_multiplicity;	// count of entries of leftend[] that are not NO_MATCH
+};
+
+/** Creates a match with zero sequences; every left end and orientation flag is zeroed. */
+template< unsigned int MAX_SEQS >
+DenseAbstractMatch<MAX_SEQS>::DenseAbstractMatch()
+	: m_seq_count(0), m_multiplicity(0)
+{
+	memset( leftend, 0, sizeof( leftend ) );
+	memset( orient, 0, sizeof( orient ) );
+}
+
+/**
+ * Creates a match for seq_count sequences; every left end and orientation
+ * flag is zeroed and the multiplicity starts at zero.
+ * seq_count is stored as given, without a check against MAX_SEQS.
+ */
+template< unsigned int MAX_SEQS >
+DenseAbstractMatch<MAX_SEQS>::DenseAbstractMatch(const uint seq_count )
+	: m_seq_count(seq_count), m_multiplicity(0)
+{
+	memset( leftend, 0, sizeof( leftend ) );
+	memset( orient, 0, sizeof( orient ) );
+}
+
+/**
+ * Two matches are equal when, for each of this match's m_seq_count
+ * sequences, the left ends agree and -- for defined sequences -- the
+ * orientations agree as well.
+ * @param dam  the match to compare against
+ * @return true when all compared coordinates and orientations are identical
+ */
+template< unsigned int MAX_SEQS >
+boolean DenseAbstractMatch<MAX_SEQS>::operator==( const DenseAbstractMatch<MAX_SEQS>& dam ) const
+{
+	for( uint seqI = 0; seqI < m_seq_count; ++seqI )
+	{
+		if( leftend[seqI] != dam.leftend[seqI] )
+			return false;
+		// Orientation only matters for defined sequences.  Compare against
+		// dam's flag; the previous code compared orient[seqI] with itself,
+		// so differing orientations were never detected.
+		if( leftend[seqI] != NO_MATCH && orient[seqI] != dam.orient[seqI] )
+			return false;
+	}
+	return true;
+}
+
+/**
+ * @param seqI  index of the sequence of interest
+ * @return forward or reverse when seqI is a valid sequence index with a
+ *         defined left end; undefined otherwise
+ */
+template< unsigned int MAX_SEQS >
+AbstractMatch::orientation DenseAbstractMatch<MAX_SEQS>::Orientation(uint seqI) const
+{ 
+	// range-check first so that leftend[] is never read with an index that
+	// is not a sequence of this match
+	if( seqI < m_seq_count && leftend[seqI] != NO_MATCH )
+		return orient[seqI] ? reverse : forward; 
+	return undefined;
+}
+
+/** Flips the orientation flag of all MAX_SEQS slots, whether or not they are in use. */
+template< unsigned int MAX_SEQS >
+void DenseAbstractMatch<MAX_SEQS>::Invert()
+{
+	for( uint slot = 0; slot < MAX_SEQS; ++slot )
+	{
+		const bool was_reverse = orient[slot];
+		orient[slot] = !was_reverse;
+	}
+}
+
+/**
+ * @return the index of the first sequence whose left end is defined, or the
+ *         largest uint value when no sequence is defined
+ */
+template< unsigned int MAX_SEQS >
+uint DenseAbstractMatch<MAX_SEQS>::FirstStart() const
+{
+	const uint seq_count = SeqCount();
+	uint seqI = 0;
+	// skip over undefined sequences
+	while( seqI < seq_count && leftend[seqI] == NO_MATCH )
+		++seqI;
+	if( seqI < seq_count )
+		return seqI;
+	return (std::numeric_limits<uint>::max)();
+}
+
+/** Adds move_amount to the left end of every defined, forward-oriented sequence. */
+template< unsigned int MAX_SEQS >
+void DenseAbstractMatch<MAX_SEQS>::MoveStart(int64 move_amount)
+{
+	for( uint seqI = 0; seqI < m_seq_count; ++seqI )
+	{
+		// undefined and reverse sequences are left alone
+		if( leftend[seqI] == NO_MATCH || orient[seqI] )
+			continue;
+		leftend[seqI] += move_amount;
+	}
+}
+
+/** Adds move_amount to the left end of every defined, reverse-oriented sequence. */
+template< unsigned int MAX_SEQS >
+void DenseAbstractMatch<MAX_SEQS>::MoveEnd(int64 move_amount)
+{
+	for( uint seqI = 0; seqI < m_seq_count; ++seqI )
+	{
+		// undefined and forward sequences are left alone
+		if( leftend[seqI] == NO_MATCH || !orient[seqI] )
+			continue;
+		leftend[seqI] += move_amount;
+	}
+}
+
+
+typedef DenseAbstractMatch<2> DenseAbstractMatch2;
+typedef DenseAbstractMatch<4> DenseAbstractMatch4;
+typedef DenseAbstractMatch<8> DenseAbstractMatch8;
+typedef DenseAbstractMatch<16> DenseAbstractMatch16;
+typedef DenseAbstractMatch<32> DenseAbstractMatch32;
+typedef DenseAbstractMatch<64> DenseAbstractMatch64;
+typedef DenseAbstractMatch<128> DenseAbstractMatch128;
+
+}
+
+#endif // __DenseAbstractMatch_h__
diff --git a/libMems/DistanceMatrix.h b/libMems/DistanceMatrix.h
new file mode 100644
index 0000000..dfefc23
--- /dev/null
+++ b/libMems/DistanceMatrix.h
@@ -0,0 +1,327 @@
+/*******************************************************************************
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef __DistanceMatrix_h__
+#define __DistanceMatrix_h__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnSequence.h"
+#include "libMems/SubstitutionMatrix.h"
+#include "libMems/IntervalList.h"
+#include "libMems/MatchList.h"
+#include "libMems/GappedAlignment.h"
+#include "libMems/CompactGappedAlignment.h"
+
+
+namespace mems {
+
+
+void TransformDistanceIdentity( NumericMatrix<double>& identity );
+
+void DistanceMatrix( const MatchList& mlist, NumericMatrix<double>& identity );
+
+
+template< class AbstractMatchVectorType >
+void IdentityMatrix( const AbstractMatchVectorType& matches, const std::vector< genome::gnSequence* >& seq_table, NumericMatrix<double>& identity );
+template<class AbstractMatchType>
+void MatchIdentityMatrix( const AbstractMatchType& amt, const std::vector< genome::gnSequence* >& seq_table, NumericMatrix<double>& identity);
+
+void DistanceMatrix( uint seq_count, const std::vector< std::pair< uint64, uint64 > >& detail_list, NumericMatrix<double>& distance );
+
+void IdentityMatrix( const IntervalList& iv_list, NumericMatrix<double>& identity );
+/**
+ * Computes the identity matrix for an IntervalList by collecting a pointer to
+ * each interval and handing the pointers to the generic IdentityMatrix().
+ * @param iv_list   the intervals; iv_list.seq_table supplies the sequences
+ * @param identity  output matrix
+ */
+inline
+void IdentityMatrix( const IntervalList& iv_list, NumericMatrix<double>& identity )
+{
+	std::vector< const AbstractMatch* > interval_ptrs;
+	const size_t interval_count = iv_list.size();
+	for( size_t ivI = 0; ivI != interval_count; ++ivI )
+	{
+		const AbstractMatch* iv = &iv_list[ivI];
+		interval_ptrs.push_back( iv );
+	}
+	IdentityMatrix( interval_ptrs, iv_list.seq_table, identity );
+}
+
+/**
+ * Accumulates the identical aligned characters of all matches per sequence
+ * pair and divides by the length of the shorter sequence of each pair.
+ * Leaves identity untouched when matches is empty.
+ * @param matches    container of pointers to matches
+ * @param seq_table  the sequences the matches refer to
+ * @param identity   output, re-created as a seq_count x seq_count matrix
+ */
+template< class AbstractMatchVectorType >
+void IdentityMatrix( const AbstractMatchVectorType& matches, const std::vector< genome::gnSequence* >& seq_table, NumericMatrix<double>& identity )
+{
+	if( matches.size() == 0 )
+		return;
+
+	const uint seq_count = seq_table.size();
+
+	// numerator: identical characters per pair, summed over all matches
+	identity = NumericMatrix<double>( seq_count, seq_count );
+	identity.init( 0 );
+	for( uint matchI = 0; matchI < matches.size(); ++matchI )
+		AddToMatchIdentityMatrix( *matches[ matchI ], seq_table, identity );
+
+	// denominator: length of the shorter sequence of each pair
+	NumericMatrix<double> possible( seq_count, seq_count );
+	possible.init( 0 );
+	for( uint rowI = 0; rowI < seq_count; ++rowI )
+	{
+		for( uint colI = 0; colI < seq_count; ++colI )
+		{
+			gnSeqI shorter_len = seq_table[rowI]->length() < seq_table[colI]->length() ? seq_table[rowI]->length() : seq_table[colI]->length();
+			possible( rowI, colI ) += shorter_len;
+		}
+	}
+
+	identity /= possible;
+}
+
+
+/**
+ * Like IdentityMatrix(), but normalizes by the number of alignment columns in
+ * which both sequences of a pair have a character (no '-'), counted over all
+ * matches.  Leaves identity untouched when matches is empty.
+ * Only pairs with seqI < seqJ receive a non-zero denominator.
+ * @param matches    container of pointers to matches
+ * @param seq_table  the sequences the matches refer to
+ * @param identity   output, re-created as a seq_count x seq_count matrix
+ */
+template< class AbstractMatchVectorType >
+void BackboneIdentityMatrix( const AbstractMatchVectorType& matches, const std::vector< genome::gnSequence* >& seq_table, NumericMatrix<double>& identity )
+{
+	if( matches.size() == 0 )
+		return;
+
+	const size_t seq_count = seq_table.size();
+
+	// numerator: identical characters per pair
+	identity = NumericMatrix<double>( seq_count, seq_count );
+	identity.init( 0 );
+	for( uint matchI = 0; matchI < matches.size(); ++matchI )
+		AddToMatchIdentityMatrix( *matches[ matchI ], seq_table, identity );
+
+	// denominator: columns in which both sequences are present
+	NumericMatrix<double> possible( seq_count, seq_count );
+	possible.init( 0 );
+	for( size_t matchI = 0; matchI < matches.size(); ++matchI )
+	{
+		std::vector< std::string > aln_rows;
+		GetAlignment( *(matches[matchI]), seq_table, aln_rows );
+		for( gnSeqI colI = 0; colI < matches[matchI]->AlignmentLength(); ++colI )
+		{
+			for( size_t seqI = 0; seqI < seq_count; ++seqI )
+			{
+				for( size_t seqJ = seqI + 1; seqJ < seq_count; ++seqJ )
+				{
+					if( aln_rows[ seqI ][ colI ] == '-' || aln_rows[ seqJ ][ colI ] == '-' )
+						continue;
+					possible( seqI, seqJ ) += 1;
+				}
+			}
+		}
+	}
+
+	identity /= possible;
+}
+
+
+/**
+ * Computes a symmetric identity matrix for a single match.
+ * For each sequence pair the number of columns with equal (case-insensitive),
+ * non-gap characters is divided by the length of the shorter of the two
+ * sequences within the match and capped at 1; the diagonal is set to 1.
+ * Leaves identity untouched when the match has no sequences.
+ * @param amt        the match to evaluate
+ * @param seq_table  the sequences the match refers to
+ * @param identity   output, re-created as a SeqCount() x SeqCount() matrix
+ */
+template<class AbstractMatchType>
+void MatchIdentityMatrix( const AbstractMatchType& amt, const std::vector< genome::gnSequence* >& seq_table, NumericMatrix<double>& identity)
+{
+	if( amt.SeqCount() == 0 )
+		return;
+	const uint seq_count = amt.SeqCount();
+	identity = NumericMatrix<double>( seq_count, seq_count );
+	identity.init( 0 );
+
+	// count identical columns into the upper triangle
+	std::vector< std::string > aln_rows;
+	GetAlignment( amt, seq_table, aln_rows );
+	for( gnSeqI colI = 0; colI < amt.AlignmentLength(); ++colI )
+	{
+		for( uint rowA = 0; rowA < seq_count; ++rowA )
+		{
+			for( uint rowB = rowA + 1; rowB < seq_count; ++rowB )
+			{
+				if( toupper( aln_rows[ rowA ][ colI ] ) != toupper( aln_rows[ rowB ][ colI ] ) )
+					continue;
+				if( aln_rows[ rowA ][ colI ] == '-' )
+					continue;
+				identity( rowA, rowB ) += 1;
+			}
+		}
+	}
+
+	// normalize the upper triangle, then mirror it into the lower triangle;
+	// rows are processed top-down so mirrored values are already normalized
+	for( uint rowI = 0; rowI < seq_count; ++rowI )
+	{
+		for( uint colJ = seq_count; colJ-- > 0; )
+		{
+			if( rowI == colJ )
+			{
+				// set the diagonal to identical
+				identity( rowI, colJ ) = 1;
+			}
+			else if( rowI < colJ )
+			{
+				// determine the length of the shorter sequence
+				gnSeqI shorter_len = amt.Length( rowI ) < amt.Length( colJ ) ? amt.Length( rowI ) : amt.Length( colJ );
+				// divide through
+				identity( rowI, colJ ) /= (double)shorter_len;
+				// maxes out at 1
+				if( identity( rowI, colJ ) > 1 )
+					identity( rowI, colJ ) = 1;
+			}
+			else
+			{
+				// copy the other one
+				identity( rowI, colJ ) = identity( colJ, rowI );
+			}
+		}
+	}
+}
+
+
+
+/**
+ * Adds, for every sequence pair seqI < seqJ, the number of alignment columns
+ * of amt in which both rows carry the same (case-insensitive) non-gap
+ * character to identity(seqI, seqJ).  identity is neither resized nor reset.
+ * Does nothing when the match has no sequences.
+ * @param amt        the match to evaluate
+ * @param seq_table  the sequences the match refers to
+ * @param identity   accumulator matrix, updated in place
+ */
+template<class AbstractMatchType>
+void AddToMatchIdentityMatrix( const AbstractMatchType& amt, const std::vector< genome::gnSequence* >& seq_table, NumericMatrix<double>& identity)
+{
+	if( amt.SeqCount() == 0 )
+		return;
+	const uint seq_count = amt.SeqCount();
+
+	std::vector< std::string > aln_rows;
+	GetAlignment( amt, seq_table, aln_rows );
+	for( gnSeqI colI = 0; colI < amt.AlignmentLength(); ++colI )
+	{
+		for( uint rowA = 0; rowA < seq_count; ++rowA )
+		{
+			for( uint rowB = rowA + 1; rowB < seq_count; ++rowB )
+			{
+				if( toupper( aln_rows[ rowA ][ colI ] ) != toupper( aln_rows[ rowB ][ colI ] ) )
+					continue;
+				// equal characters that are both gaps do not count
+				if( aln_rows[ rowA ][ colI ] == '-' )
+					continue;
+				identity( rowA, rowB ) += 1;
+			}
+		}
+	}
+}
+
+/*
+// template specialization for (exact) matches
+inline
+void AddToMatchIdentityMatrix( const Match& m, const std::vector< genome::gnSequence* >& seq_table, NumericMatrix<double>& identity)
+{
+	if( m.SeqCount() == 0 )
+		return;
+	for( uint seqI = 0; seqI < m.SeqCount(); seqI++ )
+		if( m.LeftEnd(seqI) != NO_MATCH )
+			for( uint seqJ = seqI + 1; seqJ < m.SeqCount(); seqJ++ )
+				if( m.LeftEnd(seqJ) != NO_MATCH )
+					identity(seqI,seqJ) += m.Length();
+}
+*/
+
+/**
+ * Computes a distance matrix from the fraction of each sequence that is
+ * aligned to each other sequence.
+ * For every pair (seqI, seqJ) two bitsets record which positions of seqI and
+ * of seqJ lie in a column where both sequences have a character.  The
+ * identity of the pair is the mean of the two covered fractions; the result
+ * is turned into a distance by TransformDistanceIdentity().
+ * @param iv_list    container of pointers to alignments
+ * @param seq_table  the sequences the alignments refer to
+ * @param distance   output, re-created as a seq_count x seq_count matrix
+ */
+template< typename MatchVector >
+void SingleCopyDistanceMatrix( MatchVector& iv_list, std::vector< genome::gnSequence* >& seq_table, NumericMatrix<double>& distance )
+{
+	uint seq_count = seq_table.size();
+	distance = NumericMatrix<double>( seq_count, seq_count );
+	distance.init( 0 );
+	std::vector< std::pair< bitset_t, bitset_t > > tmp_comp( seq_count );
+	std::vector< std::vector< std::pair< bitset_t, bitset_t > > > pair_comp( seq_count, tmp_comp );
+	for( uint seqI = 0; seqI < seq_count; ++seqI )
+	{
+		for( uint seqJ = seqI+1; seqJ < seq_count; ++seqJ )
+		{
+			pair_comp[seqI][seqJ].first.resize( seq_table[seqI]->length(), false );
+			pair_comp[seqI][seqJ].second.resize( seq_table[seqJ]->length(), false );
+		}
+	}
+	// signed, loop-invariant bound for the OpenMP loop
+	const int iv_count = (int)iv_list.size();
+#pragma omp parallel for
+	for( int ivI = 0; ivI < iv_count; ++ivI )
+	{
+		std::vector< bitset_t > aln_table;
+#pragma omp critical
+{
+		iv_list[ivI]->GetAlignment(aln_table);
+}
+		// Positions covered by this interval are buffered per thread and
+		// written to the shared bitsets under a lock: two intervals can
+		// touch bits that share a storage word, so unsynchronized set()
+		// calls from different threads would race.
+		std::vector< gnSeqI > hits_i;
+		std::vector< gnSeqI > hits_j;
+		for( uint seqI = 0; seqI < seq_count; ++seqI )
+		{
+			for( uint seqJ = seqI+1; seqJ < seq_count; ++seqJ )
+			{
+				gnSeqI seqI_pos = iv_list[ivI]->LeftEnd(seqI);
+				gnSeqI seqJ_pos = iv_list[ivI]->LeftEnd(seqJ);
+				AbstractMatch::orientation o_i = iv_list[ivI]->Orientation(seqI);
+				AbstractMatch::orientation o_j = iv_list[ivI]->Orientation(seqJ);
+				// reverse sequences are walked from their right end downwards
+				if( o_i == AbstractMatch::reverse )
+					seqI_pos = iv_list[ivI]->RightEnd(seqI);
+				if( o_j == AbstractMatch::reverse )
+					seqJ_pos = iv_list[ivI]->RightEnd(seqJ);
+				if( seqI_pos == NO_MATCH || seqJ_pos == NO_MATCH )
+					continue;
+				hits_i.clear();
+				hits_j.clear();
+				for( size_t colI = 0; colI < aln_table[seqI].size(); ++colI )
+				{
+					if( aln_table[seqI].test(colI) && aln_table[seqJ].test(colI) )
+					{
+						hits_i.push_back( seqI_pos-1 );
+						hits_j.push_back( seqJ_pos-1 );
+					}
+					if( aln_table[seqI].test(colI) )
+					{
+						if( o_i == AbstractMatch::forward )
+							seqI_pos++;
+						else
+							seqI_pos--;
+					}
+					if( aln_table[seqJ].test(colI) )
+					{
+						if( o_j == AbstractMatch::forward )
+							seqJ_pos++;
+						else
+							seqJ_pos--;
+					}
+				}
+				if( hits_i.empty() )
+					continue;
+#pragma omp critical(single_copy_pair_comp)
+{
+				for( size_t hitI = 0; hitI < hits_i.size(); ++hitI )
+				{
+					pair_comp[seqI][seqJ].first.set(hits_i[hitI],true);
+					pair_comp[seqI][seqJ].second.set(hits_j[hitI],true);
+				}
+}
+			}
+		}
+	}
+	for( uint seqI = 0; seqI < seq_count; ++seqI )
+	{
+		distance(seqI,seqI) = 1;
+		for( uint seqJ = seqI+1; seqJ < seq_count; ++seqJ )
+		{
+			// fraction of each sequence that is aligned to the other one
+			double pI = ((double)pair_comp[seqI][seqJ].first.count())/((double)pair_comp[seqI][seqJ].first.size());
+			double pJ = ((double)pair_comp[seqI][seqJ].second.count())/((double)pair_comp[seqI][seqJ].second.size());
+			distance(seqI,seqJ) = (pI + pJ) / 2.0;
+			distance(seqJ,seqI) = (pI + pJ) / 2.0;
+		}
+	}
+	TransformDistanceIdentity(distance);
+}
+
+/**
+ * Computes a distance matrix for a MatchList: the identity matrix of the
+ * list, transformed by TransformDistanceIdentity().
+ * @param mlist     the matches; mlist.seq_table supplies the sequences
+ * @param distance  output matrix
+ */
+inline
+void DistanceMatrix( const MatchList& mlist, NumericMatrix<double>& distance )
+{
+	IdentityMatrix( mlist, mlist.seq_table, distance );
+	TransformDistanceIdentity( distance );
+}
+
+/**
+ * Converts an identity matrix into a distance matrix in place by replacing
+ * every entry x with 1 - x.
+ * @param identity  the matrix to transform
+ */
+inline
+void TransformDistanceIdentity( NumericMatrix<double>& identity ){
+	// The first index is bounded by rows() and the second by cols().  The
+	// bounds used to be swapped, which only worked for square matrices.
+	// NOTE(review): assumes operator() takes (row, column), as the
+	// (seqI, seqJ) usage elsewhere in this header suggests -- confirm.
+	for( int i = 0; i < identity.rows(); i++ ){
+		for( int j = 0; j < identity.cols(); j++ ){
+			identity( i, j ) = 1 - identity( i, j );
+		}
+	}
+}
+
+/**
+ * Computes a distance matrix from a list of (sequence bit mask, amount) pairs.
+ * Sequence s corresponds to bit (seq_count - s - 1) of the mask.  For every
+ * pair of sequences the amounts of all entries whose mask contains both
+ * sequences are summed; the sum is divided by the mean of the two diagonal
+ * sums and subtracted from 1.  A not-a-number result becomes 1 and the
+ * diagonal is finally set to 0.
+ * @param seq_count    number of sequences
+ * @param detail_list  pairs of (bit mask, amount)
+ * @param distance     output, re-created as a seq_count x seq_count matrix
+ */
+inline
+void DistanceMatrix( uint seq_count, const std::vector< std::pair< uint64, uint64 > >& detail_list, NumericMatrix<double>& distance )
+{
+	distance = NumericMatrix<double>( seq_count, seq_count );
+	distance.init( 0 );
+
+	// accumulate the shared amount of every pair, diagonal included
+	for( uint rowI = 0; rowI < seq_count; ++rowI )
+	{
+		uint64 row_mask = 1;
+		row_mask <<= seq_count - rowI - 1;
+		for( uint colJ = 0; colJ < seq_count; ++colJ )
+		{
+			uint64 col_mask = 1;
+			col_mask <<= seq_count - colJ - 1;
+			const uint64 both = row_mask | col_mask;
+			for( uint entryI = 0; entryI < detail_list.size(); ++entryI )
+			{
+				// the entry counts only when both sequence bits are set
+				if( (detail_list[ entryI ].first & both) == both )
+					distance( rowI, colJ ) += detail_list[ entryI ].second;
+			}
+		}
+	}
+
+	// normalize the off-diagonal cells; the diagonal still holds the raw sums
+	for( uint rowI = 0; rowI < seq_count; ++rowI )
+	{
+		for( uint colJ = 0; colJ < seq_count; ++colJ )
+		{
+			if( rowI == colJ )
+				continue;
+			const double avg_length = ( distance( rowI, rowI ) + distance( colJ, colJ ) ) / 2;
+			double dist = 1.0 - ( distance( rowI, colJ ) / avg_length );
+			// a value that differs from itself is not-a-number
+			if( dist != dist )
+				dist = 1.0;
+			distance( rowI, colJ ) = dist;
+		}
+	}
+
+	// set the diagonal identical to itself
+	for( uint diagI = 0; diagI < seq_count; ++diagI )
+		distance( diagI, diagI ) = 0;
+}
+
+
+}	// namespace mems
+
+
+#endif	// __DistanceMatrix_h__
+
diff --git a/libMems/FileSML.cpp b/libMems/FileSML.cpp
new file mode 100644
index 0000000..ff9ce01
--- /dev/null
+++ b/libMems/FileSML.cpp
@@ -0,0 +1,679 @@
+/*******************************************************************************
+ * $Id: FileSML.cpp,v 1.22 2004/04/26 21:13:58 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+
+#include "libMems/FileSML.h"
+// for CreateTempFileName():
+#include "libMems/Aligner.h"
+#include "libGenome/gnFilter.h"
+#include "libGenome/gnRAWSource.h"
+#include <algorithm>
+#include <cmath>
+#include "boost/filesystem/operations.hpp"
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+/**
+ * Assignment operator.  Copies the SortedMerList state, the file name, the
+ * suffix array offset and the coordinate list from `sa`, then opens the named
+ * file read-only on this object's own stream.
+ * A failure to open the file is reported through DebugMsg() only; no exception
+ * is thrown.
+ * NOTE(review): sardata is not touched here -- confirm whether the copy is
+ * expected to be usable through base() without a subsequent LoadFile().
+ * @param sa	The FileSML to copy from
+ * @return A reference to this object
+ */
+FileSML& FileSML::operator=(const FileSML& sa)
+{
+	// nothing to copy, and no reason to reopen our own file
+	if( this == &sa )
+		return *this;
+
+	SortedMerList::operator=( sa );
+	filename = sa.filename;
+	sarray_start_offset = sa.sarray_start_offset;
+	seq_coords = sa.seq_coords;
+
+	// fstream::open() fails and sets failbit when the stream is already open,
+	// so release any previously opened file and reset the state first
+	if( sarfile.is_open() )
+		sarfile.close();
+	sarfile.clear();
+
+	sarfile.open(filename.c_str(), ios::binary | ios::in );
+	if(!sarfile.is_open()){
+		DebugMsg("FileSML::=: Unable to open suffix array file.\n");
+		sarfile.clear();
+	}
+	return *this;
+}
+
+/**
+ * Resets this object: clears the SortedMerList state, forgets the file name,
+ * closes the file stream and empties the coordinate list.
+ */
+void FileSML::Clear()
+{
+	SortedMerList::Clear();
+
+	// file related state
+	filename = "";
+	sarfile.close();
+
+	// bookkeeping derived from the file contents
+	sarray_start_offset = 0;
+	seq_coords.clear();
+}
+
+/**
+ * Loads an existing sorted mer list file: validates the header, reads the
+ * packed sequence into memory, opens a mapping of the file in sardata and,
+ * when a "<fname>.coords" file can be opened, reads the coordinate list from it.
+ * @param fname The name of the file to load
+ * @throws FileNotOpened if the file can not be opened
+ * @throws FileUnreadable if the header or the sequence data is truncated, the
+ *         format version differs from FormatVersion(), or seeking past the
+ *         position array leaves the stream in a bad state
+ */
+void FileSML::LoadFile(const string& fname){
+	filename = fname;
+	// NOTE(review): sarfile is not closed first; open() on an already open
+	// fstream fails -- confirm callers always close it beforehand.
+	sarfile.open(fname.c_str(), ios::binary | ios::in );
+	if(!sarfile.is_open()){
+		sarfile.clear();
+		Throw_gnExMsg( FileNotOpened(), "Unable to open file.\n");
+	}
+	// read the header into a temporary header struct just
+	// in case it's bogus
+	SMLHeader tmp_header;
+	sarfile.read((char*)&tmp_header, sizeof(struct SMLHeader));
+	if(sarfile.gcount() < (int)sizeof(struct SMLHeader)){
+		sarfile.clear();
+		Throw_gnExMsg( FileUnreadable(), "Unable to read file.");
+	}
+	if(tmp_header.version != FormatVersion()){
+		Throw_gnExMsg( FileUnreadable(), "Unsupported file format.");
+	}
+	header = tmp_header;
+	
+	// derive seed_mask from the seed weight, then leave mer_mask set for the seed length
+	SetMerMaskSize( header.seed_weight );
+	seed_mask = mer_mask;
+	SetMerMaskSize( header.seed_length );
+
+	//header is ok.  read the sequence.
+	gnSeqI seq_len = header.length;
+	if(header.circular)
+		seq_len += header.seed_length - 1;
+	// number of 32-bit words needed for seq_len characters of alphabet_bits each, rounded up
+	binary_seq_len = ((uint64)seq_len * (uint64)header.alphabet_bits) / 32;
+	if(((uint64)seq_len * (uint64)header.alphabet_bits) % 32 != 0)
+		binary_seq_len++;
+	binary_seq_len+=2;	//fix for access violations.
+
+	if(sequence != NULL)
+		delete[] sequence;
+	sequence = new uint32[binary_seq_len];
+
+	sarfile.read((char*)sequence, binary_seq_len*sizeof(uint32));
+	if(sarfile.gcount() < (int64)(binary_seq_len*sizeof(uint32))){
+		sarfile.clear();
+		Throw_gnExMsg( FileUnreadable(), "Error reading sequence data.");
+	}
+
+	// the position array starts right after the sequence data
+	sarray_start_offset = sarfile.tellg();
+	// NOTE(review): the entry size used here is sizeof(gnSeqI) while Create()
+	// writes entries of sizeof(smlSeqI_t) -- confirm both types have the same size.
+	sarfile.seekg(sarray_start_offset + sizeof(gnSeqI) * header.length);
+	if(!sarfile.good()){
+		sarfile.clear();
+		Throw_gnExMsg( FileUnreadable(), "Premature end of file.");
+	}
+	filename = fname;
+
+	// create a memory-map to the data of interest
+	sardata.open(fname);
+	
+	// check whether there is a .coords mask file to read
+	string coordfile = filename + ".coords";
+	ifstream coord_in( coordfile.c_str() );
+	if( coord_in.is_open() ){
+		seq_coords.clear();
+		int64 cur_coord;
+		// values are whitespace separated; reading stops at the first extraction failure
+		while( coord_in >> cur_coord ){
+			seq_coords.push_back( cur_coord );
+		}
+	}
+}
+
+/**
+ * Reopens sarfile in binary read/write mode, optionally truncating the file.
+ * If that fails and the stream was open on entry, it is reopened read-only
+ * before the exception is thrown.
+ * @param truncate	When true the file is opened with ios::trunc
+ * @throws FileNotOpened if the file could not be opened for writing
+ */
+void FileSML::OpenForWriting( boolean truncate ){
+	const boolean previously_open = sarfile.is_open();
+	if( previously_open )
+		sarfile.close();
+
+	// assemble the open mode once instead of duplicating the open() call
+	ios::openmode write_mode = ios::binary | ios::in | ios::out;
+	if( truncate )
+		write_mode |= ios::trunc;
+	sarfile.open( filename.c_str(), write_mode );
+
+	if( sarfile.is_open() && sarfile.good() )
+		return;
+
+	// failure: restore read-only access if there was any, then report
+	sarfile.clear();
+	if( previously_open )
+		sarfile.open(filename.c_str(), ios::binary | ios::in );
+	Throw_gnExMsg(FileNotOpened(), "Unable to open file for writing.");
+}
+
+/**
+ * Writes the in-memory SML header to the file and reopens the file read-only.
+ * The read-only reopen is attempted even when the write failed.
+ * @return true (any failure is reported by exception)
+ * @throws IOStreamFailed if the file is not open on entry, the write failed,
+ *         or the read-only reopen failed
+ * @throws FileNotOpened propagated from OpenForWriting()
+ */
+boolean FileSML::WriteHeader(){
+	if( !sarfile.is_open() ){
+		Throw_gnExMsg(IOStreamFailed(), "File is not valid.");
+	}
+
+	// NULL means that nothing has gone wrong so far
+	const char* failure = NULL;
+
+	// Open sarfile for writing and write new header.
+	OpenForWriting( false );
+	sarfile.write( (char*)&header, sizeof(struct SMLHeader) );
+	if( !sarfile.good() )
+		failure = "Error writing header to disk.";
+
+	// reopen the sorted mer list file read-only
+	sarfile.close();
+	sarfile.open( filename.c_str(), ios::binary | ios::in );
+	if( !sarfile.is_open() )
+		failure = "Error opening sorted mer list file.";
+
+	if( failure != NULL ){
+		Throw_gnExMsg(IOStreamFailed(), failure);
+	}
+	return true;
+}
+
+/**
+ * Returns the unique mer count, letting SortedMerList::UniqueMerCount() update
+ * header.unique_mers first.  The header is rewritten to disk only when that
+ * call changed the stored count.
+ * @return header.unique_mers after the update
+ */
+gnSeqI FileSML::UniqueMerCount(){
+	const gnSeqI count_before = header.unique_mers;
+	SortedMerList::UniqueMerCount();
+
+	const bool count_changed = ( count_before != header.unique_mers );
+	if( count_changed )
+		WriteHeader();
+
+	return header.unique_mers;
+}
+
+/**
+ * Changes the description in memory and on disk.
+ * At most DESCRIPTION_SIZE-1 characters of `d` are stored; longer descriptions
+ * are truncated.
+ * @param d	The new description
+ * @throws IOStreamFailed propagated from WriteHeader()
+ */
+void FileSML::SetDescription(const string& d){
+	strncpy(header.description, d.c_str(), DESCRIPTION_SIZE-1);
+	// strncpy() does not write a terminator when d has DESCRIPTION_SIZE-1 or
+	// more characters, and the last byte may hold stale data read from a file
+	// (assumes header.description holds DESCRIPTION_SIZE chars -- TODO confirm)
+	header.description[ DESCRIPTION_SIZE-1 ] = '\0';
+	WriteHeader();
+}
+
+/**
+ * Changes the identifier in memory and on disk.
+ * @param d	The new identifier, stored in header.id
+ */
+void FileSML::SetID(const sarID_t d)
+{
+	header.id = d;
+	// persist the modified header
+	WriteHeader();
+}
+
+
+extern "C" {
+#include "libMems/dmSML/dmsort.h"
+}
+
+char** FileSML::tmp_paths = NULL;
+
+/**
+ * Appends a scratch directory to the NULL-terminated tmp_paths array.
+ * A trailing path separator is added to a non-empty path that lacks one.
+ * The array is reallocated on every call; previously registered strings are
+ * kept and only the old pointer array is freed.
+ * @param path	The directory to register
+ */
+void FileSML::registerTempPath( const string& path ) {
+	string tmp_path = path;
+	// add trailing path separator if necessary
+#ifdef WIN32
+	const char separator = '\\';
+#else
+	const char separator = '/';
+#endif
+	// an empty path has no last character to inspect, and is left untouched
+	if( !tmp_path.empty() && tmp_path[ tmp_path.size() - 1 ] != separator )
+		tmp_path += separator;
+
+	// count the existing entries; tmp_paths is NULL until the first registration
+	int path_count = 0;
+	while( tmp_paths != NULL && tmp_paths[ path_count ] != NULL )
+		path_count++;
+
+	// room for the old entries, the new entry AND the NULL terminator
+	char** new_paths = new char*[ path_count + 2 ];
+	// copy old elements
+	for( int pathI = 0; pathI < path_count; pathI++ )
+		new_paths[ pathI ] = tmp_paths[ pathI ];
+	// add new element
+	new_paths[ path_count ] = new char[ tmp_path.size() + 1 ];
+	strncpy( new_paths[ path_count ], tmp_path.c_str(), tmp_path.size() + 1 );
+	// set null terminator element
+	new_paths[ path_count + 1 ] = NULL;
+
+	// publish the new array, then free the old pointer array
+	// (delete[] on NULL is a no-op, so the first registration needs no special case)
+	char** old_paths = tmp_paths;
+	tmp_paths = new_paths;
+	delete[] old_paths;
+}
+
+/**
+ * Returns a registered scratch path.
+ * @param pathI	Index of the path, 0 <= pathI < getTempPathCount()
+ * @return The path, or NULL when pathI is out of range or no path has been
+ *         registered yet
+ */
+const char* FileSML::getTempPath( int pathI ){
+	// getTempPathCount() is 0 while tmp_paths is still NULL, so this also
+	// protects against dereferencing the unallocated array
+	if( pathI < 0 || pathI >= getTempPathCount() )
+		return NULL;
+	return tmp_paths[ pathI ];
+}
+
+/**
+ * Counts the registered scratch paths.
+ * @return The number of entries before the NULL terminator of tmp_paths, or 0
+ *         when tmp_paths itself is NULL
+ */
+int FileSML::getTempPathCount(){
+	if( tmp_paths == NULL )
+		return 0;
+
+	int registered = 0;
+	for( char** cursor = tmp_paths; *cursor != NULL; ++cursor )
+		++registered;
+	return registered;
+}
+
+
+/**
+ * Copies in_seq to out_seq while leaving out runs of 'N'/'n' characters that
+ * are longer than mask_n_length, and records which input regions were kept.
+ * The input is scanned in chunks of up to read_length characters using 1-based
+ * sequence coordinates.
+ * NOTE(review): a run of Ns is only closed when a non-N character follows it,
+ * so a run at the very end of the sequence appears to be copied by the final
+ * subseq() call below -- confirm whether that is intended.
+ * @param in_seq		The sequence to filter
+ * @param out_seq		Receives the kept regions, appended in order
+ * @param seq_coords	Receives pairs of (first, last) input coordinates, one
+ *						pair per kept region
+ * @param mask_n_length	Runs of Ns longer than this are removed
+ */
+void maskNNNNN( const gnSequence& in_seq, gnSequence& out_seq, vector< int64 >& seq_coords, int mask_n_length ) {
+	
+	gnSeqI seqI = 1;	// 1-based coordinate of the first character of the current chunk
+	gnSeqI read_length = 1024*1024;
+	string cur_seq;
+	gnSeqI n_count = 0;	// length of the N run currently being scanned
+	gnSeqI n_stretch_start = 0;	// coordinate of the first N of the current run
+	gnSeqI n_stretch_end = 1;	// coordinate where the current kept region begins
+
+	while( seqI <= in_seq.length() ){
+		// shorten the last chunk so that it ends at the end of the sequence
+		read_length = seqI + read_length < in_seq.length() ? read_length : in_seq.length() - seqI + 1;
+		in_seq.ToString( cur_seq, read_length, seqI );
+		
+		uint charI = 0;
+		for( ; charI < cur_seq.size(); charI++ ){
+			if( cur_seq[ charI ] == 'N' || cur_seq[ charI ] == 'n' ){
+				if( n_count == 0 ){
+					n_stretch_start = seqI + charI;
+				}
+				n_count++;
+			}else{
+				// first non-N character after a run: drop the run if it was long enough
+				if( n_count > mask_n_length ){
+					// skip the empty region that arises when the run directly follows the previous one
+					if( n_stretch_start - n_stretch_end != 0 ){
+						// Add the sequence region to the output sequence
+						out_seq += in_seq.subseq( n_stretch_end, n_stretch_start - n_stretch_end );
+						// add the masked coordinates
+						seq_coords.push_back( n_stretch_end );
+						seq_coords.push_back( n_stretch_start - 1 );
+					}
+					// update n_stretch_end to the first non N character
+					n_stretch_end = seqI + charI;
+				}
+				n_count = 0;
+			}
+		}
+		seqI += read_length;
+	}
+	// flush the region that extends to the end of the input
+	out_seq += in_seq.subseq( n_stretch_end, seqI - n_stretch_end );
+
+	// add the masked coordinates
+	seq_coords.push_back( n_stretch_end );
+	seq_coords.push_back( seqI - 1 );
+}
+
+	// use dmSML to construct the SML
+	// then read it in using LoadFile()
+/**
+ * Builds the sorted mer list with dmSML() and then loads the resulting file.
+ * Steps: filter runs of Ns out of `seq`, write the filtered sequence to a
+ * temporary raw file, write the kept coordinates to "<filename>.coords", run
+ * dmSML() on the raw file, delete the raw file and call LoadFile( filename ).
+ * @param seq	The sequence to index
+ * @param seed	The seed value handed to dmSML()
+ * @throws const char* (an empty string) if the coordinate file can not be opened
+ */
+void FileSML::dmCreate(const gnSequence& seq, const uint64 seed){
+	// Filter NNNNNs
+	gnSequence masked_seq;
+	seq_coords.clear();
+	// a threshold of 0 removes every run of Ns that is followed by another character
+	maskNNNNN( seq, masked_seq, seq_coords, 0 );
+	
+	// write a raw sequence to a tmp file stored in the first scratch path
+	string rawfile = CreateTempFileName("dm_rawseq");
+	gnRAWSource::Write( masked_seq, rawfile.c_str() );
+	
+	// write a sequence coordinate file
+	if( seq_coords.size() > 0 ){
+		string coordfile = filename + ".coords";
+		ofstream coord_out( coordfile.c_str() );
+		if( !coord_out.is_open() ){
+			cerr << "Could not open " << coordfile << endl;
+			throw "";
+		}
+		
+		// seq_coords holds (first, last) pairs, written one pair per line
+		for( int coordI = 0; coordI < seq_coords.size(); coordI+=2 ){
+			coord_out << seq_coords[ coordI ] << '\t' << seq_coords[ coordI + 1 ] << endl;
+		}
+		coord_out.close();
+	}
+	
+	
+	// run dmSML
+	const char* const* scratch_paths = (const char* const*)tmp_paths;
+	// release our handle on the output file before dmSML() is run on it
+	sarfile.close();
+	int rval = dmSML( rawfile.c_str(), filename.c_str(), scratch_paths, seed );
+	// a failure is only reported here; LoadFile() below is still attempted
+	if( rval != 0 )
+		cerr << "Crap.  It's broke, return value " << rval << endl;
+	
+	boost::filesystem::remove( rawfile );
+	// load the sorted mer list
+	LoadFile( filename );
+}
+
+/**
+ * Creates the sorted mer list for `seq` in memory and writes it to the file
+ * named by `filename`: header, packed sequence, then the sorted positions.
+ * If anything in the in-memory construction throws (e.g. a memory allocation
+ * failure), construction falls back to dmCreate(), which builds and loads the
+ * file by itself.
+ * @param seq	The sequence to index
+ * @param seed	The seed pattern; a seed whose weight differs from its length
+ *				is treated as a spaced seed
+ * @throws FileNotOpened if the file can not be opened for writing or reopened
+ *         for reading
+ * @throws IOStreamFailed if writing the header or the position array fails
+ */
+void FileSML::Create(const gnSequence& seq, const uint64 seed){
+
+	vector<bmer> sml_array;
+	bool is_spaced_seed = getSeedWeight(seed) != getSeedLength(seed);
+	OpenForWriting( true );
+
+	try{
+		SortedMerList::Create( seq, seed );
+
+		if( is_spaced_seed )
+			FillDnaSeedSML(seq, sml_array);
+		else
+			FillSML(seq, sml_array);
+
+	}catch(...){
+		// if there was a memory allocation error then
+		// try using dmSML to do an external sort
+		sarfile.clear();
+		sarfile.close();
+		sarfile.clear();
+		if( sequence != NULL )
+			delete[] sequence;
+		// LoadFile(), called by dmCreate(), deletes a non-NULL sequence;
+		// leaving the stale pointer in place would free the buffer twice
+		sequence = NULL;
+		binary_seq_len = 0;
+
+		dmCreate( seq, seed );
+		// dmCreate() has written and loaded the complete file.  Falling
+		// through would write an empty list to a stream that is now read-only.
+		return;
+	}
+
+//	RadixSort(s_array);
+	sort(sml_array.begin(), sml_array.end(), &bmer_lessthan);
+
+	/* now write out the file header */
+	sarfile.write((char*)&header, sizeof(struct SMLHeader));
+
+	if(!sarfile.good()){
+		sarfile.clear();
+		Throw_gnExMsg( IOStreamFailed(), "Error writing sorted mer list header to disk.\n");
+	}
+
+	/* write out the actual sequence */
+	sarfile.write((char*)sequence, binary_seq_len*sizeof(uint32));
+	// the position array starts right after the sequence data
+	sarray_start_offset = sarfile.tellg();
+
+	/* write out the sorted mer list */
+	for(gnSeqI suffixI=0; suffixI < sml_array.size(); suffixI++)
+		sarfile.write((char*)&(sml_array[suffixI].position), sizeof(smlSeqI_t));
+
+	sarfile.flush();
+	if(!sarfile.good()){
+		sarfile.clear();
+		Throw_gnExMsg( IOStreamFailed(), "Error writing sorted mer list to disk.\n");
+	}
+	// reopen the sorted mer list file read-only
+	sarfile.close();
+	sarfile.open(filename.c_str(), ios::binary | ios::in );
+	if(!sarfile.is_open())
+		Throw_gnExMsg( FileNotOpened(), "FileSML::Create: Error opening sorted mer list file.\n");
+
+	// map the finished file so that base() can serve position lookups
+	sardata.open(filename);
+}
+
+/**
+ * Returns the entry at `index` of the sorted mer list.
+ * The position is read through base() and the mer is computed from that
+ * position with GetSeedMer().  No range check is performed on `index`.
+ * @param index	Index into the position array
+ */
+bmer FileSML::operator[](gnSeqI index)
+{
+	bmer entry;
+	entry.position = base()[index];
+	entry.mer = GetSeedMer(entry.position);
+	return entry;
+}
+
+
+/**
+ * Reads up to `size` entries of the sorted mer list, starting at `offset`.
+ * The request is clipped to the end of the list.  An entry whose position
+ * exceeds header.length is reported through ErrorMsg() and cerr and is
+ * returned with a mer of 0.
+ * @param readVector	Receives the entries; resized to the number read
+ * @param size			Maximum number of entries to read
+ * @param offset		Index of the first entry to read
+ * @return false if the file is not open or `offset` is past the end of the
+ *         list (readVector is cleared in the latter case), true otherwise
+ */
+boolean FileSML::Read(vector<bmer>& readVector, gnSeqI size, const gnSeqI offset)
+{
+	if(!sarfile.is_open()){
+		DebugMsg("FileSML::Read: Error sar file not open.\n");
+		return false;
+	}
+
+	gnSeqI total_len = SMLLength();
+	if(offset >= total_len){
+		readVector.clear();
+		return false;
+	}
+	gnSeqI readlen = offset + size < total_len ? size : total_len - offset;
+
+	readVector.resize( readlen );
+
+	//copy data to the vector
+	for(gnSeqI j=0; j < readlen; j++){
+		bmer tmp_mer;
+		tmp_mer.position = base()[offset+j];
+		if( tmp_mer.position > header.length ){
+			// Render the position as decimal text by hand.  Adding the integer
+			// to the string literal, as was done before, offsets the literal's
+			// pointer instead of concatenating the number.
+			string position_text;
+			uint64 remaining = tmp_mer.position;
+			do{
+				position_text.insert( position_text.begin(), (char)('0' + remaining % 10) );
+				remaining /= 10;
+			}while( remaining != 0 );
+
+			string errmsg = "Corrupted SML, position ";
+			errmsg += position_text;
+			errmsg += " is out of range\n";
+			ErrorMsg( errmsg );
+			cerr << errmsg;
+			// do not hand an uninitialised mer back to the caller
+			tmp_mer.mer = 0;
+		}else
+			tmp_mer.mer = GetSeedMer(tmp_mer.position);
+		readVector[ j ] = tmp_mer;
+	}
+	return true;
+}
+
+/**
+ * Creates a sorted mer list by recursive divide and conquer.
+ * While split_levels > 0 the sequence is cut into two halves, a sorted mer
+ * list is built for each half in a temporary file (recursively, with
+ * split_levels - 1) and the two lists are combined into this object with
+ * Merge().  At split_levels == 0 the list is built directly with Create().
+ * NOTE(review): the temporary objects are only deleted on the normal path; an
+ * exception from the recursive calls or from Merge() would skip the cleanup.
+ * @param seq			The sequence to index
+ * @param split_levels	Number of times to halve the sequence
+ * @param mersize		Passed on to the recursive calls and to Create()
+ */
+void FileSML::BigCreate(const gnSequence& seq, const uint32 split_levels, const uint32 mersize){
+//	unsigned long freemem = wxGetFreeMemory();	//get the amount of free memory.
+//	unsigned long neededmem = GetNeededMemory(seq.length());
+//	if(neededmem >= freemem && neededmem > MEMORY_MINIMUM){ // divide and conquer
+	if(split_levels > 0){	// split_levels defines the number of times to divide and conquer
+		uint64 midpoint = seq.length() / 2;
+		// round the midpoint down with integer arithmetic; presumably so that the
+		// first half fills whole 32-bit words (see the KNOWN BUG note on Merge) -- TODO confirm
+		midpoint = (midpoint * header.alphabet_bits) / 32;
+		midpoint = (midpoint / header.alphabet_bits) * 32;
+		gnSequence seqA = seq.subseq(1, midpoint);
+		gnSequence seqB = seq.subseq(1 + midpoint, seq.length() - midpoint);
+		seqA.setCircular(false);
+		seqB.setCircular(false);
+		cout << "Splitting " << seq.length() << " to " << seqA.length() << " and " << seqB.length() << "\n";
+
+		//create the first sar
+		string temp_sarfile = CreateTempFileName("bdsa_split");
+		FileSML* temp_sar = this->Clone();
+		temp_sar->filename = temp_sarfile.c_str();
+		temp_sar->BigCreate(seqA, split_levels - 1, mersize);
+
+		//create the second sar
+		string temp_sarfile2 = CreateTempFileName("bdsa_split");
+		FileSML* temp_sar2 = this->Clone();
+		temp_sar2->filename = temp_sarfile2.c_str();
+		temp_sar2->BigCreate(seqB, split_levels - 1, mersize);
+
+		//merge them to this file
+		cout << "Merging " << seqA.length() << " and " << seqB.length() << "\n";
+		Merge(*temp_sar, *temp_sar2);
+		//free up RAM
+		delete temp_sar;
+		delete temp_sar2;
+		//erase the temp files.
+		boost::filesystem::remove(temp_sarfile);
+		boost::filesystem::remove(temp_sarfile2);
+	}else{
+		// NOTE(review): Create() names its second parameter `seed`, but a mer size is
+		// passed here -- confirm the two are interchangeable.
+		Create(seq, mersize);
+	}
+}
+
+/**
+ * Sorts s_array by the 64-bit mer value using a least-significant-digit radix
+ * sort with 11-bit digits (2048 buckets).  Because every pass appends to the
+ * buckets in encounter order, entries with equal mers keep their relative order.
+ * @param s_array	The array to sort; emptied and refilled in sorted order
+ */
+void FileSML::RadixSort(vector<bmer>& s_array){
+	const uint32 radix_size = 11;
+	const uint32 bucket_count = ((uint32)1) << radix_size;
+	// mask selecting the lowest radix_size bits
+	uint64 radix_mask = ~((uint64)0) >> (64 - radix_size);
+
+	vector<bmer>* spare = new vector<bmer>[bucket_count];
+	vector<bmer>* filled = new vector<bmer>[bucket_count];
+
+	// first pass: distribute the input by its lowest digit
+	uint64 digit;
+	for(uint32 merI = 0; merI < s_array.size(); merI++){
+		digit = s_array[merI].mer & radix_mask;
+		filled[digit].push_back(s_array[merI]);
+	}
+	s_array.clear();
+
+	// remaining passes: redistribute by each higher digit in turn
+	for(uint32 shift_bits = radix_size; shift_bits < 64; shift_bits += radix_size){
+		radix_mask <<= radix_size;
+		for(uint32 bucketI = 0; bucketI < bucket_count; bucketI++){
+			vector<bmer>& from = filled[bucketI];
+			for(uint32 merI = 0; merI < from.size(); merI++){
+				digit = from[merI].mer & radix_mask;
+				digit >>= shift_bits;
+				spare[digit].push_back(from[merI]);
+			}
+			from.clear();
+		}
+		// the freshly filled buckets become the source of the next pass
+		vector<bmer>* swap_tmp = filled;
+		filled = spare;
+		spare = swap_tmp;
+	}
+
+	// gather the buckets, lowest first, back into the caller's array
+	for(uint32 bucketI = 0; bucketI < bucket_count; bucketI++){
+		vector<bmer>& from = filled[bucketI];
+		for(uint32 merI = 0; merI < from.size(); merI++)
+			s_array.push_back(from[merI]);
+		from.clear();
+	}
+	delete[] filled;
+	delete[] spare;
+}
+
+//Merges the supplied sorted mer lists into this one, overwriting the existing sml.
+//KNOWN BUG:  The first sorted mer list must have (length * alphabet_bits) / word_bits == 0
+//for Merge to work properly.
+//NOTE(review): the condition above presumably means the remainder
+//((length * alphabet_bits) % word_bits) -- TODO confirm.
+//
+//The output file is rewritten from scratch: header, concatenated packed
+//sequence, then the merged position array.  Positions taken from sa2 are
+//shifted by the length of sa.  Mers that span the junction between the two
+//sequences ("middle mers") are computed here and merged in as a third stream.
+//Throws SMLMergeError when the two lists are incompatible, IOStreamFailed on
+//write errors and FileNotOpened when the file can not be reopened for reading.
+void FileSML::Merge(SortedMerList& sa, SortedMerList& sa2){
+STACK_TRACE_START
+	SMLHeader sa_head = sa.GetHeader();
+	SMLHeader sa_head2 = sa2.GetHeader();
+	
+	//basic copying
+	header = sa_head;
+	//take the smaller mer_size
+	if(sa_head.seed_length < sa_head2.seed_length){
+		header.seed_length = sa_head.seed_length;
+		mer_mask = sa.GetMerMask();
+	}else{
+		header.seed_length = sa_head2.seed_length;
+		mer_mask = sa2.GetMerMask();
+	}
+	header.unique_mers = NO_UNIQUE_COUNT;
+	header.length += sa_head2.length;
+
+	//allocate some memory
+	const uint32 SEQ_BUFFER_SIZE = 200000;
+	// output buffer for positions; flushed to disk whenever SEQ_BUFFER_SIZE entries are pending
+	Array<uint32> seq_buf ( SEQ_BUFFER_SIZE + header.seed_length );
+
+	//do some sanity checks on the sars we're merging.
+	// NOTE(review): memcmp() compares UINT8_MAX (255) bytes -- confirm the table does not have 256 entries.
+	if(sa_head.alphabet_bits != sa_head2.alphabet_bits ||
+	  sa_head.version != sa_head2.version ||
+	  memcmp(sa_head.translation_table, sa_head2.translation_table, UINT8_MAX)){
+		Throw_gnExMsg(SMLMergeError(), "Incompatible sorted mer lists.");
+	}
+	
+	OpenForWriting( true );
+
+	//write the header
+	sarfile.write((char*)&header, sizeof(struct SMLHeader));
+	if(!sarfile.good()){
+		sarfile.clear();
+		sarfile.close();
+		sarfile.open(filename.c_str(), ios::binary | ios::in );
+		Throw_gnExMsg(IOStreamFailed(), "Error writing sorted mer list header to disk.");
+	}
+
+	//copy sequence data into memory.
+	// NOTE(review): this local hides the binary_seq_len that LoadFile() and Create()
+	// assign to, which is therefore not updated here -- confirm that is intended.
+	uint32 binary_seq_len = (header.length * header.alphabet_bits) / 32;
+	if((header.length * header.alphabet_bits) % 32 > 0)
+		binary_seq_len++;
+
+	//The +1 is to avoid access violations when copying in the
+	//binary sequence before shifting.
+	if( sequence != NULL )
+		delete[] sequence;
+	sequence = new uint32[binary_seq_len+1];
+	sa.GetBSequence(sequence, sa_head.length, 0);
+
+	// whole words and leftover bits occupied by the first sequence
+	uint32 bseq_len1 = (sa_head.length * sa_head.alphabet_bits) / 32;
+	uint32 bseq_remainder = (sa_head.length * sa_head.alphabet_bits) % 32;
+	if(bseq_remainder > 0){
+		sa2.GetBSequence(&(sequence[bseq_len1]), sa_head2.length, 0);
+		//mask off the end of the first sequence
+		uint32 end_mask = 0xFFFFFFFF;
+		end_mask <<= bseq_remainder;
+		sequence[bseq_len1] &= end_mask;
+
+		//shift the second sequence over.
+		for(uint32 i=bseq_len1; i < binary_seq_len; i++){
+			uint32 tmp = sequence[i+1];
+			tmp >>= 32 - bseq_remainder;
+			sequence[i] |= tmp;
+			sequence[i+1] <<= bseq_remainder;
+		}
+	}else
+		sa2.GetBSequence(&(sequence[bseq_len1]), sa_head2.length, 0);
+	
+	//write the sequence
+	sarfile.write((char*)sequence, binary_seq_len * sizeof(uint32));
+	// the position array starts right after the sequence data
+	sarray_start_offset = sarfile.tellg();
+
+	//get new mers in the middle
+	vector<bmer> middle_mers;
+	bmer mid_mer;
+	// the last seed_length - 1 positions of the first sequence start mers that run into the second one
+	for(uint32 midI = sa_head.length - header.seed_length + 1; midI < sa_head.length; midI++){
+		mid_mer.position = midI;
+		mid_mer.mer = GetMer(midI);
+		middle_mers.push_back(mid_mer);
+	}
+	sort(middle_mers.begin(), middle_mers.end(), &bmer_lessthan);
+	//put a special mer at the end which will never go into the sorted mer list
+	//since every possible mer is less than it.
+	mid_mer.mer = 0xFFFFFFFF;
+	mid_mer.mer <<= 32;
+	mid_mer.mer |= 0xFFFFFFFF;
+	mid_mer.position = GNSEQI_END;
+	middle_mers.push_back(mid_mer);
+	//merge and write the sorted mer lists
+	vector<bmer> array1, array2;
+	uint32 SAR_BUFFER_SIZE = SEQ_BUFFER_SIZE/2;  //actual size is this number * 13 bytes
+	// k and l are the read offsets into sa and sa2; m and n index the current
+	// chunks array1 and array2; midI indexes middle_mers
+	uint32 k=0, l=0, midI=0;
+	uint32 m = 0, n = 0;
+	gnSeqI bufferI=0;
+	do{
+		//mergesort them
+		while(m < array1.size() && n < array2.size()){
+			if(array1[m].mer <= array2[n].mer){
+				if(array1[m].mer <= middle_mers[midI].mer){
+					seq_buf.data[bufferI] = array1[m].position;
+					m++;
+					bufferI++;
+				}else{
+					seq_buf.data[bufferI] = middle_mers[midI].position;
+					midI++;
+					bufferI++;
+				}
+			}else if(array2[n].mer <= middle_mers[midI].mer){
+				// positions from the second list are relative to its own start
+				seq_buf.data[bufferI] = array2[n].position + sa_head.length;
+				n++;
+				bufferI++;
+			}else{
+				seq_buf.data[bufferI] = middle_mers[midI].position;
+				midI++;
+				bufferI++;
+			}
+			if(bufferI == SEQ_BUFFER_SIZE){
+				sarfile.write((char*)seq_buf.data, bufferI * sizeof(uint32));
+				bufferI = 0;
+			}
+		}
+		// refill whichever chunk has been consumed (both are empty on the first pass)
+		if(m == array1.size()){
+			sa.Read(array1, SAR_BUFFER_SIZE, k);
+			k += array1.size();
+			m = 0;
+		}
+		if(n == array2.size()){
+			sa2.Read(array2, SAR_BUFFER_SIZE, l);
+			l += array2.size();
+			n = 0;
+		}
+	}while(array1.size() != 0 && array2.size() != 0);
+	if(bufferI > 0)
+		sarfile.write((char*)seq_buf.data, (bufferI)*sizeof(uint32));
+	//consolidate the remaining mers to a known vector
+	vector<bmer> remaining_mers;
+	for(;m < array1.size(); m++)
+		remaining_mers.push_back(array1[m]);
+	for(;n < array2.size(); n++){
+		remaining_mers.push_back(array2[n]);
+		remaining_mers[remaining_mers.size()-1].position += sa_head.length;
+	}
+	// size() - 1 leaves out the sentinel entry appended above
+	for(;midI < middle_mers.size() - 1; midI++)
+		remaining_mers.push_back(middle_mers[midI]);
+	//merge them with the remaining middle_mers
+	sort(remaining_mers.begin(), remaining_mers.end(), &bmer_lessthan);
+	uint32 remI = 0;
+	for(;remI < remaining_mers.size(); remI++)
+		seq_buf.data[remI] = remaining_mers[remI].position;
+	if(remI > 0)
+		sarfile.write((char*)seq_buf.data, (remI)*sizeof(uint32));
+
+	if(!sarfile.good()){
+		sarfile.clear();
+		sarfile.close();
+		sarfile.open(filename.c_str(), ios::binary | ios::in );
+		Throw_gnExMsg(IOStreamFailed(), "Error writing position array.");
+	}
+	// reopen the sorted mer list file read-only
+	sarfile.close();
+	sarfile.open(filename.c_str(), ios::binary | ios::in );
+	if(!sarfile.is_open()){
+		sarfile.clear();
+		Throw_gnExMsg(FileNotOpened(), "Error opening sorted mer list file.");
+	}
+STACK_TRACE_END
+}
+
+}
diff --git a/libMems/FileSML.h b/libMems/FileSML.h
new file mode 100644
index 0000000..e262db0
--- /dev/null
+++ b/libMems/FileSML.h
@@ -0,0 +1,135 @@
+/*******************************************************************************
+ * $Id: FileSML.h,v 1.11 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _FileSML_h_
+#define _FileSML_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#pragma warning(push)
+#pragma warning(disable : 4996)
+#pragma warning(pop)
+
+#include "libGenome/gnSequence.h"
+#include "libMems/SortedMerList.h"
+#include <boost/iostreams/device/mapped_file.hpp>
+#include <fstream>
+#include <vector>
+#include <string>
+
+namespace mems {
+
+//sequence database size will be
+//base_count / 4 + base_count * 12 bytes
+
+#define DEFAULT_MEMORY_MINIMUM 20971520  //~20 Megabytes
+
+/**
+ * A sorted mer list that is stored in a file on disk.
+ * The file holds an SMLHeader, the packed sequence and an array of positions;
+ * the position array is accessed through a memory mapping of the file.
+ * The class is abstract: Clone() and GetNeededMemory() must be provided by a
+ * derived class.
+ */
+class FileSML : public SortedMerList
+{
+public:
+	/** Creates an empty list that is not yet associated with a file */
+	FileSML() : SortedMerList(), sarray_start_offset( 0 ) {
+		// sarray_start_offset is initialised above so that base() never
+		// computes an address from an indeterminate offset
+//		file_mutex = new wxMutex();
+	};
+	FileSML& operator=(const FileSML& sa);
+	virtual FileSML* Clone() const = 0;
+
+	virtual void Clear();
+
+	/**
+	 * Loads an existing sorted mer list from a file on disk.
+	 * @param fname The name of the file to load
+	 * @throws FileNotOpened thrown if the file could not be opened
+	 * @throws FileUnreadable thrown if the file was corrupt or not a sorted mer list
+	 */
+	virtual void LoadFile(const std::string& fname);
+	/**
+	 * Creates large sorted mer lists which do not fit entirely in memory.
+	 * BigCreate uses an external mergesort to create large sorted mer lists.
+	 * It will divide the data a number of times specified by the split_levels
+	 * parameter.  Each split is written to temp files on disk and merged.
+	 * @param seq The sequence to create an SML for.
+	 * @param split_levels The number of times to divide the sequence in half.
+	 * @param mersize The size of the mers to sort on.
+	 * @see FileSML::Create
+	 */
+	virtual void BigCreate(const genome::gnSequence& seq, const uint32 split_levels, const uint32 mersize = DNA_MER_SIZE);
+	/** Creates the sorted mer list for seq and writes it to the file */
+	virtual void Create(const genome::gnSequence& seq, const uint64 seed );
+	/** Reads up to size entries starting at offset into readVector */
+	virtual boolean Read(std::vector<bmer>& readVector, gnSeqI size, gnSeqI offset = 0);
+	/** Merges two sorted mer lists into this one, overwriting the file */
+	virtual void Merge(SortedMerList& sa, SortedMerList& sa2);
+
+	/** Returns the entry at the given index of the position array */
+	virtual bmer operator[]( gnSeqI index );
+
+	virtual gnSeqI UniqueMerCount();
+	/** Changes the description in memory and on disk */
+	virtual void SetDescription(const std::string& d);
+	/** Changes the identifier in memory and on disk */
+	virtual void SetID(const sarID_t d);
+
+	virtual uint32 FormatVersion();
+	static uint64 MemoryMinimum();
+	virtual void RadixSort(std::vector<bmer>& s_array);
+
+	/** Creates the sorted mer list with dmSML and then loads it */
+	void dmCreate(const genome::gnSequence& seq, const uint64 seed);
+	/** Adds a directory to the list of scratch paths */
+	static void registerTempPath( const std::string& tmp_path );
+
+	static const char* getTempPath( int pathI );
+
+	static int getTempPathCount();
+
+	const std::vector< int64 >& getUsedCoordinates() const { return seq_coords; };
+
+protected:
+	/**
+	 * Reopens the sarfile fstream in read/write mode
+	 * @throws FileNotOpened thrown if the file could not be opened for writing
+	 */
+	virtual void OpenForWriting( boolean truncate = false );
+	/**
+	 * Writes the SML header to disk
+	 * @throws FileNotOpened thrown if the file could not be opened for writing
+	 * @throws IOStreamFailed thrown if an error occurred writing the data
+	 */
+	virtual boolean WriteHeader();
+	/**
+	 * Calculates and returns the amount of memory needed to create a sorted
+	 * mer list for a sequence of the specified length.
+	 * @param len The length of the sequence
+	 * @return The amount of memory needed in bytes.
+	 */
+	virtual uint64 GetNeededMemory(gnSeqI len) = 0;
+
+	std::string filename;	/**< name of the sorted mer list file */
+	std::fstream sarfile;	/**< stream used to read and write the file */
+	uint64 sarray_start_offset;	/**< byte offset of the position array within the file */
+
+	boost::iostreams::mapped_file_source sardata;	/**< memory mapping of the file */
+	/** Pointer to the first entry of the position array inside the mapping */
+	smlSeqI_t* base(){ return (smlSeqI_t*)(sardata.data()+sarray_start_offset); }
+
+	static char** tmp_paths;	/**< paths to scratch disk space that can be used for an external sort */
+	std::vector< int64 > seq_coords;	/**< If Ns are masked, contains coordinates of regions without Ns */
+};
+
+// versions 2 and 5 were previous
+// jump to 100 to avoid confusion with DNAFileSML
+/** @return The file format version written and accepted by this class */
+inline
+uint32 FileSML::FormatVersion()
+{
+	const uint32 current_version = 100;
+	return current_version;
+}
+
+/** @return DEFAULT_MEMORY_MINIMUM, in bytes */
+inline
+uint64 FileSML::MemoryMinimum()
+{
+	// held in 32 bits and widened on return
+	const uint32 minimum_bytes = DEFAULT_MEMORY_MINIMUM;
+	return minimum_bytes;
+}
+
+void maskNNNNN( const genome::gnSequence& in_seq, genome::gnSequence& out_seq, std::vector< int64 >& seq_coords, int mask_n_length );
+
+}
+
+#endif   //_FileSML_h_
diff --git a/libMems/Files.h b/libMems/Files.h
new file mode 100644
index 0000000..9a944e7
--- /dev/null
+++ b/libMems/Files.h
@@ -0,0 +1,213 @@
+/*******************************************************************************
+ * $Id: Files.h,v 1.23 2004/04/19 23:10:13 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef __libMems_Files_h__
+#define __libMems_Files_h__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+// for CreateTempFilename
+#ifdef WIN32
+#include "windows.h"
+#else
+#include "unistd.h"
+#endif
+
+#include "boost/filesystem/operations.hpp"
+#include "boost/filesystem/exception.hpp"
+#include "boost/algorithm/string.hpp"
+#include <string>
+#include <sstream>
+#include <iostream>
+#include <iomanip>
+
+
+/**
+ * Register a file name to be deleted before the process exits
+ * When passed an empty string, it does not add to the list of files to delete
+ * @param fname	The name of a file to delete, empty strings are ignored
+ * @return A vector of file names registered for deletion
+ */
+std::vector< std::string >& registerFileToDelete( std::string fname = "" );
+
+// Appends fname to the process-wide deletion list unless it is empty, and
+// returns that list.  The append is guarded by an OpenMP critical section.
+inline
+std::vector< std::string >& registerFileToDelete( std::string fname ) {
+	// The list is needed when atexit() handlers run, so it lives on the heap
+	// and is never destroyed.
+	static std::vector< std::string >* pending = new std::vector< std::string >();
+#pragma omp critical
+{
+	if( !fname.empty() )
+		pending->push_back( fname );
+}
+	return *pending;
+}
+
+void deleteRegisteredFiles();
+/**
+ * Deletes every file registered with registerFileToDelete() and empties the
+ * list afterwards.
+ */
+inline
+void deleteRegisteredFiles() {
+	// calling the registry with its default (empty) argument only fetches the list
+	std::vector< std::string >& doomed = registerFileToDelete();
+	for( std::vector< std::string >::size_type doomedI = 0; doomedI < doomed.size(); ++doomedI )
+		boost::filesystem::remove( doomed[ doomedI ] );
+	// forget the files that were just removed
+	doomed.clear();
+}
+
+
+/**
+ * Create a temporary file
+ */
+std::string CreateTempFileName(const std::string& prefix);
+
+
+/* shamelessly ripped from wxWidgets and boostified*/
+/**
+ * Builds the name of a new temporary file from a prefix.
+ * The directory part of the prefix selects where the file goes; without one,
+ * the system temporary directory is used (GetTempPath() on Windows, otherwise
+ * the first non-empty value of $TMP, $TMPDIR, $TEMP, falling back to "/tmp").
+ * With mkstemp() or GetTempFileName() the file is also created on disk.
+ * @param prefix	Optional directory plus the leading part of the file name
+ * @return The file name.  On non-Windows builds an empty string is returned
+ *         when the name would not fit in PATH_MAX characters or when
+ *         mkstemp()/mktemp() fails.
+ */
+inline
+std::string CreateTempFileName(const std::string& prefix)
+{
+	std::string dir, name, ret_path;
+#ifdef WIN32
+	char buf[MAX_PATH + 1];
+#else
+	char buf[PATH_MAX + 1];
+#endif
+	boost::filesystem::path path( prefix );
+	dir = path.branch_path().string();
+#ifdef WIN32
+	name = path.leaf();
+#else
+	name = path.leaf().string();
+#endif
+	if( name == "/" )
+	{
+		dir += name;
+		name.clear();
+	}
+#if defined(WIN32)
+
+	if ( dir.size() == 0 )
+	{
+		strncpy(buf, dir.c_str(), MAX_PATH);
+		// strncpy() does not terminate the buffer when the source fills it
+		buf[MAX_PATH] = '\0';
+		if ( !::GetTempPath(MAX_PATH, buf) )
+			std::cerr << "GetTempPath\n";
+
+		dir = buf;
+		if ( dir.size()==0 )
+			dir = ".";  // GetTempFileName() fails if we pass it an empty string
+	}
+	else // we have a dir to create the file in
+	{
+		// ensure we use only the back slashes as GetTempFileName(), unlike all
+		// the other APIs, is picky and doesn't accept the forward ones
+		boost::algorithm::replace_all( dir, "/", "\\" );
+	}
+
+	strncpy(buf, path.string().c_str(), MAX_PATH);
+	// strncpy() does not terminate the buffer when the source fills it
+	buf[MAX_PATH] = '\0';
+	if ( !::GetTempFileName(dir.c_str(), name.c_str(), 0, buf) )
+	{
+		std::cerr << "GetTempFileName\n";
+		path = boost::filesystem::path();
+	}
+	ret_path = buf;
+
+#else // !Windows
+	if ( dir.empty() )
+	{
+		char* env_val = getenv("TMP");
+		dir = env_val != NULL ? env_val : "";
+
+		if ( dir.size() == 0 ){
+			env_val = getenv("TMPDIR");
+			dir = env_val != NULL ? env_val : "";
+		}
+
+		if ( dir.size() == 0 ){
+			env_val = getenv("TEMP");
+			dir = env_val != NULL ? env_val : "";
+		}
+
+		if ( dir.size()==0 )
+		{
+			// default
+			#ifdef __DOS__
+				dir = ".";
+			#else
+				dir = "/tmp";
+			#endif
+		}
+	}
+
+	path = dir;
+	path /= name;
+
+	// we need to copy the path to the buffer in which mkstemp() can modify it
+	std::string path_str = path.string();
+	path_str += "XXXXXX";  // scratch space for mkstemp()
+	// buf holds PATH_MAX characters plus the terminator; refuse anything
+	// longer instead of writing past the end of the buffer
+	if( path_str.size() > (size_t)PATH_MAX )
+		return ret_path;
+	strncpy( buf, path_str.c_str(), path_str.size()+1 );
+
+#if defined(HAVE_MKSTEMP)
+	// mkstemp() replaces the XXXXXX placeholder in place and creates the file
+	int fdTemp = mkstemp( buf );
+	if ( fdTemp == -1 )
+	{
+		// this might be not necessary as mkstemp() on most systems should have
+		// already done it but it doesn't hurt neither...
+//        path.clear();
+	}
+	else // mkstemp() succeeded
+	{
+		ret_path = buf;
+		close(fdTemp);
+	}
+#else // !HAVE_MKSTEMP
+
+#ifdef HAVE_MKTEMP
+	// mktemp() only rewrites the template and returns a pointer to it; it does
+	// not open a file, so there is no descriptor to close.  On failure the
+	// template is turned into an empty string.
+	if ( mktemp( buf ) != NULL && buf[0] != '\0' ){
+		ret_path = buf;
+	}
+
+#else // !HAVE_MKTEMP (includes __DOS__)
+	// generate the unique file name ourselves
+	unsigned my_pid = 0;
+	#ifndef __DOS__
+	my_pid = getpid();
+	#endif
+
+	std::string oss_str;
+	static const size_t numTries = 1000;
+	for ( size_t n = 0; n < numTries; n++ )
+	{
+		std::ostringstream oss;
+		// 3 decimal digits are enough for numTries == 1000
+		oss << path.string() << my_pid << "." << std::setfill('0') << std::setw(3) << n;
+		boost::filesystem::path pathTry( oss.str() );
+		oss_str = oss.str();
+		if ( !boost::filesystem::exists(pathTry) )
+			break;
+
+	}
+
+	// if every candidate already existed, the last one tried is returned
+	ret_path = oss_str;
+#endif // HAVE_MKTEMP/!HAVE_MKTEMP
+
+#endif // HAVE_MKSTEMP/!HAVE_MKSTEMP
+
+#endif // Windows/!Windows
+
+	return ret_path;
+}
+
+
+#endif // __libMems_Files_h__
+
diff --git a/libMems/GappedAligner.h b/libMems/GappedAligner.h
new file mode 100644
index 0000000..42d94bd
--- /dev/null
+++ b/libMems/GappedAligner.h
@@ -0,0 +1,73 @@
+/*******************************************************************************
+ * $Id: GappedAligner.h,v 1.12 2004/04/19 23:10:50 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _GappedAligner_h_
+#define _GappedAligner_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnSequence.h"
+#include "libMems/GappedAlignment.h"
+#include "libMems/Match.h"
+
+namespace mems {
+
+class GappedAligner {	// abstract interface: concrete gapped aligners implement Align()
+public:
+	GappedAligner() : max_alignment_length( 10000 ) {}	// default to something
+	virtual ~GappedAligner(){}	// virtual: a derived aligner may be deleted through a GappedAligner*
+	GappedAligner& operator=( const GappedAligner& ga ){	// copies only the length limit
+		max_alignment_length = ga.max_alignment_length;
+		return *this;
+	}
+	/**
+	 * Set the maximum allowed length for a gapped alignment.  Sequences above this length
+	 * threshold will be ignored.
+	 * @param len The maximum length
+	 */
+	void SetMaxAlignmentLength( gnSeqI len ){max_alignment_length = len;}
+	virtual boolean Align( GappedAlignment& cr, Match* r_begin, Match* r_end, std::vector< genome::gnSequence* >& seq_table ) = 0;	// implemented by each concrete aligner
+protected:
+	gnSeqI max_alignment_length;	// limit set through SetMaxAlignmentLength(); 10000 unless changed
+};
+
+
+
+
+
+boolean getInterveningCoordinates( std::vector< genome::gnSequence* >& seq_table, Match* r_begin, Match* r_end, uint seqI, int64& gap_lend, int64& gap_rend );
+// Writes to gap_lend/gap_rend the coordinates in sequence seqI of the region between r_begin and r_end (either may be NULL, meaning "sequence end").  Always returns true.
+inline
+boolean getInterveningCoordinates( std::vector< genome::gnSequence* >& seq_table, Match* r_begin, Match* r_end, uint seqI, int64& gap_lend, int64& gap_rend ){
+	// skip this sequence if it's undefined (NO_MATCH) in either bounding match: both outputs become 0
+	if( (r_end != NULL && r_end->Start( seqI ) == NO_MATCH) ||
+		(r_begin != NULL && r_begin->Start( seqI ) == NO_MATCH) ){
+		gap_lend = 0;
+		gap_rend = 0;
+		return true;
+	}
+			
+	// determine the size of the gap
+	gap_rend = r_end != NULL ? r_end->Start( seqI ) : seq_table[ seqI ]->length() + 1;	// no r_end: sequence length + 1
+	gap_lend = r_begin != NULL ? r_begin->End( seqI ) + 1 : 1;	// no r_begin: coordinate 1
+	if( gap_rend < 0 || gap_lend < 0 ){	// a negative value presumably means a reverse-strand match (TODO confirm): recompute from negated starts with the roles of r_begin and r_end exchanged
+		gap_rend = r_begin != NULL ? -r_begin->Start( seqI ) : seq_table[ seqI ]->length() + 1;
+		gap_lend = r_end != NULL ? -r_end->Start( seqI ) + r_end->Length() : 1;
+	}
+	if( gap_rend <= 0 || gap_lend <= 0 ){
+		// if either is still <= 0 then there's a problem; it is reported, but true is returned regardless
+		genome::ErrorMsg( "Error constructing intervening coordinates" );
+	}
+	return true;
+}
+
+}
+
+#endif // _GappedAligner_h_
diff --git a/libMems/GappedAlignment.cpp b/libMems/GappedAlignment.cpp
new file mode 100644
index 0000000..043bc41
--- /dev/null
+++ b/libMems/GappedAlignment.cpp
@@ -0,0 +1,77 @@
+/*******************************************************************************
+ * $Id: GappedAlignment.cpp,v 1.27 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/GappedAlignment.h"
+#include <sstream>
+#include "libGenome/gnFilter.h"
+
+#include <fstream>
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+GappedAlignment::GappedAlignment() : 
+AbstractGappedAlignment< SparseAbstractMatch<> >()
+{}	// default construction: the base is default-constructed and align_matrix starts out empty
+
+GappedAlignment::GappedAlignment( uint seq_count, gnSeqI align_length ) : 
+AbstractGappedAlignment< SparseAbstractMatch<> >( seq_count, align_length )
+{
+	align_matrix.resize(seq_count);	// one (initially empty) row per sequence; align_length is only forwarded to the base class
+}
+
+// Replaces the stored rows with seq_align; the alignment length becomes the
+// size of the first row, or 0 when seq_align has no rows.
+void GappedAlignment::SetAlignment( const vector< string >& seq_align ){
+	align_matrix = seq_align;
+	const bool no_rows = seq_align.empty();
+	SetAlignmentLength( no_rows ? 0 : seq_align[0].size() );
+}
+
+std::ostream& operator<<( std::ostream& os, const GappedAlignment& ga ); // serialize ga to os
+std::ostream& operator<<( std::ostream& os, const GappedAlignment& ga ){
+	// first line: tag and sequence count
+	os << "GappedAlignmentSeqs: " << ga.SeqCount() << endl;
+	os << ga.AlignmentLength();	// second line: alignment length, then one tab-prefixed start per sequence
+	for( uint startI = 0; startI != ga.SeqCount(); ++startI )
+		os << '\t' << ga.Start( startI );
+	os << endl;
+	for( uint rowI = 0; rowI != ga.SeqCount(); ++rowI )	// then one alignment row per line
+		os << ga.align_matrix[ rowI ] << endl;
+	return os;
+}
+
+std::istream& operator>>( std::istream& is, GappedAlignment& ga ); // parses the text layout written by operator<<
+std::istream& operator>>( std::istream& is, GappedAlignment& ga ){
+	uint seq_count = 0;	// stays 0 (empty alignment) if the count cannot be parsed
+	string nuffin;	// scratch for tokens that are read and discarded
+	is >> nuffin;	// the "GappedAlignmentSeqs:" tag
+	is >> seq_count;
+	ga = GappedAlignment( seq_count, 0 );	// this constructor already sizes align_matrix to seq_count empty rows
+	is >> nuffin;	// the stored alignment length; it is recomputed from the rows below
+	for( uint seqI = 0; seqI < seq_count; seqI++ ){
+		int64 startI = 0;
+		is >> startI;
+		ga.SetStart( seqI, startI );
+	}
+	for( uint seqI = 0; seqI < seq_count; seqI++ ){
+		// fill the pre-sized row in place: push_back() here would leave seq_count empty rows
+		// in front of the data and make the alignment length computed below come out as 0
+		is >> ga.align_matrix[ seqI ];
+	}
+	if( ga.align_matrix.size() > 0 )
+		ga.SetAlignmentLength( ga.align_matrix[ 0 ].length() );
+	return is;
+}
+
+}
diff --git a/libMems/GappedAlignment.h b/libMems/GappedAlignment.h
new file mode 100644
index 0000000..74285d5
--- /dev/null
+++ b/libMems/GappedAlignment.h
@@ -0,0 +1,283 @@
+/*******************************************************************************
+ * $Id: GappedAlignment.h,v 1.12 2004/04/19 23:10:50 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef __GappedAlignment_h__
+#define __GappedAlignment_h__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnFilter.h"
+#include "libGenome/gnSequence.h"
+#include "libMems/SparseAbstractMatch.h"
+#include "libMems/AbstractGappedAlignment.h"
+#include "libMems/Memory.h"
+#include <iostream>
+
+namespace mems {
+
+class GappedAlignment : public AbstractGappedAlignment< SparseAbstractMatch<> >	// gapped alignment that stores one text row per sequence (see align_matrix)
+{
+public:
+	GappedAlignment();
+	GappedAlignment( uint seq_count, gnSeqI align_length );
+	
+	GappedAlignment* Clone() const { return new GappedAlignment( *this ); }	// copy made with operator new
+	GappedAlignment* Copy() const;	// copy made through m_allocateAndCopy() (inline definition below)
+	virtual void Free();	// passes this object to m_free() (inline definition below)
+	
+	void SetAlignment( const std::vector< std::string >& seq_align );
+
+	/**
+	 * Writes this GappedAlignment to the specified output stream (e.g. cout).
+	 */
+	friend std::ostream& operator<<(std::ostream& os, const GappedAlignment& ga); //write to source.
+
+	/**
+	 * Reads a GappedAlignment from the specified input stream (e.g. cin).
+	 */
+	friend std::istream& operator>>(std::istream& is, GappedAlignment& ga); //read from source
+
+	// Inherited methods from AbstractMatch:
+	virtual void Invert();
+	virtual void CropStart(gnSeqI crop_amount);	// drops the first crop_amount alignment columns
+	virtual void CropEnd(gnSeqI crop_amount);	// drops the last crop_amount alignment columns
+
+	virtual void CropLeft(gnSeqI crop_amount, uint seqI);	// crop_amount counts non-gap characters of sequence seqI
+	virtual void CropRight(gnSeqI crop_amount, uint seqI);	// implemented as Invert(), CropLeft(), Invert()
+
+	void GetAlignment( std::vector< bitset_t >& align_matrix ) const;	// one bitset per sequence; a set bit marks a non-gap column
+
+	friend const std::vector<std::string>& GetAlignment( const GappedAlignment& ga, const std::vector< genome::gnSequence* >& seq_table );
+
+	void GetColumn( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column ) const;
+
+	/**
+	 * Splits the alignment before the specified column.  The left-side remains in "this" GappedAlignment,
+	 * and the right side is returned as a new GappedAlignment
+	 */
+	virtual AbstractMatch* Split( gnSeqI before_column );
+
+	virtual bool IsGap( uint seq, gnSeqI col ) const;	// true when the stored character is '-'
+
+	void swap( GappedAlignment& other ){ swap(&other); }	// forwards to the protected pointer overload
+
+protected:
+	// for use by derived classes in order to swap contents
+	void swap( GappedAlignment* other ){
+		std::swap( align_matrix, other->align_matrix );
+		AbstractGappedAlignment< SparseAbstractMatch<> >::swap( other );
+	}
+
+	std::vector< std::string > align_matrix;	// one string per sequence; '-' denotes a gap column
+
+	void CropStartCoords(gnSeqI crop_amount);	// coordinate part of CropStart(); does not modify align_matrix
+	void CropEndCoords(gnSeqI crop_amount);	// coordinate part of CropEnd(); does not modify align_matrix
+};
+
+
+inline
+GappedAlignment* GappedAlignment::Copy() const
+{
+	return m_allocateAndCopy( *this );	// allocation is delegated to m_allocateAndCopy(), which is not defined in this header (presumably libMems/Memory.h -- TODO confirm)
+}
+inline
+void GappedAlignment::Free()
+{
+	m_free(this);	// hands this object to m_free(); NOTE(review): presumably the counterpart of Copy(), so the object should not be used afterwards -- confirm
+}
+
+inline
+void GappedAlignment::Invert(){	// passes every row through the DNA complement filter's ReverseFilter(), then delegates to the base-class Invert()
+	const genome::gnFilter* const complementer = genome::gnFilter::DNAComplementFilter();
+	for( uint rowI = 0; rowI != SeqCount(); ++rowI )
+		complementer->ReverseFilter( align_matrix[ rowI ] );
+	// the base class does whatever coordinate bookkeeping it needs
+	AbstractGappedAlignment< SparseAbstractMatch<> >::Invert();
+}
+
+inline
+void GappedAlignment::CropStartCoords(gnSeqI crop_amount){	// coordinate bookkeeping for dropping the first crop_amount columns; align_matrix itself is left untouched
+	if( crop_amount > AlignmentLength() )
+		Throw_gnEx( genome::SeqIndexOutOfBounds() );
+	for( uint seqI = 0; seqI != SeqCount(); ++seqI ){
+		const std::string& row = align_matrix[seqI];
+		gnSeqI residues = 0;	// non-gap characters among the cropped columns
+		for( gnSeqI colI = 0; colI != crop_amount; ++colI )
+			residues += ( row[colI] != '-' ) ? 1 : 0;
+		if( Start(seqI) > 0 )	// only positive starts are advanced
+			SetStart(seqI, Start(seqI) + residues);
+		SetLength(Length(seqI)-residues, seqI);
+		if( Length(seqI) == 0 )	// nothing of this sequence is left
+			SetLeftEnd(seqI, NO_MATCH);
+	}
+	SetAlignmentLength( AlignmentLength() - crop_amount );
+}
+
+inline
+void GappedAlignment::CropStart(gnSeqI crop_amount){
+	CropStartCoords(crop_amount);	// coordinates first; this also rejects a crop_amount beyond the alignment length
+	// then cut the same leading columns off every stored row
+	for( uint rowI = 0; rowI != SeqCount(); ++rowI )
+		align_matrix[ rowI ] = align_matrix[ rowI ].substr( crop_amount );
+}
+
+inline
+void GappedAlignment::CropEndCoords(gnSeqI crop_amount){	// coordinate bookkeeping for dropping the last crop_amount columns; align_matrix itself is left untouched
+	if( crop_amount > AlignmentLength() )
+		Throw_gnEx( genome::SeqIndexOutOfBounds() );
+	SetAlignmentLength( AlignmentLength() - crop_amount );
+
+	for( uint seqI = 0; seqI != SeqCount(); ++seqI ){
+		const std::string& row = align_matrix[seqI];
+		gnSeqI residues = 0;	// non-gap characters among the cropped columns
+		for( gnSeqI colI = row.length() - crop_amount; colI < row.length(); ++colI )
+			residues += ( row[colI] != '-' ) ? 1 : 0;
+		if( Start(seqI) < 0 )	// only negative starts are adjusted
+			SetStart(seqI, Start(seqI)-residues);
+		SetLength(Length(seqI)-residues, seqI);
+		if( Length(seqI) == 0 )	// nothing of this sequence is left
+			SetLeftEnd(seqI, NO_MATCH);
+	}
+}
+
+inline
+void GappedAlignment::CropEnd(gnSeqI crop_amount){
+	CropEndCoords(crop_amount);	// after this call AlignmentLength() is the number of columns to keep
+	// The rows are rebuilt into a fresh vector rather than shrunk in place.
+	// Per the original author, the in-place variant
+	//	for( uint i=0; i < SeqCount(); i++ )
+	//	{ align_matrix[ i ].resize( AlignmentLength() ); align_matrix[ i ].reserve( AlignmentLength() ); }
+	// doesn't free up memory in Windows release builds.
+	std::vector< std::string > trimmed( SeqCount() );
+	for( uint rowI = 0; rowI != SeqCount(); ++rowI )
+		trimmed[ rowI ] = align_matrix[ rowI ].substr( 0, AlignmentLength() );
+	// install the shortened rows; the old ones are released when trimmed goes out of scope
+	align_matrix.swap( trimmed );
+}
+
+inline
+void GappedAlignment::CropLeft(gnSeqI crop_amount, uint seqI)
+{
+	// walk columns until crop_amount non-gap characters of seqI have been passed, then crop at that column
+	const bool is_forward = Orientation(seqI) == AbstractMatch::forward;
+	if( !is_forward ){
+		// not forward: the characters are counted from the last column backwards
+		size_t cut_col = align_matrix[seqI].size();
+		while( crop_amount > 0 && cut_col > 0 )
+			if( align_matrix[seqI][--cut_col] != '-' )
+				--crop_amount;
+		CropEnd(AlignmentLength()-cut_col);
+		return;
+	}
+	size_t cut_col = 0;
+	for( ; crop_amount > 0 && cut_col < align_matrix[seqI].size(); ++cut_col )
+		if( align_matrix[seqI][cut_col] != '-' )
+			--crop_amount;
+	CropStart(cut_col);
+}
+
+inline
+void GappedAlignment::CropRight(gnSeqI crop_amount, uint seqI)
+{
+	// TODO: remove the dependency on Invert() since it will be slow
+	Invert();	// implemented as a left crop on the inverted alignment ...
+	CropLeft(crop_amount, seqI);
+	Invert();	// ... followed by a second Invert() to restore the original orientation
+}
+
+inline
+void GappedAlignment::GetAlignment( std::vector< bitset_t >& align_matrix ) const
+{	// NB: the parameter shadows the member of the same name, hence the explicit this->
+	const std::vector< std::string >& rows = this->align_matrix;
+	align_matrix = std::vector< bitset_t >( rows.size(), bitset_t(this->AlignmentLength(), false) );
+	for( size_t rowI = 0; rowI != rows.size(); ++rowI )
+	{
+		if( LeftEnd(rowI) != NO_MATCH )	// rows of undefined sequences keep their all-false bitset
+			for( std::string::size_type colI = 0; colI != rows[rowI].size(); ++colI )
+				if( rows[rowI][colI] != '-' )
+					align_matrix[rowI].set(colI);
+	}
+}
+
+inline
+AbstractMatch* GappedAlignment::Split( gnSeqI before_column )
+{
+	// Validate before allocating or lending anything: CropStartCoords() below throws this same exception, but by then ga would leak and this object's rows would be stranded inside it.
+	if( before_column > AlignmentLength() )
+		Throw_gnEx( genome::SeqIndexOutOfBounds() );
+	GappedAlignment ga_tmp(SeqCount(), AlignmentLength());
+	GappedAlignment* ga = ga_tmp.Copy();	// becomes the right-hand piece that is returned to the caller
+	for( size_t seqI = 0; seqI < SeqCount(); seqI++ )
+	{
+		ga->SetStart( seqI, Start(seqI) );
+		ga->SetLength( Length(seqI), seqI );
+	}
+	std::swap(ga->align_matrix, align_matrix);	// lend our rows to ga so that it can count the cropped characters ...
+	ga->CropStartCoords(before_column);
+	std::swap(ga->align_matrix, align_matrix);	// ... and take them back
+	ga->align_matrix.resize(SeqCount());
+	for( size_t seqI = 0; seqI < SeqCount(); seqI++ )
+		ga->align_matrix[seqI] = align_matrix[seqI].substr( before_column );
+	ga->SetAlignmentLength( AlignmentLength()-before_column );
+	CropEnd(AlignmentLength()-before_column);	// keep only the columns before before_column in *this
+	return ga;
+}
+
+const std::vector<std::string>& GetAlignment( const GappedAlignment& ga, const std::vector< genome::gnSequence* >& seq_table );	// returns ga's stored rows by reference
+inline
+const std::vector<std::string>& GetAlignment( const GappedAlignment& ga, const std::vector< genome::gnSequence* >& seq_table )
+{
+	return ga.align_matrix;	// seq_table is not used here; the returned reference points into ga's own storage
+}
+
+inline
+void GappedAlignment::GetColumn( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column ) const
+{
+	// Both outputs are rebuilt from scratch: every pos starts as NO_MATCH and
+	// every column flag starts as false.
+	pos = std::vector<gnSeqI>(SeqCount(), NO_MATCH);
+	column = std::vector<bool>(SeqCount(), false);
+	for( uint rowI = 0; rowI != SeqCount(); ++rowI )
+	{
+		const std::string& row = align_matrix[rowI];
+		if( row[col] != '-' )
+			column[rowI] = true;
+		gnSeqI residues = 0;	// non-gap characters of this row in columns 0..col
+		for( size_t colI = 0; colI <= col; ++colI )
+			residues += ( row[colI] != '-' ) ? 1 : 0;
+		if( residues == 0 )
+			continue;	// nothing of this sequence at or before col: pos stays NO_MATCH
+		// position of the residues-th character, measured from LeftEnd (forward) or from RightEnd (reverse)
+		if( Orientation(rowI) == forward )
+			pos[rowI] = LeftEnd(rowI) + residues - 1;
+		else if( Orientation(rowI) == reverse )
+			pos[rowI] = RightEnd(rowI) - residues + 1;
+	}
+}
+
+inline
+bool GappedAlignment::IsGap( uint seq, gnSeqI col ) const
+{
+	return align_matrix[seq][col] == '-';	// '-' marks a gap; seq and col are not range-checked here
+}
+
+}
+
+
+namespace std {	// explicit specialization so that std::swap on two GappedAlignments goes through the member swap
+template<> inline
+void swap( mems::GappedAlignment& a, mems::GappedAlignment& b )
+{
+	a.swap(b);	// GappedAlignment::swap exchanges align_matrix and then calls the base-class swap
+}
+}
+
+
+#endif // __GappedAlignment_h__
+
diff --git a/libMems/GreedyBreakpointElimination.cpp b/libMems/GreedyBreakpointElimination.cpp
new file mode 100644
index 0000000..3e30e59
--- /dev/null
+++ b/libMems/GreedyBreakpointElimination.cpp
@@ -0,0 +1,994 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "libMems/GreedyBreakpointElimination.h"
+#include "libMems/ProgressiveAligner.h"
+#include "libMems/Aligner.h"
+#include "libMems/Islands.h"
+#include "libMems/DNAFileSML.h"
+#include "libMems/MuscleInterface.h"	// it's the default gapped aligner
+#include "libGenome/gnRAWSource.h"
+#include "libMems/gnAlignedSequences.h"
+#include "libMems/CompactGappedAlignment.h"
+#include "libMems/MatchProjectionAdapter.h"
+#include "libMems/PairwiseMatchFinder.h"
+#include "libMems/TreeUtilities.h"
+#include "libMems/PairwiseMatchAdapter.h"
+
+#include <boost/dynamic_bitset.hpp>
+#include <boost/tuple/tuple.hpp>
+
+#include <map>
+#include <fstream>	// for debugging
+#include <sstream>
+#include <stack>
+#include <algorithm>
+#include <limits>
+#include <iomanip>
+
+using namespace std;
+using namespace genome;
+
+namespace mems {
+// working in mems
+
+bool penalize_repeats = false;	// flag at mems namespace scope, initially off
+
+void printProgress( uint prev_prog, uint cur_prog, ostream& os )
+{
+	// Writes "<cur_prog>%.." only when the value changed; a line break comes first whenever cur_prog/10 differs from prev_prog/10.
+	if( prev_prog == cur_prog )
+		return;
+	const bool new_decade = ( cur_prog / 10 ) != ( prev_prog / 10 );
+	if( new_decade ) os << endl;
+	os << cur_prog << "%..";
+	os.flush();
+}
+
+
+
+
+void getPairwiseLCBs( 	// builds the LCBs (with scores and adjacencies) of one pair of match components and records which LCB each match fell into
+	uint nI, 	// first component of node_match to project onto
+	uint nJ, 	// second component of node_match to project onto
+	uint dI, 	// index used for the second dimension of tm_score_array / tm_lcb_id_array
+	uint dJ, 	// index used for the third dimension of tm_score_array / tm_lcb_id_array
+	vector< TrackingMatch* >& tracking_matches, 
+	vector< TrackingLCB<TrackingMatch*> >& t_lcbs,	// output: resized and filled, one entry per adjacency
+	boost::multi_array< double, 3 >& tm_score_array,	// read: score of match_id at [dI][dJ]
+	boost::multi_array< size_t, 3 >& tm_lcb_id_array )	// written: LCB index of match_id at [dI][dJ]
+{
+	// make a set of projection matches
+	vector< AbstractMatch* > pair_matches;
+	for( size_t mI = 0; mI < tracking_matches.size(); ++mI )
+	{
+		if( tracking_matches[mI]->node_match->LeftEnd(nI) == NO_MATCH ||
+			tracking_matches[mI]->node_match->LeftEnd(nJ) == NO_MATCH )
+			continue;	// this match is undefined in at least one of the two components
+		PairwiseMatchAdapter pma(tracking_matches[mI]->node_match, nI, nJ );
+		pma.tm = tracking_matches[mI];	// remember which tracking match the adapter came from
+		if( pma.Orientation(0) == AbstractMatch::reverse )
+			pma.Invert();
+		pair_matches.push_back(pma.Copy());	// copies made with Copy() are released with Free() at the end of this function
+	}
+	// find LCBs...
+	vector< gnSeqI > breakpoints;
+	IdentifyBreakpoints( pair_matches, breakpoints );
+
+	vector< vector< AbstractMatch* > > LCB_list;
+	ComputeLCBs_v2( pair_matches, breakpoints, LCB_list );
+
+	//
+	// now compute scores on them
+	//
+	vector< double > lcb_scores(LCB_list.size());
+	for( size_t lcbI = 0; lcbI < LCB_list.size(); ++lcbI )
+	{
+		double lcb_score = 0;	// sum of the [dI][dJ] scores of the matches in this LCB
+		for( size_t mI = 0; mI < LCB_list[lcbI].size(); ++mI )
+		{
+			PairwiseMatchAdapter* pma = (PairwiseMatchAdapter*)LCB_list[lcbI][mI];	// NOTE(review): unchecked cast; assumes every entry of LCB_list is one of the adapters created above
+			lcb_score += tm_score_array[pma->tm->match_id][dI][dJ];
+		}
+		lcb_scores[lcbI] = lcb_score;
+	}
+
+	// and build the pairwise adjacency list
+	vector< LCB > adjacencies;
+	computeLCBAdjacencies_v3( LCB_list, lcb_scores, adjacencies );
+
+	t_lcbs.resize(adjacencies.size());
+	for( size_t lcbI = 0; lcbI < adjacencies.size(); ++lcbI )	// NOTE(review): LCB_list is indexed with lcbI below, so this assumes adjacencies.size() <= LCB_list.size() -- confirm
+	{
+		t_lcbs[lcbI] = adjacencies[lcbI];
+		t_lcbs[lcbI].matches.resize(LCB_list[lcbI].size());
+		for( size_t mI = 0; mI < LCB_list[lcbI].size(); ++mI )
+			t_lcbs[lcbI].matches[mI] = ((PairwiseMatchAdapter*)LCB_list[lcbI][mI])->tm;
+		// sort them by ptr
+		sort( t_lcbs[lcbI].matches.begin(), t_lcbs[lcbI].matches.end() );
+
+		// set the match LCB ids appropriately
+		for( size_t mI = 0; mI < t_lcbs[lcbI].matches.size(); ++mI )
+			tm_lcb_id_array[t_lcbs[lcbI].matches[mI]->match_id][dI][dJ] = lcbI;
+	}
+
+	// free the memory used by pairwise matches
+	for( size_t mI = 0; mI < pair_matches.size(); ++mI )
+		pair_matches[mI]->Free();
+}
+
+/** sizes tm_lcb_id_array to [tracking_matches.size()][n1_count][n2_count] and marks every entry LCB_UNASSIGNED */
+void initTrackingMatchLCBTracking( 
+	const std::vector< TrackingMatch >& tracking_matches, 
+	size_t n1_count, 
+	size_t n2_count, 
+	boost::multi_array< size_t, 3 >& tm_lcb_id_array )
+{
+	const size_t match_count = tracking_matches.size();
+	tm_lcb_id_array.resize( boost::extents[match_count][n1_count][n2_count] );
+	// every (match, n1, n2) slot starts out without an LCB
+	for( size_t matchI = 0; matchI != match_count; ++matchI )
+		for( size_t i = 0; i != n1_count; ++i )
+			for( size_t j = 0; j != n2_count; ++j )
+				tm_lcb_id_array[matchI][i][j] = LCB_UNASSIGNED;
+}
+
+
+/** removes an LCB from an LCB list and coalesces surrounding LCBs.  Returns the number of LCBs removed (the deleted one plus one per coalesced pair).
+ *  After LCBs are removed, the adjacency list should be processed with filterLCBs()
+ *  @param	id_remaps	This is populated with a list of LCB ids that were deleted or coalesced and now have a new LCB id
+ *                      for each coalesced LCB, an entry of the form <old id, new id> is added, deleted LCBs have
+ *						entries of the form <deleted, -1>.  Entries appear in the order operations were performed
+ *						and the function undoLcbRemoval() can undo these operations in reverse order
+ *  @param	impact_list	Replaced with the sorted, de-duplicated ids collected from the neighbours (and their neighbours) of the removed LCB
+ */
+template< class LcbVector >
+uint RemoveLCBandCoalesce( size_t lcbI, uint seq_count, LcbVector& adjacencies, std::vector< double >& scores, std::vector< std::pair< uint, uint > >& id_remaps, std::vector< uint >& impact_list )
+{
+	uint removed_count = 0;
+	vector< uint > imp_tmp(seq_count * (2 + seq_count * 4), LCB_UNASSIGNED);	// room for the most entries the impact loop below can write
+	swap(impact_list, imp_tmp);
+	size_t impactI = 0;	// next free slot in impact_list
+	id_remaps.clear();
+
+	adjacencies[ lcbI ].lcb_id = -2;	// lcb_id no longer equals the index, which is how the loops below recognise a removed entry
+	
+	// update adjacencies
+	uint seqI;
+	uint left_adj;
+	uint right_adj;
+	for( seqI = 0; seqI < seq_count; seqI++ ){
+		left_adj = adjacencies[ lcbI ].left_adjacency[ seqI ];
+		right_adj = adjacencies[ lcbI ].right_adjacency[ seqI ];
+		if( left_adj != -1 )	// -1 converts to the largest uint here; it is the "no neighbour" marker
+			adjacencies[ left_adj ].right_adjacency[ seqI ] = right_adj;
+		if( right_adj != -1 && right_adj != adjacencies.size() )
+			adjacencies[ right_adj ].left_adjacency[ seqI ] = left_adj;
+	}
+
+	// populate the impact list -- LCBs whose removal scores may change due to this one's removal
+	for( seqI = 0; seqI < seq_count; seqI++ ){
+		left_adj = adjacencies[ lcbI ].left_adjacency[ seqI ];
+		right_adj = adjacencies[ lcbI ].right_adjacency[ seqI ];
+		impact_list[impactI++] = left_adj;
+		impact_list[impactI++] = right_adj;
+		for( uint seqJ = 0; seqJ < seq_count; seqJ++ ){
+			if( left_adj != -1 )
+			{
+				impact_list[impactI++] = adjacencies[ left_adj ].left_adjacency[ seqJ ];
+				impact_list[impactI++] = adjacencies[ left_adj ].right_adjacency[ seqJ ];
+			}
+			if( right_adj != -1 )	// NOTE(review): unlike the loop above, right_adj == adjacencies.size() is not excluded before indexing -- confirm that value cannot occur
+			{
+				impact_list[impactI++] = adjacencies[ right_adj ].left_adjacency[ seqJ ];
+				impact_list[impactI++] = adjacencies[ right_adj ].right_adjacency[ seqJ ];
+			}
+		}
+	}
+
+	// just deleted an lcb...
+	id_remaps.push_back( make_pair( lcbI, -1 ) );
+	removed_count++;
+
+	// check for collapse
+	for( seqI = 0; seqI < seq_count; seqI++ ){
+		left_adj = adjacencies[ lcbI ].left_adjacency[ seqI ];
+		right_adj = adjacencies[ lcbI ].right_adjacency[ seqI ];
+		// find the real slim shady: skip over entries whose lcb_id no longer matches their index
+		while( left_adj != -1 && adjacencies[ left_adj ].lcb_id != left_adj )
+			left_adj = adjacencies[ left_adj ].left_adjacency[ seqI ];
+		while( right_adj != -1 && adjacencies[ right_adj ].lcb_id != right_adj )
+			right_adj = adjacencies[ right_adj ].right_adjacency[ seqI ];
+		if( left_adj == -1 || right_adj == -1 )
+			continue;	// can't collapse with a non-existant LCB!
+		if( adjacencies[ left_adj ].lcb_id != left_adj ||
+			adjacencies[ right_adj ].lcb_id != right_adj )
+			if( seqI > 0 )
+				continue;	// already coalesced
+			else	// binds to the inner if( seqI > 0 ); execution continues below after the message
+				cerr << "trouble on down street\n";
+
+		// check whether the two LCBs are adjacent in each sequence
+		boolean orientation = adjacencies[ left_adj ].left_end[ seqI ] > 0 ? true : false;
+		uint seqJ;
+		for( seqJ = 0; seqJ < seq_count; seqJ++ ){
+			boolean j_orientation = adjacencies[ left_adj ].left_end[ seqJ ] > 0;
+			if( j_orientation == orientation &&
+				adjacencies[ left_adj ].right_adjacency[ seqJ ] != right_adj )
+				break;
+			if( j_orientation != orientation &&
+				adjacencies[ left_adj ].left_adjacency[ seqJ ] != right_adj )
+				break;
+			// check that they are both in the same orientation
+			if( adjacencies[ right_adj ].left_end[ seqJ ] > 0 != j_orientation )
+				break;
+		}
+
+		if( seqJ != seq_count ||
+			adjacencies[ left_adj ].to_be_deleted ||
+			adjacencies[ right_adj ].to_be_deleted )
+			continue;	// if these two aren't collinear, or one or both will get deleted, then don't coalesce
+		
+
+		// these two can be coalesced
+		// do it.  do it now.
+		id_remaps.push_back( make_pair( adjacencies[ right_adj ].lcb_id, left_adj ) );
+		adjacencies[ right_adj ].lcb_id = left_adj;	// right_adj now points at the LCB that absorbed it
+		scores[ left_adj ] += scores[ right_adj ];
+		adjacencies[ left_adj ].weight += adjacencies[ right_adj ].weight;
+
+		// unlink right_adj from the adjacency list and
+		// update left and right ends of left_adj
+		for( seqJ = 0; seqJ < seq_count; seqJ++ ){
+			boolean j_orientation = adjacencies[ left_adj ].left_end[ seqJ ] > 0;
+			uint rr_adj = adjacencies[ right_adj ].right_adjacency[ seqJ ];
+			uint rl_adj = adjacencies[ right_adj ].left_adjacency[ seqJ ];
+			if( j_orientation == orientation ){
+				adjacencies[ left_adj ].right_end[ seqJ ] = adjacencies[ right_adj ].right_end[ seqJ ];
+				adjacencies[ left_adj ].right_adjacency[ seqJ ] = rr_adj;
+				if( rr_adj != -1 )
+					adjacencies[ rr_adj ].left_adjacency[ seqJ ] = left_adj;
+			}else{
+				adjacencies[ left_adj ].left_end[ seqJ ] = adjacencies[ right_adj ].left_end[ seqJ ];
+				adjacencies[ left_adj ].left_adjacency[ seqJ ] = rl_adj;
+				if( rl_adj != -1 )
+					adjacencies[ rl_adj ].right_adjacency[ seqJ ] = left_adj;
+			}
+		}
+		// just coalesced two LCBs...
+		removed_count++;
+	}
+	// uniquify the impact list and get rid of empty entries (everything from the first value >= LCB_UNASSIGNED onwards is erased)
+	std::sort( impact_list.begin(), impact_list.end() );
+	vector< uint >::iterator imp_end = std::unique( impact_list.begin(), impact_list.end() );
+	vector< uint >::iterator imp_preend = std::lower_bound( impact_list.begin(), imp_end, LCB_UNASSIGNED );
+	impact_list.erase( imp_preend, impact_list.end() );
+
+	return removed_count;
+}
+
+
+template< class LcbVector >	// replays id_remaps backwards: re-links deleted LCBs and un-coalesces merged ones in adjs
+void undoLcbRemoval( uint seq_count, LcbVector& adjs, std::vector< std::pair< uint, uint > >& id_remaps )
+{
+	for( size_t rI = id_remaps.size(); rI > 0; --rI )	// newest entry first
+	{
+		if( id_remaps[rI-1].second == -1 )	// -1 converts to the largest uint: the "deleted" marker
+		{
+			// this one was deleted
+			// revert adjacencies
+			uint lcbI = id_remaps[rI-1].first;
+			for( uint seqI = 0; seqI < seq_count; seqI++ )
+			{
+				uint left_adj = adjs[ lcbI ].left_adjacency[ seqI ];
+				uint right_adj = adjs[ lcbI ].right_adjacency[ seqI ];
+				if( left_adj != -1 )
+					adjs[ left_adj ].right_adjacency[ seqI ] = lcbI;
+				if( right_adj != -1 && right_adj != adjs.size() )
+					adjs[ right_adj ].left_adjacency[ seqI ] = lcbI;
+			}
+			adjs[lcbI].lcb_id = lcbI;	// reset the lcb id
+			adjs[lcbI].to_be_deleted = false;	// no longer TBD
+		}else{
+			// this one was coalesced
+			// uncoalesce it
+			uint lcbI = id_remaps[rI-1].first;	// the LCB that was absorbed
+			uint lcbJ = id_remaps[rI-1].second;	// the LCB that absorbed it
+			adjs[lcbI].lcb_id = lcbI;
+			adjs[lcbJ].weight -= adjs[lcbI].weight;	// only the weight is restored; no score vector is passed to this function
+			// link lcbI back in
+			// TODO: fix right end and left end coordinates
+			for( uint seqI = 0; seqI < seq_count; ++seqI )
+			{
+				uint ladj = adjs[lcbI].left_adjacency[seqI];
+				uint radj = adjs[lcbI].right_adjacency[seqI];
+				if(  ladj == lcbJ )
+				{
+					adjs[lcbJ].right_adjacency[seqI] = lcbI;
+					if( radj != -1 && radj != adjs.size())
+						adjs[radj].left_adjacency[seqI] = lcbI;
+				}else
+				if(  radj == lcbJ )
+				{
+					adjs[lcbJ].left_adjacency[seqI] = lcbI;
+					if( ladj != -1 && ladj != adjs.size())
+						adjs[ladj].right_adjacency[seqI] = lcbI;
+				}
+			}
+		}
+	}
+}
+
+EvenFasterSumOfPairsBreakpointScorer::EvenFasterSumOfPairsBreakpointScorer( 
+	double breakpoint_penalty,
+	double minimum_breakpoint_penalty,
+	boost::multi_array<double,2> bp_weight_matrix, 
+	boost::multi_array<double,2> conservation_weight_matrix,
+	vector< TrackingMatch* > tracking_match,
+	PairwiseLCBMatrix& pairwise_adjacency_matrix,
+	vector<node_id_t>& n1_descendants,
+	vector<node_id_t>& n2_descendants,
+	boost::multi_array< double, 3 >& tm_score_array,
+	boost::multi_array< size_t, 3 >& tm_lcb_id_array,
+	size_t seqI_begin,
+	size_t seqI_end,
+	size_t seqJ_begin,
+	size_t seqJ_end
+	) :
+  bp_penalty( breakpoint_penalty ),
+  min_breakpoint_penalty( minimum_breakpoint_penalty ),
+  bp_weights( bp_weight_matrix ), 
+  conservation_weights( conservation_weight_matrix ),
+  tracking_matches( tracking_match ),
+  pairwise_adjacencies( pairwise_adjacency_matrix ),
+  n1_des(n1_descendants),
+  n2_des(n2_descendants),
+  tm_score_array(tm_score_array),
+  tm_lcb_id_array(tm_lcb_id_array),
+  seqI_count(pairwise_adjacencies.shape()[0]),
+  seqJ_count(pairwise_adjacencies.shape()[1]),
+  seqI_first(seqI_begin),
+  seqI_last(seqI_end),
+  seqJ_first(seqJ_begin),
+  seqJ_last(seqJ_end),
+  first_time(true)
+{
+	std::sort(tracking_matches.begin(), tracking_matches.end());
+	const size_t rows = pairwise_adjacencies.shape()[0];	// every per-pair table below takes the shape of the adjacency matrix
+	const size_t cols = pairwise_adjacencies.shape()[1];
+	pairwise_lcb_count.resize( boost::extents[rows][cols] );
+	pairwise_lcb_score.resize( boost::extents[rows][cols] );
+	all_id_remaps.resize( boost::extents[rows][cols] );
+	full_impact_list.resize( boost::extents[rows][cols] );
+	my_del_lcbs.resize(100);	// buffer for use during lcb removal score computation
+	for( size_t slotI = 0; slotI < 3; ++slotI ){
+		internal_lcb_score_diff[slotI].resize( boost::extents[rows][cols] );
+		internal_lcb_removed_count[slotI].resize( boost::extents[rows][cols] );
+	}
+	lsd_zeros.resize( internal_lcb_score_diff[0].num_elements(), 0 );
+	lrc_zeros.resize( internal_lcb_removed_count[0].num_elements(), 0 );
+	using_lsd = -1;
+	size_t largest_pair = 0;	// largest number of LCBs found in any single pair
+	for( size_t i = 0; i < seqI_count; ++i ){
+		for( size_t j = 0; j < seqJ_count; ++j ){
+			const size_t lcb_total = pairwise_adjacencies[i][j].size();
+			largest_pair = (std::max)(largest_pair, lcb_total);
+			pairwise_lcb_count[i][j] = lcb_total;
+			pairwise_lcb_score[i][j] = 0;
+			for( size_t lcbI = 0; lcbI < lcb_total; ++lcbI )
+				pairwise_lcb_score[i][j] += pairwise_adjacencies[i][j][lcbI].weight;
+		}
+	}
+	bogus_scores.resize(largest_pair+10);
+}
+
+
+/**
+ * Counts the moves available from the current state: one per LCB stored in
+ * pairwise_adjacencies over the range [seqI_first,seqI_last) x [seqJ_first,seqJ_last).
+ */
+size_t EvenFasterSumOfPairsBreakpointScorer::getMoveCount()
+{
+	size_t total = 0;
+	for( size_t rowI = seqI_first; rowI < seqI_last; ++rowI )
+		for( size_t colI = seqJ_first; colI < seqJ_last; ++colI )
+			total += pairwise_adjacencies[rowI][colI].size();
+	return total;
+}
+
+/** returns the score of the current state: the summed pairwise LCB scores minus the scaled breakpoint penalties */
+double EvenFasterSumOfPairsBreakpointScorer::score()
+{
+	// score is the sum of all pairwise LCB scores,
+	// minus the sum of all pairwise breakpoint penalties
+	double score = 0;
+	for( size_t seqI = seqI_first; seqI < seqI_last; ++seqI )
+	{
+		for( size_t seqJ = seqJ_first; seqJ < seqJ_last; ++seqJ )
+		{
+			const double pw_lcb_score = pairwise_lcb_score[seqI][seqJ];
+			const size_t lcb_count = pairwise_lcb_count[seqI][seqJ];
+			score += pw_lcb_score;	// add LCB scores
+			// subtract the breakpoint penalty for every LCB beyond the first, so that a single circular LCB doesn't get penalized
+			// a pair without any LCB has no breakpoints either: count-1 must never be formed for it (it wraps around when the count is unsigned)
+			double cweights = 1 - conservation_weights[seqI][seqJ];
+			double bweights = 1 - bp_weights[seqI][seqJ];
+			double penalty = max( bp_penalty * cweights * cweights * cweights * cweights * bweights * bweights, min_breakpoint_penalty );
+			if(first_time)	// the penalty is reported once, for the first pair that gets scored
+				cout << "Scoring with scaled breakpoint penalty: " << penalty << endl;
+			first_time = false;
+			score -= penalty * ( lcb_count > 0 ? lcb_count - 1 : 0 );
+			if( !(score > -1e200 && score < 1e200) )	// written so that NaN is caught as well
+			{
+				genome::breakHere();
+				cerr << "bp_weights[seqI][seqJ] " << bp_weights[seqI][seqJ] << endl;
+				cerr << "conservation_weights[seqI][seqJ] " << conservation_weights[seqI][seqJ] << endl;
+				cerr << "pairwise_lcb_count[seqI][seqJ] " << pairwise_lcb_count[seqI][seqJ] << endl;
+				cerr << "pairwise_lcb_score[seqI][seqJ] " << pw_lcb_score << endl;
+				cerr << "Invalid score!!\n";
+			}
+		}
+	}
+	return score;
+}
+
+/** scores a move: computes its deltas without really removing, applies them, evaluates score(), then rolls them back */
+double EvenFasterSumOfPairsBreakpointScorer::operator()( pair< double, size_t >& the_move  )
+{
+	vector< pair< double, size_t > > discarded_moves;	// remove() requires these two; their contents are not used here
+	size_t discarded_count;
+	++using_lsd;	// claim the next scratch slot and zero it
+	std::copy(lsd_zeros.begin(),lsd_zeros.end(),internal_lcb_score_diff[using_lsd].data());
+	std::copy(lrc_zeros.begin(),lrc_zeros.end(),internal_lcb_removed_count[using_lsd].data());
+	remove( the_move, false, internal_lcb_score_diff[using_lsd], internal_lcb_removed_count[using_lsd], false, discarded_moves, discarded_count );
+	applyScoreDifference( internal_lcb_score_diff[using_lsd], internal_lcb_removed_count[using_lsd] );
+	const double move_score = score();
+	undoScoreDifference( internal_lcb_score_diff[using_lsd], internal_lcb_removed_count[using_lsd] );
+	--using_lsd;	// release the scratch slot
+	return move_score;
+}
+
+bool EvenFasterSumOfPairsBreakpointScorer::isValid( pair< double, size_t >& the_move )
+{	// a move is valid when the non-committing remove() call below reports success
+	++using_lsd;	// claim the next scratch slot and zero it
+	std::copy(lsd_zeros.begin(),lsd_zeros.end(),internal_lcb_score_diff[using_lsd].data());
+	std::copy(lrc_zeros.begin(),lrc_zeros.end(),internal_lcb_removed_count[using_lsd].data());
+	size_t discarded_count;
+	vector< pair< double, size_t > > discarded_moves;
+	const bool move_ok = remove( the_move, false, internal_lcb_score_diff[using_lsd], internal_lcb_removed_count[using_lsd], false, discarded_moves, discarded_count );
+	--using_lsd;	// release the scratch slot
+	return move_ok;
+}
+
+bool EvenFasterSumOfPairsBreakpointScorer::remove( pair< double, size_t >& the_move, vector< pair< double, size_t > >& new_move_list, size_t& new_move_count )
+{	// committing variant: really removes the move, asks for new moves to be scored, and keeps the score deltas on success
+	++using_lsd;	// claim the next scratch slot and zero it
+	std::copy(lsd_zeros.begin(),lsd_zeros.end(),internal_lcb_score_diff[using_lsd].data());
+	std::copy(lrc_zeros.begin(),lrc_zeros.end(),internal_lcb_removed_count[using_lsd].data());
+	const bool removed_ok = remove( the_move, true, internal_lcb_score_diff[using_lsd], internal_lcb_removed_count[using_lsd], true, new_move_list, new_move_count );
+	if( removed_ok )
+		applyScoreDifference( internal_lcb_score_diff[using_lsd], internal_lcb_removed_count[using_lsd] );
+	--using_lsd;	// release the scratch slot
+	return removed_ok;
+}
+
+/**
+ * Subtracts a set of per-pair differences from the running pairwise totals.
+ * All arrays are walked through their flat storage, element by element.
+ * A diagnostic is printed (after calling genome::breakHere()) whenever a difference or
+ * an updated total falls outside (-1e200, 1e200).
+ * @param lcb_score_diff	score to subtract from pairwise_lcb_score
+ * @param lcb_removed_count	count to subtract from pairwise_lcb_count
+ */
+void EvenFasterSumOfPairsBreakpointScorer::applyScoreDifference( boost::multi_array< double, 2 >& lcb_score_diff, boost::multi_array< size_t, 2 >& lcb_removed_count )
+{
+	const double* score_delta = lcb_score_diff.data();
+	const size_t* count_delta = lcb_removed_count.data();
+	const size_t total = pairwise_lcb_count.num_elements();
+	for( size_t k = 0; k != total; ++k )
+	{
+		// the range test is written in negated form so that NaN is flagged as well
+		if( !(score_delta[k] > -1e200 && score_delta[k] < 1e200) )
+		{
+			genome::breakHere();
+			cerr << "Invalid score!!\n";
+		}
+
+		pairwise_lcb_count.data()[k] -= count_delta[k];
+		pairwise_lcb_score.data()[k] -= score_delta[k];
+
+		if( !(pairwise_lcb_score.data()[k] > -1e200 && pairwise_lcb_score.data()[k] < 1e200) )
+		{
+			genome::breakHere();
+			cerr << "Invalid score!!\n";
+		}
+	}
+}
+
+/**
+ * Adds a set of per-pair differences back onto the running pairwise totals; the
+ * element-wise inverse of applyScoreDifference().
+ * A diagnostic is printed (after calling genome::breakHere()) whenever a difference or
+ * a restored total falls outside (-1e200, 1e200).
+ * @param lcb_score_diff	score to add back to pairwise_lcb_score
+ * @param lcb_removed_count	count to add back to pairwise_lcb_count
+ */
+void EvenFasterSumOfPairsBreakpointScorer::undoScoreDifference( boost::multi_array< double, 2 >& lcb_score_diff, boost::multi_array< size_t, 2 >& lcb_removed_count )
+{
+	const double* score_delta = lcb_score_diff.data();
+	const size_t* count_delta = lcb_removed_count.data();
+	const size_t total = pairwise_lcb_count.num_elements();
+	for( size_t k = 0; k != total; ++k )
+	{
+		// the range test is written in negated form so that NaN is flagged as well
+		if( !(score_delta[k] > -1e200 && score_delta[k] < 1e200) )
+		{
+			genome::breakHere();
+			cerr << "Invalid score!!\n";
+		}
+
+		pairwise_lcb_count.data()[k] += count_delta[k];
+		pairwise_lcb_score.data()[k] += score_delta[k];
+
+		if( !(pairwise_lcb_score.data()[k] > -1e200 && pairwise_lcb_score.data()[k] < 1e200) )
+		{
+			genome::breakHere();
+			cerr << "Invalid score!!\n";
+		}
+	}
+}
+
+/**
+ * Returns 20 * seqI_count * seqJ_count.
+ * NOTE(review): presumably the capacity callers should give to the new_move_list that
+ * remove() fills by index -- confirm against the caller; the factor 20 is not explained here.
+ */
+size_t EvenFasterSumOfPairsBreakpointScorer::getMaxNewMoveCount()
+{
+	return 20 * seqI_count * seqJ_count;
+}
+
+/**
+ * Removes, or trial-removes, the LCB addressed by a move and measures the effect on the score.
+ *
+ * the_move.second is a global move index: the pairwise LCB lists are laid end to end in
+ * (seqI, seqJ) order and the index is resolved to one LCB of one pairwise comparison.  All
+ * matches of that LCB are then taken out of every pairwise comparison; LCBs that lose all of
+ * their matches are removed with RemoveLCBandCoalesce().
+ *
+ * The pairwise score/count totals are unchanged when this function returns: the differences
+ * are applied only temporarily to evaluate score().  The caller applies lcb_score_diff and
+ * lcb_removed_count if it wants to keep them.
+ *
+ * @param the_move	(expected score change, global move index)
+ * @param really_remove	when true the deletion is kept; when false every change made to the
+ *						adjacency lists is undone before returning
+ * @param lcb_score_diff	output: per-pair score removed by the move
+ * @param lcb_removed_count	output: per-pair number of LCBs removed, as reported by RemoveLCBandCoalesce()
+ * @param score_new_moves	when true, re-score every LCB affected by the removal; requires really_remove
+ * @param new_move_list	receives the re-scored moves at indices [0, new_move_count).  It is written
+ *						by index and never grown here, so it must already be large enough
+ * @param new_move_count	output: number of entries written to new_move_list
+ * @return false if the move is invalid: out of range, aimed at an LCB that no longer exists, or
+ *         producing a score change that differs from the_move.first by more than 0.00001.
+ *         Note that in the last case a real removal has already been carried out.
+ * @throws const char* when score_new_moves is set without really_remove
+ */
+bool EvenFasterSumOfPairsBreakpointScorer::remove( pair< double, size_t >& the_move, bool really_remove, boost::multi_array< double, 2 >& lcb_score_diff, boost::multi_array< size_t, 2 >& lcb_removed_count, bool score_new_moves, vector< pair< double, size_t > >& new_move_list, size_t& new_move_count )
+{
+	if( score_new_moves && !really_remove )
+	{
+		cerr << "Error: Incompatible options in the breakpoint scorer!!!\n";
+		throw "oh shit!";
+	}
+	new_move_count = 0;
+	// figure out which lcb we're being asked to delete
+	size_t moveI = the_move.second;
+	size_t move_count = 0;
+	size_t move_base = 0;
+	size_t seqI = 0;
+	size_t seqJ = 0;
+	// reset the per-pair bookkeeping filled in below
+	for( seqI = seqI_first; seqI < seqI_last; ++seqI )
+	{
+		for( seqJ = seqJ_first; seqJ < seqJ_last; ++seqJ )
+		{
+			all_id_remaps[seqI][seqJ].clear();
+			full_impact_list[seqI][seqJ].clear();
+		}
+	}
+
+	// walk the pairwise lists until the one containing moveI is reached;
+	// move_base is the global index of that list's first LCB
+	for( seqI = seqI_first; seqI < seqI_last; ++seqI )
+	{
+		for( seqJ = seqJ_first; seqJ < seqJ_last; ++seqJ )
+		{
+			move_count += pairwise_adjacencies[seqI][seqJ].size();
+			if( move_count > moveI )
+				break;
+			move_base = move_count;
+		}
+		if( move_count > moveI )
+			break;
+	}
+	// the index addresses no LCB at all: seqI/seqJ are one past the end here, so reject
+	// the move instead of indexing outside pairwise_adjacencies
+	if( move_count <= moveI )
+		return false;
+
+	// score deletion of the LCB at (moveI - move_base) from the pairwise alignment of seqI and seqJ
+	size_t del_lcb = moveI - move_base;
+	if( pairwise_adjacencies[seqI][seqJ][del_lcb].lcb_id != del_lcb )
+	{
+		// this is an invalid move -- already deleted or coalesced with another
+		if( really_remove && pairwise_adjacencies[seqI][seqJ][del_lcb].lcb_id == LCB_UNASSIGNED )
+			cerr << "bad movement, dirty dancing\n";
+		return false;
+	}
+	
+	// work on a copy: the LCB's own match list is modified below
+	vector< TrackingMatch* > matches(pairwise_adjacencies[seqI][seqJ][del_lcb].matches);
+	double cur_score = score();
+
+	if( really_remove )
+	{
+		deleted_tracking_matches.insert( deleted_tracking_matches.end(), matches.begin(), matches.end() );
+	}
+
+	for( size_t i = seqI_first; i < seqI_last; ++i )
+	{
+		for( size_t j = seqJ_first; j < seqJ_last; ++j )
+		{
+			lcb_score_diff[i][j] = 0;
+			vector< TrackingLCB< TrackingMatch* > >& adjs = pairwise_adjacencies[i][j];
+			// create a list of LCBs affected by deletion of this match
+			// check whether any of them will have all of their matches removed
+			if( lcb_ids.size() < matches.size() )
+				lcb_ids.resize( matches.size() + 100 );
+			for( size_t mI = 0; mI < matches.size(); ++mI )
+				lcb_ids[mI] = tm_lcb_id_array[matches[mI]->match_id][i][j];
+			size_t lcb_id_count = matches.size();
+			std::sort(lcb_ids.begin(), lcb_ids.begin()+lcb_id_count);
+			vector< size_t >::iterator last = std::unique(lcb_ids.begin(), lcb_ids.begin()+lcb_id_count);
+			lcb_id_count = last - lcb_ids.begin();
+			// delete the last one if its unassigned (LCB_UNASSIGNED sorts last);
+			// the count is checked first so that an empty list is never indexed at -1
+			if( lcb_id_count > 0 && lcb_ids[lcb_id_count-1] == LCB_UNASSIGNED )
+				lcb_id_count--;
+
+			// one (lcb id, deleted matches) entry per affected LCB, ordered by id
+			vector< pair< size_t, vector< TrackingMatch* > > > aff_lcbs(lcb_id_count);
+			for( size_t lI = 0; lI < lcb_id_count; ++lI )
+				aff_lcbs[lI].first = lcb_ids[lI];
+
+			// organize the deleted matches
+			for( size_t mI = 0; mI < matches.size(); ++mI )
+			{
+				size_t id = tm_lcb_id_array[matches[mI]->match_id][i][j];
+				if( id == LCB_UNASSIGNED )
+					continue;
+				vector< pair< size_t, vector< TrackingMatch* > > >::iterator iter = std::lower_bound( aff_lcbs.begin(), aff_lcbs.end(), make_pair(id,vector< TrackingMatch* >() ) );
+				iter->second.push_back( matches[mI] );
+			}
+
+			// actually delete the matches and keep a list of LCBs that get completely deleted
+			size_t my_del_count = 0;
+			for( size_t lI = 0; lI < aff_lcbs.size(); ++lI )
+			{
+				vector< TrackingMatch* >& cur_matches = adjs[lcb_ids[lI]].matches;
+				size_t diff = cur_matches.size() - aff_lcbs[lI].second.size();
+				if( diff == 0 )
+				{
+					// every match of this LCB goes away: queue the whole LCB for removal.
+					// Doubling an empty buffer would leave it empty, so start it at 2.
+					if( my_del_count + 1 >= my_del_lcbs.size() )
+						my_del_lcbs.resize( my_del_lcbs.empty() ? 2 : 2*my_del_lcbs.size() );
+					my_del_lcbs[my_del_count++] = lcb_ids[lI];
+					adjs[lcb_ids[lI]].to_be_deleted = true;
+					lcb_score_diff[i][j] += adjs[lcb_ids[lI]].weight;
+					if( really_remove )
+					{
+						adjs[lcb_ids[lI]].weight = 0;
+						cur_matches.clear();
+					}
+					continue;
+				}
+
+				// update the LCB score
+				double del_score_sum = 0;
+				for( size_t mI = 0; mI < aff_lcbs[lI].second.size(); ++mI )
+					del_score_sum += tm_score_array[aff_lcbs[lI].second[mI]->match_id][i][j];
+				lcb_score_diff[i][j] += del_score_sum;
+				full_impact_list[i][j].push_back( aff_lcbs[lI].first );
+
+				if( really_remove )
+				{
+					adjs[lcb_ids[lI]].weight -= del_score_sum;
+				
+					// remove the deleted matches
+					// (set_difference needs both ranges sorted the same way)
+					vector< TrackingMatch* > dest( diff );
+					std::set_difference( cur_matches.begin(), cur_matches.end(), 
+						aff_lcbs[lI].second.begin(), aff_lcbs[lI].second.end(), dest.begin() );
+					swap( dest, cur_matches );
+				}
+			}
+
+			lcb_removed_count[i][j] = 0;
+
+			// now remove each LCB that needs to be deleted
+			std::vector< std::pair< uint, uint > >& fid_remaps = all_id_remaps[i][j];
+			std::vector< uint >& fimp_list = full_impact_list[i][j];
+			for( size_t delI = 0; delI < my_del_count; ++delI )
+			{
+				if( adjs[my_del_lcbs[delI]].lcb_id != my_del_lcbs[delI] )
+					continue;	// skip this one if it's already been deleted
+
+				std::vector< std::pair< uint, uint > > id_remaps;
+				std::vector< uint > impact_list;
+				uint removed_count = RemoveLCBandCoalesce( my_del_lcbs[delI], 2, adjs, bogus_scores, id_remaps, impact_list );
+				// accumulate the remaps (needed to undo a trial) and the affected LCBs
+				fid_remaps.insert( fid_remaps.end(), id_remaps.begin(), id_remaps.end() );
+				fimp_list.insert( fimp_list.end(), impact_list.begin(), impact_list.end() );
+
+				lcb_removed_count[i][j] += removed_count;
+				// only do this part if we're really deleting
+				if( really_remove )
+				{
+					// move all matches to the new LCB
+					for( size_t rI = 0; rI < id_remaps.size(); ++rI )
+					{
+						if( id_remaps[rI].second == -1 )
+							continue;	// deletion
+						vector< TrackingMatch* >& src_matches = adjs[id_remaps[rI].first].matches;
+						vector< TrackingMatch* >& dest_matches = adjs[id_remaps[rI].second].matches;
+						for( size_t mI = 0; mI < src_matches.size(); ++mI )
+							tm_lcb_id_array[src_matches[mI]->match_id][i][j] = id_remaps[rI].second;
+						dest_matches.insert( dest_matches.end(), src_matches.begin(), src_matches.end() );
+						// keep the match list sorted for later set_difference/lower_bound calls
+						std::sort( dest_matches.begin(), dest_matches.end() );
+						src_matches.clear();
+					}
+				}
+			}
+		}
+	}
+
+	// will be undone later
+	applyScoreDifference( lcb_score_diff, lcb_removed_count );
+	double new_score = score();
+
+	if( score_new_moves )
+	{
+		// mbase is the global move index of the first LCB of the current pair
+		size_t mbase = 0;
+		for( size_t i = seqI_first; i < seqI_last; ++i )
+		{
+			for( size_t j = seqJ_first; j < seqJ_last; ++j )
+			{
+				vector< TrackingLCB< TrackingMatch* > >& adjs = pairwise_adjacencies[i][j];
+				std::vector< uint >& fimp_list = full_impact_list[i][j];
+				sort( fimp_list.begin(), fimp_list.end() );
+				vector< uint >::iterator iter = std::unique( fimp_list.begin(), fimp_list.end() );
+				fimp_list.erase( iter, fimp_list.end() );
+				// NOTE(review): new_move_list is indexed without a bounds check; presumably the
+				// caller sizes it with getMaxNewMoveCount() -- confirm
+				for( size_t fI = 0; fI < fimp_list.size(); fI++ )
+				{
+					if( adjs[fimp_list[fI]].lcb_id != fimp_list[fI] )
+					{
+						new_move_list[new_move_count++] = make_pair( -(std::numeric_limits<double>::max)(), mbase + fimp_list[fI] );
+						continue;	// this one got trashed
+					}
+					// score removal of this block
+					pair< double, size_t > p( 0, mbase + fimp_list[fI] );
+					double scorediff = (*this)(p) - new_score;
+					p.first = scorediff;
+					new_move_list[new_move_count++] = p;
+				}
+				mbase += adjs.size();
+			}
+		}
+	}
+
+
+	// if we're not really removing, undo all the removals
+	if( !really_remove )
+		for( size_t i = seqI_first; i < seqI_last; ++i )
+			for( size_t j = seqJ_first; j < seqJ_last; ++j )
+				undoLcbRemoval( 2, pairwise_adjacencies[i][j], all_id_remaps[i][j] );
+
+	undoScoreDifference( lcb_score_diff, lcb_removed_count );
+
+	// if the change in score doesn't match then this is an invalid move!!
+	// allow for some numerical instability
+	bool valid = true;
+	if( new_score - cur_score < the_move.first - 0.00001 ||
+		new_score - cur_score  > the_move.first + 0.00001 )
+		valid = false;
+
+	return valid;
+}
+
+/**
+ * Returns the tracking matches that have not been deleted, i.e. tracking_matches minus
+ * deleted_tracking_matches.  deleted_tracking_matches is sorted in place as a side effect.
+ * NOTE(review): std::set_difference requires tracking_matches to be sorted by pointer value
+ * as well; that is assumed to be established elsewhere -- confirm.
+ */
+vector< TrackingMatch* > EvenFasterSumOfPairsBreakpointScorer::getResults() 
+{
+	std::sort(deleted_tracking_matches.begin(), deleted_tracking_matches.end());
+	// Size the output for the worst case (nothing removed) and trim afterwards.  Sizing it as
+	// tracking - deleted is only right when every deleted match occurs exactly once in
+	// tracking_matches; otherwise the subtraction underflows or set_difference writes past the end.
+	vector< TrackingMatch* > result_matches( tracking_matches.size() );
+	vector< TrackingMatch* >::iterator result_end = std::set_difference( tracking_matches.begin(), tracking_matches.end(), deleted_tracking_matches.begin(), deleted_tracking_matches.end(), result_matches.begin() );
+	result_matches.erase( result_end, result_matches.end() );
+	return result_matches;
+}
+
+/**
+ * Debugging aid: cross-checks the scorer's bookkeeping and prints diagnostics to cerr.
+ * Three things are checked:
+ *  - every remaining tracking match is listed in the pairwise LCB it is assigned to,
+ *  - every match listed in a pairwise LCB is one of the remaining tracking matches,
+ *  - the LCBs recomputed from the remaining matches agree with pairwise_adjacencies[0][0]
+ *    and their number agrees with pairwise_lcb_count[0][0].
+ * @return true only if all checks pass
+ */
+bool EvenFasterSumOfPairsBreakpointScorer::validate()
+{
+	vector< TrackingMatch* > trams = getResults();	// need to apply any deletions...
+	bool success = true;	// be optimistic!
+	// make sure all the tracking matches point to the right LCBs
+	for( size_t tmI = 0; tmI < trams.size(); tmI++ )
+	{
+		TrackingMatch* tm = trams[tmI];
+		for( size_t i = 0; i < tm_lcb_id_array.shape()[1]; ++i )
+			for( size_t j = 0; j < tm_lcb_id_array.shape()[2]; ++j )
+			{
+				// skip this match if it's not defined
+				if( tm->node_match->LeftEnd(n1_des[i]) == NO_MATCH ||
+					tm->node_match->LeftEnd(n2_des[j]) == NO_MATCH ||
+					tm_lcb_id_array[tm->match_id][i][j] == LCB_UNASSIGNED)
+					continue;
+				// find the tracking match in this LCB
+				size_t id = tm_lcb_id_array[tm->match_id][i][j];
+				vector< TrackingMatch* >& matches = pairwise_adjacencies[i][j][id].matches;
+				vector< TrackingMatch* >::iterator iter = std::lower_bound( matches.begin(), matches.end(), tm );
+				if( iter == matches.end() || *iter != tm )
+				{
+					cerr << "Missing match!!\n";
+					cerr << "lcb_id: " << id << endl;
+					cerr << "match: " << tm << endl;
+					genome::breakHere();
+					success = false;
+				}
+			}
+	}
+	// make sure all the LCBs point to valid tracking matches
+	for( size_t i = 0; i < pairwise_adjacencies.shape()[0]; ++i )
+		for( size_t j = 0; j < pairwise_adjacencies.shape()[1]; ++j )
+		{
+			vector< TrackingLCB< TrackingMatch* > >& adjs = pairwise_adjacencies[i][j];
+			for( size_t lcbI = 0; lcbI < adjs.size(); lcbI++ )
+			{
+				for( size_t mI = 0; mI < adjs[lcbI].matches.size(); ++mI )
+				{
+					vector< TrackingMatch* >::iterator iter = std::lower_bound( trams.begin(), trams.end(), adjs[lcbI].matches[mI] );
+					// lower_bound returns end() when every element compares smaller; never dereference that
+					if( iter == trams.end() || *iter != adjs[lcbI].matches[mI] )
+					{
+						cerr << "Missing match:  in adjacencies but not tracking_matches!!\n";
+						cerr << "lcb_id: " << tm_lcb_id_array[adjs[lcbI].matches[mI]->match_id][i][j] << endl;
+						genome::breakHere();
+						success = false;
+					}
+				}
+			}
+		}
+
+	// make sure that the number of breakpoints matches up with what tracking_matches suggests
+	// convert back to an LCB list
+	vector< AbstractMatch* > new_matches(trams.size());
+	for( size_t mI = 0; mI < trams.size(); ++mI )
+		new_matches[mI] = trams[mI]->original_match;
+
+	vector< gnSeqI > breakpoints;
+	vector< vector< AbstractMatch* > > LCB_list;
+	IdentifyBreakpoints( new_matches, breakpoints );
+	ComputeLCBs_v2( new_matches, breakpoints, LCB_list );
+	cout << "breakpoints.size(): " << breakpoints.size() << "\tpairwise_lcb_count[0][0]: " << pairwise_lcb_count[0][0] << endl;
+	if( breakpoints.size() != pairwise_lcb_count[0][0] )
+		success = false;
+	size_t adjI = 0;
+	vector< TrackingLCB< TrackingMatch* > >& adjs = pairwise_adjacencies[0][0];
+	for( size_t lcbI = 0; lcbI < LCB_list.size(); ++lcbI )
+	{
+		// make sure each LCB exists...
+		// advance to the next entry that still carries its own id, without leaving the list
+		while( adjI < adjs.size() && adjI != adjs[adjI].lcb_id )
+			adjI++;
+		if( adjI >= adjs.size() )
+		{
+			cerr << "Missing LCB:  in LCB_list but not in adjacencies!!\n";
+			genome::breakHere();
+			success = false;
+			break;
+		}
+
+		// compare matches...
+		vector< AbstractMatch* > ms(adjs[adjI].matches.size()+LCB_list[lcbI].size(), (AbstractMatch*)NULL);
+		std::sort( LCB_list[lcbI].begin(), LCB_list[lcbI].end() );
+		vector< AbstractMatch* > asdf(adjs[adjI].matches.size());
+		for( size_t mI = 0; mI < adjs[adjI].matches.size(); ++mI )
+			asdf[mI] = adjs[adjI].matches[mI]->original_match;
+		std::sort( asdf.begin(), asdf.end() );
+		std::set_symmetric_difference( LCB_list[lcbI].begin(), LCB_list[lcbI].end(), asdf.begin(), asdf.end(), ms.begin() );
+		// this should throw a fit if the sets aren't equal.
+		// ms was NULL-filled, so a non-NULL first entry means at least one match is in only one set
+		if( !ms.empty() && ms[0] != NULL )
+		{
+			success = false;	// a mismatch is a validation failure, not just a message
+			cerr << "In adjacencies:\n";
+			for( size_t asdfI = 0; asdfI < asdf.size(); asdfI++ )
+			{
+				printMatch(asdf[asdfI], cerr);
+				cerr << endl;
+			}
+			cerr << "\nIn LCB_list:\n";
+			for( size_t mI = 0; mI < LCB_list[lcbI].size(); mI++ )
+			{
+				printMatch(LCB_list[lcbI][mI], cerr);
+				cerr << endl;
+			}
+			cerr << "\nAll matches ssc1\n";
+			SingleStartComparator<AbstractMatch> ssc1(1);
+			std::sort(new_matches.begin(), new_matches.end(), ssc1);
+			for( size_t mI = 0; mI < new_matches.size(); mI++ )
+			{
+				printMatch(new_matches[mI], cerr);
+				cerr << endl;
+			}
+
+			cerr << "\nAll matches ssc0\n";
+			SingleStartComparator<AbstractMatch> ssc0(0);
+			std::sort(new_matches.begin(), new_matches.end(), ssc0);
+			for( size_t mI = 0; mI < new_matches.size(); mI++ )
+			{
+				printMatch(new_matches[mI], cerr);
+				cerr << endl;
+			}
+			genome::breakHere();
+		}
+		adjI++;
+	}
+
+	return success;
+}
+
+
+
+/**
+ * Sets up a scorer over a list of LCB adjacencies.
+ * Initially every LCB counts as one breakpoint and the total weight is the sum of all LCB weights.
+ * @param adjacencies	the LCB adjacency list to score
+ * @param breakpoint_penalty	cost charged per breakpoint
+ * @param collinear	stored in the collinear member; it changes how operator() reports move scores
+ */
+SimpleBreakpointScorer::SimpleBreakpointScorer( std::vector< LCB >& adjacencies, double breakpoint_penalty, bool collinear ) : 
+  adjs( adjacencies ),
+  bp_penalty( breakpoint_penalty ),
+  collinear( collinear )
+{
+	const size_t lcb_total = adjs.size();
+	bp_count = lcb_total;
+	scores.assign( lcb_total, 0.0 );
+
+	// sum up the weight of every LCB
+	total_weight = 0;
+	for( size_t k = 0; k < lcb_total; ++k )
+		total_weight += adjs[k].weight;
+}
+
+/** Returns the number of entries in the adjacency list; one move (removal) exists per entry */
+size_t SimpleBreakpointScorer::getMoveCount() 
+{
+	return adjs.size();
+}
+
+/** Current score: the total LCB weight minus bp_penalty for every breakpoint */
+double SimpleBreakpointScorer::score()
+{
+	const double penalty_total = (double)bp_count * bp_penalty;
+	return total_weight - penalty_total;
+}
+
+/**
+ * A move is valid when the LCB still carries its own id and re-scoring it
+ * gives exactly the score recorded for the move.
+ * @param lcbI	index of the LCB the move would remove
+ * @param move_score	score recorded when the move was created
+ */
+bool SimpleBreakpointScorer::isValid( size_t lcbI, double move_score )
+{
+	if( adjs[lcbI].lcb_id == lcbI )
+		return (*this)(lcbI) == move_score;
+	// the id no longer matches the index: this LCB has been removed or merged away
+	return false;
+}
+
+/**
+ * Returns the relative change in score if lcbI were to be removed.
+ * The removal is performed on the adjacency list and rolled back straight away; only the
+ * number of breakpoints it would eliminate is kept.
+ * In collinear mode a removal that lowers the score while breakpoints would remain is
+ * reported as the positive value 1/(-change) instead of the negative change.
+ */
+double SimpleBreakpointScorer::operator()( size_t lcbI )
+{
+	const double score_before = score();
+
+	// trial removal followed by an immediate undo
+	std::vector< uint > impact_list;
+	std::vector< std::pair< uint, uint > > id_remaps;
+	const uint bp_removed = RemoveLCBandCoalesce( lcbI, adjs[0].left_adjacency.size(), adjs, scores, id_remaps, impact_list );
+	undoLcbRemoval( adjs[0].left_adjacency.size(), adjs, id_remaps );
+
+	// score that would result from the removal
+	const double score_after = total_weight - adjs[lcbI].weight - (double)(bp_count - bp_removed) * bp_penalty;
+	const double score_diff = score_after - score_before;
+
+	// ensure that we continue removing blocks until only one is left
+	if( collinear && bp_count - bp_removed > 0 && score_diff < 0 )
+		return 1/(-score_diff);
+	return score_diff;
+}
+
+/**
+ * Call to indicate that the given LCB has been removed.
+ * Removes the LCB from the adjacency list, updates the total weight and breakpoint count,
+ * and appends a freshly scored move for every affected LCB that still exists.
+ * @param lcbI	index of the LCB to remove
+ * @param new_moves	receives (score change, LCB index) pairs for the affected LCBs
+ */
+void SimpleBreakpointScorer::remove( uint lcbI, vector< pair< double, size_t > >& new_moves )
+{
+	std::vector< uint > impact_list;
+	std::vector< std::pair< uint, uint > > id_remaps;
+	const uint bp_removed = RemoveLCBandCoalesce( lcbI, adjs[0].left_adjacency.size(), adjs, scores, id_remaps, impact_list );
+	bp_count -= bp_removed;
+	total_weight -= adjs[lcbI].weight;
+
+	for( std::vector< uint >::const_iterator imp = impact_list.begin(); imp != impact_list.end(); ++imp )
+	{
+		const uint affected = *imp;
+		// only LCBs that still carry their own id can be removed later
+		if( adjs[affected].lcb_id == affected )
+			new_moves.push_back( make_pair( (*this)(affected), affected ) );
+	}
+}
+
+
+/**
+ * Sets up a scorer in which every LCB contributes (weight - minimum_weight) to the total.
+ * @param adjacencies	the LCB adjacency list to score
+ * @param minimum_weight	value subtracted from each LCB's weight
+ */
+GreedyRemovalScorer::GreedyRemovalScorer( std::vector< LCB >& adjacencies, double minimum_weight ) : 
+adjs( adjacencies ),
+min_weight( minimum_weight )
+{
+	const size_t lcb_total = adjs.size();
+	scores.assign( lcb_total, 0.0 );
+
+	// sum up each LCB's weight in excess of the minimum
+	total_weight = 0;
+	for( size_t k = 0; k < lcb_total; ++k )
+		total_weight += adjs[k].weight - min_weight;
+}
+
+/** Returns the number of entries in the adjacency list; one move (removal) exists per entry */
+size_t GreedyRemovalScorer::getMoveCount() 
+{
+	return adjs.size();
+}
+
+/** Current score: the running sum of (weight - min_weight) maintained by the constructor and remove() */
+double GreedyRemovalScorer::score()
+{
+	return total_weight;
+}
+
+/**
+ * A move is valid when the LCB still carries its own id and re-scoring it
+ * gives exactly the score recorded for the move.
+ * @param lcbI	index of the LCB the move would remove
+ * @param move_score	score recorded when the move was created
+ */
+bool GreedyRemovalScorer::isValid( size_t lcbI, double move_score )
+{
+	if( adjs[lcbI].lcb_id == lcbI )
+		return (*this)(lcbI) == move_score;
+	// the id no longer matches the index: this LCB has been removed or merged away
+	return false;
+}
+
+/**
+ * return the relative change in score if lcbI were to be removed:
+ * the negated excess of the LCB's weight over min_weight, so removing an LCB
+ * lighter than min_weight yields a positive change
+ */
+double GreedyRemovalScorer::operator()( size_t lcbI )
+{
+	return -(adjs[lcbI].weight-min_weight);
+}
+
+/**
+ * Call to indicate that the given LCB has been removed.
+ * Removes the LCB from the adjacency list, takes its (weight - min_weight) out of the total,
+ * and appends a freshly scored move for every affected LCB that still exists.
+ * @param lcbI	index of the LCB to remove
+ * @param new_moves	receives (score change, LCB index) pairs for the affected LCBs
+ */
+void GreedyRemovalScorer::remove( uint lcbI, vector< pair< double, size_t > >& new_moves )
+{
+	std::vector< uint > impact_list;
+	std::vector< std::pair< uint, uint > > id_remaps;
+	// the number of removed breakpoints is not needed by this scorer
+	RemoveLCBandCoalesce( lcbI, adjs[0].left_adjacency.size(), adjs, scores, id_remaps, impact_list );
+	total_weight -= (adjs[lcbI].weight-min_weight);
+
+	for( std::vector< uint >::const_iterator imp = impact_list.begin(); imp != impact_list.end(); ++imp )
+	{
+		const uint affected = *imp;
+		// only LCBs that still carry their own id can be removed later
+		if( adjs[affected].lcb_id == affected )
+			new_moves.push_back( make_pair( (*this)(affected), affected ) );
+	}
+}
+
+
+
+
+}	// namespace mems
+
diff --git a/libMems/GreedyBreakpointElimination.h b/libMems/GreedyBreakpointElimination.h
new file mode 100644
index 0000000..254a880
--- /dev/null
+++ b/libMems/GreedyBreakpointElimination.h
@@ -0,0 +1,873 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef __GreedyBreakpointElimination_h__
+#define __GreedyBreakpointElimination_h__
+
+#include <libMems/AbstractMatch.h>
+#include <iostream>
+#include <boost/multi_array.hpp>
+#include <libMems/PhyloTree.h>
+#include <libMems/SubstitutionMatrix.h>
+#include <libMems/SeedOccurrenceList.h>
+#include <libMems/IntervalList.h>
+#include <libMems/LCB.h>
+#include <stack>
+
+namespace mems {
+
+extern bool penalize_repeats;
+
+/**
+ * A wrapper that maps a match among extant sequences to a match among ancestral and extant seqs
+ * The class only bundles the two matches with an id; it defines no behaviour of its own.
+ */
+template <class MatchType>
+class LcbTrackingMatch
+{ 
+public:
+	MatchType original_match;	// presumably the match among the extant sequences -- confirm
+	MatchType node_match;	// presumably the corresponding match among ancestral and extant seqs -- confirm
+	size_t match_id;	// used to index into global arrays of lcb_id and score
+};
+/** The instantiation used throughout: both members are mems::AbstractMatch pointers */
+typedef LcbTrackingMatch< mems::AbstractMatch* > TrackingMatch;
+
+/** 
+ * This class is used to track relationships between LCBs during the LCB determination process.
+ * All per-sequence arrays have exactly two entries, i.e. an instance describes an LCB of a
+ * pairwise comparison, together with the matches that make up the LCB.
+ */
+template <class MatchType>
+class TrackingLCB
+{
+public:
+	/** Default constructor: the body is empty, so the arrays and scalar members start out uninitialized */
+	TrackingLCB(){}
+	/** Copy constructor: delegates to the compiler-generated copy assignment (the only user-declared operator= takes a mems::LCB) */
+	TrackingLCB( const TrackingLCB& l ){ *this = l; }
+	/** Constructs a TrackingLCB from a pairwise LCB */
+	TrackingLCB( const mems::LCB& l ){ *this = l; }
+	/**
+	 * Copies the first two entries of each per-sequence member of l, plus its id and weight,
+	 * and resets to_be_deleted.  The matches member is not touched.
+	 */
+	TrackingLCB& operator=( const mems::LCB& l )
+	{
+		left_end[0] = l.left_end[0];
+		left_end[1] = l.left_end[1];
+		right_end[0] = l.right_end[0];
+		right_end[1] = l.right_end[1];
+		left_adjacency[0] = l.left_adjacency[0];
+		left_adjacency[1] = l.left_adjacency[1];
+		right_adjacency[0] = l.right_adjacency[0];
+		right_adjacency[1] = l.right_adjacency[1];
+		lcb_id = l.lcb_id;
+		weight = l.weight;
+		to_be_deleted = false;
+		return *this;
+	}
+	int64 left_end[2];	/**< The left end position of the LCB in each sequence */
+	int64 right_end[2];  /**< The right end position of the LCB in each sequence */
+	uint left_adjacency[2];	/**< 'Pointers' (actually IDs) to the LCBs on the left in each sequence */
+	uint right_adjacency[2];	/**< 'Pointers' (actually IDs) to the LCBs on the right in each sequence */
+	double weight;		/**< The weight (or coverage) of this LCB */
+	std::vector< MatchType > matches;	/**< The matches that belong to this LCB */
+	int lcb_id;			/**< A numerical ID that can be assigned to this LCB */
+	bool to_be_deleted;	/**< Cleared by operator=( const mems::LCB& ); presumably set by code that queues the LCB for removal -- confirm */
+};
+
+/** indicates an LCB identifier hasn't been assigned or is unknown */
+const uint LCB_UNASSIGNED = (std::numeric_limits<uint>::max)();
+
+typedef boost::multi_array< std::vector< TrackingLCB< TrackingMatch* > >, 2 > PairwiseLCBMatrix;
+
+
+/**
+ * computes an anchoring score for the matches contained inside an LCB
+ */
+template< class MatchVector >
+double GetPairwiseAnchorScore( 
+		MatchVector& lcb, std::vector< genome::gnSequence* >& seq_table, 
+		const mems::PairwiseScoringScheme& subst_scoring, mems::SeedOccurrenceList& sol_1, 
+		mems::SeedOccurrenceList& sol_2, bool penalize_gaps = false );
+
+/**
+ * Orders (score, move index) pairs by ascending score; the index is ignored.
+ * With the standard heap algorithms a less-than comparator like this one keeps
+ * the pair with the highest score at the top of the heap.
+ */
+class MoveScoreHeapComparator
+{
+public:
+	bool operator()( const std::pair< double, size_t >& lhs, const std::pair< double, size_t >& rhs ) const
+	{
+		return rhs.first > lhs.first;
+	}
+};
+
+/**
+ * Computes all pairwise LCBs from a set of tracking matches
+ */
+void getPairwiseLCBs( 
+	uint nI, 
+	uint nJ, 
+	uint dI, 
+	uint dJ, 
+	std::vector< TrackingMatch* >& tracking_matches, 
+	std::vector< TrackingLCB<TrackingMatch*> >& t_lcbs,
+	boost::multi_array< double, 3 >& tm_score_array,
+	boost::multi_array< size_t, 3 >& tm_lcb_id_array );
+
+/** creates an appropriately sized matrix for mapping individual TrackingMatches to their containing LCBs */
+void initTrackingMatchLCBTracking( 
+  const std::vector< mems::TrackingMatch >& tracking_matches, 
+	size_t n1_count, 
+	size_t n2_count, 
+	boost::multi_array< size_t, 3 >& tm_lcb_id_array );
+
+
+/** removes an LCB from an LCB list and coalesces surrounding LCBs.  Returns the number of LCBs removed 
+ *  After LCBs are removed, the adjacency list should be processed with filterLCBs()
+ *  @param	id_remaps	This is populated with a list of LCB ids that were deleted or coalesced and now have a new LCB id
+ *                      for each coalesced LCB, an entry of the form <old id, new id> is added, deleted LCBs have
+ *						entries of the form <deleted, -1>.  Entries appear in the order operations were performed
+ *						and the function undoLcbRemoval() can undo these operations in reverse order
+ */
+template< class LcbVector >
+uint RemoveLCBandCoalesce( size_t lcbI, uint seq_count, 
+						  LcbVector& adjacencies, 
+						  std::vector< double >& scores, 
+						  std::vector< std::pair< uint, uint > >& id_remaps, 
+						  std::vector< uint >& impact_list );
+
+
+void printMatch( mems::AbstractMatch* m, std::ostream& os );
+
+/** Writes "(start,right end)" for every sequence of the match to os, separated by tabs */
+inline
+void printMatch( mems::AbstractMatch* m, std::ostream& os )
+{
+	for( size_t seqI = 0; seqI < m->SeqCount(); ++seqI )
+	{
+		// separator goes between entries, not before the first one
+		if( seqI != 0 )
+			os << '\t';
+		os << "(" << m->Start(seqI);
+		os << "," << m->RightEnd(seqI) << ")";
+	}
+}
+
+void printProgress( uint prev_prog, uint cur_prog, std::ostream& os );
+
+
+/**
+ * Comparator for (match, label) pairs: applies an SSC comparator for one sequence to the
+ * match (first) member of each pair and ignores the label.
+ */
+template< typename PairType >
+class LabelSort 
+{
+public:
+	LabelSort( uint seqI ) : ssc( seqI ) {}
+	bool operator()( const PairType& lhs, const PairType& rhs )
+	{
+		return ssc( lhs.first, rhs.first );
+	}
+private:
+	LabelSort();	// declared private so that a sequence index must always be supplied
+	mems::SSC<mems::AbstractMatch> ssc;
+};
+
+/**
+ * Finds the matches that end a collinear block.
+ * mlist is sorted in place with SSC for sequence 0 and each match is labelled with its rank in
+ * that order.  For every other sequence the matches are re-sorted for that sequence and a
+ * breakpoint is recorded wherever consecutive matches do not have consecutive labels
+ * (increasing for matches oriented like sequence 0, decreasing otherwise) or change orientation.
+ * @param mlist	the matches; reordered by this function
+ * @param breakpoints	output: sorted, unique ranks (indices into the reordered mlist) of the last
+ *						match of each block.  Always overwritten; empty when mlist is empty
+ */
+template<class MatchVector>
+void IdentifyBreakpoints( MatchVector& mlist, std::vector<gnSeqI>& breakpoints )
+{
+	// the output never keeps entries from an earlier call, not even for empty input
+	breakpoints.clear();
+	if( mlist.size() == 0 )
+		return;
+	// the last match always ends a block
+	breakpoints.push_back( mlist.size()-1 );
+
+	mems::SSC<mems::AbstractMatch> ssc(0);
+	std::sort( mlist.begin(), mlist.end(), ssc );
+	typedef typename MatchVector::value_type value_type;
+	typedef std::pair< value_type, size_t > LabelPairType;
+	typedef typename std::vector< LabelPairType >::const_iterator LabelIter;
+
+	// label every match with its rank in sequence 0
+	std::vector< LabelPairType > label_list;
+	label_list.reserve( mlist.size() );
+	size_t label = 0;
+	for( typename MatchVector::iterator cur = mlist.begin(); cur != mlist.end(); ++cur, ++label )
+		label_list.push_back( std::make_pair( *cur, label ) );
+
+	uint seq_count = mlist[0]->SeqCount();
+	// check for breakpoints in each sequence
+	for( uint seqI = 1; seqI < seq_count; seqI++ )
+	{
+		LabelSort< LabelPairType > ls(seqI); 
+		std::sort( label_list.begin(), label_list.end(), ls );
+
+		LabelIter prev = label_list.begin();
+		LabelIter iter = label_list.begin();
+		const LabelIter lab_end = label_list.end();
+
+		bool prev_orient = (*prev).first->Orientation(seqI) == (*prev).first->Orientation(0);
+		if( !prev_orient )	// if we start in a different orientation than the ref seq there's a bp here
+			breakpoints.push_back(prev->second);
+
+		// prev always trails iter by exactly one element
+		for( ++iter; iter != lab_end; ++iter, ++prev )
+		{
+			bool cur_orient = (*iter).first->Orientation(seqI) == (*iter).first->Orientation(0);
+			const bool same_block = prev_orient == cur_orient &&
+				( ( prev_orient && (*prev).second + 1 == (*iter).second) ||
+				  ( !prev_orient && (*prev).second - 1 == (*iter).second) 
+				);
+			if( !same_block )
+			{
+				// always add the last match in a new block (scanning from left to right in seq 0)
+				if( prev_orient )
+					breakpoints.push_back( prev->second );
+				if( !cur_orient )
+					breakpoints.push_back( iter->second );
+			}
+			prev_orient = cur_orient;
+		}
+		if( prev_orient )
+			breakpoints.push_back( prev->second );
+	}
+	std::sort( breakpoints.begin(), breakpoints.end() );
+	std::vector<gnSeqI>::iterator uni = std::unique( breakpoints.begin(), breakpoints.end() );
+	breakpoints.erase( uni, breakpoints.end() );
+}
+
+
+/**
+ * Splits a match list into blocks at the given breakpoints.
+ * @param meml	the matches, in the order the breakpoints refer to
+ * @param breakpoints	ascending indices into meml of the last match of each block
+ * @param lcb_list	output: one MatchVector per breakpoint.  Always overwritten; empty when
+ *					there are no breakpoints
+ */
+template< class MatchVector >
+void ComputeLCBs_v2( const MatchVector& meml, const std::vector<gnSeqI>& breakpoints, std::vector< MatchVector >& lcb_list )
+{
+	// start from an empty result in every case, so that the output never holds
+	// blocks left over from an earlier call
+	lcb_list.clear();
+
+	// there must be at least one end of a block defined
+	if( breakpoints.size() < 1 )
+		return;
+	
+	// organize the LCBs into different MatchVector instances
+	// block_start has the breakpoints' own type so that large indices are not truncated
+	gnSeqI block_start = 0;	// index of the first match in the current block
+	const MatchVector empty_lcb;
+	for( std::vector<gnSeqI>::const_iterator break_iter = breakpoints.begin(); break_iter != breakpoints.end(); ++break_iter ){
+		const gnSeqI block_end = *break_iter + 1;	// one past the last match of the block
+		lcb_list.push_back( empty_lcb );
+		lcb_list.back().insert( lcb_list.back().end(), meml.begin() + block_start, meml.begin() + block_end );
+		block_start = block_end;
+	}
+}
+
+
+/**
+ * Builds the adjacency list for a set of LCBs.
+ * One mems::LCB is created per entry of lcb_list: its boundaries come from FindBoundaries(),
+ * its weight from weights[lcbI] and its lcb_id is its position in lcb_list.  Coordinates of
+ * sequences in which the LCB is not in forward orientation are stored negated.  Then, for each
+ * sequence, the LCBs are sorted with LCBLeftComparator and every LCB is linked to its
+ * neighbours in that order; ends of the chain get (uint)-1.
+ * @param lcb_list	the matches of each LCB; the first LCB must not be empty since it supplies the sequence count
+ * @param weights	one weight per entry of lcb_list
+ * @param adjacencies	output, cleared first; left ordered by LCBIDComparator on return
+ */
+template <class MatchVector>
+void computeLCBAdjacencies_v3( const std::vector< MatchVector >& lcb_list, std::vector< double >& weights, std::vector< mems::LCB >& adjacencies )
+{
+	adjacencies.clear(); // start with no LCB adjacencies
+	if( lcb_list.size() == 0 )
+		return;	// there aren't any LCBs so there aren't any adjacencies!
+
+	uint seq_count = lcb_list.front().front()->SeqCount();
+	uint seqI;
+	uint lcbI;
+	// first pass: create an LCB record with boundaries, id and weight for each block
+	for( lcbI = 0; lcbI < lcb_list.size(); ++lcbI ){
+		mems::LCB lcb;
+		std::vector<gnSeqI> left_end;
+		std::vector<gnSeqI> length;
+		std::vector<bool> orientation;
+		FindBoundaries( lcb_list[lcbI], left_end, length, orientation );
+
+		lcb.left_adjacency = std::vector<uint>( left_end.size(), -1 );
+		lcb.right_adjacency = std::vector<uint>( left_end.size(), -1 );
+		lcb.left_end = std::vector<int64>( left_end.size(), 0 );
+		lcb.right_end = std::vector<int64>( left_end.size(), 0 );
+
+		for( seqI = 0; seqI < seq_count; seqI++ ){
+			// support "ragged edges" on the ends of LCBs
+			if( left_end[seqI] == mems::NO_MATCH )
+				continue;
+			lcb.left_end[seqI] = left_end[seqI];
+			lcb.right_end[seqI] = left_end[seqI] + length[seqI];
+			// a false orientation flag is recorded by negating both coordinates
+			if( !orientation[seqI] )
+			{
+				lcb.left_end[seqI] = -lcb.left_end[seqI];
+				lcb.right_end[seqI] = -lcb.right_end[seqI];
+			}
+		}
+		lcb.lcb_id = adjacencies.size();
+		lcb.weight = weights[ lcbI ];
+		adjacencies.push_back( lcb );
+	}
+
+	// second pass: for each sequence, order the LCBs and link neighbours by id
+	for( seqI = 0; seqI < seq_count; seqI++ ){
+		mems::LCBLeftComparator llc( seqI );
+		std::sort( adjacencies.begin(), adjacencies.end(), llc );
+		// interior LCBs only; the first and last are handled after the loop
+		for( lcbI = 1; lcbI + 1 < lcb_list.size(); lcbI++ ){
+			adjacencies[ lcbI ].left_adjacency[ seqI ] = adjacencies[ lcbI - 1 ].lcb_id;
+			adjacencies[ lcbI ].right_adjacency[ seqI ] = adjacencies[ lcbI + 1 ].lcb_id;
+		}
+		// from here on lcbI is the index of the last LCB
+		if( lcbI == lcb_list.size() )
+			lcbI--;	// need to decrement when there is only a single LCB
+
+		// set first and last lcb adjacencies to -1
+		adjacencies[ 0 ].left_adjacency[ seqI ] = (uint)-1;
+		adjacencies[ lcbI ].right_adjacency[ seqI ] = (uint)-1;
+		if( lcbI > 0 ){
+			adjacencies[ 0 ].right_adjacency[ seqI ] = adjacencies[ 1 ].lcb_id;
+			adjacencies[ lcbI ].left_adjacency[ seqI ] = adjacencies[ lcbI - 1 ].lcb_id;
+		}
+	}
+	// presumably restores lcb_id order so that adjacencies[id] is the LCB with that id -- confirm against LCBIDComparator
+	mems::LCBIDComparator lic;
+	std::sort( adjacencies.begin(), adjacencies.end(), lic );
+
+}
+
+/**
+ *  Convenience overload for interval lists: every interval of iv_list is treated as an LCB of
+ *  its own and the work is forwarded to the generic computeLCBAdjacencies_v3().
+ *  Redesign to be more intuitive.  left_adjacency is always left, regardless of LCB orientation
+ */
+inline
+void computeLCBAdjacencies_v3( mems::IntervalList& iv_list, std::vector< double >& weights, std::vector< mems::LCB >& adjacencies ){
+	typedef std::vector< mems::Interval* > SingleIntervalLcb;
+	std::vector< SingleIntervalLcb > wrapped;
+	wrapped.reserve( iv_list.size() );
+	for( size_t ivI = 0; ivI < iv_list.size(); ++ivI )
+	{
+		// a one-element block pointing at the interval itself
+		SingleIntervalLcb single( 1, &iv_list[ivI] );
+		wrapped.push_back( single );
+	}
+	computeLCBAdjacencies_v3( wrapped, weights, adjacencies );
+}
+
+/**
+ * Takes a set of filtered LCB adjacencies and an unfiltered set of matches as input
+ * returns a filtered set of matches that reflects the LCBs found
+ *
+ * The lcb_id of each adjacency entry decides what happens to the matches of the LCB at the same index:
+ *  - equal to its own index: the LCB is kept as it is
+ *  - -1: skipped (after printing "weird")
+ *  - -2: its matches are appended to deleted_matches
+ *  - anything else: the id is followed from LCB to LCB until an LCB that points to itself (or -1/-2)
+ *    is found; the matches are added to that LCB, or to deleted_matches if the chain ends in -1/-2
+ * @param adjacencies	in: the filtered adjacencies; out: recomputed for the surviving LCBs
+ * @param lcb_list	in: matches of every original LCB; out: matches of the surviving LCBs, each sorted
+ * @param weights	in: one weight per original LCB; out: the weights at the surviving indices
+ *					(for a merged LCB this is the weight stored at the target index, not a sum)
+ * @param deleted_matches	receives the matches of deleted LCBs
+ */
+template< class MatchVector >
+void filterMatches_v2( std::vector< mems::LCB >& adjacencies, std::vector< MatchVector >& lcb_list, std::vector< double >& weights, MatchVector& deleted_matches ){
+	if( lcb_list.size() < 1 )
+		return;
+	// copy-then-clear yields an empty MatchVector derived from the first LCB rather than a default-constructed one
+	MatchVector lcb_tmp = lcb_list[ 0 ];
+	lcb_tmp.clear();
+	std::vector< MatchVector > filtered_lcbs( lcb_list.size(), lcb_tmp );
+	uint lcbI;
+	// NOTE(review): lcb_list and filtered_lcbs are indexed with indices taken from adjacencies;
+	// this assumes adjacencies has at most lcb_list.size() entries -- confirm
+	for( lcbI = 0; lcbI < adjacencies.size(); lcbI++ ){
+		if( adjacencies[ lcbI ].lcb_id == lcbI ){
+			filtered_lcbs[ lcbI ].insert( filtered_lcbs[ lcbI ].end(), lcb_list[ lcbI ].begin(), lcb_list[ lcbI ].end() );
+			continue;
+		}
+		if( adjacencies[ lcbI ].lcb_id == -1 ){
+			std::cerr << "weird";
+			continue; 	// this one was removed
+		}
+		if( adjacencies[ lcbI ].lcb_id == -2 )
+		{
+			deleted_matches.insert( deleted_matches.end(), lcb_list[lcbI].begin(), lcb_list[lcbI].end() );
+			continue; 	// this one was removed
+		}
+
+		// this one points elsewhere
+		// search and update the union/find structure for the target
+		std::stack< uint > visited_lcbs;
+		visited_lcbs.push( lcbI );
+		uint cur_lcb = adjacencies[ lcbI ].lcb_id;
+		while( adjacencies[ cur_lcb ].lcb_id != cur_lcb ){
+			visited_lcbs.push( cur_lcb );
+			cur_lcb = adjacencies[ cur_lcb ].lcb_id;
+			if( cur_lcb == -1 || cur_lcb == -2 ){
+//				std::cerr << "improper hoodidge\n";
+				break;	// this one points to an LCB that got deleted
+			}
+		}
+		// point every LCB visited on the way directly at the final target
+		while( visited_lcbs.size() > 0 ){
+			adjacencies[ visited_lcbs.top() ].lcb_id = cur_lcb;
+			visited_lcbs.pop();
+		}
+		// add this LCB's matches to the target LCB.
+		if( cur_lcb != -1 && cur_lcb != -2 )
+			filtered_lcbs[ cur_lcb ].insert( filtered_lcbs[ cur_lcb ].end(), lcb_list[ lcbI ].begin(), lcb_list[ lcbI ].end() );
+		else
+			deleted_matches.insert( deleted_matches.end(), lcb_list[lcbI].begin(), lcb_list[lcbI].end() );
+	}
+
+
+	// keep only the LCBs that still hold matches, together with the weight stored at their index
+	lcb_list.clear();
+	std::vector< double > new_weights;
+	for( lcbI = 0; lcbI < filtered_lcbs.size(); lcbI++ ){
+		if( filtered_lcbs[ lcbI ].size() > 0 ){
+			lcb_list.push_back( filtered_lcbs[ lcbI ] );
+			new_weights.push_back( weights[lcbI] );
+		}
+	}
+
+	// sort the matches inside consolidated LCBs
+	mems::MatchStartComparator<mems::AbstractMatch> msc( 0 );
+	for( lcbI = 0; lcbI < lcb_list.size(); lcbI++ ){
+		std::sort( lcb_list[ lcbI ].begin(), lcb_list[ lcbI ].end(), msc );
+	}
+
+	// calculate the LCB adjacencies
+	weights = new_weights;
+	computeLCBAdjacencies_v3( lcb_list, weights, adjacencies );
+
+}
+
+// predeclared to avoid need to include Islands.h
+const score_t INV_SCORE = (std::numeric_limits<score_t>::max)();	// sentinel: GetPairwiseAnchorScore() leaves entries equal to this value out of its sums
+void computeMatchScores( const std::string& seq1, const std::string& seq2, const PairwiseScoringScheme& scoring, std::vector<score_t>& scores );
+void computeGapScores( const std::string& seq1, const std::string& seq2, const PairwiseScoringScheme& scoring, std::vector<score_t>& scores );
+
+
+/**
+ * Sums the anchor scores of all matches in lcb.  Each match is scored column by column with computeMatchScores()
+ * (plus computeGapScores() when penalize_gaps is set); positive scores of columns without a gap are then scaled by the
+ * product of the frequencies sol_1 and sol_2 report for the two aligned positions.  Entries equal to INV_SCORE are not summed.
+ * @return the total score over all matches; a per-match total outside (-1e9, 1e9) is reported as "scoring error"
+ */
+template< class MatchVector >
+double GetPairwiseAnchorScore( MatchVector& lcb, 
+							  std::vector< genome::gnSequence* >& seq_table, 
+							  const mems::PairwiseScoringScheme& subst_scoring, 
+							  mems::SeedOccurrenceList& sol_1, 
+							  mems::SeedOccurrenceList& sol_2, 
+							  bool penalize_gaps )
+{
+	double lcb_score = 0;
+	for( typename MatchVector::iterator match_iter = lcb.begin(); match_iter != lcb.end(); ++match_iter )
+	{
+		typedef typename MatchVector::value_type MatchPtrType;
+		MatchPtrType m = *match_iter;
+		const size_t m_aln_length = m->AlignmentLength();	// queried once: sizes the score vector and bounds the column loop
+		std::vector< score_t > scores(m_aln_length, 0);
+		std::vector< std::string > et;
+		mems::GetAlignment(*m, seq_table, et);
+
+		// get substitution/gap score
+		mems::computeMatchScores( et[0], et[1], subst_scoring, scores );
+		if( penalize_gaps )
+			mems::computeGapScores( et[0], et[1], subst_scoring, scores );
+
+		// scale match scores by uniqueness
+		size_t merI = 0;	// number of non-gap characters already passed in et[0]
+		size_t merJ = 0;	// number of non-gap characters already passed in et[1]
+		const int64 m_leftend_0 = m->LeftEnd(0);
+		const int64 m_leftend_1 = m->LeftEnd(1);
+		for( size_t colI = 0; colI < m_aln_length; ++colI )
+		{
+			if(et[0][colI] != '-' && et[1][colI] != '-' )
+			{
+				mems::SeedOccurrenceList::frequency_type uni1 = sol_1.getFrequency(m_leftend_0 + merI - 1);
+				mems::SeedOccurrenceList::frequency_type uni2 = sol_2.getFrequency(m_leftend_1 + merJ - 1);
+				mems::SeedOccurrenceList::frequency_type uniprod = uni1*uni2;
+				uniprod = uniprod == 0 ? 1 : uniprod;
+				// scale by the uniqueness product, which approximates the number of ways to match up non-unique k-mers
+				// in the worst case of a very repetitive match, the score becomes the negative of the match score
+				if( scores[colI] > 0 )
+				{
+					if(penalize_repeats)	// not a parameter: resolved from the enclosing scope
+						scores[colI] = (score_t)((double)scores[colI] * (2.0 / uniprod)) - scores[colI];
+					else
+						scores[colI] = (score_t)((mems::SeedOccurrenceList::frequency_type)scores[colI] / uniprod);
+				}
+			}
+			if(et[0][colI] != '-')
+				merI++;
+			if(et[1][colI] != '-')
+				merJ++;
+		}
+
+		double m_score = 0;
+		for( size_t i = 0; i < scores.size(); ++i )
+			if( scores[i] != INV_SCORE )
+				m_score += scores[i];
+
+		if( !( m_score > -1000000000 && m_score < 1000000000 ) )
+		{
+			std::cerr << "scoring error\n";
+			genome::breakHere();
+		}
+		lcb_score += m_score;
+	}
+	return lcb_score;
+}
+
+
+
+class EvenFasterSumOfPairsBreakpointScorer	// its pair-based operator(), isValid() and three-argument remove() are the calls greedySearch() makes on its scorer
+{
+public:
+	EvenFasterSumOfPairsBreakpointScorer( 	// NOTE(review): the two weight matrices and tracking_match are declared by value here, i.e. copied at the call
+		double breakpoint_penalty,
+		double minimum_breakpoint_penalty,
+		boost::multi_array<double,2> bp_weight_matrix, 
+		boost::multi_array<double,2> conservation_weight_matrix,
+		std::vector< TrackingMatch* > tracking_match,
+		mems::PairwiseLCBMatrix& pairwise_adjacency_matrix,
+		std::vector<node_id_t>& n1_descendants,
+		std::vector<node_id_t>& n2_descendants,
+		boost::multi_array< double, 3 >& tm_score_array,
+		boost::multi_array< size_t, 3 >& tm_lcb_id_array,
+		size_t seqI_begin,
+		size_t seqI_end,
+		size_t seqJ_begin,
+		size_t seqJ_end
+		);
+
+	/**
+	 * Returns the number of possible moves a search algorithm may make from the current 
+	 * location in LCB search space.  In this case it's simply the total number of pairwise LCBs
+	 */
+	size_t getMoveCount();
+
+	/** returns the score of the current state */
+	double score();
+
+	/** scores a move */
+	double operator()( std::pair< double, size_t >& the_move  );
+
+	/** checks whether a particular move is a valid move */
+	bool isValid( std::pair< double, size_t >& the_move );
+	/** three-argument form of remove(); this is the overload greedySearch() invokes */
+	bool remove( std::pair< double, size_t >& the_move, std::vector< std::pair< double, size_t > >& new_move_list, size_t& new_move_count );
+
+	/** applies a score difference */
+	void applyScoreDifference( boost::multi_array< double, 2 >& lcb_score_diff, boost::multi_array< size_t, 2 >& lcb_removed_count );
+
+	/** undoes a score difference, if it wasn't accepted for example */
+	void undoScoreDifference( boost::multi_array< double, 2 >& lcb_score_diff, boost::multi_array< size_t, 2 >& lcb_removed_count );
+
+	/** returns the maximum number of new moves generated by any LCB removal */
+	size_t getMaxNewMoveCount();
+
+	/** call to indicate that the given LCB has been removed 
+	  * @param really_remove	set to false if the move should merely be checked for validity
+	  * returns false if the move was invalid
+	  */
+	bool remove( std::pair< double, size_t >& the_move, bool really_remove, 
+		boost::multi_array< double, 2 >& lcb_score_diff, boost::multi_array< size_t, 2 >& lcb_removed_count, 
+		bool score_new_moves, std::vector< std::pair< double, size_t > >& new_move_list, size_t& new_move_count );
+
+	/** returns the final set of TrackingMatch values which remain after applying greedy breakpoint elimination */
+	std::vector< mems::TrackingMatch* > getResults();
+
+	/** sanity checks all internal data structures */
+	bool validate();
+
+protected:
+	double bp_penalty;
+	boost::multi_array<double,2> bp_weights;
+	boost::multi_array<double,2> conservation_weights;
+	std::vector< mems::TrackingMatch* > tracking_matches;
+	mems::PairwiseLCBMatrix pairwise_adjacencies;
+	std::vector<node_id_t> n1_des;
+	std::vector<node_id_t> n2_des;
+
+	boost::multi_array< size_t, 2 > pairwise_lcb_count;
+	boost::multi_array< double, 2 > pairwise_lcb_score;
+
+	std::vector< TrackingMatch* > deleted_tracking_matches;
+
+	double min_breakpoint_penalty;
+
+private:
+	// avoid continuous size lookup
+	const size_t seqI_count;
+	const size_t seqJ_count;
+
+	// variables used during score computation
+	boost::multi_array< std::vector< std::pair< uint, uint > >, 2 > all_id_remaps;
+	boost::multi_array< std::vector< uint >, 2 > full_impact_list;
+	boost::multi_array< double, 2 > internal_lcb_score_diff[3];	// three scratch arrays; presumably using_lsd selects among them - TODO confirm in the .cpp
+	boost::multi_array< size_t, 2 > internal_lcb_removed_count[3];
+	int using_lsd;
+	std::vector< double > lsd_zeros;
+	std::vector< size_t > lrc_zeros;
+	std::vector< double > bogus_scores;
+	std::vector< size_t > my_del_lcbs;
+	std::vector< size_t > lcb_ids;
+
+	boost::multi_array< double, 3 >& tm_score_array;	// reference member: no copy of the array is stored
+	boost::multi_array< size_t, 3 >& tm_lcb_id_array;	// reference member: no copy of the array is stored
+
+	// limit to a range of sequences
+	const size_t seqI_first;
+	const size_t seqJ_first;
+	const size_t seqI_last;
+	const size_t seqJ_last;
+
+	// for debugging
+	bool first_time;
+};
+
+
+template< class BreakpointScorerType >
+int64 greedyBreakpointElimination_v4( std::vector< mems::LCB >& adjacencies, std::vector< double >& scores, BreakpointScorerType& bp_scorer, std::ostream* status_out, size_t g1_tag = 0, size_t g2_tag = 0 );
+// forward declarations: both templates are defined further down in this header; the defaults for g1_tag/g2_tag are given only here
+template< class SearchScorer >
+double greedySearch( SearchScorer& spbs );
+
+
+/**
+ * A breakpoint scorer that applies a fixed penalty for each breakpoint that exists in a set of
+ * two or more sequences.  It declares the members that greedyBreakpointElimination_v4() calls on its BreakpointScorerType.
+ */
+class SimpleBreakpointScorer
+{
+public:
+	SimpleBreakpointScorer( std::vector< LCB >& adjacencies, double breakpoint_penalty, bool collinear );
+	/** number of initial candidate moves; greedyBreakpointElimination_v4() scores moves 0 .. getMoveCount()-1 up front */
+	size_t getMoveCount();
+	/** score of the current state; greedyBreakpointElimination_v4() queries it before the first and after every removal */
+	double score();
+	/** consulted by greedyBreakpointElimination_v4() right before remove(); a false result makes it skip the move */
+	bool isValid( size_t lcbI, double move_score );
+
+	/** return the relative change in score if lcbI were to be removed */
+	double operator()( size_t lcbI );
+
+	/** call to indicate that the given LCB has been removed */
+	void remove( uint lcbI, std::vector< std::pair< double, size_t > >& new_moves );
+
+private:
+	std::vector< mems::LCB > adjs;
+	double bp_penalty;
+	std::vector< double > scores;
+	double total_weight;
+	size_t bp_count;
+	bool collinear;
+};
+
+
+class GreedyRemovalScorer	// declares the same public members as SimpleBreakpointScorer, but is constructed from a minimum weight
+{
+public:
+	GreedyRemovalScorer( std::vector< LCB >& adjacencies, double minimum_weight );
+	/** number of initial candidate moves; greedyBreakpointElimination_v4() scores moves 0 .. getMoveCount()-1 up front */
+	size_t getMoveCount();
+	/** score of the current state; greedyBreakpointElimination_v4() queries it before the first and after every removal */
+	double score();
+	/** consulted by greedyBreakpointElimination_v4() right before remove(); a false result makes it skip the move */
+	bool isValid( size_t lcbI, double move_score );
+
+	/** return the relative change in score if lcbI were to be removed */
+	double operator()( size_t lcbI );
+
+	/** call to indicate that the given LCB has been removed */
+	void remove( uint lcbI, std::vector< std::pair< double, size_t > >& new_moves );
+
+private:
+	std::vector< mems::LCB > adjs;
+	double min_weight;
+	std::vector< double > scores;
+	double total_weight;
+};
+
+
+
+
+template< class BreakpointScorerType >
+int64 greedyBreakpointElimination_v4( std::vector< mems::LCB >& adjacencies, 
+			std::vector< double >& scores, BreakpointScorerType& bp_scorer, std::ostream* status_out, 
+			size_t g1_tag, size_t g2_tag )
+{
+	// Greedily removes LCBs: the best-scoring move is popped from a heap, checked with bp_scorer.isValid(), applied with
+	// bp_scorer.remove() and RemoveLCBandCoalesce(), and the follow-up moves reported by bp_scorer are pushed.  Always returns 0.
+	uint lcb_count = adjacencies.size();
+	double total_initial_lcb_weight = 0;
+	for( size_t wI = 0; wI < scores.size(); wI++ )
+		total_initial_lcb_weight += scores[wI];
+	double total_current_lcb_weight = total_initial_lcb_weight;
+
+	if( adjacencies.size() == 0 )
+		return 0;	// nothing can be done
+
+	double prev_score = bp_scorer.score();
+	uint report_frequency = 10;
+	uint moves_made = 0;
+
+	size_t move_count = bp_scorer.getMoveCount();
+	std::vector< std::pair< double, size_t > > move_heap( move_count * 2 );	// twice the size: spare room for new moves
+	size_t heap_end = move_count;	// the heap proper is move_heap[0 .. heap_end)
+	for( size_t moveI = 0; moveI < move_count; ++moveI )
+	{
+		move_heap[moveI].first = bp_scorer(moveI);
+		move_heap[moveI].second = moveI;
+	}
+
+#ifdef LCB_WEIGHT_LOSS_PLOT
+	std::vector< double >::iterator min_iter = std::min_element(scores.begin(), scores.end());
+	double mins = *min_iter;
+	if( status_out != NULL )
+	{
+		(*status_out) << g1_tag << '\t' << g2_tag << '\t' << lcb_count << '\t' << 1 - (total_current_lcb_weight / total_initial_lcb_weight) << '\t' << mins << std::endl;
+	}
+#endif
+
+	// make a heap of moves ordered by score
+	// repeatedly:
+	// 1) pop the highest scoring move off the heap
+	// 2) attempt to apply the move
+	// 3) add any new moves to the heap
+	// 4) stop when the highest scoring move no longer increases the score
+	MoveScoreHeapComparator mshc;
+	std::make_heap( move_heap.begin(), move_heap.begin() + heap_end, mshc );	// heapify only the real moves, not the zero-initialised spare slots
+	while( heap_end > 0 )
+	{
+		std::pop_heap( move_heap.begin(), move_heap.begin()+heap_end, mshc );
+		heap_end--;
+		std::pair< double, size_t > best_move = move_heap[ heap_end ];
+#ifdef LCB_WEIGHT_LOSS_PLOT
+		if( total_current_lcb_weight == scores[best_move.second] )
+			break;	// don't remove the last LCB
+#else
+		if( (best_move.first < 0 ) ||
+			total_current_lcb_weight == scores[best_move.second] )
+			break;	// can't improve score
+#endif
+
+		std::vector< std::pair< double, size_t > > new_moves;
+		bool success = bp_scorer.isValid(best_move.second, best_move.first);
+		if( !success )
+			continue;
+		bp_scorer.remove(best_move.second, new_moves);
+
+		// push the follow-up moves, reusing spare slots first and growing the vector only when it is full
+		for( size_t newI = 0; newI < new_moves.size(); newI++ )
+		{
+			if( heap_end < move_heap.size() )
+			{
+				heap_end++;
+				move_heap[heap_end-1] = new_moves[newI];
+				std::push_heap( move_heap.begin(), move_heap.begin()+heap_end, mshc );
+			}else{
+				// just push the rest on all at once
+				size_t prev_size = move_heap.size();
+				move_heap.insert( move_heap.end(), new_moves.begin()+newI, new_moves.end() );
+				for( size_t newdI = 0; newdI < new_moves.size()-newI; newdI++ )
+					std::push_heap( move_heap.begin(), move_heap.begin()+prev_size+newdI+1, mshc );
+				heap_end = move_heap.size();
+				break;
+			}
+		}
+
+		total_current_lcb_weight -= scores[best_move.second];
+		std::vector< std::pair< uint, uint > > id_remaps;
+		std::vector< uint > impact_list;
+		lcb_count -= RemoveLCBandCoalesce( best_move.second, adjacencies[0].left_end.size(), adjacencies, scores, id_remaps, impact_list );
+#ifdef LCB_WEIGHT_LOSS_PLOT
+		mins = scores[best_move.second];
+		if( status_out != NULL )
+		{
+			(*status_out) << g1_tag << '\t' << g2_tag << '\t' << lcb_count << '\t' << 1 - (total_current_lcb_weight / total_initial_lcb_weight) << '\t' << mins << std::endl;
+		}
+#endif
+		double cur_score = bp_scorer.score();
+		prev_score = cur_score;
+		moves_made++;
+#ifndef LCB_WEIGHT_LOSS_PLOT
+		if( status_out != NULL && moves_made % report_frequency == 0 )
+			(*status_out) << "move: " << moves_made << " alignment score " << cur_score << std::endl;
+#endif
+	}
+
+	return 0;
+}
+
+extern bool debug_aligner;
+
+/** finds the best anchoring, returns the anchoring score.  Moves are taken greedily from a heap of score differences until
+ *  the heap is empty or the best move is negative; entries whose score no longer matches current_moves are stale and skipped. */
+template< class SearchScorer >
+double greedySearch( SearchScorer& spbs )
+{
+	double prev_score = spbs.score();
+	uint report_frequency = 10;
+	uint moves_made = 0;
+	if( debug_aligner )
+		spbs.validate();
+	size_t move_count = spbs.getMoveCount();
+	std::vector< double > current_moves( spbs.getMoveCount() );
+	// use double the size for the move heap to avoid an almost instant reallocation
+	// when a new move gets pushed onto the heap
+	size_t heap_end = spbs.getMoveCount();
+	std::vector< std::pair< double, size_t > > move_heap( spbs.getMoveCount() * 2 );
+	std::vector< std::pair< double, size_t > > new_moves( spbs.getMaxNewMoveCount() + 10 );
+	for( size_t moveI = 0; moveI < move_count; ++moveI )
+	{
+		std::pair< double, size_t > p( 0, moveI );
+		double scorediff = spbs(p) - prev_score;
+		p.first = scorediff;
+		move_heap[moveI] = p;
+		current_moves[moveI] = p.first;
+	}
+	if( debug_aligner )
+		spbs.validate();
+	// make a heap of moves ordered by score
+	// repeatedly:
+	// 1) pop the highest scoring move off the heap
+	// 2) attempt to apply the move
+	// 3) add any new moves to the heap
+	// 4) stop when the highest scoring move no longer increases the score
+	MoveScoreHeapComparator mshc;
+	std::make_heap( move_heap.begin(), move_heap.begin() + heap_end, mshc );
+	double successful = 0;
+	double invalids = 0;
+	int progress = 0;
+	int prev_progress = -1;
+	while( heap_end > 0 )
+	{
+		std::pop_heap( move_heap.begin(), move_heap.begin()+heap_end, mshc );
+		std::pair< double, size_t > best_move = move_heap[--heap_end];
+		if( best_move.first < 0 )
+			break;	// can't improve score
+
+		if( best_move.first != current_moves[best_move.second] )
+			continue;	// stale entry: this move has been rescored or applied since it was pushed
+
+		if( !spbs.isValid(best_move) )
+		{
+			invalids++;
+			continue;
+		}
+
+		size_t new_move_count = 0;
+		bool success = spbs.remove(best_move, new_moves, new_move_count);
+		if( !success )
+		{
+			std::cerr << "numerical instability?  need to investigate this...\n";
+//			genome::breakHere();
+			invalids++;
+			continue;
+		}
+
+		successful++;
+		if( debug_aligner )
+			spbs.validate();
+
+		current_moves[ best_move.second ] = -(std::numeric_limits<double>::max)();
+		for( size_t newI = 0; newI < new_move_count; newI++ )
+			current_moves[ new_moves[newI].second ] = new_moves[newI].first;
+
+		for( size_t newI = 0; newI < new_move_count; newI++ )
+		{
+			if( heap_end < move_heap.size() )
+			{
+				heap_end++;
+				move_heap[heap_end-1] = new_moves[newI];
+				std::push_heap( move_heap.begin(), move_heap.begin()+heap_end, mshc );
+			}else{
+				// storage is full: grow it to hold at least all remaining new moves (1.6x growth when that is larger), then push them
+				const size_t remaining = new_move_count - newI;
+				move_heap.resize( (std::max)((size_t)(heap_end * 1.6), heap_end + remaining) );
+				std::copy( new_moves.begin() + newI, new_moves.begin() + new_move_count, move_heap.begin()+heap_end );
+				for( size_t newdI = 0; newdI < remaining; newdI++ )
+					std::push_heap( move_heap.begin(), move_heap.begin() + (++heap_end), mshc );	// heap_end advances only over pushed moves
+				break;
+			}
+		}
+
+		moves_made++;
+		prev_progress = progress;
+		progress = (100 * moves_made) / move_count;
+		printProgress( prev_progress, progress, std::cout );
+//		if( moves_made % report_frequency == 0 )
+//			cout << "move: " << moves_made << " alignment score " << cur_score << " success ratio " << successful / invalids << endl;
+	}
+
+	return spbs.score();
+}
+
+struct AlnProgressTracker	// plain data holder; nothing in this part of the header reads or writes it
+{
+	gnSeqI total_len;	// presumably the total length to be processed - TODO confirm against the users of this struct
+	gnSeqI cur_leftend;	// presumably the left-end coordinate reached so far - TODO confirm
+	double prev_progress;	// presumably the last progress value that was reported - TODO confirm
+};
+
+
+}	// namespace mems
+
+#endif // __greedyBreakpointElimination_h__
+
diff --git a/libMems/HomologyHMM/algebras.cc b/libMems/HomologyHMM/algebras.cc
new file mode 100644
index 0000000..4996a8e
--- /dev/null
+++ b/libMems/HomologyHMM/algebras.cc
@@ -0,0 +1,52 @@
+/*
+ *    This file is part of HMMoC 1.0, a hidden Markov model compiler.
+ *    Copyright (C) 2006 by Gerton Lunter, Oxford University.
+ *
+ *    HMMoC is free software; you can redistribute it and/or modify
+ *    it under the terms of the GNU General Public License as published by
+ *    the Free Software Foundation; either version 2 of the License, or
+ *    (at your option) any later version.
+ *
+ *    HMMOC is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU General Public License for more details.
+ *
+ *    You should have received a copy of the GNU General Public License
+ *    along with HMMoC; if not, write to the Free Software
+ *    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+\*/
+//
+// algebras.cc - extended real types
+//
+// Gerton Lunter, 27/8/04
+//
+//
+
+
+#include "algebras.h"
+
+
+BFMantissa *BFloat::aConversionLookup;           // Actual location of the static members of BFloat class
+double *BFloat::aDoubleConversionLookup;
+
+
+_BFloatInitialize _dummyInitializer;             // This initializes aConversionLookup and aDoubleConversionLookup
+
+
+_BFloatInitialize::_BFloatInitialize() {
+  // Allocates and fills the two static BFloat lookup tables; nothing in this file releases them again.
+  BFloat::aConversionLookup = new BFMantissa[cBFloatConvTableSize];
+  BFloat::aDoubleConversionLookup = new double[cBFloatDoubleConvTableSize];
+  // aConversionLookup[i] = cBFloatRangeInv^i, accumulated in mantissa precision
+  BFMantissa iBFM = 1.0;
+  for (int i = 0; i < cBFloatConvTableSize; i++) {
+    BFloat::aConversionLookup[ i ] = iBFM;
+    iBFM *= cBFloatRangeInv;
+  }
+  // aDoubleConversionLookup[i] = cBFloatRange^(i - size/2).  The exponent is formed in double on purpose: a BFMantissa
+  // (float) argument selects the float overload of exp(), which yields inf / 0 as soon as |i - size/2| >= 2.
+  const double logRange = log( (double)cBFloatRange );
+  for (int i = 0; i < cBFloatDoubleConvTableSize; i++)
+    BFloat::aDoubleConversionLookup[ i ] = exp( (i-cBFloatDoubleConvTableSize/2) * logRange );
+}
diff --git a/libMems/HomologyHMM/algebras.h b/libMems/HomologyHMM/algebras.h
new file mode 100644
index 0000000..15c109d
--- /dev/null
+++ b/libMems/HomologyHMM/algebras.h
@@ -0,0 +1,558 @@
+/*
+ *    This file is part of HMMoC 1.0, a hidden Markov model compiler.
+ *    Copyright (C) 2006 by Gerton Lunter, Oxford University.
+ *
+ *    HMMoC is free software; you can redistribute it and/or modify
+ *    it under the terms of the GNU General Public License as published by
+ *    the Free Software Foundation; either version 2 of the License, or
+ *    (at your option) any later version.
+ *
+ *    HMMOC is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU General Public License for more details.
+ *
+ *    You should have received a copy of the GNU General Public License
+ *    along with HMMoC; if not, write to the Free Software
+ *    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+\*/
+//
+// algebras.h - extended real types
+//
+// Gerton Lunter, 27/8/04
+//
+
+
+
+#ifndef _algebras_h_
+#define _algebras_h_
+
+
+#include <cstdlib>
+#include <cmath>
+#include <iostream>
+
+
+using namespace std;
+
+
+// typedefs
+
+typedef float BFMantissa;
+const BFMantissa cBFloatRange = 20282409603651670423947251286016.0;  // 2.03e+31; 2^104
+const BFMantissa cBFloatRangeInv = 1.0/cBFloatRange;
+// Aaron E. Darling 6/7/7: need to typecast to avoid compiler warnings about imprecise FP representations
+const BFMantissa cBFloatRangeSqrt    = (BFMantissa)1.0e+18;          // Value between square root of the exponent, and the exponent
+const BFMantissa cBFloatRangeInvSqrt = (BFMantissa)1.0e-18;          // Square of this should still be representable, with full mantissa!
+const BFMantissa logcBFloatRange     = log(cBFloatRange);
+const int cBFloatDigits              = 7;                 // Number of significant digits for printing (7 for floats, 16 for doubles?)
+const int cBFloatInfinity            = 1000000000;        // Tiniest number representable is cBFloatRangeInv ^ BFloatInfinity
+const int cBFloatConvTableSize       = 100;               // This includes many zero entries, it makes additions a bit faster
+const int cBFloatDoubleConvTableSize = 50;                // Table size for bfloat -> double conversion; cBFloatRange^(-size/2) is double 0
+//#define BFLOAT_CHECK_UOFLOW                             // Don't bother with under- and overflow checking.
+
+
+//
+// BFloats: more buoyant floats.
+//
+// struct{ float + int } is 8 bytes; nice size makes noticeable speed difference
+//
+class BFloat {
+ public:
+  static BFMantissa* aConversionLookup;         // used by addition
+  static double* aDoubleConversionLookup;       // used by Value()
+  BFMantissa f;                                 // mantissa
+  int e;                                        // exponent: the represented value is f * cBFloatRange^e
+ public:
+  BFloat(BFMantissa iF, int iE) : f(iF), e(iE) {}
+  BFloat() {}                                   // leaves f and e uninitialised
+  ~BFloat() {}
+  // Converts to double: table lookup while |e| is below half the table size, 0.0 for smaller e, direct exp() for larger e.
+  inline double Value() const {
+    const int iHalf = cBFloatDoubleConvTableSize/2;
+    if (abs(e) < iHalf)
+      return (double)f * aDoubleConversionLookup[ e + iHalf ];
+    if (e < iHalf)
+      return 0.0;
+    return (double)f * exp((double)e * logcBFloatRange);
+  }
+  void clear() { f=0; e=-cBFloatInfinity; }
+};
+
+
+//
+// dummy class to initialise the BFloat lookup tables: its constructor (algebras.cc) allocates and fills both tables,
+// and algebras.cc defines one global instance of it so that this happens during static initialisation
+//
+class _BFloatInitialize {
+public:
+  _BFloatInitialize();
+};
+
+
+
+
+//
+// Finally, all implementations of BFloat calculations
+//
+
+
+// Normalization of BFloat result of a single operation
+#ifdef BFLOAT_CHECK_UOFLOW
+// Checked build: one rescaling step, then the exponent is clamped and over-/underflow is reported on cerr.
+static inline void BFloatNormalise(BFloat& a)
+{
+  if (a.f > cBFloatRangeSqrt) {
+    a.f *= cBFloatRangeInv;
+    a.e++;
+  } else if (a.f < cBFloatRangeInvSqrt) {
+    if (a.f == 0.0) {
+      a.e = -cBFloatInfinity;
+    } else {
+      a.f *= cBFloatRange;
+      a.e--;
+    }
+  }
+  if (a.e > cBFloatInfinity) {
+    cerr << "BFloat: Overflow" << endl;
+    a.e = cBFloatInfinity;
+  } else if (a.e < -cBFloatInfinity) {
+    cerr << "BFloat: Underflow" << endl;
+    a.e = -cBFloatInfinity;
+    a.f = 0.0;
+  }
+}
+#else
+static inline void BFloatNormDown(BFloat& a) {   // mantissa too large: one exponent step down in scale
+  a.e++;
+  a.f *= cBFloatRangeInv;
+}
+// Mantissa too small: one exponent step up in scale; an exact zero gets the most negative exponent instead.
+static inline void BFloatNormUp(BFloat& a) {
+  if (a.f == 0.0) {
+    a.e = -cBFloatInfinity;
+    return;
+  }
+  a.f *= cBFloatRange;
+  a.e--;
+}
+// Unchecked build: at most one rescaling step, in whichever direction is needed.
+static inline void BFloatNormalise(BFloat& a)
+{
+  if (a.f > cBFloatRangeSqrt)
+    BFloatNormDown(a);
+  else if (a.f < cBFloatRangeInvSqrt)
+    BFloatNormUp(a);
+}
+#endif
+
+// Rescales the pair (f, e) in place until f lies within [cBFloatRangeInvSqrt, cBFloatRangeSqrt]; non-positive f becomes zero.
+static inline void DoubleNormalise(double& f, int& e)
+{
+  if (f <= 0.0) {
+    // zero is accepted silently, negative values are reported; both end up as the zero representation
+    if (f < 0.0)
+      cerr << "BFloat: Negative number: " << f << endl;
+    f = 0.0;
+    e = -cBFloatInfinity;
+    return;
+  }
+  // NOTE(review): an infinite f never leaves the first loop (inf * cBFloatRangeInv is still inf)
+  for (; f > cBFloatRangeSqrt; ++e)
+    f *= cBFloatRangeInv;
+  for (; f < cBFloatRangeInvSqrt; --e)
+    f *= cBFloatRange;
+}
+
+// Natural logarithm of a BFloat: log(f) + e*log(cBFloatRange)
+static inline double bfloat_doublelog( const BFloat& a ) { return log(a.f) + a.e*logcBFloatRange; }
+
+// BFloat exp of a double: iA is split into a whole number of log(cBFloatRange) steps (the exponent) and a remainder (the mantissa)
+static inline BFloat bfloat_doubleexp( double iA ) 
+{
+  const int iSteps = (int)floor( iA / log(cBFloatRange) );
+  const double iRest = iA - iSteps * log(cBFloatRange);
+  BFloat iResult( exp(iRest), iSteps );
+  BFloatNormalise( iResult );
+  return iResult;
+}
+
+// Returns a double value - or underflow/overflow if it does not fit.  Thin wrapper around BFloat::Value().
+static inline double bfloat2double( const BFloat bfloat) { return bfloat.Value(); }
+
+// Brain-dead version of double-to-BFloat conversion - can be slow if the mantissa is a float.
+// Non-positive input yields the zero representation (negative values are additionally reported on cerr).
+static inline BFloat double2bfloat( double prob) { 
+  if (prob <= 0.0) {
+    if (prob != 0.0)
+      cerr << "BFloat: Negative number: " << prob << endl;
+    return BFloat (0.0, -cBFloatInfinity );
+  } else {
+    BFloat a( 0.0, 0 );
+    // NOTE(review): prob == +infinity never leaves this loop (inf * cBFloatRangeInv is still inf)
+    while (prob > cBFloatRangeSqrt) {
+      prob *= cBFloatRangeInv;
+      a.e++;
+    }
+    // prob is strictly positive here, so repeated scaling must reach the lower bound; no separate zero test is needed
+    while ((prob < cBFloatRangeInvSqrt)) {
+      prob *= cBFloatRange;
+      a.e--;
+    }
+    a.f = prob; 
+    return a;
+  }
+}
+
+static inline BFloat bfloat_pr_product (const BFloat& a, const BFloat& b)   // product of two BFloats, normalised once
+{ 
+  BFloat sf(a.f*b.f,a.e+b.e); 
+  BFloatNormalise(sf); 
+  return sf; 
+}
+// product of a BFloat and a plain double; the intermediate mantissa is kept in a double and normalised with DoubleNormalise
+static inline BFloat bfloat_pr_double_product (const BFloat& a, double b) 
+{ 
+  double mantissa = a.f*b;
+  int exponent = a.e;
+  DoubleNormalise(mantissa, exponent);
+  return BFloat(mantissa, exponent);
+}
+// in-place product: a *= b
+static inline void bfloat_pr_product_accum( BFloat& a, const BFloat& b) { 
+  a.f *= b.f; a.e += b.e; 
+  BFloatNormalise( a ); 
+}
+// in-place product with a plain double: a *= b
+static inline void bfloat_pr_double_product_accum (BFloat& a, double b) 
+{ 
+  double mantissa = a.f*b;
+  DoubleNormalise(mantissa, a.e);
+  a.f = mantissa;
+}
+// quotient a / b, normalised once (b.f == 0 is not checked)
+static inline BFloat bfloat_pr_quotient( const BFloat& a, const BFloat& b) 
+{ 
+  BFloat sf(a.f/b.f, a.e-b.e); 
+  BFloatNormalise(sf); 
+  return sf;
+}
+// in-place quotient: a /= b
+static inline void bfloat_pr_quotient_accum( BFloat& a, const BFloat& b) 
+{ 
+  a.f /= b.f; 
+  a.e -= b.e; 
+  BFloatNormalise( a ); 
+}
+
+// Sum of two BFloats.  The operand with the smaller exponent is rescaled through aConversionLookup before the mantissas
+// are added; if the exponents differ by cBFloatConvTableSize or more, the operand with the larger exponent is returned as is.
+static inline BFloat bfloat_pr_sum(const BFloat& a, const BFloat& b) 
+{
+  if (a.e > b.e)
+    return (a.e >= b.e + cBFloatConvTableSize)
+      ? a
+      : BFloat( a.f + b.f * BFloat::aConversionLookup[ a.e - b.e ], a.e );
+  return (a.e <= b.e - cBFloatConvTableSize)
+    ? b
+    : BFloat( b.f + a.f * BFloat::aConversionLookup[ b.e - a.e ], b.e );
+}
+
+// In-place sum: a += b, with the same exponent alignment as bfloat_pr_sum.
+// The result is not normalised here.
+static inline void bfloat_pr_sum_accum( BFloat& a, const BFloat& b) 
+{
+  if (a.e >= b.e) {
+    if (a.e < b.e + cBFloatConvTableSize)
+      a.f += b.f * BFloat::aConversionLookup[ a.e - b.e ];
+    return;
+  }
+  if (a.e <= b.e - cBFloatConvTableSize) {
+    a = b;
+    return;
+  }
+  a.f = b.f + a.f * BFloat::aConversionLookup[ b.e - a.e ];
+  a.e = b.e;
+}
+
+// a < b.  Mantissas are compared after rescaling the operand with the smaller exponent via aConversionLookup;
+// when the exponents differ by cBFloatConvTableSize or more, the operand with the larger exponent counts as the larger one.
+static inline bool bfloat_less( const BFloat& a, const BFloat& b) 
+{
+  if (a.e > b.e) {
+    // a has the larger exponent
+    const bool iInTable = a.e < b.e + cBFloatConvTableSize;
+    return iInTable && a.f < b.f * BFloat::aConversionLookup[ a.e - b.e ];
+  }
+  // b has the larger (or the same) exponent
+  const bool iInTable = a.e > b.e - cBFloatConvTableSize;
+  return !iInTable || a.f * BFloat::aConversionLookup[ b.e - a.e ] < b.f;
+}
+
+// a == b.  Operands whose exponents differ by cBFloatConvTableSize or more are never equal;
+// otherwise the mantissas are compared exactly after rescaling the operand with the smaller exponent.
+static inline bool bfloat_equal( const BFloat& a, const BFloat& b) 
+{
+  if (a.e > b.e) {
+    // a has the larger exponent
+    const bool iInTable = a.e < b.e + cBFloatConvTableSize;
+    return iInTable && a.f == b.f * BFloat::aConversionLookup[ a.e - b.e ];
+  }
+  // b has the larger (or the same) exponent
+  const bool iInTable = a.e > b.e - cBFloatConvTableSize;
+  return iInTable && a.f * BFloat::aConversionLookup[ b.e - a.e ] == b.f;
+}
+
+// a <= b.  Same scheme as bfloat_less, with a non-strict comparison of the rescaled mantissas;
+// when the exponents differ by cBFloatConvTableSize or more, the operand with the larger exponent counts as the larger one.
+static inline bool bfloat_lessequal( const BFloat& a, const BFloat& b) 
+{
+  if (a.e > b.e) {
+    // a has the larger exponent
+    const bool iInTable = a.e < b.e + cBFloatConvTableSize;
+    return iInTable && a.f <= b.f * BFloat::aConversionLookup[ a.e - b.e ];
+  }
+  // b has the larger (or the same) exponent
+  const bool iInTable = a.e > b.e - cBFloatConvTableSize;
+  return !iInTable || a.f * BFloat::aConversionLookup[ b.e - a.e ] <= b.f;
+}
+
+// Prints x in scientific notation with cBFloatDigits significant digits; the two infinite exponents print as 1e+Inf / 1e-Inf.
+static inline ostream& bfloat_print( ostream& out, const BFloat& x ) 
+{
+  static const double log10 = log(10.0);
+  static const double maxmantisse = 10.0 * (1.0 - 0.55 * exp(-cBFloatDigits * log10));
+  out.precision( cBFloatDigits );
+  if (x.e == cBFloatInfinity) {
+    out << 1.0 << "e+Inf";
+  } else if (x.e == -cBFloatInfinity) {   // chained with 'else' so that exactly one of the three forms is printed
+    out << 1.0 << "e-Inf";
+  } else {
+    // decimal logarithm of the value, split into an integer exponent and a mantissa in [1,10)
+    double iM = (log(x.f) + log(cBFloatRange)*(double)x.e) / log10;
+    long iExp = long(floor(iM));
+    iM = exp((iM - iExp) * log10);
+    if (iM > maxmantisse) {
+      iExp += 1;
+      iM = 1.0;
+    }
+    out << iM << ( iExp<0 ? "e" : "e+" ) << iExp;
+  }
+  // NOTE(review): this sets the stream precision to 6 instead of restoring whatever the caller had configured
+  out.precision( 6 );           // default
+  return out;
+}
+
+
+
+//
+// Wrapper to allow BFloats to be used by Algebra template: every member forwards to the matching bfloat_* helper.
+// pdiff and pdiffacc are not implemented - they only print a message to cerr (pdiff then returns BFloat(0,0)).
+struct BFloatMethods
+{
+  typedef BFloat Value;
+  static inline double to_prob (BFloat iX) { return bfloat2double(iX); }
+  static inline BFloat from_prob (double iP) { return double2bfloat(iP); }
+  static inline BFloat pmul( BFloat iX, BFloat iY) { return bfloat_pr_product(iX,iY); }
+  static inline BFloat pmuldouble( BFloat iX, double iY) { return bfloat_pr_double_product(iX,iY); }
+  static inline BFloat pdiv( BFloat iX, BFloat iY) { return bfloat_pr_quotient(iX,iY); }
+  static inline BFloat psum( BFloat iX, BFloat iY) { return bfloat_pr_sum(iX,iY); }
+  static inline BFloat pdiff( BFloat iX, BFloat iY) { cerr << "Bfloat pdiff: Not implemented." << endl; return BFloat(0,0); }
+  static inline BFloat doubleexp( double iX) { return bfloat_doubleexp(iX); }
+  static inline double doublelog( BFloat iX) { return bfloat_doublelog(iX); }
+  static inline void pmulacc( BFloat& iX, BFloat iY) { bfloat_pr_product_accum(iX,iY); }
+  static inline void pmulaccdouble( BFloat& iX, double iY) { bfloat_pr_double_product_accum(iX,iY); }
+  static inline void pdivacc( BFloat& iX, BFloat iY) { bfloat_pr_quotient_accum(iX,iY); }
+  static inline void psumacc( BFloat& iX, BFloat iY) { bfloat_pr_sum_accum(iX,iY); }
+  static inline void pdiffacc( BFloat& iX, BFloat iY) { cerr << "Bfloat pdiffacc: Not implemented." << endl; }
+  static inline bool less( BFloat iX, BFloat iY) { return bfloat_less(iX,iY); }
+  static inline bool equal( BFloat iX, BFloat iY) { return bfloat_equal(iX,iY); }
+  static inline bool lessequal( BFloat iX, BFloat iY) { return bfloat_lessequal(iX,iY); }
+  static inline ostream& print( ostream& iOut, BFloat iX ) { return bfloat_print( iOut, iX ); }
+};
+
+
+
+
+//
+// Simple log-space numbers - don't use, except possibly for Viterbi
+//
+class Logspace {
+  double x;                               // the stored number; LogspaceMethods keeps log(probability) in it
+ public:
+  Logspace() {}                           // x is left uninitialised
+  Logspace( double iValue ) : x(iValue) {}
+  operator double&() { return x; }        // gives direct read/write access to x
+  void clear() { x = -1.0e+300; }
+};
+
+// log(exp(iX)+exp(iY)), meant for iX >= iY (logspace_add passes the larger value first); beyond a gap of 36.7 the result is iX itself
+inline Logspace logspace_addsmall( Logspace iX, Logspace iY ) {
+  return (iX - iY > 36.7) ? iX : Logspace( iX + log(1.0+exp(iY-iX)) );
+}
+// log-space sum for arguments in either order
+inline Logspace logspace_add( Logspace iX, Logspace iY ) {
+  return (iX>iY) ? logspace_addsmall(iX,iY) : logspace_addsmall(iY,iX);
+}
+
+struct LogspaceMethods	// method set for Logspace values: a probability p is stored as log(p), so products become sums
+{
+  typedef Logspace Value;
+  static inline double to_prob (Value iX) { return exp(iX); }
+  static inline Value from_prob (double iP) { return Value(log(iP)); }
+  static inline Value pmul( Value iX, Value iY) { return iX+iY; }
+  static inline Value pmuldouble( Value iX, double iY) { return iX+log(iY); }
+  static inline Value pdiv( Value iX, Value iY) { return iX-iY; }
+  static inline Value psum( Value iX, Value iY) { return logspace_add(iX,iY); }
+  static inline Value pdiff( Value iX, Value iY) { cerr << "Logspace pdiff: Not implemented." << endl; return 0.0; }	// placeholder: reports on cerr and returns 0.0
+  static inline Value doubleexp( double iX) { return iX; }	// the stored value already is the logarithm
+  static inline double doublelog( Value iX) { return iX; }
+  static inline void pmulacc( Value& iX, Value iY) { iX+=iY; }
+  static inline void pmulaccdouble( Value& iX, double iY) { iX+=log(iY); }
+  static inline void pdivacc( Value& iX, Value iY) { iX -= iY; }
+  static inline void psumacc( Value& iX, Value iY) { iX = logspace_add(iX,iY); }
+  static inline void pdiffacc( Value& iX, Value iY) { cerr << "Logspace pdiffacc: Not implemented." << endl; }	// placeholder: reports on cerr, iX is left unchanged
+  static inline bool less( Value iX, Value iY) { return iX<iY; }
+  static inline bool equal( Value iX, Value iY) { return iX==iY; }
+  static inline bool lessequal( Value iX, Value iY) { return iX<=iY; }
+  static inline ostream& print( ostream& iOut, Value iX ) { return bfloat_print( iOut, bfloat_doubleexp(iX) ); }	// printed through the BFloat formatter
+};
+
+
+
+//
+// Algebra - Wrapper for overloading all arithmetic operators, to use different 'algebras'.
+//
+// Gerton Lunter, 19/3/03
+// Based on logprob.h by Ian Holmes.
+//
+
+
+// Number type whose arithmetic, comparison and stream operators are all
+// forwarded to the static functions of AlgebraMethods, so the same client
+// code can run on different internal representations.  The representation
+// itself is AlgebraMethods::Value and is held in the public member 'val'.
+//
+// Doubles passed to this class are converted with from_prob(); converting
+// back to double goes through to_prob().
+template <class AlgebraMethods>
+class Algebra {
+public:
+  // typedef
+  typedef typename AlgebraMethods::Value Value;
+
+  // value
+  Value val;
+
+public:
+  // constructors
+  Algebra() { }  // no initialisation, for speed
+  Algebra (double px) : val(from_prob(px)) { }
+  Algebra (const Algebra& lx) : val(lx.val) { }
+  Algebra (const BFloat v) : val(v) { }
+
+  // fast initialization
+  void clear() { val.clear(); }
+
+  // assignment operators
+  inline Algebra& operator= (const Algebra& lx) { val = lx.val; return *this; }
+  inline Algebra& operator= (double px) { val = from_prob(px); return *this; }
+
+  // arithmetic operators; all combinations of Algebra and double are covered
+  inline friend Algebra operator+ (const Algebra& lx, const Algebra& ly) { return from_log (psum (lx.val, ly.val)); }
+  inline friend Algebra operator+ (const Algebra& lx, double py) { return from_log (psum (lx.val, from_prob(py))); }
+  inline friend Algebra operator+ (double px, const Algebra& ly) { return from_log (psum (from_prob(px), ly.val)); }
+  inline Algebra& operator+= (const Algebra& lx) { psumacc (val, lx.val); return *this; }
+  inline Algebra& operator+= (double px) { psumacc (val, from_prob(px)); return *this; }
+
+  // subtraction forwards to pdiff/pdiffacc; the method sets visible in this
+  // file only print "Not implemented" for those
+  inline friend Algebra operator- (const Algebra& lx, const Algebra& ly) { return from_log (pdiff (lx.val, ly.val)); }
+  inline friend Algebra operator- (const Algebra& lx, double py) { return from_log (pdiff (lx.val, from_prob(py))); }
+  inline friend Algebra operator- (double px, const Algebra& ly) { return from_log (pdiff (from_prob(px), ly.val)); }
+  inline Algebra& operator-= (const Algebra& lx) { pdiffacc (val, lx.val); return *this; }
+  inline Algebra& operator-= (double px) { pdiffacc (val, from_prob(px)); return *this; }
+
+  inline friend Algebra operator* (const Algebra& lx, const Algebra& ly) { return from_log (pmul (lx.val, ly.val)); }
+  inline friend Algebra operator* (const Algebra& lx, double py) { return from_log (pmuldouble (lx.val, py)); }
+  inline friend Algebra operator* (double px, const Algebra& ly) { return from_log (pmuldouble (ly.val, px)); }
+  inline Algebra& operator*= (const Algebra& lx) { pmulacc (val, lx.val); return *this; }
+  inline Algebra& operator*= (double px) { pmulaccdouble (val, px); return *this; }
+
+  inline friend Algebra operator/ (const Algebra& lx, const Algebra& ly) { return from_log (pdiv (lx.val, ly.val)); }
+  inline friend Algebra operator/ (const Algebra& lx, double py) { return from_log (pdiv (lx.val, from_prob(py))); }
+  inline friend Algebra operator/ (double px, const Algebra& ly) { return from_log (pdiv (from_prob(px), ly.val)); }
+  inline Algebra& operator/= (const Algebra& lx) { pdivacc (val, lx.val); return *this; }
+  inline Algebra& operator/= (double px) { pdivacc (val, from_prob(px)); return *this; }
+
+  // miscellaneous operators
+  inline friend double log( const Algebra& lx ) { return doublelog( lx.val ); }
+  // exp: convert the stored value to a double, exponentiate it in the
+  // internal representation, and wrap the resulting Value.  (The stored
+  // Value, not the Algebra object, is what to_prob expects, and the Value
+  // from doubleexp has to be wrapped with from_log.)
+  inline friend Algebra exp( const Algebra& px ) { return from_log( doubleexp( to_prob(px.val) ) ); }
+  
+  // increment & decrement
+  Algebra& operator++() { *this += 1.; return *this; }
+  Algebra operator++(int) { Algebra tmp (*this); ++(*this); return tmp; }
+
+  Algebra& operator--() { *this -= 1.; return *this; }
+  Algebra operator--(int) { Algebra tmp (*this); --(*this); return tmp; }
+
+  // relational operators
+  inline friend int operator== (const Algebra& lx, const Algebra& ly) { return equal(lx.val, ly.val); }
+  inline friend int operator== (const Algebra& lx, const double py) { return equal(lx.val, from_prob(py)); }
+  inline friend int operator== (const double px, const Algebra& ly) { return equal(from_prob(px), ly.val); }
+
+  inline friend int operator!= (const Algebra& lx, const Algebra& ly) { return !equal(lx.val, ly.val); }
+  inline friend int operator!= (const Algebra& lx, const double py) { return !equal(lx.val, from_prob(py)); }
+  inline friend int operator!= (const double px, const Algebra& ly) { return !equal(from_prob(px), ly.val); }
+
+  inline friend int operator< (const Algebra& lx, const Algebra& ly) { return less(lx.val, ly.val); }
+  inline friend int operator< (const Algebra& lx, const double py) { return less(lx.val, from_prob(py)); }
+  inline friend int operator< (const double px, const Algebra& ly) { return less(from_prob(px), ly.val); }
+
+  // a > b is evaluated as b < a
+  inline friend int operator> (const Algebra& lx, const Algebra& ly) { return less(ly.val, lx.val); }
+  inline friend int operator> (const Algebra& lx, const double py) { return less(from_prob(py), lx.val); }
+  inline friend int operator> (const double px, const Algebra& ly) { return less(ly.val, from_prob(px)); }
+
+  inline friend int operator<= (const Algebra& lx, const Algebra& ly) { return lessequal(lx.val, ly.val); }
+  inline friend int operator<= (const Algebra& lx, const double py) { return lessequal( lx.val, from_prob(py) ); }
+  inline friend int operator<= (const double px, const Algebra& ly) { return lessequal( from_prob(px), ly.val); }
+
+  // a >= b is evaluated as b <= a
+  inline friend int operator>= (const Algebra& lx, const Algebra& ly) { return lessequal( ly.val, lx.val); }
+  inline friend int operator>= (const Algebra& lx, const double py) { return lessequal( from_prob(py), lx.val ); }
+  inline friend int operator>= (const double px, const Algebra& ly) { return lessequal( ly.val, from_prob(px) ); }
+
+  // stream operators
+  inline friend ostream& operator<< (ostream& out, const Algebra& lx) { return AlgebraMethods::print(out, lx.val); }
+  // Reads a plain double and stores it through from_prob(), exactly like
+  // operator=(double).  The target must be writable, hence the non-const
+  // reference; it is left untouched when extraction fails.
+  inline friend istream& operator>> (istream& in, Algebra& lx) { double px; if (in >> px) lx.val = from_prob(px); return in; }
+
+  // cast operators
+  inline double prob() const { return to_prob (val); }
+  inline operator double() const { return to_prob (val); }
+
+private:
+  // private AlgebraMethods method wrappers
+  static inline double to_prob (Value X) { return AlgebraMethods::to_prob (X); }
+  static inline Value from_prob (double P) { return AlgebraMethods::from_prob (P); }
+  static inline Value pmul (Value X, Value Y) { return AlgebraMethods::pmul (X, Y); }
+  static inline Value pmuldouble (Value X, double Y) { return AlgebraMethods::pmuldouble (X, Y); }
+  static inline Value pdiv (Value X, Value Y) { return AlgebraMethods::pdiv( X, Y); }
+  static inline Value psum (Value X, Value Y) { return AlgebraMethods::psum (X, Y); }
+  static inline Value pdiff (Value X, Value Y) { return AlgebraMethods::pdiff (X, Y); }
+  static inline Value doubleexp (double X) { return AlgebraMethods::doubleexp( X ); }
+  static inline double doublelog (Value X) { return AlgebraMethods::doublelog( X ); }
+  static inline void pmulacc (Value& X, Value Y) { AlgebraMethods::pmulacc (X, Y); }
+  static inline void pmulaccdouble (Value& X, double Y) { AlgebraMethods::pmulaccdouble (X, Y); }
+  static inline void pdivacc( Value& X, Value Y) { AlgebraMethods::pdivacc( X, Y); }
+  static inline void psumacc (Value& X, Value Y) { AlgebraMethods::psumacc (X, Y); }
+  static inline void pdiffacc (Value& X, Value Y) { AlgebraMethods::pdiffacc (X, Y); }
+  static inline bool less (Value X, Value Y ) { return AlgebraMethods::less( X, Y ); }
+  static inline bool equal (Value X, Value Y ) { return AlgebraMethods::equal( X, Y ); }
+  static inline bool lessequal( Value X, Value Y ) { return AlgebraMethods::lessequal( X, Y ); }
+
+public:
+  // static constructor from an already-converted internal Value
+  static inline Algebra from_log (Value X) { Algebra lx; lx.val = X; return lx; }
+};
+
+
+
+//
+// and bfloats are the things that we'll use:
+//
+
+#define bfloat Algebra<BFloatMethods>
+
+#define logspace Algebra<LogspaceMethods>
+
+#endif
diff --git a/libMems/HomologyHMM/dptables.h b/libMems/HomologyHMM/dptables.h
new file mode 100644
index 0000000..90c9c9a
--- /dev/null
+++ b/libMems/HomologyHMM/dptables.h
@@ -0,0 +1,387 @@
+/*
+ *    This file is part of HMMoC 1.0, a hidden Markov model compiler.
+ *    Copyright (C) 2006 by Gerton Lunter, Oxford University.
+ *
+ *    HMMoC is free software; you can redistribute it and/or modify
+ *    it under the terms of the GNU General Public License as published by
+ *    the Free Software Foundation; either version 2 of the License, or
+ *    (at your option) any later version.
+ *
+ *    HMMOC is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU General Public License for more details.
+ *
+ *    You should have received a copy of the GNU General Public License
+ *    along with HMMoC; if not, write to the Free Software
+ *    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+\*/
+/*
+ *
+ * DPTable.h -- ordinary and sparse dynamic programming tables
+ *
+ * Gerton Lunter, 1 Oct 2006
+ *
+ * Modified for GCC 4.0.2 and Microsoft Visual Studio 2005 by Aaron Darling, 2007
+ *
+ */
+
+#ifndef __dptable_h_
+#define __dptable_h_
+
+
+#include <map>
+#include <cassert>
+
+
+// Pick a hash_map implementation for the compiler in use and make it
+// reachable as Sgi::hash_map.  HAVE_HASH_MAP is defined on every path
+// except Microsoft Visual C++, where the code below falls back to the
+// tree-based map.
+#ifdef __GNUC__
+ #define HAVE_HASH_MAP
+ #if __GNUC__ < 3
+  #include <hash_map.h>
+  namespace Sgi { using ::hash_map; }; // inherit globals
+ #else
+  #include <ext/hash_map>
+  // In this branch __GNUC__ is at least 3, so the sum equals 3 only for
+  // major version 3 with minor version 0.
+  #if __GNUC_MINOR__ + __GNUC__ == 3
+   namespace Sgi = std;               // GCC 3.0
+  #else
+   namespace Sgi = ::__gnu_cxx;       // GCC 3.1 and later
+  #endif
+ #endif
+#else      // ...there are other compilers, right?
+#ifdef _MSC_VER
+// visual studio 2005 has no hash map.  older versions did.
+#else
+// default for all other compilers
+// NOTE(review): no hash_map header is included on this path -- presumably
+// one is expected to be available already; confirm for the target compiler.
+#define HAVE_HASH_MAP
+namespace Sgi = std;
+#endif
+#endif
+
+
+using std::map;
+#ifdef HAVE_HASH_MAP
+using Sgi::hash_map;
+#endif
+
+// Define aliases for two maps: red-black trees, and hashes
+// (GNU C++ does not define a hash function for long long int, so we have to define our own)
+
+// Identity hash functor: the argument, taken as a long long, is returned
+// converted to size_t.  The template parameter is not used by the body.
+template<class key>
+struct _hash {
+  size_t operator()(long long value) const {
+    return static_cast<size_t>(value);
+  }
+};
+
+// typedefs can't be templated
+
+// Two-parameter alias for map<key,value>, written as an empty derived
+// class; adds no members of its own.
+template<class key, class value>
+class treemap : public map<key,value> {};
+
+#ifdef HAVE_HASH_MAP
+// Two-parameter alias for hash_map keyed through _hash<key>, written as an
+// empty derived class; adds no members of its own.
+template<class key, class value>
+class hashmap : public hash_map<key,value,_hash<key> > {};
+#endif
+
+// select one of the maps (the hash map is faster and appears to use less memory)
+
+#ifdef HAVE_HASH_MAP
+#define _mymap hashmap
+#else
+#define _mymap treemap
+#endif
+
+
+
+// States are stored in a self-initializing array
+
+template<class Real,int size> class States;
+
+// Specialisation for plain doubles: a fixed-size array whose elements are
+// all set to 0 on construction, usable wherever a double* is expected.
+template<int size>
+class States<double,size> {
+public:
+  enum { length = size };   // number of elements, available at compile time
+
+  // Zero every element, front to back.
+  States() {
+    for (double* cell = data; cell != data + size; ++cell) {
+      *cell = 0;
+    }
+  }
+
+  // Implicit views onto the underlying array.
+  operator double* () {
+    return data;
+  }
+  operator const double* () const {
+    return data;
+  }
+
+private:
+  double data[size];
+};
+
+// General case: a fixed-size array of Real in which clear() is called on
+// every element on construction, usable wherever a Real* is expected.
+template<class Real, int size>
+class States {
+public:
+  enum { length = size };   // number of elements, available at compile time
+
+  // Call clear() on every element, front to back.
+  States() {
+    for (Real* cell = data; cell != data + size; ++cell) {
+      cell->clear();
+    }
+  }
+
+  // Implicit views onto the underlying array.
+  operator Real* () {
+    return data;
+  }
+  operator const Real* () const {
+    return data;
+  }
+
+private:
+  Real data[size];
+};
+
+
+
+// Define index types to serve as keys to the DP table position
+
+// _index<dim>::t is the unsigned integer type the sparse tables of that
+// dimension use as their map key.  The primary template is empty, so only
+// dimensions 1 to 4 provide a type.
+// NOTE(review): the width of 'unsigned long' depends on the platform --
+// confirm it is wide enough for the 2-dimensional key range needed.
+template<int dim> class _index {};
+template<> class _index<1> { public: typedef unsigned int t; };
+template<> class _index<2> { public: typedef unsigned long t; };
+template<> class _index<3> { public: typedef unsigned long long t; };
+template<> class _index<4> { public: typedef unsigned long long t; };
+
+
+//
+// Base classes for a dynamic programming table
+//
+// DP tables provide the following methods:
+//
+// const States& read(...)   :   read access to state array
+// States& write(...)        :   write access to state array
+// void written()            :   signal that write access is finished
+// void allocate(...)        :   inform table about its dimensions; must be called before read/write access
+// void clear()              :   empties the table; keeps its dimensions
+// void clear(int)           :   empties one column of a (folded) DP table
+// void absolve()            :   ensures that table does not delete its data; another table with reference to the data,
+//                               which is created by the default copy constructor, is now responsible.  Not allowed for
+//                               folded tables
+//
+
+// Common base of all DP tables: records whether this object is the one
+// whose destructor has to release the table data.
+template<class States>
+class _DPT {
+ public:
+  typedef States states_type;
+
+  // A freshly constructed table owns its data.
+  _DPT() {
+    isInCharge = true;
+  }
+
+  // Give up ownership: the destructor will no longer destroy the data.
+  void absolve() {
+    isInCharge = false;
+  }
+
+  // Hook called when a write has finished; does nothing here and exists
+  // for extensions.
+  void written() {
+  }
+
+ protected:
+  bool isInCharge;      // true if this class' destructor destroys the data
+
+ private:
+  // Placeholder so that non-folded tables have a dummy clear(int).
+  void clear(int i) {
+  }
+};
+
+// Wrapper for memory-efficient Fw/Bw/Baum-Welch: owns two tables of one
+// dimension less and hands them to the FoldedTable specialisations.
+template<template<typename,int> class DPTable, class States, int dim>
+class _FoldedTable : public _DPT<States> {
+ protected:
+  DPTable<States,dim-1>* aTables[2];
+ public:
+  // Create both sub-tables up front.
+  _FoldedTable() {
+    for (int slot = 0; slot < 2; ++slot) {
+      aTables[slot] = new DPTable<States,dim-1>();
+    }
+  }
+  // A folded table must still own its data when destroyed; retaining the
+  // data elsewhere is not allowed.
+  ~_FoldedTable() {
+    assert(_DPT<States>::isInCharge);
+    for (int slot = 0; slot < 2; ++slot) {
+      delete aTables[slot];
+    }
+  }
+  // Empty the sub-table selected by the parity of i.
+  void clear(int i) {
+    const int slot = i % 2;
+    aTables[slot]->clear();
+  }
+};
+  
+// Primary templates.  They are intentionally empty: the usable interfaces
+// are provided by the partial specialisations on 'dim' that follow.
+template<class States, int dim>
+class DPTable {};
+
+template<class States, int dim>
+class SparseDPTable {};
+
+template<template<typename,int> class DPTable, class States, int dim>
+class FoldedTable {};
+
+
+
+// Explicit partial specializations for up to 4 spatial dimensions
+
+
+// Zero-dimensional folded table: every call is forwarded to the first of
+// the two sub-tables.
+template<template<typename,int> class DPTable, class States>
+class FoldedTable<DPTable, States, 0> : public _FoldedTable<DPTable, States, 1> {
+ public:
+  void allocate() {
+    this->aTables[0]->allocate();
+  }
+  const States& read() const {
+    return this->aTables[0]->read();
+  }
+  States& write() {
+    return this->aTables[0]->write();
+  }
+  void written() {
+    this->aTables[0]->written();
+  }
+};
+
+
+// One-dimensional folded table: the parity of the coordinate selects one
+// of two zero-dimensional sub-tables.
+template<template<typename,int> class DPTable, class States>
+class FoldedTable<DPTable, States, 1> : public _FoldedTable<DPTable, States, 1> {
+  int z;   // sub-table chosen by the most recent write()
+ public:
+  // Both sub-tables are allocated; the extent itself is not needed.
+  void allocate(int a) {
+    this->aTables[0]->allocate();
+    this->aTables[1]->allocate();
+  }
+  const States& read(int a) const {
+    const int slot = a % 2;
+    return this->aTables[slot]->read();
+  }
+  // Remembers the selected sub-table so written() can notify it.
+  States& write(int a) {
+    z = a % 2;
+    return this->aTables[z]->write();
+  }
+  void written() {
+    this->aTables[z]->written();
+  }
+};
+
+
+// Two-dimensional folded table: the parity of the last coordinate selects
+// one of two one-dimensional sub-tables.
+template<template<typename,int> class DPTable, class States>
+class FoldedTable<DPTable, States, 2> : public _FoldedTable<DPTable, States, 2> {
+  int z;   // sub-table chosen by the most recent write()
+ public:
+  // Only the first extent is passed on; both sub-tables get the same size.
+  void allocate(int a, int b) {
+    this->aTables[0]->allocate(a);
+    this->aTables[1]->allocate(a);
+  }
+  const States& read(int a, int b) const {
+    const int slot = b % 2;
+    return this->aTables[slot]->read(a);
+  }
+  // Remembers the selected sub-table so written() can notify it.
+  States& write(int a, int b) {
+    z = b % 2;
+    return this->aTables[z]->write(a);
+  }
+  void written() {
+    this->aTables[z]->written();
+  }
+};
+
+
+// Three-dimensional folded table: the parity of the last coordinate
+// selects one of two two-dimensional sub-tables.
+template<template<typename,int> class DPTable, class States>
+class FoldedTable<DPTable, States, 3> : public _FoldedTable<DPTable, States, 3> {
+  int z;   // sub-table chosen by the most recent write()
+ public:
+  // Only the first two extents are passed on; both sub-tables get the same size.
+  void allocate(int a, int b, int c) {
+    this->aTables[0]->allocate(a, b);
+    this->aTables[1]->allocate(a, b);
+  }
+  const States& read(int a, int b, int c) const {
+    const int slot = c % 2;
+    return this->aTables[slot]->read(a, b);
+  }
+  // Remembers the selected sub-table so written() can notify it.
+  States& write(int a, int b, int c) {
+    z = c % 2;
+    return this->aTables[z]->write(a, b);
+  }
+  void written() {
+    this->aTables[z]->written();
+  }
+};
+
+
+// Four-dimensional folded table: the parity of the last coordinate selects
+// one of two three-dimensional sub-tables.
+template<template<typename,int> class DPTable, class States>
+class FoldedTable<DPTable, States, 4> : public _FoldedTable<DPTable, States, 4> {
+  int z;   // sub-table chosen by the most recent write()
+ public:
+  // Only the first three extents are passed on; both sub-tables get the same size.
+  void allocate(int a, int b, int c, int d) {
+    this->aTables[0]->allocate(a, b, c);
+    this->aTables[1]->allocate(a, b, c);
+  }
+  const States& read(int a, int b, int c, int d) const {
+    const int slot = d % 2;
+    return this->aTables[slot]->read(a, b, c);
+  }
+  // Remembers the selected sub-table so written() can notify it.
+  States& write(int a, int b, int c, int d) {
+    z = d % 2;
+    return this->aTables[z]->write(a, b, c);
+  }
+  void written() {
+    this->aTables[z]->written();
+  }
+};
+
+
+// Zero-dimensional dense table: holds a single heap-allocated States
+// object.
+template<class States>
+class DPTable<States,0> : public _DPT<States> {
+private:
+  States *pTable;
+public:
+  // Starts without storage; allocate() must be called before read/write.
+  DPTable() : pTable(0) {
+  }
+  // Releases the cell only if it exists and this table still owns it.
+  ~DPTable() {
+    const bool owned = _DPT<States>::isInCharge;
+    if (owned && pTable) {
+      delete pTable;
+    }
+  }
+  void allocate() {
+    pTable = new States();
+  }
+  // Replaces the cell by a freshly constructed one.
+  void clear() {
+    delete pTable;
+    allocate();
+  }
+  const States& read() const {
+    return *pTable;
+  }
+  States& write() {
+    return *pTable;
+  }
+};
+
+
+// One-dimensional dense table: a heap-allocated array of 'maxa' States
+// objects, indexed directly by the coordinate.
+template<class States>
+class DPTable<States,1> : public _DPT<States> {
+private:
+  States *pTable;
+  int maxa;
+public:
+  // Starts without storage.  The extent is set to 0 so that clear() before
+  // allocate() re-allocates an empty array instead of reading an
+  // indeterminate size.
+  DPTable() : pTable(0), maxa(0) {}
+  // Releases the array only if it exists and this table still owns it.
+  ~DPTable() { if (pTable && _DPT<States>::isInCharge ) { delete[] pTable; } }
+  // Records the extent and allocates one States per index.
+  void allocate(int a) { maxa = a; pTable = new States[a]; }
+  // Replaces the array by a freshly constructed one of the same extent.
+  void clear() { delete[] pTable; allocate(maxa); }
+  const States& read(int a) const { return pTable[a]; }
+  States& write(int a) { return pTable[a]; }
+};
+
+
+// Two-dimensional dense table stored as one flat array; cell (a,b) lives
+// at offset a + maxa*b.
+template<class States>
+class DPTable<States,2> : public _DPT<States> {
+private:
+  States *pTable;
+  int maxa, maxb;
+  // Flat offset of cell (a,b), computed in size_t so that large tables do
+  // not overflow int arithmetic.
+  size_t offset(int a, int b) const { return size_t(a) + size_t(maxa)*size_t(b); }
+public:
+  // Starts without storage.  Extents are set to 0 so that clear() before
+  // allocate() re-allocates an empty array instead of reading
+  // indeterminate sizes.
+  DPTable() : pTable(0), maxa(0), maxb(0) {}
+  // Releases the array only if it exists and this table still owns it.
+  ~DPTable() { if (pTable && _DPT<States>::isInCharge ) { delete[] pTable; } }
+  // Records the extents and allocates a*b cells (count computed in size_t).
+  void allocate(int a, int b) { maxa = a; maxb = b; pTable = new States[size_t(a)*size_t(b)]; }
+  // Replaces the array by a freshly constructed one of the same extents.
+  void clear() { delete[] pTable; allocate(maxa,maxb); }
+  const States& read(int a, int b) const { return pTable[offset(a,b)]; }
+  States& write(int a, int b) { return pTable[offset(a,b)]; }
+};
+
+
+// Three-dimensional dense table stored as one flat array; cell (a,b,c)
+// lives at offset a + maxa*(b + maxb*c).
+template<class States>
+class DPTable<States,3> : public _DPT<States> {
+private:
+  States *pTable;
+  int maxa, maxb, maxc;
+  // Flat offset of cell (a,b,c), computed in size_t so that large tables
+  // do not overflow int arithmetic.
+  size_t offset(int a, int b, int c) const {
+    return size_t(a) + size_t(maxa)*(size_t(b) + size_t(maxb)*size_t(c));
+  }
+public:
+  // Starts without storage.  Extents are set to 0 so that clear() before
+  // allocate() re-allocates an empty array instead of reading
+  // indeterminate sizes.
+  DPTable() : pTable(0), maxa(0), maxb(0), maxc(0) {}
+  // Releases the array only if it exists and this table still owns it.
+  ~DPTable() { if (pTable && _DPT<States>::isInCharge ) { delete[] pTable; } }
+  // Records the extents and allocates a*b*c cells (count computed in size_t).
+  void allocate(int a, int b, int c) { maxa = a; maxb = b; maxc = c; pTable = new States[size_t(a)*size_t(b)*size_t(c)]; }
+  // Replaces the array by a freshly constructed one of the same extents.
+  void clear() { delete[] pTable; allocate(maxa,maxb,maxc); }
+  const States& read(int a, int b, int c) const { return pTable[offset(a,b,c)]; }
+  States& write(int a, int b, int c) { return pTable[offset(a,b,c)]; }
+};
+
+
+// A zero-dimensional table has a single cell, so the sparse variant simply
+// reuses the dense DPTable<States,0>.
+template<class States>
+class SparseDPTable<States,0> : public DPTable<States,0> {};
+
+
+// One-dimensional sparse table: cells live in a heap-allocated map keyed
+// by the coordinate; cells that were never written read as a
+// default-constructed States.
+template<class States>
+class SparseDPTable<States,1> : public _DPT<States> {
+private:
+  typedef _index<1>::t idx;
+  _mymap<idx,States> &table;
+  const States zero;           // returned by read() for absent cells
+public:
+  SparseDPTable() : table(*new _mymap<idx,States>), zero() {}
+  // Destroys the map only while this table still owns it.
+  ~SparseDPTable() { if (_DPT<States>::isInCharge) delete &table; }
+  // Nothing to reserve: cells are created on first write.
+  void allocate(int a) {}
+  void clear() { table.clear(); }
+  // Creates the cell if it does not exist yet.
+  States& write(int a) { return table[idx(a)]; }
+  // Never inserts; absent cells yield the shared 'zero' object.
+  const States& read(int a) const {
+    typename _mymap<idx,States>::const_iterator iter = table.find(idx(a));
+    if (iter == table.end()) return zero;
+    return iter->second;
+  }
+};
+
+
+// Two-dimensional sparse table: cells live in a heap-allocated map keyed
+// by a + maxa*b; cells that were never written read as a
+// default-constructed States.
+template<class States>
+class SparseDPTable<States,2> : public _DPT<States> {
+private:
+  typedef _index<2>::t idx;
+  _mymap<idx,States> &table;
+  idx maxa;
+  const States zero;           // returned by read() for absent cells
+  // Single key builder shared by read() and write(), so both always
+  // address the same cell for the same coordinates.
+  idx cellKey(int a, int b) const { return idx(a)+maxa*idx(b); }
+public:
+  // The extent starts at 0 so it is never read while indeterminate.
+  SparseDPTable() : table(*new _mymap<idx,States>), maxa(0), zero() {}
+  // Destroys the map only while this table still owns it.
+  ~SparseDPTable() { if (_DPT<States>::isInCharge) delete &table; }
+  // Only the first extent is needed to build keys.
+  void allocate(int a, int b) { maxa = a; }
+  void clear() { table.clear(); }
+  // Creates the cell if it does not exist yet.
+  States& write(int a, int b) { return table[cellKey(a,b)]; }
+  // Never inserts; absent cells yield the shared 'zero' object.
+  const States& read(int a, int b) const {
+    typename _mymap<idx,States>::const_iterator iter = table.find(cellKey(a,b));
+    if (iter == table.end()) return zero;
+    return iter->second;
+  }
+};
+
+
+// Three-dimensional sparse table: cells live in a heap-allocated map keyed
+// by a + maxa*(b + maxb*c); cells that were never written read as a
+// default-constructed States.
+template<class States>
+class SparseDPTable<States,3> : public _DPT<States> {
+
+private:
+  typedef _index<3>::t idx;
+  _mymap<idx,States> &table;
+  idx maxa, maxb;
+  const States zero;           // returned by read() for absent cells
+  // Single key builder shared by read() and write(), so both always
+  // address the same cell for the same coordinates.
+  idx cellKey(int a, int b, int c) const { return idx(a)+maxa*(idx(b)+maxb*idx(c)); }
+public:
+  // The extents start at 0 so they are never read while indeterminate.
+  SparseDPTable() : table(*new _mymap<idx,States>), maxa(0), maxb(0), zero() {}
+  // Destroys the map only while this table still owns it.
+  ~SparseDPTable() { if (_DPT<States>::isInCharge) delete &table; }
+  // Only the first two extents are needed to build keys.
+  void allocate(int a, int b, int c) { maxa = a; maxb = b; }
+  void clear() { table.clear(); }
+  // Creates the cell if it does not exist yet.
+  States& write(int a, int b, int c) { return table[cellKey(a,b,c)]; }
+  // Never inserts; absent cells yield the shared 'zero' object.
+  const States& read(int a, int b, int c) const {
+    typename _mymap<idx,States>::const_iterator iter = table.find(cellKey(a,b,c));
+    if (iter == table.end()) return zero;
+    return iter->second;
+  }
+};
+
+
+// Four-dimensional sparse table: cells live in a heap-allocated map keyed
+// by a + maxa*(b + maxb*(c + maxc*d)); cells that were never written read
+// as a default-constructed States.
+template<class States>
+class SparseDPTable<States,4> : public _DPT<States> {
+private:
+  typedef _index<4>::t idx;
+  _mymap<idx,States> &table;
+  idx maxa, maxb, maxc;
+  const States zero;           // returned by read() for absent cells
+
+  // Flattens the four coordinates into one map key.
+  idx cellKey(int a, int b, int c, int d) const {
+    return unsigned(a)+maxa*(unsigned(b)+maxb*(unsigned(c)+maxc*unsigned(d)));
+  }
+public:
+  SparseDPTable() : table(*new _mymap<idx,States>), zero() {
+  }
+  // Destroys the map only while this table still owns it.
+  ~SparseDPTable() {
+    if (_DPT<States>::isInCharge) {
+      delete &table;
+    }
+  }
+  // Only the first three extents are needed to build keys.
+  void allocate(int a, int b, int c, int d) {
+    maxa = a;
+    maxb = b;
+    maxc = c;
+  }
+  void clear() {
+    table.clear();
+  }
+  // Creates the cell if it does not exist yet.
+  States& write(int a, int b, int c, int d) {
+    return table[cellKey(a, b, c, d)];
+  }
+  // Never inserts; absent cells yield the shared 'zero' object.
+  const States& read(int a, int b, int c, int d) const {
+    typename _mymap<idx,States>::const_iterator hit = table.find(cellKey(a, b, c, d));
+    if (hit != table.end()) {
+      return hit->second;
+    }
+    return zero;
+  }
+};
+
+
+#endif
+
diff --git a/libMems/HomologyHMM/homology.cc b/libMems/HomologyHMM/homology.cc
new file mode 100644
index 0000000..55d10be
--- /dev/null
+++ b/libMems/HomologyHMM/homology.cc
@@ -0,0 +1,786 @@
+/* Code generated by HMMoC version VERSION, Copyright (C) 2006 Gerton Lunter */
+/* Generated from file homology.xml (author: Aaron Darling) on Mon Jul 16 11:09:12 EST 2007 */
+
+/*
+This file is a work based on HMMoC VERSION, a hidden Markov model compiler.
+Copyright (C) 2006 by Gerton Lunter, Oxford University.
+
+HMMoC and works based on it are free software; you can redistribute 
+it and/or modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 2 of the License, or (at your option) any later version.
+
+HMMOC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with HMMoC; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+
+#include "homology.h"
+
+
+// Forward declarations of the identifier tables and their element counts.
+// The definitions appear further down in this file; declaring them here
+// lets the member functions below refer to them first.
+const extern string _HomologystateId[];
+const extern string _HomologyemissionId[];
+const extern string _HomologytransitionId[];
+const extern string _HomologytransF[];
+const extern string _HomologytransT[];
+const extern string _HomologytransP[];
+const extern string _HomologytransE[];
+const extern string _HomologyoutputId[];
+const extern string _Homologyempty;
+const extern int _HomologystateNum;
+const extern int _HomologyemitNum;
+const extern int _HomologytransNum;
+const extern int _HomologyoutputNum;
+
+// Builds a table for a sequence of length iLen: binds the identifier
+// tables, records the length, and allocates the three state memory blocks
+// (block 2 sized by iLen, blocks 1 and 3 without arguments).
+HomologyDPTable::HomologyDPTable(int iLen)
+    : isInCharge(true),
+      stateId(_HomologystateId),
+      emissionId(_HomologyemissionId),
+      transitionId(_HomologytransitionId),
+      transitionFrom(_HomologytransF),
+      transitionTo(_HomologytransT),
+      transitionProb(_HomologytransP),
+      transitionEmit(_HomologytransE),
+      outputId(_HomologyoutputId)
+{
+    this->iLen = iLen;
+    StateMemoryblock2.allocate(iLen);
+    StateMemoryblock1.allocate();
+    StateMemoryblock3.allocate();
+}
+
+
+// When this object no longer owns its data, tell each memory block not to
+// destroy its storage; otherwise the blocks clean up normally.
+HomologyDPTable::~HomologyDPTable() {
+    if (isInCharge) {
+        return;
+    }
+    // make sure data does not get deleted:
+    StateMemoryblock2.absolve();
+    StateMemoryblock1.absolve();
+    StateMemoryblock3.absolve();
+} // destructor
+
+// Name of transition number 'id', or the empty string when out of range.
+const string& HomologyDPTable::getTransitionId(int id) {
+    if (id < 0 || id >= _HomologytransNum) {
+        return _Homologyempty;
+    }
+    return _HomologytransitionId[id];
+}
+// Name of emission number 'id', or the empty string when out of range.
+const string& HomologyDPTable::getEmissionId(int id) {
+    if (id < 0 || id >= _HomologyemitNum) {
+        return _Homologyempty;
+    }
+    return _HomologyemissionId[id];
+}
+// Name of state number 'id', or the empty string when out of range.
+const string& HomologyDPTable::getStateId(int id) {
+    if (id < 0 || id >= _HomologystateNum) {
+        return _Homologyempty;
+    }
+    return _HomologystateId[id];
+}
+// Name of output number 'id', or the empty string when out of range.
+const string& HomologyDPTable::getOutputId(int id) {
+    if (id < 0 || id >= _HomologyoutputNum) {
+        return _Homologyempty;
+    }
+    return _HomologyoutputId[id];
+}
+// Translates an identifier (state, emission, transition or output name)
+// into its index within its own table.  All four kinds share one lookup
+// map, which is built on first use.
+//
+// Returns the index, or -1 when the identifier is unknown (a warning is
+// written to cout).  The special identifier "_cleanup_" frees the lookup
+// map and also returns -1; the map is rebuilt on the next call.
+int HomologyDPTable::getId(const string& sId)
+{
+    static bool bInit = false;
+    static map<string,int>* pmId = 0;
+    if (!bInit) {
+        pmId = new map<string,int>();
+        for (int i=0;i<_HomologystateNum;i++) {
+            (*pmId)[_HomologystateId[i]] = i;         // add state identifiers
+        }
+        for (int i=0; i<_HomologyemitNum; i++) {
+            (*pmId)[_HomologyemissionId[i]] = i;      // add emission identifiers
+        }
+        for (int i=0; i<_HomologytransNum; i++) {  
+            (*pmId)[_HomologytransitionId[i]] = i;    // add transition identifiers
+        }
+        for (int i=0; i<_HomologyoutputNum; i++) {
+            (*pmId)[_HomologyoutputId[i]] = i;        // finally, add output identifiers
+        }
+        bInit = true;
+    }
+    map<string,int>::iterator iter = pmId->find(sId);
+    if (iter == pmId->end()) {
+        if (sId == "_cleanup_") {
+            // Forget the freed map so that a later call rebuilds it instead
+            // of dereferencing a dangling pointer.
+            delete pmId;
+            pmId = 0;
+            bInit = false;
+        } else {
+            cout << "HomologyDPTable::getId: WARNING: identifier '" << sId << "' not found." << endl;
+        }
+        return -1;
+    }
+    return iter->second;
+}
+
+
+// Convenience overload: resolves the state name with getId() and forwards
+// to the index-based getProb().
+bfloat HomologyDPTable::getProb(const string sState ,int iPos0) const
+{
+    const int iState = getId(sState);
+    return getProb(iState, iPos0);
+}
+
+
+// Returns the stored value for state iState at position iPos0.
+//
+// blockTable maps a state to the memory block that holds it, stateTable to
+// its slot inside that block.  Block 0 is valid only at position 0,
+// block 1 at positions 1..iLen, block 2 only at position iLen.  Anything
+// else -- including a state index outside the tables, such as the -1 that
+// getId() returns for an unknown name -- yields 0.0.
+bfloat HomologyDPTable::getProb(int iState ,int iPos0) const
+{
+    static const int blockTable[] = {0, 1, 1, 2};
+    static const int stateTable[] = {0, 0, 1, 0};
+    static const int numStates = sizeof(blockTable)/sizeof(blockTable[0]);
+    // Reject invalid states before they are used as an array index.
+    if (iState < 0 || iState >= numStates) {
+        return 0.0;
+    }
+    switch (blockTable[iState]) {
+        default:
+        return 0.0;
+        case 0:
+        if (iPos0 == 0) {
+            const bfloat *CurStateMemoryblock1Secondary = this->StateMemoryblock1.read();
+            return CurStateMemoryblock1Secondary[stateTable[iState]];
+        }
+        return 0.0;
+        case 1:
+        if ((iPos0 >= 1) && (iPos0 <= iLen)) {
+            // block 2 is indexed from 0, positions start at 1
+            const bfloat *CurStateMemoryblock2Secondary = this->StateMemoryblock2.read(iPos0-1);
+            return CurStateMemoryblock2Secondary[stateTable[iState]];
+        }
+        return 0.0;
+        case 2:
+        if (iPos0 == iLen) {
+            const bfloat *CurStateMemoryblock3Secondary = this->StateMemoryblock3.read();
+            return CurStateMemoryblock3Secondary[stateTable[iState]];
+        }
+        return 0.0;
+    } // switch
+} // DPTable...::getProb(int,...)
+
+// Looks strId up in the shared identifier map; returns the stored index,
+// or -1 (after a warning on cout) when the identifier is unknown.
+int HomologyBaumWelch::transitionIndex(string strId) const {
+    map<const string,int>::const_iterator hit = mId.find(strId);
+    if (hit != mId.end()) {
+        return hit->second;
+    }
+    cout << "HomologyBaumWelch::transitionIndex: WARNING: identifier '" << strId << "' not found." << endl;
+    return -1;
+}
+
+
+// Looks strId up in the shared identifier map; returns the stored index,
+// or -1 (after a warning on cout) when the identifier is unknown.
+int HomologyBaumWelch::emissionIndex(string strId) const {
+    map<const string,int>::const_iterator hit = mId.find(strId);
+    if (hit != mId.end()) {
+        return hit->second;
+    }
+    cout << "HomologyBaumWelch::emissionIndex: WARNING: identifier '" << strId << "' not found." << endl;
+    return -1;
+}
+
+
+// Sets every Baum-Welch counter to zero.  On the very first call it also
+// fills the identifier tables, the reverse index arrays and the name map.
+void HomologyBaumWelch::resetCounts() {
+    static bool bInited = false;
+    const bool firstCall = !bInited;
+
+    // transitions: 8 identifiers, one counter each
+    if (firstCall) {
+        static const int kTransitionIds[] = {0, 1, 2, 3, 4, 5, 6, 7};
+        for (int slot = 0; slot < 8; ++slot) {
+            const int id = kTransitionIds[slot];
+            transitionIdentifier0[slot] = id;
+            atransitionIdx[id] = slot;
+            mId[_HomologytransitionId[id]] = slot;
+        }
+    }
+    for (int slot = 0; slot < 8; ++slot) {
+        transitionBaumWelchCount0[slot] = 0.0;
+    }
+
+    // first emission group: 1 identifier, one counter
+    if (firstCall) {
+        static const int kEmissionIds0[] = {1};
+        for (int slot = 0; slot < 1; ++slot) {
+            const int id = kEmissionIds0[slot];
+            emissionIdentifier0[slot] = id;
+            aemissionIdx[id] = slot;
+            mId[_HomologyemissionId[id]] = slot;
+        }
+    }
+    for (int slot = 0; slot < 1; ++slot) {
+        emissionBaumWelchCount0[slot] = 0.0;
+    }
+
+    // second emission group: 2 identifiers, 8 counters each
+    if (firstCall) {
+        static const int kEmissionIds1[] = {0, 2};
+        for (int slot = 0; slot < 2; ++slot) {
+            const int id = kEmissionIds1[slot];
+            emissionIdentifier1[slot] = id;
+            aemissionIdx[id] = slot;
+            mId[_HomologyemissionId[id]] = slot;
+        }
+    }
+    for (int slot = 0; slot < 2; ++slot) {
+        for (int v00 = 0; v00 < 8; ++v00) {
+            emissionBaumWelchCount1[v00][slot] = 0.0;
+        }
+    }
+
+    bInited = true;
+}
+
+
+// Out-of-class definitions of the identifier arrays filled by
+// resetCounts().
+// NOTE(review): no array bounds are given here -- presumably they come
+// from the declarations in homology.h; confirm.
+int HomologyBaumWelch::transitionIdentifier0[];
+int HomologyBaumWelch::emissionIdentifier0[];
+int HomologyBaumWelch::emissionIdentifier1[];
+
+// Multiplies every Baum-Welch counter (8 transition counters, 1 counter of
+// the first emission group, 8x2 counters of the second) by 'scale'.
+void HomologyBaumWelch::scaleCounts(bfloat scale) {
+    for (int slot = 0; slot < 8; ++slot) {
+        transitionBaumWelchCount0[slot] *= scale;
+    }
+    for (int slot = 0; slot < 1; ++slot) {
+        emissionBaumWelchCount0[slot] *= scale;
+    }
+    for (int slot = 0; slot < 2; ++slot) {
+        for (int v00 = 0; v00 < 8; ++v00) {
+            emissionBaumWelchCount1[v00][slot] *= scale;
+        }
+    }
+}
+
+
+// Out-of-class definitions of the name-to-index map and the reverse index
+// arrays, all of which are filled by resetCounts().
+// NOTE(review): the array bounds are not given here -- presumably they
+// come from the declarations in homology.h; confirm.
+map<const string,int> HomologyBaumWelch::mId;
+int HomologyBaumWelch::atransitionIdx[];
+int HomologyBaumWelch::aemissionIdx[];
+
+// Returns i when i > j, otherwise j.
+bfloat hmmocMax(bfloat i, bfloat j) {
+    if (i>j) {
+        return i;
+    }
+    return j;
+}
+// Overwrites i with j when i < j; otherwise leaves i unchanged.
+void hmmocMaxInPlace(bfloat& i, bfloat j) {
+    if (i<j) {
+        i = j;
+    }
+}
+// Writes one line per path element in the form
+//   <fromState>--{<e0>,<e1>,...};<prob>--><toState>
+ostream& operator<<(ostream& os, const Path& p)
+{
+    for (unsigned int edge = 0; edge < p.size(); ++edge) {
+        os << p.fromState(edge);
+        os << "--{";
+        // emitted symbols, comma-separated
+        for (unsigned int k = 0; k < p.emission(edge).size(); ++k) {
+            if (k != 0) {
+                os << ",";
+            }
+            os << p.emission(edge)[k];
+        }
+        os << "};";
+        os << p.prob(edge);
+        os << "-->";
+        os << p.toState(edge);
+        os << endl;
+    }
+    return os;
+}
+
+// Appends one edge to the path by pushing each attribute onto its own
+// parallel vector: transition tr, probability p, emission vector e (copied),
+// from-state f and to-state t.
+void SimplePath::addEdge(int tr, double p, vector<int>& e, int f, int t) {
+    transitions.push_back(tr);
+    probs.push_back(p);
+    emissions.push_back(e);
+    froms.push_back(f);
+    tos.push_back(t);
+}
+
+// Reverses the order of the edges by reversing every parallel vector, so
+// the attributes of each edge stay aligned.
+void SimplePath::reverse() 
+{
+    std::reverse(transitions.begin(),transitions.end());
+    std::reverse(probs.begin(),probs.end());
+    std::reverse(emissions.begin(),emissions.end());
+    std::reverse(froms.begin(),froms.end());
+    std::reverse(tos.begin(),tos.end());
+}
+
+// Probability stored for edge i (no range check).
+double SimplePath::prob(int i) const {
+    return probs[i];
+}
+
+// Index of the edge following edge i, or -1 when i is the last edge.
+int SimplePath::nextFrom(int i) const {
+    const int following = i + 1;
+    const int edgeCount = (int)transitions.size();
+    return following < edgeCount ? following : -1;
+}
+
+int SimplePath::nextTo(int i) const {   // argument is ignored
+    return -1;                           // always -1 (NOTE(review): the header comment on Path::nextTo says i+1 - confirm)
+}
+
+const vector<int>& SimplePath::emission(int i) const {   // emission vector stored for edge i
+    return emissions[i];                                  // reference into this object; operator[]: no bounds check
+}
+
+int SimplePath::fromState(int i) const {   // value passed as `f` to addEdge() for edge i
+    return froms[i];                        // operator[]: no bounds check on i
+}
+
+int SimplePath::toState(int i) const {   // value passed as `t` to addEdge() for edge i
+    return tos[i];                        // operator[]: no bounds check on i
+}
+
+const string _HomologystateId[] = {"start","homologous","unrelated","end"};   // state names; position = integer state id (0..3)
+const string _HomologyemissionId[] = {"emitHomologous","empty","emitUnrelated"};   // emission names
+const string _HomologytransitionId[] = {"id$13","id$14","id$15","id$16","id$17","id$18","id$19","id$20"};   // transition names, position = transition id (0..7)
+const string _HomologytransF[] = {"start","start","homologous","homologous","unrelated","unrelated","homologous","unrelated"};   // source state of each transition
+const string _HomologytransT[] = {"homologous","unrelated","homologous","unrelated","unrelated","homologous","end","end"};   // target state of each transition
+const string _HomologytransP[] = {"startHomologous","startUnrelated","stayHomologous","goUnrelated","stayUnrelated","goHomologous","goStopFromHomologous","goStopFromUnrelated"};   // probability name of each transition
+const string _HomologytransE[] = {"emitHomologous","emitUnrelated","emitHomologous","emitUnrelated","emitUnrelated","emitHomologous","empty","empty"};   // emission name attached to each transition
+const string _HomologyoutputId[] = {"sequence"};   // output names
+const string _Homologyempty = "";
+const int _HomologystateNum = 4;    // entries in _HomologystateId
+const int _HomologyemitNum = 3;     // entries in _HomologyemissionId
+const int _HomologytransNum = 8;    // entries in each of the per-transition tables above
+const int _HomologyoutputNum = 1;   // entries in _HomologyoutputId
+
+
+
+
+bfloat Forward(HomologyDPTable** ppOutTable,Params iPar,char *aSeq,int iLen) {
+    // Forward algorithm for the Homology HMM over aSeq[0..iLen-1].
+    // Returns the value accumulated in the end-state cell (StateMemoryblock3[0]).
+    // *ppOutTable receives a heap copy (new) of the filled table; the local `dp`
+    // then gives up ownership (isInCharge=false) so its destructor leaves the arrays alone.
+    // Symbols index the 8-entry emission arrays as (symbol - '1') with no range
+    // check, so aSeq must only contain the characters '1'..'8'.
+    // StateMemoryblock2 slots: [0] = homologous, [1] = unrelated.
+    double iTransition[8];
+    bfloat *CurStateMemoryblock2To;
+    const bfloat *CurStateMemoryblock1From;
+    const bfloat *CurStateMemoryblock2From;
+    bfloat *CurStateMemoryblock3To;
+    const bfloat *CurStateMemoryblock3From;
+    int iPrevSlowCoord;
+    int iSymbol[1];
+    if (false && iSymbol[0] == iSymbol[0]) {}   // avoid 'unused variable' warnings
+    double iEmission[2];
+    /* temporary storage for ordinary reals ('register' dropped: removed from the language in C++17) */
+    double iTempResult[1];
+    /* temporary storage for extended-exponent reals ('register' dropped, see above) */
+    bfloat iTempProb[1];
+    HomologyDPTable dp(iLen);
+    iTransition[0] = iPar.iStartHomologous;  // start -> homologous
+    iTransition[1] = 1.0 - iPar.iStartHomologous;  // start -> unrelated
+    iTransition[2] = 1.0 - iPar.iGoUnrelated - iPar.iGoStopFromHomologous;  // homologous -> homologous
+    iTransition[3] = iPar.iGoUnrelated;  // homologous -> unrelated
+    iTransition[4] = 1.0 - iPar.iGoHomologous - iPar.iGoStopFromUnrelated;  // unrelated -> unrelated
+    iTransition[5] = iPar.iGoHomologous;  // unrelated -> homologous
+    iTransition[6] = iPar.iGoStopFromHomologous;  // homologous -> end
+    iTransition[7] = iPar.iGoStopFromUnrelated;  // unrelated -> end
+    dp.StateMemoryblock1.write()[0] = 1.0;  // start state has probability 1
+    dp.StateMemoryblock1.written();
+    iPrevSlowCoord = -1;
+    for (int iPos0=0; iPos0<iLen+1; ++iPos0) {
+        if ((iPos0+0<=0)) {
+        }
+        if ((iPos0+0>=1)) {
+            if ((iPos0+-1>=0)) {
+                iSymbol[0] = aSeq[iPos0+-1];  // cell iPos0 consumes symbol iPos0-1
+            } 
+            else { 
+                iSymbol[0] = '1' /* dummy value */;
+                
+            }
+            CurStateMemoryblock2To = dp.StateMemoryblock2.write((iPos0-(0))-(1));
+            iTempResult[0] = iPar.aEmitUnrelated[ iSymbol[0] - '1' ];
+            iEmission[0] = iTempResult[0];  // [0] = unrelated emission
+            iTempResult[0] = iPar.aEmitHomologous[ iSymbol[0] - '1' ];
+            iEmission[1] = iTempResult[0];  // [1] = homologous emission
+            if ((iPos0+-1<=0)) {  // first symbol: only reachable from the start state
+                CurStateMemoryblock1From = dp.StateMemoryblock1.read();
+                CurStateMemoryblock2To[1] = ((iTransition[1])*(iEmission[0]))*CurStateMemoryblock1From[0];
+                CurStateMemoryblock2To[0] = ((iTransition[0])*(iEmission[1]))*CurStateMemoryblock1From[0];
+            }
+            if ((iPos0+-1>=1)) {  // later symbols: '+=' onto the fresh cell - presumably write() hands back zeroed storage; confirm in dptables.h
+                CurStateMemoryblock2From = dp.StateMemoryblock2.read((iPos0-(1))-(1));
+                CurStateMemoryblock2To[1] += ((iTransition[4])*(iEmission[0]))*CurStateMemoryblock2From[1];
+                CurStateMemoryblock2To[1] += ((iTransition[3])*(iEmission[0]))*CurStateMemoryblock2From[0];
+                CurStateMemoryblock2To[0] += ((iTransition[5])*(iEmission[1]))*CurStateMemoryblock2From[1];
+                CurStateMemoryblock2To[0] += ((iTransition[2])*(iEmission[1]))*CurStateMemoryblock2From[0];
+            }
+            dp.StateMemoryblock2.written();
+        }
+        if ((iPos0+0>=iLen+0)) {  // last position: close the path into the end state
+            CurStateMemoryblock3To = dp.StateMemoryblock3.write();
+            iEmission[0] = 1.0;  // end transitions emit nothing
+            if ((iPos0+0>=1)) {  // skipped when iLen==0, so the end cell is not assigned in that case
+                CurStateMemoryblock2From = dp.StateMemoryblock2.read((iPos0-(0))-(1));
+                CurStateMemoryblock3To[0] = ((iTransition[7])*(iEmission[0]))*CurStateMemoryblock2From[1];
+                CurStateMemoryblock3To[0] += ((iTransition[6])*(iEmission[0]))*CurStateMemoryblock2From[0];
+            }
+            dp.StateMemoryblock3.written();
+        }
+        iPrevSlowCoord = iPos0;
+    }
+    iPrevSlowCoord = -1;
+    {
+        int iPos0=iLen+0;
+        if (iPos0==iPos0) {} // avoid 'unused variable' warnings
+        CurStateMemoryblock3From = dp.StateMemoryblock3.read();
+        iTempProb[0] = CurStateMemoryblock3From[0];  // result = end-state cell
+    }
+    *ppOutTable = new HomologyDPTable(dp);
+    // make sure tables don't get deleted
+    dp.isInCharge = false;
+    return iTempProb[0];
+};
+
+
+
+
+
+bfloat Backward(HomologyBaumWelch& bw,HomologyDPTable* pInTable,HomologyDPTable** ppOutTable,Params iPar,char *aSeq,int iLen) {
+    // Backward pass that also accumulates Baum-Welch counts into `bw`.
+    // pInTable is read through the copy `dp2` (not in charge of the arrays); it is
+    // presumably the table produced by Forward() for the same aSeq/iLen/iPar - confirm with callers.
+    // bw is first multiplied by the total read from pInTable's end cell (iTempProb[2]) and divided
+    // by it again after the loop, so only the counts added here end up divided by that total.
+    // Returns the start-state cell (StateMemoryblock1[0]); *ppOutTable receives a heap copy of the new table.
+    // Symbols index the 8-entry emission arrays as (symbol - '1') with no range check: aSeq must hold '1'..'8' only.
+    const bfloat *CurStateMemoryblock3Secondary;
+    double iTransition[8];
+    bfloat *CurStateMemoryblock2To;
+    const bfloat *CurStateMemoryblock2Secondary;
+    const bfloat *CurStateMemoryblock2From;
+    unsigned char alphaSymbolsitepatterns[8] = {'1', '2', '3', '4', '5', '6', '7', '8'};
+    unsigned char alphaIndexsitepatterns[256];
+    const bfloat *CurStateMemoryblock3From;
+    bfloat *CurStateMemoryblock1To;
+    const bfloat *CurStateMemoryblock1Secondary;
+    const bfloat *CurStateMemoryblock1From;
+    int iPrevSlowCoord;
+    int iSymbol[1];
+    if (false && iSymbol[0] == iSymbol[0]) {}   // avoid 'unused variable' warnings
+    double iEmission[2];
+    /* temporary storage for ordinary reals ('register' dropped: removed from the language in C++17) */
+    double iTempResult[1];
+    /* temporary storage for extended-exponent reals ('register' dropped, see above) */
+    bfloat iTempProb[3];
+    HomologyDPTable dp(iLen);
+    HomologyDPTable dp2(*pInTable);
+    // make sure tables don't get deleted
+    dp2.isInCharge = false;
+    iTransition[0] = iPar.iStartHomologous;  // start -> homologous
+    iTransition[1] = 1.0 - iPar.iStartHomologous;  // start -> unrelated
+    iTransition[2] = 1.0 - iPar.iGoUnrelated - iPar.iGoStopFromHomologous;  // homologous -> homologous
+    iTransition[3] = iPar.iGoUnrelated;  // homologous -> unrelated
+    iTransition[4] = 1.0 - iPar.iGoHomologous - iPar.iGoStopFromUnrelated;  // unrelated -> unrelated
+    iTransition[5] = iPar.iGoHomologous;  // unrelated -> homologous
+    iTransition[6] = iPar.iGoStopFromHomologous;  // homologous -> end
+    iTransition[7] = iPar.iGoStopFromUnrelated;  // unrelated -> end
+    for (int i=0; i<256; i++) {
+        alphaIndexsitepatterns[i]=0;
+    }
+    for (int i=0; i<8; i++) {
+        alphaIndexsitepatterns[alphaSymbolsitepatterns[i]]=i;  // character '1'..'8' -> row 0..7 of emissionBaumWelchCount1
+    }
+    dp.StateMemoryblock3.write()[0] = 1.0;  // end state has backward value 1
+    dp.StateMemoryblock3.written();
+    iPrevSlowCoord = -1;
+    {
+        int iPos0=iLen+0;
+        if (iPos0==iPos0) {} // avoid 'unused variable' warnings
+        CurStateMemoryblock3Secondary = dp2.StateMemoryblock3.read();
+        iTempProb[2] = CurStateMemoryblock3Secondary[0];  // total taken from the input table's end cell
+        bw.scaleCounts(iTempProb[2]);
+    }
+    iPrevSlowCoord = -1;
+    for (int iPos0=(iLen+1)-1; iPos0>=0; --iPos0) {
+        if ((iPos0+0>=iLen+0)) {
+        }
+        if ((iPos0+0>=1)) {
+            if ((iPos0+0<=iLen+-1)) {
+                iSymbol[0] = aSeq[iPos0+0];  // symbol emitted when leaving cell iPos0
+            } 
+            else { 
+                iSymbol[0] = '1' /* dummy value */;
+                
+            }
+            CurStateMemoryblock2To = dp.StateMemoryblock2.write((iPos0-(0))-(1));
+            CurStateMemoryblock2Secondary = dp2.StateMemoryblock2.read((iPos0-(0))-(1));
+            iTempResult[0] = iPar.aEmitUnrelated[ iSymbol[0] - '1' ];
+            iEmission[0] = iTempResult[0];  // [0] = unrelated emission
+            iTempResult[0] = iPar.aEmitHomologous[ iSymbol[0] - '1' ];
+            iEmission[1] = iTempResult[0];  // [1] = homologous emission
+            if ((iPos0+1<=iLen+0)) {
+                CurStateMemoryblock2From = dp.StateMemoryblock2.read((iPos0-(-1))-(1));
+                CurStateMemoryblock2To[0] = iTempProb[1] = ((iTransition[2])*(iEmission[1]))*CurStateMemoryblock2From[0];
+                iTempProb[1] *= CurStateMemoryblock2Secondary[0];  // times the input-table value of the source state
+                bw.transitionBaumWelchCount0[2] += iTempProb[1];
+                bw.emissionBaumWelchCount1[alphaIndexsitepatterns[iSymbol[0]]][0] += iTempProb[1];  // column 0: homologous emissions
+                CurStateMemoryblock2To[0] += iTempProb[1] = ((iTransition[3])*(iEmission[0]))*CurStateMemoryblock2From[1];
+                iTempProb[1] *= CurStateMemoryblock2Secondary[0];
+                bw.transitionBaumWelchCount0[3] += iTempProb[1];
+                bw.emissionBaumWelchCount1[alphaIndexsitepatterns[iSymbol[0]]][1] += iTempProb[1];  // column 1: unrelated emissions
+                CurStateMemoryblock2To[1] = iTempProb[1] = ((iTransition[4])*(iEmission[0]))*CurStateMemoryblock2From[1];
+                iTempProb[1] *= CurStateMemoryblock2Secondary[1];
+                bw.transitionBaumWelchCount0[4] += iTempProb[1];
+                bw.emissionBaumWelchCount1[alphaIndexsitepatterns[iSymbol[0]]][1] += iTempProb[1];
+                CurStateMemoryblock2To[1] += iTempProb[1] = ((iTransition[5])*(iEmission[1]))*CurStateMemoryblock2From[0];
+                iTempProb[1] *= CurStateMemoryblock2Secondary[1];
+                bw.transitionBaumWelchCount0[5] += iTempProb[1];
+                bw.emissionBaumWelchCount1[alphaIndexsitepatterns[iSymbol[0]]][0] += iTempProb[1];
+            }
+            iEmission[0] = 1.0;  // end transitions emit nothing
+            if ((iPos0+0>=iLen+0)) {  // last cell: '+=' onto a cell not assigned above - presumably write() hands back zeroed storage; confirm
+                CurStateMemoryblock3From = dp.StateMemoryblock3.read();
+                CurStateMemoryblock2To[0] += iTempProb[1] = ((iTransition[6])*(iEmission[0]))*CurStateMemoryblock3From[0];
+                iTempProb[1] *= CurStateMemoryblock2Secondary[0];
+                bw.transitionBaumWelchCount0[6] += iTempProb[1];
+                bw.emissionBaumWelchCount0[0] += iTempProb[1];
+                CurStateMemoryblock2To[1] += iTempProb[1] = ((iTransition[7])*(iEmission[0]))*CurStateMemoryblock3From[0];
+                iTempProb[1] *= CurStateMemoryblock2Secondary[1];
+                bw.transitionBaumWelchCount0[7] += iTempProb[1];
+                bw.emissionBaumWelchCount0[0] += iTempProb[1];
+            }
+            dp.StateMemoryblock2.written();
+        }
+        if ((iPos0+0<=0)) {  // position 0: transitions leaving the start state
+            if ((iPos0+0<=iLen+-1)) {
+                iSymbol[0] = aSeq[iPos0+0];
+            } 
+            else { 
+                iSymbol[0] = '1' /* dummy value */;
+                
+            }
+            CurStateMemoryblock1To = dp.StateMemoryblock1.write();
+            CurStateMemoryblock1Secondary = dp2.StateMemoryblock1.read();
+            iTempResult[0] = iPar.aEmitUnrelated[ iSymbol[0] - '1' ];
+            iEmission[0] = iTempResult[0];
+            iTempResult[0] = iPar.aEmitHomologous[ iSymbol[0] - '1' ];
+            iEmission[1] = iTempResult[0];
+            if ((iPos0+1<=iLen+0)) {
+                CurStateMemoryblock2From = dp.StateMemoryblock2.read((iPos0-(-1))-(1));
+                CurStateMemoryblock1To[0] = iTempProb[1] = ((iTransition[0])*(iEmission[1]))*CurStateMemoryblock2From[0];
+                iTempProb[1] *= CurStateMemoryblock1Secondary[0];
+                bw.transitionBaumWelchCount0[0] += iTempProb[1];
+                bw.emissionBaumWelchCount1[alphaIndexsitepatterns[iSymbol[0]]][0] += iTempProb[1];
+                CurStateMemoryblock1To[0] += iTempProb[1] = ((iTransition[1])*(iEmission[0]))*CurStateMemoryblock2From[1];
+                iTempProb[1] *= CurStateMemoryblock1Secondary[0];
+                bw.transitionBaumWelchCount0[1] += iTempProb[1];
+                bw.emissionBaumWelchCount1[alphaIndexsitepatterns[iSymbol[0]]][1] += iTempProb[1];
+            }
+            dp.StateMemoryblock1.written();
+        }
+        iPrevSlowCoord = iPos0;
+    }
+    bw.scaleCounts(1.0 / iTempProb[2]);  // undo the pre-scaling; no guard here against a zero total
+    iPrevSlowCoord = -1;
+    {
+        int iPos0=0;
+        if (iPos0==iPos0) {} // avoid 'unused variable' warnings
+        CurStateMemoryblock1From = dp.StateMemoryblock1.read();
+        iTempProb[0] = CurStateMemoryblock1From[0];  // result = start-state cell
+    }
+    *ppOutTable = new HomologyDPTable(dp);
+    // make sure tables don't get deleted
+    dp.isInCharge = false;
+    return iTempProb[0];
+};
+
+
+
+
+
+bfloat Viterbi_recurse(HomologyDPTable** ppOutTable,Params iPar,char *aSeq,int iLen) {
+    // Viterbi recursion, run from the end of the sequence back to the start:
+    // each cell keeps the maximum (hmmocMaxInPlace) over its outgoing transitions
+    // of transition * emission * value of the successor cell.
+    // Returns the start-state cell (StateMemoryblock1[0]) and hands a heap copy
+    // of the table to the caller through *ppOutTable (the local dp drops ownership).
+    // Note: here iEmission[0] is the homologous and iEmission[1] the unrelated
+    // emission, the opposite of the order used in Forward()/Backward().
+    double iTransition[8];
+    bfloat *CurStateMemoryblock2To;
+    const bfloat *CurStateMemoryblock2From;
+    const bfloat *CurStateMemoryblock3From;
+    bfloat *CurStateMemoryblock1To;
+    const bfloat *CurStateMemoryblock1From;
+    int iPrevSlowCoord;
+    int iSymbol[1];
+    if (false && iSymbol[0] == iSymbol[0]) {}   // avoid 'unused variable' warnings
+    double iEmission[2];
+    /* temporary storage for ordinary reals ('register' dropped: removed from the language in C++17) */
+    double iTempResult[1];
+    /* temporary storage for extended-exponent reals ('register' dropped, see above) */
+    bfloat iTempProb[1];
+    HomologyDPTable dp(iLen);
+    iTransition[0] = iPar.iStartHomologous;  // start -> homologous
+    iTransition[1] = 1.0 - iPar.iStartHomologous;  // start -> unrelated
+    iTransition[2] = 1.0 - iPar.iGoUnrelated - iPar.iGoStopFromHomologous;  // homologous -> homologous
+    iTransition[3] = iPar.iGoUnrelated;  // homologous -> unrelated
+    iTransition[4] = 1.0 - iPar.iGoHomologous - iPar.iGoStopFromUnrelated;  // unrelated -> unrelated
+    iTransition[5] = iPar.iGoHomologous;  // unrelated -> homologous
+    iTransition[6] = iPar.iGoStopFromHomologous;  // homologous -> end
+    iTransition[7] = iPar.iGoStopFromUnrelated;  // unrelated -> end
+    dp.StateMemoryblock3.write()[0] = 1.0;  // end state seeds the recursion with 1
+    dp.StateMemoryblock3.written();
+    iPrevSlowCoord = -1;
+    for (int iPos0=(iLen+1)-1; iPos0>=0; --iPos0) {
+        if ((iPos0+0>=iLen+0)) {
+        }
+        if ((iPos0+0>=1)) {
+            if ((iPos0+0<=iLen+-1)) {
+                iSymbol[0] = aSeq[iPos0+0];
+            } 
+            else { 
+                iSymbol[0] = '1' /* dummy value */;
+                
+            }
+            CurStateMemoryblock2To = dp.StateMemoryblock2.write((iPos0-(0))-(1));
+            iTempResult[0] = iPar.aEmitHomologous[ iSymbol[0] - '1' ];  // index is unchecked: symbols must be '1'..'8'
+            iEmission[0] = iTempResult[0];  // [0] = homologous emission
+            iTempResult[0] = iPar.aEmitUnrelated[ iSymbol[0] - '1' ];
+            iEmission[1] = iTempResult[0];  // [1] = unrelated emission
+            if ((iPos0+1<=iLen+0)) {
+                CurStateMemoryblock2From = dp.StateMemoryblock2.read((iPos0-(-1))-(1));
+                CurStateMemoryblock2To[0] = ((iTransition[2])*(iEmission[0]))*CurStateMemoryblock2From[0];
+                hmmocMaxInPlace( CurStateMemoryblock2To[0], ((iTransition[3])*(iEmission[1]))*CurStateMemoryblock2From[1] );
+                CurStateMemoryblock2To[1] = ((iTransition[4])*(iEmission[1]))*CurStateMemoryblock2From[1];
+                hmmocMaxInPlace( CurStateMemoryblock2To[1], ((iTransition[5])*(iEmission[0]))*CurStateMemoryblock2From[0] );
+            }
+            iEmission[0] = 1.0;  // end transitions emit nothing
+            if ((iPos0+0>=iLen+0)) {  // last cell: max against a cell not assigned above - presumably zero-initialised by write(); confirm
+                CurStateMemoryblock3From = dp.StateMemoryblock3.read();
+                hmmocMaxInPlace( CurStateMemoryblock2To[0], ((iTransition[6])*(iEmission[0]))*CurStateMemoryblock3From[0] );
+                hmmocMaxInPlace( CurStateMemoryblock2To[1], ((iTransition[7])*(iEmission[0]))*CurStateMemoryblock3From[0] );
+            }
+            dp.StateMemoryblock2.written();
+        }
+        if ((iPos0+0<=0)) {  // position 0: transitions leaving the start state
+            if ((iPos0+0<=iLen+-1)) {
+                iSymbol[0] = aSeq[iPos0+0];
+            } 
+            else { 
+                iSymbol[0] = '1' /* dummy value */;
+                
+            }
+            CurStateMemoryblock1To = dp.StateMemoryblock1.write();
+            iTempResult[0] = iPar.aEmitHomologous[ iSymbol[0] - '1' ];
+            iEmission[0] = iTempResult[0];
+            iTempResult[0] = iPar.aEmitUnrelated[ iSymbol[0] - '1' ];
+            iEmission[1] = iTempResult[0];
+            if ((iPos0+1<=iLen+0)) {
+                CurStateMemoryblock2From = dp.StateMemoryblock2.read((iPos0-(-1))-(1));
+                CurStateMemoryblock1To[0] = ((iTransition[0])*(iEmission[0]))*CurStateMemoryblock2From[0];
+                hmmocMaxInPlace( CurStateMemoryblock1To[0], ((iTransition[1])*(iEmission[1]))*CurStateMemoryblock2From[1] );
+            }
+            dp.StateMemoryblock1.written();
+        }
+        iPrevSlowCoord = iPos0;
+    }
+    iPrevSlowCoord = -1;
+    {
+        int iPos0=0;
+        if (iPos0==iPos0) {} // avoid 'unused variable' warnings
+        CurStateMemoryblock1From = dp.StateMemoryblock1.read();
+        iTempProb[0] = CurStateMemoryblock1From[0];  // result = start-state cell
+    }
+    *ppOutTable = new HomologyDPTable(dp);
+    // make sure tables don't get deleted
+    dp.isInCharge = false;
+    return iTempProb[0];
+};
+
+
+
+
+
+Path& Viterbi_trace(HomologyDPTable* pInTable,Params iPar,char *aSeq,int iLen) {
+    // Traceback over pInTable (presumably the table filled by Viterbi_recurse - confirm with callers):
+    // starting in state 0 (start) at position 0, repeatedly takes the outgoing transition with the
+    // largest transition*emission*table value and records it as an edge, until state 3 (end) is reached.
+    // Scratch layout: iTempIntVec[0] = current state, then the winning slot; iTempIntVec[1] = next free slot;
+    // slots 2.. hold candidate transition ids, iTempVector[slot] their scores, iTempVector[slot+3] transition*emission.
+    // The returned Path is a SimplePath allocated with new; this function never frees it.
+    // Symbols index the 8-entry emission arrays as (symbol - '1') with no range check: aSeq must hold '1'..'8' only.
+    double iTransition[8];
+    const bfloat *CurStateMemoryblock1To;
+    const bfloat *CurStateMemoryblock2To;
+    const bfloat *CurStateMemoryblock3To;
+    int iPrevSlowCoord;
+    SimplePath* pPath = new SimplePath();
+    vector<int> emit;
+    int iSymbol[1];
+    if (false && iSymbol[0] == iSymbol[0]) {}   // avoid 'unused variable' warnings
+    double iEmission[2];
+    /* temporary vector storage */
+    bfloat iTempVector[9];
+    /* temporary int vector storage */
+    int iTempIntVec[6];
+    /* temporary storage for ordinary reals ('register' dropped: removed from the language in C++17) */
+    double iTempResult[1];
+    iTransition[0] = iPar.iStartHomologous;  // start -> homologous
+    iTransition[1] = 1.0 - iPar.iStartHomologous;  // start -> unrelated
+    iTransition[2] = 1.0 - iPar.iGoUnrelated - iPar.iGoStopFromHomologous;  // homologous -> homologous
+    iTransition[3] = iPar.iGoUnrelated;  // homologous -> unrelated
+    iTransition[4] = 1.0 - iPar.iGoHomologous - iPar.iGoStopFromUnrelated;  // unrelated -> unrelated
+    iTransition[5] = iPar.iGoHomologous;  // unrelated -> homologous
+    iTransition[6] = iPar.iGoStopFromHomologous;  // homologous -> end
+    iTransition[7] = iPar.iGoStopFromUnrelated;  // unrelated -> end
+    static const int stateTable[] = {1, 2, 1, 2, 2, 1, 3, 3};      // target state of each transition id
+    static const int stateFromTable[] = {0, 0, 1, 1, 2, 2, 1, 2};  // source state of each transition id
+    static const int iPos0Table[] = {1, 1, 1, 1, 1, 1, 0, 0};      // how far each transition advances iPos0
+    HomologyDPTable dp(*pInTable);
+    // make sure tables don't get deleted
+    dp.isInCharge = false;
+    dp.StateMemoryblock1.write()[0] = 1.0;
+    dp.StateMemoryblock1.written();
+    iPrevSlowCoord = -1;
+    {
+        int iPos0=0;
+        if (iPos0==iPos0) {} // avoid 'unused variable' warnings
+        iTempIntVec[0] = 0;
+        while (iTempIntVec[0] != 3) {
+            iTempIntVec[1] = 2;  // candidate slots start at index 2
+            if ((iPos0+0<=iLen+-1)) {
+                iSymbol[0] = aSeq[iPos0+0];
+            } 
+            else { 
+                iSymbol[0] = '1' /* dummy value */;
+                
+            }
+            CurStateMemoryblock1To = dp.StateMemoryblock1.read();
+            CurStateMemoryblock2To = dp.StateMemoryblock2.read((iPos0-(0))-(1));  // NOTE(review): asks for index -1 when iPos0==0; the pointer is replaced below before any dereference - confirm read() tolerates it
+            if ((iPos0+1<=iLen+0)) {
+                iTempResult[0] = iPar.aEmitHomologous[ iSymbol[0] - '1' ];
+                iEmission[0] = iTempResult[0];  // [0] = homologous emission
+                iTempResult[0] = iPar.aEmitUnrelated[ iSymbol[0] - '1' ];
+                iEmission[1] = iTempResult[0];  // [1] = unrelated emission
+                CurStateMemoryblock2To = dp.StateMemoryblock2.read((iPos0-(-1))-(1));
+                switch (iTempIntVec[0]) {
+                    default:
+                    break;
+                    case 0:
+                    iTempVector[iTempIntVec[1]] = iTransition[0]*iEmission[0]*CurStateMemoryblock2To[0];
+                    iTempVector[iTempIntVec[1]+3] = iTransition[0]*iEmission[0];
+                    iTempIntVec[iTempIntVec[1]++] = 0;
+                    iTempVector[iTempIntVec[1]] = iTransition[1]*iEmission[1]*CurStateMemoryblock2To[1];
+                    iTempVector[iTempIntVec[1]+3] = iTransition[1]*iEmission[1];
+                    iTempIntVec[iTempIntVec[1]++] = 1;
+                    break;
+                    case 1:
+                    iTempVector[iTempIntVec[1]] = iTransition[2]*iEmission[0]*CurStateMemoryblock2To[0];
+                    iTempVector[iTempIntVec[1]+3] = iTransition[2]*iEmission[0];
+                    iTempIntVec[iTempIntVec[1]++] = 2;
+                    iTempVector[iTempIntVec[1]] = iTransition[3]*iEmission[1]*CurStateMemoryblock2To[1];
+                    iTempVector[iTempIntVec[1]+3] = iTransition[3]*iEmission[1];
+                    iTempIntVec[iTempIntVec[1]++] = 3;
+                    break;
+                    case 2:
+                    iTempVector[iTempIntVec[1]] = iTransition[5]*iEmission[0]*CurStateMemoryblock2To[0];
+                    iTempVector[iTempIntVec[1]+3] = iTransition[5]*iEmission[0];
+                    iTempIntVec[iTempIntVec[1]++] = 5;
+                    iTempVector[iTempIntVec[1]] = iTransition[4]*iEmission[1]*CurStateMemoryblock2To[1];
+                    iTempVector[iTempIntVec[1]+3] = iTransition[4]*iEmission[1];
+                    iTempIntVec[iTempIntVec[1]++] = 4;
+                    break;
+                }
+            }
+            CurStateMemoryblock3To = dp.StateMemoryblock3.read();
+            if ((iPos0+0>=iLen+0)) {  // at the end of the sequence the stop transitions become candidates
+                iEmission[0] = 1.0;
+                CurStateMemoryblock3To = dp.StateMemoryblock3.read();
+                switch (iTempIntVec[0]) {
+                    default:
+                    break;
+                    case 1:
+                    iTempVector[iTempIntVec[1]] = iTransition[6]*iEmission[0]*CurStateMemoryblock3To[0];
+                    iTempVector[iTempIntVec[1]+3] = iTransition[6]*iEmission[0];
+                    iTempIntVec[iTempIntVec[1]++] = 6;
+                    break;
+                    case 2:
+                    iTempVector[iTempIntVec[1]] = iTransition[7]*iEmission[0]*CurStateMemoryblock3To[0];
+                    iTempVector[iTempIntVec[1]+3] = iTransition[7]*iEmission[0];
+                    iTempIntVec[iTempIntVec[1]++] = 7;
+                    break;
+                }
+            }
+            iTempVector[0] = 0.0;  // running maximum; a candidate must be strictly greater than 0 to win
+            for (int i=2; i<iTempIntVec[1]; i++) {
+                if (iTempVector[i]>iTempVector[0]) {
+                    iTempVector[0]=iTempVector[i];
+                    iTempIntVec[0] = i;
+                }
+            }
+            // NOTE(review): if no candidate scored above 0, iTempIntVec[0] still holds the state id rather than a slot index here.
+            emit.resize(1);
+            emit[0] = iPos0Table[iTempIntVec[iTempIntVec[0]]];
+            pPath->addEdge(iTempIntVec[iTempIntVec[0]],iTempVector[iTempIntVec[0]+3],emit,stateFromTable[iTempIntVec[iTempIntVec[0]]],stateTable[iTempIntVec[iTempIntVec[0]]]);
+            iPos0 += iPos0Table[iTempIntVec[iTempIntVec[0]]]; iTempIntVec[0] = stateTable[iTempIntVec[iTempIntVec[0]]];  // advance, then move to the target state
+        }
+    }
+    return *pPath;
+};
+
+
+
+/* --- end of HMMoC-generated file --- */
diff --git a/libMems/HomologyHMM/homology.h b/libMems/HomologyHMM/homology.h
new file mode 100644
index 0000000..888c403
--- /dev/null
+++ b/libMems/HomologyHMM/homology.h
@@ -0,0 +1,188 @@
+/* Code generated by HMMoC version VERSION, Copyright (C) 2006 Gerton Lunter */
+/* Generated from file homology.xml (author: Aaron Darling) on Mon Jul 16 11:09:12 EST 2007 */
+
+/*
+This file is a work based on HMMoC VERSION, a hidden Markov model compiler.
+Copyright (C) 2006 by Gerton Lunter, Oxford University.
+
+HMMoC and works based on it are free software; you can redistribute 
+it and/or modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 2 of the License, or (at your option) any later version.
+
+HMMOC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with HMMoC; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#ifndef _homology_h_
+#define _homology_h_
+
+
+#include "dptables.h"
+#include "algebras.h"
+
+#include <vector>
+#include <iostream>
+#include <string>
+#include <algorithm>
+
+using std::vector;
+using std::cout;
+
+#include <map>
+
+using std::map;
+
+struct Params;
+
+//void run(std::string& sequence, std::string& prediction, double goHomologous = 0.004, double goUnrelated = 0.004, std::vector<double>* emitHomologous = NULL, std::vector<double>* emitUnrelated = NULL);
+
+void run(std::string& sequence, std::string& prediction, const Params& params );
+
+// Here go the state memory clique typedefs:
+typedef States<bfloat,2> Statesblock2;   // clique with two bfloat slots (used by StateMemoryblock2)
+typedef States<bfloat,1> Statesblock1;   // clique with one bfloat slot (used by StateMemoryblock1)
+typedef States<bfloat,1> Statesblock3;   // clique with one bfloat slot (used by StateMemoryblock3)
+
+class HomologyDPTable {   // DP storage for the Homology HMM: one DPTable per state clique, plus static id lookups
+    public:
+    // If true, this class' destructor will delete the DP arrays
+    bool isInCharge;
+    // Pointers to arrays containing ids of states and transitions
+    const std::string* const stateId;
+    const std::string* const emissionId;
+    const std::string* const transitionId;
+    const std::string* const transitionFrom;
+    const std::string* const transitionTo;
+    const std::string* const transitionProb;
+    const std::string* const transitionEmit;
+    const std::string* const outputId;
+    // The actual DP tables, and total sequence lengths (which determine size of DP arrays) follow:
+    int iLen;
+    DPTable<Statesblock2,1> StateMemoryblock2;   // two slots per cell; the DP routines pass a position index to read()/write()
+    DPTable<Statesblock1,0> StateMemoryblock1;   // one slot; the DP routines call read()/write() without an index
+    DPTable<Statesblock3,0> StateMemoryblock3;   // one slot; the DP routines call read()/write() without an index
+    // Member functions:
+    public:
+    // Default copy constructor is used; user has to set isInCharge appropriately afterwards!
+    HomologyDPTable(int iLen);
+    ~HomologyDPTable();
+    // returns probability from DP table, given position and int or std::string state identifier
+    bfloat getProb(int iState ,int ) const;
+    bfloat getProb(const std::string sState ,int ) const;
+    // converts std::string identifier (for state, transition or emission) into integer id
+    static int getId(const std::string& sState);
+    static const std::string& getTransitionId(int id);
+    static const std::string& getEmissionId(int id);
+    static const std::string& getStateId(int id);
+    static const std::string& getOutputId(int id);
+    static void _cleanup() { getId("_cleanup_"); }   // forwards the literal "_cleanup_" to getId(); the effect is defined in getId, not visible here
+};
+
+// give a name to the real type used for this HMM
+typedef bfloat HomologyReal;
+// define type for a 'short' real -- usually double, but can be logspace for efficiency
+typedef double HomologyShortReal;
+
+
+
+class HomologyBaumWelch {   // accumulators for Baum-Welch transition and emission counts of the Homology HMM
+    public:
+    // Default copy constructor is used.
+    // Void constructor:
+    HomologyBaumWelch() { resetCounts(); }
+    // Not calling resetCounts() across calls allows to aggregate results over multiple datasets
+    void resetCounts();
+    void scaleCounts(bfloat scale);   // multiplies every counter array below by scale
+    // Translate an identifier (string or integer) to the index into their corresponding Baum-Welch counter array (below)
+    // Which array is used for any particular emission/transition depends on its order signature - see documentation for details
+    int transitionIndex(int intId) const { return atransitionIdx[intId]; }   // no range check on intId
+    int transitionIndex(std::string strId) const;   // std:: spelled out: this header has no using-declaration for string
+    int emissionIndex(int intId) const { return aemissionIdx[intId]; }       // no range check on intId
+    int emissionIndex(std::string strId) const;     // std:: spelled out, as above
+    // Now follow, in triplets (one for each order signature):
+    //  Transition or emission counters;
+    //  Array of identifiers; and
+    //  Dimension of array (number of counters).
+    bfloat transitionBaumWelchCount0[8];   // one counter per transition id 0..7
+    static int transitionIdentifier0[8];   
+    static const int transitionDimension0 = 8;
+    bfloat emissionBaumWelchCount0[1];     // incremented by Backward() for the two transitions into the end state
+    static int emissionIdentifier0[1];   
+    static const int emissionDimension0 = 1;
+    bfloat emissionBaumWelchCount1[8][2];  // [symbol row 0..7][0 = homologous, 1 = unrelated], as filled by Backward()
+    static int emissionIdentifier1[2];   
+    static const int emissionDimension1 = 2;
+    private:
+    static int atransitionIdx[8];
+    static int aemissionIdx[3];
+    static map<const std::string,int> mId;   // string identifier -> integer index
+};
+
+
+
+class Path {   // abstract sequence of HMM transitions (edges); printed by operator<< declared below
+    //protected:
+    public:
+    vector<int> transitions;                                // transition id of each edge, in path order
+    public:
+    unsigned int size() const {                             // Number of transitions in path
+        return transitions.size(); 
+    }
+    int transition(int i) const {                           // i-th transition (no bounds check)
+        return transitions[i]; 
+    }
+    virtual double prob(int) const = 0;                  // i-th transition*emission probability:
+    virtual const vector<int>& emission(int) const = 0;  // i-th emission vector
+    virtual int fromState(int) const = 0;                // State at from-end of i-th transition
+    virtual int toState(int) const = 0;                  // State at to-end of i-th transition
+    virtual int nextFrom(int) const = 0;                 // index of next sibling, -1 if no more (documented as always -1 for simple paths; NOTE(review): SimplePath::nextFrom returns i+1 or -1 - confirm)
+    virtual int nextTo(int) const = 0;                   // index of child, or -1 if no more (documented as always i+1 for simple paths; NOTE(review): SimplePath::nextTo always returns -1 - confirm)
+    virtual ~Path() {} 
+};
+
+ostream& operator<<(ostream& os, const Path& p);
+
+class SimplePath: public Path {   // Path stored as parallel vectors, one entry per edge
+    public:
+    vector<double> probs;             // per-edge probability as passed to addEdge()
+    vector<vector<int> > emissions;   // per-edge emission vector (copied in by addEdge())
+    vector<int> froms;                // per-edge source state id
+    vector<int> tos;                  // per-edge target state id
+    public:
+    void addEdge(int transition, double prob, vector<int>& emission, int from, int to);   // appends one edge to all vectors
+    double prob(int index) const;                   // probs[index]
+    int nextFrom(int index) const;                  // index+1 while another edge follows, otherwise -1
+    int nextTo(int index) const;                    // always -1 in this implementation
+    const vector<int>& emission(int index) const;   // emissions[index]
+    int fromState(int index) const;                 // froms[index]
+    int toState(int index) const;                   // tos[index]
+    void reverse();                                 // reverses the edge order of every vector in place
+};
+struct Params {   // model parameters, passed by value to Forward/Backward/Viterbi_recurse/Viterbi_trace
+    double iStartHomologous;        // start -> homologous; start -> unrelated uses 1 - iStartHomologous
+    double iGoHomologous;           // unrelated -> homologous
+    double iGoUnrelated;            // homologous -> unrelated
+    double iGoStopFromUnrelated;    // unrelated -> end; staying unrelated uses 1 - iGoHomologous - iGoStopFromUnrelated
+    double iGoStopFromHomologous;   // homologous -> end; staying homologous uses 1 - iGoUnrelated - iGoStopFromHomologous
+    double aEmitHomologous[8];      // emission values in the homologous state, indexed by (symbol - '1')
+    double aEmitUnrelated[8];       // emission values in the unrelated state, indexed by (symbol - '1')
+};
+
+bfloat Forward(HomologyDPTable** ppOutTable,Params iPar,char *aSeq,int iLen);   // forward pass; returns the end-state cell, *ppOutTable gets a new'd table
+
+bfloat Backward(HomologyBaumWelch& bw,HomologyDPTable* pInTable,HomologyDPTable** ppOutTable,Params iPar,char *aSeq,int iLen);   // backward pass; adds Baum-Welch counts to bw, reads pInTable
+
+bfloat Viterbi_recurse(HomologyDPTable** ppOutTable,Params iPar,char *aSeq,int iLen);   // max-product recursion; returns the start-state cell, *ppOutTable gets a new'd table
+
+Path& Viterbi_trace(HomologyDPTable* pInTable,Params iPar,char *aSeq,int iLen);   // traceback over pInTable; the returned Path is allocated with new inside the function
+
+#endif // _homology_h_
+
+/* --- end of HMMoC-generated file --- */
diff --git a/libMems/HomologyHMM/homology.xml b/libMems/HomologyHMM/homology.xml
new file mode 100644
index 0000000..2ee7022
--- /dev/null
+++ b/libMems/HomologyHMM/homology.xml
@@ -0,0 +1,217 @@
+<?xml version="1.0"?>
+<!--
+			Derived from casino.xml by Aaron Darling 
+			Copyright (C) 2007 Aaron Darling
+
+      This file is part of HMMoC 0.5, a hidden Markov model compiler.
+      Copyright (C) 2006 by Gerton Lunter, Oxford University.
+  
+      HMMoC is free software; you can redistribute it and/or modify
+      it under the terms of the GNU General Public License as published by
+      the Free Software Foundation; either version 2 of the License, or
+      (at your option) any later version.
+  
+      HMMOC is distributed in the hope that it will be useful,
+      but WITHOUT ANY WARRANTY; without even the implied warranty of
+      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+      GNU General Public License for more details.
+  
+      You should have received a copy of the GNU General Public License
+      along with HMMoC; if not, write to the Free Software
+      Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+-->
+
+
+
+<hml debug="true">
+
+
+
+<author>Aaron Darling</author>
+
+
+<!--  We code a site pattern as a number (symbol k reads aEmit*[k-1]; order follows parameters.h)
+ 1 A:A, T:T
+ 2 C:C, G:G
+ 3 A:C, C:A, G:T, T:G
+ 4 A:G, G:A, C:T, T:C
+ 5 A:T, T:A
+ 6 C:G, G:C
+ 7 N:gap open/close
+ 8 N:gap extend
+-->
+<alphabet id="sitepatterns">
+ 12345678
+</alphabet>
+
+
+
+<output id="sequence">
+  <alphabet idref="sitepatterns"/>
+  <identifier type="length" value="iLen"/>
+  <identifier type="sequence" value="aSeq"/>
+  <code type="parameter" value="char *aSeq"/>
+  <code type="parameter" value="int iLen"/>
+</output>
+
+
+<hmm id="Homology">
+
+ <description>  An HMM to distinguish homologous from unrelated sites in an alignment </description>
+
+ <outputs id="homologyoutputs">
+  <output idref="sequence"/>
+ </outputs>
+
+
+ <clique id="block1">
+  <state id="start"/>
+ </clique>
+
+ <clique id="block2">
+  <state id="homologous"/>
+  <state id="unrelated"/>
+ </clique>
+
+ <clique id="block3">
+  <state id="end"/>
+ </clique>
+
+
+ <graph>
+  <clique idref="block1"/>
+  <clique idref="block2"/>
+  <clique idref="block3"/>
+ </graph>
+
+
+ <transitions>
+  <transition from="start" to="homologous" probability="startHomologous" emission="emitHomologous"/>
+  <transition from="start" to="unrelated" probability="startUnrelated" emission="emitUnrelated"/>
+  <transition from="homologous" to="homologous" probability="stayHomologous" emission="emitHomologous"/>
+  <transition from="homologous" to="unrelated" probability="goUnrelated" emission="emitUnrelated"/>
+  <transition from="unrelated" to="unrelated" probability="stayUnrelated" emission="emitUnrelated"/>
+  <transition from="unrelated" to="homologous" probability="goHomologous" emission="emitHomologous"/>
+  <transition from="homologous" to="end" probability="goStopFromHomologous" emission="empty"/>
+  <transition from="unrelated" to="end" probability="goStopFromUnrelated" emission="empty"/>
+ </transitions>
+
+
+ <code id="paramsClassDef" where="classdefinitions">
+   <![CDATA[
+     struct Params {
+       double iStartHomologous;
+       double iGoHomologous;
+       double iGoUnrelated;
+       double iGoStopFromUnrelated;
+       double iGoStopFromHomologous;
+       double aEmitHomologous[8];
+       double aEmitUnrelated[8];
+     };
+   ]]>
+  </code>
+
+
+  <emission id="empty">
+   <probability>
+    <code type="expression"> 1.0 </code>
+   </probability>
+  </emission>
+
+
+  <emission id="emitHomologous">
+   <output idref="sequence"/>
+   <probability>
+    <code type="statement">
+     <identifier output="sequence" value="iEmission"/>
+     <identifier type="result" value="iProb"/>
+     <![CDATA[
+  
+       iProb = iPar.aEmitHomologous[ iEmission - '1' ];
+
+     ]]>
+    </code>
+   </probability>
+  </emission>
+
+
+  <emission id="emitUnrelated">
+   <output idref="sequence"/>
+   <probability>
+    <code type="statement">
+     <identifier output="sequence" value="iEmission"/>
+     <identifier type="result" value="iProb"/>
+     <!--  Here goes the code computing the probability -->
+     <![CDATA[
+  
+       iProb = iPar.aEmitUnrelated[ iEmission - '1' ];
+
+     ]]>
+    </code>
+   </probability>
+  </emission>
+
+
+  <probability id="one"><code> 1.0 </code></probability>
+
+
+  <probability id="goUnrelated">
+    <code>
+      <!--  Tell HMMoC that this code requires an input parameter, which itself needs a definition to make sense -->
+      <code type="parameter" init="paramsClassDef" value="Params iPar"/>
+      <!-- The actual code for this probability follows (no need to quote this) -->
+
+        iPar.iGoUnrelated 
+
+    </code>
+  </probability>
+
+  <probability id="startHomologous"><code> iPar.iStartHomologous </code></probability>
+  <probability id="startUnrelated"><code> 1.0 - iPar.iStartHomologous </code></probability>
+  <probability id="goHomologous"><code> iPar.iGoHomologous </code></probability>
+  <probability id="goStopFromHomologous"><code> iPar.iGoStopFromHomologous </code></probability>
+  <probability id="goStopFromUnrelated"><code> iPar.iGoStopFromUnrelated </code></probability>
+  <probability id="stayHomologous"><code> 1.0 - iPar.iGoUnrelated - iPar.iGoStopFromHomologous </code></probability>
+  <probability id="stayUnrelated"><code> 1.0 - iPar.iGoHomologous - iPar.iGoStopFromUnrelated </code></probability>
+
+</hmm>
+
+
+
+
+
+
+
+
+<!-- Code generation -->
+
+
+<forward  outputTable="yes" name="Forward" id="fw">
+  <!-- Specify HMM to make code for -->
+  <hmm idref="Homology"/>
+</forward>
+
+<backward  outputTable="yes" baumWelch="yes" name="Backward" id="bw">
+  <!-- Specify HMM to make code for -->
+  <hmm idref="Homology"/>
+</backward>
+
+<viterbi  name="Viterbi" id="vit">
+  <hmm idref="Homology"/>
+</viterbi>
+
+
+
+<codeGeneration realtype="bfloat" file="homology.cc" header="homology.h" language="C++">
+
+  <forward idref="fw"/>
+  <backward idref="bw"/>
+  <viterbi idref="vit"/>
+
+</codeGeneration>
+
+
+ 
+</hml>
+
+
diff --git a/libMems/HomologyHMM/homologymain.cc b/libMems/HomologyHMM/homologymain.cc
new file mode 100644
index 0000000..ef1a5cf
--- /dev/null
+++ b/libMems/HomologyHMM/homologymain.cc
@@ -0,0 +1,65 @@
+/*
+ *    This file is part of HMMoC 0.5, a hidden Markov model compiler.
+ *    Copyright (C) 2006 by Gerton Lunter, Oxford University.
+ *
+ *    HMMoC is free software; you can redistribute it and/or modify
+ *    it under the terms of the GNU General Public License as published by
+ *    the Free Software Foundation; either version 2 of the License, or
+ *    (at your option) any later version.
+ *
+ *    HMMOC is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU General Public License for more details.
+ *
+ *    You should have received a copy of the GNU General Public License
+ *    along with HMMoC; if not, write to the Free Software
+ *    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+\*/
+#include <cstdlib>
+#include <cstring>
+#include "homology.h"
+
+
+// Posterior decoding: labels every position of sequence 'H' (homologous) or 'N'.
+//   sequence   - emission symbols, one char per position; the generated emission code
+//                indexes aEmit*[symbol - '1'], so chars outside '1'..'8' are presumably
+//                invalid (not checked here -- confirm at the callers)
+//   prediction - output; resized to sequence.length()
+//   params     - model parameters, passed by value to Forward() and Backward()
+// NOTE: not exception-safe; if Forward()/Backward() throw, aSequence and the tables leak.
+void run(std::string& sequence, std::string& prediction, const Params& params )
+{
+  // Forward()/Backward() take a non-const char*, so hand them a private copy of the symbols
+  int iPathLength = sequence.length() ;
+  char* aSequence = new char[ iPathLength ];
+  memcpy(aSequence, sequence.data(), iPathLength );
+
+  // DP tables are returned through these pointers and deleted at the end of this function
+  HomologyDPTable *pFWDP = 0;
+  HomologyDPTable *pBWDP = 0;
+  HomologyBaumWelch bw;   // required by Backward(); not read afterwards
+
+  bfloat iFWProb = Forward(&pFWDP, params, aSequence, iPathLength );
+  Backward(bw, pFWDP, &pBWDP, params, aSequence, iPathLength );   // return value is not needed
+
+  // posterior cutoff at or above which a position is called homologous
+  const double kMinHomologousPosterior = 0.9;
+
+  prediction.resize(iPathLength);
+  for (int i=0; i<iPathLength; i++) {
+    // forward * backward / total likelihood for state "homologous"; sequence[i] is looked up at table position i+1
+    double iPosterior = pFWDP->getProb("homologous",i+1)*pBWDP->getProb("homologous",i+1)/iFWProb;
+    if (iPosterior >= kMinHomologousPosterior) {
+      prediction[i] = 'H';
+    } else {
+      prediction[i] = 'N';
+    }
+  }
+
+  delete[] aSequence;
+  delete pFWDP;
+  delete pBWDP;
+}
+
+
diff --git a/libMems/HomologyHMM/parameters.h b/libMems/HomologyHMM/parameters.h
new file mode 100644
index 0000000..304afc9
--- /dev/null
+++ b/libMems/HomologyHMM/parameters.h
@@ -0,0 +1,162 @@
+#ifndef __HomologyHMM_parameters_h__
+#define __HomologyHMM_parameters_h__
+
+#include "homology.h"
+
+Params getHoxdParams();
+Params getAdaptedHoxdMatrixParameters( double gc_content );
+void adaptToPercentIdentity( Params& params, double pct_identity );
+
+/**
+ * Returns the default HOXD-based HMM parameters.
+ * The set is rebuilt on each call and returned by value, so no never-freed heap singleton or lazily-initialized shared state remains.
+ * Emission slots: 0 a:a,t:t  1 c:c,g:g  2 a:c,c:a,g:t,t:g  3 a:g,g:a,c:t,t:c
+ *                 4 a:t,t:a  5 g:c,c:g  6 gap open  7 gap extend
+ */
+inline
+Params getHoxdParams()
+{
+	Params params = Params();
+	params.iStartHomologous = 0.5;
+	params.iGoHomologous = 0.00001;
+	params.iGoUnrelated = 0.0000001;
+	// NOTE(review): getAdaptedHoxdMatrixParameters() sets both stop probabilities to 0.0000001 -- confirm which value is intended
+	params.iGoStopFromUnrelated = 0.00000001;
+	params.iGoStopFromHomologous = 0.00000001;
+
+	// homologous emissions: original values from Chiaromonte et al supplied by Webb Miller
+	double* hom = params.aEmitHomologous;
+	hom[0] = 0.1723*2;	// a:a, t:t
+	hom[1] = 0.1462*2;	// c:c, g:g
+	hom[2] = 0.0180*4;	// a:c, c:a, g:t, t:g
+	hom[3] = 0.0426*4;	// a:g, g:a, c:t, t:c
+	hom[4] = 0.0186*2;	// a:t, t:a
+	hom[5] = 0.0142*2;	// g:c, c:g
+	hom[6] = 0.004461;	// gap open (from an e. coli y pestis alignment)
+	// gap extend (nominal value 0.050733) is set to the remaining probability mass
+	hom[7] = 1 - (hom[0] + hom[1] + hom[2] + hom[3] + hom[4] + hom[5] + hom[6]);
+
+	// unrelated emissions
+	double* unr = params.aEmitUnrelated;
+	unr[0] = 0.12818742714404662781015820149872;	// a:a, t:t
+	unr[1] = 0.10493347210657785179017485428807;	// c:c, g:g
+	unr[2] = 0.11597910074937552039966694421313;	// a:c, c:a
+	unr[3] = unr[2];
+	unr[4] = unr[0];
+	unr[5] = unr[1];
+	unr[6] = 0.0483;	// gap open (derived by aligning a 48%GC sequence with its reverse--not complement--to derive expected gap frequencies in unrelated sequence)
+	// gap extend (nominal value 0.2535) is set to the remaining probability mass
+	unr[7] = 1 - (unr[0] + unr[1] + unr[2] + unr[3] + unr[4] + unr[5] + unr[6]);
+	return params;
+}
+
+
+/**
+ * Adapts an emission matrix to an arbitrary nucleotide composition
+ * The six substitution slots of each table are rescaled so that, together with the gap slots, the table sums to 1.
+ * @param gc_content	The fraction of the genome which is G/C
+ * @return	Params with both emission tables and all transition probabilities filled in
+ */
+inline
+Params getAdaptedHoxdMatrixParameters( double gc_content )
+{
+	// gap open (gO) / gap extend (gE) emission probabilities of each state
+	const double gO_unrelated = 0.0483;
+	const double gE_unrelated = 0.2535;
+	const double gO_homologous = 0.004461;
+	const double gE_homologous = 0.050733;
+
+	const double at_content = 1-gc_content;
+	Params params;
+
+	// Unrelated state emission probabilities
+	// use AT/GC background frequency instead of mononucleotide frequency since that is how it is described in the manuscript
+	double* unr = params.aEmitUnrelated;
+	unr[0] = (at_content/2)*(at_content/2)+(at_content/2)*(at_content/2);	// a:a, t:t
+	unr[1] = (gc_content/2)*(gc_content/2)+(gc_content/2)*(gc_content/2);	// c:c, g:g
+	unr[2] = (at_content/2)*(gc_content/2)+(gc_content/2)*(at_content/2);	// a:c, c:a, g:t, t:g
+	unr[3] = unr[2];	// a:g, g:a, c:t, t:c
+	unr[4] = unr[0];	// a:t, t:a
+	unr[5] = unr[1];	// g:c, c:g
+
+	// rescale the six substitution slots so that they sum to 1 - (gO + gE)
+	double unr_subst_sum = unr[0];
+	for( int slotI = 1; slotI < 6; ++slotI )
+		unr_subst_sum += unr[slotI];
+	const double unr_norm = (1-(gO_unrelated+gE_unrelated))/unr_subst_sum;
+	for( int slotI = 0; slotI < 6; ++slotI )
+		unr[slotI] = unr[slotI]*unr_norm;
+
+	unr[6] = gO_unrelated;	// gap open
+	// gap extend receives the probability mass left over by slots 0..6
+	double unr_used = unr[0];
+	for( int slotI = 1; slotI < 7; ++slotI )
+		unr_used += unr[slotI];
+	unr[7] = 1 - unr_used;
+
+	// Homologous state emission probabilities
+	//USE PRE-NORMALIZED VALUES (from the HOXD matrix)!!
+	const double H_AA = 0.1723*2;	// a:a, t:t
+	const double H_CC = 0.1462*2;	// c:c, g:g
+	const double H_AC = 0.0180*4;	// a:c, c:a, g:t, t:g
+	const double H_AG = 0.0426*4;	// a:g, g:a, c:t, t:c
+	const double H_AT = 0.0186*2;	// a:t, t:a
+	const double H_CG = 0.0142*2;	// g:c, c:g
+
+	// NOTE(review): 0.525 / 0.475 look like the AT / GC fractions the HOXD values assume -- confirm
+	double* hom = params.aEmitHomologous;
+	hom[0] = (at_content/0.525)*H_AA;
+	hom[1] = (gc_content/0.475)*H_CC;
+	hom[2] = H_AC;
+	hom[3] = H_AG;
+	hom[4] = (at_content/0.525)*H_AT;
+	hom[5] = (gc_content/0.475)*H_CG;
+
+	// same rescaling as for the unrelated table
+	double hom_subst_sum = hom[0];
+	for( int slotI = 1; slotI < 6; ++slotI )
+		hom_subst_sum += hom[slotI];
+	const double hom_norm = (1-(gO_homologous+gE_homologous))/hom_subst_sum;
+	for( int slotI = 0; slotI < 6; ++slotI )
+		hom[slotI] = hom[slotI]*hom_norm;
+
+	hom[6] = gO_homologous;	// gap open
+	double hom_used = hom[0];
+	for( int slotI = 1; slotI < 7; ++slotI )
+		hom_used += hom[slotI];
+	hom[7] = 1 - hom_used;	// gap extend
+
+	// set state transition probabilities
+	params.iStartHomologous = 0.5;
+	params.iGoHomologous = 0.00001;
+	params.iGoUnrelated = 0.0000001;
+	params.iGoStopFromHomologous = 0.0000001;
+	params.iGoStopFromUnrelated = 0.0000001;
+
+	return params;
+}
+
+/** Shifts homologous emission mass between the identity slots (0,1) and the mismatch slots (2..5)
+ *  so that the identity slots sum to pct_identity times the non-gap mass; throws a string literal for pct_identity outside (0,1]. */
+inline
+void adaptToPercentIdentity( Params& params, double pct_identity )
+{
+	if( pct_identity <= 0 || pct_identity > 1 )
+		throw "Bad pct identity";		// error condition
+	double* hom = params.aEmitHomologous;
+	// target identity mass: requested identity scaled by the non-gap share (slots 6 and 7 hold the gap emissions)
+	const double target_id = pct_identity * (1.0 - hom[6] - hom[7]);
+	// current identity mass, H_AA + H_CC
+	const double current_id = hom[0] + hom[1];
+	const double surplus = current_id - target_id;
+	// hand the surplus to the mismatch slots in proportion to their current values...
+	const double mismatch_sum = hom[2] + hom[3] + hom[4] + hom[5];
+	for( int slotI = 2; slotI < 6; ++slotI )
+		hom[slotI] += surplus * hom[slotI] / mismatch_sum;
+	// ...and take it out of the two identity slots, also proportionally
+	for( int slotI = 0; slotI < 2; ++slotI )
+		hom[slotI] -= surplus * hom[slotI] / current_id;
+}
+
+#endif	// __HomologyHMM_parameters_h__
+
diff --git a/libMems/HybridAbstractMatch.h b/libMems/HybridAbstractMatch.h
new file mode 100644
index 0000000..d76f11a
--- /dev/null
+++ b/libMems/HybridAbstractMatch.h
@@ -0,0 +1,315 @@
+/*******************************************************************************
+ * $Id: HybridAbstractMatch.h,v 1.8 2004/02/27 23:08:55 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef __HybridAbstractMatch_h__
+#define __HybridAbstractMatch_h__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnClone.h"
+#include "libGenome/gnDefs.h"
+#include "libMems/AbstractMatch.h"
+#include <vector>
+#include <limits>
+#include <cstring>
+namespace mems {
+
+/**
+ * The HybridAbstractMatch implements the AbstractMatch interface in a way
+ * that allows matches with a large SeqCount and low Multiplicity to be stored efficiently
+ */
+template< unsigned FIXED_SEQ_COUNT=2, class int64Alloc=std::allocator<int64>, class uintAlloc=std::allocator<uint> >
+class HybridAbstractMatch : public AbstractMatch {
+public:
+	HybridAbstractMatch() : m_seq_count(0) 
+	{
+		memset(fixed_seq_ids, 0xFF, sizeof(fixed_seq_ids));	// all-ones bytes: every fixed slot reads as NO_SEQ (UINT_MAX)
+		memset(fixed_starts, 0, sizeof(fixed_starts));
+	}
+	/**
+	 * Creates a new HybridAbstractMatch.
+	 * @param seq_count The total number of sequences in the alignment
+	 */
+	HybridAbstractMatch(const uint seq_count )
+		: m_seq_count(seq_count)
+	{
+		memset(fixed_seq_ids, 0xFF, sizeof(fixed_seq_ids));	// all-ones bytes: every fixed slot reads as NO_SEQ (UINT_MAX)
+		memset(fixed_starts, 0, sizeof(fixed_starts));
+	}
+
+
+	// use compiler-generated copy constructor, assignment operator, and destructor
+
+	// see AbstractMatch base class documentation for these functions
+
+	int64 Start(uint seqI) const;
+	void SetStart(uint seqI, int64 startI);
+	uint Multiplicity() const	// number of occupied fixed slots plus the number of overflow entries
+	{
+		uint mult = 0;
+		for( size_t fI = 0; fI < FIXED_SEQ_COUNT; ++fI )
+			mult += fixed_seq_ids[fI] != NO_SEQ ? 1 : 0;
+		return mult + (uint)seq_ids.size();
+	}
+	uint SeqCount() const{return m_seq_count;}
+	uint FirstStart() const;
+	virtual void Invert();
+
+	gnSeqI LeftEnd(uint seqI) const;
+	orientation Orientation(uint seqI) const;
+	void SetLeftEnd(uint seqI, gnSeqI position);
+	void SetOrientation(uint seqI, orientation o);
+	
+	// these functions manipulate the start coordinates quickly
+	virtual void MoveStart(int64 move_amount);
+	virtual void MoveEnd(int64 move_amount);
+
+	virtual boolean operator==( const HybridAbstractMatch& ham ) const;
+
+	virtual uint UsedSeq( uint seqI ) const { 	// seqI is a storage index: fixed slots first, overflow entries after them
+		if(seqI < FIXED_SEQ_COUNT) return fixed_seq_ids[seqI];	// may be NO_SEQ when that fixed slot is empty
+		return seq_ids[seqI - FIXED_SEQ_COUNT];	// seq_ids is 0-based, same offset as SeqToIndex() and Start() use
+	}
+
+protected:
+	uint m_seq_count;
+
+	static const uint NO_SEQ = UINT_MAX;	// marks an empty fixed slot / "sequence not present"
+
+	// storage for a fixed number of seqs
+	uint fixed_seq_ids[FIXED_SEQ_COUNT];
+	int64 fixed_starts[FIXED_SEQ_COUNT];
+
+	// storage for any number of seqs
+	std::vector<uint, uintAlloc > seq_ids;
+	std::vector<int64, int64Alloc > starts;
+
+	uint SeqToIndex( uint seqI ) const;
+
+	// for use by derived classes in order to swap contents
+	void swap( HybridAbstractMatch* other );
+};
+
+
+/** Exchanges the complete state of this match with *other;
+ *  intended for derived classes that need to swap contents. */
+template< unsigned FIXED_SEQ_COUNT, class gnSeqIAlloc, class uintAlloc >
+void HybridAbstractMatch< FIXED_SEQ_COUNT, gnSeqIAlloc, uintAlloc >::swap( HybridAbstractMatch* other )
+{
+	std::swap( m_seq_count, other->m_seq_count );
+
+	// fixed-size slots: exchange ids and starts element by element
+	for( unsigned slotI = 0; slotI != FIXED_SEQ_COUNT; ++slotI )
+	{
+		std::swap( fixed_seq_ids[slotI], other->fixed_seq_ids[slotI] );
+		std::swap( fixed_starts[slotI], other->fixed_starts[slotI] );
+	}
+
+	// overflow vectors are exchanged wholesale
+	std::swap( seq_ids, other->seq_ids );
+	std::swap( starts, other->starts );
+}
+
+/** Returns the smallest sequence id stored in this match, or NO_SEQ when no sequence is defined. */
+template< unsigned FIXED_SEQ_COUNT, class gnSeqIAlloc, class uintAlloc >
+uint HybridAbstractMatch< FIXED_SEQ_COUNT, gnSeqIAlloc, uintAlloc >::FirstStart() const
+{
+	uint lowest = NO_SEQ;	// NO_SEQ is UINT_MAX, so empty slots never lower the minimum
+	for( std::size_t fixedI = 0; fixedI < FIXED_SEQ_COUNT; ++fixedI )
+		if( fixed_seq_ids[fixedI] < lowest ) lowest = fixed_seq_ids[fixedI];
+	for( std::size_t overflowI = 0; overflowI < seq_ids.size(); ++overflowI )
+		if( seq_ids[overflowI] < lowest ) lowest = seq_ids[overflowI];
+	return lowest;
+}
+
+/**
+ * Maps seqI to a storage index: [0,FIXED_SEQ_COUNT) for a fixed slot, FIXED_SEQ_COUNT+k for seq_ids[k], NO_SEQ if absent.
+ * NOTE: empty fixed slots hold NO_SEQ, so asking for NO_SEQ itself yields the first empty fixed slot.
+ */
+template< unsigned FIXED_SEQ_COUNT, class gnSeqIAlloc, class uintAlloc >
+uint HybridAbstractMatch< FIXED_SEQ_COUNT, gnSeqIAlloc, uintAlloc >::SeqToIndex( uint seqI ) const
+{
+	// the fixed slots are searched first, then the overflow vector
+	for( uint fixedI = 0; fixedI < FIXED_SEQ_COUNT; ++fixedI )
+		if( fixed_seq_ids[fixedI] == seqI )
+			return fixedI;
+	for( uint overflowI = 0; overflowI < seq_ids.size(); ++overflowI )
+		if( seq_ids[overflowI] == seqI )
+			return overflowI + FIXED_SEQ_COUNT;
+	return NO_SEQ;
+}
+
+
+/** Returns the signed start stored for seqI, or NO_MATCH when seqI is not part of this match. */
+template< unsigned FIXED_SEQ_COUNT, class gnSeqIAlloc, class uintAlloc >
+int64 HybridAbstractMatch< FIXED_SEQ_COUNT, gnSeqIAlloc, uintAlloc >::Start(uint seqI) const
+{
+	const uint storageI = SeqToIndex( seqI );
+	if( storageI == NO_SEQ )
+		return NO_MATCH;
+	// storage indices below FIXED_SEQ_COUNT live in the fixed array, the rest in the overflow vector
+	return storageI < FIXED_SEQ_COUNT ? fixed_starts[storageI] : starts[storageI-FIXED_SEQ_COUNT];
+}
+
+
+/** Stores startI as the start of seqI; NO_MATCH removes seqI (a no-op when seqI is absent). */
+template< unsigned FIXED_SEQ_COUNT, class gnSeqIAlloc, class uintAlloc >
+void HybridAbstractMatch< FIXED_SEQ_COUNT, gnSeqIAlloc, uintAlloc >::SetStart(uint seqI, int64 startI)
+{
+	const bool removing = startI == NO_MATCH;
+	uint storageI = SeqToIndex( seqI );
+	if( storageI == NO_SEQ )
+	{
+		if( removing )
+			return;	// nothing stored for seqI, nothing to remove
+		for( uint slotI = 0; slotI < FIXED_SEQ_COUNT; ++slotI )
+			if( fixed_seq_ids[slotI] == NO_SEQ )
+			{
+				storageI = slotI;	// seqI is new: claim the first empty fixed slot
+				break;
+			}
+	}
+	if( storageI < FIXED_SEQ_COUNT )
+	{
+		if( removing )
+			fixed_seq_ids[storageI] = NO_SEQ;
+		else
+			fixed_seq_ids[storageI] = seqI;
+		fixed_starts[storageI] = startI;
+		return;
+	}
+	// overflow vectors; for a new seqI storageI is still NO_SEQ, so the unsigned subtraction gives a huge index that selects the append branch
+	const uint overflowI = storageI - FIXED_SEQ_COUNT;
+	if( removing )
+	{
+		seq_ids.erase( seq_ids.begin() + overflowI );
+		starts.erase( starts.begin() + overflowI );
+	}
+	else if( overflowI < seq_ids.size() )
+		starts[overflowI] = startI;
+	else
+	{
+		seq_ids.push_back(seqI);
+		starts.push_back(startI);
+	}
+}
+
+
+/** Negates every stored start, in the fixed slots and in the overflow vector alike. */
+template< unsigned FIXED_SEQ_COUNT, class gnSeqIAlloc, class uintAlloc >
+void HybridAbstractMatch< FIXED_SEQ_COUNT, gnSeqIAlloc, uintAlloc >::Invert()
+{
+	for( size_t fixedI = 0; fixedI != FIXED_SEQ_COUNT; ++fixedI )
+		fixed_starts[fixedI] = -fixed_starts[fixedI];
+	for( size_t overflowI = 0; overflowI != starts.size(); ++overflowI )
+		starts[overflowI] = -starts[overflowI];
+}
+
+
+/** Returns genome::absolut() of the start stored for seqI, or NO_MATCH when seqI is not part of this match. */
+template< unsigned FIXED_SEQ_COUNT, class gnSeqIAlloc, class uintAlloc >
+gnSeqI HybridAbstractMatch< FIXED_SEQ_COUNT, gnSeqIAlloc, uintAlloc >::LeftEnd(uint seqI) const
+{
+	const uint storageI = SeqToIndex( seqI );
+	if( storageI == NO_SEQ )
+		return NO_MATCH;
+	const int64 signed_start = storageI < FIXED_SEQ_COUNT ? fixed_starts[storageI] : starts[storageI-FIXED_SEQ_COUNT];
+	return genome::absolut(signed_start);
+}
+
+
+template< unsigned FIXED_SEQ_COUNT, class gnSeqIAlloc, class uintAlloc >
+AbstractMatch::orientation HybridAbstractMatch< FIXED_SEQ_COUNT, gnSeqIAlloc, uintAlloc >::Orientation(uint seqI) const
+{ 
+	uint posI = SeqToIndex( seqI );
+	if( posI == NO_SEQ )
+		return undefined;
+	if( posI < FIXED_SEQ_COUNT )
+		return fixed_starts[posI] < 0 ? reverse : forward;
+	return starts[posI-FIXED_SEQ_COUNT] < 0 ? reverse : forward;
+}
+
+
+template< unsigned FIXED_SEQ_COUNT, class gnSeqIAlloc, class uintAlloc >
+void HybridAbstractMatch< FIXED_SEQ_COUNT, gnSeqIAlloc, uintAlloc >::SetLeftEnd(uint seqI, gnSeqI position)
+{	// stores position for seqI, then re-applies the orientation seqI had before (if any)
+	const bool keep_orientation = SeqToIndex( seqI ) != NO_SEQ && position != NO_MATCH;
+	const orientation previous = keep_orientation ? Orientation( seqI ) : undefined;
+	SetStart(seqI,position);
+	if( previous != undefined )
+		SetOrientation(seqI, previous);
+}
+
+/** Sets the sign of seqI's stored start according to o;
+ *  undefined removes seqI via SetStart(seqI, NO_MATCH).
+ *  Throws a string literal when seqI is not part of this match. */
+template< unsigned FIXED_SEQ_COUNT, class gnSeqIAlloc, class uintAlloc >
+void HybridAbstractMatch< FIXED_SEQ_COUNT, gnSeqIAlloc, uintAlloc >::SetOrientation(uint seqI, orientation o)
+{
+	if( o == undefined )
+	{
+		SetStart(seqI, NO_MATCH);
+		return;
+	}
+	const uint storageI = SeqToIndex( seqI );
+	if( storageI == NO_SEQ )
+		throw "ArrayIndexOutOfBounds!\n";
+	// pick whichever container holds seqI and rewrite its start as |start| * sign
+	int64& stored = storageI < FIXED_SEQ_COUNT ? fixed_starts[storageI] : starts[storageI-FIXED_SEQ_COUNT];
+	const int sign = (o == reverse) ? -1 : 1;
+	stored = genome::absolut(stored) * sign;
+}
+
+/** Adds move_amount to every positive stored start; zero and negative starts are left unchanged. */
+template< unsigned FIXED_SEQ_COUNT, class gnSeqIAlloc, class uintAlloc >
+void HybridAbstractMatch< FIXED_SEQ_COUNT, gnSeqIAlloc, uintAlloc >::MoveStart(int64 move_amount)
+{
+	// adding 0 is the "leave untouched" case
+	for( size_t fixedI = 0; fixedI < FIXED_SEQ_COUNT; ++fixedI )
+		fixed_starts[fixedI] += fixed_starts[fixedI] > 0 ? move_amount : 0;
+	for( size_t overflowI = 0; overflowI < starts.size(); ++overflowI )
+		starts[overflowI] += starts[overflowI] > 0 ? move_amount : 0;
+}
+
+/** Subtracts move_amount from every negative stored start; zero and positive starts are left unchanged. */
+template< unsigned FIXED_SEQ_COUNT, class gnSeqIAlloc, class uintAlloc >
+void HybridAbstractMatch< FIXED_SEQ_COUNT, gnSeqIAlloc, uintAlloc >::MoveEnd(int64 move_amount)
+{
+	// subtracting 0 is the "leave untouched" case
+	for( size_t fixedI = 0; fixedI < FIXED_SEQ_COUNT; ++fixedI )
+		fixed_starts[fixedI] -= fixed_starts[fixedI] < 0 ? move_amount : 0;
+	for( size_t overflowI = 0; overflowI < starts.size(); ++overflowI )
+		starts[overflowI] -= starts[overflowI] < 0 ? move_amount : 0;
+}
+
+/** Equal when every sequence defined here has the same Start() in sam and both matches have the same Multiplicity(). */
+template< unsigned FIXED_SEQ_COUNT, class gnSeqIAlloc, class uintAlloc >
+boolean HybridAbstractMatch< FIXED_SEQ_COUNT, gnSeqIAlloc, uintAlloc >::operator==( const HybridAbstractMatch< FIXED_SEQ_COUNT, gnSeqIAlloc, uintAlloc >& sam ) const
+{
+	for( size_t fixedI = 0; fixedI < FIXED_SEQ_COUNT; ++fixedI )
+	{
+		const uint seqI = fixed_seq_ids[fixedI];
+		if( seqI != NO_SEQ && Start(seqI) != sam.Start(seqI) )
+			return false;
+	}
+	for( size_t overflowI = 0; overflowI < seq_ids.size(); ++overflowI )
+	{
+		const uint seqI = seq_ids[overflowI];
+		if( seqI != NO_SEQ && Start(seqI) != sam.Start(seqI) )
+			return false;
+	}
+	// equal multiplicity rules out sam defining additional sequences
+	return Multiplicity() == sam.Multiplicity();
+}
+
+
+}
+
+#endif // __HybridAbstractMatch_h__
diff --git a/libMems/Interval.cpp b/libMems/Interval.cpp
new file mode 100644
index 0000000..b7060cb
--- /dev/null
+++ b/libMems/Interval.cpp
@@ -0,0 +1,25 @@
+/*******************************************************************************
+ * $Id: Interval.cpp,v 1.12 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/Interval.h"
+#include "libMems/gnAlignedSequences.h"
+#include "libMems/GappedAlignment.h"
+#include "libMems/Match.h"
+#include <list>
+#include <iterator>
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+
+}
diff --git a/libMems/Interval.h b/libMems/Interval.h
new file mode 100644
index 0000000..724ca5f
--- /dev/null
+++ b/libMems/Interval.h
@@ -0,0 +1,958 @@
+/*******************************************************************************
+ * $Id: GenericInterval.h,v 1.4 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef __Interval_h__
+#define __Interval_h__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnClone.h"
+#include "libGenome/gnDebug.h"
+#include "libMems/SparseAbstractMatch.h"
+#include "libMems/gnAlignedSequences.h"
+#include "libMems/AbstractGappedAlignment.h"
+#include "libMems/Match.h"
+#include "libMems/GappedAlignment.h"
+#include <iostream>
+#include <vector>
+#include "libMems/twister.h"
+
+//#include "boost/pool/object_pool.hpp"
+
+namespace mems {
+
+/**
+ * Adapter function to allow inserts at a reverse-iterator position, so that
+ * generic code can use the same insert() call for both iteration directions.
+ * The value is inserted before riter.base() and riter is then advanced once.
+ * @param the_list	The container to insert into
+ * @param riter	Reverse iterator marking the insertion point (modified)
+ * @param val	The value to insert
+ */
+template< typename ListType, typename RanIt, typename Ty >
+void insert( ListType& the_list, std::reverse_iterator<RanIt>& riter, Ty& val )
+{
+	the_list.insert( riter.base(), val );
+	++riter;	// need to shift riter
+}
+/**
+ * Forward-iterator counterpart of the reverse-iterator insert() adapter:
+ * simply inserts val into the_list before iter.
+ * @param the_list	The container to insert into
+ * @param iter	Position before which val is inserted
+ * @param val	The value to insert
+ */
+template< typename ListType, typename Ty >
+void insert( ListType& the_list, const typename ListType::iterator& iter, Ty& val )
+{
+	the_list.insert( iter, val );
+}
+
+
+/**
+ * An alignment built from an ordered vector of AbstractMatch pointers.
+ * The interval owns the matches it stores: the destructor and the assignment
+ * operator call Free() on each of them.  The interval's own boundaries and
+ * alignment length are derived from the stored matches by CalculateOffset()
+ * and CalculateAlignmentLength().
+ */
+template< class GappedBaseImpl = AbstractGappedAlignment< SparseAbstractMatch<> > >
+class GenericInterval : public GappedBaseImpl
+{
+public:
+	GenericInterval(){}
+
+//	GenericInterval( uint seq_count, gnSeqI aln_length) : GappedBaseImpl( seq_count, aln_length ){};
+
+	/**
+	 * Construct from a MatchList or a vector of pointers to AbstractMatches.
+	 * Each match in the range is copied with Copy(); the originals are not modified.
+	 * The range must not be empty: its first element is dereferenced to obtain
+	 * the sequence count.
+	 */
+	template<typename BidIt>
+	GenericInterval( BidIt it_begin, const BidIt& it_end ) : GappedBaseImpl( (*it_begin)->SeqCount(), 0 )
+	{
+		for( ; it_begin != it_end; ++it_begin )
+			this->matches.push_back( (*it_begin)->Copy() );
+		CalculateOffset();
+		addUnalignedRegions();
+		CalculateAlignmentLength();
+		ValidateMatches();
+	}
+
+	GenericInterval( const GenericInterval& iv );
+	~GenericInterval();
+	GenericInterval& operator=( const GenericInterval& iv );
+	
+	GenericInterval* Clone() const;
+	GenericInterval* Copy() const;
+	virtual void Free();
+	
+	/**
+	 * Set the matches in this interval *without* making a copy.  The GenericInterval
+	 * takes ownership of matches and the supplied container is cleared.
+	 * Gap matches for unaligned regions are added (see addUnalignedRegions()).
+	 * The supplied container must not be empty.
+	 */
+	template< class MatchVector >
+	void SetMatches( MatchVector& matches )
+	{
+		adoptMatches( matches, true );
+	}
+
+	/**
+	 * Same as SetMatches(), except that no gap matches are added for unaligned
+	 * regions: the interval holds exactly the supplied matches.  The interval
+	 * still takes ownership and the supplied container is cleared.
+	 * The supplied container must not be empty.
+	 */
+	template< class MatchVector >
+	void SetMatchesTemp( MatchVector& matches )
+	{
+		adoptMatches( matches, false );
+	}
+	/**
+	 * Writes this GenericInterval to the specified output stream (e.g. cout).
+	 */
+	template<typename BaseImpl> friend std::ostream& operator<<(std::ostream& os, const GenericInterval<BaseImpl>& iv); //write to source.
+
+	/**
+	 * Reads a GenericInterval from the specified input stream (e.g. cin).
+	 * NOTE(review): the interval is taken by const reference, which an extraction
+	 * operator could not fill in; no definition is present in this header.
+	 */
+	template<typename BaseImpl> friend std::istream& operator>>(std::istream& is, const GenericInterval<BaseImpl>& iv); //read from source
+
+	// Inherited methods from AbstractMatch:
+	void Invert();
+	void CropStart(gnSeqI crop_amount);
+	void CropEnd(gnSeqI crop_amount);
+	void MoveStart(int64 move_amount);
+	void MoveEnd(int64 move_amount);
+
+	/** Recomputes left end, length and orientation of each sequence from the stored matches */
+	virtual void CalculateOffset();
+
+	/** Appends a copy of am to the match vector; interval boundaries are not recalculated */
+	void add( AbstractMatch* am ){ matches.push_back( am->Copy() ); }
+	
+	/** 
+	 * Get a gnAlignedSequences object
+	 * TODO: get rid of this
+	 */
+	virtual void GetAlignedSequences( gnAlignedSequences& gnas, const std::vector< genome::gnSequence* >& seq_table ) const;
+
+	void GetAlignment( std::vector< bitset_t >& align_matrix ) const;
+
+	void CropLeft( gnSeqI amount, uint seqI );
+	void CropRight( gnSeqI amount, uint seqI );
+
+	void SetAlignment( const std::vector< std::string >& seq_align );
+
+	// TODO: get rid of code that uses this hack...
+	const std::vector<AbstractMatch*>& GetMatches() const{ return matches; }
+	/** Transfers the stored match pointers to the caller and leaves this interval empty */
+	void StealMatches( std::vector<AbstractMatch*>& matches );
+
+	/** marbles the gaps so that no sequence has more than "size" contiguous gaps */
+	void Marble( gnSeqI size );
+
+	void GetColumn( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column ) const;
+	
+	bool IsGap( uint seq, gnSeqI col ) const;
+
+	/** self test code */
+	void ValidateMatches() const;
+
+	void swap( GenericInterval& other ){ swap(&other); }
+
+protected:
+	// for use by derived classes in order to swap contents
+	void swap( GenericInterval* other ){
+		std::swap( matches, other->matches );
+		GappedBaseImpl::swap( other );
+	}
+	std::vector< AbstractMatch* > matches;
+private:
+	/**
+	 * Shared implementation of SetMatches() and SetMatchesTemp().
+	 * @param new_matches	Non-empty container of match pointers to take ownership of; cleared on return
+	 * @param add_unaligned	When true, addUnalignedRegions() is run after the boundaries are computed
+	 */
+	template< class MatchVector >
+	void adoptMatches( MatchVector& new_matches, bool add_unaligned )
+	{
+		// Set the SeqCount and other bits by assigning from an interval
+		// that was built around a single dummy match
+		Match m( new_matches[0]->SeqCount() );
+		std::vector<AbstractMatch*> tmp(1, &m);
+		*this = GenericInterval( tmp.begin(), tmp.end() );
+
+		// then delete the allocated dummy match
+		for( std::size_t mI = 0; mI < this->matches.size(); mI++ )
+			this->matches[mI]->Free();
+
+		// now set the matches and update the interval data
+		this->matches.resize(new_matches.size());
+		std::copy(new_matches.begin(), new_matches.end(), this->matches.begin());
+		CalculateOffset();
+		if( add_unaligned )
+			addUnalignedRegions();
+		CalculateAlignmentLength();
+		ValidateMatches();
+
+		// finally, clear the user supplied matches to indicate that we own the memory
+		new_matches.clear();
+	}
+
+	void addUnalignedRegions();
+	void FindMatchPos( uint seqI, gnSeqI pos, size_t& matchI, gnSeqI& match_pos );
+	void GetColumnAndMatch( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column, size_t& matchI, gnSeqI& match_col ) const;
+	void CalculateAlignmentLength();
+};
+
+typedef GenericInterval<> Interval;
+
+
+/**
+ * Returns a newly allocated copy of this interval, obtained from m_allocateAndCopy().
+ * NOTE(review): presumably the result must be released with Free() rather than delete -- confirm.
+ */
+template<class GappedBaseImpl>
+GenericInterval<GappedBaseImpl>* GenericInterval<GappedBaseImpl>::Copy() const
+{
+	return m_allocateAndCopy( *this );
+}
+/**
+ * Releases this interval by handing it to m_free().
+ * The object must not be used after this call.
+ */
+template<class GappedBaseImpl>
+void GenericInterval<GappedBaseImpl>::Free()
+{
+	m_free(this);
+}
+
+/** Releases every match held by the interval by calling its Free() method */
+template<class GappedBaseImpl>
+GenericInterval<GappedBaseImpl>::~GenericInterval()
+{
+	std::vector<AbstractMatch*>::iterator cur = matches.begin();
+	while( cur != matches.end() )
+	{
+		(*cur)->Free();
+		++cur;
+	}
+}
+
+/**
+ * Hands the stored match pointers over to the caller.
+ * Afterwards the interval holds no matches, every sequence is reset to
+ * NO_MATCH with length 0, and the alignment length is 0.
+ * @param matches	Receives the match pointers (previous contents are overwritten)
+ */
+template<class GappedBaseImpl>
+void GenericInterval<GappedBaseImpl>::StealMatches( std::vector<AbstractMatch*>& matches ){
+	// the argument shadows the member, hence the explicit this->
+	matches = this->matches;
+	this->matches.clear();
+
+	// mark every sequence as undefined
+	uint seqI = 0;
+	while( seqI < this->SeqCount() )
+	{
+		this->SetLeftEnd( seqI, NO_MATCH );
+		this->SetLength( 0, seqI );
+		++seqI;
+	}
+	this->SetAlignmentLength(0);
+}
+
+/**
+ * Copy constructor.  The base class is default-constructed and all the work is
+ * delegated to operator=, which copies each of iv's matches with Copy().
+ */
+template<class GappedBaseImpl>
+GenericInterval<GappedBaseImpl>::GenericInterval( const GenericInterval<GappedBaseImpl>& iv )
+{
+	*this = iv;
+}
+
+/**
+ * Assignment operator.  Frees the matches currently held, then stores a Copy()
+ * of each match in iv.
+ * @param iv	The interval to copy from
+ * @return	A reference to this interval
+ */
+template<class GappedBaseImpl>
+GenericInterval<GappedBaseImpl>& GenericInterval<GappedBaseImpl>::operator=( const GenericInterval& iv )
+{
+	// Self-assignment must be a no-op: otherwise the loop below would free our
+	// matches and then "copy" from the vector that was just cleared, silently
+	// emptying the interval.
+	if( this == &iv )
+		return *this;
+
+	GappedBaseImpl::operator=( iv );
+	// release the matches we currently own
+	for( std::size_t mI = 0; mI < matches.size(); mI++ )
+		matches[mI]->Free();
+	matches.clear();
+	// take private copies of the source interval's matches
+	matches.reserve( iv.matches.size() );
+	for( std::size_t mI = 0; mI < iv.matches.size(); mI++ )
+		matches.push_back( iv.matches[mI]->Copy() );
+	return *this;
+}
+
+/**
+ * Returns a copy of this interval allocated with operator new
+ * (in contrast to Copy(), which goes through m_allocateAndCopy()).
+ */
+template<class GappedBaseImpl>
+GenericInterval<GappedBaseImpl>* GenericInterval<GappedBaseImpl>::Clone() const 
+{
+	return new GenericInterval( *this );
+}
+
+
+static bool debug_interval = false;
+
+/**
+ * Self test: checks that, within each sequence, the defined matches abut one
+ * another and start at the interval's left end.  Problems are reported on
+ * std::cerr and genome::breakHere() is called.  Does nothing unless
+ * debug_interval is set.
+ */
+template<class GappedBaseImpl>
+void GenericInterval<GappedBaseImpl>::ValidateMatches() const
+{
+	// nothing to do when debugging is off or the interval is empty
+	if( !debug_interval || matches.empty() )
+		return;
+
+	for( uint seqI = 0; seqI < matches[0]->SeqCount(); ++seqI )
+	{
+		// coordinate at which the next defined match is expected to begin
+		gnSeqI expected_lend = this->LeftEnd(seqI);
+
+		if( this->Orientation(seqI) == AbstractMatch::forward )
+		{
+			// forward sequences: walk the matches front to back
+			for( size_t mI = 0; mI < matches.size(); ++mI )
+			{
+				const AbstractMatch* cur = matches[mI];
+				if( cur->LeftEnd(seqI) == NO_MATCH )
+					continue;	// sequence not defined in this match
+				if( expected_lend != cur->LeftEnd(seqI) )
+				{
+					std::cerr << "iv broken\n";
+					std::cerr << "seqI: " << seqI << "\t prev_rend: " << expected_lend << std::endl;
+					std::cerr << "mI: " << mI << "\tlend: " << cur->LeftEnd(seqI) << std::endl;
+					genome::breakHere();
+				}
+				expected_lend = cur->RightEnd(seqI) + 1;
+			}
+		}
+		else if( this->Orientation(seqI) == AbstractMatch::reverse )
+		{
+			// reverse sequences: walk the matches back to front
+			for( size_t remaining = matches.size(); remaining > 0; --remaining )
+			{
+				const AbstractMatch* cur = matches[remaining-1];
+				if( cur->LeftEnd(seqI) == NO_MATCH )
+					continue;	// sequence not defined in this match
+				if( expected_lend != cur->LeftEnd(seqI) )
+				{
+					std::cerr << "iv broken 2\n";
+					genome::breakHere();
+				}
+				expected_lend = cur->RightEnd(seqI) + 1;
+			}
+		}
+
+		// a sequence with an orientation must have a non-zero length
+		if( this->Orientation(seqI) != AbstractMatch::undefined && this->Length(seqI) == 0 )
+		{
+			genome::breakHere();
+			std::cerr << "ERROR: confused interval\n";
+		}
+	}
+}
+
+/**
+ * Finds the match that contains alignment column col and reports the column's contents.
+ * @param col	The alignment column to look up
+ * @param pos	Output: one entry per sequence.  Starts at LeftEnd() for forward sequences,
+ *				RightEnd()+1 for other defined sequences and NO_MATCH for undefined ones, is
+ *				advanced past each whole match that precedes col, and is finally overwritten with
+ *				the values reported by the containing match's GetColumn() where those are not NO_MATCH
+ * @param column	Output: reset to all false, then filled in by the containing match's GetColumn()
+ * @param matchI	Output: index of the containing match; left unmodified when no match contains col
+ * @param match_col	Output: column offset within that match; left unmodified when no match contains col
+ */
+template<class GappedBaseImpl>
+void GenericInterval<GappedBaseImpl>::GetColumnAndMatch( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column, size_t& matchI, gnSeqI& match_col ) const
+{
+	// bail when the appropriate match is found
+	gnSeqI col_pos = 0;	// number of alignment columns consumed so far
+	size_t mI = 0;
+	pos.clear();
+	for( uint seqI = 0; seqI < this->SeqCount(); ++seqI )
+	{
+		if( this->LeftEnd(seqI) == NO_MATCH )
+			pos.push_back(NO_MATCH);
+		else if( this->Orientation(seqI) == AbstractMatch::forward )
+			pos.push_back(this->LeftEnd(seqI));
+		else
+			pos.push_back(this->RightEnd(seqI)+1);
+	}
+
+	column = std::vector<bool>(this->SeqCount(), false);
+
+	for( ; mI < matches.size(); ++mI )
+	{
+		uint seqI = 0;
+
+		// consume the whole match if it ends at or before col, otherwise only the part up to col
+		gnSeqI diff = matches[mI]->AlignmentLength();
+		diff = col_pos + diff <= col ? diff : col - col_pos;
+
+		// NOTE(review): positions move by the number of alignment columns consumed,
+		// not by the match's per-sequence length -- confirm this is intended
+		for( seqI = 0; seqI < this->SeqCount(); ++seqI )
+			if( this->Orientation(seqI) == AbstractMatch::forward )
+				pos[seqI] += diff;
+			else if( this->Orientation(seqI) == AbstractMatch::reverse )
+				pos[seqI] -= diff;
+
+		col_pos += diff;
+
+		// col lies strictly inside this match: ask the match for the column itself
+		if( col_pos >= col && diff < matches[mI]->AlignmentLength() )
+		{
+			std::vector<gnSeqI> m_pos;
+			matches[mI]->GetColumn( diff, m_pos, column );
+			for( uint seqI = 0; seqI < this->SeqCount(); ++seqI )
+				if( m_pos[seqI] != NO_MATCH )
+					pos[seqI] = m_pos[seqI];
+			matchI = mI;
+			match_col = diff;
+			break;
+		}
+	}
+}
+
+/**
+ * Inserts single-sequence "gap" matches into the_list so that sequence seqI is
+ * covered without holes between left_end and right_end.
+ * The range [first,last) is walked once; whenever a match defined in seqI starts
+ * beyond the end of the previous one, a new Match covering the skipped region is
+ * created and scheduled for insertion in front of it.  A final Match is added at
+ * the end of the range when the last defined match does not end at right_end.
+ * @param the_list	The list that receives the new matches
+ * @param first	Start of the range to scan (forward or reverse iterator into the_list)
+ * @param last	End of the range to scan
+ * @param seqI	The sequence whose unaligned regions are to be filled in
+ * @param left_end	Left end coordinate of the enclosing interval in seqI
+ * @param right_end	Right end coordinate of the enclosing interval in seqI
+ * @param seq_orient	Orientation assigned to gap matches created between existing matches
+ * @param seq_count	Number of sequences in each newly created Match
+ */
+template<typename ListType, typename Iter>
+void AddGapMatches( ListType& the_list, const Iter& first, const Iter& last, 
+				   uint seqI, int64 left_end, int64 right_end, 
+				   AbstractMatch::orientation seq_orient, uint seq_count )
+{
+	Iter iter = first;
+	int64 pos = left_end-1;	// last coordinate of seqI covered so far
+    //MatchList& tmp_list;
+    // insertions are deferred until the scan is complete
+    std::vector< std::pair<Match*,Iter> > insert_pos;
+	for( ; iter != last; ++iter )
+	{
+		if( (*iter)->LeftEnd(seqI) != NO_MATCH )
+		{
+			// size of the hole between the previous match and this one
+			gnSeqI len = (*iter)->LeftEnd(seqI)-pos-1;
+
+            //tjt: there are perfectly valid chains that blow up when this is enabled
+            //i.e:      
+            //                         <----c1----><----d1---->
+            //          <--a1---><---b1--->
+            // pos would get set to b1->RightEnd() since diff between a1 & b1 == 0
+            // but then c1->LeftEnd < pos, so genome::breakHere() gets called
+            // this is because SetMatches() gets called before finalize(), but should it??
+
+            // NOTE(review): presumably this catches a negative difference that wrapped
+            // around in the unsigned gnSeqI -- confirm the width of gnSeqI
+            if( len > 4000000000u )
+			{
+				std::cerr << "triplebogus interval data\n";
+				std::cerr << "(*iter)->LeftEnd(" << seqI << "): " << (*iter)->LeftEnd(seqI) << std::endl;
+				std::cerr << "pos: " << pos << std::endl;
+				genome::breakHere();
+			}
+
+			if( len > 0 )
+			{
+				Match tmp(seq_count);
+				Match* new_m = tmp.Copy();
+				new_m->SetLeftEnd(seqI, pos + 1);
+				new_m->SetOrientation(seqI, seq_orient);
+				new_m->SetLength(len);
+				pos = (*iter)->RightEnd(seqI);
+				//insert(the_list, iter, new_m);	// this may move iter
+                //tmp_list.push_back(new_m);
+                insert_pos.push_back(make_pair(new_m,iter));
+			}
+            else
+				pos = (*iter)->RightEnd(seqI);
+		}
+	}
+    // perform the deferred insertions
+    for ( uint i = 0; i < insert_pos.size(); i++)
+    {
+        insert(the_list, insert_pos.at(i).second, insert_pos.at(i).first);
+    }
+	// fill the region between the last defined match and right_end
+	// NOTE(review): unlike the matches created above, no orientation is set here, and
+	// the length is right_end-pos-1 although the match starts at pos+1 -- confirm
+	// whether right_end is inclusive
+	if( right_end != pos )
+	{
+		Match tmp(seq_count);
+		Match* new_m = tmp.Copy();
+		new_m->SetLeftEnd(seqI, pos+1);
+		new_m->SetLength(right_end-pos-1);
+		insert(the_list, iter, new_m);
+	}
+}
+
+// The best steaks are well marbled
+/**
+ * Marbles the gaps so that no sequence has more than "size" contiguous gaps.
+ * Works in two phases: single-sequence matches longer than size are first cut
+ * into pieces of at most size columns; the pieces belonging to the two
+ * sequences are then interleaved in random order between consecutive anchors
+ * (matches whose multiplicity is at least SeqCount()).
+ * Throws a C string when the interval has more than two sequences and returns
+ * without changes when Multiplicity() is less than two.
+ * @param size	Maximum alignment length of a single-sequence piece
+ */
+template<class GappedBaseImpl>
+void GenericInterval<GappedBaseImpl>::Marble( gnSeqI size )
+{
+	if( this->SeqCount() > 2 )
+		throw "I can't handle that many at once\n";
+	if( this->Multiplicity() < 2 )
+		return;	// can't marble unless there are at least two seqs
+
+	// first break up all the pieces
+	std::list<AbstractMatch*> mlist;
+	mlist.insert( mlist.end(), matches.begin(), matches.end() );
+	std::list<AbstractMatch*>::iterator m_iter = mlist.begin();
+	for(; m_iter != mlist.end(); ++m_iter )
+	{
+		// only single-sequence matches longer than size are split
+		if( (*m_iter)->Multiplicity() != 1 || (*m_iter)->AlignmentLength() <= size )
+			continue;
+		// which seq are we working with?
+		// (the result of this search is not used below)
+		uint seqI = 0;
+		for( ; seqI < (*m_iter)->SeqCount(); seqI++ )
+			if( (*m_iter)->LeftEnd(seqI) != NO_MATCH )
+				break;
+		// left_iv keeps the first size columns, *m_iter keeps the rest
+		AbstractMatch* left_iv = (*m_iter)->Copy();
+		left_iv->CropEnd( left_iv->AlignmentLength() - size );
+		(*m_iter)->CropStart( size );
+		// m_iter now refers to left_iv; the loop increment returns to the remainder,
+		// which is split again if it is still longer than size
+		m_iter = mlist.insert( m_iter, left_iv );
+	}
+	matches.clear();
+	matches.insert( matches.end(), mlist.begin(), mlist.end() );
+	this->ValidateMatches();
+
+	// now interleave the gaps
+	// seq_iter[s] is the next not-yet-emitted match that is defined in sequence s
+	std::vector< std::vector<AbstractMatch*>::iterator > seq_iter( this->SeqCount(), matches.begin() );
+	std::vector< AbstractMatch* > interleaved(matches.size());
+	std::vector<AbstractMatch*>::iterator anchor = matches.begin();
+	for( uint seqI = 0; seqI < this->SeqCount(); seqI++ )
+	{
+		if( this->LeftEnd(seqI) == NO_MATCH )
+			continue;
+		for( ; seq_iter[seqI] != matches.end() && (*seq_iter[seqI])->LeftEnd(seqI) == NO_MATCH; ++seq_iter[seqI] );
+	}
+	// anchor is the next match whose multiplicity is at least SeqCount()
+	for( ; anchor != matches.end() && (*anchor)->Multiplicity() < this->SeqCount(); ++anchor );
+	size_t cur = 0;	// next free slot in interleaved
+	while(true)
+	{
+		// increment anchor if an iter has caught up to it...
+		uint seqI = 0;
+		do{
+			for( seqI = 0; seqI < this->SeqCount(); seqI++ )
+			{
+				if( seq_iter[seqI] == anchor && anchor != matches.end() )
+				{
+					for( uint seqJ = 0; seqJ < this->SeqCount(); seqJ++ )
+					{
+						// add anything in seq_iter[seqJ]
+						while( seq_iter[seqJ] != anchor )
+						{
+							interleaved[cur++] = *(seq_iter[seqJ]);
+							for( ++seq_iter[seqJ]; seq_iter[seqJ] != matches.end() && (*seq_iter[seqJ])->LeftEnd(seqJ) == NO_MATCH; ++seq_iter[seqJ] );
+						}
+						// don't end on an anchor
+						for( ++seq_iter[seqJ]; seq_iter[seqJ] != matches.end() && (*seq_iter[seqJ])->LeftEnd(seqJ) == NO_MATCH; ++seq_iter[seqJ] );
+					}
+					// increment anchor
+					interleaved[cur++] = *anchor;
+					for( ++anchor; anchor != matches.end() && (*anchor)->Multiplicity() < this->SeqCount(); ++anchor );
+
+					break;
+				}
+			}
+		}while( seqI < this->SeqCount() );	// repeat until a full pass finds no iterator sitting on the anchor
+
+		// distance of each sequence's iterator from the next anchor; both are zero
+		// only once everything has been emitted
+		size_t diff1 = anchor - seq_iter[0];
+		size_t diff2 = anchor - seq_iter[1];
+		if( diff1 == 0 && diff2 == 0 )
+			break;
+		// sample from a binomial with p(success) = diff1 / diff1+diff2
+		// NOTE(review): the code below uses a fixed probability of .5, not diff1/(diff1+diff2)
+//		double samp = ((double)rand())/((double)RAND_MAX);
+		double samp = RandTwisterDouble();
+		// add one of the intervals and move on to the next...
+		if( diff2 == 0 || (samp < .5 && diff1 > 0) )
+		{
+			interleaved[cur++] = *(seq_iter[0]);
+			for( ++seq_iter[0]; seq_iter[0] != matches.end() && (*seq_iter[0])->LeftEnd(0) == NO_MATCH; ++seq_iter[0] );
+		}else{
+			interleaved[cur++] = *(seq_iter[1]);
+			for( ++seq_iter[1]; seq_iter[1] != matches.end() && (*seq_iter[1])->LeftEnd(1) == NO_MATCH; ++seq_iter[1] );
+		}
+	}
+	matches = interleaved;
+	this->ValidateMatches();
+}
+
+
+/**
+ * Removes the first crop_amount alignment columns from the interval.
+ * Matches that lie entirely within the cropped region are freed; the match
+ * containing the new first column is cropped in place.  Cropping the whole
+ * alignment leaves an interval without matches.
+ * @param crop_amount	Number of alignment columns to remove from the start
+ * @throws SeqIndexOutOfBounds if crop_amount exceeds AlignmentLength()
+ */
+template<class GappedBaseImpl>
+void GenericInterval<GappedBaseImpl>::CropStart(gnSeqI crop_amount)
+{
+	if( crop_amount > this->AlignmentLength() )
+		Throw_gnEx( genome::SeqIndexOutOfBounds() );
+	if( crop_amount == 0 )
+		return;
+
+	// Cropping every column: no match contains column crop_amount, so
+	// GetColumnAndMatch() would leave its outputs untouched.  Drop everything
+	// explicitly, which is also the result CropEnd() gives for a full crop.
+	if( crop_amount == this->AlignmentLength() )
+	{
+		for( size_t mI = 0; mI < matches.size(); ++mI )
+			matches[mI]->Free();
+		matches.clear();
+		this->CalculateOffset();
+		this->ValidateMatches();
+		return;
+	}
+	
+	std::vector<bool> col;
+	std::vector<gnSeqI> pos;
+	size_t matchI = 0;
+	gnSeqI match_col = 0;	// stays 0 (crop nothing) if no containing match is found
+	this->GetColumnAndMatch( crop_amount, pos, col, matchI, match_col );
+
+	// delete everything before matchI
+	for( size_t mI = 0; mI < matchI; ++mI )
+		matches[mI]->Free();
+	matches.erase(matches.begin(), matches.begin()+matchI);
+
+	// crop from within matchI
+	if( !matches.empty() )
+		matches[0]->CropStart(match_col);
+
+	this->CalculateOffset();
+	this->ValidateMatches();
+}
+
+/**
+ * Removes the last crop_amount alignment columns from the interval.
+ * Matches lying entirely within the cropped region are freed; a match that
+ * straddles the new end is cropped in place.
+ * @param crop_amount	Number of alignment columns to remove from the end
+ * @throws SeqIndexOutOfBounds if crop_amount exceeds AlignmentLength()
+ */
+template<class GappedBaseImpl>
+void GenericInterval<GappedBaseImpl>::CropEnd(gnSeqI crop_amount)
+{
+	if( crop_amount > this->AlignmentLength() )
+		Throw_gnEx( genome::SeqIndexOutOfBounds() );
+	if( crop_amount == 0 )
+		return;
+
+	// locate the first column that is to be removed
+	std::vector<gnSeqI> col_positions;
+	std::vector<bool> col_flags;
+	size_t boundary_match = 0;
+	gnSeqI boundary_col;
+	this->GetColumnAndMatch( this->AlignmentLength()-crop_amount, col_positions, col_flags, boundary_match, boundary_col );
+
+	// when the boundary falls inside a match, that match is kept and trimmed;
+	// when it falls on the match's first column, the match is removed whole
+	const bool keep_boundary_match = boundary_col != 0;
+	const size_t first_removed = boundary_match + (keep_boundary_match ? 1 : 0);
+
+	// release and drop everything from first_removed onwards
+	for( size_t mI = first_removed; mI < matches.size(); ++mI )
+		matches[mI]->Free();
+	matches.erase(matches.begin()+first_removed, matches.end());
+
+	// trim the straddling match, which is now the last one
+	if( keep_boundary_match && !matches.empty() )
+		matches.back()->CropEnd(matches.back()->AlignmentLength() - boundary_col);
+
+	this->CalculateOffset();
+	this->ValidateMatches();
+}
+
+/**
+ * Builds the interval's alignment matrix by concatenating the alignment
+ * matrices of its matches.
+ * @param align_matrix	Output: one bitset of AlignmentLength() bits per sequence;
+ *						the set bits are copied from the individual matches
+ */
+template<class GappedBaseImpl>
+void GenericInterval<GappedBaseImpl>::GetAlignment( std::vector< bitset_t >& align_matrix ) const
+{
+	align_matrix = std::vector< bitset_t >( this->SeqCount(), bitset_t(this->AlignmentLength(),false) );
+
+	gnSeqI col_offset = 0;	// first interval column covered by the current match
+	for( uint matchI = 0; matchI < matches.size(); ++matchI )
+	{
+		std::vector< bitset_t > match_matrix;
+		matches[matchI]->GetAlignment( match_matrix );
+		for( uint seqI = 0; seqI < this->SeqCount(); ++seqI )
+		{
+			// skip sequences that are undefined or empty in this match
+			if( matches[matchI]->LeftEnd(seqI) == NO_MATCH || matches[matchI]->Length(seqI) == 0 )
+				continue;
+
+			// copy the first Length(seqI) set bits, shifted by col_offset
+			const gnSeqI len = matches[matchI]->Length(seqI);
+			bitset_t::size_type bit = match_matrix[seqI].find_first();
+			size_t copied = 0;
+			while( copied < len )
+			{
+				align_matrix[seqI].set( col_offset + bit );
+				++copied;
+				bit = match_matrix[seqI].find_next(bit);
+			}
+		}
+		col_offset += matches[matchI]->AlignmentLength();
+	}
+}
+
+/**
+ * Removes the leftmost amount positions of sequence seqI from the interval.
+ * The match containing the new left end is cropped, and all matches on the
+ * cropped side of it are freed and removed from the interval.
+ * @param amount	Number of positions of sequence seqI to remove
+ * @param seqI	The sequence whose coordinates determine the crop
+ * @throws SeqIndexOutOfBounds if amount exceeds Length(seqI)
+ */
+template<class GappedBaseImpl>
+void GenericInterval<GappedBaseImpl>::CropLeft( gnSeqI amount, uint seqI )
+{
+	if( amount > this->Length(seqI) )
+		Throw_gnEx( genome::SeqIndexOutOfBounds() );
+	if( this->LeftEnd(seqI) == NO_MATCH || amount == 0 )
+		return;
+
+	// for debugging
+	gnSeqI pre_len = this->Length(seqI);
+	gnSeqI pre_lend = this->LeftEnd(seqI);
+
+	// NOTE(review): when amount == Length(seqI), FindMatchPos() appears to run past
+	// the last defined match, leaving mI out of range -- confirm callers never crop
+	// the whole sequence
+	gnSeqI match_pos;
+	size_t mI;
+	this->FindMatchPos(seqI, amount, mI, match_pos);
+	// a match whose orientation differs from the interval's is cropped from its other end
+	if( matches[mI]->Orientation(seqI) == this->Orientation(seqI) )
+		matches[mI]->CropLeft(match_pos, seqI);
+	else
+		matches[mI]->CropRight(match_pos, seqI);
+
+	if( matches[mI]->Length(seqI) == 0 )
+		std::cerr << "Big fat zero 1\n";
+
+	// get rid of everything to the left of mI in sequence coordinates:
+	// the matches before mI for a forward sequence, the matches after mI otherwise
+	if( this->Orientation(seqI) == AbstractMatch::forward )
+	{
+		for( size_t m = 0; m < mI; m++ )
+			matches[m]->Free();
+		matches.erase(matches.begin(), matches.begin()+mI);
+	}else{
+		for( size_t m = mI+1; m < matches.size(); m++ )
+			matches[m]->Free();
+		matches.erase(matches.begin()+mI+1, matches.end());
+	}
+
+	this->CalculateOffset();
+	this->ValidateMatches();
+
+	// sanity check: exactly amount positions must have been removed
+	if( this->Length(seqI) != pre_len - amount )
+	{
+		std::cerr << "Error intercroplef\n";
+		std::cerr << "pre len: " << pre_len << std::endl;
+		std::cerr << "pre lend: " << pre_lend << std::endl;
+		std::cerr << "amount: " << amount << std::endl;
+		std::cerr << "LeftEnd(seqI) " << this->LeftEnd(seqI) << std::endl;
+		std::cerr << "Length(seqI) " << this->Length(seqI) << std::endl;
+		std::cerr << "AlignmentLength() " << this->AlignmentLength() << std::endl;
+		genome::breakHere();
+	}
+}
+
+/**
+ * Removes the rightmost amount positions of sequence seqI from the interval.
+ * The match containing the new right end is cropped, and all matches on the
+ * cropped side of it are freed and removed from the interval.
+ * @param amount	Number of positions of sequence seqI to remove
+ * @param seqI	The sequence whose coordinates determine the crop
+ * @throws SeqIndexOutOfBounds if amount exceeds Length(seqI)
+ */
+template<class GappedBaseImpl>
+void GenericInterval<GappedBaseImpl>::CropRight( gnSeqI amount, uint seqI )
+{
+	if( amount > this->Length(seqI) )
+		Throw_gnEx( genome::SeqIndexOutOfBounds() );
+
+	if( this->LeftEnd(seqI) == NO_MATCH || amount == 0 )
+		return;
+
+	// for debugging
+	gnSeqI pre_len = this->Length(seqI);
+	gnSeqI pre_lend = this->LeftEnd(seqI);
+
+	// number of positions of seqI that remain after the crop
+	gnSeqI left_amount = this->Length(seqI) - amount;
+	gnSeqI match_pos;
+	size_t mI;
+	this->FindMatchPos(seqI, left_amount, mI, match_pos);
+	// a match whose orientation differs from the interval's is cropped from its other end
+	if( matches[mI]->Orientation(seqI) == this->Orientation(seqI) )
+		matches[mI]->CropRight(matches[mI]->Length(seqI)-match_pos, seqI);
+	else
+		matches[mI]->CropLeft(matches[mI]->Length(seqI)-match_pos, seqI);
+
+	// mI is unsigned: for mI == 0 the decrement wraps around, and the mI+1 used
+	// below wraps back to 0, so every match is removed
+	if( matches[mI]->Length(seqI) == 0 )
+		mI += this->Orientation(seqI) == AbstractMatch::forward ? -1 : 1;	// delete this match too
+
+	// get rid of everything to the right of mI in sequence coordinates:
+	// the matches after mI for a forward sequence, the matches before mI otherwise
+	if( this->Orientation(seqI) == AbstractMatch::forward )
+	{
+		for( size_t m = mI+1; m < matches.size(); m++ )
+			matches[m]->Free();
+		matches.erase(matches.begin()+(mI+1), matches.end());
+	}else{
+		for( size_t m = 0; m < mI; m++ )
+			matches[m]->Free();
+		matches.erase(matches.begin(), matches.begin()+mI);
+	}
+
+	this->CalculateOffset();
+	this->ValidateMatches();
+
+	// sanity check: exactly amount positions must have been removed
+	if( this->Length(seqI) != pre_len - amount )
+	{
+		std::cerr << "Error intercropright\n";
+		std::cerr << "pre len: " << pre_len << std::endl;
+		std::cerr << "pre lend: " << pre_lend << std::endl;
+		std::cerr << "amount: " << amount << std::endl;
+		std::cerr << "LeftEnd(seqI) " << this->LeftEnd(seqI) << std::endl;
+		std::cerr << "Length(seqI) " << this->Length(seqI) << std::endl;
+		std::cerr << "AlignmentLength() " << this->AlignmentLength() << std::endl;
+		genome::breakHere();
+	}
+}
+
+/**
+ * Applies MoveStart(move_amount) to the interval itself (via the base class)
+ * and then to each of the matches it contains.
+ */
+template<class GappedBaseImpl>
+void GenericInterval<GappedBaseImpl>::MoveStart(int64 move_amount)
+{
+	GappedBaseImpl::MoveStart(move_amount);
+	std::vector<AbstractMatch*>::iterator cur = matches.begin();
+	for( ; cur != matches.end(); ++cur )
+		(*cur)->MoveStart(move_amount);
+}
+
+/**
+ * Applies MoveEnd(move_amount) to the interval itself (via the base class)
+ * and then to each of the matches it contains.
+ */
+template<class GappedBaseImpl>
+void GenericInterval<GappedBaseImpl>::MoveEnd(int64 move_amount)
+{
+	GappedBaseImpl::MoveEnd(move_amount);
+	std::vector<AbstractMatch*>::iterator cur = matches.begin();
+	for( ; cur != matches.end(); ++cur )
+		(*cur)->MoveEnd(move_amount);
+}
+
+
+/**
+ * Computes, for every sequence, the left end, length and orientation spanned
+ * by an ordered collection of matches.
+ * Four passes are made: left ends of forward sequences (scanning front to
+ * back), lengths of forward sequences (back to front), left ends of reverse
+ * sequences (back to front) and lengths of reverse sequences (front to back).
+ * Each pass stops early once no sequence is left unresolved.
+ * @param matches	Non-empty, indexable collection of match pointers; front() is dereferenced
+ * @param left_ends	Output: left end of each sequence, NO_MATCH where undefined
+ * @param lengths	Output: length of each sequence, 0 where undefined
+ * @param orientations	Output: true for sequences found in forward orientation, false otherwise
+ */
+template< class MatchVector >
+void FindBoundaries( const MatchVector& matches, std::vector<gnSeqI>& left_ends, std::vector<gnSeqI>& lengths, std::vector<bool>& orientations )
+{
+	uint seqI;
+	boolean zero_exists = false;	// true while some sequence is still unresolved in the current pass
+	uint seq_count = matches.front()->SeqCount();
+	left_ends = std::vector<gnSeqI>( seq_count, NO_MATCH );
+	lengths = std::vector<gnSeqI>( seq_count, 0 );
+	orientations = std::vector<bool>( seq_count, false );
+
+	// find leftend in each forward sequence
+	uint matchI = 0;
+	for(; matchI != matches.size(); ++matchI )
+	{
+		zero_exists = false;
+		for( seqI = 0; seqI < seq_count; ++seqI )
+		{
+			if( left_ends[seqI] == NO_MATCH && matches[matchI]->Orientation(seqI) == AbstractMatch::forward )
+			{
+				left_ends[seqI] = matches[ matchI ]->LeftEnd(seqI);
+				orientations[seqI] = true;
+			}
+			else if( left_ends[seqI] == NO_MATCH )
+				zero_exists = true;
+		}
+		if( !zero_exists )
+			break;
+	}
+
+	// find end in each forward sequence
+	// (length = end of the last forward match minus the left end found above)
+	for( matchI = matches.size(); matchI > 0; matchI-- )
+	{
+		zero_exists = false;
+		for( seqI = 0; seqI < seq_count; ++seqI )
+		{
+			if( lengths[seqI] == 0 &&
+				matches[ matchI - 1 ]->Orientation(seqI) == AbstractMatch::forward )
+			{
+					lengths[seqI] = matches[matchI - 1]->LeftEnd(seqI) + matches[matchI - 1]->Length(seqI) - left_ends[seqI];
+			}
+			if( left_ends[seqI] != NO_MATCH && lengths[seqI] == 0 )
+				zero_exists = true;
+		}
+		if( !zero_exists )
+			break;
+	}
+
+	// find start in each reverse sequence
+	// (scanning from the back, where the lowest coordinates of a reverse sequence are expected)
+	for( matchI = matches.size(); matchI > 0; matchI-- )
+	{
+		zero_exists = false;
+		for( seqI = 0; seqI < seq_count; ++seqI )
+		{
+			if( left_ends[seqI] == NO_MATCH && matches[ matchI - 1 ]->Orientation(seqI) == AbstractMatch::reverse )
+				left_ends[seqI] = matches[matchI - 1]->LeftEnd(seqI);
+			if( left_ends[seqI] == NO_MATCH )
+				zero_exists = true;
+		}
+		if( !zero_exists )
+			break;
+	}
+
+	// find end in each reverse sequence
+	for( matchI = 0; matchI != matches.size(); ++matchI )
+	{
+		zero_exists = false;
+		for( seqI = 0; seqI < seq_count; ++seqI )
+		{
+			if( lengths[seqI] == 0 &&
+				matches[matchI]->Orientation(seqI) == AbstractMatch::reverse )
+			{
+					lengths[seqI] = matches[matchI]->Length(seqI)+(matches[matchI]->LeftEnd(seqI) - left_ends[seqI]);
+			}
+			if( lengths[seqI] == 0 )
+				zero_exists = true;
+		}
+		if( !zero_exists )
+			break;
+	}
+}
+
+
+/**
+ * Fills the holes between consecutive matches of each defined sequence with
+ * single-sequence gap matches (see AddGapMatches()), so that every sequence is
+ * covered contiguously by the match vector.
+ */
+template<class GappedBaseImpl>
+void GenericInterval<GappedBaseImpl>::addUnalignedRegions()
+{
+	// work on a list so that AddGapMatches() can insert in the middle
+	std::list<AbstractMatch*> work_list(matches.begin(), matches.end());
+
+	for( uint seqI = 0; seqI < this->SeqCount(); ++seqI )
+	{
+		if( this->LeftEnd(seqI) == NO_MATCH )
+			continue;	// sequence is not part of this interval
+
+		if(this->Orientation(seqI) != AbstractMatch::forward)
+		{
+			// non-forward sequences are scanned from the back of the list
+			AddGapMatches( work_list, work_list.rbegin(), work_list.rend(), seqI, this->LeftEnd(seqI), this->RightEnd(seqI), this->Orientation(seqI), this->SeqCount() );
+		}
+		else
+		{
+			AddGapMatches( work_list, work_list.begin(), work_list.end(), seqI, this->LeftEnd(seqI), this->RightEnd(seqI), this->Orientation(seqI), this->SeqCount() );
+		}
+	}
+
+	// replace the match vector with the augmented list
+	matches.assign(work_list.begin(), work_list.end());
+}
+
+/**
+ * Inverts the interval: the base class and every contained match are inverted,
+ * and the order of the matches is reversed.
+ */
+template<class GappedBaseImpl>
+void GenericInterval<GappedBaseImpl>::Invert(){
+	GappedBaseImpl::Invert();
+
+	// invert each match individually...
+	std::vector<AbstractMatch*>::iterator cur = matches.begin();
+	for( ; cur != matches.end(); ++cur )
+		(*cur)->Invert();
+
+	// ...then flip their order
+	std::reverse( matches.begin(), matches.end() );
+}
+
+/**
+ * Reports the contents of alignment column col.
+ * Thin wrapper around GetColumnAndMatch() that discards the match index and
+ * the within-match column.
+ * @param col	The alignment column to look up
+ * @param pos	Output: per-sequence positions as computed by GetColumnAndMatch()
+ * @param column	Output: per-sequence flags as computed by GetColumnAndMatch()
+ */
+template<class GappedBaseImpl>
+void GenericInterval<GappedBaseImpl>::GetColumn( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column ) const
+{
+	// outputs of GetColumnAndMatch() that are not needed here
+	size_t matchI;
+	gnSeqI match_col;
+	this->GetColumnAndMatch( col, pos, column, matchI, match_col );
+}
+
+/**
+ * Finds the match that contains the pos'th position of sequence seqI, counting
+ * from the start of the interval in that sequence's coordinate order.
+ * For a reverse-oriented sequence the matches are walked from the back of the vector.
+ * @param seqI	The sequence of interest
+ * @param pos	Offset into the interval's portion of sequence seqI
+ * @param matchI	Output: index of the first match, in walking order, whose length in
+ *					seqI exceeds the remaining offset.  If no such match exists the result is
+ *					matches.size() for forward sequences and a wrapped-around size_t otherwise
+ * @param match_pos	Output: the remaining offset within that match
+ */
+template<class GappedBaseImpl>
+void GenericInterval<GappedBaseImpl>::FindMatchPos( uint seqI, gnSeqI pos, size_t& matchI, gnSeqI& match_pos )
+{
+	match_pos = pos;
+	int diff_amt = 0;	// added to matchI when indexing: 0 when walking forward, -1 when walking backward
+	int incr = 1;
+	matchI = 0;
+	size_t end_mI = matches.size();
+	if( this->Orientation(seqI) == AbstractMatch::reverse )
+	{
+		// walk backwards: matchI runs from size() down to 1 and indexes element matchI-1,
+		// which avoids decrementing the unsigned counter below zero inside the loop
+		diff_amt = -1;
+		incr = -1;
+		matchI = matches.size();
+		end_mI = 0;
+	}
+
+	for( ; matchI != end_mI; matchI+=incr )
+	{
+		if( matches[matchI+diff_amt]->LeftEnd(seqI) == NO_MATCH )
+			continue;	// seqI is not defined in this match
+		// skip whole matches until the remaining offset falls inside one
+		if( matches[matchI+diff_amt]->Length(seqI) <= match_pos )
+			match_pos -= matches[matchI+diff_amt]->Length(seqI);
+		else
+			break;
+	}
+
+	// convert the one-past counter used for the backward walk into a real index
+	if( this->Orientation(seqI) == AbstractMatch::reverse )
+		matchI--;
+}
+
+/**
+ * Recomputes the interval's left end, length and orientation for every
+ * sequence from the matches it contains (see FindBoundaries()), then
+ * recomputes the alignment length.  Sequences that no match defines are
+ * reset to NO_MATCH with length 0.
+ */
+template<class GappedBaseImpl>
+void GenericInterval<GappedBaseImpl>::CalculateOffset(){
+	std::vector<gnSeqI> left_end( this->SeqCount(), NO_MATCH );
+	std::vector<gnSeqI> length( this->SeqCount(), 0 );
+	std::vector<bool> orientation;	// only filled in by FindBoundaries()
+	if( !this->matches.empty() )
+		FindBoundaries( this->matches, left_end, length, orientation );
+	for( uint seqI = 0; seqI < this->SeqCount(); seqI++ )
+	{
+		// left_end entries are initialised to NO_MATCH, so test against that
+		// sentinel rather than a literal value
+		if( left_end[seqI] != NO_MATCH )
+		{
+			this->SetLeftEnd(seqI, left_end[seqI]);
+			this->SetLength(length[seqI], seqI);
+			if( orientation[seqI] )
+				this->SetOrientation(seqI, AbstractMatch::forward);
+			else
+				this->SetOrientation(seqI, AbstractMatch::reverse);
+		}else if( this->LeftEnd(seqI) != NO_MATCH )
+		{
+			// the sequence used to be defined but no match covers it any more
+			this->SetLength(0, seqI);
+			this->SetLeftEnd(seqI, NO_MATCH);
+		}
+
+	}
+
+	this->CalculateAlignmentLength();
+}
+
+/**
+ * Replaces the interval's matches with a single GappedAlignment built from
+ * seq_align.  The new alignment receives the interval's current Start() and
+ * Length() for each sequence.  Matches previously held by the interval are freed.
+ * @param seq_align	One aligned string per sequence; must not be empty, and the
+ *					first string's size is used as the alignment length
+ */
+template<class GappedBaseImpl>
+void GenericInterval<GappedBaseImpl>::SetAlignment( const std::vector< std::string >& seq_align )
+{
+	GappedAlignment* ga = new GappedAlignment(seq_align.size(), seq_align[0].size());
+	// the interval owns its matches: release the old ones instead of
+	// dropping the pointers, which would leak them
+	for( std::size_t mI = 0; mI < matches.size(); ++mI )
+		matches[mI]->Free();
+	matches.clear();
+	matches.push_back(ga);
+	ga->SetAlignment(seq_align);
+	for( uint seqI = 0; seqI < this->SeqCount(); ++seqI )
+	{
+		ga->SetStart(seqI, this->Start(seqI));
+		ga->SetLength(this->Length(seqI), seqI);
+	}
+}
+
+
+/**
+ * Stream insertion for a GenericInterval (e.g. to cout).
+ * Each contained match that is a GappedAlignment or a Match is written with
+ * its own operator<<, and every match is followed by a newline.  Exceptions
+ * raised while writing are caught and reported on std::cerr.
+ */
+template<class GappedBaseImpl>
+std::ostream& operator<<(std::ostream& os, const GenericInterval<GappedBaseImpl>& cr){
+	try{
+		const std::vector<AbstractMatch*>& contained = cr.matches;
+		for( std::vector<AbstractMatch*>::const_iterator cur = contained.begin(); cur != contained.end(); ++cur )
+		{
+			const AbstractMatch* m = *cur;
+			// dispatch on the concrete type of the match
+			if( const GappedAlignment* as_gapped = dynamic_cast< const GappedAlignment* >( m ) )
+				os << *as_gapped;
+			if( const Match* as_match = dynamic_cast< const Match* >( m ) )
+				os << *as_match;
+			os << std::endl;
+		}
+	}catch(...){
+		std::cerr << "Exceptional handler\n";
+	}
+	return os;
+}
+
+/** Sets the interval's alignment length to the sum of its matches' alignment lengths */
+template<class GappedBaseImpl>
+void GenericInterval<GappedBaseImpl>::CalculateAlignmentLength()
+{
+	gnSeqI total_columns = 0;
+	std::vector<AbstractMatch*>::const_iterator cur = matches.begin();
+	while( cur != matches.end() )
+	{
+		total_columns += (*cur)->AlignmentLength();
+		++cur;
+	}
+	this->SetAlignmentLength(total_columns);
+}
+
+/**
+ * Fills a gnAlignedSequences object with this interval's alignment.
+ * For each entry of seq_table the name of its first contig (or an empty string
+ * when it has no contigs) and this interval's Start() in that sequence are
+ * recorded; the aligned sequences themselves come from mems::GetAlignment().
+ * @param gnas	Output: names, positions and sequences are replaced
+ * @param seq_table	The sequences the interval refers to
+ */
+template<class GappedBaseImpl>
+void GenericInterval<GappedBaseImpl>::GetAlignedSequences( gnAlignedSequences& gnas, const std::vector< genome::gnSequence* >& seq_table ) const 
+{
+	gnas.names.clear();
+	// positions is filled in lock-step with names, so it must be reset as well;
+	// otherwise entries pile up when gnas is reused
+	gnas.positions.clear();
+	for( uint seqI = 0; seqI < seq_table.size(); ++seqI ){
+		std::string name;
+		if( seq_table[ seqI ]->contigListSize() > 0 )
+			name = seq_table[ seqI ]->contigName( 0 );
+		gnas.names.push_back( name );
+		gnas.positions.push_back(this->Start(seqI));
+	}
+	mems::GetAlignment( *this, seq_table, gnas.sequences );
+}
+
+/**
+ * Returns the flag that GetColumn() reports for sequence seq in alignment column col.
+ * NOTE(review): GetColumnAndMatch() initialises every flag to false and lets the
+ * containing match's GetColumn() fill it in.  If a true flag there means "the
+ * sequence has a residue in this column", the value returned here is inverted
+ * relative to the function's name -- confirm against AbstractMatch::GetColumn().
+ * @param seq	The sequence of interest
+ * @param col	The alignment column of interest
+ */
+template<class GappedBaseImpl>
+bool GenericInterval<GappedBaseImpl>::IsGap( uint seq, gnSeqI col ) const
+{
+	std::vector<gnSeqI> pos;
+	std::vector<bool> column;
+	GetColumn(col, pos, column);
+	return column[seq];
+}
+
+}
+
+namespace std {
+/**
+ * Specialization of std::swap for mems::Interval that forwards to the
+ * Interval::swap() member function instead of copying through a temporary.
+ */
+template<> inline
+void swap( mems::Interval& a, mems::Interval& b )
+{
+	a.swap(b);
+}
+}
+
+#endif	// __Interval_h__
diff --git a/libMems/IntervalList.cpp b/libMems/IntervalList.cpp
new file mode 100644
index 0000000..ef612ca
--- /dev/null
+++ b/libMems/IntervalList.cpp
@@ -0,0 +1,25 @@
+/*******************************************************************************
+ * $Id: IntervalList.cpp,v 1.12 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/IntervalList.h"
+#include "libMems/DNAFileSML.h"
+#include "libMems/MemHash.h"
+#include "libMems/GappedAlignment.h"
+
+#include <map>
+#include <sstream>
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+}
diff --git a/libMems/IntervalList.h b/libMems/IntervalList.h
new file mode 100644
index 0000000..9d9321a
--- /dev/null
+++ b/libMems/IntervalList.h
@@ -0,0 +1,842 @@
+/*******************************************************************************
+ * $Id: GenericIntervalList.h,v 1.6 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _IntervalList_h_
+#define _IntervalList_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <iostream>
+#include <list>
+#include <sstream>
+
+#include "libMems/SortedMerList.h"
+#include "libGenome/gnSequence.h"
+#include "libMems/Interval.h"
+#include "libMems/MemHash.h"
+#include "libMems/CompactGappedAlignment.h"
+#include "libGenome/gnSourceFactory.h"
+#include "libGenome/gnFASSource.h"
+#include "libGenome/gnSEQSource.h"
+#include "libGenome/gnGBKSource.h"
+#include "libGenome/gnRAWSource.h"
+
+namespace mems {
+
+/**
+ * This class represents a set of Intervals, each of which is a collinear aligned region.
+ * There are functions to read and write a GenericIntervalList.
+ * @see Interval
+ */
+template< class MatchType = Interval >
+class GenericIntervalList : public std::vector< MatchType > {
+public:
+	GenericIntervalList(){};
+	GenericIntervalList( const GenericIntervalList& ml );
+	GenericIntervalList& operator=( const GenericIntervalList& ml );
+	
+	/**
+	 * Deletes the sequences pointed to by seq_table and empties
+	 * the file name list and the interval list itself.
+	 */
+	void Clear();
+
+	/**
+	 * Reads a GenericIntervalList from an input stream.
+	 * Sequence file names are read into the seq_filename
+	 * vector, but the actual files are not
+	 * opened.  The calling function should load them after
+	 * using this method.
+	 * @param match_stream The input stream to read from
+	 */
+	void ReadList( std::istream& match_stream );
+
+	/**
+	 * Writes a GenericIntervalList to the designated output stream
+	 * @param match_stream The output stream to write to
+	 */
+	void WriteList( std::ostream& match_stream ) const;
+	
+	/**
+	 * Writes a gapped alignment of sequences to the output stream
+	 */
+	void WriteAlignedSequences(std::ostream& match_file) const;
+	
+	/**
+	 * Writes a gapped alignment of sequences in xmfa (eXtended MultiFastA) format
+	 */
+	void WriteStandardAlignment( std::ostream& out_file ) const;
+
+    /**
+	 * Writes a gapped alignment of sequences in xml format
+	 */
+    void WriteXMLAlignment( std::ostream& out_file ) const;
+	
+	/**
+	 * Reads in a set of intervals that are in xmfa (eXtended MultiFastA) format
+	 */
+	void ReadStandardAlignment( std::istream& in_stream );
+
+	/**
+	 * Reads in a set of intervals that are in xmfa (eXtended MultiFastA) format
+	 * and stores them in CompactGappedAlignments<>
+	 */
+	void ReadStandardAlignmentCompact( std::istream& in_stream );
+	
+	std::vector<std::string> seq_filename;	/**< The names of files associated with the sequences used by this alignment */
+	std::vector<genome::gnSequence*> seq_table;	/**< The actual sequences used in this alignment; Clear() deletes them */
+
+	std::string backbone_filename;	/**< The name of an associated backbone file, or empty if none exists */
+protected:
+
+};
+
+
+typedef GenericIntervalList<> IntervalList;
+
+template< class MatchType >
+GenericIntervalList<MatchType>::GenericIntervalList( const GenericIntervalList<MatchType>& ml )
+{	// Copy construction: the base vector starts out empty, then everything is copied through operator=.
+	this->operator=( ml );
+}
+template< class MatchType >
+GenericIntervalList<MatchType>& GenericIntervalList<MatchType>::operator=( const GenericIntervalList<MatchType>& ml )
+{	// Copies the intervals and every list-level member of ml into this list; returns *this.
+	std::vector< MatchType >::operator=( ml );	// the intervals themselves
+	seq_filename = ml.seq_filename;
+	seq_table = ml.seq_table;	// copies the pointers only: both lists then refer to the same gnSequence objects
+	backbone_filename = ml.backbone_filename;	// was previously dropped, so copies lost their backbone file
+	return *this;
+}
+
+template< class MatchType >
+void GenericIntervalList<MatchType>::Clear() 
+{	// Deletes every sequence in seq_table, then empties seq_table, seq_filename and the interval list.
+	// delete on a null pointer is a no-op, so no explicit NULL test is needed
+	for( uint seqI = 0; seqI < seq_table.size(); seqI++ )
+		delete seq_table[ seqI ];
+	seq_table.clear();	// drop the now-dangling pointers so a repeated Clear() cannot delete them twice
+	seq_filename.clear();
+	this->clear();
+}
+
+template< class MatchType >
+void GenericIntervalList<MatchType>::ReadList(std::istream& match_file)
+{
+	// Reads the "FormatVersion 4" interval list from match_file.  The header gives the
+	// sequence count and, per sequence, a file name (stored in seq_filename) and a
+	// length (skipped).  Each "Interval" section that follows becomes one Interval
+	// appended to this list, built from its Match lines and "GappedAlignment" records.
+	// A bad header or a mismatching interval count raises InvalidFileFormat.
+	std::string tag;
+	gnSeqI len = 0;
+	int64 start = 0;
+	unsigned int seq_count = 0;	// stays 0 (and is rejected below) if the count cannot be read
+	uint seqI;
+	
+	match_file >> tag;	//format version tag
+	if( tag != "FormatVersion" ){
+		Throw_gnEx(InvalidFileFormat());
+	}
+	match_file >> tag;	//format version
+	if( tag != "4" ){
+		Throw_gnEx(InvalidFileFormat());
+	}
+	match_file >> tag;	//sequence count tag
+	if( tag != "SequenceCount" ){
+		Throw_gnEx(InvalidFileFormat());
+	}
+	match_file >> seq_count;	//sequence count
+	if(seq_count < 2){
+		Throw_gnEx(InvalidFileFormat());
+	}
+	
+	std::vector< std::string > alignment;
+	// read the sequence file names and lengths
+	for( seqI = 0; seqI < seq_count; seqI++ ){
+		match_file >> tag;	// name tag
+		getline( match_file, tag );
+		// skip the tab character; erase() is a no-op on an empty line,
+		// whereas substr( 1 ) would throw std::out_of_range there
+		tag.erase( 0, 1 );
+		seq_filename.push_back(tag);
+		// the sequence files are not opened here; seq_table is left untouched
+		// and the caller is expected to load the sequences afterwards
+		match_file >> tag;	// length tag
+		match_file >> tag;	// length
+
+		alignment.push_back( "" );	// initialize alignment vector
+	}
+	uint interval_count = 0;
+	match_file >> tag;	// interval count tag
+	if( !(match_file >> interval_count) ){	// a truncated header must not be compared as garbage below
+		Throw_gnEx(InvalidFileFormat());
+	}
+	
+	// read the matches
+	std::string cur_line;
+	std::vector< AbstractMatch* > iv_matches;	// matches collected for the interval being parsed
+	bool parsing = false;	// becomes true at the first "Interval" line
+	
+	while( std::getline( match_file, cur_line ) ){
+		if( cur_line.find( "Interval" ) != std::string::npos ){
+			// end the old interval
+			if( iv_matches.size() > 0 )
+			{
+				this->push_back( Interval(iv_matches.begin(), iv_matches.end()) );
+				// NOTE(review): the collected matches are not deleted here; whether
+				// Interval copies or adopts them is not visible from this file
+				iv_matches.clear();
+			}
+			parsing = true;
+			continue;
+		}
+		if( !parsing )
+			continue;
+		if( cur_line.length() == 0 )
+			continue;
+		
+		if( cur_line == "GappedAlignment" ){
+			getline( match_file, cur_line );
+
+			std::stringstream line_stream( cur_line );
+			line_stream >> len;
+			GappedAlignment* cr = new GappedAlignment( seq_count, len );
+			
+			for( seqI = 0; seqI < seq_count; seqI++ ){
+				line_stream >> start;
+				cr->SetStart( seqI, start );
+				std::getline( match_file, alignment[ seqI ] );
+				int64 seq_len = 0;
+				for( uint charI = 0; charI < alignment[ seqI ].length(); charI++ )
+					if( alignment[ seqI ][ charI ] != '-' )
+						seq_len++;
+				cr->SetLength( seq_len, seqI );
+			}
+			cr->SetAlignment( alignment );
+			iv_matches.push_back( cr );
+		}
+		else{
+
+			Match mmhe( seq_count );
+			Match* mhe = mmhe.Copy();
+			std::stringstream line_stream( cur_line );
+			
+			line_stream >> len;
+			mhe->SetLength(len);
+
+			for( seqI = 0; seqI < seq_count; seqI++){
+				line_stream >> start;
+				mhe->SetStart(seqI, start);
+			}
+		
+			iv_matches.push_back( mhe );
+		}
+	}
+	if( iv_matches.size() > 0 )
+		this->push_back( Interval(iv_matches.begin(), iv_matches.end()) );
+	if( interval_count != this->size() ){
+		Throw_gnEx(InvalidFileFormat());
+	}
+	
+}
+
+template< class MatchType >
+void GenericIntervalList<MatchType>::WriteList(std::ostream& match_file) const
+{
+	// Writes the "FormatVersion 4" text form of this list to match_file: a header,
+	// a file name and length line per entry of seq_table, then every interval's matches.
+	const unsigned int n_seqs = seq_table.size();
+
+	// header
+	match_file << "FormatVersion" << '\t' << 4 << "\n";
+	match_file << "SequenceCount" << '\t' << n_seqs << "\n";
+	for( uint sI = 0; sI < n_seqs; sI++ ){
+		// file name, or the literal "null" when no name was recorded for this sequence
+		match_file << "Sequence" << sI << "File" << '\t';
+		match_file << ( sI < seq_filename.size() ? seq_filename[sI] : std::string( "null" ) );
+		match_file << "\n";
+		match_file << "Sequence" << sI << "Length" << '\t';
+		if( sI < seq_table.size() )
+			match_file << seq_table[sI]->length();
+		else
+			match_file << "0";
+		match_file << "\n";
+	}
+
+	match_file << "IntervalCount" << '\t' << this->size() << std::endl;
+	
+	// one "Interval N" section per interval, followed by an empty line
+	for( uint ivI = 0; ivI < this->size(); ivI++ ){
+		match_file << "Interval " << ivI << std::endl;
+		const std::vector<AbstractMatch*>& iv_matches = (*this)[ ivI ].GetMatches();
+		for( uint mI = 0; mI < iv_matches.size(); mI++ ){
+			// a Match is written first if the cast succeeds; anything that is neither a Match nor a GappedAlignment is skipped
+			if( const Match* plain = dynamic_cast< const Match* >( iv_matches[ mI ] ) ){
+				match_file << *plain << std::endl;
+				continue;
+			}
+			const GappedAlignment* gapped = dynamic_cast< const GappedAlignment* >( iv_matches[ mI ] );
+			if( gapped == NULL )
+				continue;
+			match_file << "GappedAlignment\n";
+			match_file << gapped->Length();
+			for( uint sI = 0; sI < n_seqs; sI++ )
+				match_file << '\t' << gapped->Start( sI );
+			match_file << std::endl;
+
+			const std::vector< std::string >& rows = GetAlignment( *gapped, seq_table );
+			for( uint sI = 0; sI < n_seqs; sI++ )
+				match_file << rows[ sI ] << std::endl;
+		}
+		match_file << std::endl;
+	}
+}
+
+//stub for now, later use a XML library to write/read alignments in xml format..
+template< class MatchType >
+void GenericIntervalList<MatchType>::WriteXMLAlignment( std::ostream& out_file ) const 
+{
+	// Writes the list as a <procrastAlignment> document: one <localAlignment> per
+	// interval with a non-zero alignment length, each holding one <component> per
+	// sequence.  Nothing at all is written when the list is empty.
+	if( this->size() == 0 )
+		return;
+
+	// The root element names the first source sequence file.  seq_filename may be
+	// empty (nothing in this class forces it to be filled), and indexing an empty
+	// vector is undefined behaviour, so fall back to an empty attribute value.
+	// NOTE(review): attribute values are emitted verbatim, without XML escaping.
+	const std::string source_name = seq_filename.empty() ? std::string() : seq_filename[ 0 ];
+	out_file << "<procrastAlignment sequence=\"" << source_name << "\">" << std::endl;
+
+	for( uint ivI = 0; ivI < this->size(); ivI++ ){
+		const MatchType& iv = (*this)[ ivI ];
+		if( iv.AlignmentLength() == 0 )
+			continue;
+		out_file << "\t<localAlignment id = \"" << ivI+1 << "\" length = \"" << iv.AlignmentLength() << "\" multiplicity = \"" << iv.Multiplicity() << "\" spscore=\"" << iv.spscore << "\">" << std::endl;
+
+		// fetch the aligned rows; the single source sequence is reused for every
+		// component when the interval spans more sequences than seq_table holds
+		std::vector<std::string> alignment;
+		if( seq_table.size() == 1 && seq_table.size() != iv.SeqCount() )
+		{
+			GetAlignment( iv, std::vector<genome::gnSequence*>(iv.SeqCount(), seq_table[0]), alignment );
+		}else
+			GetAlignment( iv, seq_table, alignment );
+		for( uint seqI = 0; seqI < iv.SeqCount(); seqI++ ){
+			const int64 startI = iv.Start( seqI );
+			// if this genome doesn't have any sequence in this
+			// interval then skip it...
+			// kludge: write all seqs into the first interval so java parser can read it
+			if( startI == 0 && ivI > 0 )
+				continue;
+
+			// the row is streamed as a std::string: std::string::data() is not
+			// guaranteed to be NUL-terminated before C++11, so it must not be used as a C string
+			out_file << "\t\t<component id=\"" << seqI+1 << "\" seqid=\"1\" leftend=\"" << iv.LeftEnd( seqI ) << "\" length=\"" << iv.Length( seqI ) << "\" orientation=\"" <<  iv.Orientation( seqI) << "\">" << alignment[ seqI ];
+			out_file << "\t\t</component> " << std::endl;
+		}
+		out_file << "\t</localAlignment>" << std::endl;
+	}
+	out_file << "</procrastAlignment>" << std::endl;
+}
+
+template< class MatchType >
+void GenericIntervalList<MatchType>::WriteStandardAlignment( std::ostream& out_file ) const 
+{	// Writes the list in XMFA form: '#' header lines, then per interval one "> n:start-end strand name" record per sequence and a closing '='.
+	if( this->size() == 0 )
+		return;
+
+	// the aligned rows of every record are wrapped at 80 characters per line
+	uint seqI = 0;
+	
+	// write out the format version
+	out_file << "#FormatVersion Mauve1" << std::endl;
+	
+	// write source sequence filenames and formats
+	// to make Paul happy
+	boolean single_input = true;
+	for( seqI = 1; seqI < seq_filename.size(); seqI++ ){
+		if( seq_filename[ 0 ] != seq_filename[ seqI ] ){
+			single_input = false;
+			break;
+		}
+	}
+	for( seqI = 0; seqI < seq_filename.size(); seqI++ ){
+		out_file << "#Sequence" << seqI + 1 << "File\t" << seq_filename[ seqI ] << std::endl;
+		if( single_input )
+			out_file << "#Sequence" << seqI + 1 << "Entry\t" << seqI + 1 << std::endl;
+		
+		genome::gnSourceFactory* sf = genome::gnSourceFactory::GetSourceFactory();
+		genome::gnBaseSource* gnbs = sf->MatchSourceClass( seq_filename[ seqI ] );
+		genome::gnFASSource* gnfs = dynamic_cast< genome::gnFASSource* >(gnbs);
+		genome::gnRAWSource* gnrs = dynamic_cast< genome::gnRAWSource* >(gnbs);
+		genome::gnSEQSource* gnss = dynamic_cast< genome::gnSEQSource* >(gnbs);
+		genome::gnGBKSource* gngs = dynamic_cast< genome::gnGBKSource* >(gnbs);
+		if( gnfs != NULL )
+			out_file << "#Sequence" << seqI + 1 << "Format\tFastA" << std::endl;
+		else if( gnrs != NULL )
+			out_file << "#Sequence" << seqI + 1 << "Format\traw" << std::endl;
+		else if( gnss != NULL ){
+			out_file << "#Sequence" << seqI + 1 << "Format\tDNAstar" << std::endl;
+			out_file << "#Annotation" << seqI + 1 << "File\t" << seq_filename[ seqI ] << std::endl;
+			out_file << "#Annotation" << seqI + 1 << "Format\tDNAstar" << std::endl;
+		}else if( gngs != NULL ){
+			out_file << "#Sequence" << seqI + 1 << "Format\tGenBank" << std::endl;
+			out_file << "#Annotation" << seqI + 1 << "File\t" << seq_filename[ seqI ] << std::endl;
+			out_file << "#Annotation" << seqI + 1 << "Format\tGenBank" << std::endl;
+		}
+	}
+
+	if( this->backbone_filename != "" )
+		out_file << "#BackboneFile\t" << this->backbone_filename << std::endl;
+	
+	for( uint ivI = 0; ivI < this->size(); ivI++ ){
+		if( (*this)[ ivI ].AlignmentLength() == 0 ){
+			continue;
+		}
+		std::vector<std::string> alignment;
+		if( seq_table.size() == 1 && seq_table.size() != (*this)[ ivI ].SeqCount() )
+		{
+			GetAlignment( (*this)[ ivI ], std::vector<genome::gnSequence*>((*this)[ ivI ].SeqCount(), seq_table[0]), alignment );
+		}else
+			GetAlignment( (*this)[ ivI ], seq_table, alignment );
+		for( seqI = 0; seqI < (*this)[ ivI ].SeqCount(); seqI++ ){
+			int64 startI = (*this)[ ivI ].Start( seqI );
+			gnSeqI length = (*this)[ ivI ].Length( seqI );
+			// if this genome doesn't have any sequence in this
+			// interval then skip it...
+			if( startI == 0 &&ivI > 0)	// kludge: write all seqs into the first interval so java parser can read it
+				continue;
+			out_file << "> " << seqI + 1 << ":";
+			if( startI > 0 ){
+				out_file << genome::absolut( startI ) << "-" << genome::absolut( startI ) + length - 1 << " + ";
+			}else if(startI == 0){
+				out_file << 0 << "-" << 0 << " + ";
+			}else{
+				out_file << genome::absolut( startI ) << "-" << genome::absolut( startI ) + length - 1 << " - ";
+			}
+			// write the sequence filename as the seq name; the name is left out when the
+			// list carries fewer file names than the interval has sequences (was an out-of-range read)
+			const uint nameI = single_input ? 0 : seqI;
+			if( nameI < seq_filename.size() )
+				out_file << seq_filename[ nameI ];
+			out_file << std::endl;
+			gnSeqI cur_pos = 0;
+			for( ; cur_pos < alignment[ seqI ].length(); cur_pos += 80 ){
+				gnSeqI cur_len = cur_pos + 80 < alignment[ seqI ].length() ? 80 : alignment[ seqI ].length() - cur_pos;
+				out_file.write( alignment[ seqI ].data() + cur_pos, cur_len );
+				out_file << std::endl;
+			}
+		}
+		out_file << "=" << std::endl;
+		out_file.flush();
+	}
+	
+}
+
+template< class MatchType >
+void GenericIntervalList<MatchType>::ReadStandardAlignment( std::istream& in_stream ) 
+{	// Parses XMFA text from in_stream; every aligned block closed by a '=' line is appended as one Interval holding a single GappedAlignment.
+	uint seq_count = 0;
+	gnSeqI max_len = 0;
+	std::string cur_line;
+	if( !std::getline( in_stream, cur_line ) )
+	{
+		Clear();	// if we can't read from the file then just return an empty interval list
+		return;
+	}
+	uint seqI = 0;
+	std::vector< gnSeqI > lengths;
+	std::vector< GappedAlignment* > ga_list;
+	GappedAlignment cr;
+	std::string empty_line;
+	std::vector< std::string > aln_mat;
+	uint line_count = 1;
+	while( true ){
+		while( cur_line[0] == '#' ){
+			// hit a comment or metadata.  try to parse it if it's a filename
+			std::getline( in_stream, cur_line );
+			line_count++;
+			std::stringstream ss( cur_line );
+			std::string token;
+			std::getline( ss, token, '\t' );
+			// an empty token (blank line or EOF) is skipped first: substr( 1, 8 ) would throw std::out_of_range on it
+			if( token.empty() || token.substr(1, 8) != "Sequence" || token.find( "File" ) == std::string::npos )
+				continue;
+			std::getline( ss, token );
+			seq_filename.push_back( token );
+		}
+		if( cur_line.empty() )
+			break;	// header-only input, or a blank line / EOF right after the header: no alignment to parse
+
+		// read and parse the def. line
+		std::stringstream line_str( cur_line );
+		std::getline( line_str, cur_line, '>' );
+		std::getline( line_str, cur_line, ':' );
+		// take off leading whitespace
+		std::stringstream parse_str( cur_line );
+
+		if( !(parse_str >> seqI) || seqI == 0 ){	// sequence numbers are 1-based; 0 would index aln_mat[ seqI - 1 ] out of range
+			Throw_gnEx(InvalidFileFormat());
+		}
+		int64 start = 0, stop = 0;
+		std::getline( line_str, cur_line, '-' );
+		parse_str.clear();
+		parse_str.str( cur_line );
+		parse_str >> start;
+		line_str >> stop;
+		std::string strand;
+		line_str >> strand;
+
+		// read and parse the sequence
+		while( aln_mat.size() < seqI )
+			aln_mat.push_back( empty_line );
+
+		gnSeqI chars = 0;
+		while( std::getline( in_stream, cur_line ) ){
+			line_count++;
+			if( (cur_line[ 0 ] == '>' ) || (cur_line[ 0 ] == '=' ))
+				break;
+			for( uint charI = 0; charI < cur_line.length(); charI++ )
+				if( cur_line[ charI ] != '-' )
+					chars++;
+			aln_mat[ seqI - 1 ] += cur_line;
+		}
+		while( lengths.size() < seqI )
+			lengths.push_back(0);
+
+		lengths[ seqI - 1 ] = chars;
+
+// temporary workaround for file format inconsistency
+		if( strand == "+" )
+			cr.SetStart( seqI - 1, start );
+		else if( start < stop ){
+			if( chars == 0 )
+				cr.SetStart( seqI - 1, 0 );
+			else
+				cr.SetStart( seqI - 1, -start );
+			if( chars != stop - start + 1 && !(chars == 0 && stop - start == 1) ){
+				std::cerr << "Error in XMFA file format\n";
+				std::cerr << "Before line " << line_count << std::endl;
+				std::cerr << "Expecting " << stop - start + 1 << " characters based on defline\n";
+				std::cerr << "Actually read " << chars << " characters of sequence\n";
+				Throw_gnEx(InvalidFileFormat());
+			}
+		}else{
+			if( chars == 0 )
+				cr.SetStart( seqI - 1, 0 );
+			else
+				cr.SetStart( seqI - 1, -stop );
+			if( chars != start - stop + 1 && !(chars == 0 && stop - start == 1) ){
+				std::cerr << "Error in XMFA file format\n";
+				std::cerr << "Before line " << line_count << std::endl;
+				std::cerr << "Expecting " << start - stop + 1 << " characters based on defline\n";
+				std::cerr << "Actually read " << chars << " characters of sequence\n";
+				Throw_gnEx(InvalidFileFormat());
+			}
+		}
+
+		if( chars > max_len )	// NOTE(review): compares the non-gap count against a row length; kept as in the original
+			max_len = aln_mat[ seqI - 1 ].length();
+			
+		if( cur_line.size() == 0 )
+			break;
+		// did we finish an aligned region?
+		if( cur_line[ 0 ] != '>' ){
+			GappedAlignment *new_cr = new GappedAlignment( aln_mat.size(), max_len );
+			for( uint seqJ = 0; seqJ < seqI; seqJ++ ){
+				new_cr->SetStart( seqJ, cr.Start( seqJ ) );
+				new_cr->SetLength( lengths[ seqJ ], seqJ );
+				cr.SetStart( seqJ, NO_MATCH );
+			}
+			for( uint seqJ = 0; seqJ < seqI; seqJ++ )
+				aln_mat[seqJ].resize( max_len, '-' );
+
+			new_cr->SetAlignment(aln_mat);
+			lengths.clear();
+			if( seq_count < seqI )
+				seq_count = seqI;
+
+			ga_list.push_back( new_cr );
+
+			max_len = 0;	// reset length for the next interval
+			aln_mat.clear();	// reset cr for next interval
+
+			// bail out on EOF or corruption
+			if( cur_line[ 0 ] != '=' )
+				break;
+			// otherwise read up to the next def. line
+			while( std::getline( in_stream, cur_line ) ){
+				line_count++;
+				if( cur_line[ 0 ] == '>' )
+					break;
+			}
+			if( cur_line[ 0 ] != '>' )
+				break;
+		}
+	}
+
+	// now process all GappedAlignments into Intervals
+	for( uint ivI = 0; ivI < ga_list.size(); ivI++ ){
+		GappedAlignment* cr = ga_list[ ivI ];
+		GappedAlignment* new_cr = new GappedAlignment( seq_count, cr->AlignmentLength() );
+
+		const std::vector< std::string >& align_matrix = GetAlignment( *cr, seq_table );
+		std::vector< std::string > new_aln_mat(seq_count);
+		for( seqI = 0; seqI < align_matrix.size(); seqI++ ){
+			new_cr->SetLength( cr->Length( seqI ), seqI );
+			new_cr->SetStart( seqI, cr->Start(seqI) );
+			new_aln_mat[ seqI ] = align_matrix[ seqI ];
+			if( new_aln_mat[ seqI ].length() == 0 )
+				new_aln_mat[ seqI ] = std::string( new_cr->AlignmentLength(), '-' );
+		}
+		for( ; seqI < seq_count; seqI++ ){	// sequences missing from this block become all-gap rows
+			new_cr->SetLength( 0, seqI );
+			new_cr->SetStart( seqI, 0 );
+			new_aln_mat[ seqI ] = std::string( new_cr->AlignmentLength(), '-' );
+		}
+		new_cr->SetAlignment(new_aln_mat);
+		delete cr;
+		cr = new_cr;
+		ga_list[ ivI ] = new_cr;
+
+		std::vector<AbstractMatch*> asdf(1, cr);
+		Interval iv( asdf.begin(), asdf.end() );
+		this->push_back( iv );
+	}
+}
+
+template< class MatchType >
+void GenericIntervalList<MatchType>::ReadStandardAlignmentCompact( std::istream& in_stream ) 
+{	// Parses XMFA text like ReadStandardAlignment(), but stores each aligned block as a CompactGappedAlignment<>.
+	uint seq_count = 0;
+	gnSeqI max_len = 0;
+	std::string cur_line;
+	if( !std::getline( in_stream, cur_line ) )
+	{
+		Clear();	// same policy as ReadStandardAlignment(): an unreadable stream yields an empty interval list
+		return;
+	}
+	uint seqI = 0;
+	std::vector< gnSeqI > lengths;
+	std::vector< GappedAlignment* > ga_list;
+	GappedAlignment cr;
+	std::string empty_line;
+	std::vector< std::string > aln_mat;
+	uint line_count = 1;
+	while( true ){
+		while( cur_line[0] == '#' ){
+			// hit a comment or metadata.  try to parse it if it's a filename
+			std::getline( in_stream, cur_line );
+			line_count++;
+			std::stringstream ss( cur_line );
+			std::string token;
+			std::getline( ss, token, '\t' );
+			// an empty token (blank line or EOF) is skipped first: substr( 1, 8 ) would throw std::out_of_range on it
+			if( token.empty() || token.substr(1, 8) != "Sequence" || token.find( "File" ) == std::string::npos )
+				continue;
+			std::getline( ss, token );
+			seq_filename.push_back( token );
+		}
+		if( cur_line.empty() )
+			break;	// header-only input, or a blank line / EOF right after the header: no alignment to parse
+
+		// read and parse the def. line
+		std::stringstream line_str( cur_line );
+		std::getline( line_str, cur_line, '>' );
+		std::getline( line_str, cur_line, ':' );
+		// take off leading whitespace
+		std::stringstream parse_str( cur_line );
+
+		if( !(parse_str >> seqI) || seqI == 0 ){	// sequence numbers are 1-based; 0 would index aln_mat[ seqI - 1 ] out of range
+			Throw_gnEx(InvalidFileFormat());
+		}
+		int64 start = 0, stop = 0;
+		std::getline( line_str, cur_line, '-' );
+		parse_str.clear();
+		parse_str.str( cur_line );
+		parse_str >> start;
+		line_str >> stop;
+		std::string strand;
+		line_str >> strand;
+
+		// read and parse the sequence
+		while( aln_mat.size() < seqI )
+			aln_mat.push_back( empty_line );
+
+		gnSeqI chars = 0;
+		while( std::getline( in_stream, cur_line ) ){
+			line_count++;
+			if( (cur_line[ 0 ] == '>' ) || (cur_line[ 0 ] == '=' ))
+				break;
+			for( uint charI = 0; charI < cur_line.length(); charI++ )
+				if( cur_line[ charI ] != '-' )
+					chars++;
+			aln_mat[ seqI - 1 ] += cur_line;
+		}
+		while( lengths.size() < seqI )
+			lengths.push_back(0);
+
+		lengths[ seqI - 1 ] = chars;
+
+// temporary workaround for file format inconsistency
+		if( strand == "+" )
+			cr.SetStart( seqI - 1, start );
+		else if( start < stop ){
+			if( chars == 0 )
+				cr.SetStart( seqI - 1, 0 );
+			else
+				cr.SetStart( seqI - 1, -start );
+			if( chars != stop - start + 1 && !(chars == 0 && stop - start == 1) ){
+				std::cerr << "Error in XMFA file format\n";
+				std::cerr << "Before line " << line_count << std::endl;
+				std::cerr << "Expecting " << stop - start + 1 << " characters based on defline\n";
+				std::cerr << "Actually read " << chars << " characters of sequence\n";
+				Throw_gnEx(InvalidFileFormat());
+			}
+		}else{
+			if( chars == 0 )
+				cr.SetStart( seqI - 1, 0 );
+			else
+				cr.SetStart( seqI - 1, -stop );
+			if( chars != start - stop + 1 && !(chars == 0 && stop - start == 1) ){
+				std::cerr << "Error in XMFA file format\n";
+				std::cerr << "Before line " << line_count << std::endl;
+				std::cerr << "Expecting " << start - stop + 1 << " characters based on defline\n";
+				std::cerr << "Actually read " << chars << " characters of sequence\n";
+				Throw_gnEx(InvalidFileFormat());
+			}
+		}
+
+		if( chars > max_len )	// NOTE(review): compares the non-gap count against a row length; kept as in the original
+			max_len = aln_mat[ seqI - 1 ].length();
+			
+		if( cur_line.size() == 0 )
+			break;
+		// did we finish an aligned region?
+		if( cur_line[ 0 ] != '>' ){
+			GappedAlignment *new_cr = new GappedAlignment( aln_mat.size(), max_len );
+			for( uint seqJ = 0; seqJ < seqI; seqJ++ ){
+				new_cr->SetStart( seqJ, cr.Start( seqJ ) );
+				new_cr->SetLength( lengths[ seqJ ], seqJ );
+				cr.SetStart( seqJ, NO_MATCH );
+			}
+			for( uint seqJ = 0; seqJ < seqI; seqJ++ )
+				aln_mat[seqJ].resize( max_len, '-' );
+
+			new_cr->SetAlignment(aln_mat);
+			lengths.clear();
+			if( seq_count < seqI )
+				seq_count = seqI;
+
+			ga_list.push_back( new_cr );
+
+			max_len = 0;	// reset length for the next interval
+			aln_mat.clear();	// reset cr for next interval
+
+			// bail out on EOF or corruption
+			if( cur_line[ 0 ] != '=' )
+				break;
+			// otherwise read up to the next def. line
+			while( std::getline( in_stream, cur_line ) ){
+				line_count++;
+				if( cur_line[ 0 ] == '>' )
+					break;
+			}
+			if( cur_line[ 0 ] != '>' )
+				break;
+		}
+	}
+
+	// now process all GappedAlignments into Intervals
+	// each block keeps its own sequence count (cr->SeqCount()) rather than the file-wide maximum
+	for( uint ivI = 0; ivI < ga_list.size(); ivI++ )
+	{	
+		GappedAlignment* cr = ga_list[ ivI ];
+		uint compact_seq_count =  cr->SeqCount();
+		CompactGappedAlignment<>* new_cr = new CompactGappedAlignment<>(compact_seq_count, cr->AlignmentLength() );
+		const std::vector< std::string > align_matrix = GetAlignment( *cr, seq_table );
+
+		// copy every row; a row that came back empty is replaced by an all-gap row
+		std::vector< std::string > new_aln_mat(compact_seq_count);
+		// this loop already visits all compact_seq_count rows, so no separate padding pass is needed
+		for( seqI = 0; seqI < compact_seq_count; seqI++ ){
+			new_cr->SetLength( cr->Length( seqI ), seqI );
+			new_cr->SetStart( seqI, cr->Start(seqI) );
+			new_aln_mat[ seqI ] = align_matrix[ seqI ];
+			if( new_aln_mat[ seqI ].length() == 0 )
+				new_aln_mat[ seqI ] = std::string( new_cr->AlignmentLength(), '-' );
+		}
+
+		new_cr->SetAlignment( new_aln_mat );
+		delete cr;
+
+		// append an empty Interval first, then hand it the compact alignment
+		// through SetMatches() on the stored element
+		Interval iv;
+		this->push_back( iv );
+		std::vector< AbstractMatch* > matches(1, new_cr);
+		this->back().SetMatches( matches );
+	}
+}
+
+
+template< class MatchType >
+void GenericIntervalList<MatchType>::WriteAlignedSequences(std::ostream& match_file) const
+{
+	// Writes the "mauveAligner data" / FormatVersion 5 text form to match_file:
+	// a header with one file name and length line per entry of seq_table, then
+	// for every interval its alignment length, start coordinates and aligned rows.
+	const unsigned int n_seqs = seq_table.size();
+
+	// fixed banner and header
+	match_file << "mauveAligner data\n";
+	match_file << "FormatVersion" << '\t' << 5 << "\n";
+	match_file << "SequenceCount" << '\t' << n_seqs << "\n";
+	for( uint sI = 0; sI < n_seqs; sI++ ){
+		// file name, or the literal "null" when no name was recorded for this sequence
+		match_file << "Sequence" << sI << "File" << '\t';
+		match_file << ( sI < seq_filename.size() ? seq_filename[sI] : std::string( "null" ) );
+		match_file << "\n";
+		match_file << "Sequence" << sI << "Length" << '\t';
+		if( sI < seq_table.size() )
+			match_file << seq_table[sI]->length();
+		else
+			match_file << "0";
+		match_file << "\n";
+	}
+
+	match_file << "AlignmentCount" << '\t' << this->size() << std::endl;
+
+	// one record per interval: a line with the alignment length and the tab-separated
+	// start of every sequence, the aligned rows, then an empty line.
+	// An empty list simply produces no records.
+	for( uint ivI = 0; ivI < this->size(); ivI++ ){
+		const MatchType& iv = (*this)[ ivI ];
+
+		match_file << iv.AlignmentLength();
+		for( uint sI = 0; sI < n_seqs; sI++ )
+			match_file << '\t' << iv.Start( sI );
+		match_file << std::endl;
+
+		std::vector<std::string> rows;
+		GetAlignment( iv, this->seq_table, rows );
+		for( uint sI = 0; sI < n_seqs; sI++ )
+			match_file << rows[ sI ] << std::endl;
+		match_file << std::endl;
+	}
+}
+
+
+}
+
+#endif	//_IntervalList_h_
diff --git a/libMems/Islands.cpp b/libMems/Islands.cpp
new file mode 100644
index 0000000..7cfde9c
--- /dev/null
+++ b/libMems/Islands.cpp
@@ -0,0 +1,320 @@
+/*******************************************************************************
+ * $Id: Islands.cpp,v 1.12 2004/04/19 23:11:19 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * Please see the file called COPYING for licensing, copying, and modification terms,
+ * and for further licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/Islands.h"
+#include "libMems/Aligner.h"
+#include "libMems/GappedAlignment.h"
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+/**
+ * Runs the vector overload of simpleFindIslands on iv_list and prints every island
+ * it reports to island_out, one line per island, as the tab-separated fields
+ * seqI, leftI, rightI, seqJ, leftJ, rightJ.
+ */
+void simpleFindIslands( IntervalList& iv_list, uint island_size, ostream& island_out ){
+	vector< Island > found;
+	simpleFindIslands( iv_list, island_size, found );
+	for( vector< Island >::const_iterator isle = found.begin(); isle != found.end(); ++isle )
+	{
+		island_out << isle->seqI << '\t' << isle->leftI << '\t' << isle->rightI << '\t'
+				<< isle->seqJ << '\t' << isle->leftJ << '\t' << isle->rightJ << endl;
+	}
+}
+
+
+void simpleFindIslands( IntervalList& iv_list, uint island_size, vector< Island >& island_list ){	// appends the islands found in every interval of iv_list to island_list (existing entries are kept)
+	if( iv_list.size() == 0 )
+		return;
+	for( uint iv_listI = 0; iv_listI < iv_list.size(); iv_listI++ ){
+		Interval& iv = iv_list[ iv_listI ];
+		gnAlignedSequences gnas;
+		iv.GetAlignedSequences( gnas, iv_list.seq_table );	// gnas.sequences is read below as one aligned row per sequence
+		uint seq_count = iv_list.seq_table.size();
+		
+		for( uint seqI = 0; seqI < seq_count; seqI++ ){
+			uint seqJ;
+			for( seqJ = seqI + 1; seqJ < seq_count; seqJ++ ){	// every unordered pair seqI < seqJ
+				uint columnI = 0;
+				gnSeqI curI = 0;	// non-gap characters of row seqI consumed so far
+				gnSeqI curJ = 0;	// non-gap characters of row seqJ consumed so far
+				gnSeqI lastI = 0;	// value of curI at the previous matching column
+				gnSeqI lastJ = 0;	// value of curJ at the previous matching column
+				for( columnI = 0; columnI < gnas.alignedSeqsSize(); columnI++ ){
+					if( gnas.sequences[ seqI ][ columnI ] != '-' )
+						curI++;
+					if( gnas.sequences[ seqJ ][ columnI ] != '-' )
+						curJ++;
+					if( toupper( gnas.sequences[ seqI ][ columnI ] ) == 
+						toupper( gnas.sequences[ seqJ ][ columnI ] ) &&
+						gnas.sequences[ seqJ ][ columnI ] != '-' ){
+						// both rows hold the same non-gap character here (case ignored): report an island when either row advanced by more than island_size since the previous such column
+						if( curI - lastI > island_size ||
+							curJ - lastJ > island_size ){
+							int64 leftI = iv.Start( seqI );	// offsets are subtracted when Start() is negative and added otherwise
+							int64 rightI = leftI < 0 ? leftI - curI : leftI + curI;
+							leftI = leftI < 0 ? leftI - lastI : leftI + lastI;
+							int64 leftJ = iv.Start( seqJ );
+							int64 rightJ = leftJ < 0 ? leftJ - curJ : leftJ + curJ;
+							leftJ = leftJ < 0 ? leftJ - lastJ : leftJ + lastJ;
+							Island isle;
+							isle.seqI = seqI;
+							isle.seqJ = seqJ;
+							isle.leftI = leftI;
+							isle.leftJ = leftJ;
+							isle.rightI = rightI;
+							isle.rightJ = rightJ;
+							island_list.push_back(isle);
+						}
+						
+						lastI = curI;	// remember where this matching column left both rows
+						lastJ = curJ;
+					}
+				}
+			}
+		}
+	}
+}
+
+
+/**
+ * Identifies stretches of alignment present in all sequences that contain no gap
+ * (a '-' or 'N' run) longer than max_gap_size and that span at least backbone_size in
+ * every sequence.  Each stretch is appended to backbone_regions with starts, lengths and rows.
+ */
+void simpleFindBackbone( IntervalList& iv_list, uint backbone_size, uint max_gap_size, vector< GappedAlignment >& backbone_regions ){
+	if( iv_list.size() == 0 )
+		return;
+	for( uint iv_listI = 0; iv_listI < iv_list.size(); iv_listI++ ){
+		Interval& iv = iv_list[ iv_listI ];
+		gnAlignedSequences gnas;
+		uint seqI;
+		uint seq_count = iv_list.seq_table.size();
+		vector< int64 > positions( seq_count );	// running coordinate of every sequence
+		vector< int64 > starts( seq_count );	// coordinates at which the current stretch began
+		vector< int64 > ends( seq_count );	// coordinates after the last gap-free column of the stretch
+		vector< uint > gap_size( seq_count, 0 );	// length of the gap currently open in every sequence
+		uint seqJ;
+		gnSeqI bb_start_col = 0;	// first alignment column of the current stretch
+		gnSeqI bb_end_col = 0;	// last gap-free alignment column of the current stretch
+		GappedAlignment cur_backbone( seq_count, 0 );	// template copied for every appended region
+		
+		// initialize positions and starts
+		for( seqI = 0; seqI < seq_count; seqI++ ){
+			positions[ seqI ] = iv_list[ iv_listI ].Start( seqI );
+			if( positions[ seqI ] < 0 )
+				positions[ seqI ] -= iv_list[ iv_listI ].Length( seqI ) + 1;	// negative start: begin at start - length - 1
+		}
+		starts = positions;
+		ends = positions;
+
+		iv.GetAlignedSequences( gnas, iv_list.seq_table );
+		bool backbone = true;	// assume we are starting out with a complete alignment column
+		uint columnI = 0;
+		vector< int64 > prev_positions;
+		for( ; columnI < gnas.alignedSeqsSize(); columnI++ ){
+			bool no_gaps = true;
+			prev_positions = positions;	// coordinates before this column is consumed
+			for( seqI = 0; seqI < seq_count; seqI++ ){
+				char cur_char = gnas.sequences[ seqI ][ columnI ];
+				if( cur_char != '-' && toupper(cur_char) != 'N' ){
+					if( gap_size[ seqI ] > max_gap_size && backbone ){
+						// end a stretch of backbone here only
+						// if the backbone meets size requirements in each
+						// sequence.
+						for( seqJ = 0; seqJ < seq_count; seqJ++ ){
+							if( ends[ seqJ ] - starts[ seqJ ] < backbone_size ){
+								break;
+							}
+						}
+						if( seqJ == seq_count ) {
+							// it's a legitimate stretch of backbone
+							backbone_regions.push_back( cur_backbone );
+							uint bbI = backbone_regions.size() - 1;
+							vector< string > aln_mat( seq_count );
+							for( seqJ = 0; seqJ < seq_count; seqJ++ ){
+								if( starts[ seqJ ] < 0 )
+									backbone_regions[ bbI ].SetStart( seqJ, ends[ seqJ ] + 1);
+								else
+									backbone_regions[ bbI ].SetStart( seqJ, starts[ seqJ ] );
+								backbone_regions[ bbI ].SetLength( ends[ seqJ ] - starts[ seqJ ], seqJ );
+								aln_mat[ seqJ ] = gnas.sequences[ seqJ ].substr( bb_start_col, bb_end_col - bb_start_col + 1);
+							}
+							backbone_regions[ bbI ].SetAlignment(aln_mat);
+							
+						}
+						// we either just finished backbone or a short area that didn't
+						// qualify as backbone
+						// look for a new backbone region
+						backbone = false;
+					}
+					positions[ seqI ]++;
+					gap_size[ seqI ] = 0;
+				}else{
+					gap_size[ seqI ]++;
+					no_gaps = false;
+				}
+			}
+			if( no_gaps ){
+				bb_end_col = columnI;
+				ends = positions;
+				if( !backbone ){
+					starts = prev_positions;	// a new stretch opens at this gap-free column
+					bb_start_col = columnI;
+					backbone = true;
+				}
+			}
+		}
+
+		// check for backbone one last time, but only while a stretch is still open: starts/ends of a stretch closed in the loop are stale and would be appended twice
+		for( seqJ = 0; seqJ < seq_count; seqJ++ ){
+			if( ends[ seqJ ] - starts[ seqJ ] < backbone_size ){
+				break;
+			}
+		}
+		if( backbone && seqJ == seq_count ) {
+			// it's a legitimate stretch of backbone
+			backbone_regions.push_back( cur_backbone );
+			uint bbI = backbone_regions.size() - 1;
+			vector< string > aln_mat( seq_count );
+			for( seqJ = 0; seqJ < seq_count; seqJ++ ){
+				if( starts[ seqJ ] < 0 )
+					backbone_regions[ bbI ].SetStart( seqJ, ends[ seqJ ] + 1);
+				else
+					backbone_regions[ bbI ].SetStart( seqJ, starts[ seqJ ] );
+				backbone_regions[ bbI ].SetLength( ends[ seqJ ] - starts[ seqJ ], seqJ );
+				aln_mat[ seqJ ] = gnas.sequences[ seqJ ].substr( bb_start_col, bb_end_col - bb_start_col + 1);
+			}
+			backbone_regions[ bbI ].SetAlignment( aln_mat );
+		}
+	}
+}
+
+
+void outputBackbone( const vector< GappedAlignment >& backbone_regions, ostream& backbone_out ){
+	// One line per region: for every sequence its start and its far end (start minus
+	// length when the start is negative, start plus length otherwise), all tab-separated.
+	for( vector< GappedAlignment >::const_iterator region = backbone_regions.begin(); region != backbone_regions.end(); ++region ){
+		for( uint seqJ = 0; seqJ < region->SeqCount(); seqJ++ ){
+			if( seqJ != 0 )
+				backbone_out << '\t';
+			const int64 span = (int64)region->Length( seqJ );
+			const int64 origin = region->Start( seqJ );
+			const int64 far_end = origin < 0 ? origin - span : origin + span;
+			backbone_out << region->Start( seqJ ) << '\t' << far_end;
+		}
+		backbone_out << endl;
+	}
+}
+
+
+// Computes the bounds of the unaligned gap between two LCBs in sequence seqJ: right_start is taken
+// from the left end of LCB rightI and left_start from the right end of LCB leftI (both as magnitudes).
+void getGapBounds( vector<gnSeqI>& seq_lengths, vector< LCB >& adjacencies, uint seqJ, int leftI, int rightI, int64& left_start, int64& right_start ){
+	if( rightI == -1 )	// no LCB on the right: the bound is one past the sequence length
+		right_start = seq_lengths[seqJ] + 1;
+	else
+		right_start = absolut( adjacencies[ rightI ].left_end[ seqJ ] );
+	
+	if( leftI == -1 )	// no LCB on the left: the bound is position 1
+		left_start = 1;
+	else
+		left_start = absolut( adjacencies[ leftI ].right_end[ seqJ ] );
+}
+
+
+void addUnalignedIntervals( IntervalList& iv_list, set< uint > seq_set, vector<gnSeqI> seq_lengths ){	// appends to iv_list one single-sequence interval per unaligned stretch before, between and after the LCBs of each sequence in seq_set
+	vector< LCB > adjacencies;
+	vector< int64 > weights;
+	uint lcbI;
+	uint seqI;
+	if( seq_lengths.size() == 0 )	// no lengths supplied: take them from the sequence table
+		for( seqI = 0; seqI < iv_list.seq_table.size(); seqI++ )
+			seq_lengths.push_back(iv_list.seq_table[seqI]->length());
+
+	uint seq_count = seq_lengths.size();
+
+
+	if( seq_set.size() == 0 )
+	{
+		// if an empty seq set was passed then assume all seqs
+		// should be processed
+		for( seqI = 0; seqI < seq_count; seqI++ )
+			seq_set.insert( seqI );
+	}
+	
+	weights = vector< int64 >( iv_list.size(), 0 );	// all-zero weights, one per interval
+	computeLCBAdjacencies_v2( iv_list, weights, adjacencies );	// presumably fills adjacencies with one LCB per interval -- confirm against Aligner
+
+	vector< int > rightmost;	// per sequence: index of the LCB without a right neighbour, -1 until one is seen
+	for( seqI = 0; seqI < seq_count; seqI++ ){
+		rightmost.push_back( -1 );
+	}
+	for( lcbI = 0; lcbI <= adjacencies.size(); lcbI++ ){	// "<=" on purpose: the extra pass emits the gap after the rightmost LCB
+		set< uint >::iterator seq_set_iterator = seq_set.begin();
+		for( ; seq_set_iterator != seq_set.end(); seq_set_iterator++ ){
+			seqI = *seq_set_iterator;
+			// scan left
+			int leftI;
+			if( lcbI < adjacencies.size() ){
+// left is always to the left!!
+				leftI = adjacencies[ lcbI ].left_adjacency[ seqI ];	// stored as uint; the -1 sentinel relies on the uint -> int conversion
+			}else
+				leftI = rightmost[ seqI ];
+
+			int rightI = lcbI < adjacencies.size() ? lcbI : -1;
+// right is always to the right!!
+			if( lcbI < adjacencies.size() )
+				if( adjacencies[ lcbI ].right_adjacency[ seqI ] == -1 )
+					rightmost[ seqI ] = lcbI;
+			
+			int64 left_start, right_start;
+			getGapBounds( seq_lengths, adjacencies, seqI, leftI, rightI, left_start, right_start );
+			int64 gap_len =  absolut( right_start ) - absolut( left_start );
+			if( gap_len > 0 ){
+				Match mm( seq_count );
+				Match* m = mm.Copy();
+				for( uint seqJ = 0; seqJ < seq_count; seqJ++ ){
+					m->SetStart( seqJ, 0 );	// start 0 everywhere except in seqI (set just below)
+				}
+				m->SetStart( seqI, left_start );
+				m->SetLength( gap_len );
+				vector<AbstractMatch*> tmp(1, m);
+				iv_list.push_back( Interval(tmp.begin(), tmp.end()) );
+				m->Free();	// the temporary match is released once the Interval has been built from it
+			}
+		}
+	}
+}
+
+
+void findIslandsBetweenLCBs( IntervalList& iv_list, uint island_size, ostream& island_out ){
+	// iv_list stays untouched: the unaligned regions are appended to a copy, behind the original intervals
+	IntervalList padded = iv_list;
+	addUnalignedIntervals( padded );
+	const uint seq_count = iv_list.seq_table.size();
+	
+	for( size_t ivI = iv_list.size(); ivI < padded.size(); ++ivI ){
+		for( uint seqI = 0; seqI < seq_count; ++seqI ){
+			// report only appended regions that reach island_size in this sequence
+			if( !( padded[ ivI ].Length( seqI ) < island_size ) ){
+				const gnSeqI left_end = absolut( padded[ ivI ].Start( seqI ) );
+				const gnSeqI right_end = left_end + padded[ ivI ].Length( seqI ) - 1;
+				island_out << "LCB island:\t" << seqI << '\t' << left_end << '\t' << right_end << endl;
+			}
+		}
+	}
+}
+
+}
diff --git a/libMems/Islands.h b/libMems/Islands.h
new file mode 100644
index 0000000..66dc18f
--- /dev/null
+++ b/libMems/Islands.h
@@ -0,0 +1,417 @@
+/*******************************************************************************
+ * $Id: Islands.h,v 1.7 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef __Islands_h__
+#define __Islands_h__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnSequence.h"
+#include "libMems/SubstitutionMatrix.h"
+#include "libMems/IntervalList.h"
+#include "libMems/NumericMatrix.h"
+#include "libMems/MatchList.h"
+#include "libMems/GappedAlignment.h"
+#include "libMems/CompactGappedAlignment.h"
+#include "libMems/Aligner.h"
+#include <boost/multi_array.hpp>
+#include "libMems/HomologyHMM/homology.h"
+#include "libMems/Scoring.h"
+
+namespace mems {
+
+/**
+ * A class to represent an island in an alignment.  Islands are generally
+ * large insertions of a region of sequence relative to
+ * another sequence.
+ */
+class Island{
+public:
+	uint seqI;	// index of the first sequence of the pair
+	uint seqJ;	// index of the second sequence of the pair
+	int64 leftI;	// island bound in seqI, as computed by simpleFindIslands
+	int64 leftJ;	// island bound in seqJ, as computed by simpleFindIslands
+	int64 rightI;	// opposite island bound in seqI
+	int64 rightJ;	// opposite island bound in seqJ
+};
+
+/**
+ * Identifies gaps in the alignment between pairs of sequences that are longer than
+ * some number of base pairs in length.  Prints islands to an output stream
+ */
+void simpleFindIslands( IntervalList& iv_list, uint island_size, std::ostream& island_out );
+void findIslandsBetweenLCBs( IntervalList& iv_list, uint island_size, std::ostream& island_out );
+void simpleFindIslands( IntervalList& iv_list, uint island_size, std::vector< Island >& island_list );
+
+class HssCols{	// a range of alignment columns attached to a pair of sequences
+public:
+	uint seqI;	// index of the first sequence of the pair
+	uint seqJ;	// index of the second sequence of the pair
+	size_t left_col;	// first alignment column of the range
+	size_t right_col;	// last alignment column of the range (consumers compute the length as right_col - left_col + 1)
+};
+
+typedef std::vector< HssCols > hss_list_t;
+typedef boost::multi_array< hss_list_t, 3 > hss_array_t;
+
+typedef HssCols IslandCols;	// use the same structure for island segs
+
+template<typename MatchVector>
+void hssColsToIslandCols( const MatchVector& iv_list, std::vector< genome::gnSequence* >& seq_table, std::vector< HssCols >& hss_list, std::vector< IslandCols >& island_col_list );
+
+/**
+ *  Find regions in each sequence that do not belong to any LCB, add them to their own
+ * Interval (LCB) in the IntervalList.
+ */
+void addUnalignedIntervals( IntervalList& iv_list, std::set< uint > seq_set = std::set< uint >(), std::vector<gnSeqI> seq_lengths = std::vector<gnSeqI>() );
+
+/**
+ * Identifies stretches of alignment existing in all sequences that doesn't
+ * contain a gap larger than a particular size.  Such regions are considered
+ * the backbone of the alignment.
+ */
+void simpleFindBackbone( IntervalList& iv_list, uint backbone_size, uint max_gap_size, std::vector< GappedAlignment >& backbone_regions );
+
+/**
+ * writes out a list of backbone regions
+ */
+void outputBackbone( const std::vector< GappedAlignment >& backbone_regions, std::ostream& backbone_out );
+
+void getGapBounds( std::vector<gnSeqI>& seq_lengths, std::vector< LCB >& adjacencies, uint seqJ, int leftI, int rightI, int64& left_start, int64& right_start );
+
+
+static char charmap[128];	// character -> row/column index of colmap; filled by getCharmap()
+inline
+char* getCharmap()
+{
+	// Fills charmap on the first call only and returns the same table on every call.
+	static bool initialized = false;
+	if( !initialized )
+	{
+		memset(charmap, 0, 128);	// every character not listed below keeps code 0
+		const char* const lower = "acgt";
+		const char* const upper = "ACGT";
+		for( char code = 0; code < 4; ++code )
+		{
+			charmap[ (int)lower[ (int)code ] ] = code;
+			charmap[ (int)upper[ (int)code ] ] = code;
+		}
+		charmap[ (int)'-' ] = 4;	// gap
+		initialized = true;
+	}
+	return charmap;
+}
+// a mapping from pairwise alignment columns to HomologyHMM emission codes
+// row/column indices are as given by the charmap above (ACGT- == 01234).
+static char colmap[5][5] = {
+//    A   C   G   T   -
+	{'1','3','4','5','7'},	// A
+	{'3','2','6','4','7'},  // C
+	{'4','6','2','3','7'},  // G
+	{'5','4','3','1','7'},  // T
+	{'7','7','7','7','\0'},  // -
+};
+
+
+/**
+ * Predicts homologous segments between rows seqI and seqJ of aln_table with the HomologyHMM and appends one HssCols (inclusive
+ * alignment columns) to hss_list per run of 'H' in the prediction.  With only right_homologous set, the column codes are fed in reverse.
+ */
+inline
+void findHssHomologyHMM( std::vector< std::string >& aln_table, hss_list_t& hss_list, uint seqI, uint seqJ, const Params& hmm_params,
+						boolean left_homologous, boolean right_homologous )
+{
+	static char* charmap = getCharmap();
+	// encode the alignment as column states
+	std::string column_states(aln_table[0].size(),'q');
+	std::vector< size_t > col_reference(column_states.size(), (std::numeric_limits<size_t>::max)() );
+	size_t refI = 0;	// col_reference[k] is the alignment column of the k-th column that survives the gap/gap filter
+	for( size_t colI = 0; colI < column_states.size(); colI++ )
+	{
+		// go through unsigned char: a plain char may be negative, and bytes >= 128 lie outside the 128-entry table (they get code 0)
+		const unsigned char ci = aln_table[seqI][colI], cj = aln_table[seqJ][colI];
+		const int a = ci < 128 ? charmap[ci] : 0, b = cj < 128 ? charmap[cj] : 0;
+		column_states[colI] = colmap[a][b];
+		if(column_states[colI] != 0 )
+			col_reference[refI++] = colI;
+	}
+	// filter out the gap/gap cols
+	std::string::iterator sitr = std::remove(column_states.begin(), column_states.end(), 0);
+	column_states.resize(sitr - column_states.begin());
+	// a '7' (gap in exactly one row) whose left neighbour is '7' or '8' and whose right neighbour is '7' is rewritten to '8'
+	for( size_t colI = 2; colI < column_states.size(); colI++ )
+	{
+		if( column_states[colI] == '7' &&
+			column_states[colI-1] == '7' &&
+			(column_states[colI-2] == '7' || column_states[colI-2] == '8') )
+			column_states[colI-1] = '8';
+	}
+	if( column_states.size() > 1 && column_states[0] == '7' && (column_states[1] == '7' || column_states[1] == '8'))
+		column_states[0] = '8';
+	if( column_states.size() > 1 && column_states[column_states.size()-1] == '7' && (column_states[column_states.size()-2] == '7'|| column_states[column_states.size()-2] == '8') )
+		column_states[column_states.size()-1] = '8';
+	// now feed it to the Homology prediction HMM
+	std::string prediction;
+	if( right_homologous && !left_homologous )
+		std::reverse(column_states.begin(), column_states.end());
+	run(column_states, prediction, hmm_params);
+	if( right_homologous && !left_homologous )
+		std::reverse(prediction.begin(), prediction.end());
+	size_t prev_h = 0;	// index at which the current 'H' run started
+	size_t i = 1;
+	for( ; i < prediction.size(); i++ )
+	{
+		if( prediction[i] == 'H' && prediction[i-1] == 'N' )
+			prev_h = i;
+		if( prediction[i] == 'N' && prediction[i-1] == 'H' )
+		{
+			HssCols hc;
+			hc.seqI = seqI;
+			hc.seqJ = seqJ;
+			hc.left_col = col_reference[prev_h];
+			hc.right_col = col_reference[i-1];
+			hss_list.push_back(hc);
+			prev_h = i;
+		}
+	}
+	// get the last one (an empty prediction has no last state to look at)
+	if( !prediction.empty() && prediction[i-1] == 'H' )
+	{
+		HssCols hc;
+		hc.seqI = seqI;
+		hc.seqJ = seqJ;
+		hc.left_col = col_reference[prev_h];
+		hc.right_col = col_reference[i-1];
+		hss_list.push_back(hc);
+	}
+}
+
+
+template< typename MatchVector >
+void findHssHomologyHMM( const MatchVector& iv_list, std::vector< genome::gnSequence* >& seq_table,  hss_array_t& hss_array, const Params& hmm_params, boolean left_homologous, boolean right_homologous )
+{	// fills hss_array[seqI][seqJ][interval] (seqI < seqJ) with the column ranges found by the pairwise findHssHomologyHMM
+	typedef typename MatchVector::value_type MatchType;
+	if( iv_list.size() == 0 )
+		return;	// hss_array is left untouched for an empty list
+	uint seq_count = seq_table.size();
+	hss_array.resize( boost::extents[seq_count][seq_count][iv_list.size()] );
+	for( uint iv_listI = 0; iv_listI < iv_list.size(); iv_listI++ ){
+		const MatchType& iv = iv_list[ iv_listI ];
+		std::vector< std::string > aln_table;
+		GetAlignment( *iv, seq_table, aln_table );	// iv is dereferenced, so MatchType is used as a pointer-like type
+		
+		for( uint seqI = 0; seqI < seq_count; seqI++ ){
+			uint seqJ;
+			for( seqJ = seqI + 1; seqJ < seq_count; seqJ++ ){
+
+				hss_list_t& hss_list = hss_array[seqI][seqJ][iv_listI];
+				hss_list.clear();	// drop whatever an earlier call stored in this cell
+				findHssHomologyHMM( aln_table, hss_list, seqI, seqJ, hmm_params, left_homologous, right_homologous );
+			}
+		}
+	}
+}
+
+
+template< typename MatchVector >
+void HssColsToIslandCols( const MatchVector& iv_list, std::vector< genome::gnSequence* >& seq_table, hss_array_t& hss_array, hss_array_t& island_col_array )
+{	// for every interval and pair seqI < seqJ, passes the ranges of hss_array to ComplementHss and collects the result in island_col_array
+
+	typedef typename MatchVector::value_type MatchType;
+	uint seq_count = seq_table.size();
+	island_col_array.resize( boost::extents[seq_count][seq_count][iv_list.size()] );
+	for( uint iv_listI = 0; iv_listI < iv_list.size(); iv_listI++ ){
+		const MatchType& iv = iv_list[ iv_listI ];	// not used below
+		for( uint seqI = 0; seqI < seq_count; seqI++ ){
+			uint seqJ;
+			for( seqJ = seqI + 1; seqJ < seq_count; seqJ++ ){
+				hss_list_t& hss_list = hss_array[seqI][seqJ][iv_listI];
+				hss_list_t& island_col_list = island_col_array[seqI][seqJ][iv_listI];
+				ComplementHss(iv_list[iv_listI]->AlignmentLength(),hss_list,island_col_list,seqI,seqJ);	// appends to island_col_list; the cell is not cleared first
+			}
+		}
+	}
+}
+// Appends to island_col_list the column ranges of [0, alignment_length) that hss_list does not cover, tagged with seqI/seqJ.
+// Ranges are inclusive on both ends; the walk assumes hss_list is ordered by left_col -- TODO confirm with the callers.
+inline
+void ComplementHss( const size_t alignment_length, hss_list_t& hss_list, hss_list_t& island_col_list, uint seqI=0, uint seqJ=0 )
+{
+	size_t left_col = 0;	// first column not yet covered by an HSS or an emitted island
+	for( size_t hssI = 0; hssI < hss_list.size(); ++hssI )
+	{
+		if( left_col >= hss_list[hssI].left_col ) 
+		{
+			left_col = hss_list[hssI].right_col + 1;
+			continue;	// handle the case where the HSS starts at col 0
+		}
+		// ending an island
+		IslandCols isle;
+		isle.seqI = seqI;
+		isle.seqJ = seqJ;
+		isle.left_col = left_col;
+		isle.right_col = hss_list[hssI].left_col - 1;	// stop one column before the HSS: right_col is inclusive, and left_col < HSS left_col here so this cannot underflow
+		island_col_list.push_back(isle);
+		left_col = hss_list[hssI].right_col + 1;
+	}
+
+	if( left_col < alignment_length )
+	{
+		// add the last island
+		IslandCols isle;
+		isle.seqI = seqI;
+		isle.seqJ = seqJ;
+		isle.left_col = left_col;
+		isle.right_col = alignment_length-1;
+		island_col_list.push_back(isle);
+	}
+}
+
+template< typename MatchVector >
+void HssArrayToCga( const MatchVector& iv_list, std::vector< genome::gnSequence* >& seq_table, hss_array_t& hss_array, std::vector< CompactGappedAlignment<>* >& cga_list )
+{	// appends to cga_list one CompactGappedAlignment (obtained through Copy()) per column range stored in hss_array
+	typedef typename MatchVector::value_type MatchType;
+	uint seq_count = seq_table.size();
+	for( uint iv_listI = 0; iv_listI < iv_list.size(); iv_listI++ ){
+		const MatchType& iv = iv_list[ iv_listI ];
+		
+		CompactGappedAlignment<>* iv_cga = dynamic_cast< CompactGappedAlignment<>* >(iv);	// use iv directly when it already is a CompactGappedAlignment
+		bool allocated = false;
+		if( iv_cga == NULL )
+		{
+			CompactGappedAlignment<> tmp_cga;
+			iv_cga = tmp_cga.Copy();
+			new (iv_cga) CompactGappedAlignment<>(*iv);	// NOTE(review): constructs over the object returned by Copy() without destroying it first -- confirm this is intended
+			allocated = true;
+		}
+		for( uint seqI = 0; seqI < seq_count; seqI++ ){
+			for( uint seqJ = seqI + 1; seqJ < seq_count; seqJ++ ){
+				hss_list_t& isle_list = hss_array[seqI][seqJ][iv_listI];
+				for( size_t curI = 0; curI < isle_list.size(); ++curI )
+				{
+					// extract a cga
+					CompactGappedAlignment<> tmp_cga;
+					cga_list.push_back( tmp_cga.Copy() );
+					iv_cga->copyRange( *(cga_list.back()), isle_list[curI].left_col, isle_list[curI].right_col - isle_list[curI].left_col + 1 );	// the range is inclusive, hence the + 1
+					if( cga_list.back()->LeftEnd(0) == NO_MATCH )
+					{
+						// this one must have been covering an invalid region (gaps aligned to gaps)
+						cga_list.back()->Free();
+						cga_list.erase( cga_list.end()-1 );
+					}
+				}
+			}
+		}
+		if( allocated )
+			iv_cga->Free();	// release the conversion copy made above
+	}
+}
+
+
+template< class IntervalListType >
+void addUnalignedRegions( IntervalListType& iv_list)
+{	// appends to iv_list one Interval per match that AddGapMatches adds for each sequence
+	std::vector< AbstractMatch* > new_ivs;
+	std::vector< AbstractMatch* > iv_ptrs(iv_list.size());
+	for( size_t i = 0; i < iv_list.size(); ++i )
+		iv_ptrs[i] = &iv_list[i];	// these pointers are only used before iv_list is resized below
+	for( size_t seqI = 0; seqI < iv_list.seq_table.size(); ++seqI )
+	{
+		SingleStartComparator< AbstractMatch > ssc( seqI );
+		std::sort( iv_ptrs.begin(), iv_ptrs.end(), ssc );
+		size_t ivI = 0;
+		for( ; ivI < iv_ptrs.size(); ++ivI )	// skip the leading entries that are undefined in seqI
+			if( iv_ptrs[ivI]->LeftEnd(seqI) != NO_MATCH )
+				break;
+		std::list< AbstractMatch* > iv_ptr_list;
+		iv_ptr_list.insert( iv_ptr_list.end(), iv_ptrs.begin()+ivI, iv_ptrs.end() );
+		AddGapMatches( iv_ptr_list, iv_ptr_list.begin(), iv_ptr_list.end(), seqI, 1, iv_list.seq_table[seqI]->length()+1, AbstractMatch::forward, iv_list.seq_table.size() );	// presumably inserts gap matches into the list -- confirm against its definition
+		std::list< AbstractMatch* >::iterator iter = iv_ptr_list.begin();
+		while( ivI != iv_ptrs.size() && iter != iv_ptr_list.end() )
+		{
+			if( iv_ptrs[ivI] == *iter )	// an original interval: step over it
+				ivI++;
+			else
+				new_ivs.push_back( *iter );	// anything else was added by AddGapMatches
+			++iter;
+		}
+		while( iter != iv_ptr_list.end() )
+		{
+			new_ivs.push_back( *iter );
+			++iter;
+		}
+	}
+	// now add all the new intervals to iv_list
+	size_t prev_size = iv_list.size();
+	iv_list.resize( iv_list.size() + new_ivs.size() );
+	for( size_t newI = 0; newI < new_ivs.size(); ++newI )
+	{
+		Interval iv( new_ivs.begin() + newI, new_ivs.begin() + newI + 1 );
+		iv_list[prev_size + newI] = iv;
+		new_ivs[newI]->Free();	// the match is released once the Interval has been built from it
+	}
+}
+
+
+template< typename MatchVector >
+void findBigGaps( const MatchVector& iv_list, std::vector< genome::gnSequence* >& seq_table,  hss_array_t& hss_array, size_t big_gap_size  )
+{	// records in hss_array[seqI][seqJ][interval] the column ranges in which at least big_gap_size columns have a gap in exactly one of the two rows
+	typedef typename MatchVector::value_type MatchType;
+	if( iv_list.size() == 0 )
+		return;
+	uint seq_count = seq_table.size();
+	hss_array.resize( boost::extents[seq_count][seq_count][iv_list.size()] );
+	for( uint iv_listI = 0; iv_listI < iv_list.size(); iv_listI++ ){
+		const MatchType& iv = iv_list[ iv_listI ];
+		std::vector< std::string > aln_table;
+		GetAlignment( *iv, seq_table, aln_table );
+		
+		for( uint seqI = 0; seqI < seq_count; seqI++ ){
+			uint seqJ;
+			for( seqJ = seqI + 1; seqJ < seq_count; seqJ++ )
+			{
+				if( iv->LeftEnd(seqI) == NO_MATCH || iv->LeftEnd(seqJ) == NO_MATCH )
+					continue;	// pair skipped; its cell is not cleared in that case
+
+				hss_list_t& hss_list = hss_array[seqI][seqJ][iv_listI];
+				hss_list.clear();
+				size_t gap_count = 0;	// columns of the current run with a gap in exactly one row
+				size_t gap_lend = 0;	// first column of the current run
+				for( size_t cI = 0; cI < aln_table[seqI].size(); cI++ )
+				{
+					if( aln_table[seqI][cI] == '-' || aln_table[seqJ][cI] == '-' )
+					{
+						if( aln_table[seqI][cI] == '-' ^ aln_table[seqJ][cI] == '-' )	// parsed as (a == '-') ^ (b == '-'); gap/gap columns neither count nor end the run
+						{
+							if( gap_count == 0 )
+								gap_lend = cI;
+							gap_count++;
+						}
+					}else if( gap_count >= big_gap_size )	// a gap-free column closes the run; NOTE(review): a run reaching the last column is never recorded -- confirm this is intended
+					{
+						HssCols hc;
+						hc.seqI = seqI;
+						hc.seqJ = seqJ;
+						hc.left_col = gap_lend;
+						hc.right_col = cI-1;
+						hss_list.push_back( hc );
+						gap_count = 0;
+					}else
+						gap_count = 0;
+				}
+			}
+		}
+	}
+}
+
+
+}
+
+#endif // __Islands_h__
diff --git a/libMems/LCB.h b/libMems/LCB.h
new file mode 100644
index 0000000..65609b6
--- /dev/null
+++ b/libMems/LCB.h
@@ -0,0 +1,70 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef __LCB_h__
+#define __LCB_h__
+
+#include <vector>
+#include <libGenome/gnDefs.h>
+
+namespace mems {
+
+/** 
+ * This class is used to track relationships between LCBs during the LCB determination process.  All four vectors are indexed by sequence.
+ */
+class LCB{
+public:
+	LCB() : lcb_id(0), weight(0), to_be_deleted(false) {};	// the vectors start out empty
+	std::vector< int64 > left_end;	/**< The left end position of the LCB in each sequence */
+	std::vector< int64 > right_end;  /**< The right end position of the LCB in each sequence */
+	std::vector< uint > left_adjacency;	/**< 'Pointers' (actually IDs) to the LCBs on the left in each sequence */
+	std::vector< uint > right_adjacency;	/**< 'Pointers' (actually IDs) to the LCBs on the right in each sequence */
+	int lcb_id;			/**< A numerical ID that can be assigned to this LCB */
+	double weight;		/**< The weight (or coverage) of this LCB */
+	bool to_be_deleted;	/**< set to true if this LCB is about to be deleted, but the deletion hasn't yet been processed */
+};
+
+/**
+ * Orders LCBs by the magnitude of their left end in a single sequence; an LCB
+ * whose left end there is NO_MATCH sorts ahead of every LCB that has one.
+ * Used by the LCB construction algorithm.
+ */
+class LCBLeftComparator {
+public:
+	/** @param seq index of the sequence whose left ends are compared */
+	LCBLeftComparator( uint seq ) : m_seq(seq){};
+
+	/** @return true when a has to be placed before b */
+	bool operator()(const LCB& a, const LCB& b) const{
+		const int64 lhs = a.left_end[ m_seq ];
+		const int64 rhs = b.left_end[ m_seq ];
+		// with an undefined side, a goes first only when b is the defined one
+		if( lhs == NO_MATCH || rhs == NO_MATCH )
+			return rhs != NO_MATCH;
+
+		// negative coordinates are ranked by their magnitude
+		const int64 lhs_abs = lhs < 0 ? -lhs : lhs;
+		const int64 rhs_abs = rhs < 0 ? -rhs : rhs;
+		return lhs_abs < rhs_abs;
+	}
+protected:
+	uint m_seq;	// sequence index used for the comparison
+private:
+	LCBLeftComparator();	// private default constructor: a sequence index is required
+};
+
+class LCBIDComparator {	// orders LCBs by ascending lcb_id
+public:
+	bool operator()(const LCB& a, const LCB& b) const
+	{
+		return b.lcb_id > a.lcb_id;
+	}
+};
+
+
+} // namespace mems
+
+
+#endif  // __LCB_h__
+
diff --git a/libMems/Makefile.am b/libMems/Makefile.am
new file mode 100644
index 0000000..936b26a
--- /dev/null
+++ b/libMems/Makefile.am
@@ -0,0 +1,85 @@
+
+if DEBUG
+D_CXXFLAGS = -Wall -g -D__GNDEBUG__
+endif
+OPTIMIZATION = -O2 -funroll-loops -fomit-frame-pointer -ftree-vectorize 
+AM_CFLAGS = $(OPTIMIZATION) @DEPS_CFLAGS@ -DUSE_POSIX_AIO @OPENMP_CFLAGS@
+AM_CXXFLAGS = $(OPTIMIZATION) @DEPS_CFLAGS@ @BOOST_CPPFLAGS@ $(D_CXXFLAGS) @EXTRA_CXX_FLAGS@ @OPENMP_CXXFLAGS@ 
+AM_LDFLAGS = $(OPTIMIZATION)
+
+LIBMEMS_H = \
+RepeatHash.h      MatchHashEntry.h \
+DNAFileSML.h     MemorySML.h         MatchProjectionAdapter.h \
+DNAMemorySML.h  MatchFinder.h           SortedMerList.h IntervalList.h \
+FileSML.h      gnAlignedSequences.h  Interval.h        \
+MemHash.h      AbstractMatch.h    SlotAllocator.h \
+Aligner.h   Match.h     MatchList.h Matrix.h NumericMatrix.h \
+Islands.h   MaskedMemHash.h   SeedMasks.h GappedAlignment.h \
+MuscleInterface.h GappedAligner.h PhyloTree.h SparseAbstractMatch.h \
+DenseAbstractMatch.h RepeatMatch.h UngappedLocalAlignment.h \
+AbstractGappedAlignment.h CompactGappedAlignment.h HybridAbstractMatch.h \
+twister.h SubstitutionMatrix.h RepeatMatchList.h \
+Backbone.h ProgressiveAligner.h PairwiseMatchAdapter.h PairwiseMatchFinder.h \
+SeedOccurrenceList.h TreeUtilities.h SuperInterval.h GreedyBreakpointElimination.h \
+LCB.h DistanceMatrix.h Scoring.h configuration.h Memory.h Files.h gnRAWSequence.h
+
+HOMOLOGYHMM_H = HomologyHMM/homology.h HomologyHMM/dptables.h HomologyHMM/algebras.h HomologyHMM/parameters.h
+
+DMSML_H = \
+dmSML/asyncio.h dmSML/alinuxaio.h dmSML/aPOSIXaio.h \
+dmSML/alibc.h dmSML/awin32aio.h dmSML/buffer.h \
+dmSML/util.h dmSML/sorting.h dmSML/dmsort.h \
+dmSML/timing.h dmSML/sml.h
+
+LIBMEMS_SRC = \
+RepeatHash.cpp       \
+DNAFileSML.cpp       MatchFinder.cpp       \
+DNAMemorySML.cpp     MemorySML.cpp        SortedMerList.cpp \
+FileSML.cpp          MemHash.cpp          MatchHashEntry.cpp \
+Interval.cpp	     IntervalList.cpp     twister.c \
+gnAlignedSequences.cpp                     \
+MatchList.cpp        Aligner.cpp \
+Islands.cpp          MaskedMemHash.cpp    GappedAlignment.cpp \
+MuscleInterface.cpp  PhyloTree.cpp         \
+RepeatMatchList.cpp  RepeatMatch.cpp \
+Backbone.cpp	PairwiseMatchFinder.cpp	ProgressiveAligner.cpp \
+SuperInterval.cpp	GreedyBreakpointElimination.cpp
+
+HOMOLOGYHMM_SRC = \
+HomologyHMM/algebras.cc HomologyHMM/homology.cc HomologyHMM/homologymain.cc
+
+DMSML_SRC = \
+dmSML/asyncio.c dmSML/alinuxaio.c dmSML/aPOSIXaio.c \
+dmSML/alibc.c dmSML/awin32aio.c dmSML/buffer.c \
+dmSML/util.c dmSML/sorting.c dmSML/dmsort.c \
+dmSML/timing.c dmSML/sml.c
+
+
+libmems_includedir=$(includedir)/$(GENERIC_LIBRARY_NAME)-$(GENERIC_API_VERSION)/$(GENERIC_LIBRARY_NAME)
+libmems_include_HEADERS = $(LIBMEMS_H) 
+
+# build libraries with gcc (no suffix)
+lib_LTLIBRARIES = libMems-1.6.la
+libMems_1_6_la_SOURCES = $(LIBMEMS_SRC) $(HOMOLOGYHMM_SRC) $(DMSML_SRC)
+
+libMems_1_6_la_LDFLAGS= -version-info $(GENERIC_LIBRARY_VERSION)  
+
+homologyhmm_includedir=$(includedir)/$(GENERIC_LIBRARY_NAME)-$(GENERIC_API_VERSION)/$(GENERIC_LIBRARY_NAME)/HomologyHMM
+homologyhmm_include_HEADERS = $(HOMOLOGYHMM_H)
+
+dmsml_includedir=$(includedir)/$(GENERIC_LIBRARY_NAME)-$(GENERIC_API_VERSION)/$(GENERIC_LIBRARY_NAME)/dmSML
+dmsml_include_HEADERS = $(DMSML_H)
+
+EXTRA_DIST = \
+HomologyHMM/homology.xml 
+
+#EXTRA_PROGRAMS = TestSML TestSMLstatic
+
+
+#TestSMLstatic_SOURCES = TestSML.cpp
+#TestSMLstatic_INCLUDES = -I$(top_srcdir)/include/ `wx-config --cxxflags`
+#TestSMLstatic_LDFLAGS = -static $(top_builddir)/libMems/libMems.a `wx-config --static --libs`
+
+
+
+
diff --git a/libMems/MaskedMemHash.cpp b/libMems/MaskedMemHash.cpp
new file mode 100644
index 0000000..8741728
--- /dev/null
+++ b/libMems/MaskedMemHash.cpp
@@ -0,0 +1,65 @@
+/*******************************************************************************
+ * $Id: MaskedMemHash.cpp,v 1.3 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * Please see the file called COPYING for licensing, copying, and modification terms,
+ * and for further licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/MaskedMemHash.h"
+#include <list>
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+MaskedMemHash::MaskedMemHash()
+	: seq_mask( 0 )	// a zero mask makes HashMatch accept every match
+{}
+
+
+MaskedMemHash::MaskedMemHash(const MaskedMemHash& mh) : MemHash(mh), seq_mask(mh.seq_mask){}	// the base copy constructor copies the MemHash state; only the mask is added here
+
+// Assigns the MemHash base state as well as the mask; self-assignment leaves the object unchanged.
+MaskedMemHash& MaskedMemHash::operator=( const MaskedMemHash& mh ){
+	if( this != &mh ) MemHash::operator=( mh );	// previously only seq_mask was copied and the base sub-object kept its old state
+	seq_mask = mh.seq_mask;
+	return *this;
+}
+
+MaskedMemHash* MaskedMemHash::Clone() const{	// returns a new heap-allocated copy of this object, built with the copy constructor
+	return new MaskedMemHash(*this);
+}
+
+boolean MaskedMemHash::HashMatch(list<idmer>& match_list){
+	// Builds one hash entry from the hits in match_list and stores it when it passes the mask; always returns true.
+	match_list.sort(&idmer_id_lessthan);
+	// the entry is sized with seq_count and given the seed length reported by GetSar(0)
+	MatchHashEntry mhe( seq_count, GetSar(0)->SeedLength() );
+	mhe.SetLength( GetSar(0)->SeedLength() );
+	
+	// store position + 1 as the start of every hit, then derive direction and offset
+	list<idmer>::iterator hit = match_list.begin();
+	while( hit != match_list.end() ){
+		mhe.SetStart( hit->id, hit->position + 1 );
+		++hit;
+	}
+	SetDirection(mhe);
+	mhe.CalculateOffset();
+
+	// "MatchNumber": one bit per sequence, shifted in from sequence 0 onwards, set when the sequence has a start
+	uint64 match_number = 0;
+	for( uint seqI = 0; seqI < mhe.SeqCount(); seqI++ )
+		match_number = ( match_number << 1 ) | ( mhe.Start(seqI) != NO_MATCH ? 1 : 0 );
+
+	// a zero mask accepts everything, otherwise the bit pattern has to equal the mask
+	if( seq_mask == 0 || match_number == seq_mask )
+		AddHashEntry(mhe);
+	return true;
+}
+
+} // namespace mems
diff --git a/libMems/MaskedMemHash.h b/libMems/MaskedMemHash.h
new file mode 100644
index 0000000..29c4460
--- /dev/null
+++ b/libMems/MaskedMemHash.h
@@ -0,0 +1,44 @@
+/*******************************************************************************
+ * $Id: MaskedMemHash.h,v 1.3 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _MaskedMemHash_h_
+#define _MaskedMemHash_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/MemHash.h"
+
+namespace mems {
+
+/**
+ * Finds matches that meet a particular sequence mask, e.g. 0b11111 for 5-way matches
+ * Doesn't filter anything unless a mask is set using SetMask().  The
+ * filter can be cleared by calling SetMask(0)
+ *
+ * HashMatch() builds the bit pattern it compares against the mask by shifting
+ * in one bit per sequence from the right, so sequence 0 ends up in the highest
+ * of the used bits and the last sequence in bit 0.
+ */
+class MaskedMemHash : public MemHash{
+public:
+	/** Creates a MaskedMemHash with a zero mask (no filtering) */
+	MaskedMemHash();
+	~MaskedMemHash(){};
+	MaskedMemHash(const MaskedMemHash& mh);
+	MaskedMemHash& operator=( const MaskedMemHash& mh );
+	/** @return a copy of this object allocated with new */
+	virtual MaskedMemHash* Clone() const;
+	/**
+	 * Sets the sequence mask used by HashMatch().
+	 * @param seq_mask	The required bit pattern, or 0 to disable filtering
+	 */
+	virtual void SetMask( uint64 seq_mask ){ this->seq_mask = seq_mask; }
+protected:
+	/**
+	 * Can't find subsets when there is only one permitted sequence mask!
+	 * This implementation intentionally does nothing.
+	 */
+	virtual void FindSubsets(const Match& mhe, std::vector<Match>& subset_matches){};
+	/** Hashes a seed match only if it involves exactly the sequences selected by the mask */
+	virtual boolean HashMatch(std::list<idmer>& match_list);
+	uint64 seq_mask;	/**< The required sequence bit pattern, 0 when no filter is active */
+};
+
+}
+
+#endif //_MaskedMemHash_h_
diff --git a/libMems/Match.h b/libMems/Match.h
new file mode 100644
index 0000000..b7f9200
--- /dev/null
+++ b/libMems/Match.h
@@ -0,0 +1,33 @@
+/*******************************************************************************
+ * $Id: GenericMatch.h,v 1.10 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _Match_h_
+#define _Match_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnClone.h"
+#include <iostream>
+#include <set>
+#include "libMems/UngappedLocalAlignment.h"
+#include "libMems/SparseAbstractMatch.h"
+#include "libMems/DenseAbstractMatch.h"
+#include "libMems/HybridAbstractMatch.h"
+
+namespace mems {
+
+// Match is an ungapped local alignment stored in a HybridAbstractMatch with
+// its default template arguments.
+typedef UngappedLocalAlignment< HybridAbstractMatch<> > Match;
+
+// Index of the first sequence compared by MatchHashEntry::start_lessthan_ptr().
+// Because it is declared static in a header, every translation unit that
+// includes this file gets its own zero-initialized copy.
+// NOTE(review): if this was meant to be one shared setting, assignments made
+// in one source file are not visible in another -- confirm the intent.
+static uint seq_compare_start;
+
+
+}
+
+#endif // _Match_h_
diff --git a/libMems/MatchFinder.cpp b/libMems/MatchFinder.cpp
new file mode 100644
index 0000000..3c9c0fa
--- /dev/null
+++ b/libMems/MatchFinder.cpp
@@ -0,0 +1,444 @@
+/*******************************************************************************
+ * $Id: MatchFinder.cpp,v 1.39 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/MatchFinder.h"
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+/**
+ * Default constructor.  Creates a MatchFinder without sequences, with no
+ * operation in progress (m_progress == -1) and with logging disabled.
+ */
+MatchFinder::MatchFinder(){
+	mer_size = DNA_MER_SIZE;
+	seq_count = 0;
+	ambiguity_tolerance = 0;
+	// give the remaining scalar members a defined value as well, so that
+	// copying a freshly constructed object never reads indeterminate data
+	alpha_map_size = 0;
+	alphabet_bits = 0;
+	mers_processed = 0;
+	total_mers = 0;
+	m_progress = -1;
+	log_stream = NULL;
+	offset_stream = NULL;
+}
+
+// Destructor.  The body is empty: the member containers are destroyed
+// automatically, and nothing here deletes the SortedMerList / gnSequence
+// objects that sar_table and seq_table point to.
+MatchFinder::~MatchFinder(){
+}
+
+/**
+ * Copy constructor.  The sorted mer list and sequence tables are copied as
+ * pointers (shallow copy), and the log streams are shared with mf.
+ * @param mf	The MatchFinder to copy
+ */
+MatchFinder::MatchFinder(const MatchFinder& mf){
+	mer_size = mf.mer_size;
+	seq_count = mf.seq_count;
+	ambiguity_tolerance = mf.ambiguity_tolerance;
+
+	// subset-match state.  alphabet_bits is assigned by AddSequence(), so it
+	// is only taken from mf when mf actually holds sequences; alpha_map_size
+	// is recomputed by FindSubsets() before it is used.
+	alpha_map = mf.alpha_map;
+	alpha_map_size = alpha_map.size();
+	alphabet_bits = mf.seq_count > 0 ? mf.alphabet_bits : 0;
+
+	m_progress = mf.m_progress;
+	sar_table = mf.sar_table;
+	seq_table = mf.seq_table;
+	log_stream = mf.log_stream;
+	offset_stream = mf.offset_stream;
+
+	// the progress counters are re-initialized at the start of every
+	// FindMatchSeeds() call, so the copy simply starts them at zero
+	mers_processed = 0;
+	total_mers = 0;
+}
+
+// Returns the object to its freshly constructed state: forgets all sequences
+// and sorted mer lists (without deleting them), disables logging and marks
+// that no operation is in progress.
+void MatchFinder::Clear(){
+	// drop the per-sequence tables
+	sar_table.clear();
+	seq_table.clear();
+	seq_count = 0;
+
+	// restore the default search parameters
+	mer_size = DNA_MER_SIZE;
+	ambiguity_tolerance = 0;
+
+	// no computation running, no logging
+	m_progress = -1;
+	log_stream = NULL;
+	offset_stream = NULL;
+}
+
+// Selects the stream that receives progress output during a search.
+// Passing NULL turns progress logging off.
+void MatchFinder::LogProgress( ostream* os ){
+	this->log_stream = os;
+}
+
+/**
+ * Registers a sorted mer list, and optionally its sequence, for matching.
+ * @param sar	The sorted mer list of the new sequence, must not be NULL
+ * @param seq	The corresponding sequence; may be NULL, in which case nothing
+ *		is added to seq_table
+ * @throws NullPointer if sar is NULL
+ * @return	Always true
+ */
+boolean MatchFinder::AddSequence( SortedMerList* sar, gnSequence* seq ){
+	if(sar == NULL){
+		Throw_gnExMsg( NullPointer(), "Null SortedMerList pointer" );
+	}
+	// seq is allowed to be NULL (it is the default argument), so it is not
+	// rejected here.  An earlier duplicate test of sar that reported
+	// "Null gnSequence pointer" could never fire and has been removed.
+
+	//check for consistency between sequence length and sorted mer list lengths
+/*	if(seq != NULL && seq->length() != sar->Length()){
+		cerr << "MatchFinder::AddSequence: Error mismatched sml and sequence length.\n";
+		cerr << "Seq length: " << seq->length() << "\tSML length: " << sar->Length() << endl;
+		DebugMsg("MatchFinder::AddSequence: Error mismatched sml and sequence length.");
+		return false;
+	}
+*/
+	//passed checks, add it to the data structures
+	sar_table.push_back(sar);
+	++seq_count;
+	if(seq != NULL){
+		seq_table.push_back(seq);
+	}
+
+	// remember the alphabet size of the most recently added sorted mer list
+	SMLHeader header = sar->GetHeader();
+	alphabet_bits = header.alphabet_bits;
+
+	return true;
+
+}
+
+/**
+ * Locates, in every sorted mer list, the first entry holding the mer found at
+ * index startI of sorted mer list sarI.
+ * @param sarI		Index of the sorted mer list that supplies the mer
+ * @param startI	Index of the mer within that sorted mer list
+ * @param breakpoints	Cleared, then filled with one index per sequence.  For
+ *			a list in which FindMer() reports no hit, the position
+ *			FindMer() left in its output argument is stored unchanged.
+ */
+void MatchFinder::GetBreakpoint( uint32 sarI, gnSeqI startI, vector<gnSeqI>& breakpoints ) const{
+	breakpoints.clear();
+
+	//put the mer to break on in break_mer
+	bmer break_mer  = (*GetSar(sarI))[startI];
+	uint64 mer_mask = GetSar(sarI)->GetSeedMask();
+	const uint64 masked_break_mer = break_mer.mer & mer_mask;
+
+	//search backwards for the first index of this mer; stopping at index 0
+	//avoids decrementing the unsigned index below zero
+	while(startI > 0 && ((*GetSar(sarI))[startI - 1].mer & mer_mask) == masked_break_mer)
+		--startI;
+
+	//find the mer's location in the other sorted mer lists
+	for(uint32 i=0; i < seq_count; ++i){
+		if(i == sarI){
+			breakpoints.push_back(startI);
+		}else{
+			gnSeqI cur_start;
+			if(GetSar(i)->FindMer(break_mer.mer, cur_start)){
+				//we found a match, see how far backwards we can go.
+				//The comparison masks both mers with a bitwise AND and looks at
+				//the entry just before the current one, so the scan really
+				//moves and never reads in front of index 0.
+				while(cur_start > 0 && ((*GetSar(i))[cur_start - 1].mer & mer_mask) == masked_break_mer)
+					--cur_start;
+			}
+			breakpoints.push_back(cur_start);
+		}
+	}
+}
+
+// Finds all match seeds, starting the search at offset 0 of every sorted mer list.
+void MatchFinder::FindMatchSeeds(){
+	// one zero offset per registered sorted mer list
+	vector<gnSeqI> zero_offsets( sar_table.size(), 0 );
+	FindMatchSeeds( zero_offsets );
+}
+
+/**
+ * Finds all match seeds, starting at the given offset in each sorted mer list.
+ * SearchRange() is called repeatedly until it reports completion; after every
+ * incomplete pass the current offsets are written to offset_stream (if set)
+ * as one tab separated line.
+ * @param start_offsets	One starting index per sorted mer list
+ * @throws InvalidData if fewer offsets than sorted mer lists are supplied, or
+ *		if SearchRange() rejects the search range
+ */
+void MatchFinder::FindMatchSeeds( const vector<gnSeqI>& start_offsets ){
+	// start_offsets is indexed once per sorted mer list below, so a vector
+	// that is too short must be rejected before it is read out of bounds
+	if( start_offsets.size() < sar_table.size() ){
+		Throw_gnExMsg(InvalidData(), "Inconsistent search range specification.");
+	}
+	vector<gnSeqI> start_points = start_offsets;
+	vector<gnSeqI> search_len;
+	// keep track of the number of mers processed and the total for progress reporting
+	mers_processed = 0;
+	total_mers = 0;
+	m_progress = -1;
+	for(uint32 i=0; i < sar_table.size(); ++i){
+		// GNSEQI_END requests a search through the end of the sorted mer list
+		search_len.push_back(GNSEQI_END);
+		total_mers += search_len[i] == GNSEQI_END ? sar_table[i]->Length() : search_len[i];
+		mers_processed += start_points[ i ];
+	}
+	// SearchRange() returns false after advancing start_points past a run of
+	// repetitive mers; keep calling it until the whole range has been searched
+	while( !SearchRange(start_points, search_len) ){
+		mers_processed = 0;
+		for( uint32 seqI = 0; seqI < sar_table.size(); ++seqI ){
+			if( offset_stream != NULL ){
+				if( seqI > 0 )
+					*offset_stream << '\t';
+				*offset_stream << start_points[ seqI ];
+			}
+			mers_processed += start_points[ seqI ];
+		}
+		if( offset_stream != NULL ){
+			*offset_stream << endl;
+			offset_stream->flush();
+		}
+	}
+}
+
+#define MER_REPEAT_LIMIT 1000 // The maximum number of matching mers before they are completely
+								// ignored.
+
+// Debugging switch: when set to true, the next SearchRange() call prints the
+// first buffered mer of sorted mer lists 0, 1 and 2 and resets the flag.
+boolean print_sp = false;
+/**
+ * Walks the sorted mer lists in parallel and groups identical (masked) mers.
+ * cur_mers holds one "head" mer per sorted mer list, ordered by mer value;
+ * cur_match collects all mers equal to the smallest head.  When the smallest
+ * head becomes larger than the collected mers, the group is passed to
+ * EnumerateMatches() if it has more than one member.
+ * When a group grows beyond MER_REPEAT_LIMIT, start_points is moved past the
+ * repetitive mer and false is returned so that the caller can call again.
+ * @param start_points	Starting index in each sorted mer list; updated when false is returned
+ * @param search_len	Maximum number of mers to examine in each sorted mer list
+ * @throws InvalidData if the seed patterns differ or the vector sizes do not match sar_table
+ * @return true when the search is complete, false when it must be resumed
+ */
+//startI must be 0
+//At most search_length mers in any one genome will be checked.
+boolean MatchFinder::SearchRange(vector<gnSeqI>& start_points, vector<gnSeqI>& search_len){
+	//picked a semi-arbitrary number for buffer size.
+	uint32 MER_BUFFER_SIZE = 10000;
+	vector<uint32> mer_index;   // stores the indexes of the current mers in mer_vector
+	vector<uint32> mer_baseindex;   // stores the index in the SortedMerList of each of the first mers in mer_vector
+	IdmerList cur_mers;	// stores the current mers.
+	IdmerList cur_match;	// stores the current matching mers.
+	list<uint32> sar_hitlist;	// list of sars to replace
+	uint32 read_size;
+	
+	//make sure there is at least one sequence
+	if(sar_table.size() < 1)
+		return true;
+	
+	//check for consistency in seed patterns.
+	uint64 mer_mask = sar_table[0]->GetSeedMask();
+	uint64 seed = sar_table[0]->Seed();
+	mer_size = sar_table[0]->SeedWeight();
+	for(uint32 maskI = 0; maskI < sar_table.size(); ++maskI){
+		if(seed != sar_table[maskI]->Seed()){
+			Throw_gnExMsg(InvalidData(), "Different seed patterns.");
+		}
+	}
+	
+	//check that start_points and end_points are ok.
+	if((start_points.size() != sar_table.size()) || (search_len.size() != sar_table.size())){
+		Throw_gnExMsg(InvalidData(), "Inconsistent search range specification.");
+	}
+	
+	//allocate buffer space
+	// stores arrays of bmers for each sml.
+
+	vector< vector< bmer > > mer_vector;
+	for( uint vecI = 0; vecI < sar_table.size(); ++vecI ){
+		vector< bmer > vec;
+		mer_vector.push_back( vec );
+	}
+
+	//initialize the data structures
+	idmer newmer;
+	for(uint32 n = 0; n < sar_table.size(); ++n){
+		// read at most one buffer full, or less if the search length is shorter
+		read_size = MER_BUFFER_SIZE < search_len[n] ? MER_BUFFER_SIZE : search_len[n]; 
+		mer_vector[n].reserve(read_size);
+		sar_table[n]->Read(mer_vector[n], read_size, start_points[n]);
+		mer_index.push_back(0);
+		mer_baseindex.push_back(0);
+		if( mer_vector[n].size() > 0 ){
+			newmer.position = mer_vector[n][0].position;
+			newmer.mer = mer_vector[n][0].mer & mer_mask;
+			newmer.id = n;
+			cur_mers.push_back(newmer);  //cur_mers gets the first mer from each sorted mer list
+		}
+	}
+	
+	// debugging output; indexes lists 0..2 directly, so it is only safe when
+	// at least three sorted mer lists with non-empty buffers are present
+	if( print_sp ){
+	cerr << "First mers are: " << mer_vector[0][0].mer << endl;
+	cerr << "First mers are: " << mer_vector[1][0].mer << endl;
+	cerr << "First mers are: " << mer_vector[2][0].mer << endl;
+	print_sp = false;
+	}	
+	// Main loop overview: the head mers are kept sorted by mer value, the
+	// smallest head is processed on every iteration.
+
+	//loop while there is data to hash.
+	cur_mers.sort(&idmer_lessthan);
+	while(cur_mers.size() > 0){
+		IdmerList::iterator mer_iter = cur_mers.begin();
+		sarID_t cur_id = mer_iter->id;
+		//first check for matches across genomes.
+		if(cur_match.size() > 0){
+			if(mer_iter->mer > cur_match.begin()->mer){
+				//we are done with this matching.  hash it.
+				if(cur_match.size() > 1)
+					EnumerateMatches(cur_match);
+				cur_match.clear();
+			}else if(mer_iter->mer < cur_match.begin()->mer){
+				//error checking stuff.
+				// the smallest head is below the current group, i.e. the heads are out of order
+				ErrorMsg("Horrible error occurred!!\n");
+			}
+		}
+
+		if( cur_match.size() > MER_REPEAT_LIMIT ){
+			// scan past the repetitive mers
+			// create the lexicographically next mer
+			uint64 next_mer = cur_match.begin()->mer;
+			// ~mer_mask + 1 is the two's complement of the mask; adding it
+			// produces the next mer value -- NOTE(review): presumably the mask
+			// is a contiguous run of high bits, confirm
+			next_mer += ~mer_mask + 1;
+//			cerr << "Searching to: " << next_mer << endl;
+			gnSeqI next_pos = 0;
+			uint seqI = 0;
+			// find the first sorted mer list that still has data at or after next_mer
+			for( ; seqI < sar_table.size(); ++seqI ){
+				if( !sar_table[ seqI ]->FindMer( next_mer, next_pos ))
+					++next_pos;
+				if( next_pos < sar_table[ seqI ]->SMLLength() )
+					break;
+			}
+			vector< gnSeqI > old_starts = start_points;
+			if( seqI < sar_table.size() )
+				GetBreakpoint( seqI, next_pos, start_points );
+			for( int spI = 0; spI < start_points.size(); ++spI ){
+				// don't allow it to move backwards!
+				start_points[ spI ] = start_points[ spI ] < mer_index[ spI ] + mer_baseindex[ spI ] + old_starts[ spI ] ? old_starts[ spI ] + mer_index[ spI ] + mer_baseindex[ spI ] : start_points[ spI ];
+				// lists before seqI had nothing left at or after next_mer: mark them finished
+				if( spI < seqI )
+					start_points[ spI ] = sar_table[ spI ]->SMLLength();
+			} 
+			return false;
+		}
+		//check for matches within the same genome
+		gnSeqI merI = mer_index[cur_id];
+		boolean buffer_exhausted = merI < mer_vector[cur_id].size() ? false : true;
+		while(!buffer_exhausted && (mer_iter->mer == (mer_vector[cur_id][merI].mer & mer_mask))){
+			newmer.position = mer_vector[cur_id][merI].position;
+			newmer.mer = mer_vector[cur_id][merI].mer & mer_mask;
+			newmer.id = cur_id;
+			cur_match.push_back(newmer);
+			++merI;
+			++mer_index[cur_id];
+			//check if we've exhausted our buffer
+			if(merI == mer_vector[cur_id].size())
+				buffer_exhausted = true;
+		}
+
+		if(buffer_exhausted)
+		{
+			//if we've exhausted our buffer then refill it
+			mer_baseindex[cur_id] += mer_vector[cur_id].size();
+			
+			// update the mers processed
+			mers_processed += mer_vector[cur_id].size();
+			float64 m_oldprogress = m_progress;
+			m_progress = ((float64)mers_processed / (float64)total_mers) * PROGRESS_GRANULARITY;
+			if( log_stream != NULL ){
+				// print the percentage whenever its integer value changes ...
+				if((int)m_oldprogress != (int)m_progress){
+					(*log_stream) << (int)((m_progress / PROGRESS_GRANULARITY) * 100) << "%..";
+					log_stream->flush();
+				}
+				// ... and start a new output line every ten progress units
+				if(((int)m_oldprogress / 10) != ((int)m_progress / 10))
+					(*log_stream) << std::endl;
+			}
+			// this local read_size hides the function-level variable of the same name
+			uint32 read_size = MER_BUFFER_SIZE;
+			if(MER_BUFFER_SIZE + mer_baseindex[cur_id] > search_len[cur_id])
+				read_size = search_len[cur_id] - mer_baseindex[cur_id];
+
+			sar_table[cur_id]->Read(mer_vector[cur_id], read_size, start_points[cur_id] + mer_baseindex[cur_id]);
+			mer_index[cur_id] = 0;
+			if(mer_vector[cur_id].size() == 0){
+				//remove mer_iter so that this sar is forgotten
+				cur_mers.erase(mer_iter);
+			}
+		}else{
+			//if we haven't exhausted our buffer then we must have
+			//run out of matching mers.
+			//remove mer_iter and put in a new idmer with the same id
+			cur_mers.erase(mer_iter);
+			newmer.position = mer_vector[cur_id][merI].position;
+			newmer.mer = mer_vector[cur_id][merI].mer & mer_mask;
+			newmer.id = cur_id;
+			// insert the new head at its sorted position so cur_mers stays ordered by mer
+			mer_iter = cur_mers.begin();
+			while(mer_iter != cur_mers.end() && mer_iter->mer < newmer.mer )
+				++mer_iter;
+			cur_mers.insert(mer_iter, newmer);
+		}
+		
+	}
+	//very last match in the dataset wasn't getting hashed.
+    if(cur_match.size() > 1)
+       EnumerateMatches(cur_match);
+
+	return true;
+}
+
+/**
+ * Calls HashMatch() for every combination of matching mers that takes exactly
+ * one mer from each sequence present in match_list.
+ * @param match_list	Mers sharing the same seed; sorted by sequence id as a
+ *			side effect when it holds other than two entries
+ * @return	The result of HashMatch() for a two-element list, true otherwise
+ */
+boolean MatchFinder::EnumerateMatches( IdmerList& match_list ){
+	// nothing to enumerate; also keeps the iterator set-up below from
+	// stepping past end() of an empty list
+	if(match_list.empty())
+		return true;
+
+	//this must call HashMatch on every possible combination of matches in the list.
+	if(match_list.size() == 2){
+		//this is the smallest possible match.  simply hash it.
+		return HashMatch(match_list);
+	}
+
+	match_list.sort(&idmer_id_lessthan);
+
+	// id_pos[k] is the currently selected mer of the k-th distinct sequence id.
+	// It starts at the first mer of every run of equal ids.
+	vector<IdmerList::iterator> id_pos;
+	IdmerList::iterator iter = match_list.begin();
+	IdmerList::iterator iter2 = match_list.begin();
+	++iter2;
+	id_pos.push_back(iter);
+	for(; iter2 != match_list.end(); ++iter, ++iter2){
+		if(iter->id != iter2->id)
+			id_pos.push_back(iter2);
+	}
+
+	// id_end[k] is the first mer of run k, so id_end[k+1] is one past its last
+	// mer; the final entry is the end of the list
+	vector<IdmerList::iterator> id_end = id_pos;
+	id_end.push_back(match_list.end());
+
+	//the following loop iterates through all possible combinations of idmers with
+	//different id's and hashes them.
+	while(true){
+		IdmerList cur_match;
+		for(uint32 k = 0; k < id_pos.size(); ++k){
+			cur_match.push_back(*id_pos[k]);
+		}
+		HashMatch(cur_match);
+
+		//increment the iterators (like an odometer)
+		uint32 m = id_pos.size() - 1;
+		while(true){
+			++id_pos[m];
+			if(id_pos[m] != id_end[m+1])
+				break;
+			// run m is used up: all combinations are done once the first run
+			// overflows, otherwise rewind this run and carry into the previous one
+			if(m == 0)
+				return true;
+			id_pos[m] = id_end[m];
+			m--;
+		}
+	}
+}
+/*
+boolean MatchFinder::MatchAmbiguities(MatchHashEntry& mhe, uint32 match_size){
+	if(ambiguity_tolerance == 0)
+		return false;
+			//check that all mers at the new position match
+	//which sequences are used in this match?
+	uint32* cur_seqs = new uint32[mhe.SeqCount()];
+	uint32 used_seqs = 0;
+	for(uint32 seqI = 0; seqI < mhe.SeqCount(); ++seqI){
+		if(mhe[seqI] != NO_MATCH){
+			cur_seqs[used_seqs] = seqI;
+			++used_seqs;
+		}
+	}
+	string cur_mer, mer_i;
+	gnSequence mer_seq;
+	int64 mer_to_get = mhe[cur_seqs[0]];
+	if(mer_to_get < 0){
+		mer_to_get *= -1;
+		mer_to_get += mhe.Length() - mer_size;
+	}
+	cur_mer = seq_table[cur_seqs[0]]->subseq(mer_to_get, match_size).ToString();
+	
+	for(uint32 i=1; i < used_seqs; ++i){
+		mer_to_get = mhe[cur_seqs[i]];
+		if(mer_to_get < 0){
+			//Convert the cur_seqs[i] entry since negative implies reverse complement
+			mer_to_get *= -1;
+			mer_to_get += mhe.Length() - mer_size;
+		}
+		mer_seq = seq_table[cur_seqs[i]]->subseq(mer_to_get, match_size);
+		if(mer_seq.compare(cur_mer) != 0){
+			delete[] cur_seqs;
+			return false;
+		}
+		mer_i = mer_seq.ToString();
+		uint32 ambiguity_count = 0;
+		for(uint32 baseI = 0; baseI < match_size; ++baseI)
+			if(cur_mer[baseI] != mer_i[baseI])
+				++ambiguity_count;
+		if(ambiguity_count > ambiguity_tolerance){
+			delete[] cur_seqs;
+			return false;
+		}
+	}
+	delete[] cur_seqs;
+	return true;
+}
+*/
+
+} // namespace mems
diff --git a/libMems/MatchFinder.h b/libMems/MatchFinder.h
new file mode 100644
index 0000000..23e15c9
--- /dev/null
+++ b/libMems/MatchFinder.h
@@ -0,0 +1,380 @@
+/*******************************************************************************
+ * $Id: MatchFinder.h,v 1.23 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _MatchFinder_h_
+#define _MatchFinder_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/SortedMerList.h"
+#include "libMems/Match.h"
+#include "libMems/MatchList.h"
+#include <list>
+#include <iostream>
+#include <boost/pool/pool_alloc.hpp>
+
+namespace mems {
+
+/**
+ * A mer together with the sequence it came from.  SearchRange() fills these
+ * from the bmer entries of the sorted mer lists, storing the mer after it has
+ * been masked with the seed mask.
+ */
+struct idmer{
+	gnSeqI	position;	//starting position of this mer in the genome
+	uint64 	mer; 		//the actual sequence
+	sarID_t	id;			//the sequence identifier.
+};
+
+// typedef std::list<idmer, boost::fast_pool_allocator<idmer> > IdmerList;
+// using boost::fast_pool_allocator<idmer> results in a significant speedup
+// over std::allocator.  testing on a Salmonella vs. Y. pestis comparison shows
+// a 30% speedup
+typedef std::list<idmer> IdmerList;
+
+const unsigned int PROGRESS_GRANULARITY = 100;
+
+/**
+ * This abstract class implements a general framework for finding
+ * exactly matching mers.  HashMatch() is pure virtual and must be supplied
+ * by a derived class.  It is extended by the MemHash and MemScorer
+ * classes.
+ * @see MemHash
+ * @see MemScorer
+ */
+class MatchFinder : public genome::gnClone{
+public:
+	MatchFinder();
+	/** Virtual, because derived classes may be destroyed through a MatchFinder pointer */
+	virtual ~MatchFinder();
+	MatchFinder(const MatchFinder& mf);
+	/** Forgets all sequences and restores the default settings */
+	virtual void Clear();
+	/**
+	 * Adds a sequence to use when searching for exact matches.
+	 * @param sar A pointer to the sorted mer list for the new sequence
+	 * @param seq A pointer to the genome::gnSequence corresponding to the new sequence.
+	 */
+	virtual boolean AddSequence( SortedMerList* sar, genome::gnSequence* seq = NULL );
+	/**
+	 * Given the index of a sequence and an index into the sorted mer list, this function
+	 * will search the other sorted mer lists for the same mer.  This function returns the
+	 * position of the mer in each sequence in the breakpoints vector.
+	 */
+	virtual void GetBreakpoint( uint32 sarI, gnSeqI startI, std::vector<gnSeqI>& breakpoints ) const;
+	/** @return The number of sequences that have been added */
+	virtual uint32 Multiplicity(void){return seq_count;};
+	/** NOT IMPLEMENTED: Sets the number of ambiguities allowed in a mer match*/
+	virtual void SetAmbiguityTolerance(uint32 ambiguity_tol){ambiguity_tolerance = ambiguity_tol;}
+	/** @return the number of ambiguities allowed in a mer match */
+	virtual uint32 AmbiguityTolerance(){return ambiguity_tolerance;}
+	/** @return The progress of the current operation.  Ranges from 0 to 100.  -1 indicates no computation is being performed */
+	virtual float GetProgress() const {return m_progress;}
+
+	/** Finds all the matches between the sequences */
+	virtual void FindMatchSeeds();
+	/** Finds all the matches between the sequences, starting at a particular offset */
+	virtual void FindMatchSeeds( const std::vector<gnSeqI>& start_offsets );
+
+	/**
+	 * Logs progress to the designated ostream.  Set to null to skip progress logging.
+	 */
+	virtual void LogProgress( std::ostream* os );
+	/** Sets the stream that receives the current offsets in each SML.  Set to null to disable. */
+	void SetOffsetLog( std::ostream* offset_stream ){ this->offset_stream = offset_stream; }
+protected:
+	/** 
+	 * Searches for mer matches in a designated range of the sequence's sorted mer lists 
+	 * @throws InvalidData thrown if the start_points are bad or if the sorted mer lists were sorted on different mer sizes
+	 * @return true if completed searching, false if repetitive mers were encountered and SearchRange must be called again with the updated start_points.
+	 */
+	virtual boolean SearchRange(std::vector<gnSeqI>& start_points, std::vector<gnSeqI>& search_len);
+	/** Called whenever a mer match is found */
+	virtual boolean HashMatch(IdmerList& match_list) = 0;
+	/** Calls HashMatch for each combination of matching mers from different sequences */
+	virtual boolean EnumerateMatches(IdmerList& match_list);
+
+	template< class MatchType >
+	void FindSubsets(const MatchType& mhe, std::vector<MatchType>& subset_matches);
+
+	template< class UngappedMatchType >
+	void ExtendMatch(UngappedMatchType& mhe, std::vector<UngappedMatchType>& subset_matches, gnSeqI max_backward = GNSEQI_END, gnSeqI max_forward = GNSEQI_END);
+
+	/** @return The sorted mer list at index sarI.  The index is not range checked. */
+	virtual SortedMerList* GetSar(uint32 sarI) const;
+	std::vector<SortedMerList*> sar_table;	/**< The sorted mer lists, one per sequence */
+	std::vector<genome::gnSequence*> seq_table;	/**< The sequences that were supplied to AddSequence */
+	
+	uint32 mer_size;
+	uint32 seq_count;	/**< The number of sorted mer lists that have been added */
+	uint32 ambiguity_tolerance;
+	
+	// for subset matches
+	std::vector< std::vector< uint32 > > alpha_map;
+	uint alpha_map_size;
+	uint alphabet_bits;	/**< alphabet_bits of the most recently added sorted mer list */
+	
+	float m_progress;
+	std::ostream* log_stream;
+
+	uint64 mers_processed;	/**< The number of mers processed thus far */
+	uint64 total_mers;	/**< The total number of mers to search */
+	std::ostream* offset_stream;	/**< log for the current offset in each SML */
+};
+
+/** 
+ * InvalidData exceptions are thrown when the input to an algorithm is invalid
+ */
+CREATE_EXCEPTION( InvalidData );
+
+// Returns the sorted mer list stored at index sarI of sar_table.
+// No bounds check is performed on sarI.
+inline
+SortedMerList* MatchFinder::GetSar(uint32 sarI) const{
+	SortedMerList* const selected = sar_table[sarI];
+	return selected;
+}
+
+// mer less than function for STL sort functions: orders idmers by mer value.
+// The arguments are taken by const reference because a comparison must not
+// modify the elements it compares.
+inline
+bool idmer_lessthan(const idmer& a_v, const idmer& m_v){
+	return a_v.mer < m_v.mer;
+}
+
+//id less than function for STL sort functions: orders idmers by sequence id.
+//The arguments are taken by const reference because a comparison must not
+//modify the elements it compares.
+inline
+bool idmer_id_lessthan(const idmer& a_v, const idmer& m_v){
+	return a_v.id < m_v.id;
+}
+
+
+
+// takes as input a fully extended mem and returns the subset matches on the lower side
+//
+// For every sequence that takes part in mhe, a single alphabet_bits wide value
+// is derived from the mer located one position before the match start (see
+// the parity handling below).  Sequences that produce the same value are
+// collected in the same alpha_map bucket, and every bucket holding at least
+// two sequences is appended to subset_matches as a copy of mhe restricted to
+// those sequences.
+// The call site in ExtendMatch() is commented out, with the remark that this
+// code only works when using solid seeds.
+template< class MatchType >
+void MatchFinder::FindSubsets(const MatchType& mhe, std::vector<MatchType>& subset_matches){
+
+	SMLHeader head = GetSar( 0 )->GetHeader();
+	// shifting right by shift_amt keeps only the top alphabet_bits bits of a 64 bit mer
+	uint shift_amt = 64 - head.alphabet_bits;
+	// shifting left by rshift_amt moves character number SeedLength (counted from the top) into the top bits
+	uint rshift_amt = head.alphabet_bits * ( GetSar(0)->SeedLength() - 1 );
+
+	uint seqI, alphaI;
+
+	// initialize subset match data structures
+	// one bucket per possible character value: 2^alphabet_bits buckets
+	alpha_map_size = 1;
+	alpha_map_size <<= alphabet_bits;
+	if( alpha_map.size() != alpha_map_size ){
+		alpha_map.clear();
+		alpha_map.reserve( alpha_map_size );
+		std::vector< uint32 > tmp_list;
+		tmp_list.reserve( seq_count );
+		for( uint alphaI = 0; alphaI < alpha_map_size; ++alphaI )
+			alpha_map.push_back( tmp_list );
+	}else{
+		for( uint alphaI = 0; alphaI < alpha_map_size; ++alphaI )
+			alpha_map[ alphaI ].clear();
+	}
+	
+	
+	for( seqI = 0; seqI < seq_count; ++seqI ){
+		//check that all mers at the new position match
+		int64 mer_to_get = mhe[ seqI ];
+		if( mer_to_get == NO_MATCH )
+			continue;
+		// a negative start marks a reverse complement match; convert it to a
+		// positive coordinate before looking up the mer
+		if(mer_to_get < 0){
+			mer_to_get *= -1;
+			mer_to_get += mhe.Length() - GetSar(0)->SeedLength();
+		}
+
+		uint64 cur_mer = GetSar( seqI )->GetMer( mer_to_get - 1 );
+
+		// the lowest bit of the mer is combined with the match orientation
+		// NOTE(review): presumably bit 0 records the strand the stored mer was
+		// read from -- confirm against SortedMerList
+		boolean parity;
+		if( mhe[ seqI ] < 0 )
+			parity = cur_mer & 0x1;
+		else
+			parity = !(cur_mer & 0x1);
+
+		if( parity ){
+			// keep the top alphabet_bits bits
+			cur_mer >>= shift_amt;
+		}else{
+			// bring a later character to the top, complement it bitwise, then keep the top bits
+			cur_mer <<= rshift_amt;
+			cur_mer = ~cur_mer;
+			cur_mer >>= shift_amt;
+		}
+
+		alpha_map[ cur_mer ].push_back( seqI );
+
+	}
+	
+	for( alphaI = 0; alphaI < alpha_map_size; ++alphaI ){
+		// a bucket with fewer than two sequences is not a match
+		if( alpha_map[ alphaI ].size() < 2 ){
+			alpha_map[ alphaI ].clear();
+			continue;
+		}
+		// this is a subset
+		MatchType cur_subset = mhe;
+		cur_subset.SetLength( mhe.Length() );
+		for( uint sqI = 0; sqI < mhe.SeqCount(); ++sqI )
+			cur_subset.SetStart( sqI, NO_MATCH );	// init everything to NO_MATCH
+		// restore the start coordinate of the sequences in this bucket only
+		for( uint subI = 0; subI < alpha_map[ alphaI ].size(); ++subI )
+			cur_subset.SetStart( alpha_map[ alphaI ][ subI ], mhe[ alpha_map[ alphaI ][ subI ] ] );
+		subset_matches.push_back( cur_subset );
+		alpha_map[ alphaI ].clear();
+	}
+}
+
+// BUGS:
+// matches which span the end-start of a circular sequence will be hashed a second time
+//
+// Extends the match mhe in place as far as the seeds of all participating
+// sequences keep agreeing.  The main loop makes four passes, and mhe.Invert()
+// is called after each one so that consecutive passes work in opposite
+// directions:
+//   pass 0: limited by max_backward, steps of one seed length
+//   pass 1: limited by max_forward, steps of one seed length
+//   pass 2 and 3: steps of a single position, at most one seed length each
+// If one of the single-position passes found a matching seed, the whole
+// procedure is started again from pass 0.
+// subset_matches is only post-processed here (orientation and offset); the
+// code that would fill it is commented out.
+template< class UngappedMatchType >
+void MatchFinder::ExtendMatch(UngappedMatchType& mhe, std::vector<UngappedMatchType>& subset_matches, gnSeqI max_backward, gnSeqI max_forward){
+	uint64 cur_mer;
+	uint64 mer_mask = GetSar(0)->GetSeedMask();
+
+	//which sequences are used in this match?
+	uint32* cur_seqs = new uint32[mhe.SeqCount()];
+	uint32 used_seqs = 0;
+	for(uint32 seqI = 0; seqI < mhe.SeqCount(); ++seqI){
+		if(mhe[seqI] != NO_MATCH){
+			cur_seqs[used_seqs] = seqI;
+			++used_seqs;
+		}
+	}
+	//First extend backwards then extend forwards.  The following loop does them both.
+	int jump_size = GetSar(0)->SeedLength();
+	uint extend_limit = 0;	/**< Tracks the distance to the most distant overlapping matching seed */
+	uint extend_attempts = 0;	/**< Counts the total number of overlapping seeds checked */
+	boolean extend_again = false;	/**< Set to true if any overlapping seeds matched, the search will be restarted from that point */
+	for(uint32 directionI = 0; directionI < 4; ++directionI){
+		//how far can we go?	
+		//first calculate the maximum amount of traversal
+		//then do fewer comparisons.
+		int64 maxlen = GNSEQI_END;
+		if(directionI == 0)
+			maxlen = max_backward;
+		else if(directionI == 1)
+			maxlen = max_forward;
+		else
+			maxlen = GetSar(0)->SeedLength();
+		// reduce maxlen to the room that is left in every participating sequence
+		for(uint32 maxI = 0; maxI < used_seqs; ++maxI)
+			if(GetSar(cur_seqs[maxI])->IsCircular()){
+				if(GetSar(cur_seqs[maxI])->Length() < maxlen)
+					maxlen = GetSar(cur_seqs[maxI])->Length();
+			}else if(mhe[cur_seqs[maxI]] < 0){
+				int64 rc_len = GetSar(cur_seqs[maxI])->Length() - mhe.Length() + mhe[cur_seqs[maxI]] + 1;
+				if( rc_len < maxlen)
+					maxlen = rc_len;
+			}else if(mhe[cur_seqs[maxI]] - 1 < maxlen)
+				maxlen = mhe[cur_seqs[maxI]] - 1;
+		uint32 j=0;
+		uint32 i = used_seqs;	// set to used_seqs in case maxlen is already less than jump size.
+
+		extend_limit = 0;
+		extend_attempts = 0;
+
+		while(maxlen - jump_size >= 0){
+			// tentatively grow the match by one jump; undone below on a mismatch
+			mhe.SetLength(mhe.Length() + jump_size);
+			maxlen -= jump_size;
+			for(j=0; j < used_seqs; ++j){
+				if(mhe[cur_seqs[j]] > 0){
+					mhe.SetStart(cur_seqs[j], mhe[cur_seqs[j]] - jump_size);
+					// wrap around the origin when the start drops to zero or below
+					if(mhe[cur_seqs[j]] <= 0)
+						mhe.SetStart(cur_seqs[j], mhe[cur_seqs[j]] + GetSar(cur_seqs[j])->Length());
+				}
+			}
+			//check that all mers at the new position match
+			int64 mer_to_get = mhe[cur_seqs[0]];
+			if(mer_to_get < 0){
+				mer_to_get *= -1;
+				mer_to_get += mhe.Length() - GetSar(0)->SeedLength();
+			}
+			cur_mer = GetSar(cur_seqs[0])->GetSeedMer(mer_to_get - 1);
+			// the lowest mer bit is combined with the match orientation before
+			// the mer is masked; both values must agree across all sequences
+			boolean parity;
+			if( mhe[cur_seqs[0]] < 0 )
+				parity = cur_mer & 0x1;
+			else
+				parity = !(cur_mer & 0x1);
+			cur_mer &= mer_mask;
+
+			for(i=1; i < used_seqs; ++i){
+				mer_to_get = mhe[cur_seqs[i]];
+				if(mer_to_get < 0){
+					//Convert the cur_seqs[i] entry since negative implies reverse complement
+					mer_to_get *= -1;
+					mer_to_get += mhe.Length() - GetSar(0)->SeedLength();
+				}
+				uint64 comp_mer = GetSar(cur_seqs[i])->GetSeedMer(mer_to_get - 1);
+				boolean comp_parity;				
+				if( mhe[cur_seqs[i]] < 0 )
+					comp_parity = comp_mer & 0x1;
+				else
+					comp_parity = !(comp_mer & 0x1);
+				comp_mer &= mer_mask;
+				
+				if(cur_mer != comp_mer || parity != comp_parity ){
+					// in the seed-sized passes a mismatch ends the extension;
+					// the single-position passes keep trying further positions
+					if( directionI < 2 )
+						maxlen = 0;
+					break;
+				}
+			}
+			extend_attempts += jump_size;
+			// i == used_seqs means that every sequence agreed at this position
+			if( i == used_seqs )
+				extend_limit = extend_attempts;
+			if( directionI > 1 && extend_attempts == GetSar(0)->SeedLength() )
+				break;
+		}
+		//this stuff cleans up if there was a mismatch
+		if(i < used_seqs){
+			mhe.SetLength(mhe.Length() - jump_size);
+			for(;j > 0; j--){
+				if(mhe[cur_seqs[j - 1]] >= 0)
+					mhe.SetStart(cur_seqs[j - 1], mhe[cur_seqs[j - 1]] + jump_size);
+			}
+		}
+		// check whether any of the overlapping seeds matched.
+		// if so, set the match to that length and set the flag to start the search again
+		if( directionI > 1 && extend_attempts > 0 ){
+			if( extend_limit > 0 )
+				extend_again = true;
+			// minus jump_size because the cleanup above already moved the length back a little
+			int unmatched_diff = extend_attempts - extend_limit;
+			if( i < used_seqs )
+				unmatched_diff -= jump_size;
+			// sanity check: the amount to remove should never exceed the match length
+			if( (unmatched_diff > mhe.Length()) && unmatched_diff >= 0 )
+				std::cerr << "oh sheethockey mushrooms\n";
+			mhe.SetLength(mhe.Length() - unmatched_diff);
+			for(j=0; j < used_seqs; ++j){
+				if(mhe[cur_seqs[j]] > 0){
+					mhe.SetStart(cur_seqs[j], mhe[cur_seqs[j]] + unmatched_diff);
+					if(mhe[cur_seqs[j]] > GetSar(cur_seqs[j])->Length() )
+						mhe.SetStart(cur_seqs[j], mhe[cur_seqs[j]] - GetSar(cur_seqs[j])->Length() );
+				}
+			}
+		}
+		//Invert the sequence directions so that we extend in the other direction
+		//next time through the loop.  The second time we do this we are setting
+		//sequence directions back to normal.
+		mhe.Invert();
+
+		//if we've already been through twice then decrease the jump size
+		if(directionI >= 1)
+			jump_size = 1;
+		if( directionI == 3 && extend_again ){
+			// directionI is unsigned: -1 wraps to the largest value and the
+			// loop increment wraps it back to 0
+			directionI = -1;	// will become 0 on next iteration
+			jump_size = GetSar(0)->SeedLength();
+			extend_again = false;
+		}
+	}
+	// after the match has been fully extended, search for subset matches
+	// this code only works when using SOLID seeds-- so it's been disabled
+/*	if( used_seqs > 2 ){
+		FindSubsets( mhe, subset_matches );
+		mhe.Invert();
+		FindSubsets( mhe, subset_matches );
+		mhe.Invert();
+	}
+*/
+	// set the subsets so their reference sequence is always positive
+	for(uint32 subsetI = 0; subsetI < subset_matches.size(); ++subsetI){
+		if( subset_matches[subsetI][subset_matches[subsetI].FirstStart()] < 0 )
+			subset_matches[subsetI].Invert();
+		subset_matches[subsetI].CalculateOffset();
+	}
+
+	delete[] cur_seqs;
+}
+
+
+
+}
+
+#endif	//_MatchFinder_h_
diff --git a/libMems/MatchHashEntry.cpp b/libMems/MatchHashEntry.cpp
new file mode 100644
index 0000000..0a323a1
--- /dev/null
+++ b/libMems/MatchHashEntry.cpp
@@ -0,0 +1,203 @@
+/*******************************************************************************
+ * $Id: MatchHashEntry.cpp,v 1.9 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/MatchHashEntry.h"
+#include "libGenome/gnException.h"
+#include "libGenome/gnDebug.h"
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+// Ordering predicate: true when a's generalized offset is smaller than b's.
+boolean MatchHashEntry::offset_lessthan(const MatchHashEntry& a, const MatchHashEntry& b){
+	const int64 lhs_offset = a.m_offset;
+	const int64 rhs_offset = b.m_offset;
+	return lhs_offset < rhs_offset;
+}
+
+/**
+ * Ordering predicate on match pointers.  Entries are ordered first by
+ * FirstStart(); ties are broken by comparing start coordinates sequence by
+ * sequence, beginning at seq_compare_start.  A negative start is mapped to
+ * -start + Length() - m_mersize before comparing, and a sequence is skipped
+ * when either (mapped) start equals NO_MATCH.
+ * @return true if a orders before b
+ */
+boolean MatchHashEntry::start_lessthan_ptr(const MatchHashEntry* a, const MatchHashEntry* b){
+	const int32 first_diff = a->FirstStart() - b->FirstStart();
+	if(first_diff != 0)
+		return first_diff < 0;
+
+	// only sequence indices present in both entries are compared
+	uint32 shared_count = a->SeqCount();
+	if(b->SeqCount() < shared_count)
+		shared_count = b->SeqCount();
+
+	for(uint32 seqI = seq_compare_start; seqI < shared_count; seqI++){
+		int64 a_pos = a->Start(seqI);
+		int64 b_pos = b->Start(seqI);
+		if(a_pos < 0)
+			a_pos = -a_pos + a->Length() - a->m_mersize;
+		if(b_pos < 0)
+			b_pos = -b_pos + b->Length() - b->m_mersize;
+		if(a_pos == NO_MATCH || b_pos == NO_MATCH)
+			continue;
+		const int64 pos_diff = a_pos - b_pos;
+		if(pos_diff != 0)
+			return pos_diff < 0;
+	}
+	// every compared coordinate was equal, so a does not order before b
+	return false;
+}
+
+/**
+ * Like start_lessthan_ptr, but compares every shared sequence starting at
+ * index 0 and does not skip NO_MATCH coordinates.
+ * @return true if a orders before b
+ */
+boolean MatchHashEntry::strict_start_lessthan_ptr(const MatchHashEntry* a, const MatchHashEntry* b){
+	const int first_diff = a->FirstStart() - b->FirstStart();
+	if(first_diff != 0)
+		return first_diff < 0;
+
+	uint shared_count = a->SeqCount();
+	if(b->SeqCount() < shared_count)
+		shared_count = b->SeqCount();
+
+	for(uint seqI = 0; seqI < shared_count; seqI++){
+		int64 a_pos = a->Start(seqI);
+		int64 b_pos = b->Start(seqI);
+		// negative starts are mapped to -start + Length() - m_mersize
+		if(a_pos < 0)
+			a_pos = -a_pos + a->Length() - a->m_mersize;
+		if(b_pos < 0)
+			b_pos = -b_pos + b->Length() - b->m_mersize;
+		const int64 pos_diff = a_pos - b_pos;
+		if(pos_diff != 0)
+			return pos_diff < 0;
+	}
+	// all compared coordinates were equal
+	return false;
+}
+
+
+// Three-way comparison of start coordinates over the sequences both entries
+// share.  Sequences where either (mapped) start equals NO_MATCH are ignored.
+// Returns the first non-zero coordinate difference (a minus b), or 0 if none.
+int64 MatchHashEntry::start_compare(const MatchHashEntry& a, const MatchHashEntry& b){
+	uint shared_count = a.SeqCount();
+	if(b.SeqCount() < shared_count)
+		shared_count = b.SeqCount();
+
+	for(uint seqI = 0; seqI < shared_count; seqI++){
+		int64 a_pos = a.Start(seqI);
+		int64 b_pos = b.Start(seqI);
+		// negative starts are mapped to -start + Length() - m_mersize
+		if(a_pos < 0)
+			a_pos = -a_pos + a.Length() - a.m_mersize;
+		if(b_pos < 0)
+			b_pos = -b_pos + b.Length() - b.m_mersize;
+		if(a_pos == NO_MATCH || b_pos == NO_MATCH)
+			continue;
+		const int64 pos_diff = a_pos - b_pos;
+		if(pos_diff != 0)
+			return pos_diff;
+	}
+	return 0;
+}
+
+// Compares the end of a to the start of b: a copy of a is cropped at its
+// start by Length()-1 and the result is handed to start_compare.
+int64 MatchHashEntry::end_to_start_compare(const MatchHashEntry& a, const MatchHashEntry& b){
+	MatchHashEntry a_tail( a );
+	a_tail.CropStart( a_tail.Length()-1 );
+	const int64 comparison = MatchHashEntry::start_compare( a_tail, b );
+	return comparison;
+}
+
+
+// Default constructor: an empty Match that is not extended, with zero mer
+// size and zero generalized offset.  m_offset is initialized explicitly
+// because operator==, operator= and offset_lessthan all read it.
+MatchHashEntry::MatchHashEntry() : 
+Match(),
+m_extended( false ),
+m_mersize( 0 ),
+m_offset( 0 )
+{
+}
+
+
+// Constructs an entry for seq_count sequences found with mers of size
+// mersize.  The entry is flagged as extended when m_type == extended.
+// The generalized offset starts at 0 (CalculateOffset() recomputes it), so it
+// is never read while indeterminate.
+MatchHashEntry::MatchHashEntry(uint32 seq_count, const gnSeqI mersize, MemType m_type) : 
+ Match( seq_count ),
+ m_extended( m_type == extended ),
+ m_mersize( mersize ),
+ m_offset( 0 )
+{
+}
+
+
+// Returns a copy of this entry allocated with new.
+MatchHashEntry* MatchHashEntry::Clone() const{
+	MatchHashEntry* duplicate = new MatchHashEntry(*this);
+	return duplicate;
+}
+
+// Assignment: copies the Match base part and all MatchHashEntry bookkeeping
+// (extended flag, mer size, generalized offset), so that the result compares
+// equal to mhe under operator==.
+MatchHashEntry& MatchHashEntry::operator=(const MatchHashEntry& mhe)
+{
+	Match::operator=( mhe );
+	m_extended = mhe.m_extended;
+	// the mer size must be copied; it takes part in operator== and in the
+	// reverse-strand coordinate mapping of the start comparison functions
+	m_mersize = mhe.m_mersize;
+	m_offset = mhe.m_offset;
+
+	return *this;
+}
+
+// Equality: sequence count, mer size, extended flag and generalized offset
+// must all agree before the base-class comparison is consulted.
+boolean MatchHashEntry::operator==(const MatchHashEntry& mhe) const
+{
+	const bool same_bookkeeping =
+		m_seq_count == mhe.m_seq_count &&
+		m_mersize == mhe.m_mersize &&
+		m_extended == mhe.m_extended &&
+		m_offset == mhe.m_offset;
+	if( !same_bookkeeping )
+		return false;
+	return Match::operator ==(mhe);
+}
+
+// Recomputes m_offset as the sum, over every sequence after FirstStart()
+// whose start is not NO_MATCH, of (Start(seq) - Start(FirstStart())); for a
+// negative start, Length(seq) is additionally subtracted.
+// Does nothing when the entry has no sequences.
+void MatchHashEntry::CalculateOffset()
+{
+	if( SeqCount() == 0 )
+		return;
+
+	m_offset = 0;
+
+	const uint ref_seq = FirstStart();
+	const int64 ref_start = Start(ref_seq);
+
+	for(uint seqI = ref_seq + 1; seqI < SeqCount(); seqI++){
+		if(Start(seqI) == NO_MATCH)
+			continue;
+		int64 cur_off = Start(seqI) - ref_start;
+		if( Start(seqI) < 0 )
+			cur_off -= (int64)Length( seqI );
+		m_offset += cur_off;
+	}
+}
+
+// checks if mhe is _perfectly_ contained in this match.
+// all offsets in all sequences must be aligned to each other
+//
+// Returns false when the sequence counts or generalized offsets differ, when
+// this match has NO_MATCH in mhe's first matching sequence, or when mhe does
+// not lie within [start, start + Length()) of this match in that sequence.
+// The remaining sequences must then show the same start difference (or, for a
+// negative mhe start, the reverse-complement difference diff_rc).
+boolean MatchHashEntry::Contains(const MatchHashEntry& mhe) const{
+	uint i;
+	int64 diff_i;
+	int64 diff;
+	uint seq_count = mhe.SeqCount();
+	//check for a consistent number of genomes and
+	//identical generalized offsets
+	if(SeqCount() != seq_count || m_offset != mhe.m_offset)
+		return false;
+
+	// the reference sequence is the first one in which mhe matches
+	i = mhe.FirstStart();
+	diff = mhe.Start(i) - Start(i);
+	if(Start(i) == NO_MATCH)
+		return false;
+
+	//check for containment properties
+	if(diff < 0 || Length() < mhe.Length() + diff)
+		return false;
+
+	//everything is ok so far, check for alignment
+	// expected start difference for sequences where mhe has a negative start
+	int64 diff_rc = (int64)mhe.Length() - (int64)Length() + diff;
+	for(i++; i < seq_count; i++){
+		//check for consistent alignment between all genomes
+		//in the case of revcomp, diff_i must equal diff_rc
+		diff_i = mhe.Start(i) - Start(i);
+
+		//it's ok if neither matches in a sequence
+		if(mhe.Start(i) == NO_MATCH && Start(i) == NO_MATCH)
+			continue;
+		else if(mhe.Start(i) < 0 && diff_rc == diff_i)
+			continue;
+		else if(diff != diff_i )
+			return false;
+	}
+	//it was contained.
+	return true;
+}
+
+
+} // namespace mems
diff --git a/libMems/MatchHashEntry.h b/libMems/MatchHashEntry.h
new file mode 100644
index 0000000..42dd0df
--- /dev/null
+++ b/libMems/MatchHashEntry.h
@@ -0,0 +1,147 @@
+/*******************************************************************************
+ * $Id: Match.h,v 1.10 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef __MatchHashEntry_h__
+#define __MatchHashEntry_h__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnClone.h"
+#include <iostream>
+#include <set>
+#include "libMems/Match.h"
+
+namespace mems {
+
+/**
+ * MatchHashEntry is a Match that additionally records whether the match has
+ * been extended, the mer size used to find it, and a generalized offset
+ * computed from its start coordinates.  Like Match, it stores the location of
+ * an <b>equal size</b> matching region between several sequences.  The static
+ * comparison functions below order and compare entries by offset or by start
+ * coordinate.
+ */
+
+class MatchHashEntry : public Match
+{
+public:
+	/** Distinguishes a freshly found seed from a match that was already extended */
+	enum MemType
+	{
+		seed,
+		extended
+	};
+
+public:
+	MatchHashEntry();
+	/**
+	 * Creates a new Match.
+	 * @param seq_count The total number of sequences in the alignment
+	 * @param mersize The size of the mers used in the sorted mer lists.
+	 * @param m_type The type of mem to create, can either be a seed or already extended.
+	 * @see MemType
+	 */
+	MatchHashEntry( const uint seq_count, const gnSeqI mersize, const MemType m_type = seed );
+	/** @return a copy of this entry allocated with new */
+	MatchHashEntry* Clone() const;
+	/** @return a copy of this entry obtained from m_allocateAndCopy; release it with Free() */
+	MatchHashEntry* Copy() const;
+	/** Releases this entry through m_free */
+	virtual void Free();
+	/** Copy constructor, implemented in terms of operator= */
+	MatchHashEntry( const MatchHashEntry& mhe ){ *this = mhe; }
+	MatchHashEntry& operator=(const MatchHashEntry& mhe);
+
+	/** comparison operator, compares two matches to see if they are the same */
+	boolean operator==(const MatchHashEntry& mhe) const;
+
+
+	/** @return true if this match has already been extended */
+	boolean Extended() const{return m_extended;}
+	/** Sets this match to be extended if the value passed in "extended" is true */
+	void SetExtended(boolean extended){m_extended = extended;}
+	/** @return the mer size of the sorted mer lists used to find this match */
+	uint MerSize() const{return m_mersize;}
+
+	/**
+	 * Calculates the generalized offset and other bookkeeping information
+	 * for this mem.  This should <b>always</b> be called after changing the start
+	 * positions of the mem.
+	 */
+	virtual void CalculateOffset();
+	
+	/** Returns the generalized offset of this match */
+	int64 Offset() const{return m_offset;};
+
+	/** Sets the generalized offset of this match to "offset" */
+	void SetOffset(int64 offset){m_offset = offset;};		
+
+	/** @return true if the generalized offset of a is less than that of b */
+	static boolean offset_lessthan(const MatchHashEntry& a, const MatchHashEntry& b);
+	/** orders by FirstStart(), then by start coordinates, skipping NO_MATCH coordinates */
+	static boolean start_lessthan_ptr(const MatchHashEntry* a, const MatchHashEntry* b);
+	/** reference-taking wrapper around start_lessthan_ptr */
+	static bool start_lessthan(const MatchHashEntry& a, const MatchHashEntry& b);
+	/** orders by FirstStart(), then by start coordinates, without skipping NO_MATCH coordinates */
+	static boolean strict_start_lessthan_ptr(const MatchHashEntry* a, const MatchHashEntry* b);
+	/** compare the end of a to the start of b 
+	 */
+	static int64 end_to_start_compare(const MatchHashEntry& a, const MatchHashEntry& b);
+	/** @return the first non-zero start coordinate difference (a minus b), or 0 */
+	static int64 start_compare(const MatchHashEntry& a, const MatchHashEntry& b);
+
+	/**
+	 *	Will return true if this match contains mhe
+	 *  Containment implies that a match has a length >= the contained
+	 *  match, it has coordinates in every genome the contained match has,
+	 *  the difference in start positions in each genome is the same.
+	 * @param mhe The match to check for containment.
+	 * @return True if this match contains mhe.
+	 */
+	boolean Contains(const MatchHashEntry& mhe) const;
+
+private:
+
+	boolean m_extended;	/**< true once the match has been extended */
+	gnSeqI m_mersize;	/**< mer size of the sorted mer lists used to find this match */
+	int64 m_offset;		/**< generalized offset, maintained by CalculateOffset() / SetOffset() */
+};
+
+// Duplicates this entry through m_allocateAndCopy.
+inline
+MatchHashEntry* MatchHashEntry::Copy() const
+{
+	const MatchHashEntry& self = *this;
+	return m_allocateAndCopy(self);
+}
+// Hands this entry to m_free; the object must not be used afterwards.
+inline
+void MatchHashEntry::Free()
+{
+	MatchHashEntry* const self = this;
+	m_free(self);
+}
+
+// Reference-taking convenience wrapper around start_lessthan_ptr.
+inline
+bool MatchHashEntry::start_lessthan(const MatchHashEntry& a, const MatchHashEntry& b){
+	const MatchHashEntry* lhs = &a;
+	const MatchHashEntry* rhs = &b;
+	return start_lessthan_ptr(lhs, rhs);
+}
+
+/**
+ * Comparison functor over MatchHashEntry pointers.
+ * An entry with a larger FirstStart() orders first.  Among entries with the
+ * same FirstStart(), an entry lacking a sequence that the other one hits
+ * orders first; entries where one contains the other are treated as
+ * equivalent; anything else falls back to strict_start_lessthan_ptr.
+ */
+class MheCompare {
+public:
+	bool operator()(const MatchHashEntry* a, const MatchHashEntry* b) const{
+		if( a->FirstStart() > b->FirstStart() )
+			return true;
+		if( a->FirstStart() != b->FirstStart() )
+			return false;
+
+		// check that the matches hit the same genomes
+		for( size_t seqI = a->FirstStart(); seqI < a->SeqCount(); seqI++ )
+		{
+			if( a->LeftEnd(seqI) == NO_MATCH && b->LeftEnd(seqI) != NO_MATCH )
+				return true;
+			if( a->LeftEnd(seqI) != NO_MATCH && b->LeftEnd(seqI) == NO_MATCH )
+				return false;
+		}
+
+		// same genomes: a containment in either direction means "equivalent"
+		if( a->Contains(*b) || b->Contains(*a) )
+			return false;
+		return MatchHashEntry::strict_start_lessthan_ptr(a, b);
+	}
+};
+
+}
+
+#endif // __MatchHashEntry_h__
diff --git a/libMems/MatchList.cpp b/libMems/MatchList.cpp
new file mode 100644
index 0000000..25648e6
--- /dev/null
+++ b/libMems/MatchList.cpp
@@ -0,0 +1,26 @@
+/*******************************************************************************
+ * $Id: MatchList.cpp,v 1.22 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/MatchList.h"
+#include "libMems/DNAFileSML.h"
+#include "libMems/DNAMemorySML.h"
+#include "libMems/MemHash.h"
+#include <map>
+#include <sstream>
+#include <ctime>
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+
+} // namespace mems
diff --git a/libMems/MatchList.h b/libMems/MatchList.h
new file mode 100644
index 0000000..fc25a01
--- /dev/null
+++ b/libMems/MatchList.h
@@ -0,0 +1,668 @@
+/*******************************************************************************
+ * $Id: MatchList.h,v 1.10 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _MatchList_h_
+#define _MatchList_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <iostream>
+#include <list>
+#include "libMems/SortedMerList.h"
+#include "libMems/DNAFileSML.h"
+#include "libMems/DNAMemorySML.h"
+#include "libGenome/gnSequence.h"
+#include "libMems/Match.h"
+#include "libMems/gnRAWSequence.h"
+#include "libGenome/gnRAWSource.h"
+#include "libMems/Files.h"
+#include <sstream>
+#include <map>
+#include <ctime>
+
+namespace mems {
+
+/**
+ * A vector of match pointers together with the sequences, sorted mer lists
+ * and file names the matches refer to.  Copying a list copies the pointers
+ * only; the pointed-to objects are released explicitly through Clear().
+ */
+template< typename MatchPtrType >
+class GenericMatchList : public std::vector< MatchPtrType > 
+{
+public:
+	GenericMatchList(){};
+	GenericMatchList( const GenericMatchList& ml );
+	GenericMatchList& operator=( const GenericMatchList& ml );
+
+
+	/**
+	 * Attempts to load SMLs designated by the
+	 * elements of the sml_filename vector.  This
+	 * method will create the sorted mer lists if they do not exist.
+	 * The DNAFileSML objects are created on the heap
+	 * and are not deallocated when this class is destroyed.  They should
+	 * be manually destroyed when no longer in use.
+	 * @param	mer_size	The seed size to use; 0 selects GetDefaultMerSize( seq_table )
+	 * @param	log_stream	An output stream to log messages to.  If NULL no logging is done
+	 * @param	seed_rank	The rank of the seed to use, 0-2 are ranked spaced seeds, 
+	 *						other options include CODING_SEED and SOLID_SEED
+	 * @param	solid		Requests a solid seed (getSolidSeed) instead of the ranked seed
+	 *						-- NOTE(review): confirm the implementation honors this flag
+	 * @param	force_recreate	When true, SMLs are recreated even if they load successfully
+	 */
+	void LoadSMLs( uint mer_size, std::ostream* log_stream, int seed_rank = 0, bool solid = false, bool force_recreate = false );
+
+	/**
+	 * Constructs an in-memory sorted mer list (DNAMemorySML) for each
+	 * sequence in seq_table and appends it to sml_table.
+	 * The SortedMerList objects are created on the heap
+	 * and are not deallocated when this class is destroyed.  They should
+	 * be manually destroyed when no longer in use.
+	 *
+	 * @param mer_size		The seed size to use when constructing the sorted mer lists;
+	 *						0 selects GetDefaultMerSize( seq_table )
+	 * @param log_stream	An output stream to log messages to.  If NULL no logging is done
+	 * @param seed_rank		The rank of the seed passed to getSeed()
+	 */
+	void CreateMemorySMLs( uint mer_size, std::ostream* log_stream, int seed_rank = 0 );
+
+	/**
+	 * Calculates a default search mer size for the given set of sequences
+	 * @param seq_table		The vector of sequences to calculate a default mer size for
+	 */
+	static uint GetDefaultMerSize( const std::vector< genome::gnSequence* >& seq_table );
+	
+	/**
+	 * Deletes the genome::gnSequence, SortedMerList, and Match objects associated
+	 * with this GenericMatchList.
+	 */
+	void Clear();
+	
+	/**
+	 * Keeps only the matches whose multiplicity equals the specified level;
+	 * all other matches are freed and removed.
+	 * NOTE(review): this comment previously read "lower than the specified
+	 * level", but the implementation tests Multiplicity() == mult.
+	 * @param mult	The multiplicity that matches must have to be kept
+	 */
+	void MultiplicityFilter( unsigned mult );
+
+	/**
+	 * Removes all matches that are shorter than the specified length
+	 * @param length	The minimum length
+	 */
+	void LengthFilter( gnSeqI length );
+
+	/**
+	 * Removes matches that do not match in exactly the sequences specified in filter_spec
+	 * @param filter_spec 	The specification of the exact filter, true designates that the
+	 *						match must exist in that sequence.  filter_spec must contain
+	 *						one boolean entry for every sequence.
+	 */
+//	void ExactFilter( valarray< bool >& filter_spec );
+	/**
+	 * Removes matches that do not intersect with the sequences specified in filter_spec
+	 * @param filter_spec 	The specification of the intersection filter, true designates
+	 *						match must exist in that sequence.  filter_spec must contain
+	 *						one boolean entry for every sequence.
+	 */
+//	void IntersectFilter( valarray< bool >& filter_spec );
+
+	
+	std::vector<std::string> sml_filename;		/**< The file names of the sorted mer list for each sequence, may be empty or null */
+	std::vector<std::string> seq_filename;		/**< The file names of the sequence data, may be empty or null */
+	std::vector<SortedMerList*> sml_table;	/**< The sorted mer list associated with each sequence, may be empty or null */
+	std::vector<genome::gnSequence*> seq_table;		/**< The actual sequences associated with the matches stored in this list.  Should not be empty or null. */
+
+protected:
+
+};
+
+/** The common case: a match list holding plain Match pointers */
+typedef GenericMatchList< Match* > MatchList;
+
+// NOTE(review): CREATE_EXCEPTION is defined outside this file; presumably it
+// declares an exception type usable with Throw_gnEx -- confirm.
+CREATE_EXCEPTION( InvalidArgument );
+
+/**
+ * Thrown when a file being read is invalid
+ */
+CREATE_EXCEPTION(InvalidFileFormat)
+
+
+/**
+ * Reads a GenericMatchList from an input stream
+ * Sequence and SML file names are read into the seq_filename
+ * and sml_filename vectors, but the actual files are not
+ * opened.  The calling function should load them after
+ * using this method.
+ * @param mlist The match list that receives the file names and matches
+ * @param match_stream The input stream to read from
+ */
+void ReadList( MatchList& mlist, std::istream& match_stream );
+
+/**
+ *  Writes a GenericMatchList to the designated output stream
+ * @param mlist The match list to write
+ * @param match_stream The output stream to write to
+ */
+void WriteList( const MatchList& mlist, std::ostream& match_stream );
+
+/** Identifier stored with each match in a match file; WriteList emits the match's address */
+typedef void* MatchID_t;
+
+// Copy constructor: delegates to the assignment operator (pointers are
+// copied, the pointed-to objects are shared).
+template< typename MatchPtrType >
+GenericMatchList< MatchPtrType >::GenericMatchList( const GenericMatchList< MatchPtrType >& ml ){
+	this->operator=( ml );
+}
+
+// Assignment: copies the match pointers and the four bookkeeping vectors.
+// Only pointers are copied, so both lists refer to the same matches,
+// sequences and sorted mer lists afterwards.
+template< typename MatchPtrType >
+GenericMatchList< MatchPtrType >& GenericMatchList< MatchPtrType >::operator=( const GenericMatchList< MatchPtrType >& ml ){
+	typedef std::vector< MatchPtrType > BaseVector;
+	BaseVector::operator=( ml );
+
+	sml_filename = ml.sml_filename;
+	seq_filename = ml.seq_filename;
+
+	sml_table = ml.sml_table;
+	seq_table = ml.seq_table;
+
+	return *this;
+}
+
+/**
+ * Loads every sequence named in mlist.seq_filename and appends the resulting
+ * genome::gnSequence objects to mlist.seq_table.
+ * The sequences are created on the heap and are not deallocated
+ * automatically; they should be destroyed manually when no longer in use.
+ * Loading stops at the first failure: the problem is reported on std::cerr
+ * and the function returns, keeping the sequences loaded so far.
+ *
+ * @param mlist			The match list whose seq_filename entries are loaded
+ * @param log_stream	An output stream to log messages to.  If NULL no logging is done
+ */
+template< typename MatchListType >
+void LoadSequences( MatchListType& mlist, std::ostream* log_stream ){
+
+	// an empty file name list simply skips the loop
+	for( uint seqI = 0; seqI < mlist.seq_filename.size(); seqI++ ){
+		genome::gnSequence* loaded_seq = new genome::gnSequence();
+		// Load the sequence and tell the user if it loaded successfully
+		try{
+			loaded_seq->LoadSource( mlist.seq_filename[ seqI ] );
+		}catch( genome::gnException& gne ){
+			delete loaded_seq;
+			if( gne.GetCode() != genome::FileNotOpened() )
+				std::cerr << gne;
+			else
+				std::cerr << "Error loading " << mlist.seq_filename[ seqI ] << std::endl;
+			return;
+		}catch( std::exception& e ){
+			delete loaded_seq;
+			std::cerr << "Unhandled exception loading " << mlist.seq_filename[ seqI ] << std::endl;
+			std::cerr << "At: " << __FILE__ << ":" << __LINE__ << std::endl;
+			std::cerr << e.what();
+			return;
+		}catch( ... ){
+			delete loaded_seq;
+			std::cerr << "Unknown exception when loading " << mlist.seq_filename[ seqI ] << std::endl;
+			return;
+		}
+
+		mlist.seq_table.push_back( loaded_seq );
+		if( log_stream == NULL )
+			continue;
+		std::ostream& log = *log_stream;
+		log << "Sequence loaded successfully.\n";
+		log << mlist.seq_filename[ seqI ] << " " << loaded_seq->length() << " base pairs.\n";
+	}
+
+}
+
+/**
+ * Loads the sequences designated by the elements of the seq_filename vector,
+ * writes each one to a temporary RAW sequence file, and appends a
+ * genome::gnRAWSequence backed by that file to mlist.seq_table.
+ * Each temporary file is registered through registerFileToDelete().
+ * The genome::gnRAWSequence objects are created on the heap and are not
+ * deallocated automatically; they should be destroyed manually when no
+ * longer in use.
+ * Loading stops at the first failure: the problem is reported on std::cerr
+ * and the function returns, keeping the sequences loaded so far.
+ *
+ * @param mlist			The match list whose seq_filename entries are loaded
+ * @param log_stream	An output stream to log messages to.  If NULL no logging is done
+ */
+template< typename MatchListType >
+void LoadAndCreateRawSequences( MatchListType& mlist, std::ostream* log_stream ){
+
+	// an empty file name list simply skips the loop
+	for( uint seqI = 0; seqI < mlist.seq_filename.size(); seqI++ ){
+		genome::gnSequence* parsed_seq = new genome::gnSequence();
+		// Load the sequence and tell the user if it loaded successfully
+		try{
+			parsed_seq->LoadSource( mlist.seq_filename[ seqI ] );
+		}catch( genome::gnException& gne ){
+			delete parsed_seq;
+			if( gne.GetCode() != genome::FileNotOpened() )
+				std::cerr << gne;
+			else
+				std::cerr << "Error loading " << mlist.seq_filename[ seqI ] << std::endl;
+			return;
+		}catch( std::exception& e ){
+			delete parsed_seq;
+			std::cerr << "Unhandled exception loading " << mlist.seq_filename[ seqI ] << std::endl;
+			std::cerr << "At: " << __FILE__ << ":" << __LINE__ << std::endl;
+			std::cerr << e.what();
+			return;
+		}catch( ... ){
+			delete parsed_seq;
+			std::cerr << "Unknown exception when loading " << mlist.seq_filename[ seqI ] << std::endl;
+			return;
+		}
+
+		// now create a temporary raw sequence
+		std::string raw_path = CreateTempFileName("rawseq");
+		genome::gnRAWSource::Write( *parsed_seq, raw_path );
+		delete parsed_seq;
+		registerFileToDelete( raw_path );
+
+		const bool logging = log_stream != NULL;
+		if( logging )
+			(*log_stream) << "Storing raw sequence at " << raw_path << std::endl;	
+		genome::gnRAWSequence* raw_seq = new genome::gnRAWSequence( raw_path );
+		mlist.seq_table.push_back( raw_seq );
+		if( logging ){
+			(*log_stream) << "Sequence loaded successfully.\n";
+			(*log_stream) << mlist.seq_filename[ seqI ] << " " << raw_seq->length() << " base pairs.\n";
+		}
+	}
+}
+
+
+/**
+ * Loads a DNAFileSML for every sequence in seq_table from the corresponding
+ * sml_filename entry, appending it to sml_table.  SMLs that fail to load,
+ * that were built with a different seed, or that must be recreated because
+ * force_create is set, are (re)created from the sequence data.
+ *
+ * @param mer_size		The seed size; 0 selects GetDefaultMerSize( seq_table )
+ * @param log_stream	An output stream to log messages to.  If NULL no logging is done
+ * @param seed_rank		The rank of the spaced seed passed to getSeed()
+ * @param solid			When true the seed from getSolidSeed() is used instead
+ * @param force_create	When true, SMLs are recreated even if they load successfully
+ */
+template< typename MatchPtrType >
+void GenericMatchList< MatchPtrType >::LoadSMLs( uint mer_size, std::ostream* log_stream, int seed_rank, bool solid, bool force_create ){
+
+	// if the mer_size parameter is 0 then calculate a default mer size for these sequences
+	if( mer_size == 0 ){
+		mer_size = GetDefaultMerSize( seq_table );
+		if( log_stream != NULL ){
+			(*log_stream) << "Using weight " << mer_size << " mers for initial seeds\n";
+		}
+	}
+
+	// load and creates SMLs as necessary
+	uint64 default_seed = getSeed( mer_size, seed_rank );
+	// assign to the existing variable: redeclaring it here would only create
+	// a shadowing local and the solid seed would never be used
+	if (solid)
+		default_seed = getSolidSeed( mer_size );
+	std::vector< uint > create_list;
+	uint seqI = 0;
+	for( seqI = 0; seqI < seq_table.size(); seqI++ ){
+		// define a DNAFileSML to store a sorted mer list
+		DNAFileSML* file_sml = new DNAFileSML();
+		sml_table.push_back( file_sml );
+
+		boolean success = true;
+		try{
+			file_sml->LoadFile( sml_filename[ seqI ] );
+		}catch( genome::gnException& gne ){
+			// a missing or unreadable SML is not an error: schedule its creation
+			success = false;
+			create_list.push_back( seqI );
+		}
+		boolean recreate = false;
+		if(success && force_create){
+			if( log_stream != NULL )
+				(*log_stream) << "SML exists, but forcefully recreating.  A new sorted mer list will be created.\n";
+			recreate = true;
+			create_list.push_back( seqI );
+		}
+		else if(success && (file_sml->Seed() != default_seed )){
+			if( log_stream != NULL )
+				(*log_stream) << "Default seed mismatch.  A new sorted mer list will be created.\n";
+			recreate = true;
+			create_list.push_back( seqI );
+		}
+
+		if( success && !recreate && log_stream != NULL && !force_create )
+			(*log_stream) << "Sorted mer list loaded successfully\n";
+	}
+
+	// free up memory before creating any SMLs
+	if( create_list.size() > 0 )
+		for( seqI = 0; seqI < sml_table.size(); seqI++ ){
+			sml_table[ seqI ]->Clear();
+			delete sml_table[ seqI ];
+			sml_table[ seqI ] = NULL;
+		}
+	
+	// create any SMLs that need to be created
+	for( uint createI = 0; createI < create_list.size(); createI++ ){
+		if( log_stream != NULL )
+			(*log_stream) << "Creating sorted mer list\n";
+		try{
+
+		time_t start_time = time(NULL);
+		sml_table[ create_list[ createI ] ] = new DNAFileSML( sml_filename[ create_list[ createI ] ] );
+		sml_table[ create_list[ createI ] ]->Create( *seq_table[ create_list[ createI ] ], default_seed );
+		time_t end_time = time(NULL);
+	 	if( log_stream != NULL )
+			(*log_stream) << "Create time was: " << end_time - start_time << " seconds.\n";
+		
+		}catch(...){
+			std::cerr << "Error creating sorted mer list\n";
+			throw;
+		}
+	}
+	
+	// reload the other SMLs now that creation has completed
+	if( create_list.size() > 0 ){
+		// iterate over sml_table itself (the table that was emptied above) so
+		// that no entry is left NULL when seq_filename has a different size
+		for( seqI = 0; seqI < sml_table.size(); seqI++ ){
+			if( sml_table[ seqI ] != NULL )
+				continue;
+			sml_table[ seqI ] = new DNAFileSML( sml_filename[ seqI ] );
+			try{
+				((DNAFileSML*)sml_table[ seqI ])->LoadFile( sml_filename[ seqI ] );
+			}catch( genome::gnException& gne ){
+				std::cerr << "Error loading sorted mer list\n";
+				throw;
+			}
+		}
+	}
+}
+
+// Returns getDefaultSeedWeight() of the average sequence length in seq_table.
+// An empty table is treated as having average length 0 rather than dividing
+// by zero.
+template< typename MatchPtrType >
+uint GenericMatchList< MatchPtrType >::GetDefaultMerSize( const std::vector< genome::gnSequence* >& seq_table ){
+	gnSeqI total_len = 0;
+	for( size_t seqI = 0; seqI < seq_table.size(); seqI++ )
+		total_len += seq_table[ seqI ]->length();
+
+	gnSeqI average_len = 0;
+	if( !seq_table.empty() )
+		average_len = total_len / seq_table.size();
+	return getDefaultSeedWeight( average_len );
+}
+
+
+/**
+ * Loads sequences to align from a Multi-FastA file.
+ * Each contig of the file becomes one heap-allocated genome::gnSequence that
+ * is appended to mlist.seq_table; mlist.seq_filename is cleared and then
+ * receives mfa_filename once per contig.  No sorted mer lists are created
+ * here.  The sequences are not deallocated automatically and should be
+ * destroyed manually when no longer in use.
+ * If the file cannot be loaded the problem is reported on std::cerr and the
+ * function returns without modifying mlist.
+ *
+ * @param mlist			The match list that receives the sequences
+ * @param mfa_filename	The name of the Multi-FastA file to read in.  Each 
+ *						sequence entry will be treated as a separate sequence to 
+ *						be aligned.
+ * @param log_stream	An output stream to log messages to.  If NULL no logging is done
+ */
+template< typename MatchListType >
+void LoadMFASequences( MatchListType& mlist, const std::string& mfa_filename, std::ostream* log_stream ) {
+	genome::gnSequence file_sequence;
+	// Load the sequence and tell the user if it loaded successfully
+	try{
+		file_sequence.LoadSource( mfa_filename );
+	}catch( genome::gnException& gne ){
+		if( gne.GetCode() == genome::FileNotOpened() )
+			std::cerr << "Error loading " << mfa_filename << std::endl;
+		else
+			std::cerr << gne;
+		return;
+	}catch( std::exception& e ){
+		std::cerr << "Unhandled exception loading " << mfa_filename << std::endl;
+		std::cerr << "At: " << __FILE__ << ":" << __LINE__ << std::endl;
+		std::cerr << e.what();
+		return;
+	}catch( ... ){
+		std::cerr << "Unknown exception when loading " << mfa_filename << std::endl;
+		return;
+	}
+
+	mlist.seq_filename.clear();
+	for( uint contigI = 0; contigI < file_sequence.contigListSize(); contigI++ ){
+		genome::gnSequence* contig_seq = new genome::gnSequence( file_sequence.contig( contigI ) );
+		// every contig is recorded under the name of the containing file
+		mlist.seq_filename.push_back( mfa_filename );
+//		mlist.seq_filename.push_back( file_sequence.contigName( contigI ) );
+		if( log_stream != NULL ){
+			(*log_stream) << "Sequence loaded successfully.\n";
+			(*log_stream) << mlist.seq_filename[ contigI ] << " " << contig_seq->length() << " base pairs.\n";
+		}
+		mlist.seq_table.push_back( contig_seq );
+	}
+}
+
+/**
+ * Creates an in-memory sorted mer list (DNAMemorySML) for every sequence in
+ * seq_table and appends it to sml_table.
+ *
+ * @param mer_size		The seed size; 0 selects GetDefaultMerSize( seq_table )
+ * @param log_stream	An output stream to log messages to.  If NULL no logging is done
+ * @param seed_rank		The rank of the seed passed to getSeed()
+ */
+template< typename MatchPtrType >
+void GenericMatchList< MatchPtrType >::CreateMemorySMLs( uint mer_size, std::ostream* log_stream, int seed_rank ) 
+{
+	// if the mer_size parameter is 0 then calculate a default mer size for these sequences
+	if( mer_size == 0 ){
+		mer_size = GetDefaultMerSize( seq_table );
+		if( log_stream != NULL ){
+			(*log_stream) << "Using " << mer_size << "-mers for initial seeds\n";
+		}
+	}
+
+	uint64 default_seed = getSeed( mer_size, seed_rank );
+
+	// define a DNAMemorySML to store a sorted mer list
+	for( uint contigI = 0; contigI < seq_table.size(); contigI++ )
+	{
+		DNAMemorySML* contig_sml = new DNAMemorySML();
+		if( log_stream != NULL )
+			(*log_stream) << "Creating sorted mer list\n";
+		time_t start_time = time(NULL);
+		try{
+			contig_sml->Create( *seq_table[contigI], default_seed );
+		}catch(...){
+			// sml_table does not hold this SML yet, so release it before
+			// letting the exception propagate
+			delete contig_sml;
+			throw;
+		}
+		time_t end_time = time(NULL);
+	 	if( log_stream != NULL )
+			(*log_stream) << "Create time was: " << end_time - start_time << " seconds.\n";
+		
+		sml_table.push_back( contig_sml );
+	}
+}
+
+// Destroys everything the list refers to: deletes the sequences and sorted
+// mer lists, calls Free() on every match, then empties all containers.
+template< typename MatchPtrType >
+void GenericMatchList< MatchPtrType >::Clear() {
+	// delete on a NULL pointer is a no-op, so NULL entries are harmless here
+	for( size_t seqI = 0; seqI < seq_table.size(); ++seqI )
+		delete seq_table[ seqI ];
+	for( size_t smlI = 0; smlI < sml_table.size(); ++smlI )
+		delete sml_table[ smlI ];
+
+	// matches are released through their own Free()
+	for( size_t matchI = 0; matchI < this->size(); ++matchI ){
+		(*this)[ matchI ]->Free();
+		(*this)[ matchI ] = NULL;
+	}
+
+	this->clear();
+	seq_table.clear();
+	sml_table.clear();
+	seq_filename.clear();
+	sml_filename.clear();
+}
+
+/**
+ * Use this to update linkage pointers after copying an entire set of Matches
+ */
+template< class FromType, class ToType, class MatchListType >
+void RemapSubsetMatchAddresses( std::map<FromType, ToType>& old_to_new_map, MatchListType& match_list );
+
+
+// Rewrites the subset and superset link sets of every match in match_list,
+// replacing each linked pointer by its image in old_to_new_map.
+// Precondition: every linked pointer has an entry in old_to_new_map; the
+// lookup result is dereferenced without checking for end().
+template< class FromType, class ToType, class MatchListType >
+void RemapSubsetMatchAddresses( std::map<FromType, ToType>& old_to_new_map, MatchListType& match_list )
+{
+	typedef std::set< Match* > LinkSet;
+	typename std::map<FromType, ToType>::iterator lookup;
+
+	// now remap the subset and superset links
+	for( typename MatchListType::iterator cur_match = match_list.begin();
+		cur_match != match_list.end(); ++cur_match ){
+		// remap all subsets
+		LinkSet& subsets = (*cur_match)->Subsets();
+		LinkSet remapped_subsets;
+		for( LinkSet::iterator link = subsets.begin(); link != subsets.end(); ++link ){
+			lookup = old_to_new_map.find( (FromType)*link );
+			remapped_subsets.insert( lookup->second );
+		}
+		subsets = remapped_subsets;
+
+		// remap all supersets
+		LinkSet& supersets = (*cur_match)->Supersets();
+		LinkSet remapped_supersets;
+		for( LinkSet::iterator link = supersets.begin(); link != supersets.end(); ++link ){
+			lookup = old_to_new_map.find( (FromType)*link );
+			remapped_supersets.insert( lookup->second );
+		}
+		supersets = remapped_supersets;
+	}
+}
+
+/**
+ * Reads a match list in "FormatVersion 3" text form.
+ * The sequence file names are appended to mlist.seq_filename and one Match
+ * per remaining input line is appended to mlist.  The recorded sequence
+ * lengths are only compared against sequences already present in
+ * mlist.seq_table; a mismatch produces a warning on std::cerr.
+ *
+ * Throws InvalidFileFormat (through Throw_gnEx) when the header is malformed,
+ * fewer than two sequences are declared, a sequence name line is empty, or
+ * the declared match count differs from mlist.size() after reading.
+ * Throws a const char* message when a match line declares subset or superset
+ * links, which this reader does not support.
+ *
+ * @param mlist			The match list that receives the file names and matches
+ * @param match_file	The input stream to read from
+ */
+inline
+void ReadList(MatchList& mlist, std::istream& match_file)
+{
+	std::string tag;
+	gnSeqI len = 0;
+	int64 start = 0;
+	unsigned int seq_count = 0;
+	
+	match_file >> tag;	//format version tag
+	if( tag != "FormatVersion" ){
+		Throw_gnEx(InvalidFileFormat());
+	}
+	match_file >> tag;	//format version
+	if( tag != "3" ){
+		Throw_gnEx(InvalidFileFormat());
+	}
+	match_file >> tag;	//sequence count tag
+	if( tag != "SequenceCount" ){
+		Throw_gnEx(InvalidFileFormat());
+	}
+	match_file >> seq_count;	//sequence count
+	if(seq_count < 2){
+		Throw_gnEx(InvalidFileFormat());
+	}
+	
+	// read the sequence file names and lengths
+	for( unsigned int seqI = 0; seqI < seq_count; seqI++ ){		
+		match_file >> tag;	// name tag
+		std::getline( match_file, tag );
+		// the rest of the line is "<tab><file name>"; an empty remainder
+		// means the file is truncated or malformed
+		if( tag.empty() ){
+			Throw_gnEx(InvalidFileFormat());
+		}
+		// skip the tab character
+		tag = tag.substr( 1 );
+		mlist.seq_filename.push_back(tag);
+		match_file >> tag;	// length tag
+		gnSeqI seq_len = 0;
+		match_file >> seq_len;	// length
+		if( seqI < mlist.seq_table.size() )
+			if( mlist.seq_table[ seqI ]->length() != seq_len ){
+				std::cerr << "Warning: Genome sizes in the match list differ.\n";
+				std::cerr << "seq_table[ " << seqI << " ]->length() " << mlist.seq_table[ seqI ]->length() << " seq_len: " << seq_len << std::endl;
+			}
+	}
+
+	// read the number of matches
+	unsigned int match_count = 0;
+	match_file >> tag;	// match count tag
+	match_file >> match_count;	// match count
+		
+	// read the matches
+	std::string cur_line;
+	std::getline( match_file, cur_line );	// consume the rest of the match count line
+	while( getline( match_file, cur_line ) ){
+		Match mhe( seq_count );
+		std::stringstream line_stream( cur_line );
+		
+		line_stream >> len;
+		mhe.SetLength(len);
+
+		for(uint32 seqI = 0; seqI < seq_count; seqI++){
+			line_stream >> start;
+			mhe.SetStart(seqI, start);
+		}
+		
+		// the match id is not used, but it must still be consumed so that
+		// the link counts below are read from the right columns
+		MatchID_t match_id = NULL;
+		line_stream >> match_id;
+		
+		// subset/superset link data is not supported: any non-zero count is rejected
+		uint sub_count = 0;
+		line_stream >> sub_count;
+		if(sub_count > 0)
+			throw "Unable to read file, invalid format, cannot read subset data\n";
+
+		uint sup_count = 0;
+		line_stream >> sup_count;
+		if(sup_count > 0)
+			throw "Unable to read file, invalid format, cannot read superset data\n";
+		
+		Match* new_match = mhe.Copy();
+		mlist.push_back( new_match );
+	}
+	if( match_count != mlist.size() ){
+		Throw_gnEx(InvalidFileFormat());
+	}
+}
+
+/**
+ * Writes mlist in "FormatVersion 3" text form: a header with the sequence
+ * count, one file name and one length line per sequence, the match count,
+ * and then one line per match consisting of the match itself, its address
+ * (as MatchID_t) and zero subset and superset counts.
+ * Nothing at all is written for an empty list, because the sequence count is
+ * taken from the first match.
+ *
+ * @param mlist			The match list to write
+ * @param match_file	The output stream to write to
+ */
+inline
+void WriteList( const MatchList& mlist, std::ostream& match_file)
+{
+	if( mlist.size() == 0 )
+		return;
+	Match* first_mem = *(mlist.begin());
+	unsigned int seq_count = first_mem->SeqCount();
+
+	match_file << "FormatVersion" << '\t' << 3 << "\n";
+	match_file << "SequenceCount" << '\t' << seq_count << "\n";
+	for(unsigned int seqI = 0; seqI < seq_count; seqI++){
+		match_file << "Sequence" << seqI << "File" << '\t';
+		if( mlist.seq_filename.size() > seqI )
+			match_file << mlist.seq_filename[seqI];
+		else
+			match_file << "null";
+		match_file << "\n";
+		match_file << "Sequence" << seqI << "Length" << '\t';
+		if( mlist.seq_table.size() > seqI )
+			match_file << mlist.seq_table[seqI]->length();
+		else
+			match_file << "0";
+		match_file << "\n";
+	}
+
+	match_file << "MatchCount" << '\t' << mlist.size() << std::endl;
+
+	//get all the mems out of the list and write them out
+	std::vector<Match*>::const_iterator match_iter = mlist.begin();
+	for(; match_iter != mlist.end(); ++match_iter){
+		// print the match
+		match_file << **match_iter << '\t';
+
+		// print the match address
+		match_file << (MatchID_t)(*match_iter) << '\t';
+		
+		// print subset id's
+		match_file << 0;
+
+		// print superset id's
+		match_file << '\t' << 0;
+		// plain newline: flushing once per match is needlessly slow
+		match_file << '\n';
+	}
+	// flush once at the end so the stream is in the same state as before
+	match_file.flush();
+}
+
+// Keeps only the matches whose Multiplicity() equals mult.  All other matches
+// are released through Free() and removed; the relative order of the kept
+// matches is preserved.
+template< typename MatchPtrType >
+void GenericMatchList< MatchPtrType >::MultiplicityFilter( unsigned mult ){
+
+	size_t cur = 0;
+	// size_t index, as in LengthFilter, so the counter cannot wrap before
+	// reaching this->size()
+	for( size_t memI = 0; memI < this->size(); memI++ ){
+		if( (*this)[ memI ]->Multiplicity() == mult )
+			(*this)[cur++] = (*this)[memI];
+		else{
+			(*this)[ memI ]->Free();
+			(*this)[ memI ] = NULL;
+		}
+	}
+	// drop the tail left over after compacting the kept matches to the front
+	this->resize(cur);
+}
+
+// Removes every match shorter than length.  Removed matches are released
+// through Free(); the kept matches are compacted to the front in their
+// original order.
+template< typename MatchPtrType >
+void GenericMatchList< MatchPtrType >::LengthFilter( gnSeqI length ){
+
+	size_t kept = 0;
+	for( size_t memI = 0; memI < this->size(); ++memI ){
+		if( (*this)[ memI ]->Length() >= length ){
+			(*this)[ kept ] = (*this)[ memI ];
+			++kept;
+			continue;
+		}
+		// too short: release it and clear the slot
+		(*this)[ memI ]->Free();
+		(*this)[ memI ] = NULL;
+	}
+	this->resize(kept);
+}
+
+}
+
+#endif	//_MatchList_h_
diff --git a/libMems/MatchProjectionAdapter.h b/libMems/MatchProjectionAdapter.h
new file mode 100644
index 0000000..91d2193
--- /dev/null
+++ b/libMems/MatchProjectionAdapter.h
@@ -0,0 +1,142 @@
+/*******************************************************************************
+ * $Id: MatchProjectionAdapter.h,v 1.8 2004/02/27 23:08:55 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef __MatchProjectionAdapter_h__
+#define __MatchProjectionAdapter_h__
+
+#include "libMems/AbstractMatch.h"
+#include <vector>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+namespace mems {
+
+/**
+ * MatchProjectionAdapter is a wrapper around an AbstractMatch that effectively projects a multi-match to a
+ * subset match.  The adapter class forwards most function calls to the original match
+ * class, to which it stores a pointer.  Use of non-const functions results in undefined state.
+ */
+class MatchProjectionAdapter : public mems::AbstractMatch
+{
+public:
+	/** Creates an adapter that wraps no match (m == NULL). */
+	MatchProjectionAdapter() : m(NULL){};
+
+	/**
+	 * Wraps a private copy of match, exposing only the sequences listed in projection.
+	 * @param match		the match to project; it is duplicated with Copy().  May be NULL.
+	 * @param projection	projection[i] is the index within match of the adapter's i-th sequence
+	 */
+	MatchProjectionAdapter( mems::AbstractMatch* match, const std::vector< size_t >& projection ) :
+	  seq(projection)
+	{
+		m = match != NULL ? match->Copy() : NULL;
+	}
+
+	/** Copies the projection and takes a fresh Copy() of the wrapped match, if any. */
+	MatchProjectionAdapter( const MatchProjectionAdapter& mpa ) : 
+	seq( mpa.seq )
+	{
+		if( mpa.m != NULL )
+			m = mpa.m->Copy();
+		else
+			m = NULL;
+	}
+
+	/** Releases the wrapped match through its own Free(). */
+	~MatchProjectionAdapter()
+	{
+		if( m != NULL )
+			m->Free();
+	}
+
+	/** @return a heap-allocated (operator new) duplicate of this adapter */
+	MatchProjectionAdapter* Clone() const { return new MatchProjectionAdapter( *this ); }
+
+	/** @return a duplicate obtained from m_allocateAndCopy(); release it with Free() */
+	inline
+	MatchProjectionAdapter* Copy() const
+	{
+		return m_allocateAndCopy( *this );
+	}
+
+	/** Releases this adapter through m_free(); counterpart of Copy(). */
+	void Free()
+	{
+		m_free(this);
+	}
+
+	/**
+	 * Replaces the wrapped match and projection with copies of mpa's.
+	 * Safe for self-assignment and for a source that wraps no match.
+	 */
+	MatchProjectionAdapter& operator=( const MatchProjectionAdapter& mpa )
+	{
+		if( this == &mpa )
+			return *this;
+		// copy first, release second, so the old match is never read after Free()
+		mems::AbstractMatch* replacement = mpa.m != NULL ? mpa.m->Copy() : NULL;
+		if( m != NULL )
+			m->Free();
+		m = replacement;
+		seq = mpa.seq;
+		return *this;
+	}
+
+	//
+	// forward all function calls to match
+	// Per-sequence calls translate the adapter's sequence index through seq[].
+	// Whole-match calls (MoveStart, MoveEnd, Invert, CropStart, CropEnd,
+	// AlignmentLength) are passed to the wrapped match unchanged.
+	//
+	gnSeqI Length( uint seqI ) const { return m->Length(seq[seqI]); }
+	void SetLength( gnSeqI len, uint seqI ) { m->SetLength(len, seq[seqI]); }
+	int64 Start(uint startI) const { return m->Start(seq[startI]); }
+	void SetStart(uint seqI, int64 start) { m->SetStart(seq[seqI],start); }
+	gnSeqI LeftEnd(uint seqI) const { return m->LeftEnd(seq[seqI]); }
+	orientation Orientation(uint seqI) const { return m->Orientation(seq[seqI]); }
+	void SetLeftEnd(uint seqI, gnSeqI start) { m->SetLeftEnd(seq[seqI],start); }
+	void SetOrientation(uint seqI, orientation o) { m->SetOrientation(seq[seqI],o); }
+	void MoveStart(int64 move_amount) { m->MoveStart(move_amount); }
+	void MoveEnd(int64 move_amount) { m->MoveEnd(move_amount); }
+
+	/** @return the number of projected sequences in which the wrapped match is defined */
+	uint Multiplicity() const 
+	{ 
+		size_t mult = 0;
+		for( size_t projI = 0; projI < seq.size(); projI++ )
+			// translate through seq[] so only the projected sequences are counted
+			if( m->LeftEnd(seq[projI]) != mems::NO_MATCH )
+				++mult;
+		return mult; 
+	}
+	/** @return the number of sequences in the projection */
+	uint SeqCount() const { return seq.size(); }
+	/** Always reports 0, whether or not the first projected sequence is defined. */
+	uint FirstStart() const { return 0; }	
+	gnSeqI AlignmentLength() const { return m->AlignmentLength(); }
+	void Invert() { m->Invert(); }
+	void CropStart(gnSeqI crop_amount) { m->CropStart(crop_amount); }
+	void CropEnd(gnSeqI crop_amount) { m->CropEnd(crop_amount); }
+	void CropLeft(gnSeqI crop_amount, uint seqI) { m->CropLeft(crop_amount, seq[seqI]); }
+	void CropRight(gnSeqI crop_amount, uint seqI) { m->CropRight(crop_amount, seq[seqI]); }
+
+	/** Replaces align_matrix with the wrapped match's alignment rows, one per projected sequence. */
+	void GetAlignment( std::vector< mems::bitset_t >& align_matrix ) const 
+	{
+		std::vector< mems::bitset_t > aln_mat;
+		m->GetAlignment(aln_mat);
+		align_matrix.clear();
+		for( size_t seqI = 0; seqI < seq.size(); ++seqI )
+			align_matrix.push_back(aln_mat[seq[seqI]]);
+	}
+
+	/**
+	 * Replaces pos and column with the entries of alignment column col that
+	 * belong to the projected sequences, in projection order.
+	 */
+	void GetColumn( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column ) const 
+	{
+		std::vector<gnSeqI> m_pos; 
+		std::vector<bool> m_column;
+		m->GetColumn(col,m_pos,m_column);
+		// both outputs are reset so they always hold exactly seq.size() entries
+		pos.clear();
+		column.clear();
+		for( size_t seqI = 0; seqI < seq.size(); ++seqI )
+		{
+			pos.push_back(m_pos[seq[seqI]]);
+			column.push_back(m_column[seq[seqI]]);
+		}
+	}
+	bool IsGap( uint seqI, gnSeqI col ) const { return m->IsGap( seq[seqI],col ); }
+
+	/**
+	 * @return the adapter-local index of the (seqI+1)-th projected sequence whose
+	 *         Start() in the wrapped match is non-zero, or the largest uint value
+	 *         when fewer than seqI+1 such sequences exist
+	 */
+	uint UsedSeq( uint seqI ) const 
+	{
+		uint c = 0;
+		for( uint i = 0; i < seq.size(); i++ )
+		{
+			if(m->Start(seq[i]) != 0)
+				c++;
+			if(c>seqI)
+				return i;
+		}
+		return (std::numeric_limits<uint>::max)();
+	};
+
+	mems::AbstractMatch* m;		// privately owned copy of the wrapped match, or NULL
+	std::vector< size_t > seq;	// seq[i] = index in *m of the adapter's i-th sequence
+};
+
+}
+
+#endif // __MatchProjectionAdapter_h__
diff --git a/libMems/Matrix.h b/libMems/Matrix.h
new file mode 100644
index 0000000..568d521
--- /dev/null
+++ b/libMems/Matrix.h
@@ -0,0 +1,174 @@
+/*******************************************************************************
+ * $Id: Matrix.h,v 1.6 2004/02/27 23:08:55 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef __Matrix_h__
+#define __Matrix_h__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnSetup.h"
+#include <iostream>
+#include <sstream>
+#include <vector>
+#include <stdexcept>
+
+/**
+ * A dense two-dimensional array of T with bounds-checked element access.
+ * Elements live in one heap buffer in row-major order: element (i,j) is
+ * stored at data_[i*ncols_ + j].
+ */
+template<class T>
+class Matrix
+{
+public:
+	/** Creates an empty matrix with no storage. */
+	Matrix();
+	/** Creates an nrows x ncols matrix; elements are default-initialized by new T[]. */
+	Matrix(unsigned nrows, unsigned ncols);
+	// Exception type for an invalid matrix size.
+	// NOTE(review): none of the constructors defined in this header throws it.
+	class BadSize : public std::range_error{
+	public:
+		BadSize() : std::range_error( "Bad matrix size" ){}
+	};
+
+	// Based on the Law Of The Big Three:
+	~Matrix();	
+	Matrix(const Matrix<T>& m);
+	Matrix<T>& operator= (const Matrix<T>& m);
+	// Access methods to get the (i,j) element:
+	T& operator() (unsigned i, unsigned j);
+	const T& operator() (unsigned i, unsigned j) const;
+	// These throw a BoundsViolation object if i or j is too big
+	class BoundsViolation : public std::range_error{
+	public:
+		BoundsViolation() : std::range_error( "Index out of bounds" ){}
+	 };
+	// Support for initializing each matrix element to a value
+	void init( const T& init_val );
+	
+	/** Writes the matrix as text: one row per line, columns separated by tabs. */
+	void print( std::ostream& os ) const;
+	/** Reads matrix contents from tab-delimited text lines on the stream. */
+	void read( std::istream& is );
+
+	/** @return the number of rows */
+	unsigned rows() const;
+	/** @return the number of columns */
+	unsigned cols() const;
+protected:
+	T* data_;			// row-major element buffer, NULL for a default-constructed matrix
+	unsigned nrows_, ncols_;	// dimensions of the buffer
+};
+   
+/** Builds an empty matrix: no storage, zero rows and zero columns. */
+template<class T>
+inline Matrix<T>::Matrix()
+	: data_  (NULL),
+	  nrows_ (0),
+	  ncols_ (0)
+{
+}
+
+/** @return the number of rows currently held by the matrix */
+template<class T>
+inline unsigned Matrix<T>::rows() const
+{
+	return nrows_;
+}
+
+/** @return the number of columns currently held by the matrix */
+template<class T>
+inline unsigned Matrix<T>::cols() const
+{
+	return ncols_;
+}
+
+/**
+ * Mutable access to the element at (row, col).
+ * @throws BoundsViolation when row or col lies outside the matrix
+ */
+template<class T>
+inline T& Matrix<T>::operator() (unsigned row, unsigned col)
+{
+	const bool inside = row < nrows_ && col < ncols_;
+	if( !inside )
+		throw BoundsViolation();
+	// row-major addressing
+	T* const row_begin = data_ + row*ncols_;
+	return row_begin[col];
+}
+   
+/**
+ * Read-only access to the element at (row, col).
+ * Behaves like the non-const overload: an out-of-range index is reported
+ * solely through the exception, with no diagnostic text written to stdout.
+ * @throws BoundsViolation when row or col lies outside the matrix
+ */
+template<class T>
+inline const T& Matrix<T>::operator() (unsigned row, unsigned col) const
+{
+	if (row >= nrows_ || col >= ncols_)
+		throw BoundsViolation();
+	return data_[row*ncols_ + col];
+}
+   
+/**
+ * Allocates storage for an nrows x ncols matrix.
+ * Elements are whatever new T[] produces; call init() to set a known value.
+ * No size validation is performed here (a zero dimension is accepted).
+ * NOTE(review): nrows * ncols is computed in unsigned arithmetic, so very
+ * large dimensions would wrap -- confirm callers stay well below that.
+ */
+template<class T>
+inline Matrix<T>::Matrix(unsigned nrows, unsigned ncols)
+	: data_  (new T[nrows * ncols]),
+	  nrows_ (nrows),
+	  ncols_ (ncols)
+{
+}
+/**
+ * Copy constructor: produces an independent deep copy of m.
+ * The members are put into the empty state before delegating to operator=,
+ * because the assignment operator inspects (and may delete[]) the current
+ * data_ pointer, which must therefore never be left uninitialized.
+ */
+template<class T>
+inline Matrix<T>::Matrix(const Matrix<T>& m)
+	: data_  (NULL),
+	  nrows_ (0),
+	  ncols_ (0)
+{
+	*this = m;
+}
+
+/**
+ * Makes this matrix a deep copy of m.
+ * - Self-assignment is a no-op (previously the source buffer was freed
+ *   before being read).
+ * - Elements are copied with T's assignment operator rather than memcpy, so
+ *   element types that are not trivially copyable are handled correctly.
+ * - The new buffer is fully built before the old one is released, so this
+ *   matrix is left untouched if allocation or an element copy throws.
+ * @return *this
+ */
+template<class T>
+inline Matrix<T>& Matrix<T>::operator=( const Matrix<T>& m )
+{
+	if( this == &m )
+		return *this;
+
+	const unsigned count = m.nrows_ * m.ncols_;
+	T* fresh = new T[count];
+	try{
+		for( unsigned elemI = 0; elemI < count; elemI++ )
+			fresh[elemI] = m.data_[elemI];
+	}catch(...){
+		delete[] fresh;
+		throw;
+	}
+
+	if( data_ != NULL )
+		delete[] data_;
+	data_ = fresh;
+	nrows_ = m.nrows_;
+	ncols_ = m.ncols_;
+	return *this;
+}
+
+/** Releases the element buffer, if one was allocated. */
+template<class T>
+inline Matrix<T>::~Matrix()
+{
+	if( data_ != NULL )
+		delete[] data_;
+}
+
+/**
+ * Overwrites every element of the matrix with init_val.
+ * @param init_val	the value assigned to each cell
+ */
+template<class T>
+inline void Matrix<T>::init( const T& init_val )
+{
+	for( unsigned r = 0; r < nrows_; ++r ){
+		// start of row r in the row-major buffer
+		T* const row_begin = data_ + r * ncols_;
+		for( unsigned c = 0; c < ncols_; ++c )
+			row_begin[ c ] = init_val;
+	}
+}
+
+/**
+ * Writes the matrix to os as text, one row per line with a tab between
+ * neighbouring columns.  Each row is terminated with std::endl.
+ */
+template<class T>
+inline void Matrix<T>::print( std::ostream& os ) const{
+	for( unsigned r = 0; r < nrows_; ++r ){
+		const T* const row_begin = data_ + r * ncols_;
+		for( unsigned c = 0; c < ncols_; ++c ){
+			// separator goes before every column except the first
+			if( c != 0 )
+				os << '\t';
+			os << row_begin[ c ];
+		}
+		os << std::endl;
+	}
+}
+
+/**
+ * Replaces the matrix with values parsed from a text stream, using the
+ * format written by print(): one row per line, columns separated by tabs.
+ *
+ * The column count is taken from the first line.  Every field of every row
+ * is parsed with operator>>; a row with fewer fields than the first line
+ * leaves its remaining cells value-initialized.  Any previous contents are
+ * released, and an empty stream yields an empty (0 x 0) matrix.
+ * @param is	the stream to read until end of input
+ */
+template<class T>
+inline void Matrix<T>::read( std::istream& is ){
+	std::vector< std::string > lines;
+	std::string cur_line;
+	while( std::getline( is, cur_line ) )
+		lines.push_back( cur_line );
+
+	// drop the old contents first so the buffer is not leaked
+	if( data_ != NULL )
+		delete[] data_;
+	data_ = NULL;
+	nrows_ = 0;
+	ncols_ = 0;
+
+	// nothing to parse: stay empty instead of indexing lines[0]
+	if( lines.empty() )
+		return;
+
+	// count ncols from the first line
+	unsigned col_count = 0;
+	{
+		std::stringstream first_line( lines[0] );
+		while( std::getline( first_line, cur_line, '\t' ) )
+			col_count++;
+	}
+
+	// value-initialize so short rows never expose indeterminate cells
+	data_ = new T[ lines.size() * col_count ]();
+	nrows_ = lines.size();
+	ncols_ = col_count;
+
+	for( unsigned lineI = 0; lineI < nrows_; lineI++ ){
+		std::stringstream line_stream( lines[ lineI ] );
+		for( unsigned colI = 0; colI < ncols_; colI++ ){
+			if( !std::getline( line_stream, cur_line, '\t' ) )
+				break;	// short row
+			std::stringstream type_stream( cur_line );
+			type_stream >> data_[ lineI * ncols_ + colI ];
+		}
+	}
+}
+
+#endif // __Matrix_h__
diff --git a/libMems/MemHash.cpp b/libMems/MemHash.cpp
new file mode 100644
index 0000000..9c35f10
--- /dev/null
+++ b/libMems/MemHash.cpp
@@ -0,0 +1,330 @@
+/*******************************************************************************
+ * $Id: MemHash.cpp,v 1.32 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/MemHash.h"
+#include "libGenome/gnFilter.h"
+#include <list>
+#include <map>
+#include <sstream>
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+	MemHash::MemHash() : MatchFinder(), allocator( SlotAllocator<MatchHashEntry>::GetSlotAllocator() )
+
+{
+	// Default construction: an empty table of DEFAULT_MEM_TABLE_SIZE buckets,
+	// default tolerances, zeroed statistics and no match log.
+	seq_count = 0;
+	m_mem_count = 0;
+	m_collision_count = 0;
+	match_log = NULL;
+	m_repeat_tolerance = DEFAULT_REPEAT_TOLERANCE;
+	m_enumeration_tolerance = DEFAULT_ENUMERATION_TOLERANCE;
+
+	// allocate the hash table together with one zeroed counter per bucket
+	table_size = DEFAULT_MEM_TABLE_SIZE;
+	mem_table.resize( table_size );
+	mem_table_count.assign( table_size, 0 );
+}
+
+// Destructor.  It performs no explicit cleanup: the call that would return the
+// entries tracked in `allocated` to the allocator is commented out below.
+// NOTE(review): entries are handed back in Clear(); confirm whether callers
+// are expected to call Clear() before destroying a MemHash.
+MemHash::~MemHash(){
+//	allocator.Free(allocated);
+}
+
+// Copy constructor: copy-constructs the MatchFinder base, binds `allocator` to
+// the object returned by SlotAllocator<MatchHashEntry>::GetSlotAllocator() and
+// then copies the MemHash state through operator=.
+MemHash::MemHash(const MemHash& mh) : MatchFinder(mh), allocator( SlotAllocator<MatchHashEntry>::GetSlotAllocator() )
+{
+	*this = mh;
+}
+
+// Copies the table configuration, statistics, tolerances, bucket contents and
+// match log pointer from mh.
+//
+// The buckets are copied as vectors of MatchHashEntry pointers, so afterwards
+// both objects refer to the same entries; `allocated` is not copied.
+// The per-bucket counters are overwritten in place rather than appended, so
+// assigning to an already-populated MemHash leaves exactly table_size counters.
+// Self-assignment is a no-op.
+// NOTE(review): only mer_size and seq_count of the MatchFinder state are
+// copied here; confirm whether the remaining base members should follow.
+MemHash& MemHash::operator=( const MemHash& mh ){
+	if( this == &mh )
+		return *this;
+	table_size = mh.table_size;
+	mer_size = mh.mer_size;
+	seq_count = mh.seq_count;
+	m_mem_count = mh.m_mem_count;
+	m_collision_count = mh.m_collision_count;
+	m_repeat_tolerance = mh.m_repeat_tolerance;
+	m_enumeration_tolerance = mh.m_enumeration_tolerance;
+	mem_table.resize(table_size);
+	mem_table_count.resize(table_size);
+	for(uint32 i=0; i < table_size; ++i){
+		mem_table_count[i] = mh.mem_table_count[i];
+		mem_table[i] = mh.mem_table[i];
+	}
+	match_log = mh.match_log;
+	return *this;
+}
+
+// Returns a copy of this object created with operator new via the copy
+// constructor; the caller owns the result.
+MemHash* MemHash::Clone() const{
+	return new MemHash(*this);
+}
+
+// Forwards to MatchFinder::Clear() only; the hash table, counters and
+// tolerances of this MemHash are left untouched.
+// NOTE(review): this calls the base Clear(), not a base ClearSequences() --
+// confirm that is the intended call.
+void MemHash::ClearSequences()
+{
+	MatchFinder::Clear();
+}
+
+// Resets this MemHash: clears the MatchFinder state, zeroes the statistics,
+// restores the default tolerances, empties every bucket, detaches the match
+// log and hands the entries tracked in `allocated` back to the allocator.
+void MemHash::Clear()
+{
+	MatchFinder::Clear();
+
+	m_repeat_tolerance = DEFAULT_REPEAT_TOLERANCE;
+	m_enumeration_tolerance = DEFAULT_ENUMERATION_TOLERANCE;
+	m_collision_count = 0;
+	m_mem_count = 0;
+	match_log = NULL;
+
+	// empty the hash table, bucket by bucket
+	uint32 bucketI = 0;
+	while( bucketI < table_size ){
+		mem_table_count[ bucketI ] = 0;
+		mem_table[ bucketI ].clear();
+		++bucketI;
+	}
+
+	// Only this object's entries are released.  Do NOT Purge() the allocator
+	// here: it has static lifetime, so purging would destroy ALL objects.
+	allocator.Free(allocated);
+}
+
+// Rebuilds the hash table with new_table_size empty buckets and one zeroed
+// counter per bucket.  Whatever the old buckets pointed to is dropped from
+// the table.
+void MemHash::SetTableSize(uint32 new_table_size){
+	table_size = new_table_size;
+	mem_table.assign( table_size, vector<MatchHashEntry*>() );
+	mem_table_count.assign( table_size, 0 );
+}
+
+// Runs MatchFinder::FindMatchSeeds() on the loaded sequences.
+// Always returns true; the result of the seed search is not inspected here.
+boolean MemHash::CreateMatches(){
+	MatchFinder::FindMatchSeeds();
+	return true;
+}
+
+// Finds matches across all sequences of ml, searching every sequence from
+// position 0.  The results are stored back into ml.
+void MemHash::FindMatches( MatchList& ml ) {
+	// one zero start point per sequence
+	const vector<gnSeqI> origin( ml.seq_table.size(), 0 );
+	FindMatchesFromPosition( ml, origin );
+}
+
+// Registers every sequence of ml with this MemHash, searches for match seeds
+// starting at start_points and stores the resulting matches in ml.
+//
+// If a sequence cannot be added an error is reported through ErrorMsg() and
+// the function returns without searching.  The error text uses the sequence's
+// file name when ml.seq_filename has an entry for it and falls back to the
+// sequence index otherwise, so a short seq_filename list is never indexed
+// out of range.
+void MemHash::FindMatchesFromPosition( MatchList& ml, const vector<gnSeqI>& start_points ){
+	for( uint32 seqI = 0; seqI < ml.seq_table.size(); ++seqI ){
+		if( !AddSequence( ml.sml_table[ seqI ], ml.seq_table[ seqI ] ) ){
+			stringstream seq_label;
+			if( seqI < ml.seq_filename.size() )
+				seq_label << ml.seq_filename[seqI];
+			else
+				seq_label << "sequence " << seqI;
+			ErrorMsg( "Error adding " + seq_label.str() + "\n");
+			return;
+		}
+	}
+	MatchFinder::FindMatchSeeds( start_points );
+
+	GetMatchList( ml );
+}
+
+// Returns a new MatchList filled through the templated GetMatchList()
+// overload, with its sequence and sorted-mer-list tables set to this
+// object's seq_table and sar_table.
+MatchList MemHash::GetMatchList() const{
+	MatchList ml;
+	GetMatchList( ml );
+	ml.seq_table = seq_table;
+	ml.sml_table = sar_table;
+
+	return ml;
+}
+
+// an attempt to do this without sorting, which appears to be very slow...
+//
+// Walks match_list once, counting per sequence id (iter->id) how many idmers
+// have been seen so far:
+//  - an idmer is copied into hash_list while fewer than
+//    m_enumeration_tolerance idmers of its sequence have been seen;
+//  - if the count already seen for a sequence exceeds m_repeat_tolerance the
+//    whole list is abandoned and true is returned without hashing anything.
+// When more than one idmer survives, the list goes to HashMatch() if the
+// enumeration tolerance is 1, otherwise to MatchFinder::EnumerateMatches().
+// Returns true in every case that is not delegated.
+// NOTE(review): enum_tally has seq_count entries and is indexed by iter->id
+// without a range check -- presumably ids are always < seq_count; confirm.
+boolean MemHash::EnumerateMatches( IdmerList& match_list )
+{
+	vector< uint > enum_tally(seq_count, 0);
+	IdmerList::iterator iter = match_list.begin();
+	IdmerList hash_list;
+	for(; iter != match_list.end(); ++iter)
+	{
+		if( enum_tally[iter->id] < m_enumeration_tolerance )
+		{
+			hash_list.push_back(*iter);
+		}
+		// checked before the increment below, so this compares the number of
+		// earlier idmers from the same sequence
+		if(enum_tally[iter->id] > m_repeat_tolerance)
+			return true;
+		++enum_tally[iter->id];
+	}
+
+	if(hash_list.size() > 1){
+		if(m_enumeration_tolerance == 1)
+			return HashMatch(hash_list);
+		else
+			return MatchFinder::EnumerateMatches( hash_list );
+	}
+	return true;
+}
+
+// Builds a MatchHashEntry from a list of matching idmers and adds it to the
+// hash table.
+// MemHashEntries use GENETICIST coordinates.  They start at 1, not 0, which
+// is why each start is stored as position + 1.
+// The entry length is the seed length reported by GetSar(0).
+// An entry whose Multiplicity() is below 2 is not added; a "red flag"
+// diagnostic is written to stdout instead.  Always returns true.
+boolean MemHash::HashMatch(IdmerList& match_list){
+	//check that there is at least one forward component
+//	match_list.sort(&idmer_id_lessthan);
+	// initialize the hash entry
+	MatchHashEntry mhe = MatchHashEntry(seq_count, GetSar(0)->SeedLength());
+	mhe.SetLength( GetSar(0)->SeedLength() );
+	
+	//Fill in the new Match and set direction parity if needed.
+	IdmerList::iterator iter = match_list.begin();
+	for(; iter != match_list.end(); ++iter)
+		mhe.SetStart(iter->id, iter->position + 1);
+	SetDirection(mhe);
+	mhe.CalculateOffset();
+	if(mhe.Multiplicity() < 2){
+		cout << "red flag " << mhe << "\n";
+		cout << "match_list.size(): " << match_list.size() << endl;
+	}else 
+		AddHashEntry(mhe);
+
+	return true;
+}
+
+// Sets the relative orientation of the sequences in mhe.
+// The first sequence that takes part in the match (start != NO_MATCH) is the
+// reference; the low bit of its seed mer decides the reference direction.
+// Every later participating sequence whose seed-mer low bit differs from the
+// reference's gets its start negated.  Starts are 1-based, hence the "- 1"
+// when looking up the seed mer.
+void MemHash::SetDirection(MatchHashEntry& mhe){
+	//get the reference direction
+	boolean ref_forward = false;
+	uint32 seqI=0;
+	for(; seqI < mhe.SeqCount(); ++seqI)
+		if(mhe[seqI] != NO_MATCH){
+			ref_forward = !(GetSar(seqI)->GetDnaSeedMer(mhe[seqI] - 1) & 0x1);
+			break;
+		}
+	//set directional parity for the rest
+	// ref_forward is the negation of the reference bit, so equality with this
+	// sequence's bit means the two bits differ
+	for(++seqI; seqI < mhe.SeqCount(); ++seqI)
+		if(mhe[seqI] != NO_MATCH)
+			if(ref_forward == (GetSar(seqI)->GetDnaSeedMer(mhe[seqI] - 1) & 0x1))
+				mhe.SetStart(seqI, -mhe[seqI]);
+}
+
+// Tries to add a new mem to the mem hash table
+// If the mem already exists in the table, a pointer to it
+// is returned.  Otherwise mhe is added and a pointer to
+// it is returned.
+//
+// Each bucket is kept sorted under mhecomp, and the bucket is chosen from the
+// entry's Offset().  A new entry is extended with ExtendMatch() (unless it is
+// already marked Extended()) before a copy of it is placed in storage obtained
+// from the allocator and inserted.  Subset matches produced by the extension
+// are added recursively.
+MatchHashEntry* MemHash::AddHashEntry(MatchHashEntry& mhe){
+	//first compute which hash table bucket this is going into
+	int64 offset = mhe.Offset();
+
+	// the double modulo maps negative offsets into [0, table_size)
+	uint32 bucketI = ((offset % table_size) + table_size) % table_size;
+	vector<MatchHashEntry*>::iterator insert_he;
+	insert_he = std::lower_bound(mem_table[bucketI].begin(), mem_table[bucketI].end(), &mhe, mhecomp);
+//	insert_he = mem_table[bucketI].find(&mhe);
+	// equivalent under mhecomp (neither orders before the other): already present
+	if( insert_he != mem_table[bucketI].end() && (!mhecomp(*insert_he, &mhe) && !mhecomp(&mhe, *insert_he)) ){
+		++m_collision_count;
+		return *insert_he;
+	}
+	
+	//if we made it this far there were no collisions
+	//extend the mem into the surrounding region.
+	vector<MatchHashEntry> subset_matches;
+	if( !mhe.Extended() )
+		ExtendMatch(mhe, subset_matches);
+
+	// copy-construct the entry in place inside allocator-provided storage
+	MatchHashEntry* new_mhe = allocator.Allocate();
+	new_mhe = new(new_mhe) MatchHashEntry(mhe); 
+//	*new_mhe = mhe;
+	allocated.push_back(new_mhe);
+	
+	// can't insert until after the extend!!
+	insert_he = std::lower_bound(mem_table[bucketI].begin(), mem_table[bucketI].end(), new_mhe, mhecomp);
+	mem_table[bucketI].insert(insert_he, new_mhe);
+
+	// log it.
+	if( match_log != NULL ){
+		(*match_log) << *new_mhe << endl;
+		match_log->flush();
+	}
+	
+	// add the subset matches found during extension; the returned pointers
+	// are not needed here
+	for(uint32 subsetI = 0; subsetI < subset_matches.size(); ++subsetI)
+		AddHashEntry( subset_matches[ subsetI ] );
+	
+	++mem_table_count[bucketI];
+	++m_mem_count;
+	return new_mhe;
+}
+
+// Writes one line per hash bucket to os: the bucket index, the number of
+// entries counted for that bucket and the summed Length() of its entries,
+// separated by tabs.
+void MemHash::PrintDistribution(ostream& os) const{
+	for(uint32 bucketI = 0; bucketI < mem_table_count.size(); ++bucketI){
+		const vector<MatchHashEntry*>& bucket = mem_table[bucketI];
+		gnSeqI total_length = 0;
+		for( size_t entryI = 0; entryI < bucket.size(); ++entryI )
+			total_length += bucket[entryI]->Length();
+		os << bucketI << '\t' << mem_table_count[bucketI] << '\t' << total_length << '\n';
+	}
+}
+
+// Reads matches from a text stream and adds each one to the hash table.
+//
+// Every record is a length followed by one start coordinate per sequence,
+// separated by whitespace.  The number of sequences (seq_count) is derived
+// from the first line by counting the values that follow the length.
+//
+// Parsing stops at the first record that cannot be read completely.  A record
+// is judged by whether its values were extracted, not by the stream's eof
+// state, so a final record without a trailing newline is still loaded.
+// If the first line is missing or holds no start coordinates, seq_count is
+// set to 0 and nothing is added.
+void MemHash::LoadFile(istream& mem_file){
+	string tag;
+	gnSeqI len;
+	int64 start;
+	MatchHashEntry mhe;
+
+	seq_count = 0;
+	if( !getline( mem_file, tag ) )
+		return;
+
+	// first pass over the first line: count the start coordinates
+	stringstream first_mum( tag );
+	first_mum >> len;
+	while( first_mum >> start ){
+		seq_count++;
+	}
+	if( seq_count == 0 )
+		return;
+
+	mhe = MatchHashEntry(seq_count, mer_size, MatchHashEntry::seed);
+
+	// second pass: rewind and read the values.  The length field comes first
+	// and must be consumed again before the starts.
+	first_mum.clear();
+	first_mum.str( tag );
+	first_mum >> len;
+	for(uint32 seqI = 0; seqI < seq_count; seqI++){
+		first_mum >> start;
+		mhe.SetStart(seqI, start);
+	}
+	mhe.SetLength( len );
+	mhe.CalculateOffset();
+	AddHashEntry(mhe);
+	
+	// remaining records
+	while( mem_file >> len ){
+		mhe.SetLength(len);
+		boolean complete = true;
+		for(uint32 seqI = 0; seqI < seq_count; seqI++){
+			if( !(mem_file >> start) ){
+				complete = false;
+				break;
+			}
+			mhe.SetStart(seqI, start);
+		}
+		//break if the stream ended in the middle of a record
+		if( !complete )
+			break;
+		mhe.CalculateOffset();
+		AddHashEntry(mhe);
+	}
+}
+
+// Writes the contents of this MemHash to mem_file as tab-delimited text:
+// a header (format version, sequence count, then the name and length of each
+// sequence), the match count, and finally one line per stored match in
+// bucket order.  A sequence whose spec has an empty name is written as "null".
+void MemHash::WriteFile(ostream& mem_file) const{
+	mem_file << "FormatVersion" << '\t' << 1 << "\n";
+	mem_file << "SequenceCount" << '\t' << sar_table.size() << "\n";
+
+	for(unsigned int seqI = 0; seqI < seq_count; ++seqI){
+		// name of the sequence source
+		mem_file << "Sequence" << seqI << "File";
+		gnGenomeSpec* spec = seq_table[seqI]->GetSpec();
+		string source_name = spec->GetName();
+		if( source_name.empty() )
+			source_name = "null";
+		mem_file << '\t' << source_name << "\n";
+
+		// length of the sequence
+		mem_file << "Sequence" << seqI << "Length";
+		mem_file << '\t' << seq_table[seqI]->length() << "\n";
+	}
+
+	mem_file << "MatchCount" << '\t' << m_mem_count << endl;
+
+	// dump every mem, bucket by bucket
+	for(uint32 bucketI = 0; bucketI < table_size; ++bucketI){
+		const vector<MatchHashEntry*>& bucket = mem_table[bucketI];
+		for( size_t entryI = 0; entryI < bucket.size(); ++entryI )
+			mem_file << *bucket[entryI] << "\n";
+	}
+}
+
+
+} // namespace mems
diff --git a/libMems/MemHash.h b/libMems/MemHash.h
new file mode 100644
index 0000000..8ef145c
--- /dev/null
+++ b/libMems/MemHash.h
@@ -0,0 +1,208 @@
+/*******************************************************************************
+ * $Id: MemHash.h,v 1.23 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _MemHash_h_
+#define _MemHash_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <set>
+#include <map>
+#include <iostream>
+
+#include "libMems/MatchFinder.h"
+#include "libMems/Match.h"
+#include "libGenome/gnException.h"
+#include "libMems/MatchList.h"
+#include "libMems/MatchHashEntry.h"
+#include "libMems/SlotAllocator.h"
+#include "boost/pool/object_pool.hpp"
+
+namespace mems {
+
+/** Number of hash buckets a default-constructed MemHash allocates. */
+static const uint32 DEFAULT_MEM_TABLE_SIZE = 40000;
+/** Default seed repeat tolerance; see MemHash::SetRepeatTolerance (0 generates MUMs). */
+static const uint32 DEFAULT_REPEAT_TOLERANCE = 0;
+/** Default repeat enumeration tolerance; see MemHash::SetEnumerationTolerance. */
+static const uint32 DEFAULT_ENUMERATION_TOLERANCE = 1;
+
+/**
+ * MemHash implements an algorithm for finding exact matches of a certain minimal
+ * length in several sequences.
+ */
+class MemHash : public MatchFinder{
+
+
+
+public:
+	MemHash();
+	~MemHash();
+	MemHash(const MemHash& mh);
+	MemHash& operator=( const MemHash& mh );
+	/** @return a copy of this object created with operator new; the caller owns it */
+	virtual MemHash* Clone() const;
+	virtual void Clear();
+	virtual void ClearSequences();
+	
+	/**
+	 * Finds all maximal exact matches in the sequences contained by "match_list"
+	 * The resulting list of matches is stored within "match_list"
+	 */
+	virtual void FindMatches( MatchList& match_list );
+	/**
+	 * Like FindMatches(), but the seed search is given one start position per sequence.
+	 * @param start_points the start positions passed on to the seed search
+	 */
+	virtual void FindMatchesFromPosition( MatchList& match_list, const std::vector<gnSeqI>& start_points );
+
+	/**
+	 * Generates exact matches for the sequences loaded into this MemHash 
+	 */
+	virtual boolean CreateMatches();
+
+	/**
+	 * Returns the size of the hash table being used. 
+	 * @return the size of the hash table being used. 
+	 */
+	virtual uint32 TableSize() const {return table_size;};
+	/**
+	 * Sets the size of the hash table to new_table_size.
+	 * @param new_table_size The new hash table size
+	 */
+	virtual void SetTableSize(uint32 new_table_size);
+	/**
+	 * Creates a new MatchList instance which contains all the matches found by calling CreateMatches().
+	 */
+	virtual MatchList GetMatchList() const;
+	/**
+	 * Places pointers to the mems that have been found into the vector mem_list
+	 * @param mem_list an empty vector.
+	 */
+	//virtual void GetMatchList( std::vector<Match*>& mem_list ) const;
+	
+	/**
+	 * Use this to convert MatchHashEntry mem list to a generic match list type
+	 * converts the mem_list into the type specified by MatchListType
+	 */
+	template< class MatchListType >
+	void GetMatchList( MatchListType& mem_list ) const;
+	
+	/**
+	 * Returns the number of mems found 
+	 * NOTE(review): the counter is stored as uint64 but returned as uint32.
+	 * @return The number of mems found 
+	 */
+	virtual uint32 MemCount(){return m_mem_count;}
+	/**
+	 * Returns the number of mers thrown out because they were contained in an existing mem 
+	 * NOTE(review): the counter is stored as uint64 but returned as uint32.
+	 * @return The number of mers thrown out because they were contained in an existing mem 
+	 */
+	virtual uint32 MemCollisionCount(){return m_collision_count;}
+	/**
+	 * Copies the per-bucket entry counts into table_count.
+	 * @param table_count receives one count per hash table bucket
+	 */
+	virtual void MemTableCount(std::vector<uint32>& table_count){table_count = mem_table_count;}
+	/**
+	 * Prints the number of matches in each hash table bucket to the ostream os.
+	 * @param os The stream to print to.
+	 */
+	virtual void PrintDistribution(std::ostream& os) const;
+	
+	/**
+	 * Reads in a list of mems from an input stream
+	 * @throws A InvalidFileFormat exception if the file format is unknown or the file is corrupt
+	 */
+	virtual void LoadFile(std::istream& mem_file);
+	/**
+	 * Writes the matches stored in this MemHash out to the ostream @param mem_file.
+	 */
+	virtual void WriteFile(std::ostream& mem_file) const;
+
+	/**
+	 * Sets the permitted repetitivity of match seeds.  
+	 * Set @param repeat_tolerance to 0 to generate MUMs, any higher setting will generate MEMs
+	 * Many possible combinations of repetitive seed matches may be ignored, depending on the 
+	 * setting of the repeat enumeration tolerance.
+	 * @see SetEnumerationTolerance
+	 * @param repeat_tolerance the permitted repetitivity of match seeds
+	 */
+	virtual void SetRepeatTolerance(uint32 repeat_tolerance){m_repeat_tolerance = repeat_tolerance;}
+	/**
+	 * @return the permitted repetitivity of match seeds.  
+	 * @see SetRepeatTolerance
+	 */
+	virtual uint32 GetRepeatTolerance() const{return m_repeat_tolerance;}
+	/**
+	 * Sets the match seed repeat enumeration tolerance.
+	 * When matching mers are found across sequences which also occur several times in any particular
+	 * sequence there are several possible match seeds which could be generated.
+	 * The enumeration tolerance controls how many of these possibilities are actually used as match
+	 * seeds and extended into full matches.  The selection of actual seeds from the realm of possibilities
+	 * is essentially arbitrary, though not explicitly randomized.
+	 */
+	virtual void SetEnumerationTolerance(uint32 enumeration_tolerance){m_enumeration_tolerance = enumeration_tolerance;}
+	/**
+	 * @return  the match seed repeat enumeration tolerance.
+	 * @see SetEnumerationTolerance
+	 */
+	virtual uint32 GetEnumerationTolerance() const{return m_enumeration_tolerance;}
+	
+	/**
+	 * Setting this to a non-null value causes matches to be logged as they are found
+	 */
+	void SetMatchLog( std::ostream* match_log ){ this->match_log = match_log; }
+
+	
+
+	//end void GetMatchList( std::vector<MatchListType*>& mem_list );
+
+protected:
+	virtual boolean EnumerateMatches( IdmerList& match_list );
+	virtual boolean HashMatch(IdmerList& match_list);
+	virtual void SetDirection(MatchHashEntry& mhe);
+	virtual MatchHashEntry* AddHashEntry(MatchHashEntry& mhe);
+	/** @return listI*(listI+1)/2, the listI-th triangular number */
+	virtual uint32 quadratic_li(uint32 listI){return (listI*(listI+1))/2;}
+		
+	uint32 table_size;	// number of buckets in mem_table
+	std::vector< std::vector<MatchHashEntry*> > mem_table;	// the hash table: one vector of entry pointers per bucket
+	uint32 m_repeat_tolerance;
+	uint32 m_enumeration_tolerance;
+	uint64 m_mem_count;	// number of entries added to the table
+	uint64 m_collision_count;	// number of add attempts that found an equivalent entry
+	std::vector<uint32> mem_table_count;	// per-bucket entry counts
+
+	std::ostream* match_log;	// optional log stream, NULL when logging is off
+	SlotAllocator<MatchHashEntry>& allocator;
+	std::vector<MatchHashEntry*> allocated;	// used to track what needs to get explicitly destroyed later...
+//	boost::object_pool<MatchHashEntry> allocator;
+	MheCompare mhecomp;	// ordering used to keep each bucket sorted
+};
+
+
+/**
+ * Converts the stored MatchHashEntry objects into the element type of an
+ * arbitrary match list.  mem_list is emptied first; afterwards it holds one
+ * newly created match per hash table entry, in bucket order.  Each new match
+ * is obtained with Copy() from a default-constructed prototype and then
+ * assigned the contents of the hash entry.
+ * @param mem_list	the list that receives the converted matches; its
+ *			value_type must be a pointer to the target match type
+ */
+template< class MatchListType >
+void MemHash::GetMatchList( MatchListType& mem_list ) const {
+
+	typedef typename MatchListType::value_type MatchType;
+	// strip the pointer from the list's value_type to get the match class itself
+	typedef typename boost::remove_pointer<MatchType>::type SinPtrMatchType;
+
+	mem_list.clear();
+	SinPtrMatchType prototype;
+
+	for( uint32 bucketI = 0; bucketI < table_size; ++bucketI )
+	{
+		const std::vector<MatchHashEntry*>& bucket = mem_table[bucketI];
+		for( size_t entryI = 0; entryI < bucket.size(); ++entryI )
+		{
+			MatchType converted = prototype.Copy();
+			*converted = *bucket[entryI];
+			mem_list.push_back( converted );
+		}
+	}
+
+}
+
+
+}
+
+#endif //_MemHash_h_
diff --git a/libMems/Memory.h b/libMems/Memory.h
new file mode 100644
index 0000000..ec259ea
--- /dev/null
+++ b/libMems/Memory.h
@@ -0,0 +1,60 @@
+#ifndef __libMems_Memory_h__
+#define __libMems_Memory_h__
+
+
+void printMemUsage();
+static bool debugging_memory = false;
+#include <iostream>
+
+#ifdef WIN32
+#include <windows.h>
+#include <PSAPI.h>
+/**
+ * Prints the working set size and pagefile usage (in Mb) of the first running
+ * process whose main module name starts with "progressiveMauve".
+ *
+ * Nothing is printed when the process list cannot be obtained or when no such
+ * process is found.  Every process handle opened during the search is closed
+ * again, including the one that is finally queried.
+ */
+inline
+void printMemUsage()
+{
+//	if(!debugging_memory)
+//		return;
+
+	DWORD proclist[500];
+	DWORD cbNeeded = 0;
+	if( !EnumProcesses( proclist, sizeof(proclist), &cbNeeded ) )
+		return;
+	int p_count = cbNeeded / sizeof(DWORD);
+
+	HANDLE phand = NULL;	// handle of the matching process, once found
+	char szFileName[MAX_PATH];
+	for( int p = 0; p < p_count; p++ )
+	{
+		HANDLE candidate = OpenProcess( PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, 0, proclist[p] );
+		if( candidate == NULL )
+			continue;	// process could not be opened
+
+		HMODULE hMod;
+		DWORD dwSize2;
+		if (EnumProcessModules(candidate, &hMod, sizeof(hMod), &dwSize2)) 
+		{
+
+			// Get the module name
+			if ( !GetModuleBaseName(candidate, hMod, szFileName, sizeof(szFileName)) )
+				szFileName[0] = 0;
+			if( strncmp( szFileName, "progressiveMauve", 16 ) == 0 )
+			{
+				phand = candidate;	// found the right module, keep the handle open
+				break;
+			}
+		}
+		CloseHandle(candidate);
+	}
+
+	if( phand == NULL )
+		return;	// no matching process
+
+	PROCESS_MEMORY_COUNTERS mem_info;
+
+	if( GetProcessMemoryInfo( phand, &mem_info, sizeof(mem_info) ) )
+	{
+			std::cout << "Working set size: " << mem_info.WorkingSetSize / (1024 * 1024) << " Mb\n";
+//		cout << "Paged pool usage: " << mem_info.QuotaPagedPoolUsage << endl;
+//		cout << "Non-Paged pool usage: " << mem_info.QuotaNonPagedPoolUsage << endl;
+			std::cout << "Pagefile usage: " << mem_info.PagefileUsage / (1024 * 1024) << " Mb\n";
+			std::cout.flush();
+	}
+	CloseHandle(phand);
+}
+#else
+// Non-Windows builds: memory usage reporting is not implemented, this is a no-op.
+inline
+void printMemUsage()
+{};
+#endif
+
+#endif	//__libMems_Memory_h__
+
diff --git a/libMems/MemorySML.cpp b/libMems/MemorySML.cpp
new file mode 100644
index 0000000..e42c3f8
--- /dev/null
+++ b/libMems/MemorySML.cpp
@@ -0,0 +1,96 @@
+/*******************************************************************************
+ * $Id: MemorySML.cpp,v 1.8 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/MemorySML.h"
+#include <algorithm>
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+// Creates an empty MemorySML: stores the alphabet bit size, copies the
+// translation table into the header and sets the header version to 0.
+// NOTE(review): exactly UINT8_MAX bytes are copied from table -- confirm that
+// this matches the size of header.translation_table and that table is never NULL.
+MemorySML::MemorySML(const uint8* table, const uint32 alpha_bits ){
+	header.alphabet_bits = alpha_bits;
+	memcpy(header.translation_table, table, UINT8_MAX);
+	header.version = 0;
+}
+
+// Copy constructor: delegates all copying to operator=.
+MemorySML::MemorySML(const MemorySML& msa){
+	*this = msa;
+}
+
+// Assigns the SortedMerList base state and then the sorted position list of msa.
+MemorySML& MemorySML::operator=(const MemorySML& msa ) {
+ 	SortedMerList::operator=( msa );
+ 	positions = msa.positions;
+ 	return *this;
+}
+
+// Returns a copy of this object created with operator new; the caller owns it.
+MemorySML* MemorySML::Clone() const{
+	return new MemorySML(*this);
+}
+
+// Clears the SortedMerList base state and discards the sorted position list.
+void MemorySML::Clear(){
+	SortedMerList::Clear();
+	positions.clear();
+}
+
+// Builds the in-memory sorted mer list for seq.
+// After the base class set-up, the mers of the sequence are generated (with
+// the spaced-seed filler when seed length and seed weight differ, with the
+// plain filler otherwise), sorted with bmer_lessthan, and their positions are
+// appended to `positions` in sorted order.
+void MemorySML::Create(const gnSequence& seq, const uint64 seed ){
+	SortedMerList::Create(seq, seed);
+
+	vector<bmer> mers;
+	if( header.seed_length == header.seed_weight )
+		FillSML( seq, mers );
+	else
+		FillDnaSeedSML( seq, mers );
+
+	sort( mers.begin(), mers.end(), &bmer_lessthan );
+
+	// only the positions are retained; the mers themselves are recomputed on demand
+	positions.reserve( mers.size() );
+	for( vector<bmer>::const_iterator mer_iter = mers.begin(); mer_iter != mers.end(); ++mer_iter )
+		positions.push_back( mer_iter->position );
+
+}
+
+// Fills readVector with up to `size` consecutive entries of the sorted mer
+// list, starting at index `offset`.  readVector is emptied first.
+// Returns false when offset lies beyond the end of the list (nothing is read)
+// or when fewer than `size` entries were available (the available ones are
+// still returned); returns true when the full range was read.
+boolean MemorySML::Read(vector<bmer>& readVector, gnSeqI size, gnSeqI offset )
+{
+	readVector.clear();
+	if( offset > positions.size() )
+		return false;
+
+	// clamp the end of the requested range to the end of the list
+	gnSeqI end_mer = offset + size;
+	boolean complete = true;
+	if( end_mer > positions.size() ){
+		complete = false;
+		end_mer = positions.size();
+	}
+
+	for( gnSeqI merI = offset; merI < end_mer; ++merI ){
+		bmer entry;
+		entry.position = positions[merI];
+		entry.mer = GetSeedMer( entry.position );
+		readVector.push_back( entry );
+	}
+	return complete;
+}
+
+// Merging is not implemented for MemorySML: the body is empty and both
+// arguments are ignored.
+void MemorySML::Merge(SortedMerList& sa, SortedMerList& sa2){
+
+}
+
+// Returns the index-th entry of the sorted mer list: its stored position
+// together with the seed mer computed for that position.
+// No range check is performed on index.
+bmer MemorySML::operator[](gnSeqI index)
+{
+	bmer cur_mer;
+	cur_mer.position = positions[index];
+	cur_mer.mer = GetSeedMer( cur_mer.position );
+	return cur_mer;
+}
+
+} // namespace mems
diff --git a/libMems/MemorySML.h b/libMems/MemorySML.h
new file mode 100644
index 0000000..d89229e
--- /dev/null
+++ b/libMems/MemorySML.h
@@ -0,0 +1,58 @@
+/*******************************************************************************
+ * $Id: MemorySML.h,v 1.7 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _MemorySML_h_
+#define _MemorySML_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnSequence.h"
+#include "libMems/SortedMerList.h"
+
+namespace mems {
+
+/** The MemorySML is an implementation of sorted mer lists which creates and
+ *  stores the sorted mer list entirely in memory.  A MemorySML consumes
+ *  roughly 32 + alpha_bits bits of memory per character in the sequences.
+ *  For unambiguous DNA sequences 4.25 bytes per base are required.
+ */
+class MemorySML : public SortedMerList
+{
+public:
+	/** 
+	 *  Create an empty MemorySML
+	 *  Creates an empty MemorySML with the supplied translation
+	 *  table and alphabet bit size.  Defaults to DNA settings
+	 *  @param table The array used to translate characters into binary code
+	 *  @param alpha_bits The number of bits each character consumes in binary
+	 */
+	MemorySML(const uint8* table = SortedMerList::BasicDNATable(), const uint32 alpha_bits = DNA_ALPHA_BITS);
+	MemorySML(const MemorySML& msa);
+	MemorySML& operator=(const MemorySML& msa );
+	/** @return a copy of this object created with operator new; the caller owns it */
+	MemorySML* Clone() const;
+	
+	/** Clears the base class state and the stored position list. */
+	virtual void Clear();
+
+	/** Builds the sorted mer list for seq using the given seed pattern. */
+	virtual void Create(const genome::gnSequence& seq, const uint64 seed);
+	/**
+	 *  Reads up to size entries of the sorted mer list, starting at offset, into readVector.
+	 *  @return false if offset is past the end or fewer than size entries were available
+	 */
+	virtual boolean Read(std::vector<bmer>& readVector, gnSeqI size, gnSeqI offset = 0);
+	/** Not implemented for MemorySML; the definition is empty. */
+	virtual void Merge(SortedMerList& sa, SortedMerList& sa2);
+	
+	/** @return the index-th entry of the sorted mer list (no range check) */
+	virtual bmer operator[](gnSeqI index);
+	
+protected:
+
+//	virtual void FillSML(const gnSeqI seq_len, vector<gnSeqI>& sml_array);
+	std::vector<smlSeqI_t> positions;	// sequence positions of the mers, in sorted mer order
+
+};
+
+}
+
+#endif   //_MemorySML_h_
diff --git a/libMems/MuscleInterface.cpp b/libMems/MuscleInterface.cpp
new file mode 100644
index 0000000..25deb99
--- /dev/null
+++ b/libMems/MuscleInterface.cpp
@@ -0,0 +1,1192 @@
+/*******************************************************************************
+ * $Id: MuscleInterface.cpp,v 1.27 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * Please see the file called COPYING for licensing, copying, and modification terms.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/MuscleInterface.h"
+
+#include "libGenome/gnFilter.h"
+#include "libGenome/gnFASSource.h"
+#include "libGenome/gnStringTools.h"
+#include "libMUSCLE/muscle.h"
+#include "libMUSCLE/params.h"
+#include "libMUSCLE/msa.h"
+#include "libMUSCLE/seq.h"
+#include "libMUSCLE/seqvect.h"
+#include "libMUSCLE/tree.h"
+#include "libMUSCLE/clust.h"
+#include "libMUSCLE/profile.h"
+#include "libMUSCLE/distfunc.h"
+#include "libMUSCLE/clustsetdf.h"
+#include "libMUSCLE/textfile.h"
+#include "libMUSCLE/types.h"
+#include "boost/algorithm/string/erase.hpp"
+#include "boost/algorithm/string/case_conv.hpp"
+
+#include <sstream>
+#include <fstream>
+
+using namespace std;
+using namespace genome;
+
+// this gets defined in muscle.cpp, but not declared in any headers
+namespace muscle {
+extern void MUSCLE(SeqVect &v, MSA &msaOut);
+extern void RefineW(const MSA &msaIn, MSA &msaOut);
+}
+
+using namespace muscle;
+
+namespace mems {
+
+bool debug_muscle = false;
+
+bool pipeExec( char** cmd_argv, const string& command, const string& input, string& output, string& error );
+
+char** parseCommand( const string& cmd );
+char** parseCommand( const string& cmd ){
+	// Build an argv-style array from cmd: double-quoted text is one argument, the rest splits on whitespace.
+	stringstream quote_stream( cmd );
+	string segment;
+	// getline() on '"' yields segments that alternate unquoted, quoted, unquoted, ...
+	// the flag is flipped before it is used, so starting at true makes the first segment unquoted
+	boolean quoted = true;
+	vector< string > tokens;
+	while( getline( quote_stream, segment, '"' ) ){
+		quoted = !quoted;
+		if( segment.empty() )
+			continue;
+		if( !quoted ){
+			stringstream word_stream( segment );
+			string word;
+			while( word_stream >> word )
+				tokens.push_back( word );
+		}else{
+			tokens.push_back( segment );
+		}
+	}
+	// copy into a NULL-terminated array; the array and every entry are allocated with new[]
+	const size_t token_total = tokens.size();
+	char ** argv_array = new char*[ token_total + 1 ];
+	for( size_t tokI = 0; tokI < token_total; tokI++ ){
+		argv_array[ tokI ] = new char[ tokens[ tokI ].length() + 1 ];
+		strcpy( argv_array[ tokI ], tokens[ tokI ].c_str() );
+	}
+	argv_array[ token_total ] = NULL;
+	return argv_array;
+}
+
+#if !defined(WIN32)
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+// unix pipelined execution code
+bool pipeExec( char** cmd_argv, const string& command, const string& input, string& output, string& error ){
+	// Unix implementation: fork/exec cmd_argv, write input to the child's stdin and append its stdout to output.
+	// Returns true only when the child's stdout was read through EOF.  command and error are not used here.
+	int stdin_pipe[2] = { -1, -1 }, stdout_pipe[2] = { -1, -1 }, stderr_pipe[2] = { -1, -1 };	// -1 == not open
+	boolean success = false;
+	pid_t sid, pid1;
+	const char* fail;
+	char buf[1024];
+	ssize_t bread = 0, rval = 0;
+
+	if((sid = setsid()) < 0)sid = getpgrp();
+	
+	if((sid < 0 && (fail = "sid"))
+	 || (pipe(stdin_pipe) < 0 && (fail = "stdin"))
+	 || (pipe(stdout_pipe) < 0 && (fail = "stdout"))
+//	 || (pipe(stderr_pipe) < 0 && (fail = "stderr"))
+	)
+    {
+		fprintf(stderr, "Ouch, the world just collapsed (%s).\n", fail);
+		perror("muscle:");
+		goto cleanup;
+	}
+	
+	fcntl(stdin_pipe[0], F_SETFL, fcntl(stdin_pipe[0], F_GETFL) & ~O_NONBLOCK);
+	fcntl(stdin_pipe[1], F_SETFL, fcntl(stdin_pipe[1], F_GETFL) & ~O_NONBLOCK);
+	fcntl(stdout_pipe[0], F_SETFL, fcntl(stdout_pipe[0], F_GETFL) & ~O_NONBLOCK);
+	fcntl(stdout_pipe[1], F_SETFL, fcntl(stdout_pipe[1], F_GETFL) & ~O_NONBLOCK);
+/*	fcntl(stderr_pipe[0], F_SETFL, fcntl(stderr_pipe[0], F_GETFL) & ~O_NONBLOCK);
+	fcntl(stderr_pipe[1], F_SETFL, fcntl(stderr_pipe[1], F_GETFL) & ~O_NONBLOCK);
+*/	
+	if((pid1 = fork()) < 0)goto cleanup;	
+	if(pid1)
+		setpgid(pid1, sid);
+	else
+	{
+		dup2(stdin_pipe[0], 0);
+		dup2(stdout_pipe[1], 1);
+//		dup2(stderr_pipe[1], 2);
+		close( stdin_pipe[0] );
+		close( stdin_pipe[1] );
+		close( stdout_pipe[0] );
+		close( stdout_pipe[1] );
+//		close( stderr_pipe[0] );
+//		close( stderr_pipe[1] );
+		execvp(cmd_argv[0], cmd_argv);
+		_exit(errno);
+	}
+	rval = write( stdin_pipe[1], input.c_str(), input.size() );
+	if( rval == -1 )
+		perror( "write: " );
+	if( close( stdin_pipe[1] ) )
+		perror( "close stdin_w: " );
+	if( close( stdin_pipe[0] ) )
+		perror( "close stdin_r: " );
+	close( stdout_pipe[1] );
+	stdin_pipe[0] = stdin_pipe[1] = stdout_pipe[1] = -1;	// closed above: keep cleanup from closing them again
+	// read the alignment
+	while(true){
+		bread = read( stdout_pipe[0], buf, sizeof(buf) );
+		if( bread == 0 )
+			break;	// reached EOF
+		if( bread == -1 && errno == EINTR )
+			continue;	// interrupted by a signal before any data arrived: retry
+		if( bread == -1 ){ perror("muscle read: " ); break; }	// any other error will not clear up: stop instead of spinning
+		// append exactly the bytes read, so buf needs neither zeroing nor a terminating NUL
+		output.append( buf, bread );
+	}
+	waitpid( pid1, NULL, 0 );	// reap the child started above rather than whichever child exits first
+	success = ( bread == 0 );	// a read error leaves bread at -1 and is reported as failure
+	
+cleanup:
+	if( stdin_pipe[0] >= 0 ) close(stdin_pipe[0]);
+	if( stdin_pipe[1] >= 0 ) close(stdin_pipe[1]);
+	if( stdout_pipe[0] >= 0 ) close(stdout_pipe[0]);
+	if( stdout_pipe[1] >= 0 ) close(stdout_pipe[1]);
+//	close(stderr_pipe[0]);
+//	close(stderr_pipe[1]);
+	return success;
+}
+
+
+#else
+
+//windows piping code
+#include <windows.h>
+#define bzero(a) memset(a,0,sizeof(a)) //easier -- shortcut
+
+bool IsWinNT()  // true when GetVersionEx reports the NT platform id
+{
+  OSVERSIONINFO version_info;
+  version_info.dwOSVersionInfoSize = sizeof(version_info);	// size field is filled in before the query
+  GetVersionEx(&version_info);
+  return (VER_PLATFORM_WIN32_NT == version_info.dwPlatformId);
+}
+
+void ErrorMessage(const char *str)  //print str followed by the system text for GetLastError()
+{
+  LPVOID msg = NULL;	// stays NULL when FormatMessage cannot supply a message
+  FormatMessage(
+    FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM,
+    NULL,
+    GetLastError(),
+    MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language
+    (LPTSTR) &msg,
+    0,
+    NULL
+  );
+  printf("%s: %s\n",str,msg ? (const char*)msg : "(no system message available)");
+  if( msg ) LocalFree(msg);	// the buffer was allocated by FormatMessage (ALLOCATE_BUFFER flag)
+}
+
+bool pipeExec( char** cmd_argv, const string& command, const string& input, string& output, string& error ){
+	// Windows implementation: runs command, writes input to its stdin, appends its stdout to output and its stderr to error.
+	char buf[1024];           //i/o buffer
+	const HANDLE none = INVALID_HANDLE_VALUE;	// "not open" marker tested by the cleanup code at finito
+	STARTUPINFO si;
+	SECURITY_ATTRIBUTES sa;
+	SECURITY_DESCRIPTOR sd;               //security information for pipes
+	PROCESS_INFORMATION pi = { none, none, 0, 0 };	// process/thread handles start out as "not open"
+	HANDLE newstdin_w = none, newstdout_w = none, newstderr_w = none, newstdin_r = none, newstdout_r = none, newstderr_r = none;
+	HANDLE read_stdout = none, read_stderr = none, write_stdin = none;  //pipe handles
+	boolean success = false;
+
+	if (IsWinNT())        //initialize security descriptor (Windows NT)
+	{
+		InitializeSecurityDescriptor(&sd,SECURITY_DESCRIPTOR_REVISION);
+		SetSecurityDescriptorDacl(&sd, true, NULL, false);
+		sa.lpSecurityDescriptor = &sd;
+	}
+	else sa.lpSecurityDescriptor = NULL;
+
+	sa.nLength = sizeof(SECURITY_ATTRIBUTES);
+	sa.bInheritHandle = true;         //allow inheritable handles
+
+	if (!CreatePipe(&newstdin_r,&newstdin_w,&sa,0))   //create stdin pipe
+	{
+		ErrorMessage("CreatePipe");
+		goto finito;
+	}
+	if (!CreatePipe(&newstdout_r,&newstdout_w,&sa,0))  //create stdout pipe
+	{
+		ErrorMessage("CreatePipe");
+		goto finito;
+	}
+	if (!CreatePipe(&newstderr_r,&newstderr_w,&sa,0))  //create stderr pipe
+	{
+		ErrorMessage("CreatePipe");
+		goto finito;
+	}
+	// Duplicate the write handle to the pipe so it is not inherited. 
+	boolean fSuccess = DuplicateHandle(GetCurrentProcess(), newstdin_w, 
+		GetCurrentProcess(), &write_stdin, 0, 
+		FALSE,                  // not inherited 
+		DUPLICATE_SAME_ACCESS); 
+	if (! fSuccess){
+		ErrorMessage("DuplicateHandle failed"); 
+		goto finito;
+	}
+	CloseHandle(newstdin_w); 
+	newstdin_w = INVALID_HANDLE_VALUE;
+
+	// Duplicate the read handle to the pipe so it is not inherited. 
+	fSuccess = DuplicateHandle(GetCurrentProcess(), newstdout_r, 
+		GetCurrentProcess(), &read_stdout, 0, 
+		FALSE,                  // not inherited 
+		DUPLICATE_SAME_ACCESS); 
+	if (! fSuccess){
+		ErrorMessage("DuplicateHandle failed"); 
+		goto finito;
+	}
+	CloseHandle(newstdout_r); 
+	newstdout_r = INVALID_HANDLE_VALUE;
+
+	// Duplicate the read handle to the pipe so it is not inherited. 
+	fSuccess = DuplicateHandle(GetCurrentProcess(), newstderr_r, 
+		GetCurrentProcess(), &read_stderr, 0, 
+		FALSE,                  // not inherited 
+		DUPLICATE_SAME_ACCESS); 
+	if (! fSuccess){
+		ErrorMessage("DuplicateHandle failed"); 
+		goto finito;
+	}
+	CloseHandle(newstderr_r); 
+	newstderr_r = INVALID_HANDLE_VALUE;
+
+	GetStartupInfo(&si);      //set startupinfo for the spawned process
+	/*
+		The dwFlags member tells CreateProcess how to make the process.
+		STARTF_USESTDHANDLES validates the hStd* members. STARTF_USESHOWWINDOW
+		validates the wShowWindow member.
+	*/
+	si.dwFlags = STARTF_USESTDHANDLES|STARTF_USESHOWWINDOW;
+	si.wShowWindow = SW_HIDE;
+	si.hStdOutput = newstdout_w;
+	si.hStdError = newstderr_w;     //set the new handles for the child process
+	si.hStdInput = newstdin_r;
+
+	//spawn the child process
+	char* cmd = new char[ command.length() + 1 ];
+	strcpy( cmd, command.c_str() );
+	if (!CreateProcess(NULL,cmd,NULL,NULL,TRUE,0,
+						NULL,NULL,&si,&pi))
+	{
+		delete[] cmd;	// allocated with new[], so it must be released with delete[]
+		ErrorMessage("CreateProcess");
+		goto finito;
+	}
+	delete[] cmd;
+
+	unsigned long exit=0;  //process exit code
+	unsigned long bread;   //bytes read
+	unsigned long avail;   //bytes available
+
+	WriteFile(write_stdin, input.c_str(), input.size(), &bread, NULL); //send data to stdin
+	CloseHandle(write_stdin);
+	write_stdin = INVALID_HANDLE_VALUE;
+
+	// Wait until child process exits.
+	while( true ){
+		GetExitCodeProcess( pi.hProcess, &exit );
+		if( exit != STILL_ACTIVE )
+			WaitForSingleObject( pi.hProcess, INFINITE );
+			
+		// read anything that came to stdout
+		PeekNamedPipe(read_stdout,buf,1023,&bread,&avail,NULL);
+		if( avail == 0 )
+			Sleep(5);	// didn't get anything, so take a break to avoid hogging the CPU...
+		while( avail > 0 ){
+			bzero(buf);
+			int read_size = 1023 < avail ? 1023 : avail;
+			ReadFile(read_stdout,buf,read_size,&bread,NULL);  //read the stdout pipe
+			avail -= bread;
+			output += buf;
+		}
+
+		// read anything that came to stderr
+		PeekNamedPipe(read_stderr,buf,1023,&bread,&avail,NULL);
+		while( avail > 0 ){
+			bzero(buf);
+			int read_size = 1023 < avail ? 1023 : avail;
+			ReadFile(read_stderr,buf,read_size,&bread,NULL);  //read the stderr pipe
+			avail -= bread;
+			error += buf;
+		}
+
+		if( exit != STILL_ACTIVE )
+			break;
+	}
+	// Wait until child process exits.
+    WaitForSingleObject( pi.hProcess, INFINITE );
+	success = true;
+
+	//clean up and exit
+finito:
+    if( pi.hThread != INVALID_HANDLE_VALUE )
+		CloseHandle(pi.hThread);
+    if( pi.hProcess != INVALID_HANDLE_VALUE )
+		CloseHandle(pi.hProcess);
+    if( newstdin_r != INVALID_HANDLE_VALUE )
+		CloseHandle(newstdin_r);
+    if( newstdout_w != INVALID_HANDLE_VALUE )
+		CloseHandle(newstdout_w);
+    if( newstderr_w != INVALID_HANDLE_VALUE )
+		CloseHandle(newstderr_w);
+    if( read_stdout != INVALID_HANDLE_VALUE )
+		CloseHandle(read_stdout);
+    if( read_stderr != INVALID_HANDLE_VALUE )
+		CloseHandle(read_stderr);
+    if( write_stdin != INVALID_HANDLE_VALUE )
+		CloseHandle(write_stdin);
+	return success;
+}
+
+#endif
+
+
+
+MuscleInterface& MuscleInterface::getMuscleInterface()
+{
+        // function-local static: every call returns a reference to this one object
+        static MuscleInterface shared_instance;
+        return shared_instance;
+}
+
+MuscleInterface::MuscleInterface() : GappedAligner() {
+	max_alignment_length = 12500;	// the Align() overloads skip regions longer than this
+	muscle_path = "muscle_aed";
+	muscle_arguments = "-stable -quiet -seqtype DNA";
+	muscle_cmdline = parseCommand( muscle_path + " " + muscle_arguments );	// argv-style form of the two defaults above
+}
+
+void MuscleInterface::ParseMusclePath( const char* argv0 ){
+	// Point the muscle path at "muscle_aed" inside the directory part of argv0.
+	string exe_path( argv0 );
+	// a leading quote means the path is quoted: drop the first and the last character
+	if( exe_path[0] == '"' )
+		exe_path = exe_path.substr( 1, exe_path.size() - 2 );
+	standardizePathString( exe_path );
+	const string::size_type last_slash = exe_path.rfind('/');
+	if( last_slash == string::npos )
+		exe_path.clear();	// no directory component: the bare program name is used
+	else
+		exe_path.erase( last_slash + 1 );	// keep everything up to and including the final '/'
+	SetMusclePath( '"' + exe_path + "muscle_aed\"");
+}
+
+void MuscleInterface::SetMusclePath( const string& path ){	// store path and rebuild the parsed command line from it
+	muscle_path = path;
+	ClearCommandLine();	// NOTE(review): presumably releases the previous muscle_cmdline -- confirm in the class definition
+	muscle_cmdline = parseCommand( muscle_path + " " + muscle_arguments );	// arguments are the ones currently stored
+}
+
+void MuscleInterface::SetExtraMuscleArguments( const string& args )
+{	// Only stores args; SetMuscleArguments() appends the stored value when it builds the command line.
+	extra_muscle_arguments = args;
+}
+
+void MuscleInterface::SetMuscleArguments( const string& args )
+{	// Store args followed by the extra arguments, then re-parse the complete command line.
+	ClearCommandLine();	// NOTE(review): presumably releases the previous muscle_cmdline -- confirm in the class definition
+	muscle_arguments = args + " " + extra_muscle_arguments;
+	muscle_cmdline = parseCommand( muscle_path + " " + muscle_arguments );
+}
+
+MuscleInterface& MuscleInterface::operator=( const MuscleInterface& ci ){	// assigns the GappedAligner base part only
+	GappedAligner::operator =( ci );	// muscle_path, muscle_arguments and muscle_cmdline are not copied from ci here
+	return *this;
+}
+
+//tjt: not the best way of doing this, should have just one Align function that takes an AbstractMatch*,
+	//     not both Match* & AbstractMatch* in separate, nearly identical functions..
+	//     Such a change would involve changes to GappedAligner, and would require some additional care taken
+	//     with SeqCount & Multiplicity, as well as seq_table[ seqI ]->length()/seq_table[ 0 ]->length(i),
+	//     for now, leave like this. hopefully sooner than later, make pretty!
+boolean MuscleInterface::Align( GappedAlignment& cr, Match* r_begin, Match* r_end, vector< gnSequence* >& seq_table ){
+	// Aligns the sequence lying between r_begin and r_end in each entry of seq_table and stores the result in cr.
+	// Either match may be NULL.  Sequences that are undefined in a match, or whose gap is empty or longer than
+	// max_alignment_length, take no part.  Returns true only if an alignment of two or more sequences was stored in cr.
+	boolean create_ok = true;
+	uint seq_count = seq_table.size();
+	//seq_count = r_begin->Multiplicity();
+	uint seqI;
+try{
+
+// 
+//	Get the sequence in the intervening gaps between these two matches
+//
+	vector< string > seq_data;
+	vector< int64 > starts;
+	vector< uint > seqs;
+	const gnFilter* rc_filter = gnFilter::DNAComplementFilter();
+	
+	for( seqI = 0; seqI < seq_count; seqI++ ){
+
+		// skip this sequence if it's undefined
+		if( (r_end != NULL && r_end->Start( seqI ) == NO_MATCH ) ||
+			(r_begin != NULL && r_begin->Start( seqI ) == NO_MATCH) ){
+			starts.push_back( NO_MATCH );
+			continue;
+		}
+
+		// determine the size of the gap
+		int64 gap_start = 0;
+		int64 gap_end = 0;
+		getInterveningCoordinates( seq_table, r_begin, r_end, seqI, gap_start, gap_end );
+
+		int64 diff = gap_end - gap_start;
+		if( diff <= 0 || diff > max_alignment_length ){
+			starts.push_back( NO_MATCH );
+			continue;	// skip this sequence if it's either too big or too small
+		}
+		seqs.push_back( seqI );
+// the gnSequence pointers are shared across threads and have a common ifstream
+		// extract sequence data
+		if( r_end == NULL || r_end->Start( seqI ) > 0 ){
+			starts.push_back( gap_start );
+			seq_data.push_back( seq_table[ seqI ]->ToString( diff , gap_start ) );
+		}else{
+			// reverse complement the sequence data.
+			starts.push_back( -gap_start );
+			string cur_seq_data = seq_table[ seqI ]->ToString( diff , gap_start );
+			rc_filter->ReverseFilter( cur_seq_data );
+			seq_data.push_back( cur_seq_data );
+		}
+	}
+
+	if( seqs.size() <= 1 )
+		create_ok = false;
+
+	if( create_ok ){
+//		SetMuscleArguments( " -quiet -stable -seqtype DNA " );
+		vector< string > aln_matrix;
+		if( !CallMuscleFast( aln_matrix, seq_data, 0, 0 ) ){
+			cout << "Muscle was unable to align:\n";
+			if( r_begin )
+				cout << "Left match: " << *r_begin << endl;
+			if( r_end )
+				cout << "Right match: " << *r_end << endl;
+			return false;
+		}
+
+		gnSeqI aln_length = aln_matrix.size() == 0 ? 0 : aln_matrix[0].length();
+		cr = GappedAlignment( seq_count, aln_length );
+		vector< string > aln_mat = vector< string >( seq_count );
+
+		// set sequence starts
+		for( uint seqI = 0; seqI < seqs.size(); seqI++ ){
+			cr.SetLength( seq_data[ seqI ].size(), seqs[ seqI ] );
+			aln_mat[ seqs[ seqI ] ] = aln_matrix[ seqI ];
+		}
+		for( uint seqI = 0; seqI < seq_count; seqI++ ){
+			cr.SetStart( seqI, starts[ seqI ] );
+			if( aln_mat[ seqI ].length() != aln_length )
+				aln_mat[ seqI ] = string( aln_length, '-' );
+		}
+
+		cr.SetAlignment( aln_mat );
+
+		return true;
+	}
+}catch(exception& e){
+	cerr << "At: " << __FILE__ << ":" << __LINE__ << endl;
+	cerr << e.what();
+}
+	return false;
+}
+
+static int failure_count = 0;
+
+boolean MuscleInterface::Align( GappedAlignment& cr, AbstractMatch* r_begin, AbstractMatch* r_end, vector< gnSequence* >& seq_table){
+	// Aligns the regions lying between r_begin and r_end, one region per match component, and stores the result in cr.
+	// Region text and length are always taken from seq_table[ 0 ].  Either match may be NULL (both NULL yields false).
+	// Returns true only if an alignment was stored in cr; a caught std::exception is logged and yields false.
+	boolean create_ok = true;
+	//tjt: set the seq_count to a match m's multiplicity
+	//     even though all components n of match m could be 
+	//     less than the k sequences
+	//     if n == k, then perhaps there is 1 match component per sequence
+	//     if k = 1, n == repeat match multiplicity, where n >= 2
+	//     
+	uint seq_count = r_begin != NULL ? r_begin->Multiplicity() : ( r_end != NULL ? r_end->Multiplicity() : 0 );
+	uint seqI;
+try{
+
+// 
+//	Get the sequence in the intervening gaps between these two matches
+//
+	vector< string > seq_data;
+	vector< int64 > starts;
+	vector< uint > seqs;
+	const gnFilter* rc_filter = gnFilter::DNAComplementFilter();
+	
+	//std::cout << "getting regions between match components to align" << std::endl;
+	for( seqI = 0; seqI < seq_count; seqI++ ){
+
+		// skip this sequence if it's undefined
+		if( (r_end != NULL && r_end->Start( seqI ) == NO_MATCH ) ||
+			(r_begin != NULL && r_begin->Start( seqI ) == NO_MATCH) ){
+			starts.push_back( NO_MATCH );
+			continue;
+		}
+
+		// determine the size of the gap
+		int64 gap_start = 0;
+		int64 gap_end = 0;
+
+		// determine the size of the gap
+		gap_end = r_end != NULL ? r_end->Start( seqI ) : seq_table[ 0 ]->length() + 1;
+		gap_start = r_begin != NULL ? r_begin->End( seqI ) + 1 : 1;
+		if( gap_end < 0 || gap_start < 0 ){
+			gap_end   = r_begin != NULL ? -r_begin->Start( seqI ) : seq_table[ 0 ]->length() + 1;
+			gap_start = r_end != NULL ? -r_end->Start( seqI ) + r_end->Length( seqI ) : 1;
+		}
+		if( gap_end <= 0 || gap_start <= 0 ){
+			// if either is still < 0 then there's a problem...
+			genome::ErrorMsg( "Error constructing intervening coordinates" );
+		}
+
+		int64 diff = gap_end - gap_start;
+		
+		//diff <= 0 ||
+		if( diff <= 0 || diff > max_alignment_length ){
+			starts.push_back( NO_MATCH );
+			continue;	// skip this sequence if it's either too big or too small
+		}
+
+		seqs.push_back( seqI );
+
+		// extract sequence data
+		if (0 )
+		{
+			starts.push_back( gap_start );
+			seq_data.push_back( "A" );
+			std::cout << "A" << std::endl;
+			diff = 1;
+		}
+// the gnSequence pointers are shared across threads and have a common ifstream
+		if( r_end == NULL || r_end->Start( seqI ) > 0 ){
+			starts.push_back( gap_start );
+			//std::cout << seq_table[ 0 ]->ToString( diff , gap_start ) << std::endl;
+			//tjt: all sequences are concatenated together into 1 seq_table entry
+			//
+			seq_data.push_back( seq_table[ 0 ]->ToString( diff , gap_start ) );
+		}else{
+			// reverse complement the sequence data.
+			starts.push_back( -gap_start );
+			//tjt: all sequences are concatenated together into 1 seq_table entry
+			//     
+			string cur_seq_data = seq_table[ 0 ]->ToString( diff , gap_start );
+			rc_filter->ReverseFilter( cur_seq_data );
+			seq_data.push_back( cur_seq_data );
+			//std::cout << cur_seq_data << std::endl;
+		}
+	}
+
+    //no seqs able to be aligned..
+    if( seqs.size() == 0)
+        create_ok = false;
+
+
+	if( create_ok ){
+//		SetMuscleArguments( " -quiet -stable -seqtype DNA " );
+		vector< string > aln_matrix;
+		if( !CallMuscleFast( aln_matrix, seq_data, 0, 0 ) ){
+			cout << "Muscle was unable to align:\n";
+			return false;
+		}
+        
+        //fill in regions between adjacent seeds with gaps
+        //if aln_matrix is smaller than multiplicity, then we know 
+        //that there are some regions between seeds that have len == 0
+        if (aln_matrix.size() != seq_count && 0)
+        {
+            for( uint seqI = 0; seqI < starts.size(); seqI++ )
+            {
+                //if this a position between two adjacent matches..
+                if (starts.at(seqI) == NO_MATCH)
+                {
+                    //calculate the number of gaps to fill in
+                    int64 gap_end = r_end != NULL ? r_end->Start( seqI ) : seq_table[ 0 ]->length() + 1;
+		            int64 gap_start = r_begin != NULL ? r_begin->End( seqI ) + 1 : 1;
+                    if( r_end == NULL || r_end->Start( seqI ) > 0 ){
+			            starts[seqI] = 0;//gap_start;
+			            seq_data.insert(seq_data.begin()+(seqI),"");
+		            }else{
+			            starts[seqI] = 0;//-gap_start;
+			            seq_data.insert(seq_data.begin()+(seqI),"");
+		            }
+                    string tmp(aln_matrix[0].length(), '-');
+                    aln_matrix.insert(aln_matrix.begin()+(seqI), tmp);
+                    seqs.insert(seqs.begin()+(seqI),seqI);
+                }
+            }
+        }
+		gnSeqI aln_length = aln_matrix.size() == 0 ? 0 : aln_matrix[0].length();
+		cr = GappedAlignment( seq_count, aln_length );
+		vector< string > aln_mat = vector< string >( seq_count );
+
+		// set sequence starts
+		for( uint seqI = 0; seqI < seqs.size(); seqI++ ){
+			cr.SetLength( seq_data[ seqI ].size(), seqs[ seqI ] );
+			aln_mat[ seqs[ seqI ] ] = aln_matrix[ seqI ];
+		}
+		for( uint seqI = 0; seqI < seq_count; seqI++ ){
+			cr.SetStart( seqI, starts[ seqI ] );
+			if( aln_mat[ seqI ].length() != aln_length )
+				aln_mat[ seqI ] = string( aln_length, '-' );
+		}
+
+		cr.SetAlignment( aln_mat );
+
+		return true;
+	}
+}catch(exception& e){
+	cerr << "At: " << __FILE__ << ":" << __LINE__ << endl;
+	cerr << e.what();
+}
+	return false;
+}
+
+boolean MuscleInterface::CallMuscle( vector< string >& aln_matrix, const vector< string >& seq_table )
+{
+	// Pipes seq_table as FastA to the external muscle program and appends one string per output record to aln_matrix.
+	// Returns true on success; on failure the input is saved to muscle_failure_<n>.txt and false is returned.
+	gnSequence seq;
+	try{
+		ostringstream input_seq_stream;
+		for( uint seqI = 0; seqI < seq_table.size(); seqI++ ){
+			seq += seq_table[ seqI ];
+			seq.setContigName( seqI, "seq" );
+		}
+		gnFASSource::Write( seq, input_seq_stream, false, true );		
+		// now open a pipe to Muscle
+		string muscle_cmd = muscle_path + " " + muscle_arguments;
+		string output;
+		string error;
+		boolean success = pipeExec( muscle_cmdline, muscle_cmd, input_seq_stream.str(), output, error );
+		if( !success || output.size() == 0 )
+		{
+			throw "b0rk3d";
+		}
+
+		istringstream output_aln_stream( output );
+		string cur_line;
+
+		// parse the fasta output
+		while( getline( output_aln_stream, cur_line ) ){
+			if( cur_line[0] == '>' ){
+				aln_matrix.push_back( "" );
+				continue;
+			}
+			gnSeqI len = cur_line.size();
+			len = ( len > 0 && cur_line[ len - 1 ] == '\r' ) ? len - 1 : len;	// strip a trailing CR; an empty line has none
+			if( aln_matrix.empty() ) continue;	// text ahead of the first '>' header belongs to no record
+			aln_matrix.back() += cur_line.substr( 0, len );
+		}
+
+		return true;
+	}catch( gnException& gne ){
+	}catch( exception& e ){
+	}catch(...){
+	}
+	cerr << "muscle failed!  saving failed input data to muscle_failure_" << failure_count << ".txt\n";
+	cerr << "Please contact the Mauve developers about this problem\n";
+	stringstream debug_fname;
+	debug_fname << "muscle_failure_" << failure_count++ << ".txt";
+	ofstream debug_file( debug_fname.str().c_str() );
+	gnFASSource::Write(seq, debug_file, false);
+	debug_file.close();
+	return false;
+}
+
+// version 2 of this code: attempt to call muscle without performing costly disk I/O!!
+boolean MuscleInterface::CallMuscleFast( vector< string >& aln_matrix, const vector< string >& seq_table, int gap_open, int gap_extend )
+{	// Aligns seq_table by calling the linked-in MUSCLE() routine directly and writes the aligned rows to aln_matrix.
+	if (gap_open != 0)	// a value of 0 leaves the current gap-open score untouched
+		g_scoreGapOpen.get() = gap_open;
+	if (gap_extend != 0)	// likewise for the gap-extend score
+		g_scoreGapExtend.get() = gap_extend;
+	g_SeqType.get() = SEQTYPE_DNA;	// we're operating on DNA
+	g_uMaxIters.get() = 1;			// and we don't want to refine the alignment...yet
+	g_bStable.get() = true;			// we want output seqs in the same order as input
+	g_bQuiet.get() = true;			// and don't print anything to the console
+	g_SeqWeight1.get() = SEQWEIGHT_ClustalW;	// not sure what weighting scheme works best for DNA
+
+	SetMaxIters(g_uMaxIters.get());
+	SetSeqWeightMethod(g_SeqWeight1.get());
+
+	// now construct a SeqVect containing input sequences
+	SeqVect sv;
+	const char* seqname = "seq00000";	// every sequence gets this same name; the id set below is what differs
+	for( size_t seqI = 0; seqI < seq_table.size(); seqI++ )
+	{
+		Seq curseq;
+		curseq.SetId(seqI);
+		curseq.SetName(seqname);
+		curseq.resize(seq_table[seqI].size());
+		std::copy(seq_table[seqI].begin(), seq_table[seqI].end(), curseq.begin());
+		sv.AppendSeq(curseq);
+	}
+
+	MSA msaTmp;
+	MUSCLE(sv,msaTmp);
+
+	// now extract the alignment
+	aln_matrix.clear();	// any previous contents of aln_matrix are discarded
+	aln_matrix.resize(msaTmp.GetSeqCount());
+	for( size_t seqI = 0; seqI < msaTmp.GetSeqCount(); seqI++ )
+	{
+		unsigned indie = msaTmp.GetSeqIndex(seqI);	// row of msaTmp looked up from seqI
+		const char* buf = msaTmp.GetSeqBuffer(indie);
+		string curseq(buf, msaTmp.GetColCount());	// copy exactly GetColCount() characters of that row
+		swap(aln_matrix[seqI],curseq);
+	}
+	return true;	// how can it possibly fail? :)
+}
+
+bool MuscleInterface::Refine( GappedAlignment& ga, size_t windowsize )
+{
+	// Re-align the rows of ga with the external muscle program; ga is only modified when muscle succeeds.
+	const vector< string >& seq_table = GetAlignment( ga, vector< gnSequence* >() );
+	// hand muscle only the rows of sequences that are defined in ga
+	vector< string > defined_rows;
+	for( uint rowI = 0; rowI < ga.SeqCount(); rowI++ )
+		if( ga.LeftEnd(rowI) != NO_MATCH )
+			defined_rows.push_back( seq_table[rowI] );
+	// windowsize 0 selects plain -refine, anything else selects windowed refinement
+	if( windowsize != 0 )
+	{
+		stringstream arg_stream;
+		arg_stream << " -quiet -seqtype DNA -refinew -refinewindow " << windowsize << " ";
+		SetMuscleArguments( arg_stream.str() );
+	}
+	else
+		SetMuscleArguments( " -quiet -refine -seqtype DNA " );
+
+	vector< string > refined_rows;
+	if( !CallMuscle( refined_rows, defined_rows ) )
+		return false;
+
+	// rebuild the full matrix: refined rows in order, all-gap rows for the undefined sequences
+	vector< string > full_matrix;
+	uint refinedI = 0;
+	for( uint rowI = 0; rowI < ga.SeqCount(); rowI++ )
+	{
+		if( ga.LeftEnd(rowI) == NO_MATCH )
+			full_matrix.push_back( string( refined_rows[0].size(), '-' ) );
+		else
+			full_matrix.push_back( refined_rows[refinedI++] );
+	}
+	ga.SetAlignment( full_matrix );
+	return true;
+}
+
+void msaFromSeqTable(MSA& msa, const vector< string >& seq_table, unsigned id_base = 0)
+{
+	// Fill msa from seq_table: row seqI is named "seq<seqI>" and gets id seqI + id_base.
+	// The column count comes from the first row; an empty table is sized 0 x 0 instead of reading seq_table[0].
+	msa.SetSize(seq_table.size(), seq_table.empty() ? 0 : seq_table[0].size());
+	for( uint seqI = 0; seqI < seq_table.size(); seqI++ )
+	{
+		stringstream ss; ss << "seq" << seqI;
+		msa.SetSeqName(seqI, ss.str().c_str());
+		msa.SetSeqId(seqI,seqI+id_base);
+		for(size_t i = 0; i < seq_table[seqI].size(); i++)
+			msa.SetChar(seqI, i, seq_table[seqI][i]);
+	}
+}
+
+
+bool MuscleInterface::RefineFast( GappedAlignment& ga, size_t windowsize )
+{	// Refines the alignment in ga with the linked-in MUSCLE refinement routines and stores the result back in ga.
+	const vector< string >& seq_table = GetAlignment( ga, vector< gnSequence* >() );
+	vector< string > aln_table;	// NOTE(review): filled below but never read again in this function
+	for( uint seqI = 0; seqI < ga.SeqCount(); seqI++ )
+	{
+		if( ga.LeftEnd(seqI) != NO_MATCH )
+		{
+			aln_table.push_back( seq_table[seqI] );
+		}
+	}
+
+	g_SeqType.get() = SEQTYPE_DNA;	// we're operating on DNA
+	g_uMaxIters.get() = 1;			// and we don't want to refine the alignment...yet
+	g_bStable.get() = true;			// we want output seqs in the same order as input
+	g_bQuiet.get() = true;			// and don't print anything to the console
+	g_SeqWeight1.get() = SEQWEIGHT_ClustalW;	// not sure what weighting scheme works best for DNA
+
+	g_uRefineWindow.get() = windowsize;
+	g_uWindowTo.get() = 0;
+
+	SetMaxIters(g_uMaxIters.get());
+	SetSeqWeightMethod(g_SeqWeight1.get());
+
+	MSA::SetIdCount(seq_table.size());
+
+	// create an MSA
+	MSA msa;
+	msaFromSeqTable(msa, seq_table);	// built from every row of seq_table, not from the filtered aln_table
+
+	SetAlpha(ALPHA_DNA);
+	msa.FixAlpha();
+	SetPPScore(PPSCORE_SPN);
+	SetMuscleInputMSA(msa);
+
+	Tree GuideTree;
+	TreeFromMSA(msa, GuideTree, g_Cluster2.get(), g_Distance2.get(), g_Root2.get());
+	SetMuscleTree(GuideTree);
+
+	MSA msaOut;
+	MSA* finalMsa;	// points at whichever MSA holds the refined result
+
+	if(windowsize == 0)
+	{
+		if (g_bAnchors.get())
+			RefineVert(msa, GuideTree, g_uMaxIters.get());
+		else
+			RefineHoriz(msa, GuideTree, g_uMaxIters.get(), false, false);
+		finalMsa = &msa;	// this branch reads the result from msa itself
+	}else{
+		RefineW(msa, msaOut);
+		finalMsa = &msaOut;	// windowed refinement writes its result to msaOut
+	}
+
+
+	ValidateMuscleIds(*finalMsa);
+	ValidateMuscleIds(GuideTree);
+
+	// now extract the alignment
+	vector< string > aln_matrix;
+	aln_matrix.resize(finalMsa->GetSeqCount());
+	for( size_t seqI = 0; seqI < finalMsa->GetSeqCount(); seqI++ )
+	{
+		unsigned indie = finalMsa->GetSeqIndex(seqI);	// row of *finalMsa looked up from seqI
+		const char* buf = finalMsa->GetSeqBuffer(indie);
+		string curseq(buf, finalMsa->GetColCount());	// copy exactly GetColCount() characters of that row
+		swap(aln_matrix[seqI],curseq);
+	}
+
+	ga.SetAlignment( aln_matrix );
+	return true;	// this function has no failure return
+}
+
+
+void stripGapColumns( std::vector< std::string >& aln )
+{	// Removes, in place, every column in which all rows hold '-'.  Assumes every row is as long as aln[0].
+	size_t cur_col = 0;	// next output column to write
+	const size_t col_count = aln.empty() ? 0 : aln[0].size();	// an empty alignment has no columns to scan
+	for( size_t colI = 0; colI < col_count; colI++ )
+	{
+		size_t gap_seq = 0;
+		for( ; gap_seq < aln.size(); gap_seq++ )
+			if( aln[gap_seq][colI] != '-' )
+				break;
+		if( gap_seq != aln.size() )	// some row has a non-gap character here: keep the column
+		{
+			for( gap_seq = 0; gap_seq < aln.size(); gap_seq++ )
+				aln[gap_seq][cur_col] = aln[gap_seq][colI];
+			cur_col++;
+		}
+	}
+	for( size_t seqI = 0; seqI < aln.size(); seqI++ )
+		aln[seqI].resize(cur_col);	// drop the tail left over after compaction
+}
+
+void stripGaps( std::string& str )
+{
+	// erase-remove: compact the non-gap characters to the front, then cut off the leftover tail
+	str.erase( std::remove(str.begin(), str.end(), '-'), str.end() );
+}
+
+bool MuscleInterface::ProfileAlign( const GappedAlignment& ga1, const GappedAlignment& ga2, GappedAlignment& aln, bool anchored )
+{	// Profile-aligns ga1 against ga2 through the external muscle program and stores the merged alignment in aln.
+	try{	// returns true on success; false when muscle fails, its output does not match the input, or anything throws
+		const vector< string >& aln1 = GetAlignment( ga1, vector< gnSequence* >() );
+		const vector< string >& aln2 = GetAlignment( ga2, vector< gnSequence* >() );
+		vector< uint > order;	// sequence index of each record sent to muscle, in sending order
+		ostringstream input_seq_stream;
+		gnSequence seq;
+		vector< string > aln11( ga1.Multiplicity() );
+		vector< string > aln22( ga2.Multiplicity() );
+		size_t curI = 0;
+		for( uint seqI = 0; seqI < aln1.size(); seqI++ )
+		{
+			if( ga1.LeftEnd(seqI) != NO_MATCH )
+			{
+				aln11[curI++] = aln1[seqI];
+				order.push_back(seqI);
+			}
+		}
+		curI = 0;
+		for( uint seqI = 0; seqI < aln2.size(); seqI++ )
+		{
+			if( ga2.LeftEnd(seqI) != NO_MATCH )
+			{
+				aln22[curI++] = aln2[seqI];
+				order.push_back(seqI);
+			}
+		}
+// strip the gap columns only if we're doing unanchored PP alignment
+		if( !anchored )
+		{
+			stripGapColumns(aln11);
+			stripGapColumns(aln22);
+		}
+		for( uint seqI = 0; seqI < aln11.size(); seqI++ )
+		{
+			seq += aln11[ seqI ];
+			seq.setContigName( seq.contigListLength()-1, "seq" );
+		}
+
+		gnFASSource::Write( seq, input_seq_stream, false, true );
+		input_seq_stream << "=\n";
+
+		gnSequence seq2;
+		for( uint seqI = 0; seqI < aln22.size(); seqI++ )
+		{
+			seq2 += aln22[ seqI ];
+			seq2.setContigName( seq2.contigListLength()-1, "seq" );
+		}
+
+		gnFASSource::Write( seq2, input_seq_stream, false, true );
+		input_seq_stream << "=\n";
+
+		if( debug_muscle )
+		{
+			// for debugging: write the anchored profiles to a file
+			stringstream debug_fname;
+			debug_fname << "muscle_debug_" << failure_count++ << ".txt";
+			ofstream debug_file( debug_fname.str().c_str() );
+			debug_file << input_seq_stream.str();
+			debug_file.close();
+		}
+
+		// now open a pipe to Muscle
+		string musc_args = "-quiet -seqtype DNA -profile -ProfileOnStdIn ";
+		if( anchored )
+			musc_args += "-AnchoredPP ";
+		SetMuscleArguments( musc_args );
+		string output;
+		string error;
+		string muscle_cmd = muscle_path + " " + muscle_arguments;
+		if( debug_muscle )
+		{
+			cerr << "Running " << muscle_cmd << endl;
+		}
+		boolean success = pipeExec( muscle_cmdline, muscle_cmd, input_seq_stream.str(), output, error );
+		if( !success || output.size() == 0 )
+		{
+			if( output.size() == 0 )
+				cerr << "\nmuscle nothing\n";
+			else
+				cerr << "\nunsuccessful muscle\n";
+			return false;
+		}
+
+		istringstream output_aln_stream( output );
+		string cur_line;
+
+		// parse the fasta output
+		vector< string > aln_matrix( ga1.SeqCount() );
+		int ordI = -1;
+		while( getline( output_aln_stream, cur_line ) ){
+			if( cur_line[0] == '>' ){
+				ordI++;
+				continue;
+			}
+			gnSeqI len = cur_line.size();
+			len = ( len > 0 && cur_line[ len - 1 ] == '\r' ) ? len - 1 : len;	// strip a trailing CR; an empty line has none
+			if( ordI < 0 || (size_t)ordI >= order.size() ) return false;	// data with no matching input record
+			aln_matrix[ order[ordI] ] += cur_line.substr( 0, len );
+		}
+		for( size_t i = 0; i < aln_matrix.size(); i++ )
+		{
+			if( aln_matrix[i].size() == 0 && !order.empty() )	// rows muscle did not return become all-gap rows
+				aln_matrix[i].resize( aln_matrix[order[0]].size(), '-' );
+		}
+
+		aln.SetAlignment( aln_matrix );
+		for( uint seqI = 0; seqI < ga1.SeqCount(); seqI++ )
+			if( ga1.LeftEnd(seqI) != NO_MATCH )
+			{
+				aln.SetLeftEnd(seqI, ga1.LeftEnd(seqI));
+				aln.SetLength(ga1.Length(seqI), seqI);
+			}
+		for( uint seqI = 0; seqI < ga2.SeqCount(); seqI++ )
+			if( ga2.LeftEnd(seqI) != NO_MATCH )
+			{
+				aln.SetLeftEnd(seqI, ga2.LeftEnd(seqI));
+				aln.SetLength(ga2.Length(seqI), seqI);
+			}
+		return true;
+	}catch( gnException& gne ){
+	}catch( exception& e ){
+	}catch(...){
+	}
+	return false;
+}
+
+
+/**
+ * Profile-profile aligns ga1 against ga2 by calling the linked MUSCLE
+ * routines directly and stores the result in aln.
+ *
+ * Only rows whose LeftEnd() differs from NO_MATCH are handed to MUSCLE; every
+ * other row of the result is filled with gap characters.
+ *
+ * @param ga1		first input alignment
+ * @param ga2		second input alignment
+ * @param aln		receives the combined alignment plus the left end and
+ *					length of every defined row of ga1 and ga2
+ * @param anchored	when true AnchoredProfileProfile() is used and the input
+ *					columns are passed unchanged; when false all-gap columns are
+ *					stripped first and ProfileProfile() is used
+ * @return true on success, false if any exception was thrown (the exception
+ *         itself is discarded without being reported)
+ */
+bool MuscleInterface::ProfileAlignFast( const GappedAlignment& ga1, const GappedAlignment& ga2, GappedAlignment& aln, bool anchored )
+{
+	try{
+		const vector< string >& aln1 = GetAlignment( ga1, vector< gnSequence* >() );
+		const vector< string >& aln2 = GetAlignment( ga2, vector< gnSequence* >() );
+		// order[k] is the original row index (seqI) of the k-th sequence given
+		// to MUSCLE: defined rows of ga1 first, then defined rows of ga2
+		vector< uint > order;
+		vector< string > aln11( ga1.Multiplicity() );
+		vector< string > aln22( ga2.Multiplicity() );
+		size_t curI = 0;
+		// compact the defined rows of ga1 into aln11
+		for( uint seqI = 0; seqI < aln1.size(); seqI++ )
+		{
+			if( ga1.LeftEnd(seqI) != NO_MATCH )
+			{
+				aln11[curI++] = aln1[seqI];
+				order.push_back(seqI);
+			}
+		}
+		curI = 0;
+		// compact the defined rows of ga2 into aln22
+		for( uint seqI = 0; seqI < aln2.size(); seqI++ )
+		{
+			if( ga2.LeftEnd(seqI) != NO_MATCH )
+			{
+				aln22[curI++] = aln2[seqI];
+				order.push_back(seqI);
+			}
+		}
+// strip the gap columns only if we're doing unanchored PP alignment
+		if( !anchored )
+		{
+			stripGapColumns(aln11);
+			stripGapColumns(aln22);
+		}
+
+		// configure MUSCLE through its global settings objects
+		g_SeqType.get() = SEQTYPE_DNA;	// we're operating on DNA
+		g_uMaxIters.get() = 1;			// and we don't want to refine the alignment...yet
+		g_bStable.get() = true;			// we want output seqs in the same order as input
+		g_bQuiet.get() = true;			// and don't print anything to the console
+		g_SeqWeight1.get() = SEQWEIGHT_ClustalW;	// not sure what weighting scheme works best for DNA
+
+		SetMaxIters(g_uMaxIters.get());
+		SetSeqWeightMethod(g_SeqWeight1.get());
+
+		// one id per sequence handed to MUSCLE (both profiles together)
+		MSA::SetIdCount(order.size());
+
+		MSA msa1;
+		MSA msa2;
+		MSA msaOut;
+		msaFromSeqTable(msa1, aln11);
+		// third argument is the sequence count of msa1
+		// NOTE(review): presumably an id offset so msa2 ids follow msa1 ids -- confirm in msaFromSeqTable
+		msaFromSeqTable(msa2, aln22, msa1.GetSeqCount());
+
+		SetAlpha(ALPHA_DNA);
+		msa1.FixAlpha();
+		msa2.FixAlpha();
+		SetPPScore(PPSCORE_SPN);
+
+		if(anchored)
+		{
+			AnchoredProfileProfile(msa1, msa2, msaOut);
+		}else{
+			ProfileProfile(msa1, msa2, msaOut);
+		}
+
+		// get the output
+		// aln_matrix has one (initially empty) row per row of ga1
+		vector< string > aln_matrix( aln1.size() );
+		for( size_t seqI = 0; seqI < msaOut.GetSeqCount(); seqI++ )
+		{
+			// NOTE(review): the value returned by GetSeqIndex(seqI) is used both as
+			// the row to read from msaOut and as the index into order -- confirm
+			// this mapping is intended whenever ids and row indices differ
+			unsigned indie = msaOut.GetSeqIndex(seqI);
+			const char* buf = msaOut.GetSeqBuffer(indie);
+			string curseq(buf, msaOut.GetColCount());
+			swap(aln_matrix[order[indie]],curseq);
+
+			// debugging, check that sequences came out in the same order they went in!
+/*			string inseq = aln1[order[indie]];
+			string outseq = aln_matrix[order[indie]];
+			stripGaps(inseq);
+			stripGaps(outseq);
+			if(inseq != outseq)
+			{
+				unsigned indie = msaOut.GetSeqIndex(seqI);
+				cerr << "bad indie " << indie << endl;
+				genome::breakHere();
+			}
+*/
+		}
+		// fill empty seqs with gaps
+		for( size_t seqI = 0; seqI < aln_matrix.size(); seqI++ )
+			if(aln_matrix[seqI].size() == 0)
+				aln_matrix[seqI].resize(msaOut.GetColCount(), '-');
+
+		aln.SetAlignment( aln_matrix );
+		// copy coordinates of the defined rows from both inputs into the result
+		for( uint seqI = 0; seqI < ga1.SeqCount(); seqI++ )
+			if( ga1.LeftEnd(seqI) != NO_MATCH )
+			{
+				aln.SetLeftEnd(seqI, ga1.LeftEnd(seqI));
+				aln.SetLength(ga1.Length(seqI), seqI);
+			}
+		for( uint seqI = 0; seqI < ga2.SeqCount(); seqI++ )
+			if( ga2.LeftEnd(seqI) != NO_MATCH )
+			{
+				aln.SetLeftEnd(seqI, ga2.LeftEnd(seqI));
+				aln.SetLength(ga2.Length(seqI), seqI);
+			}
+		return true;
+
+	// every exception is swallowed here; the caller only sees the false return
+	}catch( gnException& gne ){
+	}catch( exception& e ){
+	}catch(...){
+	}
+	return false;
+}
+
+
+/**
+ * Builds a neighbor-joining tree from a distance matrix using the MUSCLE
+ * clustering classes and writes it to a file.
+ *
+ * Taxa are named "seq1", "seq2", ... in row order.
+ *
+ * @param distances		distance matrix; entry (i,j) is read for every pair
+ *						of row indices i,j
+ * @param tree_filename	path of the file the tree is written to
+ */
+void MuscleInterface::CreateTree( const NumericMatrix<double>& distances, const std::string& tree_filename  )
+{
+	// silence MUSCLE's console output
+	g_bQuiet.get() = true;
+
+	// load the pairwise distances into MUSCLE's distance function
+	DistFunc dist_func;
+	dist_func.SetCount( distances.rows() );
+	for( size_t rowI = 0; rowI < distances.rows(); ++rowI )
+	{
+		for( size_t colI = 0; colI < distances.rows(); ++colI )
+		{
+			dist_func.SetDist( rowI, colI, distances(rowI,colI) );
+		}
+	}
+
+	// label each taxon with a 1-based name and a 0-based id
+	for( size_t taxonI = 0; taxonI < distances.rows(); ++taxonI )
+	{
+		stringstream label;
+		label << "seq" << taxonI + 1;
+		dist_func.SetName( taxonI, label.str().c_str() );
+		dist_func.SetId( taxonI, taxonI );
+	}
+
+	// cluster with neighbor joining and convert the result to a tree
+	ClustSetDF clust_set( dist_func );
+	Clust nj_clust;
+	nj_clust.Create( clust_set, CLUSTER_NeighborJoining );
+	Tree nj_tree;
+	nj_tree.FromClust( nj_clust );
+
+	// write the tree out
+	TextFile tree_file( tree_filename.c_str(), true );
+	nj_tree.ToFile( tree_file );
+}
+
+
+}
diff --git a/libMems/MuscleInterface.h b/libMems/MuscleInterface.h
new file mode 100644
index 0000000..9e9d502
--- /dev/null
+++ b/libMems/MuscleInterface.h
@@ -0,0 +1,148 @@
+/*******************************************************************************
+ * $Id: MuscleInterface.h,v 1.12 2004/04/19 23:10:50 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _MuscleInterface_h_
+#define _MuscleInterface_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/NumericMatrix.h"
+#include "libGenome/gnFilter.h"
+#include "libGenome/gnSequence.h"
+#include "libMems/GappedAlignment.h"
+#include "libMems/GappedAligner.h"
+
+// attempt to auto-link the MUSCLE library on windows
+#if defined(WIN64)&&defined(NDEBUG)&&!defined(FASTDEBUG)&&defined(_OPENMP)
+#pragma comment(lib, "MUSCLE64omp.lib")
+#endif
+#if defined(WIN64)&&defined(FASTDEBUG)&&defined(_OPENMP)
+#pragma comment(lib, "MUSCLE64fdomp.lib")
+#endif
+#if defined(WIN32)&&!defined(WIN64)&&defined(NDEBUG)&&!defined(FASTDEBUG)&&defined(_OPENMP)
+#pragma comment(lib, "MUSCLEomp.lib")
+#endif
+#if defined(WIN32)&&!defined(WIN64)&&defined(FASTDEBUG)&&defined(_OPENMP)
+#pragma comment(lib, "MUSCLEfdomp.lib")
+#endif
+#if defined(WIN64)&&defined(NDEBUG)&&!defined(FASTDEBUG)&&!defined(_OPENMP)
+#pragma comment(lib, "MUSCLE64.lib")
+#endif
+#if defined(WIN64)&&defined(FASTDEBUG)&&!defined(_OPENMP)
+#pragma comment(lib, "MUSCLE64fd.lib")
+#endif
+#if defined(WIN32)&&!defined(WIN64)&&defined(NDEBUG)&&!defined(FASTDEBUG)&&!defined(_OPENMP)
+#pragma comment(lib, "MUSCLE.lib")
+#endif
+#if defined(WIN32)&&!defined(WIN64)&&defined(FASTDEBUG)&&!defined(_OPENMP)
+#pragma comment(lib, "MUSCLEfd.lib")
+#endif
+
+namespace mems {
+
+extern bool debug_muscle;
+
+//template< typename MatchType=AbstractMatch >
+/**
+ * GappedAligner implementation that delegates to MUSCLE, either through an
+ * external executable (Align, Refine, ProfileAlign, CallMuscle) or through
+ * the linked MUSCLE library (the *Fast variants and CreateTree).
+ * Instances are obtained through getMuscleInterface(); the constructors are
+ * private.
+ */
+class MuscleInterface : public GappedAligner {
+public:
+	~MuscleInterface()
+	{
+		ClearCommandLine();
+	}
+	/**
+	 * Returns a reference to a usable MuscleInterface
+	 */
+	static MuscleInterface& getMuscleInterface();
+
+	/**
+	 * Parse the execution path from argv[0] and set the muscle
+	 * path accordingly
+	 */
+	void ParseMusclePath( const char* argv0 );
+
+	/** 
+	 * Set the path to the muscle executable
+	 * Defaults to "muscle"
+	 */
+	void SetMusclePath( const std::string& path );
+
+	/** 
+	 * Set the extra arguments to use when executing muscle 
+	 */
+	void SetExtraMuscleArguments( const std::string& extra_args );
+	/** 
+	 * Get the extra arguments to use when executing muscle 
+	 */
+	std::string GetExtraMuscleArguments(){ return this->extra_muscle_arguments; };
+
+	/**
+	 * Attempts to perform a multiple alignment using Muscle between
+	 * <code>r_begin</code> and <code>r_end</code>
+	 */
+	
+	//tjt: not the best way of doing this, should have just one Align function that takes an AbstractMatch*,
+	//     not both Match* & AbstractMatch* in separate, nearly identical functions..
+	//     Such a change would involve changes to GappedAligner, and would require some additional care taken
+	//     with SeqCount & Multiplicity, as well as seq_table[ seqI ]->length()/seq_table[ 0 ]->length(i),
+	//     for now, leave like this. hopefully sooner than later, make pretty!
+	boolean Align( GappedAlignment& cr, Match* r_begin, Match* r_end, std::vector< genome::gnSequence* >& seq_table);
+    
+	boolean Align( GappedAlignment& cr, AbstractMatch* r_begin, AbstractMatch* r_end, std::vector< genome::gnSequence* >& seq_table);
+
+	bool Refine( GappedAlignment& ga, size_t windowsize = 0 );
+
+	/**
+	 * Given two gapped alignments in ga1 and ga2, align them and store the result in aln.  ga1 and
+	 * ga2 must have equal sequence count and contain disjoint sets of sequences, e.g. for any given
+	 * seqI, if ga1.LeftEnd(seqI) != NO_MATCH, then ga2.LeftEnd(seqI) == NO_MATCH 
+	 */
+	bool ProfileAlign( const GappedAlignment& ga1, const GappedAlignment& ga2, GappedAlignment& aln, bool anchored = true );
+	boolean CallMuscle( std::vector< std::string >& aln_matrix, const std::vector< std::string >& seq_table );
+	boolean CallMuscleFast( std::vector< std::string >& aln_matrix, const std::vector< std::string >& seq_table, int gap_open = 0, int gap_extend = 0);
+	bool RefineFast( GappedAlignment& ga, size_t windowsize = 0 );
+	/**
+	 * Same contract as ProfileAlign(), but calls the linked MUSCLE library
+	 * instead of piping the profiles to an external process
+	 */
+	bool ProfileAlignFast( const GappedAlignment& ga1, const GappedAlignment& ga2, GappedAlignment& aln, bool anchored = true );
+
+	/**
+	 * Builds a neighbor-joining tree from the given distance matrix and
+	 * writes it to tree_filename
+	 */
+	void CreateTree( const NumericMatrix<double>& distances, const std::string& tree_filename  );
+
+protected:
+	std::string muscle_path;			/**< path to the muscle executable */
+	std::string muscle_arguments;		/**< argument string appended to muscle_path to form the command */
+	std::string extra_muscle_arguments;	/**< user supplied extra arguments */
+	char** muscle_cmdline;				/**< NULL-terminated array of heap-allocated strings, or NULL */
+
+	void SetMuscleArguments( const std::string& extra_args );
+	
+	/**
+	 * Frees every string in muscle_cmdline and the array itself, then resets
+	 * the pointer so that a repeated call (e.g. from the destructor) is a
+	 * harmless no-op instead of a double free
+	 */
+	void ClearCommandLine()
+	{
+		if( muscle_cmdline != NULL )
+		{
+			size_t cmdI = 0;
+			while(muscle_cmdline[cmdI] != NULL)
+			{
+				delete[] muscle_cmdline[cmdI];
+				cmdI++;
+			}
+			delete[] muscle_cmdline;
+			muscle_cmdline = NULL;
+		}
+	}
+
+private:
+	// muscle_cmdline must hold a defined value before operator= runs, since
+	// anything that inspects or clears it would otherwise read garbage
+	MuscleInterface( const MuscleInterface& ci ) : muscle_cmdline( NULL ) { *this = ci; }
+	MuscleInterface& operator=( const MuscleInterface& ci );
+	MuscleInterface();
+};
+
+
+void stripGapColumns( std::vector< std::string >& aln );
+
+
+}
+
+#endif // _MuscleInterface_h_
diff --git a/libMems/NumericMatrix.h b/libMems/NumericMatrix.h
new file mode 100644
index 0000000..6917dab
--- /dev/null
+++ b/libMems/NumericMatrix.h
@@ -0,0 +1,164 @@
+/*******************************************************************************
+ * $Id: NumericMatrix.h,v 1.4 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2004 Aaron Darling.  All rights reserved.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _NumericMatrix_h_
+#define _NumericMatrix_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/Matrix.h"
+
+/**
+ * Matrix<T> extended with element-wise arithmetic operators.
+ * Only the compound assignment operators +=, -=, *= (scalar) and /= have
+ * working definitions in this header; the remaining operators are declared
+ * but their definitions are placeholders.
+ */
+template<class T>
+class NumericMatrix : public Matrix<T>
+{
+public:
+	NumericMatrix(){};
+	NumericMatrix(unsigned nrows, unsigned ncols);
+   
+      // Based on the Law Of The Big Three:
+	~NumericMatrix();
+	NumericMatrix(const NumericMatrix<T>& m);
+	NumericMatrix<T>& operator= (const NumericMatrix<T>& m);
+	
+	// element-wise compound assignment; the matrix forms throw
+	// Matrix<T>::BadSize when the dimensions differ
+	NumericMatrix<T>& operator+= (const NumericMatrix<T>& m);
+	NumericMatrix<T>& operator-= (const NumericMatrix<T>& m);
+	// matrix product: not implemented, its definition only checks dimensions
+	// and returns *this unchanged
+	NumericMatrix<T>& operator*= (const NumericMatrix<T>& m);
+	// multiply every entry by a scalar
+	NumericMatrix<T>& operator*= (const T& m);
+	// element-wise division by another matrix of the same dimensions
+	NumericMatrix<T>& operator/= (const NumericMatrix<T>& m);
+	// divide every entry by a scalar
+	NumericMatrix<T>& operator/= (const T& m);
+
+	// the following 5 are not implemented: their definitions have empty
+	// bodies and return nothing, so they must not be called
+	NumericMatrix<T>& operator+ (const NumericMatrix<T>& m ) const;
+	const NumericMatrix<T>& operator- (const NumericMatrix<T>& m ) const;
+	const NumericMatrix<T>& operator* (const NumericMatrix<T>& m ) const;
+	const NumericMatrix<T>& operator* (const T& n) const;
+	const NumericMatrix<T>& operator/ (const T& n) const;
+
+};
+   
+/** Constructs a matrix of the given dimensions by forwarding to Matrix<T> */
+template<class T>
+inline NumericMatrix<T>::NumericMatrix(unsigned nrows, unsigned ncols)
+	: Matrix<T>( nrows, ncols )
+{
+}
+   
+/**
+ * Copy constructor: the base is default constructed first and then the
+ * contents of m are copied through operator=
+ */
+template<class T>
+inline NumericMatrix<T>::NumericMatrix(const NumericMatrix<T>& m){
+	*this = m;
+}
+
+/** Assignment: delegates entirely to Matrix<T>::operator= */
+template<class T>
+inline NumericMatrix<T>& NumericMatrix<T>::operator= (const NumericMatrix<T>& m)
+{
+	Matrix<T>::operator=( m );
+	return *this;
+}
+
+/** Destructor: nothing to release beyond what Matrix<T> owns */
+template<class T>
+inline NumericMatrix<T>::~NumericMatrix()
+{
+}
+
+/**
+ * Adds m to this matrix entry by entry.
+ * @throws Matrix<T>::BadSize if the two matrices differ in row or column count
+ */
+template<class T>
+inline
+NumericMatrix<T>& NumericMatrix<T>::operator+= (const NumericMatrix<T>& m){
+	// element-wise addition needs identically shaped operands
+	const bool same_shape = this->nrows_ == m.nrows_ && this->ncols_ == m.ncols_;
+	if( !same_shape )
+		throw typename Matrix<T>::BadSize();
+
+	// walk the flat storage of both matrices in step
+	unsigned cellI = 0;
+	while( cellI < Matrix<T>::nrows_ * Matrix<T>::ncols_ )
+	{
+		this->data_[ cellI ] += m.data_[ cellI ];
+		cellI++;
+	}
+	return *this;
+}
+
+/**
+ * Subtracts m from this matrix entry by entry.
+ * @throws Matrix<T>::BadSize if the two matrices differ in row or column count
+ */
+template<class T>
+inline
+NumericMatrix<T>& NumericMatrix<T>::operator-= (const NumericMatrix<T>& m){
+	// element-wise subtraction needs identically shaped operands
+	const bool same_shape = this->nrows_ == m.nrows_ && this->ncols_ == m.ncols_;
+	if( !same_shape )
+		throw typename Matrix<T>::BadSize();
+
+	// walk the flat storage of both matrices in step
+	unsigned cellI = 0;
+	while( cellI < Matrix<T>::nrows_ * Matrix<T>::ncols_ )
+	{
+		this->data_[ cellI ] -= m.data_[ cellI ];
+		cellI++;
+	}
+	return *this;
+}
+
+/**
+ * Matrix product placeholder.  The dimension compatibility check is performed
+ * (column count of this matrix against row count of m), but no multiplication
+ * is carried out: the matrix is returned unchanged.
+ * @throws Matrix<T>::BadSize if this->ncols_ differs from m.nrows_
+ */
+template<class T>
+inline
+NumericMatrix<T>& NumericMatrix<T>::operator*= (const NumericMatrix<T>& m){
+	// make sure matrix dimensions agree
+	if (this->ncols_ != m.nrows_)
+		throw typename Matrix<T>::BadSize();
+	// do a matrix multiply
+	// TODO: not implemented -- *this is returned unmodified
+	return *this;
+}
+
+/** Multiplies every entry of this matrix by the scalar m */
+template<class T>
+inline
+NumericMatrix<T>& NumericMatrix<T>::operator*= (const T& m){
+	// scale the flat storage one cell at a time
+	unsigned cellI = 0;
+	while( cellI < Matrix<T>::nrows_ * Matrix<T>::ncols_ )
+	{
+		this->data_[ cellI ] *= m;
+		cellI++;
+	}
+	return *this;
+}
+
+/** Divides every entry of this matrix by the scalar m */
+template<class T>
+inline
+NumericMatrix<T>& NumericMatrix<T>::operator/= (const T& m){
+	// divide the flat storage one cell at a time
+	unsigned cellI = 0;
+	while( cellI < Matrix<T>::nrows_ * Matrix<T>::ncols_ )
+	{
+		this->data_[ cellI ] /= m;
+		cellI++;
+	}
+	return *this;
+}
+
+/**
+ * Divides this matrix by m entry by entry.
+ * @throws Matrix<T>::BadSize if the two matrices differ in row or column count
+ */
+template<class T>
+inline
+NumericMatrix<T>& NumericMatrix<T>::operator/= ( const NumericMatrix<T>& m ){
+	// element-wise division needs identically shaped operands
+	const bool same_shape = this->nrows_ == m.nrows_ && this->ncols_ == m.ncols_;
+	if( !same_shape )
+		throw typename Matrix<T>::BadSize();
+
+	// walk the flat storage of both matrices in step
+	unsigned cellI = 0;
+	while( cellI < Matrix<T>::nrows_ * Matrix<T>::ncols_ )
+	{
+		this->data_[ cellI ] /= m.data_[ cellI ];
+		cellI++;
+	}
+	return *this;
+}
+
+// The five binary operators below are unimplemented placeholders.  Each has an
+// empty body in a function with a non-void return type, so calling any of them
+// is undefined behaviour.  Being templates, they are only instantiated if used.
+
+/** Not implemented: empty body, returns nothing -- do not call */
+template<class T>
+inline
+NumericMatrix<T>& NumericMatrix<T>::operator+ (const NumericMatrix<T>& m) const {
+
+}
+/** Not implemented: empty body, returns nothing -- do not call */
+template<class T>
+inline
+const NumericMatrix<T>& NumericMatrix<T>::operator- (const NumericMatrix<T>& m) const {
+
+}
+/** Not implemented: empty body, returns nothing -- do not call */
+template<class T>
+inline
+const NumericMatrix<T>& NumericMatrix<T>::operator* (const NumericMatrix<T>& m) const {
+
+}
+/** Not implemented: empty body, returns nothing -- do not call */
+template<class T>
+inline
+const NumericMatrix<T>& NumericMatrix<T>::operator* (const T& n) const {
+
+}
+/** Not implemented: empty body, returns nothing -- do not call */
+template<class T>
+inline
+const NumericMatrix<T>& NumericMatrix<T>::operator/ (const T& n) const {
+
+}
+
+
+#endif // _NumericMatrix_h_
diff --git a/libMems/PairwiseMatchAdapter.h b/libMems/PairwiseMatchAdapter.h
new file mode 100644
index 0000000..fd83646
--- /dev/null
+++ b/libMems/PairwiseMatchAdapter.h
@@ -0,0 +1,117 @@
+#ifndef __PairwiseMatchAdapter_h__
+#define __PairwiseMatchAdapter_h__
+
+#include "libMems/AbstractMatch.h"
+#include "libMems/ProgressiveAligner.h"
+#include <vector>
+
+namespace mems {
+
+/**
+ * PairwiseMatchAdapter is a wrapper around an AbstractMatch that effectively projects a multi-match to a
+ * pairwise match.  The adapter class forwards most function calls to the original match
+ * class, to which it stores a pointer.  Use of non-const functions results in undefined state.
+ */
+class PairwiseMatchAdapter : public mems::AbstractMatch
+{
+public:
+	/** Creates an adapter that wraps no match; every member gets a defined value */
+	PairwiseMatchAdapter() : m(NULL), tm(NULL), inverted(false)
+	{
+		seq[0] = 0;
+		seq[1] = 0;
+	}
+	/**
+	 * Wraps match, exposing its sequences seq1 and seq2 as sequences 0 and 1
+	 * of this adapter.  The match is not copied; only the pointer is stored.
+	 */
+	PairwiseMatchAdapter( AbstractMatch* match, uint seq1, uint seq2 ) :
+	  m(match), tm(NULL), inverted(false)
+	{
+		seq[0] = seq1;
+		seq[1] = seq2;
+	}
+
+	PairwiseMatchAdapter* Clone() const { return new PairwiseMatchAdapter( *this ); }
+
+	PairwiseMatchAdapter* Copy() const
+	{
+		return m_allocateAndCopy( *this );
+	}
+
+	void Free()
+	{
+		m_free(this);
+	}
+
+	//
+	// forward all function calls to match, translating the pairwise sequence
+	// index (0 or 1) to the wrapped match's sequence index through seq[]
+	//
+	gnSeqI Length( uint seqI ) const { return m->Length(seq[seqI]); }
+	void SetLength( gnSeqI len, uint seqI ) { m->SetLength(len, seq[seqI]); }
+	/** Start coordinate of the wrapped match, negated while the adapter is inverted */
+	int64 Start(uint startI) const {
+		if(inverted)
+			return -m->Start(seq[startI]);
+		return m->Start(seq[startI]);
+	}
+	void SetStart(uint seqI, int64 start) { m->SetStart(seq[seqI],start); }
+	gnSeqI LeftEnd(uint seqI) const { return m->LeftEnd(seq[seqI]); }
+	/** Orientation of the wrapped match, flipped while the adapter is inverted */
+	orientation Orientation(uint seqI) const {
+		orientation o = m->Orientation(seq[seqI]);
+		if(inverted && o != AbstractMatch::undefined )
+			o = o == AbstractMatch::forward ? AbstractMatch::reverse : AbstractMatch::forward;
+		return o;
+	}
+	void SetLeftEnd(uint seqI, gnSeqI start) { m->SetLeftEnd(seq[seqI],start); }
+	void SetOrientation(uint seqI, orientation o) { m->SetOrientation(seq[seqI],o); }
+	void MoveStart(int64 move_amount) { m->MoveStart(move_amount); }
+	void MoveEnd(int64 move_amount) { m->MoveEnd(move_amount); }
+	uint Multiplicity() const { return 2; }
+	uint SeqCount() const { return 2; }
+	uint FirstStart() const { return 0; }
+	gnSeqI AlignmentLength() const { return m->AlignmentLength(); }
+	/** Only toggles the adapter's own flag; the wrapped match is left untouched */
+	void Invert() { inverted = !inverted; }
+	void CropStart(gnSeqI crop_amount) { m->CropStart(crop_amount); }
+	void CropEnd(gnSeqI crop_amount) { m->CropEnd(crop_amount); }
+	void CropLeft(gnSeqI crop_amount, uint seqI) { m->CropLeft(crop_amount, seq[seqI]); }
+	void CropRight(gnSeqI crop_amount, uint seqI) { m->CropRight(crop_amount, seq[seqI]); }
+	/**
+	 * Replaces the contents of align_matrix with the two rows of the wrapped
+	 * match's alignment that belong to this pair.  While the adapter is
+	 * inverted the wrapped match is temporarily inverted and restored afterwards.
+	 */
+	void GetAlignment( std::vector< mems::bitset_t >& align_matrix ) const
+	{
+		if( inverted )
+			m->Invert();
+		std::vector< mems::bitset_t > aln_mat;
+		m->GetAlignment(aln_mat);
+		align_matrix.clear();
+		align_matrix.push_back(aln_mat[seq[0]]);
+		align_matrix.push_back(aln_mat[seq[1]]);
+		if( inverted )
+			m->Invert();
+	}
+	/**
+	 * Replaces the contents of pos and column with the two entries of the
+	 * wrapped match's column col that belong to this pair.  Both output
+	 * vectors are cleared first so they always come back with two elements.
+	 */
+	void GetColumn( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column ) const
+	{
+		if( inverted )
+			m->Invert();
+		std::vector<gnSeqI> m_pos;
+		std::vector<bool> m_column;
+		m->GetColumn(col,m_pos,m_column);
+		pos.clear();
+		pos.push_back(m_pos[seq[0]]);
+		pos.push_back(m_pos[seq[1]]);
+		// previously only pos was cleared, so a reused column vector kept growing
+		column.clear();
+		column.push_back(m_column[seq[0]]);
+		column.push_back(m_column[seq[1]]);
+		if( inverted )
+			m->Invert();
+	}
+
+	bool IsGap( uint seqI, gnSeqI col ) const { return m->IsGap( seq[seqI],col ); }
+	/**
+	 * Returns 0 if the first sequence of the pair is defined in the wrapped
+	 * match, otherwise 1 if the second is, otherwise the maximum uint value.
+	 * The seqI argument is not consulted.
+	 */
+	uint UsedSeq( uint seqI ) const
+	{
+		if(m->Start(seq[0]) != NO_MATCH)
+			return 0;
+		if(m->Start(seq[1]) != NO_MATCH)
+			return 1;
+		return (std::numeric_limits<uint>::max)();
+	};
+
+	AbstractMatch* m;	/**< the wrapped match (not owned by the adapter) */
+	TrackingMatch* tm;	/**< never set by this class; NULL after construction */
+	uint seq[2];		/**< sequence indices of the wrapped match exposed as 0 and 1 */
+	bool inverted;		/**< true while the adapter presents the inverted match */
+};
+
+}
+
+#endif	// __PairwiseMatchAdapter_h__  
+
diff --git a/libMems/PairwiseMatchFinder.cpp b/libMems/PairwiseMatchFinder.cpp
new file mode 100644
index 0000000..1e16f84
--- /dev/null
+++ b/libMems/PairwiseMatchFinder.cpp
@@ -0,0 +1,73 @@
+/*******************************************************************************
+ * $Id: PairwiseMatchFinder.cpp,v 1.13 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/PairwiseMatchFinder.h"
+#include <list>
+
+using namespace std;
+using namespace genome;
+
+namespace mems {
+
+/** Default constructor: relies entirely on the MemHash default constructor */
+PairwiseMatchFinder::PairwiseMatchFinder(){
+}
+
+/** Destructor: this class adds no resources of its own */
+PairwiseMatchFinder::~PairwiseMatchFinder(){
+}
+
+/** Copy constructor: forwards to the MemHash copy constructor */
+PairwiseMatchFinder::PairwiseMatchFinder(const PairwiseMatchFinder& mh) : MemHash(mh){
+
+}
+
+/** Returns a heap-allocated copy of this object; the caller takes ownership */
+PairwiseMatchFinder* PairwiseMatchFinder::Clone() const{
+	return new PairwiseMatchFinder(*this);
+}
+
+
+/**
+ * Enumerates every pairwise match.
+ *
+ * match_list is sorted in place with idmer_id_lessthan.  Entries whose id
+ * occurs exactly once in the sorted list are collected, and every unordered
+ * pair of those entries is passed to HashMatch() as a two element list.
+ *
+ * @param match_list	the candidates; reordered by this call
+ * @return true if every HashMatch() call succeeded.  After the first failure
+ *         the remaining pairs are no longer hashed.
+ */
+boolean PairwiseMatchFinder::EnumerateMatches( IdmerList& match_list ){
+
+	match_list.sort(&idmer_id_lessthan);
+
+	// pass 1: keep the entries that form a run of length one in the sorted list
+	IdmerList singletons;
+	uint run_length = 1;
+	IdmerList::iterator cur = match_list.begin();
+	IdmerList::iterator ahead = match_list.begin();
+	while( ahead != match_list.end() ){
+		++ahead;
+		const bool run_ends = ahead == match_list.end() || cur->id != ahead->id;
+		if( !run_ends )
+			run_length++;
+		else if( run_length == 1 )
+			singletons.push_back( *cur );
+		else
+			run_length = 1;
+		++cur;
+	}
+
+	// pass 2: hash each unordered pair of singleton entries
+	boolean all_hashed = true;
+	for( IdmerList::iterator first = singletons.begin(); first != singletons.end(); ++first )
+	{
+		IdmerList::iterator second = first;
+		for( ++second; second != singletons.end(); ++second )
+		{
+			IdmerList seed_pair;
+			seed_pair.push_back( *first );
+			seed_pair.push_back( *second );
+			// short-circuits: HashMatch is skipped once a failure was recorded
+			all_hashed = all_hashed && HashMatch(seed_pair);
+		}
+	}
+	return all_hashed;
+}
+
+}  // namespace mems
diff --git a/libMems/PairwiseMatchFinder.h b/libMems/PairwiseMatchFinder.h
new file mode 100644
index 0000000..beac209
--- /dev/null
+++ b/libMems/PairwiseMatchFinder.h
@@ -0,0 +1,38 @@
+/*******************************************************************************
+ * $Id: PairwiseMatchFinder.h,v 1.8 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _PairwiseMatchFinder_h_
+#define _PairwiseMatchFinder_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/MemHash.h"
+
+namespace mems {
+
+/**
+ * Finds all pairwise matches with unique seeds among a group of sequences
+ */
+class PairwiseMatchFinder : public mems::MemHash
+{
+public:
+	PairwiseMatchFinder();
+	~PairwiseMatchFinder();
+
+	/** Copy constructor; forwards to the MemHash copy constructor */
+	PairwiseMatchFinder(const PairwiseMatchFinder& mh);
+	/** Returns a heap-allocated copy of this object */
+	virtual PairwiseMatchFinder* Clone() const;
+protected:
+
+	/**
+	 * Hashes every pair of entries in match_list whose id occurs exactly once.
+	 * match_list is sorted in place.
+	 * @return true if every pair was hashed successfully
+	 */
+	virtual boolean EnumerateMatches( mems::IdmerList& match_list );
+};
+
+}
+
+#endif //_PairwiseMatchFinder_h_
diff --git a/libMems/ParallelMemHash.cpp b/libMems/ParallelMemHash.cpp
new file mode 100644
index 0000000..38b67d5
--- /dev/null
+++ b/libMems/ParallelMemHash.cpp
@@ -0,0 +1,133 @@
+/*******************************************************************************
+ * $Id: ParallelMemHash.cpp,v 1.32 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/ParallelMemHash.h"
+#include <vector>
+
+#ifdef _OPENMP
+
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+	/** Default constructor: forwards to the MemHash default constructor */
+	ParallelMemHash::ParallelMemHash() : MemHash()
+
+{
+}
+
+/**
+ * Copy constructor: copies the base through the MemHash copy constructor and
+ * the per-thread tables through operator=
+ */
+ParallelMemHash::ParallelMemHash(const ParallelMemHash& mh) : MemHash(mh)
+{
+	*this = mh;
+}
+
+/**
+ * Assignment: copies thread_mem_table only.
+ * NOTE(review): the MemHash base state is not assigned here -- confirm that
+ * this is intended for plain assignment (the copy constructor copies the base
+ * separately).
+ */
+ParallelMemHash& ParallelMemHash::operator=( const ParallelMemHash& mh ){
+	thread_mem_table = mh.thread_mem_table;
+	return *this;
+}
+
+/** Returns a heap-allocated copy of this object; the caller takes ownership */
+ParallelMemHash* ParallelMemHash::Clone() const{
+	return new ParallelMemHash(*this);
+}
+
+/**
+ * Finds (in parallel) all matches among the sequences of ml and stores them
+ * in ml.
+ *
+ * Every sequence is registered with AddSequence().  The search space is then
+ * cut into chunks of roughly CHUNK_SIZE positions along the longest sorted
+ * mer list, with GetBreakpoint() supplying the corresponding start positions
+ * in all sequences.  The chunks are searched in an OpenMP parallel loop and
+ * each one is merged into the shared table with MergeTable().
+ *
+ * If a sequence cannot be added an error message is emitted and the function
+ * returns without searching.
+ *
+ * @param ml	supplies seq_table, sml_table and seq_filename; receives the
+ *				resulting matches through GetMatchList()
+ */
+void ParallelMemHash::FindMatches( MatchList& ml ) 
+{
+	for( uint32 seqI = 0; seqI < ml.seq_table.size(); ++seqI ){
+		if( !AddSequence( ml.sml_table[ seqI ], ml.seq_table[ seqI ] ) ){
+			ErrorMsg( "Error adding " + ml.seq_filename[seqI] + "\n");
+			return;
+		}
+	}
+
+	size_t CHUNK_SIZE = 200000;
+	// break up the SMLs into nice small chunks
+	vector< vector< gnSeqI > > chunk_starts;
+
+	// set the progress counter data
+	mers_processed = 0;
+	total_mers = 0;
+	m_progress = -1;
+	for( size_t i = 0; i < ml.sml_table.size(); i++ )
+		total_mers += ml.sml_table[i]->Length();
+
+	// break up on the longest SML
+	// max_length_sml stays -1 when there are no SMLs or all of them are empty
+	int max_length_sml = -1;
+	size_t maxlen = 0;
+	for( size_t i = 0; i < ml.sml_table.size(); i++ )
+	{
+		if( ml.sml_table[i]->Length() > maxlen )
+		{
+			maxlen = ml.sml_table[i]->Length();
+			max_length_sml = i;
+		}
+	}
+
+	// the first chunk always starts at position 0 in every sequence
+	chunk_starts.push_back( vector< gnSeqI >( seq_count, 0 ) );
+
+	// only split further when a longest SML exists; indexing with -1 would
+	// read outside of both chunk_starts.back() and ml.sml_table
+	while( max_length_sml >= 0 &&
+		chunk_starts.back()[max_length_sml] + CHUNK_SIZE < ml.sml_table[max_length_sml]->Length() )
+	{
+		vector< gnSeqI > tmp( seq_count, 0 );
+		GetBreakpoint(max_length_sml, chunk_starts.back()[max_length_sml] + CHUNK_SIZE, tmp);
+		chunk_starts.push_back(tmp);
+	}
+
+	
+	// now that it's all chunky, search in parallel
+	// the loop index stays a signed int for the OpenMP loop, so the bound is
+	// converted once instead of comparing signed against unsigned each pass
+	const int chunk_count = (int)chunk_starts.size();
+#pragma omp parallel for schedule(dynamic)
+	for( int i = 0; i < chunk_count; i++ )
+	{
+		// make sure the table returned by thread_mem_table.get() has as many
+		// buckets as the shared table
+		if(thread_mem_table.get().size() != mem_table.size())
+			thread_mem_table.get().resize( mem_table.size() );
+
+		vector< gnSeqI > chunk_lens(seq_count);
+		if( i + 1 < chunk_count )
+		{
+			// a chunk ends where the next one starts
+			for( size_t j = 0; j < seq_count; j++ )
+				chunk_lens[j] = chunk_starts[i+1][j] - chunk_starts[i][j];
+		}else
+			// the last chunk gets GNSEQI_END as its length in every sequence
+			chunk_lens = vector< gnSeqI >( seq_count, GNSEQI_END );
+		SearchRange( chunk_starts[i], chunk_lens );
+		MergeTable();
+	}
+	GetMatchList( ml );	
+}
+
+/**
+ * Merges the table returned by thread_mem_table.get() into the shared
+ * mem_table.  The whole body runs inside an OpenMP critical section, so only
+ * one thread merges at a time.
+ */
+void ParallelMemHash::MergeTable()
+{
+#pragma omp critical
+	{
+		size_t buckets = thread_mem_table.get().size();
+		for( size_t bI = 0; bI < buckets; bI++ )
+		{
+			vector< MatchHashEntry* >& bucket = thread_mem_table.get()[bI];
+			// add every entry of this bucket to the shared table
+			for( size_t mI = 0; mI < bucket.size(); mI++ )
+			{
+				MemHash::AddHashEntry((*(bucket[mI])), mem_table);
+//				bucket[mI]->Free();
+			}
+		}
+		// afterwards the thread's table is overwritten with a copy of the
+		// shared table (the entries are not freed, see the disabled call above)
+		thread_mem_table.get() = mem_table;
+	}
+}
+
+
+
+/**
+ * Adds mhe to the table returned by thread_mem_table.get() instead of the
+ * shared mem_table, and returns whatever MemHash::AddHashEntry returns.
+ */
+MatchHashEntry* ParallelMemHash::AddHashEntry(MatchHashEntry& mhe){
+	// do the normal procedure, but use the thread-local mem table.
+	return MemHash::AddHashEntry(mhe, thread_mem_table.get());
+}
+
+
+} // namespace mems
+
+#endif // _OPENMP
diff --git a/libMems/ParallelMemHash.h b/libMems/ParallelMemHash.h
new file mode 100644
index 0000000..8537201
--- /dev/null
+++ b/libMems/ParallelMemHash.h
@@ -0,0 +1,75 @@
+/*******************************************************************************
+ * $Id: ParallelMemHash.h,v 1.23 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _ParallelMemHash_h_
+#define _ParallelMemHash_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef _OPENMP
+
+#include "libMUSCLE/threadstorage.h"
+#include <omp.h>
+#include "libMems/MemHash.h"
+
+namespace mems {
+
+
+/**
+ * ParallelMemHash implements an algorithm for finding exact matches of a certain minimal
+ * length in several sequences.
+ */
+class ParallelMemHash : public MemHash {
+public:
+	ParallelMemHash();
+	ParallelMemHash(const ParallelMemHash& mh);
+	/** Copies thread_mem_table from mh */
+	ParallelMemHash& operator=( const ParallelMemHash& mh );
+	/** Returns a heap-allocated copy of this object */
+	virtual ParallelMemHash* Clone() const;
+	
+	/**
+	 * Finds (in parallel) all matches in the sequences contained by "match_list"
+	 * The resulting list of matches is stored within "match_list"
+	 */
+	virtual void FindMatches( MatchList& match_list );
+
+
+protected:
+	/** Adds mhe to the table obtained from thread_mem_table rather than to mem_table */
+	virtual MatchHashEntry* AddHashEntry(MatchHashEntry& mhe);
+	/** Merges the table obtained from thread_mem_table into mem_table inside a critical section */
+	virtual void MergeTable();
+
+	// hash table accessed through TLS<>::get()
+	// NOTE(review): TLS comes from libMUSCLE/threadstorage.h, presumably one instance per thread -- confirm
+	TLS< std::vector< std::vector<MatchHashEntry*> > > thread_mem_table;
+};
+
+
+}
+
+#else // _OPENMP
+
+namespace mems {
+
+
+/**
+ * When built without OpenMP, the ParallelMemHash is just a stub wrapper around MemHash
+ */
+class ParallelMemHash : public MemHash {
+public:
+	ParallelMemHash() : MemHash();
+	ParallelMemHash(const ParallelMemHash& mh) : MemHash(mh);
+	ParallelMemHash& operator=( const ParallelMemHash& mh ) : MemHash::operator=(mh){ return *this; }
+	virtual ParallelMemHash* Clone() const{ return new ParallelMemHash(*this); }
+};
+
+
+}
+
+
+#endif // _OPENMP
+
+#endif //_ParallelMemHash_h_
diff --git a/libMems/PhyloTree.cpp b/libMems/PhyloTree.cpp
new file mode 100644
index 0000000..a790146
--- /dev/null
+++ b/libMems/PhyloTree.cpp
@@ -0,0 +1,9 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "PhyloTree.h"
+using namespace std;
+
+typedef unsigned uint;
+
diff --git a/libMems/PhyloTree.h b/libMems/PhyloTree.h
new file mode 100644
index 0000000..7267f9c
--- /dev/null
+++ b/libMems/PhyloTree.h
@@ -0,0 +1,378 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef __PhyloTree_h__
+#define __PhyloTree_h__
+
+#include <vector>
+#include <string>
+#include <iostream>
+#include <sstream>
+#include <stack>
+
+//typedef unsigned int node_id_t;
+typedef size_t node_id_t;
+class TreeNode
+{
+public:
+	TreeNode() : name(), distance( 0 ), parents(), children() {}
+	std::string name;	/**< label of this node (may be empty) */
+	double distance;	/**< branch length to the parent, 0 by default */
+	std::vector< node_id_t > parents;	/**< ids of parent nodes; an empty list marks a root node */
+	std::vector< node_id_t > children;	/**< ids of child nodes; an empty list marks a leaf node */
+};
+
+template< class T >
+class PhyloTree
+{
+public:
+	PhyloTree();
+	PhyloTree( const PhyloTree<T>& pt );
+	PhyloTree<T>& operator=( const PhyloTree<T>& pt );
+	double weight;	/**< Overall tree weight */
+	node_id_t root;	/**< index into nodes of the root of the tree */
+	std::vector< T > nodes;	/**< nodes of the tree, addressed by node_id_t */
+	void clear();
+	/**
+	 * Reads a tree in Newick format.  WARNING:  only reads rooted trees correctly
+	 */
+	void readTree( std::istream& tree_file );
+	/**
+	 * Writes a tree in Newick format
+	 */
+	void writeTree( std::ostream& os ) const;
+	/**
+	 * Determines the height of the tree along the path from the root to the left-most leaf node
+	 */
+	double getHeight() const;
+	/**
+	 * Determines the height of the tree along the path from nodeI to its left-most descendant leaf node
+	 */
+	double getHeight( node_id_t nodeI ) const;
+	// Container-style access to nodes.  Indices and sizes use size_t (the type behind node_id_t) so they are not narrowed to unsigned.
+	T& operator[]( const size_t i ){ return nodes[i]; }
+	const T& operator[]( const size_t i ) const{ return nodes[i]; }
+	size_t size() const{ return nodes.size(); }
+	void push_back( const T& t ){ nodes.push_back(t); }	// const reference: temporaries and const nodes can be appended too
+	T& back() { return nodes.back(); }
+	const T& back() const{ return nodes.back(); }
+	void resize( const size_t s ){ nodes.resize(s); }
+
+	/** Exchanges weight, root and nodes with "other" */
+	void swap( PhyloTree<T>& other )
+	{
+		std::swap( weight, other.weight );
+		std::swap( root, other.root );
+		nodes.swap( other.nodes );
+	}
+protected:
+};
+
+
+template< class T >
+PhyloTree<T>::PhyloTree() :
+weight( 0 ),	// an empty tree has zero weight ...
+root( 0 )	// ... and its root index is 0; nodes is default-constructed (empty)
+{
+}
+
+template< class T >
+PhyloTree<T>::PhyloTree( const PhyloTree<T>& pt ) :	// member-wise copy; initializers follow declaration order (weight, root, nodes)
+weight( pt.weight ),
+root( pt.root ),
+nodes( pt.nodes )
+{}
+
+template< class T >
+PhyloTree<T>& PhyloTree<T>::operator=( const PhyloTree<T>& pt )
+{
+	this->nodes = pt.nodes;	// plain member-wise copy of the node list ...
+	this->weight = pt.weight;	// ... the tree weight ...
+	this->root = pt.root;	// ... and the root index
+	return *this;
+}
+
+template< class T >
+void PhyloTree<T>::clear()
+{
+	root = 0;	// back to the values set by the default constructor
+	weight = 0;
+	nodes.clear();	// drop every node
+}
+
+
+/**
+ *  readTree version 2.0: read in a phylogenetic tree in the Newick file format.
+ *  The tree is cleared first and stays empty if the stream ends before text with balanced parentheses has been read.
+ */
+template< class T >
+void PhyloTree<T>::readTree( std::istream& tree_file )
+{
+	std::string line;
+	clear();
+	if( !std::getline( tree_file, line ) )
+		return;
+	// keep appending input lines until the parentheses seen so far are balanced
+	// NOTE(review): only the parenthesis count is tested here; a ';' terminator is not looked for
+	while(true){
+		int paren_count = 0;
+		for( size_t charI = 0; charI < line.size(); charI++ )
+		{
+			if( line[charI] == '(' )
+				paren_count++;
+			if( line[charI] == ')' )
+				paren_count--;
+		}
+		if( paren_count == 0 )
+			break;
+		if( paren_count != 0 ){
+			std::string another_line;
+			if( !std::getline( tree_file, another_line ) )
+				return;	// input exhausted with unbalanced parentheses: give up, tree stays empty
+			line += another_line;	// lines are joined without a separator
+		}
+	}
+
+	std::stringstream line_str( line );
+
+	// look for a weight: a "[...]" group that closes before the first '('
+	std::string::size_type open_bracket_pos = line.find( "[" );
+	std::string::size_type bracket_pos = line.find( "]" );
+	if( open_bracket_pos != std::string::npos && bracket_pos != std::string::npos && 
+		open_bracket_pos < bracket_pos && bracket_pos < line.find( "(" ) ){
+		// read in a weight (line is reused as scratch space; line_str ends up positioned just after ']')
+		getline( line_str, line, '[' );
+		getline( line_str, line, ']' );
+		std::stringstream weight_str( line );
+		weight_str >> weight;
+	}
+	
+	// ready to begin parsing the tree data: everything up to the first ';'
+	std::string tree_line;
+	std::getline( line_str, tree_line, ';' );
+	size_t read_state = 0;	/**< 0: nothing parsed yet; 1: the next name/length delimiter creates a new node; 2: the node on top of the stack is popped at the next ',' or ')' */
+	size_t section_start = 0;	// index of the first character of the token currently being scanned
+	std::stack< node_id_t > node_stack;	// ids of the nodes that are currently open, innermost on top
+	std::stringstream blen_str;
+	T new_node;
+	new_node.distance = 0;	// default the distance to 0
+	bool already_read_name = false;	// never read below
+	bool blen_found = false;	// true between a ':' and the delimiter that ends the branch length
+	for( size_t charI = 0; charI < tree_line.size(); charI++ ){
+		switch( tree_line[ charI ] ){
+			// if this is an open parens then simply create a new
+			// parent node and push it on the parent stack
+			case '(':
+				if( node_stack.size() > 0 ){
+					new_node.parents.clear();
+					new_node.parents.push_back( node_stack.top() );
+					(*this)[ node_stack.top() ].children.push_back( (node_id_t)(*this).size() );
+				}
+				node_stack.push( (node_id_t)(*this).size() );
+				nodes.push_back( new_node );
+				read_state = 1;
+				section_start = charI + 1;
+				break;
+			case ')':
+				// NOTE(review): node_stack.top() is used without an emptiness check; input with a stray ')' would hit an empty stack
+				if( blen_found )
+				{
+					// read off a branch length
+					blen_str.clear();
+					blen_str.str( tree_line.substr( section_start, charI - section_start ) );
+					blen_str >> (*this)[ node_stack.top() ].distance;
+				}else{
+					// read off a name, if possible
+					if( read_state == 1 ){
+						new_node.parents.clear();
+						new_node.parents.push_back( node_stack.top() );
+						(*this)[ node_stack.top() ].children.push_back( (node_id_t)(*this).size() );
+						node_stack.push( (node_id_t)(*this).size() );
+						nodes.push_back( new_node );
+						read_state = 2;	// pop this node after reading its branch length
+					}
+					(*this)[ node_stack.top() ].name = tree_line.substr( section_start, charI - section_start );
+				}
+				if( read_state == 2 )
+					node_stack.pop();
+				section_start = charI + 1;
+				blen_found = false;
+				// the enclosing node is now on top; state 2 makes the next ',' or ')' pop it
+				// after its own name / branch length has been read
+				read_state = 2;
+				break;
+			case ',':
+				if( blen_found ){
+					// read off a branch length
+					blen_str.clear();
+					blen_str.str( tree_line.substr( section_start, charI - section_start ) );
+					blen_str >> (*this)[ node_stack.top() ].distance;
+				}else{
+					// read off a name, if possible
+					if( read_state == 1 ){
+						new_node.parents.clear();
+						new_node.parents.push_back( node_stack.top() );
+						(*this)[ node_stack.top() ].children.push_back( (node_id_t)(*this).size() );
+						node_stack.push( (node_id_t)(*this).size() );
+						nodes.push_back( new_node );
+						read_state = 2;	// pop this node after reading its name
+					}
+					(*this)[ node_stack.top() ].name = tree_line.substr( section_start, charI - section_start );
+				}
+				if( read_state == 2 )
+					node_stack.pop();
+				section_start = charI + 1;
+				read_state = 1;	// indicates that we'll be creating a new node when we hit :
+				blen_found = false;
+				break;
+			case ':':
+				// read off a name, if possible
+				if( read_state == 1 ){
+					new_node.parents.clear();
+					new_node.parents.push_back( node_stack.top() );
+					(*this)[ node_stack.top() ].children.push_back( (node_id_t)(*this).size() );
+					node_stack.push( (node_id_t)(*this).size() );
+					nodes.push_back( new_node );
+					read_state = 2;	// pop this node after reading its branch length
+				}
+				(*this)[ node_stack.top() ].name = tree_line.substr( section_start, charI - section_start );
+				section_start = charI + 1;
+				blen_found = true;	// the text up to the next ',' or ')' is parsed as a branch length
+				break;
+			default:
+				break;
+		}
+	}
+	// root keeps the value 0 set by clear(); the first '(' creates node 0
+}
+
+/** Writes the tree to os in Newick format ("[weight]" prefix only when weight != 0); writes nothing if root does not index an existing node. */
+template< class T >
+void PhyloTree<T>::writeTree( std::ostream& os ) const{
+	if( root >= nodes.size() ) return;	// empty tree (e.g. after clear() or a failed readTree): indexing nodes[root] would be out of bounds
+	std::stack< node_id_t > node_stack;
+	std::stack< size_t > child_stack;
+	node_stack.push( root );
+	child_stack.push( 0 );
+	bool write_branch_lengths = false;
+	for( size_t nodeI = 0; nodeI < this->size(); nodeI++ )
+	{
+		if( (*this)[nodeI].distance != 0 )
+		{
+			write_branch_lengths = true;
+			break;
+		}
+	}
+	if( (*this).weight != 0 )
+		os << "[" << weight << "]";
+	os << "(";
+	// iterative depth first traversal: child_stack.top() counts how many children of the innermost open parent were already emitted
+	while( node_stack.size() > 0 ) {
+		if( (*this)[ node_stack.top() ].children.size() != 0 ){
+			// this is a parent node
+			// if we have scanned all its children then pop it
+			if( child_stack.top() == (*this)[ node_stack.top() ].children.size() ){
+				os << ")";
+				if( node_stack.size() > 1 && write_branch_lengths )
+					os << ":" << (*this)[ node_stack.top() ].distance;
+				node_stack.pop();
+				child_stack.pop();
+				continue;
+			}
+			// try to recurse to its children
+			// if the child is a parent as well spit out a paren
+			node_id_t child = (*this)[ node_stack.top() ].children[ child_stack.top() ];
+			node_stack.push( child );
+			child_stack.top()++;
+			// print a comma to separate multiple children
+			if( child_stack.top() > 1 )
+				os << ",";
+			if( (*this)[ child ].children.size() > 0 ){
+				child_stack.push( 0 );
+				os << "(";
+			}
+			continue;
+		}
+		
+		// this is a leaf node
+		os << (*this)[ node_stack.top() ].name;
+		if( write_branch_lengths )
+			os << ":" << (*this)[ node_stack.top() ].distance;
+		
+		// pop the child
+		node_stack.pop();
+	}
+	os << ";" << std::endl;
+}
+
+
+template< class T >
+double PhyloTree<T>::getHeight() const
+{
+	return this->getHeight( this->root );	// height measured from the root node
+}
+
+template< class T >
+double PhyloTree<T>::getHeight( node_id_t nodeI ) const
+{
+	const T& cur = (*this)[ nodeI ];
+	// a leaf contributes its own distance; otherwise add the height of the first child's subtree
+	return cur.children.size() == 0 ? cur.distance : cur.distance + getHeight( cur.children[ 0 ] );
+}
+
+
+/** collect "node" itself and every node below it into "descendants" (previous contents are discarded); works for any number of children per node */
+template< class TreeType >
+void getDescendants( TreeType& alignment_tree, node_id_t node, std::vector< node_id_t >& descendants )
+{
+	// do a depth first search
+	std::stack< node_id_t > node_stack;
+	node_stack.push( node );
+	descendants.clear();
+	while( node_stack.size() > 0 )
+	{
+		node_id_t cur_node = node_stack.top();
+		node_stack.pop();
+		// push every child in index order; hard-coding children[0] and children[1] read past the end for one-child nodes and skipped children beyond the second
+		for( size_t childI = 0; childI < alignment_tree[cur_node].children.size(); ++childI )
+		{
+			node_stack.push(alignment_tree[cur_node].children[childI]);
+		}
+		descendants.push_back(cur_node);
+	}
+}
+
+
+/** collect every childless node at or below "node" into "leaves" (previous contents are discarded); works for any number of children per node */
+template< class TreeType >
+void getLeaves( TreeType& tree, node_id_t node, std::vector< node_id_t >& leaves )
+{
+	// do a depth first search
+	std::stack< node_id_t > node_stack;
+	node_stack.push( node );
+	leaves.clear();
+	while( node_stack.size() > 0 )
+	{
+		node_id_t cur_node = node_stack.top();
+		node_stack.pop();
+		if( tree[cur_node].children.size() > 0 )
+		{
+			for( size_t childI = 0; childI < tree[cur_node].children.size(); ++childI )
+				node_stack.push(tree[cur_node].children[childI]);	// all children, not only [0] and [1]
+		}else
+			leaves.push_back(cur_node);
+	}
+}
+
+namespace std {
+// swap for any PhyloTree<T>: forwards to the member swap instead of copying.  NOTE(review): this adds a function template overload to namespace std.
+template< class T > inline
+void swap( PhyloTree<T>& a, PhyloTree<T>& b )
+{
+	a.swap(b);
+}
+// explicit specialization for PhyloTree<TreeNode>; it also forwards to the member swap
+template<> inline void swap( PhyloTree<TreeNode>& a, PhyloTree<TreeNode>& b){ a.swap(b); }
+}
+
+#endif // __PhyloTree_h__
diff --git a/libMems/ProgressiveAligner.cpp b/libMems/ProgressiveAligner.cpp
new file mode 100644
index 0000000..50381c5
--- /dev/null
+++ b/libMems/ProgressiveAligner.cpp
@@ -0,0 +1,3945 @@
+/*******************************************************************************
+ * $Id: progressiveAligner.cpp,v 1.47 2004/04/19 23:10:30 darling Exp $
+ * BEWARE!!
+ * This code was created in the likeness of the flying spaghetti monster
+ *
+ * dedicated to Loren...
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "libMems/ProgressiveAligner.h"
+#include "libMems/GreedyBreakpointElimination.h"
+#include "libMems/Aligner.h"
+#include "libMems/Islands.h"
+#include "libMems/DNAFileSML.h"
+#include "libMems/MuscleInterface.h"	// it's the default gapped aligner
+#include "libMems/gnAlignedSequences.h"
+#include "libMems/CompactGappedAlignment.h"
+#include "libMems/MatchProjectionAdapter.h"
+#include "libMems/PairwiseMatchFinder.h"
+#include "libMems/TreeUtilities.h"
+#include "libMems/PairwiseMatchAdapter.h"
+#include "libMems/DistanceMatrix.h"
+
+#include <boost/dynamic_bitset.hpp>
+#include <boost/tuple/tuple.hpp>
+#include <boost/graph/graph_traits.hpp>
+#include <boost/graph/adjacency_list.hpp>
+#include <boost/graph/johnson_all_pairs_shortest.hpp>
+#include <boost/graph/undirected_dfs.hpp>
+
+#include <map>
+#include <fstream>	// for debugging
+#include <sstream>
+#include <stack>
+#include <algorithm>
+#include <limits>
+#include <iomanip>
+
+#include "stdlib.h"
+
+using namespace std;
+using namespace genome;
+
+namespace mems {
+
+
+bool progress_msgs = false;	// when true, functions in this file print progress messages to cout
+
+bool debug_me = false;
+static int dbg_count = 0;
+
+// Tunable defaults.  NOTE(review): meanings below the first two are inferred from the names only -- confirm against their users.
+double min_window_size = 200;
+double max_window_size = 20000;  // don't feed MUSCLE anything bigger than this (overwritten by the ProgressiveAligner constructor and SetMaxGappedAlignmentLength)
+double min_density = .5;
+double max_density = .9;
+size_t max_gap_length = 5000;
+size_t lcb_hangover = 300;
+
+
+void mergeUnalignedIntervals( uint seqI, vector< Interval* >& iv_list, vector< Interval* >& new_list );
+
+/**
+ * Test helper: checks that, for every sequence, the Start() coordinates of the matches in an LCB
+ * never decrease when the matches are visited in list order (matches undefined in a sequence are skipped).
+ * @return	true if no out-of-order match was found; an empty list is reported as good
+ */
+boolean my_validateLCB( MatchList& lcb ){
+	if( lcb.size() == 0 )
+		return true;
+	typedef vector< Match* >::iterator MatchIter;
+	const uint n_seqs = (*lcb.begin())->SeqCount();
+	for( uint s = 0; s < n_seqs; s++ ){
+		// start of the previous defined match in this sequence; 0 means none was seen yet
+		int64 last_start = 0;
+		for( MatchIter it = lcb.begin(); it != lcb.end(); ++it ){
+			const int64 cur_start = (*it)->Start( s );
+			if( cur_start == NO_MATCH )
+				continue;
+			// a start smaller than its predecessor means the LCB is not collinear
+			if( last_start != 0 && cur_start < last_start )
+				return false;
+			last_start = cur_start;
+		}
+	}
+	return true;
+}
+
+template< class BoostMatType >
+void print2d_matrix( BoostMatType& mat, std::ostream& os )
+{
+	// one output line per index of the first dimension; entries on a line are separated by tabs
+	for( size_t row = 0; row < mat.shape()[0]; ++row )
+	{
+		for( size_t col = 0; col < mat.shape()[1]; ++col )
+		{
+			if( col != 0 ) os << "\t";
+			os << mat[row][col];
+		}
+		os << endl;
+	}
+}
+
+double getDefaultBreakpointPenalty( std::vector< gnSequence* >& sequences )
+{
+	uint default_mer_size = MatchList::GetDefaultMerSize( sequences );	// value is not used below; the call itself is kept unchanged
+	double total_len = 0;
+	for( std::vector< gnSequence* >::iterator seq_it = sequences.begin(); seq_it != sequences.end(); ++seq_it )
+		total_len += (double)(*seq_it)->length();
+	const double mean_len = total_len / (double)sequences.size();
+	const double log2_mean_len = log( mean_len ) / log( 2.0 );
+	return log2_mean_len * 7000;	  // 7000 * log2( mean sequence length ); the factor was chosen empirically
+}
+
+
+double getDefaultBpDistEstimateMinScore( std::vector< gnSequence* >& sequences )
+{
+	// three times the default breakpoint penalty; the factor is an empirical choice of the original author
+	const double bp_penalty = getDefaultBreakpointPenalty(sequences);
+	return 3.0 * bp_penalty;
+}
+
+
+
+/*
+ * A progressive alignment algorithm for genomes with rearrangements.
+ * Start simple, add complexity later.
+ * TODO: rewrite the algorithm outline
+ */
+
+ProgressiveAligner::ProgressiveAligner( uint seq_count ) :	// seq_count is forwarded unchanged to the Aligner base class
+Aligner( seq_count ),
+breakpoint_penalty( -1 ),	// NOTE(review): -1 looks like a "not set yet" marker -- confirm where it is read
+min_breakpoint_penalty( 4000 ),
+debug(false),
+refine(true),
+scoring_scheme(ExtantSumOfPairsScoring),
+use_weight_scaling(true),
+conservation_dist_scale(1),
+bp_dist_scale(.9),
+max_gapped_alignment_length(20000),
+bp_dist_estimate_score(-1),	// NOTE(review): same -1 convention as breakpoint_penalty, presumably -- confirm
+use_seed_families(false),
+using_cache_db(true)
+{
+	gapped_alignment = true;
+	max_window_size = max_gapped_alignment_length;	// side effect: overwrites the file-level max_window_size variable
+}
+
+void ProgressiveAligner::SetMaxGappedAlignmentLength( size_t len )
+{
+	// store the new limit in the member, then mirror the stored value into the file-level max_window_size
+	max_window_size = ( max_gapped_alignment_length = len );
+}
+
+/** collect into "descendants" the nodes with a non-NULL sequence that are reachable from "node" through child edges flagged as aligned */
+void ProgressiveAligner::getAlignedChildren( node_id_t node, vector< node_id_t >& descendants )
+{
+	// iterative depth first search; a child is followed only when its children_aligned flag is set
+	descendants.clear();
+	vector< bool > seen( alignment_tree.size(), false );
+	stack< node_id_t > pending;
+	pending.push( node );
+	while( !pending.empty() )
+	{
+		const node_id_t cur_node = pending.top();
+		if(progress_msgs) cout << "Evaluating aligned nodes linked to node " << cur_node << endl;
+		pending.pop();
+		seen[cur_node] = true;
+		AlignmentTreeNode& cur = alignment_tree[cur_node];
+		if( cur.sequence != NULL )
+			descendants.push_back( cur_node );
+		for( uint childI = 0; childI < cur.children.size(); childI++ )
+		{
+			if( cur.children_aligned[childI] && !seen[cur.children[childI]] )
+				pending.push( cur.children[childI] );
+		}
+	}
+}
+
+
+/** find the tree path between two nodes: on return path[0] == first_n and path.back() == last_n; path is left empty if first_n cannot be reached from last_n */
+void ProgressiveAligner::getPath( node_id_t first_n, node_id_t last_n, vector< node_id_t >& path )
+{
+	// depth first search from last_n over child and parent edges; the stack always holds the path from last_n to the current node
+	stack< node_id_t > node_stack;
+	node_stack.push( last_n );
+	vector< bool > visited( alignment_tree.size(), false );
+	while( !node_stack.empty() && node_stack.top() != first_n )	// the emptiness test prevents top() on an exhausted stack
+	{
+		node_id_t cur_node = node_stack.top();
+		size_t pre_size = node_stack.size();
+		visited[cur_node] = true;
+		for( uint childI = 0; childI < alignment_tree[cur_node].children.size(); childI++ )
+		{
+			node_id_t child_id = alignment_tree[cur_node].children[childI];
+			if(!visited[child_id])
+			{
+				node_stack.push( child_id );
+				break;
+			}
+		}
+		if( pre_size != node_stack.size() )
+			continue;
+		for( uint parentI = 0; parentI < alignment_tree[cur_node].parents.size(); parentI++ )
+		{
+			node_id_t parent_id = alignment_tree[cur_node].parents[parentI];
+			if(!visited[parent_id])
+			{
+				node_stack.push( parent_id );
+				break;
+			}
+		}
+		if( pre_size != node_stack.size() )
+			continue;
+		node_stack.pop();	// didn't make any progress
+	}
+	path = vector< node_id_t >( node_stack.size() );
+	for( size_t pI = 0; pI < path.size(); pI++ )
+	{
+		path[pI] = node_stack.top();
+		node_stack.pop();
+	}
+}
+
+
+
+
+
+
+template<class MatchType>
+void ProgressiveAligner::propagateDescendantBreakpoints( node_id_t node1, uint seqI, std::vector<MatchType*>& iv_list )
+{
+	SSC<MatchType> by_seq_start(seqI);
+	sort( iv_list.begin(), iv_list.end(), by_seq_start );
+	// the breakpoint list is the left end of every superinterval of node1, in ordering order
+	vector< SuperInterval >& siv_order = alignment_tree[ node1 ].ordering;
+	vector<gnSeqI> breakpoints( siv_order.size() );
+	for( size_t sivI = 0; sivI < siv_order.size(); sivI++ )
+		breakpoints[sivI] = siv_order[sivI].LeftEnd();
+	GenericMatchSeqManipulator<MatchType> manip( seqI );
+	applyBreakpoints( breakpoints, iv_list, manip );
+}
+
+// Hands both ends of every superinterval of siv_list that is defined in seqI to applyBreakpoints(), together with ord and m
+template<class T, class Manipulator>
+void applyAncestralBreakpoints( const vector< SuperInterval >& siv_list, vector<T>& ord, uint seqI, Manipulator& m )
+{
+	// gather the left end and the position just past the right end of each defined reference interval
+	vector<gnSeqI> bp_list;
+	bp_list.reserve( siv_list.size()*2 );
+	for( size_t sivI = 0; sivI < siv_list.size(); sivI++ )
+	{
+		if( siv_list[sivI].reference_iv.Start(seqI) != NO_MATCH )
+		{
+			bp_list.push_back( siv_list[sivI].reference_iv.LeftEnd(seqI) );
+			bp_list.push_back( siv_list[sivI].reference_iv.LeftEnd(seqI) + siv_list[sivI].reference_iv.Length(seqI) );
+		}
+	}
+	// breakpoints are passed on in ascending order
+	sort( bp_list.begin(), bp_list.end() );
+	applyBreakpoints( bp_list, ord, m );
+}
+
+
+// Precondition stated by the original author: breakpoints have been propagated in both directions,
+// so each superinterval of the ancestor that is defined in seqI should have exactly one counterpart
+// in the child node1.  Links are stored as indices (c1_siv / c2_siv in the ancestor, parent_siv in the child).
+void ProgressiveAligner::linkSuperIntervals( node_id_t node1, uint seqI, node_id_t ancestor )
+{
+	// TODO: speed this up by implementing O(N) instead of O(N^2)
+	vector<SuperInterval>& anc_sivs = alignment_tree[ancestor].ordering;
+	vector<SuperInterval>& child_sivs = alignment_tree[node1].ordering;
+	const size_t unlinked = (std::numeric_limits<size_t>::max)();	// index value that stands for "no link"
+	for( size_t ancI = 0; ancI < anc_sivs.size(); ancI++ )
+		if( seqI == 0 )
+			anc_sivs[ancI].c1_siv = unlinked;
+		else
+			anc_sivs[ancI].c2_siv = unlinked;
+	for( size_t childI = 0; childI < child_sivs.size(); childI++ )
+		child_sivs[childI].parent_siv = unlinked;
+	// pair each ancestral superinterval defined in seqI with the first child superinterval that has the same left end
+	for( size_t ancI = 0; ancI < anc_sivs.size(); ancI++ )
+	{
+		if( anc_sivs[ancI].reference_iv.LeftEnd(seqI) == NO_MATCH )
+			continue;
+		size_t childI = 0;
+		for( ; childI < child_sivs.size(); childI++ )
+		{
+			if( absolut(anc_sivs[ancI].reference_iv.Start(seqI)) != child_sivs[childI].LeftEnd() )
+				continue;
+			if( anc_sivs[ancI].reference_iv.Length(seqI) != child_sivs[childI].Length() )
+			{
+				breakHere();
+				cerr << "mapping length mismatch\n";
+				cerr << "ancestor: " << ancestor << "\t node1: " << node1 << endl;
+				cerr << "a_ord[" << ancI << "].reference_iv.Length(" << seqI << "): " << anc_sivs[ancI].reference_iv.Length(seqI) << endl;
+				cerr << "a_ord[" << ancI << "].reference_iv.LeftEnd(" << seqI << "): " << anc_sivs[ancI].reference_iv.LeftEnd(seqI) << endl;
+				cerr << "c_ord[" << childI << "].Length(): " << child_sivs[childI].Length() << endl;
+				cerr << "c_ord[" << childI << "].LeftEnd(): " << child_sivs[childI].LeftEnd() << endl;
+				cerr << "";
+				cerr << "";
+			}
+			// record the link in both directions (seqI 0 uses c1_siv, any other seqI uses c2_siv)
+			if( seqI == 0 )
+				anc_sivs[ancI].c1_siv = childI;
+			else
+				anc_sivs[ancI].c2_siv = childI;
+			child_sivs[childI].parent_siv = ancI;
+			break;
+		}
+		if( childI == child_sivs.size() )
+		{
+			breakHere();
+			cerr << "error no mapping\n";
+		}
+	}
+}
+
+
+void ProgressiveAligner::translateGappedCoordinates( vector<AbstractMatch*>& ml, uint seqI, node_id_t extant, node_id_t ancestor )
+{
+	// Rewrites the seqI coordinates of the matches in ml, one tree edge at a time, along the path getPath() returns for (extant, ancestor)
+	vector< node_id_t > trans_path;
+	getPath( extant, ancestor, trans_path );
+
+	// set seqI to forward orientation 
+	for( size_t mI = 0; mI < ml.size(); mI++ )
+		if( ml[mI]->Orientation(seqI) == AbstractMatch::reverse )
+			ml[mI]->Invert();
+
+	// for each node on the path, construct a complete coordinate translation
+	for( size_t nI = 1; nI < trans_path.size(); nI++ )
+	{
+		// first sort matches on start pos and make them all forward oriented
+		// then split them on superinterval boundaries and assign each to a superinterval
+		// then convert each match's coordinates to be superinterval-local
+		// then apply the coordinate translation with transposeCoordinates
+		// then shift each match's coordinates to the global ancestral coordinate space
+		SSC<AbstractMatch> ssc(seqI);
+		sort(ml.begin(), ml.end(), ssc);
+
+		// split on superinterval boundaries
+		vector< SuperInterval >& siv_list = alignment_tree[trans_path[nI]].ordering;
+		vector< vector< AbstractMatch* > > siv_matches = vector< vector< AbstractMatch* > >(siv_list.size());
+		size_t cur_child = 0;
+		if( alignment_tree[trans_path[nI]].children[0] == trans_path[nI-1] )
+			cur_child = 0;
+		else if( alignment_tree[trans_path[nI]].children[1] == trans_path[nI-1] )
+			cur_child = 1;
+		else 
+		{
+			breakHere();
+			cerr << "forest fire\n";
+		}
+
+		AbstractMatchSeqManipulator amsm( seqI );
+		applyAncestralBreakpoints(siv_list, ml, cur_child, amsm );
+		
+		// sort matches again because new ones were added at the end
+		sort(ml.begin(), ml.end(), ssc);
+
+		// assign each match to a siv, and convert coords to siv-local
+		for( size_t mI = 0; mI < ml.size(); mI++ )
+		{
+			if( ml[mI]->LeftEnd(seqI) == 0 )
+			{
+				breakHere();
+				cerr << "fefefe";
+			}
+			size_t sivI = 0;
+			for( ; sivI < siv_list.size(); sivI++ )
+			{
+				if( siv_list[sivI].reference_iv.LeftEnd(cur_child) == NO_MATCH )
+					continue;
+				if( ml[mI]->LeftEnd(seqI) >= siv_list[sivI].reference_iv.LeftEnd(cur_child) &&
+					ml[mI]->LeftEnd(seqI) < siv_list[sivI].reference_iv.LeftEnd(cur_child) + siv_list[sivI].reference_iv.Length(cur_child) )
+					break;
+			}
+			if( sivI == siv_list.size() )
+			{
+				cerr << "nI is: "<< nI << endl;
+				cerr << "trans_path: ";
+				for( size_t ttI = 0; ttI < trans_path.size(); ttI++ )
+					cerr << "  " << trans_path[ttI];
+				cerr << endl;
+				cerr << "problem seq: " << seqI << std::endl;
+				cerr << "ml[" << mI << "]->Start(0) == " << ml[mI]->Start(0) << endl;
+				cerr << "ml[" << mI << "]->Length(0) == " << ml[mI]->Length(0) << endl;	// printed value now matches its label (was Length(1))
+				cerr << "ml[" << mI << "]->Start(1) == " << ml[mI]->Start(1) << endl;	// printed value now matches its label (was Start(0))
+				cerr << "ml[" << mI << "]->Length(1) == " << ml[mI]->Length(1) << endl;
+				cerr << "ml.size(): " << ml.size() << endl;
+				for( sivI = 0; sivI < siv_list.size(); sivI++ )
+				{
+					cerr << "siv_list[" << sivI << "] left end 0: " << siv_list[sivI].reference_iv.LeftEnd(0)  << endl;
+					if( siv_list[sivI].reference_iv.LeftEnd(0) != 0 )
+						cerr << "siv_list[" << sivI << "] right end 0: " << siv_list[sivI].reference_iv.LeftEnd(0) + siv_list[sivI].reference_iv.Length(0) << endl;
+					cerr << "siv_list[" << sivI << "] left end 1: " << siv_list[sivI].reference_iv.LeftEnd(1)  << endl;
+					if( siv_list[sivI].reference_iv.LeftEnd(1) != 0 )
+						cerr << "siv_list[" << sivI << "] right end 1: " << siv_list[sivI].reference_iv.LeftEnd(1) + siv_list[sivI].reference_iv.Length(1) << endl;
+				}
+				breakHere();
+			}
+			if( ml[mI]->LeftEnd(seqI) + ml[mI]->Length(seqI) > 
+				siv_list[sivI].reference_iv.LeftEnd(cur_child) + siv_list[sivI].reference_iv.Length(cur_child) )
+			{
+				cerr << "doesn't fit\n";
+				cerr << "ml[" << mI << "]->LeftEnd(" << seqI << "): " << ml[mI]->LeftEnd(seqI) << endl;
+				cerr << "ml[" << mI << "]->RightEnd(" << seqI << "): " << ml[mI]->RightEnd(seqI) << endl;
+				cerr << "siv_list[" << sivI << "] left end 0: " << siv_list[sivI].reference_iv.LeftEnd(0)  << endl;
+				if( siv_list[sivI].reference_iv.LeftEnd(0) != 0 )
+					cerr << "siv_list[" << sivI << "] right end 0: " << siv_list[sivI].reference_iv.LeftEnd(0) + siv_list[sivI].reference_iv.Length(0) << endl;
+				cerr << "siv_list[" << sivI << "] left end 1: " << siv_list[sivI].reference_iv.LeftEnd(1)  << endl;
+					if( siv_list[sivI].reference_iv.LeftEnd(1) != 0 )
+						cerr << "siv_list[" << sivI << "] right end 1: " << siv_list[sivI].reference_iv.LeftEnd(1) + siv_list[sivI].reference_iv.Length(1) << endl;
+				cerr << "ml.size(): " << ml.size() << endl;
+				cerr << "siv_list.size(): " << siv_list.size() << endl;
+				cerr << "trans_path:";
+				for( size_t tI = 0; tI < trans_path.size(); tI++ )
+					cerr << " " << trans_path[tI];
+				cerr << endl;
+				cerr << "trans_path[" << nI << "]: " << trans_path[nI] << endl;
+				breakHere();
+			}
+			// make the left end relative to the start of its superinterval (coordinates stay 1-based)
+			ml[mI]->SetLeftEnd( seqI, ml[mI]->LeftEnd(seqI) - siv_list[sivI].reference_iv.LeftEnd(cur_child) + 1 );
+			// if this interval matches the reverse strand then we should effectively invert all matches
+			if( siv_list[sivI].reference_iv.Start(cur_child) < 0 )
+			{
+				int64 new_lend = siv_list[sivI].reference_iv.Length(cur_child) - ml[mI]->LeftEnd(seqI);
+				new_lend -= ml[mI]->Length( seqI ) - 2;
+				new_lend *= ml[mI]->Orientation(seqI) == AbstractMatch::forward ? 1 : -1;
+				ml[mI]->Invert();
+				ml[mI]->SetStart( seqI, new_lend ); 
+			}
+			siv_matches[sivI].push_back( ml[mI] );
+		}
+
+		// apply the coordinate translation
+		ml.clear();
+		for( size_t sivI = 0; sivI < siv_matches.size(); sivI++ )
+		{
+			if( siv_matches[sivI].size() == 0 )
+				continue;
+			
+			// get a CompactGappedAlignment<> for this interval
+			CompactGappedAlignment<>* siv_cga = dynamic_cast<CompactGappedAlignment<>*>(siv_list[sivI].reference_iv.GetMatches()[0]);
+			if( siv_list[sivI].reference_iv.GetMatches().size() > 1 )
+				siv_cga = NULL;
+			bool alloc_new_siv = false;
+			CompactGappedAlignment<> tmp_cga;
+			if( siv_cga == NULL )
+			{
+				alloc_new_siv = true;
+				siv_cga = tmp_cga.Copy();
+				CompactGappedAlignment<> dorkas(siv_list[sivI].reference_iv);
+				*siv_cga = dorkas;
+			}
+
+			// now translate each match...
+			for( size_t mI = 0; mI < siv_matches[sivI].size(); mI++ )
+			{
+				CompactGappedAlignment<>* match_cga = dynamic_cast<CompactGappedAlignment<>*>(siv_matches[sivI][mI]);
+				bool alloc_new = false;
+				if( match_cga == NULL )
+				{
+					match_cga = tmp_cga.Copy();
+					*match_cga = CompactGappedAlignment<>(*(siv_matches[sivI][mI]));
+					alloc_new = true;
+				}
+				siv_cga->translate( *match_cga, seqI, cur_child );
+				// when a converted copy was made, it replaces the original match, which is released
+				if( alloc_new )
+				{
+					siv_matches[sivI][mI]->Free();
+					siv_matches[sivI][mI] = match_cga;
+				}
+			}
+
+			// shift coordinates back to global space
+			for( size_t mI = 0; mI < siv_matches[sivI].size(); mI++ )
+			{
+				int64 cur_start = siv_matches[sivI][mI]->Start(seqI);
+				if( cur_start > 0 )
+					siv_matches[sivI][mI]->SetStart( seqI, cur_start + siv_list[sivI].LeftEnd() - 1 );
+				else
+					siv_matches[sivI][mI]->SetStart( seqI, cur_start - siv_list[sivI].LeftEnd() + 1);
+				if( (siv_matches[sivI][mI]->LeftEnd(seqI) + siv_matches[sivI][mI]->Length(seqI) > siv_list.back().LeftEnd() + siv_list.back().Length() )
+					 )
+				{
+					// is there something wrong with the translation table?
+					cerr << "siv left is: " << siv_list[sivI].LeftEnd() << endl;
+					cerr << "siv right is: " << siv_list[sivI].LeftEnd() + siv_list[sivI].Length() << endl;
+					cerr << "match right is: " << siv_matches[sivI][mI]->LeftEnd(seqI) + siv_matches[sivI][mI]->Length(seqI) << endl;
+					cerr << "superseq right is: " << siv_list.back().LeftEnd() + siv_list.back().Length() << endl;
+					cerr << "";
+					breakHere();
+				}
+				if( debug_aligner && siv_matches[sivI][mI]->Start(seqI) == 0 )
+				{
+					breakHere();
+				}
+			}
+			if(alloc_new_siv)
+				siv_cga->Free();
+			ml.insert( ml.end(), siv_matches[sivI].begin(), siv_matches[sivI].end() );
+		}
+	}
+	// restore forward orientation seqI
+	for( size_t mI = 0; mI < ml.size(); mI++ )
+		if( ml[mI]->Orientation(seqI) == AbstractMatch::reverse )
+			ml[mI]->Invert();
+}
+
+class SuperIntervalPtrComp	// comparator for sorting SuperInterval pointers by the objects they point to
+{
+public:
+	bool operator()( const SuperInterval* a, const SuperInterval* b ) const	// const: usable through const comparator objects as well
+	{
+		return (*a) < (*b);	// defers to operator< on SuperInterval
+	}
+};
+
+void ProgressiveAligner::recursiveApplyAncestralBreakpoints( node_id_t ancestor )
+{
+	stack<node_id_t> pending;
+	pending.push(ancestor);
+	while( !pending.empty() )
+	{
+		// per node: apply its breakpoints to each child's ordering, re-sort that ordering, queue the child, then link parent and children
+		const node_id_t cur_node = pending.top();
+		pending.pop();
+		SuperIntervalManipulator sim;
+		if( progress_msgs ) cout << "cur node: " << cur_node << endl;
+		for( size_t childI = 0; childI < alignment_tree[cur_node].children.size(); childI++ )
+		{
+			const node_id_t child_id = alignment_tree[cur_node].children[childI];
+			AlignmentTreeNode& child = alignment_tree[child_id];
+			if( progress_msgs ) cout << "childI " << childI << " aab\n";
+			applyAncestralBreakpoints( alignment_tree[cur_node].ordering, child.ordering, childI, sim );
+			if( progress_msgs ) cout << "sort childI " << childI << "\n";
+			vector<SuperInterval*> sorted_ptrs(child.ordering.size());
+			for( size_t sivI = 0; sivI < child.ordering.size(); ++sivI )
+				sorted_ptrs[sivI] = &(child.ordering[sivI]);
+			sort( sorted_ptrs.begin(), sorted_ptrs.end(), SuperIntervalPtrComp() );
+			vector< SuperInterval > sorted_sivs;
+			for( size_t sivI = 0; sivI < sorted_ptrs.size(); ++sivI )
+				sorted_sivs.push_back(*sorted_ptrs[sivI]);
+			swap(sorted_sivs, child.ordering);
+			pending.push( child_id );
+		}
+		if( debug_aligner && alignment_tree[cur_node].children.size() > 0 )
+			validateSuperIntervals(alignment_tree[cur_node].children[0], alignment_tree[cur_node].children[1], cur_node);
+		if( progress_msgs ) cout << "linking node " << cur_node << "'s" << alignment_tree[cur_node].ordering.size() << " superintervals\n"; 
+		for( size_t childI = 0; childI < alignment_tree[cur_node].children.size(); childI++ )
+			linkSuperIntervals( alignment_tree[cur_node].children[childI], childI, cur_node );
+	}
+}
+
+
+boolean getInterveningCoordinates( const AbstractMatch* iv, uint oseqI, Match* r_begin, Match* r_end, uint seqI, int64& gap_lend, int64& gap_rend ){
+	// Computes the bounds of the region between r_begin and r_end in sequence seqI; always returns true.  If either bounding match is undefined in seqI both outputs are set to 0.
+	if( (r_end != NULL && r_end->Start( seqI ) == NO_MATCH) ||
+		(r_begin != NULL && r_begin->Start( seqI ) == NO_MATCH) ){
+		gap_lend = 0;
+		gap_rend = 0;
+		return true;
+	}
+	// a NULL bounding match is replaced by the corresponding end of iv in sequence oseqI
+	// determine the size of the gap
+	gap_rend = r_end != NULL ? r_end->Start( seqI ) : iv->RightEnd( oseqI ) + 1;
+	gap_lend = r_begin != NULL ? r_begin->End( seqI ) + 1 : iv->LeftEnd( oseqI );
+	if( gap_rend < 0 || gap_lend < 0 ){	// negative coordinate: recompute from negated starts (presumably the reverse-strand case -- confirm)
+		gap_rend = r_begin != NULL ? -r_begin->Start( seqI ) : iv->RightEnd( oseqI ) + 1;
+		gap_lend = r_end != NULL ? -r_end->Start( seqI ) + r_end->Length() : 1;
+	}
+	if( gap_rend <= 0 || gap_lend <= 0 ){
+		// if either is still <= 0 then there's a problem; it is reported, but the function still returns true
+		genome::ErrorMsg( "Error constructing intervening coordinates" );
+	}
+	return true;
+}
+
+
+/** Deletes the temporary sequences and sorted mer lists owned by a gap-search MatchList */
+static void freeGapSearchTables( MatchList& gap_list )
+{
+	for( size_t tableI = 0; tableI < gap_list.seq_table.size(); tableI++ )
+		delete gap_list.seq_table[ tableI ];
+	for( size_t tableI = 0; tableI < gap_list.sml_table.size(); tableI++ )
+		delete gap_list.sml_table[ tableI ];
+}
+
+/**
+ * Searches the region between two bounding matches for additional pairwise
+ * anchors and appends whatever is found to r_list.
+ * @param r_list	receives the new matches; its seq_table supplies the two sequences to search
+ * @param r_begin	match bounding the search region on one side, or NULL
+ * @param r_end		match bounding the search region on the other side, or NULL
+ * @param iv		interval supplying the outer boundary when a bounding match is NULL
+ * @param oseqI		index within iv of the first sequence
+ * @param oseqJ		index within iv of the second sequence
+ */
+void ProgressiveAligner::pairwiseAnchorSearch( MatchList& r_list, Match* r_begin, Match* r_end, const AbstractMatch* iv, uint oseqI, uint oseqJ )
+{
+	MatchList gap_list;
+	vector< int64 > starts;	// left coordinate of the searched region in each sequence
+// 
+//	Get the sequence in the intervening gaps between these two matches
+//
+	for( uint seqI = 0; seqI < 2; seqI++ )
+	{
+		int64 gap_end = 0;
+		int64 gap_start = 0;
+		getInterveningCoordinates( iv, (seqI == 0 ? oseqI : oseqJ), r_begin, r_end, seqI, gap_start, gap_end);
+		int64 diff = gap_end - gap_start;
+		diff = diff > 0 ? diff - 1 : 0;
+
+		starts.push_back( gap_start );
+		gnSequence* new_seq = NULL;
+		// fall back to an empty sequence when the region is empty or runs off the end
+		if(diff > 0 && gap_start + diff - 1 <= r_list.seq_table[ seqI ]->length())
+			new_seq = new gnSequence( r_list.seq_table[ seqI ]->ToString( diff, gap_start ) );
+		else
+			new_seq = new gnSequence();
+		gap_list.seq_table.push_back( new_seq );
+		gap_list.sml_table.push_back( new DNAMemorySML() );
+	}
+
+	gnSeqI avg_len = (gap_list.seq_table[0]->length() + gap_list.seq_table[1]->length())/2;
+	uint search_seed_size = getDefaultSeedWeight( avg_len );
+	gap_mh.get().Clear();
+
+	// the seed weight does not change between seeds, so decide once, before
+	// doing any per-seed work, whether a search is possible at all
+	if( search_seed_size < MIN_DNA_SEED_WEIGHT )
+	{
+		freeGapSearchTables( gap_list );
+		return;
+	}
+	
+	const uint seed_count = use_seed_families ? 3 : 1;
+	for( size_t seedI = 0; seedI < seed_count; seedI++ )
+	{
+		//
+		//	Create sorted mer lists for the intervening gap region
+		//
+		uint64 default_seed = getSeed( search_seed_size, seedI );
+		for( uint seqI = 0; seqI < gap_list.seq_table.size(); seqI++ ){
+			gap_list.sml_table[ seqI ]->Clear();
+			gap_list.sml_table[ seqI ]->Create( *(gap_list.seq_table[ seqI ]), default_seed );
+		}
+
+		//
+		//	Find all matches in the gap region
+		//
+		gap_mh.get().ClearSequences();
+		if(seed_count>1)
+		{
+			// with several seeds the matches are collected from gap_mh once all
+			// seeds have been searched, so the per-seed results are discarded here
+			MatchList cur_list = gap_list;
+			gap_mh.get().FindMatches( cur_list );
+			for( size_t mI = 0; mI < cur_list.size(); mI++ )
+				cur_list[mI]->Free();
+		}else
+			gap_mh.get().FindMatches( gap_list );
+	}
+	if(seed_count>1)
+		gap_mh.get().GetMatchList(gap_list);
+
+	EliminateOverlaps_v2( gap_list );
+
+	// for anchor accuracy, throw out any anchors that are shorter than the minimum
+	// anchor length after EliminateOverlaps()
+	gap_list.LengthFilter( MIN_ANCHOR_LENGTH + 3 );
+
+	// translate match coordinates from the extracted subsequences back to the
+	// coordinates of the original sequences, reusing the region starts computed above
+	for( size_t gI = 0; gI < gap_list.size(); gI++ )
+	{
+		for( uint seqI = 0; seqI < 2; seqI++ )
+			gap_list[gI]->SetLeftEnd(seqI, gap_list[gI]->LeftEnd(seqI) + starts[seqI] - 1);
+	}
+	r_list.insert(r_list.end(), gap_list.begin(), gap_list.end());
+
+	// delete sequences and smls
+	freeGapSearchTables( gap_list );
+}
+
+/**
+ * For every pair of sequences (one below each child node) scans the alignment
+ * in iv for stretches where the two sequences are not aligned to each other and
+ * searches sufficiently long stretches for additional anchors.
+ * @param node1_seqs		node ids of the sequences below the first child
+ * @param node2_seqs		node ids of the sequences below the second child
+ * @param iv				the alignment to scan
+ * @param matches			(output) reset to a node1_seqs.size() x node2_seqs.size() matrix, receives the anchors found for each pair
+ * @param search_cache_db	per-pair list of regions that were searched previously
+ * @param new_cache_db		per-pair list that receives the regions examined now (only when using_cache_db is set)
+ * @param iv_regions		receives, per pair and per sequence, begin/end coordinate pairs of the
+ *							unaligned regions at the outer ends of iv
+ */
+template<class GappedAlignmentType>
+void ProgressiveAligner::recurseOnPairs( const vector<node_id_t>& node1_seqs, const vector<node_id_t>& node2_seqs, const GappedAlignmentType& iv, Matrix<MatchList>& matches, Matrix< std::vector< search_cache_t > >& search_cache_db, Matrix< std::vector< search_cache_t > >& new_cache_db, boost::multi_array< vector< vector< int64 > >, 2 >& iv_regions )
+{
+	matches = Matrix<MatchList>(node1_seqs.size(),node2_seqs.size());
+
+	std::vector< bitset_t > aln_matrix;
+	iv.GetAlignment(aln_matrix);
+	Match tmp(2);
+
+	// flatten the pair matrix into a list so a single loop can be parallelized
+	const size_t sizer = node1_seqs.size() * node2_seqs.size();
+	std::vector< std::pair<size_t,size_t> > node_pairs(sizer);
+	size_t nni = 0;
+	for( size_t n1 = 0; n1 < node1_seqs.size(); n1++ )
+		for( size_t n2 = 0; n2 < node2_seqs.size(); n2++ )
+			node_pairs[nni++] = make_pair(n1,n2);
+
+	const int pair_count = (int)node_pairs.size();
+#pragma omp parallel for
+	for(int ni = 0; ni < pair_count; ni++)
+	{
+		size_t n1 = node_pairs[ni].first;
+		size_t n2 = node_pairs[ni].second;
+		vector<node_id_t>::const_iterator n1_iter = node1_seqs.begin() + n1;
+		vector<node_id_t>::const_iterator n2_iter = node2_seqs.begin() + n2;
+		
+		uint seqI = node_sequence_map[*n1_iter];
+		uint seqJ = node_sequence_map[*n2_iter];
+		MatchList& mlist = matches(n1, n2);
+		std::vector< search_cache_t >& cache = search_cache_db(n1, n2);
+		std::vector< search_cache_t >& new_cache = new_cache_db(n1, n2);
+		mlist.seq_table.push_back( alignment_tree[*n1_iter].sequence );
+		mlist.seq_table.push_back( alignment_tree[*n2_iter].sequence );
+
+		if( iv.LeftEnd(seqI) == NO_MATCH )
+		{
+			if( iv.LeftEnd(seqJ) != NO_MATCH )
+			{
+				iv_regions[n1][n2][1].push_back(iv.LeftEnd(seqJ));
+				iv_regions[n1][n2][1].push_back(iv.RightEnd(seqJ));
+			}
+			continue;	// no sense searching one isn't defined!
+		}
+		if(iv.LeftEnd(seqJ) == NO_MATCH )
+		{
+			if( iv.LeftEnd(seqI) != NO_MATCH )
+			{
+				iv_regions[n1][n2][0].push_back(iv.LeftEnd(seqI));
+				iv_regions[n1][n2][0].push_back(iv.RightEnd(seqI));
+			}
+			continue;	// no sense searching one isn't defined!
+		}
+
+		gnSeqI charI = 0;
+		gnSeqI charJ = 0;
+		const size_t iv_aln_length = iv.AlignmentLength();
+
+// first determine the outer aligned boundaries of the LCB and record them for
+// later use
+		pair< int64, int64 > pair_1l(0,0);
+		pair< int64, int64 > pair_1r(0,0);
+		pair< int64, int64 > pair_2l(0,0);
+		pair< int64, int64 > pair_2r(0,0);
+		// scan from the first column up to the first column where both sequences are aligned
+		for( uint colI = 0; colI <= iv_aln_length; colI++ )
+		{
+			if( colI == iv_aln_length || (aln_matrix[seqI].test(colI) && aln_matrix[seqJ].test(colI)) )
+			{
+				if( colI == 0 )
+					break;	// nothing to see here, move along...
+				if( iv.Orientation(seqI) == AbstractMatch::forward )
+					pair_1l = make_pair( iv.LeftEnd(seqI), iv.LeftEnd(seqI)+charI );
+				else
+					pair_1r = make_pair( iv.RightEnd(seqI)-charI+1, iv.RightEnd(seqI)+1 );
+				if( iv.Orientation(seqJ) == AbstractMatch::forward )
+					pair_2l = make_pair( iv.LeftEnd(seqJ), iv.LeftEnd(seqJ)+charJ );
+				else
+					pair_2r = make_pair( iv.RightEnd(seqJ)-charJ+1, iv.RightEnd(seqJ)+1 );
+				break;
+			}
+			if( colI < iv_aln_length && aln_matrix[seqI].test(colI) )
+				++charI;
+			if( colI < iv_aln_length && aln_matrix[seqJ].test(colI) )
+				++charJ;
+		}
+
+		// scan from the last column back to the last column where both sequences are aligned
+		charI = 0;
+		charJ = 0;
+		for( uint colI = iv_aln_length; colI > 0 ; colI-- )
+		{
+			if( (aln_matrix[seqI].test(colI-1) && aln_matrix[seqJ].test(colI-1)) )
+			{
+				if( colI == iv_aln_length )
+					break;	// nothing to see here, move along...
+				if( iv.Orientation(seqI) == AbstractMatch::forward )
+					pair_1r = make_pair( iv.RightEnd(seqI)-charI+1, iv.RightEnd(seqI)+1 );
+				else
+					pair_1l = make_pair( iv.LeftEnd(seqI), iv.LeftEnd(seqI)+charI );
+				if( iv.Orientation(seqJ) == AbstractMatch::forward )
+					pair_2r = make_pair( iv.RightEnd(seqJ)-charJ+1, iv.RightEnd(seqJ)+1 );
+				else
+					pair_2l = make_pair( iv.LeftEnd(seqJ), iv.LeftEnd(seqJ)+charJ );
+				break;
+			}
+			if( aln_matrix[seqI].test(colI-1) )
+				++charI;
+			if( aln_matrix[seqJ].test(colI-1) )
+				++charJ;
+		}
+
+		// record the non-empty outer regions of the first sequence
+		if( pair_1l.first < pair_1l.second )
+		{
+			iv_regions[n1][n2][0].push_back(pair_1l.first);
+			iv_regions[n1][n2][0].push_back(pair_1l.second);
+		}
+		if( pair_1r.first < pair_1r.second )
+		{
+			if( pair_1l.first < pair_1l.second && pair_1r.first == pair_1l.second )
+			{
+				// just merge them into a single interval
+				iv_regions[n1][n2][0].back() = pair_1r.second;
+			}else{
+				iv_regions[n1][n2][0].push_back(pair_1r.first);
+				iv_regions[n1][n2][0].push_back(pair_1r.second);
+				if( pair_1r.first <= pair_1l.second && pair_1r.second >= pair_1l.first )
+				{
+					cout << "Ohno.  Overlap in outside LCB search intervals\n";
+					cout << "Left: " << pair_1l.first << '\t' << pair_1l.second << " right:  " << pair_1r.first << '\t' << pair_1r.second << endl;
+					cout << "0 iv.Start(" << seqI << "): " << iv.Start(seqI) << '\t' << "iv.RightEnd(" << seqI << "): " << iv.RightEnd(seqI) << endl;
+					if( pair_1l.first == 0 )
+						genome::breakHere();
+				}
+			}
+		}
+
+		// record the non-empty outer regions of the second sequence
+		if( pair_2l.first < pair_2l.second )
+		{
+			iv_regions[n1][n2][1].push_back(pair_2l.first);
+			iv_regions[n1][n2][1].push_back(pair_2l.second);
+		}
+		if( pair_2r.first < pair_2r.second )
+		{
+			if( pair_2l.first < pair_2l.second && pair_2r.first == pair_2l.second )
+			{
+				// just merge them into a single interval
+				iv_regions[n1][n2][1].back() = pair_2r.second;
+			}else{
+				iv_regions[n1][n2][1].push_back(pair_2r.first);
+				iv_regions[n1][n2][1].push_back(pair_2r.second);
+				if( pair_2r.first <= pair_2l.second && pair_2r.second >= pair_2l.first )
+				{
+					cout << "Ohno.  Overlap in outside LCB search intervals\n";
+					cout << "Left: " << pair_2l.first << '\t' << pair_2l.second << " right:  " << pair_2r.first << '\t' << pair_2r.second << endl;
+					cout << "1 iv.Start(" << seqJ << "): " << iv.Start(seqJ) << '\t' << "iv.RightEnd(" << seqJ << "): " << iv.RightEnd(seqJ) << endl;
+					cout << "charI " << charI << "\tcharJ" << charJ << endl;
+					if( pair_2l.first == 0 )
+						genome::breakHere();
+				}
+			}
+		}
+
+		// now walk the whole alignment looking for interior stretches where the
+		// two sequences are not aligned to each other
+		charI = 0;
+		charJ = 0;
+		gnSeqI prev_charI = 0;
+		gnSeqI prev_charJ = 0;
+		bool in_gap = false;
+
+		for( uint colI = 0; colI <= iv_aln_length; colI++ )
+		{
+			if( colI == iv_aln_length || 
+				(aln_matrix[seqI].test(colI) && aln_matrix[seqJ].test(colI)) )
+			{
+				// only stretches that are long enough in both sequences get searched
+				if( in_gap && 
+					charI - prev_charI > min_recursive_gap_length &&
+					charJ - prev_charJ > min_recursive_gap_length )
+				{
+
+					// zero-length match marking one boundary of the stretch
+					Match* l_match = NULL;
+					l_match = tmp.Copy();
+					if(iv.Orientation(seqI) == AbstractMatch::forward)
+						l_match->SetLeftEnd(0, iv.LeftEnd(seqI)+prev_charI);
+					else
+					{
+						l_match->SetLeftEnd(0, iv.RightEnd(seqI)-prev_charI);
+						l_match->SetOrientation(0, AbstractMatch::reverse );
+					}
+					if(iv.Orientation(seqJ) == AbstractMatch::forward)
+						l_match->SetLeftEnd(1, iv.LeftEnd(seqJ)+prev_charJ);
+					else
+					{
+						l_match->SetLeftEnd(1, iv.RightEnd(seqJ)-prev_charJ);
+						l_match->SetOrientation(1, AbstractMatch::reverse );
+					}
+					l_match->SetLength(0);
+
+					// zero-length match marking the other boundary of the stretch
+					Match* r_match = NULL;
+					if( charJ != iv.RightEnd(seqJ) && charI != iv.RightEnd(seqI) )
+					{
+						r_match = tmp.Copy();
+						if(iv.Orientation(seqI) == AbstractMatch::forward)
+							r_match->SetLeftEnd(0, iv.LeftEnd(seqI)+charI);
+						else
+						{
+							r_match->SetLeftEnd(0, iv.RightEnd(seqI)-charI);
+							r_match->SetOrientation(0, AbstractMatch::reverse );
+						}
+						if(iv.Orientation(seqJ) == AbstractMatch::forward)
+							r_match->SetLeftEnd(1, iv.LeftEnd(seqJ)+charJ);
+						else
+						{
+							r_match->SetLeftEnd(1, iv.RightEnd(seqJ)-charJ);
+							r_match->SetOrientation(1, AbstractMatch::reverse );
+						}
+						r_match->SetLength(0);
+					}
+
+					if( iv.Orientation(seqI) == AbstractMatch::reverse )
+					{
+						swap(l_match,r_match);
+						if( l_match != NULL ) l_match->Invert();
+						if( r_match != NULL ) r_match->Invert();
+					}
+					// check whether the current cache already has the searched region
+					search_cache_t cacheval = make_pair( l_match, r_match );
+					std::vector< search_cache_t >::iterator cache_entry = std::upper_bound( cache.begin(), cache.end(), cacheval, mems::cache_comparator );
+					if( cache_entry == cache.end() || 
+						(mems::cache_comparator( cacheval, *cache_entry ) || mems::cache_comparator( *cache_entry, cacheval )) )
+					{
+						// search this region
+							pairwiseAnchorSearch(mlist, l_match, r_match, &iv, seqI, seqJ);
+					}
+					if(using_cache_db)
+						new_cache.push_back( cacheval );	// the cache keeps the boundary matches
+					else
+					{
+						// nothing retains the boundary matches when the cache is
+						// not in use, so release them instead of leaking them
+						if( l_match != NULL ) l_match->Free();
+						if( r_match != NULL ) r_match->Free();
+					}
+				}
+				prev_charI = charI;
+				prev_charJ = charJ;
+				in_gap = false;
+			}
+			else
+				in_gap = true;
+			if( colI < iv.AlignmentLength() )
+			{
+				if( aln_matrix[seqI].test(colI) )
+					++charI;
+				if( aln_matrix[seqJ].test(colI) )
+					++charJ;
+			}
+		}
+	}
+}
+
+/**
+ * Extracts the pairwise matches in original_ml between every sequence below
+ * node1 and every sequence below node2, translates their coordinates to node1
+ * and node2, and appends the result to ancestral_matches.
+ * @param node1_seqs		node ids of the sequences below node1
+ * @param node2_seqs		node ids of the sequences below node2
+ * @param node1				node to which component 0 of each match is translated
+ * @param node2				node to which component 1 of each match is translated
+ * @param ancestor			not used by this function
+ * @param ancestral_matches	receives the translated matches; overlaps are eliminated over the whole list
+ */
+void ProgressiveAligner::getAncestralMatches( const vector< node_id_t > node1_seqs, const vector< node_id_t > node2_seqs, node_id_t node1, node_id_t node2, node_id_t ancestor, std::vector< AbstractMatch* >& ancestral_matches )
+{
+	// prototype from which each new pairwise match is copied
+	Match pair_proto( 2 );
+
+	for( uint n1I = 0; n1I < node1_seqs.size(); n1I++ )
+	{
+		const uint seq_a = this->node_sequence_map[node1_seqs[n1I]];
+		vector< AbstractMatch* > matches_for_a;
+
+		for( uint n2I = 0; n2I < node2_seqs.size(); n2I++ )
+		{
+			const uint seq_b = this->node_sequence_map[node2_seqs[n2I]];
+			vector< AbstractMatch* > pair_matches;
+
+			// project each original match onto the pair (seq_a, seq_b)
+			for( size_t matchI = 0; matchI < original_ml.size(); matchI++ )
+			{
+				// skip matches that do not include both sequences
+				if( original_ml[matchI]->LeftEnd(seq_a) == NO_MATCH ||
+					original_ml[matchI]->LeftEnd(seq_b) == NO_MATCH )
+					continue;
+
+				Match* projected = pair_proto.Copy();
+				projected->SetStart( 0, original_ml[matchI]->Start(seq_a));
+				projected->SetStart( 1, original_ml[matchI]->Start(seq_b));
+				projected->SetLength(original_ml[matchI]->Length());
+				// assign reference orientation to seq 0
+				if( projected->Start(0) < 0 )
+					projected->Invert();
+				pair_matches.push_back( projected );
+			}
+
+			// move component 1 of this pair's matches into node2 coordinates
+			translateGappedCoordinates( pair_matches, 1, node2_seqs[n2I], node2 );
+			seqI_append:
+			matches_for_a.insert( matches_for_a.end(), pair_matches.begin(), pair_matches.end() );
+		}
+
+		// resolve overlaps before moving component 0 into node1 coordinates
+		EliminateOverlaps_v2( matches_for_a );
+		translateGappedCoordinates( matches_for_a, 0, node1_seqs[n1I], node1 );
+		ancestral_matches.insert( ancestral_matches.end(), matches_for_a.begin(), matches_for_a.end() );
+	}
+	EliminateOverlaps_v2( ancestral_matches );
+}
+
+
+/**
+ * Builds, for every pair of sequences (one from each list), a two-sequence
+ * MatchList holding the projection of each match in original_ml that
+ * includes both sequences.
+ * @param node1_seqs		node ids of the first group of sequences
+ * @param node2_seqs		node ids of the second group of sequences
+ * @param pairwise_matches	(output) reset to a node1_seqs.size() x node2_seqs.size() matrix of match lists
+ */
+void ProgressiveAligner::getPairwiseMatches( const vector< node_id_t >& node1_seqs, const vector< node_id_t >& node2_seqs, Matrix<MatchList>& pairwise_matches )
+{
+	pairwise_matches = Matrix< MatchList >( node1_seqs.size(), node2_seqs.size() );
+
+	// give each cell the sequences and file names of its two genomes
+	for( uint rowI = 0; rowI < node1_seqs.size(); rowI++ )
+	{
+		for( uint colI = 0; colI < node2_seqs.size(); colI++ )
+		{
+			const uint seq_a = this->node_sequence_map[node1_seqs[rowI]];
+			const uint seq_b = this->node_sequence_map[node2_seqs[colI]];
+			MatchList& cell = pairwise_matches(rowI, colI);
+			cell.seq_table.push_back(original_ml.seq_table[seq_a]);
+			cell.seq_table.push_back(original_ml.seq_table[seq_b]);
+			cell.seq_filename.push_back(original_ml.seq_filename[seq_a]);
+			cell.seq_filename.push_back(original_ml.seq_filename[seq_b]);
+		}
+	}
+
+	// prototype from which each new pairwise match is copied
+	Match pair_proto( 2 );
+
+	// project every original match onto each pair of sequences it includes
+	for( size_t matchI = 0; matchI < original_ml.size(); matchI++ )
+	{
+		for( uint rowI = 0; rowI < node1_seqs.size(); rowI++ )
+		{
+			const uint seq_a = this->node_sequence_map[node1_seqs[rowI]];
+			if( original_ml[matchI]->LeftEnd(seq_a) != NO_MATCH )
+			{
+				for( uint colI = 0; colI < node2_seqs.size(); colI++ )
+				{
+					const uint seq_b = this->node_sequence_map[node2_seqs[colI]];
+					if( original_ml[matchI]->LeftEnd(seq_b) == NO_MATCH )
+						continue;
+
+					Match* projected = pair_proto.Copy();
+					projected->SetStart( 0, original_ml[matchI]->Start(seq_a));
+					projected->SetStart( 1, original_ml[matchI]->Start(seq_b));
+					projected->SetLength(original_ml[matchI]->Length());
+					// assign reference orientation to seq 0
+					if( projected->Start(0) < 0 )
+						projected->Invert();
+					pairwise_matches(rowI,colI).push_back( projected );
+				}
+			}
+		}
+	}
+}
+
+
+/**
+ * Classifies an alignment by the fraction of its cells that hold a character
+ * rather than a gap.
+ * @param gal_iter	the alignment to classify
+ * @return	2 when the density exceeds max_density, 1 when it exceeds the
+ *			length-dependent threshold, 0 otherwise (including an alignment
+ *			with no columns or no defined sequences)
+ */
+int IsDenseEnough( GappedAlignment* gal_iter )
+{
+	double total_len = 0;
+	for( uint seqI = 0; seqI < gal_iter->SeqCount(); seqI++ )
+	{
+		if( gal_iter->LeftEnd(seqI) == NO_MATCH )
+			continue;
+		total_len += gal_iter->Length(seqI);
+	}
+
+	// number of cells in the alignment restricted to its defined sequences
+	const double aln_cells = gal_iter->AlignmentLength() * (double)gal_iter->Multiplicity();
+	// avoid dividing by zero: an alignment without cells has no meaningful density
+	if( aln_cells <= 0 )
+		return 0;
+	double density = total_len / aln_cells;
+	// density of 1 is ideal
+	// the shorter the alignment, the closer we should be to 1 to allow splitting
+	// use a linear threshold with (min_window_size,max_density) and (max_window_size,min_density)
+	// as endpoints of the threshold line
+	
+	// determine the density threshold for the given alignment length
+	double threshold = ((max_density - min_density)/(min_window_size - max_window_size)) * ( (double)gal_iter->AlignmentLength() - max_window_size ) + min_density;
+	if( density > max_density )	// don't bother aligning this, it's so dense we'll wait until iterative refinement.
+		return 2;
+	if( density > threshold )
+		return 1;
+	return 0;
+}
+
+/**
+ * Makes two copies of an alignment, each with one group of sequences marked
+ * as undefined.
+ * @param ga	the alignment to split
+ * @param ga1	(output) copy of ga in which the sequences listed in seqs2 are set to NO_MATCH
+ * @param ga2	(output) copy of ga in which the sequences listed in seqs1 are set to NO_MATCH
+ * @param seqs1	indices of the sequences that stay defined in ga1
+ * @param seqs2	indices of the sequences that stay defined in ga2
+ */
+void splitGappedAlignment( const GappedAlignment& ga, GappedAlignment& ga1, GappedAlignment& ga2, std::vector<size_t>& seqs1, std::vector<size_t>& seqs2 )
+{
+	// the alignment text itself is never inspected here, so there is no need
+	// to materialize it with GetAlignment() before copying
+	ga1 = ga;
+	ga2 = ga;
+	for( size_t seqI = 0; seqI < seqs1.size(); seqI++ )
+		ga2.SetLeftEnd(seqs1[seqI], NO_MATCH);
+	for( size_t seqI = 0; seqI < seqs2.size(); seqI++ )
+		ga1.SetLeftEnd(seqs2[seqI], NO_MATCH);
+}
+
+/**
+ * Cuts an alignment into consecutive pieces so that long runs of columns in
+ * which group1 and group2 are not aligned to each other end up in pieces of
+ * their own.
+ * @param gal		the alignment to cut; it is copied, not modified
+ * @param gal_list	(output) cleared, then filled with the pieces in left-to-right order
+ * @param gap_iv	(output) cleared, then filled with one flag per piece; true marks a
+ *					piece that was cut out as a long unaligned run
+ * @param group1	indices of the first group of sequences
+ * @param group2	indices of the second group of sequences
+ */
+void removeLargeGapsPP( GappedAlignment& gal, list< GappedAlignment* >& gal_list, vector<bool>& gap_iv, const vector< size_t >& group1, const vector< size_t >& group2 )
+{
+	// scan through and remove any section where members of group1 aren't aligned to members of group2
+	// for more than some number of nucleotides
+	gap_iv.clear();
+	gal_list.clear();
+	const vector< string >& aln_matrix = GetAlignment(gal, vector<gnSequence*>(gal.SeqCount(),NULL));
+	size_t gap_cols = 0;	// length of the current run of unaligned columns
+	size_t last_aln_col = (std::numeric_limits<size_t>::max)();	// last aligned column seen, max() when none yet
+	size_t col_base = 0;	// number of columns of gal already split off to the left of galp
+	GappedAlignment* galp = gal.Copy();
+	for( size_t colI = 0; colI < gal.AlignmentLength(); colI++ )
+	{
+		 // find the first member of each group with a non-gap character in this column
+		 size_t g1 = 0;
+		 size_t g2 = 0;
+		 for( ; g1 < group1.size(); ++g1 )
+		 {
+			 if( aln_matrix[group1[g1]][colI] != '-' )
+				 break;
+		 }
+		 for( ; g2 < group2.size(); ++g2 )
+		 {
+			 if( aln_matrix[group2[g2]][colI] != '-' )
+				 break;
+		 }
+		 if( g1 < group1.size() && g2 < group2.size() )
+		 {
+			 // it's an aligned col
+			 if( gap_cols > max_gap_length )
+			 {
+				// crop out the middle gapped section
+				gnSeqI split_point = 0;
+				if( last_aln_col != (std::numeric_limits<size_t>::max)() )
+				{
+					// first split off the aligned piece to the left of the run,
+					// extended by lcb_hangover columns past the last aligned column
+					split_point = last_aln_col + lcb_hangover - col_base;
+					gal_list.push_back( galp );
+					gap_iv.push_back(false);
+					galp = (GappedAlignment*)galp->Split(split_point);	// set galp to the right side after splitting
+					col_base += split_point;
+				}
+				// then split off the unaligned run, stopping lcb_hangover columns
+				// short of the current aligned column
+				split_point = colI - lcb_hangover - col_base;
+				gal_list.push_back( galp );
+				gap_iv.push_back(true);
+				galp = (GappedAlignment*)galp->Split(split_point);	// set galp to the right side after splitting
+				col_base += split_point;
+			 }
+			 last_aln_col = colI;
+			 gap_cols = 0;
+		 }else
+			 ++gap_cols;
+	}
+
+	// handle a long unaligned run that extends to the end of the alignment
+	if( gap_cols > max_gap_length )
+	{
+		gnSeqI split_point = 0;
+		if( last_aln_col != (std::numeric_limits<size_t>::max)() )
+		{
+			split_point = last_aln_col + lcb_hangover - col_base;
+			gal_list.push_back( galp );
+			gap_iv.push_back(false);
+			galp = (GappedAlignment*)galp->Split(split_point);	// set galp to the right side after splitting
+		}
+		gap_iv.push_back(true);
+	}else
+		gap_iv.push_back(false);
+	// whatever remains is the final piece; its flag was pushed just above
+	gal_list.push_back( galp );
+}
+
+/**
+ * Refines a gapped alignment by cutting it into windows, passing each window
+ * to muscle, and gluing the refined windows back together.
+ * @param gal			the alignment to refine; replaced by the refined alignment
+ * @param ancestor		node whose two children define the two groups of sequences
+ * @param profile_aln	when true, windows are refined by profile-profile alignment of the
+ *						two groups, otherwise by refinement of the whole window
+ * @param apt			progress tracker, advanced by the length of each window processed
+ */
+void ProgressiveAligner::refineAlignment( GappedAlignment& gal, node_id_t ancestor, bool profile_aln, AlnProgressTracker& apt )
+{
+	// divide the gapped alignment up into windows of a given size and have
+	// muscle refine the alignments
+	// when anchors are dense use smaller windows to improve speed efficiency
+	list< GappedAlignment* > gal_list;
+	vector<bool> gap_iv;
+	std::vector<node_id_t> nodes1;
+	std::vector<node_id_t> nodes2;
+	getAlignedChildren( alignment_tree[ancestor].children[0], nodes1 );
+	getAlignedChildren( alignment_tree[ancestor].children[1], nodes2 );
+	std::vector<size_t> seqs1( nodes1.size() );
+	std::vector<size_t> seqs2( nodes2.size() );
+	for( size_t nI = 0; nI < nodes1.size(); nI++ )
+		seqs1[nI] = node_sequence_map[nodes1[nI]];
+	for( size_t nI = 0; nI < nodes2.size(); nI++ )
+		seqs2[nI] = node_sequence_map[nodes2[nI]];
+
+	// separate long unaligned runs from the rest; gap_iv flags them
+	removeLargeGapsPP( gal, gal_list, gap_iv, seqs1, seqs2 );
+
+	// keep halving windows until each is short enough for its density class
+	list< GappedAlignment* >::iterator gal_iter = gal_list.begin();
+	vector<bool>::iterator gap_iter = gap_iv.begin();
+	while(gal_iter != gal_list.end())
+	{
+		int density = IsDenseEnough( *gal_iter );
+		if( (density == 0 && (*gal_iter)->AlignmentLength() > max_window_size / 3) ||
+			(density == 1 && (*gal_iter)->AlignmentLength() > max_window_size ) ||
+			(density == 2 && (*gal_iter)->AlignmentLength() > max_window_size * 3 ) )
+		{
+			// split in half
+			gnSeqI split_point = (*gal_iter)->AlignmentLength() / 2;
+			list< GappedAlignment* >::iterator ins_iter = gal_iter;
+			++ins_iter;
+			ins_iter = gal_list.insert(ins_iter, (*gal_iter)->Copy());
+			// duplicate the flag too; inserting invalidates gap_iter so restore it by offset
+			const size_t gap_off = gap_iter - gap_iv.begin();
+			const bool is_gap = *gap_iter;
+			gap_iv.insert( gap_iter + 1, is_gap );
+			gap_iter = gap_iv.begin() + gap_off;
+			(*gal_iter)->CropEnd( split_point );
+			(*ins_iter)->CropStart( (*ins_iter)->AlignmentLength() - split_point );
+			continue;	// re-examine the left half, which may still be too long
+		}
+
+		++gal_iter;
+		++gap_iter;
+	}
+	MuscleInterface& mi = MuscleInterface::getMuscleInterface();
+	// now that the alignment is all split up use muscle to refine it
+	gnSeqI new_len = 0;
+
+// this section can not be paralellized b/c it makes calls to muscle
+#pragma omp critical
+{
+	// walk the windows and their flags in lock step, a single pass over the list
+	list<GappedAlignment*>::iterator my_g_iter = gal_list.begin();
+	vector<bool>::iterator my_b_iter = gap_iv.begin();
+	for( ; my_g_iter != gal_list.end(); ++my_g_iter, ++my_b_iter )
+	{
+		apt.cur_leftend += (*my_g_iter)->AlignmentLength();
+		if( profile_aln && !(*my_b_iter) )
+		{
+			GappedAlignment ga1;
+			GappedAlignment ga2;
+			splitGappedAlignment( **my_g_iter, ga1, ga2, seqs1, seqs2 );
+			// a profile alignment needs something on both sides
+			if( ga1.Multiplicity() > 0 && ga2.Multiplicity() > 0 )
+			{
+				mi.ProfileAlignFast( ga1, ga2, **my_g_iter, true );
+			}
+		}else if(!(*my_b_iter))
+		{
+			int density = IsDenseEnough( *my_g_iter );
+			if( density == 0 )
+				mi.RefineFast( **my_g_iter );
+			else if( density == 1 )
+				mi.RefineFast( **my_g_iter, 500 );
+			else
+				mi.RefineFast( **my_g_iter, 200 );
+		}
+
+		new_len += (*my_g_iter)->AlignmentLength();
+		// print a progress message
+		double cur_progress = ((double)apt.cur_leftend / (double)apt.total_len)*100.0;
+		printProgress((uint)apt.prev_progress, (uint)cur_progress, cout);
+		apt.prev_progress = cur_progress;
+	}
+}
+
+	// put humpty dumpty back together
+	vector< string > aln_matrix( gal.SeqCount(), string( new_len, '-' ) );
+	vector< string::size_type > pos( gal.SeqCount(), 0 );
+	for( gal_iter = gal_list.begin(); gal_iter != gal_list.end(); ++gal_iter )
+	{
+		const vector< string >& tmp_mat = GetAlignment(**gal_iter, vector<gnSequence*>( gal.SeqCount() ) );
+		for( uint seqI = 0; seqI < tmp_mat.size(); seqI++ )
+		{
+			// rows of undefined sequences stay all-gap
+			if( gal.LeftEnd(seqI) == NO_MATCH )
+				continue;
+			aln_matrix[seqI].replace(pos[seqI], tmp_mat[seqI].size(), tmp_mat[seqI]);
+			pos[seqI] += tmp_mat[seqI].size();
+		}
+		(*gal_iter)->Free();
+	}
+	gal.SetAlignment(aln_matrix);
+}
+
+/**
+ * Runs gapped alignment refinement on every superinterval of a node and then
+ * repairs the left-end coordinates in the subtree below it.
+ * @param ancestor		the node whose superintervals are refined
+ * @param profile_aln	passed through to refineAlignment()
+ */
+void ProgressiveAligner::doGappedAlignment( node_id_t ancestor, bool profile_aln )
+{
+	// the total length of all superintervals is the denominator for progress reporting
+	gnSeqI length_sum = 0;
+	for( size_t sivI = 0; sivI < alignment_tree[ancestor].ordering.size(); ++sivI )
+		length_sum += alignment_tree[ancestor].ordering[sivI].Length();
+
+	AlnProgressTracker apt;
+	apt.total_len = length_sum;
+	apt.prev_progress = 0;
+
+	printProgress(-1, 0, cout);
+	apt.cur_leftend = 1;
+
+	for( size_t sivI = 0; sivI < alignment_tree[ancestor].ordering.size(); ++sivI )
+	{
+		const bool aligned_here = alignment_tree[ancestor].ordering[sivI].reference_iv.Multiplicity() != 1;
+		if( aligned_here )
+		{
+			GappedAlignment gal;
+			extractAlignment(ancestor, sivI, gal);
+
+			// no point in refining intervals that are unaligned anyways
+			if( gal.Multiplicity() <= 1 )
+				apt.cur_leftend += gal.AlignmentLength();
+			else
+				refineAlignment( gal, ancestor, profile_aln, apt );
+
+			ConstructSuperIntervalFromMSA(ancestor, sivI, gal);
+
+			// print a progress message
+			double cur_progress = ((double)apt.cur_leftend / (double)apt.total_len)*100.0;
+			printProgress((uint)apt.prev_progress, (uint)cur_progress, cout);
+			apt.prev_progress = cur_progress;
+		}
+		else
+		{
+			// don't bother re-refining intervals that didn't get aligned here,
+			// just account for their length
+			apt.cur_leftend += alignment_tree[ancestor].ordering[sivI].reference_iv.AlignmentLength();
+		}
+	}
+
+	printMemUsage();
+	cout << "Fix left ends\n";
+	FixLeftEnds(ancestor);
+	printMemUsage();
+
+	if( debug_aligner )
+		validateSuperIntervals(alignment_tree[ancestor].children[0], alignment_tree[ancestor].children[1], ancestor);
+	cout << "\ndone.\n";
+}
+
+/**
+ * Reassigns the left-end coordinates of every SuperInterval at each internal
+ * node in the subtree rooted at ancestor, children before parents.
+ * @param ancestor	root of the subtree to process
+ */
+void ProgressiveAligner::FixLeftEnds( node_id_t ancestor )
+{
+	const size_t NO_CHILD_SIV = (std::numeric_limits<size_t>::max)();
+	stack< node_id_t > pending;
+	pending.push( ancestor );
+	vector<bool> expanded( alignment_tree.size(), false );
+	while( !pending.empty() )
+	{
+		const node_id_t cur_node = pending.top();
+		// first visit: schedule the children so they get processed before this node
+		if( !expanded[cur_node] )
+		{
+			for( size_t childI = 0; childI < alignment_tree[cur_node].children.size(); childI++ )
+				pending.push( alignment_tree[cur_node].children[childI] );
+			expanded[cur_node] = true;
+			continue;
+		}
+		pending.pop();
+		// don't do anything on leaf nodes
+		if( alignment_tree[cur_node].sequence != NULL )
+			continue;
+
+		vector< SuperInterval >& siv_list = alignment_tree[cur_node].ordering;
+		gnSeqI next_left_end = 1;
+		for( size_t sivI = 0; sivI < siv_list.size(); sivI++ )
+		{
+			SuperInterval& siv = siv_list[sivI];
+
+			// lay the superintervals out one after another starting at 1
+			siv.SetLeftEnd(next_left_end);
+			siv.SetLength(siv.reference_iv.AlignmentLength());
+			next_left_end += siv.reference_iv.AlignmentLength();
+
+			CompactGappedAlignment<>* m_cga = dynamic_cast<CompactGappedAlignment<>*>(siv.reference_iv.GetMatches()[0]);
+			// anything other than a single CompactGappedAlignment wasn't refined
+			const bool unrefined = m_cga == NULL || siv.reference_iv.GetMatches().size() > 1;
+
+			for( uint childI = 0; childI <= 1; childI++ )
+			{
+				const size_t child_siv = childI == 0 ? siv.c1_siv : siv.c2_siv;
+				if( child_siv == NO_CHILD_SIV )
+					continue;
+				const SuperInterval& c_siv = alignment_tree[ alignment_tree[cur_node].children[childI] ].ordering[ child_siv ];
+
+				if( unrefined )
+				{
+					// just move the interval and each of its matches by the
+					// distance to the child's left end
+					int64 diff = c_siv.LeftEnd() - siv.reference_iv.LeftEnd(childI);
+					siv.reference_iv.SetLeftEnd(childI, c_siv.LeftEnd());
+					const vector< AbstractMatch* >& matches = siv.reference_iv.GetMatches();
+					for( size_t mI = 0; mI < matches.size(); mI++ )
+					{
+						if( matches[mI]->LeftEnd(childI) != NO_MATCH )
+							matches[mI]->SetLeftEnd(childI, matches[mI]->LeftEnd(childI) + diff);
+					}
+				}
+				else
+				{
+					// copy left end and length from the child superinterval to both
+					// the alignment and the interval wrapping it
+					m_cga->SetLeftEnd(childI, c_siv.LeftEnd());
+					siv.reference_iv.SetLeftEnd(childI, c_siv.LeftEnd());
+					m_cga->SetLength(c_siv.Length(), childI);
+					siv.reference_iv.SetLength(c_siv.Length(), childI);
+					siv.reference_iv.SetOrientation(childI, m_cga->Orientation(childI));
+				}
+			}
+
+			if( debug_cga && m_cga && !m_cga->validate() )
+				cerr << "oh junkedy\n";
+		}
+	}
+}
+
+/**
+ * Propagates an inversion of an ancestral SuperInterval to the linked
+ * SuperIntervals in descendant nodes.  The ancestor itself and leaf nodes
+ * are left untouched.
+ * @param alignment_tree	the tree holding the SuperIntervals
+ * @param ancestor			node at which the inversion took place
+ * @param ans_siv			index of the inverted SuperInterval in the ancestor's ordering
+ */
+void propagateInvert( PhyloTree< AlignmentTreeNode >& alignment_tree, node_id_t ancestor, size_t ans_siv )
+{
+	const size_t NO_CHILD_SIV = (std::numeric_limits<size_t>::max)();
+	stack< pair< node_id_t, size_t > > pending;
+	pending.push( make_pair(ancestor, ans_siv) );
+	while( !pending.empty() )
+	{
+		const node_id_t cur_node = pending.top().first;
+		const size_t cur_siv = pending.top().second;
+		pending.pop();
+
+		// follow the links to the child superintervals, if any
+		const size_t child1_siv = alignment_tree[cur_node].ordering[cur_siv].c1_siv;
+		const size_t child2_siv = alignment_tree[cur_node].ordering[cur_siv].c2_siv;
+		if( child1_siv != NO_CHILD_SIV )
+			pending.push( make_pair( alignment_tree[cur_node].children[0], child1_siv ) );
+		if( child2_siv != NO_CHILD_SIV )
+			pending.push( make_pair( alignment_tree[cur_node].children[1], child2_siv ) );
+
+		// don't do anything at the ancestor or on leaf nodes
+		if( cur_node == ancestor || alignment_tree[cur_node].sequence != NULL )
+			continue;
+
+		// reverse the homology structure at this node while keeping the
+		// orientation each component had before the inversion
+		Interval& ref_iv = alignment_tree[cur_node].ordering[cur_siv].reference_iv;
+		vector< AbstractMatch* > matches;
+		ref_iv.StealMatches( matches );
+		AbstractMatch::orientation saved[2];
+		saved[0] = matches[0]->Orientation(0);
+		saved[1] = matches[0]->Orientation(1);
+		matches[0]->Invert();
+		for( uint compI = 0; compI < 2; compI++ )
+		{
+			if( saved[compI] != AbstractMatch::undefined )
+				matches[0]->SetOrientation(compI, saved[compI]);
+		}
+		ref_iv.SetMatches( matches );
+		for( uint compI = 0; compI < 2; compI++ )
+		{
+			if( saved[compI] == AbstractMatch::undefined )
+				continue;
+			ref_iv.SetOrientation(compI, saved[compI]);
+			ref_iv.SetLeftEnd(compI, 0);
+		}
+	}
+}
+
+
+void ProgressiveAligner::ConstructSuperIntervalFromMSA( node_id_t ancestor, size_t ans_siv, GappedAlignment& gal )
+{	// Rebuilds the two-row reference_iv of every internal node reachable from (ancestor, ans_siv) out of the columns of gal
+	const vector< string >& aln_matrix = GetAlignment( gal, vector< gnSequence* >() );	// indexed below as [sequence][column]; '-' is tested as the gap character
+	stack< pair< node_id_t, size_t > > node_siv_stack;	// (node id, index into that node's ordering) pairs still to process
+	node_siv_stack.push( make_pair(ancestor, ans_siv) );
+	vector<bool> visited( alignment_tree.size(), false );	// set once a node's linked children have been pushed
+	while( node_siv_stack.size() > 0 )
+	{
+		pair< node_id_t, size_t > cur = node_siv_stack.top();
+		node_id_t cur_node = cur.first;
+		// visit post-order: on the first encounter push the linked child super-intervals and leave this entry on the stack
+		if( !visited[cur_node] )
+		{
+			if( alignment_tree[cur_node].ordering[cur.second].c1_siv != (std::numeric_limits<size_t>::max)() )	// size_t max means there is no child super-interval to follow
+				node_siv_stack.push( make_pair( alignment_tree[cur_node].children[0], alignment_tree[cur_node].ordering[cur.second].c1_siv ) );
+			if( alignment_tree[cur_node].ordering[cur.second].c2_siv != (std::numeric_limits<size_t>::max)() )
+				node_siv_stack.push( make_pair( alignment_tree[cur_node].children[1], alignment_tree[cur_node].ordering[cur.second].c2_siv ) );
+			visited[cur_node] = true;
+			continue;
+		}
+		node_siv_stack.pop();	// second encounter: everything pushed above this entry has been handled
+		if( alignment_tree[cur_node].sequence != NULL )
+			continue;	// don't do anything on leaf nodes
+		// from here on cur_node is an internal node (its sequence pointer is NULL)
+		// build a super-interval
+		vector< node_id_t > node1_seqs;	/**< the node id's of extant sequences below node 1 */
+		vector< node_id_t > node2_seqs;	/**< the node id's of extant sequences below node 2 */
+		getAlignedChildren( alignment_tree[cur_node].children[0], node1_seqs );
+		getAlignedChildren( alignment_tree[cur_node].children[1], node2_seqs );
+		vector< bitset_t > m_aln(2, bitset_t( aln_matrix[0].size(), false ) );	// row 0 / row 1: kept columns in which the child-0 / child-1 side has a residue
+		gnSeqI seqI_len = 0;	// number of kept columns occupied on the child-0 side
+		gnSeqI seqJ_len = 0;	// number of kept columns occupied on the child-1 side
+		gnSeqI cur_col = 0;	// number of columns kept so far (the output column index)
+		for( size_t colI = 0; colI < aln_matrix[0].size(); colI++ )
+		{
+			uint seqI = 0;
+			uint seqJ = 0;
+			for( ; seqI < node1_seqs.size(); ++seqI )	// stop at the first child-0 sequence with a non-gap in this column
+				if( aln_matrix[node_sequence_map[node1_seqs[seqI]]][colI] != '-' )
+					break;
+			for( ; seqJ < node2_seqs.size(); ++seqJ )	// same search on the child-1 side
+				if( aln_matrix[node_sequence_map[node2_seqs[seqJ]]][colI] != '-' )
+					break;
+			// a loop that ran to the end found only gaps on that side
+			if( seqI == node1_seqs.size() && seqJ == node2_seqs.size() )
+				continue;	// nothing in this column
+			if( seqI != node1_seqs.size() )
+			{
+				seqI_len++;
+				m_aln[0].set(cur_col);
+			}
+			if( seqJ != node2_seqs.size() )
+			{
+				seqJ_len++;
+				m_aln[1].set(cur_col);
+			}
+			cur_col++;
+		}
+		m_aln[0].resize(cur_col);	// trim both rows to the number of kept columns
+		m_aln[1].resize(cur_col);
+		CompactGappedAlignment<> tmp_cga(m_aln.size(), cur_col);
+		CompactGappedAlignment<>* cga = tmp_cga.Copy();	// cga is later handed to reference_iv via SetMatches
+		cga->SetLeftEnd(0, seqI_len > 0 ? 1 : 0);	// at this point we have no idea where the left end should really be
+		cga->SetLeftEnd(1, seqJ_len > 0 ? 1 : 0);
+		if( cga->LeftEnd(0) != NO_MATCH )	// NOTE(review): relies on the 0 set above comparing equal to NO_MATCH for an empty side -- confirm
+			cga->SetOrientation(0, alignment_tree[cur_node].ordering[cur.second].reference_iv.Orientation(0));
+		if( cga->LeftEnd(1) != NO_MATCH )
+			cga->SetOrientation(1, alignment_tree[cur_node].ordering[cur.second].reference_iv.Orientation(1));
+		cga->SetLength(seqI_len,0);
+		cga->SetLength(seqJ_len,1);
+		cga->SetAlignment(m_aln);	// do this afterwards so that it can create the bitcount
+		// decide whether the freshly built alignment has to be flipped
+		// the alignment may need to be reversed if the aligned parent is reverse
+		size_t p_siv = alignment_tree[cur_node].ordering[cur.second].parent_siv;
+		bool reverse_me = false;
+		if( p_siv != (std::numeric_limits<size_t>::max)() )	// size_t max means no parent super-interval is linked
+		{
+			size_t p_node = alignment_tree[cur_node].parents[0];
+			int p_child = alignment_tree[p_node].children[0] == cur_node ? 0 : 1;	// which row of the parent's interval refers to cur_node
+			if( alignment_tree[p_node].ordering[p_siv].reference_iv.Orientation(p_child) == AbstractMatch::reverse )
+				reverse_me = true;
+		}
+		if( reverse_me )
+		{
+			cga->Invert();
+			if( cga->LeftEnd(0) != NO_MATCH )	// after Invert() the orientations are set back to those of the old reference_iv
+				cga->SetOrientation(0, alignment_tree[cur_node].ordering[cur.second].reference_iv.Orientation(0));
+			if( cga->LeftEnd(1) != NO_MATCH )
+				cga->SetOrientation(1, alignment_tree[cur_node].ordering[cur.second].reference_iv.Orientation(1));
+			propagateInvert( alignment_tree, cur_node, cur.second );	// presumably applies the inversion to the nodes below as well -- confirm in propagateInvert
+		}
+		// replace the old reference interval with one that holds only the new alignment
+		alignment_tree[cur_node].ordering[cur.second].reference_iv = Interval();
+		vector< AbstractMatch* > am_list(1, cga);
+		alignment_tree[cur_node].ordering[cur.second].reference_iv.SetMatches( am_list );
+		// set these to zero so they don't interfere with coordinate translation
+		alignment_tree[cur_node].ordering[cur.second].reference_iv.SetLeftEnd(0, 0);
+		alignment_tree[cur_node].ordering[cur.second].reference_iv.SetLeftEnd(1, 0);
+	}
+}
+
+/** Sort record: the projected alignment, its per-node bitsets, and the match it was derived from */
+typedef boost::tuple<CompactGappedAlignment<>*, vector< bitset_t >*, AbstractMatch* > _sort_tracker_type;
+
+/**
+ * Adapts a comparator that works on CompactGappedAlignment pointers so that it
+ * can order _sort_tracker_type records by their first element.
+ * The wrapped comparator is held by reference and must outlive this object.
+ */
+template< class CompType >
+class CgaBsComp
+{
+public:
+	CgaBsComp( CompType& c ) : comp(c) {}
+	/** Compare two records by applying the wrapped comparator to element 0 of each */
+	bool operator()( const _sort_tracker_type& a, const _sort_tracker_type& b ) const
+	{
+		// comp is a reference member, so the referenced comparator stays callable from a const method
+		return comp( a.get<0>(), b.get<0>() );
+	}
+protected:
+	CompType& comp;
+};
+
+/**
+ * Keeps only the matches whose Multiplicity() equals mult.
+ * Rejected matches are released with Free(); survivors keep their relative order.
+ */
+template< typename MatchVector >
+void multFilter( MatchVector& matches, uint mult = 2 )
+{
+	// compact in place: survivors slide towards the front, rejects are released
+	size_t kept = 0;
+	const size_t total = matches.size();
+	for( size_t idx = 0; idx != total; ++idx )
+	{
+		if( matches[idx]->Multiplicity() != mult )
+		{
+			matches[idx]->Free();
+			continue;
+		}
+		matches[kept] = matches[idx];
+		++kept;
+	}
+	// drop the stale tail left behind by the compaction
+	matches.erase( matches.begin() + kept, matches.end() );
+}
+
+/**
+ * Keeps only the matches in which the summed Length() of all defined
+ * components exceeds AlignmentLength() by more than `length`.
+ * Rejected matches are released with Free(); survivors keep their relative order.
+ * @param matches	The match container, filtered in place
+ * @param length	The margin by which the summed component length must exceed the alignment length
+ */
+template< typename MatchVector >
+void alignedNtCountFilter( MatchVector& matches, uint length )
+{
+	// require at least some number of aligned pairs in the anchor
+	size_t cur = 0;
+	for( size_t mI = 0; mI < matches.size(); ++mI )
+	{
+		size_t len_sum = 0;
+		for( size_t seqI = 0; seqI < matches[mI]->SeqCount(); seqI++ )
+			if(matches[mI]->LeftEnd(seqI) != NO_MATCH)
+				len_sum += matches[mI]->Length(seqI);
+
+		// The threshold is added on the right-hand side instead of being subtracted
+		// from len_sum: the unsigned subtraction wrapped around whenever
+		// len_sum < length, which let very short matches pass the filter.
+		if( len_sum > matches[mI]->AlignmentLength() + length )
+			matches[cur++] = matches[mI];
+		else
+			matches[mI]->Free();
+	}
+	matches.erase(matches.begin()+cur, matches.end());
+}
+
+
+bool debugging_cltm = false;	// debugging flag; the function below only ever resets it to false
+/**
+ * Projects each pairwise ancestral match onto the nodes below ancestral_node.
+ * The matches are first split at descendant breakpoints.  Then, walking down
+ * the tree, each match is located in the super-interval of the current node
+ * that contains it and translated through that interval's alignment, which
+ * yields coordinates and an alignment bitset for both children of the node.
+ * @param ancestral_node	The node whose two children the matches align
+ * @param ancestral_matches	Pairwise matches (row 0 = child 0, row 1 = child 1); may be split by this call
+ * @param tracking_matches	Output, one entry per ancestral match: node_match is the
+ *							projected alignment with one row per tree node, original_match
+ *							is the ancestral match, match_id is the entry's index
+ */
+void ProgressiveAligner::constructLcbTrackingMatches( 
+	node_id_t ancestral_node, 
+	vector< AbstractMatch* >& ancestral_matches, 
+	vector< LcbTrackingMatch< AbstractMatch* > >& tracking_matches 
+	)
+{
+	node_id_t child_0 = alignment_tree[ancestral_node].children[0];
+	node_id_t child_1 = alignment_tree[ancestral_node].children[1];
+	// split up matches at descendant's breakpoints
+	propagateDescendantBreakpoints( child_0, 0, ancestral_matches );
+	propagateDescendantBreakpoints( child_1, 1, ancestral_matches );
+
+	// store alignment bitvectors for each match...
+	// bs is never resized after this point, so the &bs[mI] pointers stored in cga_list stay valid
+	vector< bitset_t > bs_tmp(alignment_tree.size());
+	vector< vector< bitset_t > > bs(ancestral_matches.size(), bs_tmp);
+	vector< _sort_tracker_type > cga_list;
+	// initialize alignment bitvectors
+	for( size_t mI = 0; mI < ancestral_matches.size(); mI++ )
+	{
+		vector< bitset_t > aln( alignment_tree.size(), bitset_t(ancestral_matches[mI]->AlignmentLength() ) );
+		swap( bs[mI], aln );
+		ancestral_matches[mI]->GetAlignment(aln);
+		// rows 0 and 1 of the pairwise match become the rows of the two children
+		swap( bs[mI][child_0], aln[0] );
+		swap( bs[mI][child_1], aln[1] );
+		CompactGappedAlignment<> c(alignment_tree.size(),0);
+		c.SetLeftEnd(child_0, ancestral_matches[mI]->LeftEnd(0));
+		c.SetOrientation(child_0, ancestral_matches[mI]->Orientation(0));
+		c.SetLength(ancestral_matches[mI]->Length(0), child_0);
+		c.SetLeftEnd(child_1, ancestral_matches[mI]->LeftEnd(1));
+		c.SetOrientation(child_1, ancestral_matches[mI]->Orientation(1));
+		c.SetLength(ancestral_matches[mI]->Length(1), child_1);
+		cga_list.push_back(make_tuple(c.Copy(), &bs[mI], ancestral_matches[mI]));
+	}
+
+	// walk the subtrees below both children; leaves have nothing to translate through
+	stack<node_id_t> node_stack;
+	node_stack.push(child_0);
+	node_stack.push(child_1);
+	while(node_stack.size() > 0)
+	{
+		node_id_t cur_node = node_stack.top();
+		node_stack.pop();
+		if( alignment_tree[cur_node].children.size() == 0 )
+			continue;
+		node_stack.push(alignment_tree[cur_node].children[0]);
+		node_stack.push(alignment_tree[cur_node].children[1]);
+
+		// do processing for cur_node...
+		// 1. determine which interval in the current node each match falls into
+		// 2. determine the offset of this match in that interval
+		// 3. translate with that interval
+
+		vector< SuperInterval >& siv_list = alignment_tree[cur_node].ordering;
+		SingleStartComparator< CompactGappedAlignment<> > ssc(cur_node);
+		CgaBsComp< SingleStartComparator< CompactGappedAlignment<> > > comp( ssc );
+		sort(cga_list.begin(), cga_list.end(), comp);
+		// sweep the sorted matches and the super-intervals of cur_node together
+		size_t mI = 0;
+		size_t sivI = 0;
+		while( mI < cga_list.size() && sivI < siv_list.size() )
+		{
+			CompactGappedAlignment<>* cur_match = cga_list[mI].get<0>();
+			if( cur_match->Start(cur_node) == 0 )
+			{
+				mI++;
+				continue;	// this one doesn't match in this lineage!!
+			}
+			if( cur_match->LeftEnd(cur_node) >= siv_list[sivI].LeftEnd() + siv_list[sivI].Length() )
+			{
+				// the match starts beyond the end of this super-interval: try the next one
+				sivI++;
+				continue;
+			}
+
+			if( cur_match->LeftEnd(cur_node) + cur_match->Length(cur_node) > 
+				siv_list[sivI].LeftEnd() + siv_list[sivI].Length() )
+			{
+				cerr << "doesn't fit\n";
+				cerr << "cga_list[" << mI << "]->LeftEnd(" << cur_node << "): " << cur_match->LeftEnd(cur_node) << endl;
+				cerr << "cga_list[" << mI << "]->RightEnd(" << cur_node << "): " << cur_match->RightEnd(cur_node) << endl;
+				breakHere();
+			}
+
+			// extract the region of the siv matched by the current match
+			CompactGappedAlignment<>* siv_cga = dynamic_cast<CompactGappedAlignment<>*>(siv_list[sivI].reference_iv.GetMatches()[0]);
+			if( siv_list[sivI].reference_iv.GetMatches().size() > 1 )
+				siv_cga = NULL;
+			if( siv_cga == NULL )
+			{
+				// the interval is not a single CompactGappedAlignment: convert it to one and store it back
+				CompactGappedAlignment<> tmp_cga;
+				siv_cga = tmp_cga.Copy();
+				*siv_cga = CompactGappedAlignment<>(siv_list[sivI].reference_iv);
+				vector<AbstractMatch*> tmp_matches(1,siv_cga);
+				siv_list[sivI].reference_iv.SetMatches(tmp_matches);
+			}
+			CompactGappedAlignment<> new_cga;
+			siv_cga->copyRange(new_cga, cur_match->LeftEnd(cur_node) - siv_list[sivI].LeftEnd(), cur_match->Length(cur_node));
+			if( cur_match->Orientation(cur_node) == AbstractMatch::reverse )
+				new_cga.Invert();
+			if( new_cga.Multiplicity() == 0 )
+			{
+				cerr << "impossible!  there's no match!\n";
+				genome::breakHere();
+			}
+			// set the leftend in cga_list
+			for( uint cur_child = 0; cur_child < 2; cur_child++ )
+			{
+				node_id_t sweet_child = alignment_tree[cur_node].children[cur_child];
+				cur_match->SetLeftEnd(sweet_child, new_cga.LeftEnd(cur_child));
+				if( new_cga.LeftEnd(cur_child) != NO_MATCH )
+				{
+					cur_match->SetOrientation(sweet_child, new_cga.Orientation(cur_child));
+					cur_match->SetLength(new_cga.Length(cur_child), sweet_child);
+				}
+			}
+
+			// prepare a cga for translation
+			CompactGappedAlignment<> c(1,(*cga_list[mI].get<1>())[cur_node].size());
+			c.SetLeftEnd(0,1);
+			c.SetLength((*cga_list[mI].get<1>())[cur_node].count(),0);
+			vector<bitset_t> bivouac(1, (*cga_list[mI].get<1>())[cur_node]);
+			c.SetAlignment(bivouac);
+
+			// now translate each child
+			for( uint cur_child = 0; cur_child < 2; cur_child++ )
+			{
+				if( new_cga.Orientation(cur_child) == AbstractMatch::undefined )
+					continue;
+				CompactGappedAlignment<> cga_tmp = new_cga;
+				cga_tmp.SetStart(cur_child, 1);
+				c.translate(cga_tmp, cur_child, 0, false);
+				// adjust for end-gaps
+				// (the local is named child_bs so that it no longer shadows the outer `bs`)
+				bitset_t child_bs = (cga_tmp.GetAlignment())[cur_child];
+				child_bs.resize(c.GetAlignment()[0].size(), false);
+				child_bs <<= c.GetAlignment()[0].find_first();
+				node_id_t sweet_child = alignment_tree[cur_node].children[cur_child];
+				swap( (*cga_list[mI].get<1>())[sweet_child], child_bs );
+				// sanity check: every non-empty row examined must have the same width as the new row
+				for( size_t testI = 0; testI < cga_tmp.SeqCount(); ++testI )
+				{
+					if( ((*cga_list[mI].get<1>())[testI].size() != 0 && (*cga_list[mI].get<1>())[testI].size() != (*cga_list[mI].get<1>())[sweet_child].size() ) )
+					{
+						cerr << "bj0rk3l\n";
+						genome::breakHere();
+					}
+				}
+			}
+
+			debugging_cltm = false;
+			mI++;	// advance to the next match
+		}
+	}
+	tracking_matches.resize( cga_list.size() );
+	// finally, construct CompactGappedAlignments out of the bitsets
+	for( size_t bsI = 0; bsI < cga_list.size(); ++bsI )
+	{
+		cga_list[bsI].get<0>()->SetAlignment(*cga_list[bsI].get<1>());
+		cga_list[bsI].get<0>()->validate();
+		TrackingMatch& ltm = tracking_matches[bsI];
+		ltm.node_match = cga_list[bsI].get<0>();
+		ltm.original_match = cga_list[bsI].get<2>();
+		ltm.match_id = bsI;
+
+		// every tracking match should be defined in at least one extant sequence
+		bool found_extant = false;
+		for( size_t i = 0; i < alignment_tree.size()-1; ++i )
+		{
+			size_t im = node_sequence_map[i];
+			// Skip nodes that do not map to a sequence.  Other functions in this
+			// file test node_sequence_map against numeric_limits<uint>::max; a
+			// uint sentinel widened to size_t never equals numeric_limits<size_t>::max
+			// where size_t is wider than uint, so both sentinels are accepted here.
+			if( im == (std::numeric_limits<size_t>::max)() || im == (std::numeric_limits<uint>::max)() )
+				continue;
+			if( ltm.node_match->LeftEnd(i) != NO_MATCH )
+				found_extant = true;
+		}
+		if( !found_extant )
+		{
+			cout << "orig aln len: " << ltm.original_match->AlignmentLength() << endl;
+			cout << "orig lend 0: " << ltm.original_match->Start(0) << endl;
+			cout << "orig lend 1: " << ltm.original_match->Start(1) << endl;
+			cout << "orig length 0: " << ltm.original_match->Length(0) << endl;
+			cout << "orig length 1: " << ltm.original_match->Length(1) << endl;
+
+			cerr << "this is an ungrounded match!!!\n";
+			genome::breakHere();
+		}
+	}
+}
+
+/**
+ * Counts the nodes in the subtree rooted at ancestor (ancestor included)
+ * whose `refined` flag is not set.
+ */
+size_t countUnrefined( PhyloTree< AlignmentTreeNode >& alignment_tree, node_id_t ancestor )
+{
+	size_t pending_total = 0;
+	stack< node_id_t > to_visit;
+	to_visit.push( ancestor );
+	while( !to_visit.empty() )
+	{
+		const node_id_t here = to_visit.top();
+		to_visit.pop();
+		// queue every child so that the whole subtree gets examined
+		const size_t child_total = alignment_tree[here].children.size();
+		for( size_t c = 0; c < child_total; ++c )
+			to_visit.push( alignment_tree[here].children[c] );
+		if( alignment_tree[here].refined )
+			continue;
+		++pending_total;
+	}
+	return pending_total;
+}
+
+/**
+ * Sets the `refined` flag on every node strictly below ancestor.
+ * The flag of ancestor itself is cleared.
+ */
+void markAsRefined( PhyloTree< AlignmentTreeNode >& alignment_tree, node_id_t ancestor )
+{
+	stack< node_id_t > node_stack;
+	node_stack.push(ancestor);
+	while( !node_stack.empty() )
+	{
+		node_id_t cur_node = node_stack.top();
+		node_stack.pop();
+		for( size_t childI = 0; childI < alignment_tree[cur_node].children.size(); ++childI )
+			node_stack.push( alignment_tree[cur_node].children[childI] );
+		alignment_tree[cur_node].refined = true;
+	}
+	// the traversal also flagged the root of the subtree; undo that
+	alignment_tree[ancestor].refined = false;
+}
+
+
+
+/**
+ * Fills tm_score_array[match][i][j] with the pairwise anchor score of each
+ * tracking match for every pair (node1_descendants[i], node2_descendants[j])
+ * in which both nodes map to a sequence and the match is defined in both.
+ * Afterwards computeAvgAncestralMatchScores() is called on the same arguments.
+ */
+void ProgressiveAligner::pairwiseScoreTrackingMatches( 
+						std::vector< TrackingMatch >& tracking_matches, 
+						std::vector<node_id_t>& node1_descendants,
+						std::vector<node_id_t>& node2_descendants,
+						boost::multi_array< double, 3 >& tm_score_array)
+{
+	const size_t match_total = tracking_matches.size();
+	const size_t side1_total = node1_descendants.size();
+	const size_t side2_total = node2_descendants.size();
+	tm_score_array.resize( boost::extents[match_total][side1_total][side2_total] );
+
+	for( size_t matchI = 0; matchI != match_total; ++matchI )
+	{
+		AbstractMatch* projected = tracking_matches[matchI].node_match;
+		for( size_t d1 = 0; d1 != side1_total; ++d1 )
+		{
+			node_id_t cur_n1 = node1_descendants[d1];
+			// side 1 must map to a sequence and take part in this match
+			const bool n1_usable = node_sequence_map[cur_n1] != (std::numeric_limits<uint>::max)() &&
+				projected->LeftEnd(cur_n1) != NO_MATCH;
+			if( !n1_usable )
+				continue;
+
+			for( size_t d2 = 0; d2 != side2_total; ++d2 )
+			{
+				node_id_t cur_n2 = node2_descendants[d2];
+				const bool n2_usable = node_sequence_map[cur_n2] != (std::numeric_limits<uint>::max)() &&
+					projected->LeftEnd(cur_n2) != NO_MATCH;
+				if( !n2_usable )
+					continue;	// not extant or no match between this pair
+
+				size_t seq_index1 = node_sequence_map[cur_n1];
+				size_t seq_index2 = node_sequence_map[cur_n2];
+
+				// view the multi-row match as a pairwise match between the two nodes
+				PairwiseMatchAdapter pair_view( projected, cur_n1, cur_n2 );
+				vector< AbstractMatch* > single_anchor( 1, &pair_view );
+				vector< gnSequence* > pair_seqs(2);
+				pair_seqs[0] = alignment_tree[ cur_n1 ].sequence;
+				pair_seqs[1] = alignment_tree[ cur_n2 ].sequence;
+
+				tm_score_array[matchI][d1][d2] = GetPairwiseAnchorScore(single_anchor, pair_seqs, subst_scoring, sol_list[seq_index1], sol_list[seq_index2]);
+			}
+		}
+	}
+	computeAvgAncestralMatchScores(tracking_matches, node1_descendants, node2_descendants, tm_score_array);
+}
+
+/**
+ * For every pair of descendants that is not a pair of single extant nodes,
+ * sets each match's score to the average of the scores already stored for the
+ * extant pairs below the two nodes in which the match is defined.
+ * Cells without any such extant pair are left untouched.
+ */
+void ProgressiveAligner::computeAvgAncestralMatchScores( 
+						std::vector< TrackingMatch >& tracking_matches, 
+						std::vector<node_id_t>& node1_descendants,
+						std::vector<node_id_t>& node2_descendants,
+						boost::multi_array< double, 3 >& tm_score_array)
+{
+	// now build up the consensus (ancestral) match scores and bp distances
+	for( uint rowI = 0; rowI < node1_descendants.size(); rowI++ )
+	{
+		for( uint colJ = 0; colJ < node2_descendants.size(); colJ++ )
+		{
+			// extant nodes below each member of the pair
+			vector<node_id_t> leaves1;
+			vector<node_id_t> leaves2;
+			getAlignedChildren(node1_descendants[rowI], leaves1);
+			getAlignedChildren(node2_descendants[colJ], leaves2);
+			const bool single_pair = leaves1.size() == 1 && leaves2.size() == 1;
+			if( single_pair )
+				continue;	// this node has two extant nodes below it and was already scored
+
+			// positions of the extant nodes within the descendant lists, i.e. their score array indices
+			vector< node_id_t > leaf_rows;
+			for( size_t i = 0; i < leaves1.size(); ++i )
+			{
+				vector< node_id_t >::iterator hit = std::find( node1_descendants.begin(), node1_descendants.end(), leaves1[i] );
+				leaf_rows.push_back( hit - node1_descendants.begin() );
+			}
+			vector< node_id_t > leaf_cols;
+			for( size_t j = 0; j < leaves2.size(); ++j )
+			{
+				vector< node_id_t >::iterator hit = std::find( node2_descendants.begin(), node2_descendants.end(), leaves2[j] );
+				leaf_cols.push_back( hit - node2_descendants.begin() );
+			}
+
+			// compute scores for all matches at this node
+			for( size_t matchI = 0; matchI < tracking_matches.size(); ++matchI )
+			{
+				AbstractMatch* projected = tracking_matches[matchI].node_match;
+				uint pair_total = 0;
+				double total_score = 0;
+				for( size_t i = 0; i < leaves1.size(); ++i )
+				{
+					if( projected->LeftEnd(leaves1[i]) != NO_MATCH )
+					{
+						for( size_t j = 0; j < leaves2.size(); ++j )
+						{
+							if( projected->LeftEnd(leaves2[j]) != NO_MATCH )
+							{
+								++pair_total;
+								total_score += tm_score_array[matchI][leaf_rows[i]][leaf_cols[j]];
+							}
+						}
+					}
+				}
+				if( pair_total > 0 )
+					tm_score_array[matchI][rowI][colJ] = total_score / (double)pair_total;
+			}
+		}
+	}
+}
+
+
+/**
+ * Fills bp_dist_mat and cons_dist_mat, both indexed
+ * [position in node1_descendants][position in node2_descendants].
+ * Pairs in which both nodes map to a sequence copy their entry from
+ * bp_distance / conservation_distance.  Every pair that is not a pair of
+ * single extant nodes gets the average over the extant pairs below it.
+ * @param bp_dist_mat		Output breakpoint distances, resized by this call
+ * @param cons_dist_mat		Output conservation distances, resized by this call
+ * @param node1_descendants	Nodes on the first side
+ * @param node2_descendants	Nodes on the second side
+ */
+void ProgressiveAligner::computeInternalNodeDistances( 
+						boost::multi_array<double, 2>& bp_dist_mat, 
+						boost::multi_array<double, 2>& cons_dist_mat, 
+						std::vector<node_id_t>& node1_descendants,
+						std::vector<node_id_t>& node2_descendants)
+{
+	// bp distances for the current node.
+	bp_dist_mat.resize(boost::extents[node1_descendants.size()][node2_descendants.size()]);
+	cons_dist_mat.resize(boost::extents[node1_descendants.size()][node2_descendants.size()]);
+	for( size_t nI = 0; nI < node1_descendants.size(); ++nI )
+	{
+		if( node_sequence_map[node1_descendants[nI]] == (std::numeric_limits<uint>::max)() )
+			continue;
+		for( size_t nJ = 0; nJ < node2_descendants.size(); ++nJ )
+		{
+			if( node_sequence_map[node2_descendants[nJ]] == (std::numeric_limits<uint>::max)() )
+				continue;
+			size_t i = node_sequence_map[node1_descendants[nI]];
+			size_t j = node_sequence_map[node2_descendants[nJ]];
+			bp_dist_mat[nI][nJ] = this->bp_distance[i][j];
+			cons_dist_mat[nI][nJ] = this->conservation_distance[i][j];
+		}
+	}
+
+	// now build up the consensus (ancestral) bp distances
+	for( uint nodeI = 0; nodeI < node1_descendants.size(); nodeI++ )
+	{
+		for( uint nodeJ = 0; nodeJ < node2_descendants.size(); nodeJ++ )
+		{
+			node_id_t n1 = node1_descendants[nodeI];
+			node_id_t n2 = node2_descendants[nodeJ];
+
+			vector<node_id_t> n1_ext;
+			vector<node_id_t> n2_ext;
+			getAlignedChildren(n1, n1_ext);
+			getAlignedChildren(n2, n2_ext);
+			if( n1_ext.size() == 1 && n2_ext.size() == 1 )
+				continue;	// this node has two extant nodes below it, so already has a dist
+
+			// map the nodes in n1_ext to their indices in n1_descendants
+			vector< node_id_t > n1_ext_map(n1_ext.size());
+			for( size_t i = 0; i < n1_ext.size(); ++i )
+			{
+				vector< node_id_t >::iterator iter = std::find( node1_descendants.begin(), node1_descendants.end(), n1_ext[i] );
+				n1_ext_map[i] = iter - node1_descendants.begin();
+			}
+			vector< node_id_t > n2_ext_map(n2_ext.size());
+			for( size_t i = 0; i < n2_ext.size(); ++i )
+			{
+				vector< node_id_t >::iterator iter = std::find( node2_descendants.begin(), node2_descendants.end(), n2_ext[i] );
+				n2_ext_map[i] = iter - node2_descendants.begin();
+			}
+
+			// compute average bp distance
+			// Sum into locals and assign, as computeAvgAncestralMatchScores does,
+			// rather than adding to whatever the cell held before this call.
+			double bp_sum = 0;
+			double cons_sum = 0;
+			for( size_t i = 0; i < n1_ext.size(); ++i )
+			{
+				for( size_t j = 0; j < n2_ext.size(); ++j )
+				{
+					bp_sum += bp_dist_mat[n1_ext_map[i]][n2_ext_map[j]];
+					cons_sum += cons_dist_mat[n1_ext_map[i]][n2_ext_map[j]];
+				}
+			}
+			const size_t pair_count = n1_ext.size() * n2_ext.size();
+			if( pair_count > 0 )	// no extant pair below: leave the cell alone instead of dividing by zero
+			{
+				bp_dist_mat[nodeI][nodeJ] = bp_sum / (double)pair_count;
+				cons_dist_mat[nodeI][nodeJ] = cons_sum / (double)pair_count;
+			}
+		}
+	}
+
+}
+
+/**
+ * Computes the fraction of identical characters (compared case-insensitively)
+ * among the columns of gal in which neither row seqI nor row seqJ is a gap.
+ * @param gal	The alignment to examine
+ * @param seqI	Row index of the first sequence
+ * @param seqJ	Row index of the second sequence
+ * @return		identical columns / ungapped columns, or 0 when the two rows
+ *				share no ungapped column
+ */
+double computeID( GappedAlignment& gal, size_t seqI, size_t seqJ )
+{
+	const vector< string >& aln_mat = GetAlignment( gal, vector< gnSequence* >(gal.SeqCount(), NULL ));
+	double id = 0;
+	double possible = 0;
+	for( size_t colI = 0; colI < gal.AlignmentLength(); colI++ )
+	{
+		const char chI = aln_mat[seqI][colI];
+		const char chJ = aln_mat[seqJ][colI];
+		if( chI == '-' || chJ == '-' )
+			continue;
+		// toupper() is only defined for EOF and values representable as unsigned char
+		if( toupper((unsigned char)chI) == toupper((unsigned char)chJ) )
+			id++;
+		possible++;
+	}
+	// without this guard the result would be 0/0, i.e. NaN
+	if( possible == 0 )
+		return 0;
+	return id / possible;
+}
+
+
+//
+//
+// different option -- just pick a representative from leaf(A) and leaf(B) to translate
+/**
+ * Builds pairwise matches between node1 and node2 from original_ml.  For each
+ * original match, the first sequence of node1_seqs and the first sequence of
+ * node2_seqs in which it is defined are taken as its representative pair.
+ * The resulting two-way matches are translated to the coordinates of node1
+ * and node2, appended to ancestral_matches, and EliminateOverlaps_v2() is then
+ * run on ancestral_matches.
+ * @param node1_seqs	Node ids of the extant sequences below node1
+ * @param node2_seqs	Node ids of the extant sequences below node2
+ * @param node1			The first node being aligned
+ * @param node2			The second node being aligned
+ * @param ancestor		Not used in this function body
+ * @param ancestral_matches	Receives the translated matches
+ */
+void ProgressiveAligner::getRepresentativeAncestralMatches( const vector< node_id_t > node1_seqs, const vector< node_id_t > node2_seqs, node_id_t node1, node_id_t node2, node_id_t ancestor, std::vector< AbstractMatch* >& ancestral_matches )
+{
+	// for each match, extract a representative match from any pair of genomes in node1_seqs and node2_seqs
+	// translate up the resulting set of matches and eliminate overlaps
+	boost::multi_array< vector< AbstractMatch* >, 2 > seq_matches( boost::extents[node1_seqs.size()][node2_seqs.size()] );
+	for( size_t mI = 0; mI < original_ml.size(); mI++ )
+	{
+		// first sequence below node1 in which this match is defined
+		uint seqI = 0;
+		for( ; seqI < node1_seqs.size(); seqI++ )
+			if( original_ml[mI]->LeftEnd( this->node_sequence_map[node1_seqs[seqI]] ) != NO_MATCH )
+				break;
+		if( seqI == node1_seqs.size() )
+			continue;
+
+		// first sequence below node2 in which this match is defined
+		uint seqJ = 0;
+		for( ; seqJ < node2_seqs.size(); seqJ++ )
+			if( original_ml[mI]->LeftEnd( this->node_sequence_map[node2_seqs[seqJ]] ) != NO_MATCH )
+				break;
+		if( seqJ == node2_seqs.size() )
+			continue;
+
+		uint ii = this->node_sequence_map[node1_seqs[seqI]];
+		uint jj = this->node_sequence_map[node2_seqs[seqJ]];
+		Match mm( 2 );
+		Match* new_m = mm.Copy();
+		new_m->SetStart( 0, original_ml[mI]->Start(ii));
+		new_m->SetStart( 1, original_ml[mI]->Start(jj));
+		new_m->SetLength(original_ml[mI]->Length());
+		if( new_m->Start(0) < 0 )
+			new_m->Invert();	// assign reference orientation to seq 0
+		seq_matches[seqI][seqJ].push_back( new_m );
+	}
+	for( uint seqI = 0; seqI < node1_seqs.size(); seqI++ )
+		for( uint seqJ = 0; seqJ < node2_seqs.size(); seqJ++ )
+		{
+			translateGappedCoordinates( seq_matches[seqI][seqJ], 0, node1_seqs[seqI], node1 );
+			translateGappedCoordinates( seq_matches[seqI][seqJ], 1, node2_seqs[seqJ], node2 );
+			ancestral_matches.insert( ancestral_matches.end(), seq_matches[seqI][seqJ].begin(), seq_matches[seqI][seqJ].end() );
+		}
+
+	EliminateOverlaps_v2( ancestral_matches, true );
+}
+
+/**
+ * Three-way comparison of two search_cache_t entries built on
+ * mems::cache_comparator: returns -1 when e1 orders before e2, 1 when only
+ * e2 orders before e1, and 0 when neither orders before the other.
+ */
+int cachecomp( const void* e1, const void* e2 )
+{
+	search_cache_t& lhs = *(search_cache_t*)e1;
+	search_cache_t& rhs = *(search_cache_t*)e2;
+	// both directions are always evaluated
+	const bool lhs_first = mems::cache_comparator(lhs, rhs);
+	const bool rhs_first = mems::cache_comparator(rhs, lhs);
+	if( lhs_first )
+		return -1;
+	return rhs_first ? 1 : 0;
+}
+
+void ProgressiveAligner::alignProfileToProfile( node_id_t node1, node_id_t node2, node_id_t ancestor )
+{
+	// 1) find all pairwise matches
+	// 2) convert to pairwise matches among the ancestral sequences
+	//    - delete inconsistently aligned regions?
+	// 3) perform greedy b.p. elimination on the pairwise matches
+	// 4) extend LCBs
+	// 5)  if total alignment weight hasn't changed, go to (8)
+	// 6) search for additional matches between each match among extant sequences
+	// 7) go back to 2
+	// 8) perform a MUSCLE/Clustal alignment of each intervening region
+
+	vector< node_id_t > node1_seqs;	/**< the node id's of extant sequences below node 1 */
+	vector< node_id_t > node2_seqs;	/**< the node id's of extant sequences below node 2 */
+	getAlignedChildren( node1, node1_seqs );
+	getAlignedChildren( node2, node2_seqs );
+
+	uint seqI, seqJ;
+	gnSeqI prev_ancestral_seq_len = (std::numeric_limits<gnSeqI>::max)();
+
+	printMemUsage();
+	cout << "get ancestral matches\n";
+
+	Matrix<MatchList> pairwise_matches( node1_seqs.size(), node2_seqs.size() );
+//	getPairwiseMatches( node1_seqs, node2_seqs, pairwise_matches );
+	vector< AbstractMatch* > anc_pairwise_matches;
+	getRepresentativeAncestralMatches( node1_seqs, node2_seqs, node1, node2, ancestor, anc_pairwise_matches );
+	printMemUsage();
+	
+	PhyloTree< AlignmentTreeNode > aln_tree_backup;
+
+	/** A cache of regions that were searched in the previous round of recursion */
+	Matrix< std::vector< search_cache_t > > search_cache_db(node1_seqs.size(), node2_seqs.size());
+	double prev_anchoring_score = -(std::numeric_limits<double>::max)();
+	double cur_anchoring_score = -(std::numeric_limits<double>::max)();
+
+	while(true)
+	{
+		vector<AbstractMatch*> ancestral_matches;
+		if( anc_pairwise_matches.size() > 0 )
+		{
+			ancestral_matches.insert( ancestral_matches.begin(), anc_pairwise_matches.begin(), anc_pairwise_matches.end() );
+			anc_pairwise_matches.clear();
+		}
+		else
+		{
+			// part 2, construct pairwise matches to the ancestral sequence
+			// A)  for each pairwise match, translate its
+			//     coordinates to the ancestral genome
+			//	   -- try to use translateCoordinates
+			//     -- build a translation table for translateCoordinates
+
+			for( seqI = 0; seqI < node1_seqs.size(); seqI++ )
+			{
+				for( seqJ = 0; seqJ < node2_seqs.size(); seqJ++ )
+				{
+					cout << node_sequence_map[node1_seqs[seqI]] << "," << node_sequence_map[node2_seqs[seqJ]] << " has " << pairwise_matches(seqI,seqJ).size() << " pairwise matches\n";
+					cout.flush();
+
+					vector< AbstractMatch* > am_list( pairwise_matches(seqI, seqJ).begin(), pairwise_matches(seqI, seqJ).end() );
+					pairwise_matches(seqI, seqJ).clear();
+					translateGappedCoordinates( am_list, 1, node2_seqs[seqJ], node2 );
+					translateGappedCoordinates( am_list, 0, node1_seqs[seqI], node1 );
+					ancestral_matches.insert( ancestral_matches.end(), am_list.begin(), am_list.end() );
+				}
+			}
+		}
+		// include any matches from a previous iteration of this loop
+		for( size_t aI = 0; aI < alignment_tree[ancestor].ordering.size(); aI++ )
+		{
+			Interval& ref_iv = alignment_tree[ancestor].ordering[aI].reference_iv;
+			if( ref_iv.Multiplicity() == 2 )
+				for( size_t mI = 0; mI < ref_iv.GetMatches().size(); mI++ )
+					if( ref_iv.GetMatches()[mI]->Multiplicity() > 1 )
+						ancestral_matches.push_back( ref_iv.GetMatches()[mI]->Copy() );
+		}
+
+		// set seq 0 to forward ref. orientation
+		for( size_t mI = 0; mI < ancestral_matches.size(); ++mI )
+			if( ancestral_matches[mI]->Start(0) < 0 )
+				ancestral_matches[mI]->Invert();
+
+		// eliminate overlaps as they correspond to inconsistently or
+		// multiply aligned regions
+		EliminateOverlaps_v2( ancestral_matches );
+		
+		multFilter( ancestral_matches );
+
+		vector< vector< AbstractMatch* > > LCB_list;
+		vector< LCB > adjacencies;
+		vector< gnSeqI > breakpoints;
+
+		if( !collinear_genomes )
+		{
+			cout << "Performing Sum-of-pairs Greedy Breakpoint Elimination\n";
+			cout.flush();
+			// project the pairwise matches at this node to all-possible pairs matches at descendant nodes
+			// keep a mapping of ancestral to extant matches so that when an ancestral match gets removed
+			// the match among extant nodes also gets removed
+			// how should candidate matches to remove be generated?
+			// one possibility is to remove entire ancestral LCBs...  this may be problematic since ancestral
+			// LCBs don't correspond to the pairwise LCBs thus an ancestral LCB could be removed with no useful
+			// change in alignment score
+			//
+			//
+			// translate the matches into LcbTrackingMatches
+			printMemUsage();
+			cout << "construct LCB tracking matches\n";
+			vector< TrackingMatch > tracking_matches;
+			boost::multi_array< size_t, 3 > tm_lcb_id_array;
+			boost::multi_array< double, 3 > tm_score_array;
+			constructLcbTrackingMatches( ancestor, ancestral_matches, tracking_matches );
+
+			cout << "There are " << tracking_matches.size() << " tracking matches\n";
+			size_t used_components = 0;
+			for( size_t tmI = 0; tmI < tracking_matches.size(); ++tmI )
+			{
+				for( uint ssI = 0; ssI < tracking_matches[tmI].node_match->SeqCount(); ++ssI )
+					if( tracking_matches[tmI].node_match->LeftEnd(ssI) != NO_MATCH )
+						used_components++;
+			}
+			size_t total_components = tracking_matches.size() == 0 ? 0 : tracking_matches.size() * tracking_matches[0].node_match->SeqCount();
+			cout << "There are " << used_components << " / " << total_components << " components used\n";
+
+			vector<node_id_t> node1_descendants;
+			vector<node_id_t> node2_descendants;
+			if( scoring_scheme == ExtantSumOfPairsScoring )
+			{
+				node1_descendants = node1_seqs;
+				node2_descendants = node2_seqs;
+			}else{
+				getDescendants(alignment_tree, node1, node1_descendants);
+				getDescendants(alignment_tree, node2, node2_descendants);
+			}
+
+			//
+			// score the matches
+			//
+			printMemUsage();
+			cout << "init tracking match LCB tracking\n";
+			initTrackingMatchLCBTracking( tracking_matches, node1_descendants.size(), node2_descendants.size(), tm_lcb_id_array );
+			printMemUsage();
+			cout << "pairwise score tracking matches\n";
+			pairwiseScoreTrackingMatches( tracking_matches, node1_descendants, node2_descendants, tm_score_array );
+			printMemUsage();
+
+			// compute bp distances for the current node.
+			// ancestral nodes take the average distance of extant nodes
+			boost::multi_array<double, 2> bp_dist_mat;
+			boost::multi_array<double, 2> cons_dist_mat;
+			computeInternalNodeDistances( bp_dist_mat, cons_dist_mat, node1_descendants, node2_descendants);
+
+			vector< TrackingMatch* > t_matches(tracking_matches.size());
+			for( size_t mI = 0; mI < tracking_matches.size(); ++mI )
+				t_matches[mI] = &tracking_matches[mI];
+
+			// now sort these out into pairwise LCBs
+			cout << "get pairwise LCBs\n";
+			size_t pair_lcb_count = 0;
+			PairwiseLCBMatrix pairwise_adj_mat(boost::extents[node1_descendants.size()][node2_descendants.size()]);
+			for( uint nodeI = 0; nodeI < node1_descendants.size(); nodeI++ )
+				for( uint nodeJ = 0; nodeJ < node2_descendants.size(); nodeJ++ )
+				{
+					getPairwiseLCBs( node1_descendants[nodeI], node2_descendants[nodeJ], nodeI, nodeJ, t_matches, pairwise_adj_mat[nodeI][nodeJ], tm_score_array, tm_lcb_id_array );
+					pair_lcb_count += pairwise_adj_mat[nodeI][nodeJ].size();
+				}
+			cout << "there are " << pair_lcb_count << " pairwise LCBs\n";
+			printMemUsage();
+
+			sort( t_matches.begin(), t_matches.end() );
+
+			// other possibility, choose pairwise LCBs to remove.  a score improvement is always guaranteed
+			// compute LCBs among descendant nodes
+			// this is a good idea.  it factors out ancestral breakpoint decisions entirely
+			// need a data structure to track all pairwise LCBs that contain a given match
+			// template <class MatchType>
+			// class LcbTrackingMatch <MatchType> 
+			// { 
+			// public:
+			//  MatchType node_match;
+			//	boost::multi_array< size_t, 2 > lcb_id;
+			// }
+			// all pairwise LCBs would be evaluated for removal and the one that provides the greatest
+			// overall score improvement gets removed.
+			// upon removal, matches associated with that LCB would get removed, and any LCBs in other 
+			// genomes would get removed if they no longer had any matches
+			// to pull this off, the LCB struct needs to store the set of matches directly
+			// 
+			// but what about small cycles that appear only in 3 or more-way comparisons?  are these
+			// important?  umm, yeah, but only if you believe in evolution.
+			// 
+			// so here's the dilly-oh: score against the ancestral ordering(s) *and* all pairwise orderings
+			// for an ancestor.  ancestor contributes the sum of all descendants to the score and breakpoints
+			// are penalized as the sum of /participating/ descendants.  a descendant is participating
+			// if it has some matching region defined within the LCB and if removal of that matching region
+			// eliminates a breakpoint in the pairwise comparison
+			cout << "scaling bp penalty by conservation weight:\n";
+			print2d_matrix(cons_dist_mat, cout);
+			cout << "\n\nscaling bp penalty by bp weight: \n";
+			print2d_matrix(bp_dist_mat, cout);
+			cout << "\nGreedy BPE\n";
+			vector< TrackingMatch* > final;
+			if(scoring_scheme == AncestralScoring)
+			{
+				vector<node_id_t>::iterator d1_iter = std::find( node1_descendants.begin(), node1_descendants.end(), node1 );
+				vector<node_id_t>::iterator d2_iter = std::find( node2_descendants.begin(), node2_descendants.end(), node2 );
+				size_t d1_index = d1_iter - node1_descendants.begin();
+				size_t d2_index = d2_iter - node2_descendants.begin();
+				EvenFasterSumOfPairsBreakpointScorer spbs( breakpoint_penalty, min_breakpoint_penalty, bp_dist_mat, cons_dist_mat, 
+					t_matches, pairwise_adj_mat, node1_descendants, node2_descendants, 
+					tm_score_array, tm_lcb_id_array, d1_index, d1_index+1, d2_index, d2_index+1 );
+				cur_anchoring_score = greedySearch( spbs );
+				final = spbs.getResults();
+			}else{
+				EvenFasterSumOfPairsBreakpointScorer spbs( breakpoint_penalty, min_breakpoint_penalty, bp_dist_mat, cons_dist_mat, 
+					t_matches, pairwise_adj_mat, node1_descendants, node2_descendants, 
+					tm_score_array, tm_lcb_id_array, 0, node1_descendants.size(), 0, node2_descendants.size() );
+				cur_anchoring_score = greedySearch( spbs );
+				final = spbs.getResults();
+			}
+			cout << "done\n";
+
+			// free memory used by pairwise projections
+			for( size_t mI = 0; mI < tracking_matches.size(); ++mI )
+				tracking_matches[mI].node_match->Free();
+
+			ancestral_matches.clear();
+
+			// free memory from deleted matches here
+			std::sort(final.begin(), final.end());
+			vector< TrackingMatch* > deleted_t_matches( t_matches.size(), NULL );
+			std::set_difference( t_matches.begin(), t_matches.end(), final.begin(), final.end(), deleted_t_matches.begin() );
+			for( size_t delI = 0; delI < deleted_t_matches.size(); ++delI )
+			{
+				if( deleted_t_matches[delI] == NULL )
+					break;
+				deleted_t_matches[delI]->original_match->Free();
+			}
+
+			// convert back to an LCB list
+			vector< AbstractMatch* > new_matches(final.size());
+			for( size_t mI = 0; mI < final.size(); ++mI )
+				new_matches[mI] = final[mI]->original_match;
+
+			IdentifyBreakpoints( new_matches, breakpoints );
+			ComputeLCBs_v2( new_matches, breakpoints, LCB_list );
+
+		} // end if !collinear
+		else
+		{	// if we are assuming all genomes are collinear, then we don't need the 
+			// sophisticated pairwise breakpoint scoring and can get by with simple breakpoint
+			// penalties
+			IdentifyBreakpoints( ancestral_matches, breakpoints );
+			ComputeLCBs_v2( ancestral_matches, breakpoints, LCB_list );
+
+			vector< double > lcb_scores( LCB_list.size() );
+			double score_sum = 100;	// anything > 0 would work.  this will be the breakpoint penalty
+			for( size_t lcbI = 0; lcbI < LCB_list.size(); ++lcbI )
+			{
+				lcb_scores[lcbI] = SimpleGetLCBCoverage( LCB_list[lcbI] );
+				score_sum += lcb_scores[lcbI];
+			}
+
+			computeLCBAdjacencies_v3( LCB_list, lcb_scores, adjacencies );
+
+			// want to eliminate all breakpoints
+			SimpleBreakpointScorer wbs( adjacencies, score_sum, true );
+			cur_min_coverage = greedyBreakpointElimination_v4( adjacencies, lcb_scores, wbs, NULL, false );
+			vector<AbstractMatch*> deleted_matches;
+			filterMatches_v2( adjacencies, LCB_list, lcb_scores, deleted_matches );
+			for( size_t delI = 0; delI < deleted_matches.size(); ++delI )
+				deleted_matches[delI]->Free();
+		}
+		printMemUsage();
+
+		ancestral_matches.clear();
+
+		cout << "Arrived at " << LCB_list.size() << " intervals\n";
+		// create an ancestral ordering
+		vector< Interval* > pairwise_intervals;
+		Interval tmp_iv;
+		for( size_t lcbI = 0; lcbI < LCB_list.size(); lcbI++ )
+		{
+			pairwise_intervals.push_back( tmp_iv.Copy() );
+			pairwise_intervals.back()->SetMatches( LCB_list[lcbI] );
+		}
+		LCB_list.clear();
+
+		vector<gnSeqI> seq_lengths = vector<gnSeqI>(2,0);
+		for( size_t aI = 0; aI < alignment_tree[node1].ordering.size(); ++aI )
+			seq_lengths[0] += alignment_tree[node1].ordering[aI].Length();
+		for( size_t aI = 0; aI < alignment_tree[node2].ordering.size(); ++aI )
+			seq_lengths[1] += alignment_tree[node2].ordering[aI].Length();
+
+		cout << "Adding unaligned intervals\n";
+		addUnalignedIntervals_v2(pairwise_intervals, set<uint>(), seq_lengths);
+
+		cout << "addUnalignedIntervals yields " << pairwise_intervals.size() << " intervals\n";
+
+		bool borked = false;
+		if(debug_aligner)
+			borked = validatePairwiseIntervals(node1, node2, pairwise_intervals);
+
+		// merge unaligned intervals
+		cout << "Merging unaligned intervals\n";
+		cout.flush();
+		vector<Interval*> new_list1;
+		vector<Interval*> merged_intervals;
+		mergeUnalignedIntervals( 1, pairwise_intervals, new_list1 );
+		mergeUnalignedIntervals( 0, new_list1, merged_intervals );
+		cout << "Marbling gaps\n";
+		cout.flush();
+		for( size_t ivI = 0; ivI < merged_intervals.size(); ivI++ )
+			merged_intervals[ivI]->Marble(50);
+
+		cout << "Propagating descendant breakpoints\n";
+
+		// split up intervals at descendant's breakpoints
+		propagateDescendantBreakpoints( node1, 0, merged_intervals );
+		propagateDescendantBreakpoints( node2, 1, merged_intervals );
+
+		cout << "descendant 0(" << node1 << ") has " << alignment_tree[node1].ordering.size() << " intervals\n";
+		cout << "descendant 1(" << node2 << ") has " << alignment_tree[node2].ordering.size() << " intervals\n";
+		cout << "propagateDescendantBreakpoints yields " << merged_intervals.size() << " intervals\n";
+
+		if(debug_aligner)
+			borked = validatePairwiseIntervals(node1, node2, merged_intervals);
+		cout << "Creating ancestral ordering\n";
+		alignment_tree[ancestor].ordering.clear();
+		createAncestralOrdering( merged_intervals, alignment_tree[ancestor].ordering );
+		for( size_t ivI = 0; ivI < merged_intervals.size(); ivI++ )
+			merged_intervals[ivI]->Free();
+		merged_intervals.clear();	// free up some memory
+
+		if(debug_aligner)
+			validateSuperIntervals( node1, node2, ancestor );
+
+		// if we're not making any progress then bail out...
+		gnSeqI cur_ancestral_seq_len = 0;
+		for( size_t aI = 0; aI < alignment_tree[ancestor].ordering.size(); aI++ )
+			cur_ancestral_seq_len += alignment_tree[ancestor].ordering[aI].Length();
+
+		if( !collinear_genomes )
+			cout << "Previous anchoring score: " << prev_anchoring_score << ", new anchor score: " << cur_anchoring_score << endl;
+		else
+			cout << "Prev alignment len: " << prev_ancestral_seq_len << ", new alignment length: " << cur_ancestral_seq_len << endl;
+		// if cur_seq_len has decreased then we're improving
+		// if not, then we're done finding matches
+		if( collinear_genomes && cur_ancestral_seq_len >= prev_ancestral_seq_len )
+			break;
+
+		// stop unless we've increased the anchoring score by at least 0.5%
+		// the 0.5% is important for large alignments where many slow iterations might otherwise occur
+		// that only increase the anchoring score by a tiny amount
+		if( !collinear_genomes && cur_anchoring_score <= prev_anchoring_score + (genome::absolut(prev_anchoring_score)/200.0) )
+			break;
+		prev_anchoring_score = cur_anchoring_score;
+		prev_ancestral_seq_len = cur_ancestral_seq_len;
+
+		// accept the new alignment tree...
+		cout << "Backing up alignment tree...\n";
+		cout.flush();
+		aln_tree_backup = alignment_tree;
+
+		cout << "propagating ancestral breakpoints\n";
+		cout.flush();
+		recursiveApplyAncestralBreakpoints(ancestor);
+
+
+		if( debug_me ) 	 
+		{
+			for( size_t aI = 0; aI < alignment_tree[ancestor].ordering.size(); aI++ ) 	 
+			{
+				GappedAlignment gal; 	 
+				extractAlignment(ancestor, aI, gal); 	 
+
+				bool check = false;
+				for( size_t ii = 0; ii < gal.SeqCount(); ++ii )
+				{
+					if( gal.LeftEnd(ii) == 0 )
+						continue;
+					for( size_t jj = 0; jj < gal.SeqCount(); ++jj )
+					{
+						if( gal.LeftEnd(jj) == 0 )
+							continue;
+						check = check || computeID( gal, ii, jj ) < .5;
+					}
+				}
+				if( check )
+					cerr << "check iv " << aI << " dbg_count " << dbg_count << endl;
+				else
+					continue;
+
+				const vector< string >& aln_mat = GetAlignment(gal, this->original_ml.seq_table); 	 
+				gnSequence seq; 	 
+				for( size_t seqI = 0; seqI < gal.SeqCount(); ++seqI ) 	 
+					if( gal.LeftEnd(seqI) != NO_MATCH ) 	 
+						seq += aln_mat[seqI]; 	 
+
+				stringstream dbg_fname; 	 
+				dbg_fname << "prof_dbg_iv_" << aI << ".dbg." << dbg_count++ << ".fas"; 	 
+				ofstream debug_file( dbg_fname.str().c_str() ); 	 
+				gnFASSource::Write( seq, debug_file, false ); 	 
+				debug_file.close(); 	 
+			} 	 
+		}
+
+		if(debug_aligner)
+			validateSuperIntervals( node1, node2, ancestor );
+
+		if(recursive)
+		{
+		// search for additional alignment anchors
+		cout << "recursive anchor search\n";
+		cout.flush();
+		Matrix<MatchList> matches;
+		Matrix< std::vector< search_cache_t > > new_cache_db(node1_seqs.size(), node2_seqs.size());
+		// initialize storage for intervening regions
+		boost::multi_array< std::vector< std::vector< int64 > >, 2 > iv_regions( boost::extents[node1_seqs.size()][node2_seqs.size()] );
+		for( seqI = 0; seqI < node1_seqs.size(); seqI++ )
+			for( seqJ = 0; seqJ < node2_seqs.size(); seqJ++ )
+				iv_regions[seqI][seqJ].resize(2);
+		vector< gnSequence* > bseqs( node1_seqs.size() + node2_seqs.size() );
+		for( size_t aI = 0; aI < alignment_tree[ancestor].ordering.size(); aI++ )
+		{
+			CompactGappedAlignment<> cga;
+			extractAlignment(ancestor, aI, cga);
+			recurseOnPairs(node1_seqs, node2_seqs, cga, matches, search_cache_db, new_cache_db, iv_regions);
+
+			// add any new matches to the pairwise_matches matrix
+			for( seqI = 0; seqI < node1_seqs.size(); seqI++ )
+				for( seqJ = 0; seqJ < node2_seqs.size(); seqJ++ )
+					pairwise_matches(seqI, seqJ).insert( pairwise_matches(seqI, seqJ).end(), matches(seqI, seqJ).begin(), matches(seqI, seqJ).end() );
+
+		}
+
+		// add seqs
+		for( seqI = 0; seqI < node1_seqs.size(); seqI++ )
+			bseqs[seqI] = alignment_tree[ node1_seqs[seqI] ].sequence;
+		for( seqJ = 0; seqJ < node2_seqs.size(); seqJ++ )
+			bseqs[seqI+seqJ] =  alignment_tree[ node2_seqs[seqJ] ].sequence;
+
+		MaskedMemHash nway_mh;
+		// now search intervening regions
+		for( seqI = 0; seqI < node1_seqs.size(); seqI++ )
+			for( seqJ = 0; seqJ < node2_seqs.size(); seqJ++ )
+			{
+				std::sort( iv_regions[seqI][seqJ][0].begin(), iv_regions[seqI][seqJ][0].end() );
+				std::sort( iv_regions[seqI][seqJ][1].begin(), iv_regions[seqI][seqJ][1].end() );
+				MatchList new_matches;
+				new_matches.seq_table.resize(2);
+				new_matches.seq_table[0] = bseqs[seqI];
+				new_matches.seq_table[1] = bseqs[node1_seqs.size() + seqJ];
+				SearchLCBGaps( new_matches, iv_regions[seqI][seqJ], nway_mh );
+				cout << seqI << "," << seqJ << " have " << new_matches.size() << " new matches outside LCBs\n";
+				pairwise_matches(seqI, seqJ).insert( pairwise_matches(seqI, seqJ).end(), new_matches.begin(), new_matches.end() );
+			}
+
+		if(using_cache_db)
+		{
+
+		for( seqI = 0; seqI < node1_seqs.size(); seqI++ )
+		{
+			for( seqJ = 0; seqJ < node2_seqs.size(); seqJ++ )
+			{
+				for( size_t mI = 0; mI < search_cache_db(seqI,seqJ).size(); mI++ )
+				{
+					if( search_cache_db(seqI,seqJ)[mI].first != NULL )
+						search_cache_db(seqI,seqJ)[mI].first->Free();
+					if( search_cache_db(seqI,seqJ)[mI].second != NULL )
+						search_cache_db(seqI,seqJ)[mI].second->Free();
+				}
+				search_cache_db(seqI,seqJ).clear();
+				if(new_cache_db(seqI, seqJ).size() > 0)
+				{
+					// try sorting using C's qsort -- maybe there's something wrong with std::sort?
+					search_cache_t* sc_array = new search_cache_t[new_cache_db(seqI,seqJ).size()];
+					for( size_t i = 0; i < new_cache_db(seqI,seqJ).size(); i++ )
+						sc_array[i] = new_cache_db(seqI,seqJ)[i];
+					qsort(sc_array, new_cache_db(seqI,seqJ).size(), sizeof(AbstractMatch*), cachecomp);
+
+					search_cache_db(seqI, seqJ).resize(new_cache_db(seqI,seqJ).size());
+					for( size_t i = 0; i < new_cache_db(seqI,seqJ).size(); i++ )
+						search_cache_db(seqI, seqJ)[i] = sc_array[i];
+					delete[] sc_array;
+
+					new_cache_db(seqI, seqJ).clear();
+				}
+				if( pairwise_matches(seqI,seqJ).size() > 0 )
+					cout << seqI << "," << seqJ << " has an additional " << pairwise_matches(seqI,seqJ).size() << " matches\n";
+			}
+		}
+		
+		}
+		}	// if recursive
+
+		// restore backed up tree since we only want the final set of ancestral
+		// breakpoints applied to the descendants
+		cout << "Restoring backed up alignment tree...\n";
+		cout.flush();
+		swap( alignment_tree, aln_tree_backup );
+
+	}	// end while(true)
+
+	if( using_cache_db )
+	{
+	// delete the search cache
+	for( seqI = 0; seqI < node1_seqs.size(); seqI++ )
+		for( seqJ = 0; seqJ < node2_seqs.size(); seqJ++ )
+			for( size_t mI = 0; mI < search_cache_db(seqI,seqJ).size(); mI++ )
+			{
+				if( search_cache_db(seqI,seqJ)[mI].first != NULL )
+					search_cache_db(seqI,seqJ)[mI].first->Free();
+				if( search_cache_db(seqI,seqJ)[mI].second != NULL )
+					search_cache_db(seqI,seqJ)[mI].second->Free();
+			}
+	}
+
+	printMemUsage();
+
+	// aln_tree_backup has the highest scoring alignment_tree
+	swap( alignment_tree, aln_tree_backup );
+	cout << "propagating ancestral breakpoints\n";
+	recursiveApplyAncestralBreakpoints(ancestor);
+
+	printMemUsage();
+
+	// step 8) construct a muscle alignment in each intervening region
+	if( gapped_alignment )
+	{
+		cout << "performing a gapped alignment\n";
+		doGappedAlignment(ancestor, true);
+	}else
+		cout << "skipping gapped alignment\n";
+	if( refine )
+	{
+		size_t unrefined = countUnrefined( alignment_tree, ancestor );
+		if( unrefined > 5 && ancestor != alignment_tree.root )
+		{
+			cout << "performing iterative refinement\n";
+			doGappedAlignment(ancestor, false);
+			markAsRefined( alignment_tree, ancestor );
+		}
+	}
+	printMemUsage();
+
+
+	if( debug_me ) 	 
+	{
+		for( size_t aI = 0; aI < alignment_tree[ancestor].ordering.size(); aI++ ) 	 
+		{ 	 
+
+			static int dbg_count = 0; 	 
+			GappedAlignment gal; 	 
+			extractAlignment(ancestor, aI, gal); 	 
+
+			bool check = false;
+			for( size_t ii = 0; ii < gal.SeqCount(); ++ii )
+			{
+				if( gal.LeftEnd(ii) == 0 )
+					continue;
+				for( size_t jj = 0; jj < gal.SeqCount(); ++jj )
+				{
+					if( gal.LeftEnd(jj) == 0 )
+						continue;
+					check = check || computeID( gal, ii, jj ) < .5;
+				}
+			}
+			if( check )
+				cerr << "check iv " << aI << " dbg_count " << dbg_count << endl;
+			else
+				continue;
+
+			const vector< string >& aln_mat = GetAlignment(gal, this->original_ml.seq_table); 	 
+			gnSequence seq; 	 
+			for( size_t seqI = 0; seqI < gal.SeqCount(); ++seqI ) 	 
+				if( gal.LeftEnd(seqI) != NO_MATCH ) 	 
+					seq += aln_mat[seqI]; 	 
+
+			stringstream dbg_fname; 	 
+			dbg_fname << "prof_dbg_iv_" << aI << ".dbg." << dbg_count++ << ".fas"; 	 
+			ofstream debug_file( dbg_fname.str().c_str() ); 	 
+			gnFASSource::Write( seq, debug_file, false ); 	 
+			debug_file.close(); 	 
+		} 	 
+	}
+
+
+}
+
+
+/**
+ * Wraps the matches in new_ivs in a newly allocated Interval and appends that
+ * Interval to new_list.
+ * @param seqI		sequence whose orientation is being normalised
+ * @param orient	orientation of the anchoring interval; only AbstractMatch::reverse triggers inversion
+ * @param new_ivs	matches to bundle (handed to Interval::SetMatches)
+ * @param new_list	receives the newly created Interval
+ */
+void addGuy( uint seqI, AbstractMatch::orientation orient, 
+			std::vector< AbstractMatch* >& new_ivs, 
+			vector<Interval*>& new_list )
+{
+	Interval prototype;
+	const bool anchor_is_reverse = ( orient == AbstractMatch::reverse );
+	if( anchor_is_reverse )
+	{
+		// flip every match that is defined in seqI but disagrees with the anchor's orientation
+		for( size_t matchI = 0; matchI < new_ivs.size(); matchI++ )
+		{
+			AbstractMatch* cur_match = new_ivs[matchI];
+			if( cur_match->LeftEnd(seqI) == NO_MATCH )
+				continue;
+			if( cur_match->Orientation(seqI) != orient )
+				cur_match->Invert();
+		}
+	}
+	// allocate the result through Copy() of an empty prototype, then hand it the matches
+	Interval* bundled_iv = prototype.Copy();
+	bundled_iv->SetMatches( new_ivs );
+	new_list.push_back( bundled_iv );
+}
+
+/**
+ * Walks iv_list in SSC<Interval>(seqI) order and regroups its matches into new
+ * Interval objects appended to new_list.  Intervals with Multiplicity() == 2 act
+ * as anchors; the other intervals that are defined in seqI are attached to a
+ * neighbouring anchor, and an interval sitting after an anchor is split in half
+ * so that its remainder is seen again on the next pass of the loop.
+ * Intervals that are undefined in seqI are moved to new_list unchanged.
+ * On return iv_list is empty: every Interval left in it has been Free()d.
+ * NOTE(review): the logic relies on addGuy()/SetMatches() leaving new_ivs empty -- confirm against Interval::SetMatches.
+ *
+ * @param seqI		index of the sequence along which intervals are ordered and merged
+ * @param iv_list	input intervals; sorted in place, consumed and cleared
+ * @param new_list	receives the regrouped intervals
+ */
+void mergeUnalignedIntervals( uint seqI, vector< Interval* >& iv_list, vector< Interval* >& new_list )
+{
+	SSC<Interval> ivlcJ(seqI);
+	sort( iv_list.begin(), iv_list.end(), ivlcJ );
+
+	Interval tmp_iv;
+	// orientation of the anchor currently being accumulated; undefined means "no anchor yet"
+	AbstractMatch::orientation orient = AbstractMatch::undefined;
+	// matches collected for the interval under construction
+	vector< AbstractMatch* > new_ivs;
+	// temporary copies made while splitting; freed at the end
+	vector< Interval* > to_delete;
+	for( size_t ordI = 0; ordI < iv_list.size(); ordI++ )
+	{
+		// not defined in seqI: pass through untouched and null the slot so it is not freed below
+		if( iv_list[ordI]->LeftEnd(seqI) == NO_MATCH )
+		{
+			new_list.push_back(iv_list[ordI]);
+			iv_list[ordI] = NULL;
+			continue;
+		}
+
+		// first anchor after a run of non-anchor intervals
+		if( orient == AbstractMatch::undefined && iv_list[ordI]->Multiplicity() == 2 )
+		{
+			orient = iv_list[ordI]->Orientation(seqI);
+			vector< AbstractMatch* > matches;
+			iv_list[ordI]->StealMatches( matches );
+			// a forward anchor goes after the matches already collected, any other orientation goes before them
+			if( orient == AbstractMatch::forward )
+				new_ivs.insert( new_ivs.end(), matches.begin(), matches.end() );
+			else
+				new_ivs.insert( new_ivs.begin(), matches.begin(), matches.end() );
+
+			// if it's the last one then add
+			if( ordI + 1 == iv_list.size() )
+				addGuy( seqI, orient, new_ivs, new_list );
+			continue;
+		}
+		// an anchor immediately following another anchor: emit the previous one and start afresh
+		if( orient != AbstractMatch::undefined && iv_list[ordI]->Multiplicity() == 2 )
+		{
+			// add this guy...
+			// set the orientation for any unaligned intervals
+			addGuy( seqI, orient, new_ivs, new_list );
+
+			// prepare a new one
+			vector< AbstractMatch* > matches;
+			orient = iv_list[ordI]->Orientation(seqI);
+			iv_list[ordI]->StealMatches( matches );
+			new_ivs.insert( new_ivs.end(), matches.begin(), matches.end() );
+			// if it's the last one then add
+			if( ordI + 1 == iv_list.size() )
+				addGuy( seqI, orient, new_ivs, new_list );
+			continue;
+		}
+		// non-anchor interval with nothing collected yet: hold it until an anchor turns up
+		if( new_ivs.size() == 0 )
+		{
+			vector< AbstractMatch* > matches;
+			iv_list[ordI]->StealMatches( matches );
+			new_ivs.insert( new_ivs.end(), matches.begin(), matches.end() );
+			continue;
+		}
+		// split this one in half (if its not the last one and there's something to split)...
+		Interval* left_iv = iv_list[ordI]->Copy();
+		to_delete.push_back( left_iv );	// make sure this gets deleted later
+		bool cropped = (ordI + 1 < iv_list.size() && iv_list[ordI]->Length(seqI) > 1);
+		if( cropped )
+		{
+			// left_iv keeps the first lendo alignment columns, iv_list[ordI] keeps the rest
+			gnSeqI lendo = left_iv->AlignmentLength() / 2;
+			left_iv->CropEnd( left_iv->AlignmentLength() - lendo );
+			iv_list[ordI]->CropStart( lendo );
+		}
+		vector< AbstractMatch* > matches;
+		left_iv->StealMatches( matches );
+		if( orient == AbstractMatch::forward )
+			new_ivs.insert( new_ivs.end(), matches.begin(), matches.end() );
+		else
+			new_ivs.insert( new_ivs.begin(), matches.begin(), matches.end() );
+
+		addGuy( seqI, orient, new_ivs, new_list );
+		// prepare for the next
+		orient = AbstractMatch::undefined;
+		if(cropped)
+			ordI--;	// if we split a match, make sure we get the rest of this match on the next run through the loop
+	}
+
+	if( new_ivs.size() > 0 )
+	{
+		// uh-oh. there must not have been anything aligned
+		addGuy( seqI, AbstractMatch::forward, new_ivs, new_list );
+	}
+
+	// free up any left_ivs that were allocated
+	for( size_t delI = 0; delI < to_delete.size(); delI++ )
+		to_delete[delI]->Free();
+
+	// free up ivs left in iv_list
+	for( size_t ivI = 0; ivI < iv_list.size(); ivI++ )
+		if( iv_list[ivI] != NULL )
+			iv_list[ivI]->Free();
+	iv_list.clear();
+}
+
+
+/**
+ * Builds the ancestral SuperInterval ordering from a list of intervals.
+ * The matches of each entry of interval_list are moved into the reference_iv of
+ * the SuperInterval at the same index, and the SuperIntervals are laid end to
+ * end: the first starts at coordinate 1 and each following one starts where the
+ * previous one stops.  Extra validation runs when debug_aligner is set.
+ * @param interval_list		source intervals; their matches are stolen
+ * @param ancestral_sequence	resized to interval_list.size() and filled in
+ */
+void ProgressiveAligner::createAncestralOrdering( vector<Interval*>& interval_list, vector< SuperInterval >& ancestral_sequence )
+{
+	ancestral_sequence.resize( interval_list.size() );
+	// coordinate at which the next SuperInterval begins
+	int64 next_left_end = 1;
+	for( uint pos = 0; pos < interval_list.size(); ++pos )
+	{
+		Interval* source_iv = interval_list[pos];
+		SuperInterval& target_siv = ancestral_sequence[pos];
+
+		if(debug_aligner)
+			source_iv->ValidateMatches();
+
+		// transfer the matches from the source interval into the ancestor's reference interval
+		vector<AbstractMatch*> stolen;
+		source_iv->StealMatches(stolen);
+		target_siv.reference_iv.SetMatches(stolen);
+
+		target_siv.SetLeftEnd(next_left_end);
+		target_siv.SetLength(target_siv.reference_iv.AlignmentLength());
+
+		if(debug_aligner)
+			target_siv.ValidateSelf();
+
+		next_left_end += target_siv.Length();
+	}
+}
+
+/**
+ * Flags the edge between subject_node and neighbor as aligned, on subject_node's side only.
+ * Every entry of subject_node's parents / children that equals neighbor gets its
+ * matching parents_aligned / children_aligned flag set to true.
+ */
+void markAligned( PhyloTree< AlignmentTreeNode >& alignment_tree, node_id_t subject_node, node_id_t neighbor )
+{
+	AlignmentTreeNode& subject = alignment_tree[subject_node];
+
+	for( uint pI = 0; pI < subject.parents.size(); ++pI )
+	{
+		if( subject.parents[pI] != neighbor )
+			continue;
+		subject.parents_aligned[pI] = true;
+	}
+
+	for( uint cI = 0; cI < subject.children.size(); ++cI )
+	{
+		if( subject.children[cI] != neighbor )
+			continue;
+		subject.children_aligned[cI] = true;
+	}
+}
+
+
+/**
+ * Consistency check of an ancestor's ordering against the orderings of its two children.
+ * Reports a problem when a child has a zero-length SuperInterval, when the child
+ * lengths recorded in the ancestor's reference intervals differ from the children's
+ * own totals, when a child's SuperIntervals are not contiguous, when the ancestor's
+ * summed Length() differs from its summed AlignmentLength(), or when the ancestor
+ * is shorter than either child.  ValidateSelf() is called on every SuperInterval visited.
+ * @return true if an inconsistency was found (breakHere() is called and a summary goes to cerr)
+ */
+bool
+ProgressiveAligner::validateSuperIntervals(node_id_t node1, node_id_t node2, node_id_t ancestor)
+{
+	bool borked = false;
+
+	// lengths as seen from the ancestor
+	vector< SuperInterval >& anc_sivs = alignment_tree[ancestor].ordering;
+	gnSeqI n1_len = 0;
+	gnSeqI n2_len = 0;
+	gnSeqI anc_len = 0;
+	gnSeqI anc_aln_len = 0;
+	for( size_t aI = 0; aI < anc_sivs.size(); ++aI )
+	{
+		SuperInterval& cur = anc_sivs[aI];
+		if( cur.reference_iv.Start(0) != 0 )
+			n1_len += cur.reference_iv.Length(0);
+		if( cur.reference_iv.Start(1) != 0 )
+			n2_len += cur.reference_iv.Length(1);
+		anc_len += cur.Length();
+		anc_aln_len += cur.reference_iv.AlignmentLength();
+		cur.ValidateSelf();
+	}
+
+	// lengths as seen from the first child
+	gnSeqI real_n1len = 0;
+	vector< SuperInterval >& child1_sivs = alignment_tree[node1].ordering;
+	for( size_t cI = 0; cI < child1_sivs.size(); ++cI )
+	{
+		SuperInterval& cur = child1_sivs[cI];
+		if( cur.Length() == 0 )
+			borked = true;
+		real_n1len += cur.Length();
+		cur.ValidateSelf();
+	}
+
+	// lengths as seen from the second child
+	gnSeqI real_n2len = 0;
+	vector< SuperInterval >& child2_sivs = alignment_tree[node2].ordering;
+	for( size_t cI = 0; cI < child2_sivs.size(); ++cI )
+	{
+		SuperInterval& cur = child2_sivs[cI];
+		if( cur.Length() == 0 )
+			borked = true;
+		real_n2len += cur.Length();
+		cur.ValidateSelf();
+	}
+
+	if( !( real_n1len == n1_len && real_n2len == n2_len ) )
+		borked = true;
+
+	// each child SuperInterval must start exactly where its predecessor stops
+	for( size_t cI = 1; cI < child1_sivs.size(); ++cI )
+	{
+		SuperInterval& prev = child1_sivs[cI-1];
+		if( child1_sivs[cI].LeftEnd() != prev.LeftEnd() + prev.Length() )
+			borked = true;
+	}
+	for( size_t cI = 1; cI < child2_sivs.size(); ++cI )
+	{
+		SuperInterval& prev = child2_sivs[cI-1];
+		if( child2_sivs[cI].LeftEnd() != prev.LeftEnd() + prev.Length() )
+			borked = true;
+	}
+
+	// the ancestor's own length must equal the summed alignment length of its reference intervals
+	if( anc_len != anc_aln_len )
+		borked = true;
+
+	// the ancestor must be at least as long as each child
+	if( anc_len < real_n1len || anc_len < real_n2len )
+		borked = true;
+
+	if( !borked )
+		return borked;
+
+	breakHere();
+	cerr << "child1 has " << child1_sivs.size() << " ivs totalling " << real_n1len << " nt\n";
+	cerr << "child2 has " << child2_sivs.size() << " ivs totalling " << real_n2len << " nt\n";
+	cerr << "parent has " << anc_sivs.size() << " ivs, n1_len: " << n1_len << " n2_len: " << n2_len << endl;
+	return borked;
+
+}
+
+/**
+ * Consistency check of a list of pairwise intervals against the orderings of the
+ * two nodes they were built from.
+ *
+ * The check fails (returns true) when either node's ordering contains a
+ * zero-length SuperInterval, or when the total length that pair_iv covers in
+ * sequence 0 / 1 differs from the summed lengths of node1's / node2's ordering.
+ * A mismatch between an interval's alignment matrix width and its
+ * AlignmentLength(), and overlaps between intervals, are reported on cerr but do
+ * not affect the return value.
+ *
+ * The list is only inspected: all sorting needed for the checks and for the
+ * diagnostics is done on private copies, so the caller's interval order is the
+ * same after the call whether or not validation fails.
+ *
+ * @param node1		node whose ordering corresponds to sequence 0 of pair_iv
+ * @param node2		node whose ordering corresponds to sequence 1 of pair_iv
+ * @param pair_iv	intervals to validate; ValidateMatches() is called on each
+ * @return true if an inconsistency was found
+ */
+bool ProgressiveAligner::validatePairwiseIntervals(node_id_t node1, node_id_t node2, std::vector<Interval*>& pair_iv)
+{
+		// validate the ancestor
+	bool borked = false;
+	gnSeqI n1_len = 0;
+	gnSeqI n2_len = 0;
+	for( size_t sivI = 0; sivI < pair_iv.size(); sivI++ )
+	{
+		if( pair_iv[sivI]->Start(0) != 0 )
+			n1_len += pair_iv[sivI]->Length(0);
+		if( pair_iv[sivI]->Start(1) != 0 )
+			n2_len += pair_iv[sivI]->Length(1);
+
+		// the alignment matrix must be exactly as wide as the interval claims to be
+		vector< bitset_t > aln_mat;
+		pair_iv[sivI]->GetAlignment(aln_mat);
+		if( aln_mat[0].size() != pair_iv[sivI]->AlignmentLength() )
+		{
+			cerr << "broked\n";
+		}
+		pair_iv[sivI]->ValidateMatches();
+	}
+	gnSeqI real_n1len = 0;
+	gnSeqI real_n2len = 0;
+
+	vector< SuperInterval >& siv1_list = alignment_tree[node1].ordering;
+	for( size_t sivI = 0; sivI < siv1_list.size(); sivI++ )
+	{
+		if( siv1_list[sivI].Length() == 0 )
+			borked = true;
+		real_n1len += siv1_list[sivI].Length();
+	}
+
+	vector< SuperInterval >& siv2_list = alignment_tree[node2].ordering;
+	for( size_t sivI = 0; sivI < siv2_list.size(); sivI++ )
+	{
+		if( siv2_list[sivI].Length() == 0 )
+			borked = true;
+		real_n2len += siv2_list[sivI].Length();
+	}
+
+	if( real_n1len != n1_len || real_n2len != n2_len )
+			borked = true;
+
+	// check for overlapping intervals
+	vector< Interval* > tmp_iv_list = pair_iv;
+	for( uint seqI = 0; seqI < 2; seqI++ )
+	{
+		SSC<Interval> ssc(seqI);
+		sort( tmp_iv_list.begin(), tmp_iv_list.end(), ssc );
+		for( size_t ivI = 1; ivI < tmp_iv_list.size(); ivI++ )
+		{
+			if( tmp_iv_list[ivI-1]->LeftEnd(seqI) == NO_MATCH || tmp_iv_list[ivI]->LeftEnd(seqI) == NO_MATCH )
+				continue;
+			if( tmp_iv_list[ivI-1]->RightEnd(seqI) >= tmp_iv_list[ivI]->LeftEnd(seqI) )
+			{
+				cerr << "overlap:\n";
+				cerr << "tmp_iv_list[ivI-1].RightEnd(seqI): " << tmp_iv_list[ivI-1]->RightEnd(seqI) << endl;
+				cerr << "tmp_iv_list[ivI].LeftEnd(seqI): " << tmp_iv_list[ivI]->LeftEnd(seqI) << endl;
+				breakHere();
+			}
+		}
+	}
+
+	if( borked )
+	{
+		cerr << "child1 has " << siv1_list.size() << " ivs totalling " << real_n1len << " nt\n";
+		cerr << "child2 has " << siv2_list.size() << " ivs totalling " << real_n2len << " nt\n";
+		cerr << "parent has " << pair_iv.size() << " ivs, n1_len: " << n1_len << " n2_len: " << n2_len << endl;
+		if( n2_len != real_n2len )
+		{
+			// The diagnostics want the intervals ordered along sequence 1.  Sort a
+			// private copy: sorting pair_iv itself would make a failed debug
+			// validation change the order the caller goes on to use.
+			vector< Interval* > diag_list = pair_iv;
+			SSC<Interval> sortie(1);
+			sort( diag_list.begin(), diag_list.end(), sortie );
+
+			if( n2_len < real_n2len )
+			{
+				// coverage is missing: print each interval and the gap to its predecessor
+				size_t prev_iv = 9999999;	// sentinel meaning "no previous interval yet"
+				for( size_t ivI = 0; ivI < diag_list.size(); ++ivI)
+				{
+					if( diag_list[ivI]->LeftEnd(1) == NO_MATCH )
+						continue;
+
+					if( prev_iv != 9999999 )
+						cerr << "diff: " << diag_list[ivI]->LeftEnd(1) - diag_list[prev_iv]->RightEnd(1) << endl;
+					cerr << "Interval " << ivI << " LeftEnd(1): " << diag_list[ivI]->LeftEnd(1) << " RightEnd(1): " << diag_list[ivI]->RightEnd(1) << std::endl;
+					prev_iv = ivI;
+				}
+			}else
+			{
+				// too much coverage: print the intervals that start at or beyond the real length
+				for( size_t ivI = 0; ivI < diag_list.size(); ++ivI)
+				{
+					if( diag_list[ivI]->LeftEnd(1) < real_n2len )
+						continue;
+					cerr << "Interval " << ivI << " LeftEnd(1): " << diag_list[ivI]->LeftEnd(1) << " RightEnd(1): " << diag_list[ivI]->RightEnd(1) << std::endl;
+				}
+			}
+		}
+		breakHere();
+	}
+	return borked;
+}
+
+/**
+ * Aligns node1 to node2 through their common ancestor and records the result in the tree.
+ * Both nodes are expected to have ancestor as their first parent; if either does
+ * not (including the case of having no parent at all) the problem is reported via
+ * breakHere() and "rotten" on cerr, and the alignment is attempted regardless.
+ * After alignProfileToProfile() returns, all edges among the three nodes are marked aligned.
+ * @param node1		first node to align
+ * @param node2		second node to align
+ * @param ancestor	node that will hold the alignment of node1 and node2
+ */
+void ProgressiveAligner::alignNodes( node_id_t node1, node_id_t node2, node_id_t ancestor )
+{
+	cout << "Aligning node " << node1 << " to " << node2 << " via " << ancestor << "!\n";
+	// if node1 and node2 are not already children of ancestor then make it so...
+	// check that a parent exists before reading parents[0]; a parentless node is just as "rotten"
+	const bool node1_ok = alignment_tree[node1].parents.size() != 0 &&
+		alignment_tree[node1].parents[0] == ancestor;
+	const bool node2_ok = alignment_tree[node2].parents.size() != 0 &&
+		alignment_tree[node2].parents[0] == ancestor;
+	if( !node1_ok || !node2_ok )
+	{
+		breakHere();
+		cerr << "rotten\n";
+	}
+	
+	alignProfileToProfile(node1, node2, ancestor);
+
+	// mark edges as aligned
+	markAligned( alignment_tree, node1, node2 );
+	markAligned( alignment_tree, node2, node1 );
+	markAligned( alignment_tree, node1, ancestor );
+	markAligned( alignment_tree, node2, ancestor );
+	markAligned( alignment_tree, ancestor, node1 );
+	markAligned( alignment_tree, ancestor, node2 );
+}
+
+/**
+ * finds the midpoint of a phylogenetic tree, returns the ids of the surrounding nodes in n1 and n2
+ *
+ * Every child->parent branch becomes an undirected graph edge whose weight is
+ * (int)(scaling_factor * |distance|) + 1.  The most distant pair of nodes is
+ * located with an all-pairs shortest path computation, and the path between them
+ * is then walked from one end until half of its length has been consumed.
+ *
+ * @param alignment_tree	tree to examine; only parents[0] and distance of each node are read
+ * @param n1	(out) node on the far side of the midpoint edge
+ * @param n2	(out) node on the near side of the midpoint edge (the one visited just before n1)
+ *
+ * If the tree is empty, or the shortest path computation fails, a message is
+ * printed to cerr where applicable and n1 / n2 are left unmodified.
+ */
+void findMidpoint( PhyloTree< AlignmentTreeNode >& alignment_tree, node_id_t& n1, node_id_t& n2 )
+{
+	// an empty tree has no midpoint, and size()-1 below would wrap around
+	if( alignment_tree.size() == 0 )
+		return;
+
+	// use boost's all pairs shortest path to find the longest path on the tree 
+	// Then actually traverse the path to determine which edge
+	// is halfway.
+	const double scaling_factor = 100000;
+	using namespace boost;
+	typedef adjacency_list<vecS, vecS, undirectedS, no_property,
+	property< edge_weight_t, int, property< edge_color_t, default_color_type > > > Graph;
+	const int V = alignment_tree.size();
+	const std::size_t E = alignment_tree.size()-1;
+	typedef std::pair < int, int >Edge;
+	// vectors rather than new[] so that every exit path releases the storage
+	std::vector< Edge > edge_array( E );
+	std::vector< int > weights( E );
+	size_t eI = 0;
+	// stop once E edges have been collected so a malformed tree cannot overrun the arrays
+	for( int vI = 0; vI < V && eI < E; ++vI )
+	{
+		if( alignment_tree[vI].parents.size() != 0 )
+		{
+			edge_array[eI] = Edge( vI, alignment_tree[vI].parents[0] );
+			// for some reason boost insists on using an int for weights.  need to figure that out
+			// the +1 keeps every weight strictly positive
+			weights[eI] = (int)(scaling_factor * genome::absolut(alignment_tree[vI].distance)) + 1;
+			eI++;
+		}
+	}
+
+#if defined(BOOST_MSVC) && BOOST_MSVC <= 1300
+	// VC++ can't handle the iterator constructor
+	Graph g(V);
+	for (std::size_t j = 0; j < E; ++j)
+	add_edge(edge_array[j].first, edge_array[j].second, g);
+#else
+	Graph g(edge_array.begin(), edge_array.end(), V);
+#endif
+
+	// copy the weights onto the graph's edges, in edge insertion order
+	property_map < Graph, edge_weight_t >::type w = get(edge_weight, g);
+	std::vector< int >::const_iterator wp = weights.begin();
+
+	graph_traits < Graph >::edge_iterator e, e_end;
+	for (boost::tie(e, e_end) = edges(g); e != e_end && wp != weights.end(); ++e, ++wp)
+		w[*e] = *wp;
+
+	boost::multi_array<int,2> D( boost::extents[V][V] );
+	bool success = johnson_all_pairs_shortest_paths(g, D);
+	if( !success )
+	{
+		cerr << "failed, is this really a tree?\n";
+		return;
+	}
+
+	// find the most distant pair of nodes
+	int max_dist = (std::numeric_limits<int>::min)();
+	for (int i = 0; i < V; ++i) {
+		for (int j = 0; j < V; ++j) {
+			if( D[i][j] > max_dist )
+			{
+				max_dist = D[i][j];
+				n1 = i;
+				n2 = j;
+			}
+		}
+	}
+
+	// record, for every vertex, its predecessor in a depth first traversal rooted at n1
+	typedef graph_traits<Graph>::vertex_descriptor vertex_t;
+	std::vector < vertex_t > pred(num_vertices(g));
+	std::vector < int > dist(num_vertices(g));
+	pred[n1] = n1;
+
+	undirected_dfs(g,
+		root_vertex( vertex( n1, g ) ).
+		visitor( make_dfs_visitor( make_pair(
+			record_predecessors(&pred[0], on_tree_edge()),
+			record_distances(&dist[0], on_tree_edge())
+		))).
+		edge_color_map(get(edge_color, g))
+		);
+
+	// walk from n2 back towards n1, consuming edge weights until half the path length is used up
+	int cur_node = n2;
+	int prev_node = n2;
+	max_dist /= 2;
+	while( cur_node != n1 && max_dist > 0 )
+	{
+		if( alignment_tree[cur_node].parents.size() > 0 && 
+			alignment_tree[cur_node].parents[0] == pred[cur_node] )
+		{
+			// moving up to the parent: the edge weight belongs to cur_node.
+			// the weight must be computed exactly as it was when the graph was built
+			max_dist -= (int)(scaling_factor * genome::absolut(alignment_tree[cur_node].distance)) + 1;
+			prev_node = cur_node;
+			cur_node = pred[cur_node];
+		}else
+		{
+			// moving down to a child: the edge weight belongs to the node being entered
+			prev_node = cur_node;
+			cur_node = pred[cur_node];
+			max_dist -= (int)(scaling_factor * genome::absolut(alignment_tree[cur_node].distance)) + 1;
+		}
+	}
+	n1 = cur_node;
+	n2 = prev_node;
+}
+
+/**
+ * Lengthens the branches leaving the root and normalises child order.
+ * The largest distance found on any node is doubled and assigned as the distance
+ * of every child of the root.  Along the way, each node with at least two
+ * children has its first two children swapped if the first id is greater than
+ * the second.
+ * @param alignment_tree	tree to modify in place
+ */
+void extendRootBranches( PhyloTree< AlignmentTreeNode >& alignment_tree )
+{
+	// find the max branch length and set the root branch lengths to twice that
+	// swap children while we're at it
+	node_id_t ancestor = alignment_tree.root;
+	double max_blen = -(std::numeric_limits<double>::max)();
+	for( size_t nI = 0; nI < alignment_tree.size(); ++nI )
+	{
+		if( alignment_tree[nI].distance > max_blen )
+			max_blen = alignment_tree[nI].distance;
+		// both children[0] and children[1] are read, so two children must exist
+		if( alignment_tree[nI].children.size() > 1 &&
+			alignment_tree[nI].children[0] > alignment_tree[nI].children[1] )
+		{
+			std::swap( alignment_tree[nI].children[0], alignment_tree[nI].children[1] );
+		}
+	}
+	for( size_t cI = 0; cI < alignment_tree[ancestor].children.size(); ++cI )
+		alignment_tree[alignment_tree[ancestor].children[cI]].distance = 2.0 * max_blen;
+}
+
+/**
+ * Picks the next pair of nodes to align: the alignable pair at the smallest distance.
+ *
+ * Every internal node (sequence == NULL) that still has an unaligned edge is
+ * considered as a potential ancestor.  A neighbour is alignable when the edge to
+ * it is not yet aligned and the neighbour already has an ordering or a sequence.
+ *  - If the candidate already has an ordering, it is paired with its nearest
+ *    alignable neighbour (node1 = candidate, node2 = neighbour).
+ *  - Otherwise its two alignable neighbours with the smallest summed distance
+ *    are paired (node1, node2 = the neighbours).
+ * In both cases ancestor is set to the candidate.  If nothing is alignable,
+ * node1, node2 and ancestor are all left at 0.
+ */
+void chooseNextAlignmentPair( PhyloTree< AlignmentTreeNode >& alignment_tree, node_id_t& node1, node_id_t& node2, node_id_t& ancestor )
+{
+	node1 = 0;
+	node2 = 0;
+	ancestor = 0;
+	double best_distance = (numeric_limits<double>::max)();
+
+	for( node_id_t candidate = 0; candidate < alignment_tree.size(); candidate++ )
+	{
+		AlignmentTreeNode& hub = alignment_tree[ candidate ];
+
+		// a node is finished once every child edge and every parent edge is aligned
+		boolean all_edges_done = true;
+		for( uint flagI = 0; flagI < hub.children_aligned.size(); flagI++ )
+			if( !hub.children_aligned[flagI] )
+				all_edges_done = false;
+		for( uint flagI = 0; flagI < hub.parents_aligned.size(); flagI++ )
+			if( !hub.parents_aligned[flagI] )
+				all_edges_done = false;
+
+		// extant sequences and finished nodes cannot serve as the ancestor
+		if( hub.sequence != NULL || all_edges_done )
+			continue;
+
+		// gather the neighbours: parents first, then children
+		vector< node_id_t > nbr_ids;
+		vector< boolean > nbr_ready;
+		vector< double > nbr_dist;
+
+		for( uint pI = 0; pI < hub.parents.size(); pI++ )
+		{
+			const node_id_t parent_id = hub.parents[pI];
+			nbr_ids.push_back( parent_id );
+			// NOTE: a parent that is the tree root gets no special handling here, even
+			// though the alignment tree is supposed to be unrooted
+			nbr_dist.push_back( hub.distance );
+			boolean ready = false;
+			if( !hub.parents_aligned[pI] )
+				ready = ( alignment_tree[parent_id].ordering.size() != 0 || alignment_tree[parent_id].sequence != NULL );
+			nbr_ready.push_back( ready );
+		}
+
+		for( uint cI = 0; cI < hub.children.size(); cI++ )
+		{
+			const node_id_t child_id = hub.children[cI];
+			nbr_ids.push_back( child_id );
+			nbr_dist.push_back( alignment_tree[child_id].distance );
+			boolean ready = false;
+			if( !hub.children_aligned[cI] )
+				ready = ( alignment_tree[child_id].ordering.size() != 0 || alignment_tree[child_id].sequence != NULL );
+			nbr_ready.push_back( ready );
+		}
+
+		const size_t nbr_count = nbr_ids.size();
+		if( hub.ordering.size() == 0 )
+		{
+			// nothing aligned here yet: look for the closest pair of alignable neighbours
+			for( size_t a = 0; a < nbr_count; a++ )
+			{
+				if( !nbr_ready[a] )
+					continue;
+				for( size_t b = a + 1; b < nbr_count; b++ )
+				{
+					if( !nbr_ready[b] )
+						continue;
+					const double pair_dist = nbr_dist[a] + nbr_dist[b];
+					if( pair_dist < best_distance )
+					{
+						best_distance = pair_dist;
+						node1 = nbr_ids[a];
+						node2 = nbr_ids[b];
+						ancestor = candidate;
+					}
+				}
+			}
+		}
+		else
+		{
+			// this node already holds an alignment: look for the closest neighbour to add to it
+			for( size_t a = 0; a < nbr_count; a++ )
+			{
+				if( nbr_ready[a] && nbr_dist[a] < best_distance )
+				{
+					best_distance = nbr_dist[a];
+					node1 = candidate;
+					node2 = nbr_ids[a];
+					ancestor = candidate;
+				}
+			}
+		}
+	}
+}
+
+/**
+ * Use a list of precomputed matches instead of computing them.
+ * The list is assigned to the original_ml member and the caller's list is then
+ * cleared, so the caller must not expect pair_ml to hold matches afterwards.
+ * @param pair_ml	precomputed matches; empty on return
+ */
+void ProgressiveAligner::setPairwiseMatches( MatchList& pair_ml )
+{
+	original_ml = pair_ml;
+	pair_ml.clear();	// ProgressiveAligner owns the matches now...
+}
+
+
+/**
+ * Inserts a new node on the branch joining node1 and node2 and re-roots the
+ * tree on it.
+ * The new node takes the place of the direct link between the two nodes:
+ * whichever of them was the parent becomes the new node's parent and the other
+ * becomes its child.  After re-rooting, the aligned flags of the new root and
+ * of the previous root are reset to "unaligned".
+ * @param alignment_tree	tree to modify
+ * @param node1		one end of the branch to split
+ * @param node2		the other end of the branch to split
+ * @return the id of the tree's root after re-rooting
+ */
+node_id_t createAlignmentTreeRoot( PhyloTree< AlignmentTreeNode >& alignment_tree, node_id_t node1, node_id_t node2 )
+{
+	// append the node that will be spliced into the node1--node2 branch
+	AlignmentTreeNode atn;
+	alignment_tree.push_back( atn );
+	AlignmentTreeNode& previous_root = alignment_tree[alignment_tree.root];
+	AlignmentTreeNode& inserted = alignment_tree.back();
+
+	// work out which end of the branch is currently the parent
+	const bool node2_is_child_of_node1 =
+		find( alignment_tree[node1].children.begin(), alignment_tree[node1].children.end(), node2 ) !=
+		alignment_tree[node1].children.end();
+
+	if( node2_is_child_of_node1 )
+	{
+		inserted.children.push_back(node2);
+		inserted.parents.push_back(node1);
+		alignment_tree[node2].parents.push_back(alignment_tree.size()-1);
+		alignment_tree[node1].children.push_back(alignment_tree.size()-1);
+	}
+	else
+	{
+		inserted.parents.push_back(node2);
+		inserted.children.push_back(node1);
+		alignment_tree[node2].children.push_back(alignment_tree.size()-1);
+		alignment_tree[node1].parents.push_back(alignment_tree.size()-1);
+	}
+
+	// drop every remaining direct link between node1 and node2
+	findAndErase( alignment_tree[node1].children, node2 );
+	findAndErase( alignment_tree[node2].children, node1 );
+	findAndErase( alignment_tree[node1].parents, node2 );
+	findAndErase( alignment_tree[node2].parents, node1 );
+
+	// make the inserted node the root
+	rerootTree( alignment_tree, alignment_tree.size()-1 );
+
+	// both the new and the previous root start over as unaligned
+	inserted.children_aligned = vector< boolean >( inserted.children.size(), false );
+	previous_root.children_aligned = vector< boolean >( previous_root.children.size(), false );
+	previous_root.parents_aligned = vector< boolean >( previous_root.parents.size(), false );
+	inserted.sequence = NULL;
+
+	return alignment_tree.root;
+}
+
+/**
+ * Extracts the alignment of one superinterval of an ancestral node as a
+ * GappedAlignment.
+ * The compact (bitset based) overload does the actual extraction; its result
+ * is then expanded to text rows with GetAlignment() and copied into gal.
+ * @param ancestor	node whose superinterval is extracted
+ * @param super_iv	index of the superinterval in the node's ordering
+ * @param gal		output; overwritten with the extracted alignment
+ */
+void ProgressiveAligner::extractAlignment( node_id_t ancestor, size_t super_iv, GappedAlignment& gal )
+{
+	// extract in compact form first
+	CompactGappedAlignment<> compact;
+	extractAlignment( ancestor, super_iv, compact );
+
+	// expand the compact form to one text row per sequence
+	vector< string > aln_rows;
+	GetAlignment( compact, this->original_ml.seq_table, aln_rows );
+
+	gal = GappedAlignment(compact.SeqCount(), 0);
+	size_t seqI = 0;
+	while( seqI < compact.SeqCount() )
+	{
+		gal.SetStart(seqI, compact.Start(seqI));
+		// a length is only copied for sequences that take part in the alignment
+		const bool is_defined = compact.Orientation(seqI) != AbstractMatch::undefined;
+		if( is_defined )
+			gal.SetLength(compact.Length(seqI), seqI);
+		++seqI;
+	}
+	gal.SetAlignment(aln_rows);
+}
+/**
+ * Extracts the alignment of one superinterval of an ancestral node as a
+ * CompactGappedAlignment with seq_count rows.
+ * The tree is walked downwards from (ancestor, super_iv) to collect the
+ * superintervals of the childless nodes reachable through defined child
+ * intervals.  Each collected interval is translated into the ancestor's
+ * coordinates with translateGappedCoordinates() and its gap pattern is stored
+ * in the row given by node_sequence_map.  Rows that are never filled are
+ * resized to the interval length.
+ * Inconsistencies are reported on cerr and followed by breakHere(); the
+ * function itself keeps going afterwards.
+ * @param ancestor	node whose superinterval is extracted
+ * @param super_iv	index into alignment_tree[ancestor].ordering
+ * @param cga		output; overwritten with the extracted alignment
+ */
+void ProgressiveAligner::extractAlignment( node_id_t ancestor, size_t super_iv, CompactGappedAlignment<>& cga )
+{
+	// determine the leaf node intervals below this super_iv
+	vector< pair< node_id_t, size_t > > node_siv_list;	// (childless node, superinterval index) pairs found by the walk
+	stack< pair<node_id_t,size_t> > node_stack;	// (node, superinterval index) pairs still to visit
+	node_stack.push(make_pair(ancestor,super_iv));
+	while( node_stack.size() > 0 )
+	{
+		pair<node_id_t,size_t> cur = node_stack.top();
+		node_id_t cur_node = cur.first;
+		node_stack.pop();
+		if( alignment_tree[cur_node].children.size() == 0 )
+			node_siv_list.push_back( cur );	// no children: remember it for extraction below
+		for( size_t childI = 0; childI < alignment_tree[cur_node].children.size(); childI++ )
+		{
+			if( alignment_tree[cur_node].ordering[cur.second].reference_iv.LeftEnd(childI) == NO_MATCH )
+				continue;	// this child has no interval in the current superinterval
+			size_t child_siv = childI == 0 ? alignment_tree[cur_node].ordering[cur.second].c1_siv : 
+				alignment_tree[cur_node].ordering[cur.second].c2_siv;	// child 0 uses c1_siv, every other child index uses c2_siv
+			node_stack.push(make_pair(alignment_tree[cur_node].children[childI], child_siv) );
+			node_id_t n = alignment_tree[cur_node].children[childI];
+			if( alignment_tree[cur_node].ordering[cur.second].reference_iv.Length(childI) != alignment_tree[n].ordering[child_siv].Length() )	// sanity check: parent and child must agree on the interval length
+			{
+				breakHere();
+				cerr << "alignment_tree[cur_node].ordering[cur.second].reference_iv.Length(childI): " << alignment_tree[cur_node].ordering[cur.second].reference_iv.Length(childI) << endl;
+				cerr << "rotten in the state of denmark...\n";
+			}
+		}
+	}
+
+	// armed with the list of pairs, extract each one...
+
+	// set up an empty result spanning the ancestor's superinterval
+	SuperInterval& a_iv = alignment_tree[ancestor].ordering[super_iv];
+	cga = CompactGappedAlignment<>(seq_count, a_iv.Length());
+	vector< bitset_t > aln_mats( seq_count );	// one gap pattern per sequence, filled in below
+
+	// use translateCoordinates to map out each sequence's original coordinates
+	// to the alignment coordinates
+	for( size_t pairI = 0; pairI < node_siv_list.size(); pairI++ )
+	{
+		node_id_t nodeI = node_siv_list[pairI].first;
+		size_t seq_siv = node_siv_list[pairI].second;
+		// the steps below turn the node's interval into a row of the result
+		// translate seq_siv into ancestor alignment coordinates?
+		// we can abuse translateCoordinates and the Match data structure :
+		//   - add a single "match" covering the entire sequence
+		//   - translate it up to alignment root coordinates
+		uint seqI = node_sequence_map[nodeI];	// row of the result that belongs to this node
+		Match mm(2);
+		mm.SetStart(0, alignment_tree[nodeI].ordering[seq_siv].LeftEnd());
+		mm.SetStart(1, alignment_tree[nodeI].ordering[seq_siv].LeftEnd());
+		mm.SetLength( alignment_tree[nodeI].ordering[seq_siv].Length() );
+		// NOTE(review): aml holds a heap copy of mm; confirm who owns the entries after translation
+		vector< AbstractMatch* > aml( 1, mm.Copy() );
+		translateGappedCoordinates( aml, 0, nodeI, ancestor );
+
+		if( aml.size() > 1 )	// only aml[0] is used below, so more than one result is unexpected
+		{
+			cerr << "huh?";
+			genome::breakHere();
+			SingleStartComparator<AbstractMatch> ssc( 0 );
+			sort( aml.begin(), aml.end(), ssc );	// huh?
+		}
+		CompactGappedAlignment<>* trans_cga = dynamic_cast<CompactGappedAlignment<>*>(aml[0]);
+		if( trans_cga == NULL )	// not a compact alignment yet: build one from the translated match
+		{
+			CompactGappedAlignment<> tmp_cga;
+			trans_cga = tmp_cga.Copy();
+			*trans_cga = CompactGappedAlignment<>(*aml[0]);	// NOTE(review): aml[0] is not freed on this path -- possible leak, confirm
+		}
+
+		if( trans_cga->LeftEnd(0) + trans_cga->Length(0) > a_iv.LeftEnd() + a_iv.Length() )	// translated interval must not run past the ancestor interval
+		{
+			cerr << "trans_cga->Start(0): " << trans_cga->Start(0) << " trans_cga->Length(0): " << trans_cga->Length(0) << endl;
+			cerr << "a_iv.LeftEnd(): " << a_iv.LeftEnd() << " a_iv.Length(): " << a_iv.Length() << endl;
+			breakHere();
+		}
+		bool parity = trans_cga->Orientation(0) == trans_cga->Orientation(1);	// same strand in both rows => forward
+		cga.SetLeftEnd(seqI, trans_cga->LeftEnd(1));
+		AbstractMatch::orientation o = parity ? AbstractMatch::forward : AbstractMatch::reverse;
+		cga.SetOrientation(seqI, o);
+		const vector< bitset_t >& tmp = trans_cga->GetAlignment();
+		aln_mats[seqI] = tmp[1];	// row 1 of the translated alignment is the sequence's gap pattern
+
+		size_t offset = trans_cga->LeftEnd(0) - a_iv.LeftEnd();	// NOTE(review): unsigned; wraps if LeftEnd(0) < a_iv.LeftEnd(), which is only diagnosed further down
+		if( aln_mats[seqI].size() < a_iv.Length() )
+		{
+			// need to resize and shift appropriately
+			aln_mats[seqI].resize( a_iv.Length() );
+			aln_mats[seqI] <<= offset;	// this is backwards in boost::dynamic_bitset for some reason...
+		}
+		if( trans_cga->LeftEnd(0) < a_iv.LeftEnd() )	// translated interval must not start before the ancestor interval
+		{
+			cerr << "trans_cga->LeftEnd(0): " << trans_cga->LeftEnd(0) << endl;
+			cerr << "a_iv.LeftEnd(): " << a_iv.LeftEnd() << endl;
+			breakHere();
+		}
+
+		// validate match lengths
+		if( trans_cga->Length(1) != alignment_tree[nodeI].ordering[seq_siv].Length() )
+		{
+			cerr << "b0rked\n";
+			breakHere();
+		}
+		// set the length and alignment appropriately
+		cga.SetLength(trans_cga->Length(1), seqI);
+
+		// free storage used by trans_cga
+		trans_cga->Free();
+	}
+	for( uint seqI = 0; seqI < aln_mats.size(); seqI++ )	// rows never filled above are still empty
+		if( aln_mats[seqI].size() == 0 )
+			aln_mats[seqI].resize( a_iv.Length() );
+	cga.SetAlignment(aln_mats);
+}
+
+/**
+ * Estimates a default upper bound on the number of breakpoints between a pair
+ * of genomes, assuming 15 rearrangements per megabase of the average sequence
+ * length.
+ * @param seq_table	the sequences being aligned
+ * @return the estimate truncated to an unsigned integer, or 0 when seq_table
+ *         is empty
+ */
+unsigned getDefaultBreakpointMax( const std::vector< genome::gnSequence* >& seq_table )
+{
+	// without sequences there is no average length; dividing 0 by 0 would give
+	// NaN, and converting NaN to unsigned is undefined behaviour
+	if( seq_table.empty() )
+		return 0;
+
+	double total_len = 0;
+	for( size_t seqI = 0; seqI < seq_table.size(); ++seqI )
+		total_len += seq_table[seqI]->length();
+	const double avg_len = total_len / (double)(seq_table.size());
+
+	// heavily rearranged, recently diverged genomes like yersinia have up to 15 rearrangements per megabase of sequence
+	const double avg_megabases = avg_len / 1000000.0;	// convert to number of megabases
+	const double expected_breakpoints = avg_megabases * 15.0;	// "lots" of rearrangement
+	return (unsigned)expected_breakpoints;
+}
+
+/**
+ * Computes a pairwise breakpoint distance matrix.
+ * For every pair of sequences the pairwise matches are fetched, overlaps are
+ * removed, LCBs are computed and scored, and low-scoring LCBs are removed by
+ * greedy breakpoint elimination.  The number of LCBs that remain becomes the
+ * raw distance of the pair.  Off-diagonal entries are then divided by the
+ * larger of the observed maximum and getDefaultBreakpointMax(), and every
+ * entry (diagonal included) is multiplied by bp_dist_scale.
+ * The per-pair work runs inside an OpenMP parallel loop and writes progress
+ * messages to cout.
+ * @param bp_distmat	output; resized to seq_count x seq_count and filled
+ */
+void ProgressiveAligner::CreatePairwiseBPDistance( boost::multi_array<double, 2>& bp_distmat )
+{
+	uint seq_count = original_ml.seq_table.size();	// local; shadows any outer seq_count
+	bp_distmat.resize(boost::extents[seq_count][seq_count]);
+	for( size_t i = 0; i < seq_count; ++i )	// every entry starts at 1, including the diagonal
+		for( size_t j = 0; j < seq_count; ++j )
+			bp_distmat[i][j] = 1;
+
+#ifdef LCB_WEIGHT_LOSS_PLOT
+	stringstream pair_bp_ofname;
+	pair_bp_ofname << "pair_bp_log.txt";
+	ofstream pair_bp_out( pair_bp_ofname.str().c_str() );
+#endif
+
+	vector< pair<uint, uint> > seq_pairs( (seq_count * (seq_count-1))/2 );	// all unordered pairs (seqI < seqJ)
+	int ii = 0;
+	for( uint seqI = 0; seqI < seq_count; seqI++ )
+		for( uint seqJ = seqI + 1; seqJ < seq_count; seqJ++ )
+			seq_pairs[ii++] = make_pair(seqI,seqJ);
+
+#pragma omp parallel for
+	for(int i = 0; i < seq_pairs.size(); i++)
+	{
+		uint seqI = seq_pairs[i].first;
+		uint seqJ = seq_pairs[i].second;
+		vector<uint>::iterator n1 = find( node_sequence_map.begin(), node_sequence_map.end(), seqI );	// NOTE(review): result is not checked against end()
+		vector<uint>::iterator n2 = find( node_sequence_map.begin(), node_sequence_map.end(), seqJ );
+		vector<node_id_t> n1_seqs( 1, n1-node_sequence_map.begin() );	// position in node_sequence_map is the node id
+		vector<node_id_t> n2_seqs( 1, n2-node_sequence_map.begin() );
+		Matrix<MatchList> mml;
+		getPairwiseMatches(n1_seqs, n2_seqs, mml);
+		MatchList& ml = mml(0,0);
+
+		// eliminate overlaps as they correspond to inconsistently or
+		// multiply aligned regions
+		EliminateOverlaps_v2( ml, true );
+		ml.MultiplicityFilter(2);
+
+		// do greedy b.p. elimination on the matches
+		vector< MatchList > LCB_list;
+		vector< LCB > adjacencies;
+		vector< gnSeqI > breakpoints;
+		IdentifyBreakpoints( ml, breakpoints );
+		ComputeLCBs_v2( ml, breakpoints, LCB_list );
+		vector< double > lcb_scores( LCB_list.size() );
+		cout << "Pair " << seq_pairs[i].first << ", " << seq_pairs[i].second << " has " << LCB_list.size() << " initial LCBs\n";
+		for( size_t lcbI = 0; lcbI < LCB_list.size(); ++lcbI )
+			lcb_scores[lcbI] = GetPairwiseAnchorScore( LCB_list[lcbI], ml.seq_table, this->subst_scoring, sol_list[seqI], sol_list[seqJ] );
+
+		computeLCBAdjacencies_v3( LCB_list, lcb_scores, adjacencies );
+
+		// want to discard all low-weight LCBs
+		// to arrive at a set of reliable LCBs
+		double cons_id = 1 - this->conservation_distance[seqI][seqJ];
+		double scaled_score = max( bp_dist_estimate_score * cons_id * cons_id * cons_id * cons_id, min_breakpoint_penalty);	// penalty scaled by cons_id^4, never below min_breakpoint_penalty
+		cout << "Using scaled bp penalty: " << scaled_score << endl;
+		GreedyRemovalScorer wbs( adjacencies, scaled_score );
+		// NOTE(review): cur_min_coverage is not declared in this function, so it looks shared between the OpenMP threads -- confirm this write is safe
+#ifdef LCB_WEIGHT_LOSS_PLOT
+		cur_min_coverage = greedyBreakpointElimination_v4( adjacencies, lcb_scores, wbs, &pair_bp_out, seqI, seqJ );
+		pair_bp_out.flush();
+#else
+		cur_min_coverage = greedyBreakpointElimination_v4( adjacencies, lcb_scores, wbs, NULL );
+#endif
+		MatchList deleted_matches;
+		filterMatches_v2( adjacencies, LCB_list, lcb_scores, deleted_matches );
+		cout << "Pair (" << seqI << "," << seqJ << ") has " << LCB_list.size() << " well-supported breakpoints\n";
+		// the surviving LCB count is the raw distance of this pair
+		// now set the distance entry
+		bp_distmat[seqI][seqJ] = LCB_list.size();
+		bp_distmat[seqJ][seqI] = LCB_list.size();
+
+		// free the matches
+		for( size_t dI = 0; dI < ml.size(); dI++ )
+			ml[dI]->Free();
+	}
+	// normalize to [0,1]
+	double bp_max = 0;
+	for( uint i = 0; i < bp_distmat.shape()[0]; ++i )
+		for( uint j = 0; j < bp_distmat.shape()[1]; ++j )
+		{
+			if( bp_distmat[i][j] > bp_max )
+				bp_max = bp_distmat[i][j];
+		}
+
+	double default_max = getDefaultBreakpointMax(original_ml.seq_table);
+	bp_max = bp_max > default_max ? bp_max : default_max;	// never normalize by less than the default maximum
+
+	for( uint i = 0; i < bp_distmat.shape()[0]; ++i )
+		for( uint j = 0; j < bp_distmat.shape()[1]; ++j )
+		{
+			if( i != j )
+				bp_distmat[i][j] /= bp_max;
+			bp_distmat[i][j] *= bp_dist_scale;	// applied to the diagonal too, which is still at its initial value of 1
+		}
+}
+
+/**
+ * Prepares a guide tree for progressive alignment.
+ * All nodes are reset to "unaligned" with no sequence.  Then, for every
+ * sequence in mlist.seq_table, the node named "seqN" (N is the 1-based
+ * sequence index) is given that sequence and a single superinterval covering
+ * the whole sequence, and the node is recorded in node_sequence_map.
+ * @param alignment_tree	tree to initialize; node names must follow the seqN format
+ * @param mlist			provides seq_table, the sequences to attach
+ * @param node_sequence_map	output: sequence index for each node id, or the
+ *				maximum uint value for nodes without a sequence
+ * @throws const char* if some sequence has no node with a matching name
+ */
+template< typename MatchListType >
+void makeAlignmentTree( PhyloTree< AlignmentTreeNode >& alignment_tree, MatchListType& mlist, vector< uint >& node_sequence_map )
+{
+	// initialize all nodes to unaligned
+	for( node_id_t nodeI = 0; nodeI < alignment_tree.size(); nodeI++ )
+	{
+		AlignmentTreeNode& node = alignment_tree[nodeI];
+		node.children_aligned = vector< boolean >( node.children.size(), false );
+		node.parents_aligned = vector< boolean >( node.parents.size(), false );
+		node.sequence = NULL;
+		node.refined = false;
+	}
+
+	// nodes without a sequence keep this sentinel (the same value -1 converts to)
+	node_sequence_map.assign( alignment_tree.size(), (std::numeric_limits<uint>::max)() );
+
+	// set the sequence appropriately for extant sequences
+	for( uint seqI = 0; seqI < mlist.seq_table.size(); seqI++ )
+	{
+		// build the expected node name once instead of once per node comparison
+		stringstream seq_name;
+		seq_name << "seq" << seqI + 1;
+		const string wanted_name = seq_name.str();
+
+		node_id_t nodeI = 0;
+		for( ; nodeI < alignment_tree.size(); nodeI++ )
+		{
+			if( wanted_name == alignment_tree[nodeI].name )
+				break;
+		}
+		if( nodeI == alignment_tree.size() )
+			throw "Phylogenetic tree names unrecognized.  Should follow seqN naming format\n";
+
+		AlignmentTreeNode& leaf = alignment_tree[nodeI];
+		leaf.sequence = mlist.seq_table[seqI];
+
+		// a one-sequence match spanning the entire sequence seeds the interval
+		Match mm(1);
+		Match* m = mm.Copy();
+		m->SetStart(0,1);
+		m->SetLength(leaf.sequence->length(),0);
+		vector<AbstractMatch*> tmp(1,m);
+		Interval iv( tmp.begin(), tmp.end() );
+		m->Free();
+
+		SuperInterval si( iv );
+		si.SetLeftEnd(1);
+		si.SetLength(leaf.sequence->length());
+		leaf.ordering.push_back( si );
+		node_sequence_map[nodeI] = seqI;
+	}
+}
+/**
+ * Fills distmat for the sequences of an interval list by calling
+ * IdentityMatrix() and then TransformDistanceIdentity() on the result.
+ * @param iv_list	the aligned intervals to measure
+ * @param distmat	output matrix; presumably identities converted to distances -- confirm against TransformDistanceIdentity()
+ */
+void DistanceMatrix( IntervalList& iv_list, NumericMatrix<double>& distmat )
+{
+	IdentityMatrix( iv_list, distmat );
+	TransformDistanceIdentity(distmat);
+}
+
+/*
+void makeSuperIntervals( IntervalList& iv_list, PhyloTree< TreeNode >& alignment_tree, vector< uint >& node_sequence_map )
+{
+	std::stack< node_id_t > node_stack;
+	node_stack.push( alignment_tree.root );
+	bitset_t visited( alignment_tree.size(), false );
+	while( node_stack.size() > 0 )
+	{
+		node_id_t cur_node = node_stack.top();
+		// visit post-order
+		for( size_t cI = 0; cI < alignment_tree[cur_node].children.size(); ++cI )
+		{
+			if( !visited[alignment_tree[cur_node].children[cI]] )
+				node_stack.push(alignment_tree[cur_node].children[cI]);
+		}
+		if( node_stack.top() != cur_node )
+			continue;
+		node_stack.pop();
+		if( alignment_tree[cur_node].children.size() == 0 )
+			continue;	// only process internal nodes
+
+		// process this node
+		// construct pairwise LCBs
+
+		uint seqI = node_sequence_map[alignment_tree[cur_node].children[0]];
+		uint seqJ = node_sequence_map[alignment_tree[cur_node].children[0]];
+		vector< uint > projection( 2 );
+		projection[0] = seqI;
+		projection[1] = seqJ;
+
+		vector< vector< MatchProjectionAdapter* > > LCB_list;
+		vector< LCB > projected_adjs;
+		projectIntervalList( iv_list, projection, LCB_list, projected_adjs );
+
+		// create a superinterval for each adj 
+//		alignment_tree[cur_node].ordering.resize(adjs.size());
+//		for( size_t adjI = 0; adjI < adjs.size(); ++adjI )
+//		{
+//			SuperInterval& siv = alignment_tree[cur_node].ordering[adjI];
+//			Match mleft(2);
+//			mleft.SetStart(0,adjI);
+//			mleft.SetStart(1,adjI);
+//			mleft.SetLength(1);
+//			siv.SetLeftEnd( adjI );
+//			siv.SetLength(1);
+//		}
+
+	}
+}
+*/
+/**
+ * Profile-profile alignment entry point: sets up a combined alignment tree
+ * from two existing profiles and then runs getAlignment().
+ * A guide tree is built for each profile with MuscleInterface::CreateTree(),
+ * both trees are copied into alignment_tree (the second one with its node ids
+ * shifted by the size of the first), and one extra node is appended whose two
+ * children are the last node of each copied tree.  Distance matrices
+ * (conservation and, unless collinear_genomes is set, breakpoint) are computed
+ * on the way.  Progress is written to cout.
+ * NOTE(review): this function does not assign alignment_tree.root, does not
+ * add the new node to its children's parent lists, and marks every edge
+ * (including the new node's) as already aligned -- confirm this is intended.
+ * @param prof1		first profile
+ * @param prof2		second profile
+ * @param interval_list	output, filled by getAlignment()
+ * @throws const char* if a guide tree file cannot be opened
+ */
+void ProgressiveAligner::alignPP(IntervalList& prof1, IntervalList& prof2, IntervalList& interval_list )
+{
+	if( debug_aligner )
+	{
+		debug_interval = true;
+		debug_cga = true;
+	}
+
+	seq_count = prof1.seq_table.size() + prof2.seq_table.size();
+
+	if( this->breakpoint_penalty == -1 )	// -1 means "not set": fall back to the default
+		this->breakpoint_penalty = getDefaultBreakpointPenalty( original_ml.seq_table );
+
+	if( this->bp_dist_estimate_score == -1 )	// -1 means "not set": fall back to the default
+		this->bp_dist_estimate_score = getDefaultBpDistEstimateMinScore( original_ml.seq_table );
+	cout << "using default bp penalty: " << breakpoint_penalty << endl;	// printed even when the value was set by the caller
+	cout << "using default bp estimate min score: " << bp_dist_estimate_score << endl;
+
+	if( this->collinear_genomes )
+		this->breakpoint_penalty = -1;
+
+	if( collinear_genomes )
+		cout << "\nAssuming collinear genomes...\n";
+		
+	EliminateOverlaps_v2( original_ml );
+	// use existing pairwise matches
+	MatchList mlist;
+	mlist.clear();
+	mlist = original_ml;
+	cout << "Starting with " << mlist.size() << " multi-matches\n";
+
+//
+// Step 1) Compute guide trees for each profile and join them
+//
+	NumericMatrix< double > distance1;
+	DistanceMatrix( prof1, distance1 );
+	NumericMatrix< double > distance2;
+	DistanceMatrix( prof2, distance2 );
+
+	// Make a phylogenetic tree
+	// use the identity matrix method and convert to a distance matrix
+	MuscleInterface& ci = MuscleInterface::getMuscleInterface();	
+	string guide_tree_fname1 = CreateTempFileName("guide_tree");
+	registerFileToDelete( guide_tree_fname1 );
+	ci.CreateTree( distance1, guide_tree_fname1 );
+	string guide_tree_fname2 = CreateTempFileName("guide_tree");
+	registerFileToDelete( guide_tree_fname2 );
+	ci.CreateTree( distance2, guide_tree_fname2 );
+
+	// read the trees
+	ifstream tree_file1( guide_tree_fname1.c_str() );
+	if( !tree_file1.is_open() )
+		throw "Error opening guide tree file";
+	PhyloTree< AlignmentTreeNode > tree1;
+	tree1.readTree( tree_file1 );
+	tree_file1.close();
+	ifstream tree_file2( guide_tree_fname2.c_str() );
+	if( !tree_file2.is_open() )
+		throw "Error opening guide tree file";
+	PhyloTree< AlignmentTreeNode > tree2;
+	tree2.readTree( tree_file2 );
+	tree_file2.close();
+
+
+	// compute pairwise distances among all nodes
+	NumericMatrix< double > distance;
+	DistanceMatrix( mlist, distance );
+	conservation_distance.resize(boost::extents[seq_count][seq_count]);
+	for( uint seqI = 0; seqI < seq_count; ++seqI )	// mirror distance(r,c) with r <= c into both halves
+		for( uint seqJ = 0; seqJ < seq_count; ++seqJ )
+			if( seqJ > seqI )
+				conservation_distance[seqI][seqJ] = distance(seqI,seqJ);
+			else
+				conservation_distance[seqI][seqJ] = distance(seqJ,seqI);
+
+
+	if( !collinear_genomes )
+	{
+		cout << "Calculating pairwise breakpoint distances\n";
+		CreatePairwiseBPDistance(bp_distance);
+		cout << "bp distance matrix:\n";
+		print2d_matrix(bp_distance, cout);
+		cout << endl;
+	}
+
+	// rescale the conservation distance
+	double conservation_range = 1;
+	double bp_range = 1;	// not used anywhere below
+	for( uint seqI = 0; seqI < seq_count; ++seqI )	// NOTE(review): reads distance(seqI,seqJ) directly, overwriting the mirrored values set above -- confirm
+		for( uint seqJ = 0; seqJ < seq_count; ++seqJ )
+			conservation_distance[seqI][seqJ] = distance(seqI,seqJ) / conservation_range;
+
+	if( !(collinear_genomes && seq_count > 20 ) )
+	{
+		cout << "genome content distance matrix:\n";
+		print2d_matrix(conservation_distance, cout);
+		cout << endl;
+	}
+
+//
+// construct the alignment tree by joining trees from each profile
+//
+	vector< uint > nsmap1;
+	vector< uint > nsmap2;
+	makeAlignmentTree( tree1, prof1, nsmap1 );
+//	prepareAlignmentTree(tree1);
+	makeAlignmentTree( tree2, prof2, nsmap2 );
+//	prepareAlignmentTree(tree2);
+
+	alignment_tree.resize( tree1.size() + tree2.size() + 1 );	// both trees plus one joining node
+	// set the sequence appropriately for extant sequences
+	node_sequence_map = vector< uint >( alignment_tree.size(), -1 );
+
+	// copy both trees into alignment_tree; the last (joining) node is handled after the loop
+	for( node_id_t nodeI = 0; nodeI < alignment_tree.size()-1; nodeI++ )
+	{
+		if( nodeI < tree1.size() )
+		{
+			alignment_tree[nodeI].sequence = tree1[nodeI].sequence;
+			alignment_tree[nodeI].children = tree1[nodeI].children;
+			alignment_tree[nodeI].parents = tree1[nodeI].parents;
+			alignment_tree[nodeI].ordering = tree1[nodeI].ordering;
+			alignment_tree[nodeI].distance = tree1[nodeI].distance;
+			alignment_tree[nodeI].name = tree1[nodeI].name;
+			node_sequence_map[nodeI] = nsmap1[nodeI];
+		}else{
+			alignment_tree[nodeI].sequence = tree2[nodeI-tree1.size()].sequence;
+			alignment_tree[nodeI].children = tree2[nodeI-tree1.size()].children;
+			alignment_tree[nodeI].parents = tree2[nodeI-tree1.size()].parents;
+			alignment_tree[nodeI].ordering = tree2[nodeI-tree1.size()].ordering;
+			alignment_tree[nodeI].distance = tree2[nodeI-tree1.size()].distance;
+			alignment_tree[nodeI].name = tree2[nodeI-tree1.size()].name;
+			for( size_t cI = 0; cI < alignment_tree[nodeI].children.size(); cI++ )	// shift node ids past the first tree
+				alignment_tree[nodeI].children[cI] += tree1.size();
+			for( size_t pI = 0; pI < alignment_tree[nodeI].parents.size(); pI++ )
+				alignment_tree[nodeI].parents[pI] += tree1.size();
+			node_sequence_map[nodeI] = nsmap2[nodeI-tree1.size()];
+			if( node_sequence_map[nodeI] != (std::numeric_limits<uint>::max)() )	// leave the "no sequence" sentinel untouched
+				node_sequence_map[nodeI] += prof1.seq_table.size();
+		}
+
+		alignment_tree[nodeI].children_aligned = vector< boolean >( alignment_tree[nodeI].children.size(), true );
+		alignment_tree[nodeI].parents_aligned = vector< boolean >( alignment_tree[nodeI].parents.size(), true );
+		alignment_tree[nodeI].refined = true;
+	}
+
+	alignment_tree.back().children.push_back( tree1.size()-1 );	// last node of tree1 -- presumably its root, confirm
+	alignment_tree.back().children.push_back( alignment_tree.size()-2 );	// last node of the shifted tree2
+	alignment_tree.back().distance = 100;
+	alignment_tree.back().children_aligned = vector< boolean >( alignment_tree.back().children.size(), true );
+	alignment_tree.back().parents_aligned = vector< boolean >( alignment_tree.back().parents.size(), true );
+	alignment_tree.back().refined = false;	// NOTE(review): back().sequence is never assigned in this function
+
+
+	getAlignment( interval_list );
+
+}
+
+/**
+ * Runs the progressive alignment over alignment_tree and appends the result
+ * to interval_list.
+ * Pairs chosen by chooseNextAlignmentPair() are aligned with alignNodes()
+ * until no pair is left or the root has been aligned.  If refinement is
+ * enabled, doGappedAlignment() is run once more on the root.  Finally every
+ * superinterval of the root is extracted and appended as an Interval.
+ * Progress is written to cout.
+ * @param interval_list	receives one Interval per root superinterval
+ */
+void ProgressiveAligner::getAlignment( IntervalList& interval_list )
+{
+	cout << "Aligning...\n";
+
+	// keep aligning the nearest alignable pair until nothing is left
+	for(;;)
+	{
+		node_id_t node1;
+		node_id_t node2;
+		node_id_t ancestor;
+		chooseNextAlignmentPair( alignment_tree, node1, node2, ancestor );
+
+		// identical ids mean that no alignable pair remains
+		if( node1 == node2 )
+			break;
+
+		alignNodes( node1, node2, ancestor );
+
+		// aligning at the root completes the tree
+		if( ancestor == alignment_tree.root )
+			break;
+	}
+
+	// optional final pass of iterative refinement on the root alignment
+	if( refine )
+	{
+		cout << "Performing final pass iterative refinement\n";
+		doGappedAlignment(alignment_tree.root, false);
+	}
+
+	// report what the root alignment looks like
+	vector< SuperInterval >& root_ivs = alignment_tree[alignment_tree.root].ordering;
+	cout << "root alignment has " << root_ivs.size() << " superintervals\n";
+	gnSeqI total_len = 0;
+	for( vector< SuperInterval >::iterator iv_iter = root_ivs.begin(); iv_iter != root_ivs.end(); ++iv_iter )
+		total_len += iv_iter->Length();
+	cout << "root alignment length: " << total_len << endl;
+
+	// peel the alignment off the root, one superinterval at a time
+	for( size_t ivI = 0; ivI < root_ivs.size(); ivI++ )
+	{
+		GappedAlignment ga(seq_count, root_ivs[ivI].Length());
+		extractAlignment(alignment_tree.root, ivI, ga);
+		vector<AbstractMatch*> tmp(1, &ga);
+		interval_list.push_back( Interval(tmp.begin(), tmp.end()) );
+	}
+}
+
+/**
+ * Main entry point: progressively aligns the sequences in seq_table.
+ * Uses the matches stored in original_ml, computes a genome content distance
+ * matrix, builds (or reads) a guide tree, re-roots it with findMidpoint() and
+ * moveRootToBranch(), initializes the alignment tree, optionally computes
+ * pairwise breakpoint distances, and finally calls getAlignment().
+ * Progress and the distance matrices are written to cout.
+ * @param seq_table	the sequences to align
+ * @param interval_list	output; its seq_table is set and the alignment is appended
+ * @throws const char* if the guide tree file cannot be opened for reading or writing
+ */
+
+void ProgressiveAligner::align( vector< gnSequence* >& seq_table, IntervalList& interval_list ){
+	if( debug_aligner )
+	{
+		debug_interval = true;
+		debug_cga = true;
+	}
+
+	seq_count = seq_table.size();
+	this->currently_recursing = false;
+	interval_list.seq_table = seq_table;
+
+	// find pairwise matches
+	MatchList mlist;
+	mlist.seq_table = seq_table;	// mlist is reassigned from original_ml further down
+
+	if( this->breakpoint_penalty == -1 )	// -1 means "not set": fall back to the default
+		this->breakpoint_penalty = getDefaultBreakpointPenalty( seq_table );
+	if( this->bp_dist_estimate_score == -1 )	// NOTE(review): uses original_ml.seq_table while the line above uses seq_table -- confirm they are the same
+		this->bp_dist_estimate_score = getDefaultBpDistEstimateMinScore( original_ml.seq_table );
+	cout << "using default bp penalty: " << breakpoint_penalty << endl;	// printed even when the value was set by the caller
+	cout << "using default bp estimate min score: " << bp_dist_estimate_score << endl;
+
+	if( this->collinear_genomes )
+		this->breakpoint_penalty = -1;
+
+	if( collinear_genomes )
+		cout << "\nAssuming collinear genomes...\n";
+
+	mlist.clear();
+	mlist = original_ml;
+	cout << "Starting with " << mlist.size() << " multi-matches\n";
+	cout << "Computing genome content distance matrix...\n";
+
+//
+// Step 2) Compute a phylogenetic guide tree using the pairwise matches
+//
+	NumericMatrix< double > distance;
+	SingleCopyDistanceMatrix( mlist, mlist.seq_table, distance );
+	cout << "\n\nGenome conservation distance matrix: " << endl;
+	distance.print(cout);
+	cout << endl;
+
+	bool input_tree_specified = input_guide_tree_fname != "";
+	bool output_tree_specified = output_guide_tree_fname != "";
+	if( !input_tree_specified )
+	{
+		// Make a phylogenetic guide tree
+		if( !output_tree_specified )
+			output_guide_tree_fname = CreateTempFileName("guide_tree");	// NOTE(review): not passed to registerFileToDelete() here -- confirm the file is meant to be kept
+		input_guide_tree_fname = output_guide_tree_fname;	// the tree written here is read back below
+		cout << "Writing guide tree to " << output_guide_tree_fname << endl;
+		MuscleInterface& mi = MuscleInterface::getMuscleInterface();
+		mi.CreateTree( distance, output_guide_tree_fname );
+
+	//	ci.SetDistanceMatrix( distance, output_guide_tree_fname );
+	}
+
+	conservation_distance.resize(boost::extents[seq_count][seq_count]);
+	for( uint seqI = 0; seqI < seq_count; ++seqI )	// mirror distance(r,c) with r <= c into both halves
+		for( uint seqJ = 0; seqJ < seq_count; ++seqJ )
+			if( seqJ > seqI )
+			{
+				conservation_distance[seqI][seqJ] = distance(seqI,seqJ);
+				conservation_distance[seqJ][seqI] = distance(seqI,seqJ);
+			}
+			else
+			{
+				conservation_distance[seqI][seqJ] = distance(seqJ,seqI);
+				conservation_distance[seqJ][seqI] = distance(seqJ,seqI);
+			}
+
+	cout << "reading tree...\n";
+	// load the guide tree
+	ifstream tree_file( input_guide_tree_fname.c_str() );
+	if( !tree_file.is_open() )
+		throw "Error opening guide tree file";
+	alignment_tree.readTree( tree_file );
+	tree_file.close();
+
+	cout << "initializing alignment tree...\n";
+	node_id_t node1;
+	node_id_t node2;
+	findMidpoint( alignment_tree, node1, node2 );	// midpoint root the tree
+	moveRootToBranch( alignment_tree, node1, node2 );
+
+	makeAlignmentTree( alignment_tree, mlist, node_sequence_map );
+	// midpoint root the tree
+//	findMidpoint( alignment_tree, node1, node2 );
+//	node_id_t ancestor = 0;
+//	if( seq_count > 2 )	// if only two sequences then the tree already has a root
+//		ancestor = createAlignmentTreeRoot( alignment_tree, node1, node2 );
+
+	// write out the rooted guide tree, but don't clobber the user's input tree
+	if( !input_tree_specified || output_tree_specified )
+	{
+		ofstream out_tree_file( output_guide_tree_fname.c_str() );
+		if( !out_tree_file.is_open() )
+			throw "Error opening guide tree file for write";
+		alignment_tree.writeTree( out_tree_file );
+		out_tree_file.close();
+	}
+
+	// ensure the root is the last to get aligned and swap children to canonical order
+	extendRootBranches(alignment_tree);
+
+
+	if( !collinear_genomes )
+	{
+		// need sol lists for scoring
+		vector<SeedOccurrenceList> blah(seq_count);
+		swap( blah, sol_list );	// replaces sol_list with seq_count fresh lists
+//		sol_list = ;
+		// temporarily create a weight 11 SML
+/*		MatchList w11_mlist;
+		w11_mlist.seq_filename = original_ml.seq_filename;
+		w11_mlist.seq_table = original_ml.seq_table;
+		cout << "Creating weight 11 SMLs for repeat detection\n";
+		w11_mlist.CreateMemorySMLs( 11, NULL );
+*/
+		cout << "Constructing seed occurrence lists for repeat detection\n";
+#pragma omp parallel for
+		for( int seqI = 0; seqI < seq_count; seqI++ )
+		{
+			sol_list[seqI].construct(*(mlist.sml_table[seqI]));
+//			delete w11_mlist.sml_table[seqI];
+		}
+//		w11_mlist.sml_table.clear();
+	}
+	if( !collinear_genomes && use_weight_scaling )
+	{
+		cout << "Calculating pairwise breakpoint distances\n";
+		CreatePairwiseBPDistance(bp_distance);
+	}
+
+	// rescale the conservation distance
+	if( use_weight_scaling )
+	{
+		for( uint seqI = 0; seqI < seq_count; ++seqI )	// NOTE(review): reads distance(seqI,seqJ) directly, overwriting the mirrored values set above -- confirm
+			for( uint seqJ = 0; seqJ < seq_count; ++seqJ )
+				conservation_distance[seqI][seqJ] = distance(seqI,seqJ) * conservation_dist_scale;
+	}else{
+		bp_distance.resize(boost::extents[seq_count][seq_count]);	// without weight scaling both matrices are zeroed
+		for( uint seqI = 0; seqI < seq_count; ++seqI )
+			for( uint seqJ = 0; seqJ < seq_count; ++seqJ )
+			{
+				conservation_distance[seqI][seqJ] = 0;
+				bp_distance[seqI][seqJ] = 0;
+			}
+	}
+
+	if( !collinear_genomes )
+	{
+		cout << "genome content distance matrix:\n";
+		print2d_matrix(conservation_distance, cout);
+		cout << endl;
+		cout << "bp distance matrix:\n";
+		print2d_matrix(bp_distance, cout);
+		cout << endl;
+	}
+
+	getAlignment( interval_list );
+}
+
+
+// broken and unused function graveyard
+
+}
diff --git a/libMems/ProgressiveAligner.h b/libMems/ProgressiveAligner.h
new file mode 100644
index 0000000..84b3756
--- /dev/null
+++ b/libMems/ProgressiveAligner.h
@@ -0,0 +1,637 @@
+/*******************************************************************************
+ * $Id: ProgressiveAligner.h,v 1.23 2004/04/19 23:10:13 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _ProgressiveAligner_h_
+#define _ProgressiveAligner_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/SuperInterval.h"
+#include "libMems/Aligner.h"
+#include "libMems/PhyloTree.h"
+#include "libMems/GreedyBreakpointElimination.h"
+#include "libMems/CompactGappedAlignment.h"
+#include "libMems/Islands.h"
+#include <boost/type_traits/remove_pointer.hpp>
+#include <boost/multi_array.hpp>
+#include "libMems/SeedOccurrenceList.h"
+#include "libMems/SubstitutionMatrix.h"
+#include "libMems/MatchProjectionAdapter.h"
+
+namespace mems
+{
+
+/** controls whether copious debugging tests and output gets written to screen */
+extern bool debug_aligner;
+
+
+/** A class that stores alignment-related information as a node in a phylogenetic tree */
+class AlignmentTreeNode : public TreeNode
+{
+public:
+	AlignmentTreeNode() : TreeNode(), sequence(NULL), refined(false) {}	/**< starts unrefined and with a NULL sequence, so the pointer is never read uninitialized */
+	std::vector< SuperInterval > ordering;	/**< A total ordering on alignments of sequence contained by leaves below this node */
+	std::vector< boolean > parents_aligned;		/**< have parents been aligned? */
+	std::vector< boolean > children_aligned;	/**< have children been aligned? */
+	genome::gnSequence* sequence;	/**< The sequence associated with this node, NULL for ancestral nodes */
+	bool refined;	/**< true if iterative refinement has been applied to the alignment at this node */
+};
+
+
+double getDefaultBreakpointPenalty( std::vector< genome::gnSequence* >& sequences );
+
+/**
+ * Computes multiple genome alignments using a progressive alignment algorithm
+ */
+class ProgressiveAligner : public mems::Aligner
+{
+public:
+	/** 
+	 * Constructs an aligner for the specified number of sequences.
+	 * @param seq_count 	The number of sequences that will be aligned with this Aligner
+	 */
+	ProgressiveAligner( uint seq_count );
+	ProgressiveAligner( const ProgressiveAligner& al );
+	ProgressiveAligner& operator=( const ProgressiveAligner& al );
+	~ProgressiveAligner();
+
+	/** sets the breakpoint penalty */
+	void setBreakpointPenalty( double bp_penalty ){ breakpoint_penalty = bp_penalty; }
+	/** sets the minimum breakpoint penalty after scaling */
+	void setMinimumBreakpointPenalty( double min_bp_penalty ){ min_breakpoint_penalty = min_bp_penalty; }
+	/** assume all genomes are collinear when set to true */
+	void setCollinear( boolean collinear ){ this->collinear_genomes = collinear; }
+	/** use a list of precomputed matches instead of computing them */
+	void setPairwiseMatches( mems::MatchList& pair_ml );
+	/** use a precomputed guide tree stored in the given file */
+	void setInputGuideTreeFileName( std::string& fname ){ this->input_guide_tree_fname = fname; }
+	/** write the guide tree stored to the given file */
+	void setOutputGuideTreeFileName( std::string& fname ){ this->output_guide_tree_fname = fname; }
+	/** set the max length (in columns) of alignments passed to MUSCLE */
+	void SetMaxGappedAlignmentLength( size_t len );
+	/** set whether a cache database should be used to speed up recursive anchor search */
 
+	void SetUseCacheDb( bool cbd ){ this->using_cache_db = cbd; }
+
+	/** Set whether iterative refinement using MUSCLE should be performed (true/false) */
+	void setRefinement( bool refine ){ this->refine = refine; }
+	/** Set whether gapped alignment should be performed (true/false) */
+	void setGappedAlignment( bool do_gapped_alignment ){ this->gapped_alignment = do_gapped_alignment; }
+
+	void setPairwiseScoringScheme( const mems::PairwiseScoringScheme& pss ){ this->subst_scoring = pss; }
+
+	enum LcbScoringScheme
+	{
+		AncestralScoring,
+		AncestralSumOfPairsScoring,
+		ExtantSumOfPairsScoring
+	};
+
+	/** set the LCB scoring scheme */
+	void setLcbScoringScheme( LcbScoringScheme scheme ){ scoring_scheme = scheme; }
+	LcbScoringScheme getLcbScoringScheme(void){ return scoring_scheme; }
+
+	void setUseSeedFamilies( bool use_seed_families ){ this->use_seed_families = use_seed_families; }
+	bool getUseSeedFamilies(void){ return this->use_seed_families; }
+
+	void setUseLcbWeightScaling( bool use_weight_scaling ){ this->use_weight_scaling = use_weight_scaling; }
+	bool getUseLcbWeightScaling(void){ return this->use_weight_scaling; }
+
+	void setBreakpointDistanceScale( double bp_dist_scale ){ this->bp_dist_scale = bp_dist_scale; }
+	double getBreakpointDistanceScale(void){ return this->bp_dist_scale; }
+
+	void setConservationDistanceScale( double conservation_dist_scale ){ this->conservation_dist_scale = conservation_dist_scale; }
+	double getConservationDistanceScale(void){ return this->conservation_dist_scale; }
+
+	void setBpDistEstimateMinScore( double min_score ){ this->bp_dist_estimate_score = min_score; }
+	double getBpDistEstimateMinScore(void){ return this->bp_dist_estimate_score; }
+
+	/** determine which extant sequences have been aligned at a given node */
+	void getAlignedChildren( node_id_t node, std::vector< node_id_t >& descendants );
+
+	/** chooses an ordering for aligned intervals at an ancestor node */
+	void createAncestralOrdering( std::vector< mems::Interval* >& interval_list, std::vector< SuperInterval >& ancestral_sequence );
+
+	/** constructs an alignment of node1 and node2 at their ancestor */
+	void alignProfileToProfile( node_id_t node1, node_id_t node2, node_id_t ancestor );
+
+	/** align the sequences at the designated pair of alignment tree nodes */
+	void alignNodes( node_id_t node1, node_id_t node2, node_id_t ancestor );
+
+
+	/** Given a set of sequences, construct and output an alignment as an IntervalList */
+	void align( std::vector< genome::gnSequence* >& seq_table, mems::IntervalList& interval_list );
+
+	void getPath( node_id_t first_n, node_id_t last_n, std::vector< node_id_t >& path );
+	template<class MatchType>
+	void propagateDescendantBreakpoints( node_id_t node1, uint seqI, std::vector< MatchType* >& iv_list );
+	void linkSuperIntervals( node_id_t node1, uint seqI, node_id_t ancestor );
+	void recursiveApplyAncestralBreakpoints( node_id_t ancestor );
+	void extractAlignment( node_id_t ancestor, size_t super_iv, mems::GappedAlignment& gal );
+	void extractAlignment( node_id_t ancestor, size_t super_iv, mems::CompactGappedAlignment<>& cga );
+
+	void getPairwiseMatches( const std::vector< node_id_t >& node1_seqs, const std::vector< node_id_t >& node2_seqs, Matrix<mems::MatchList>& pairwise_matches );
+	void getAncestralMatches( const std::vector< node_id_t > node1_seqs, const std::vector< node_id_t > node2_seqs, node_id_t node1, node_id_t node2, node_id_t ancestor, std::vector< mems::AbstractMatch* >& ancestral_matches );
+	void getRepresentativeAncestralMatches( const std::vector< node_id_t > node1_seqs, const std::vector< node_id_t > node2_seqs, node_id_t node1, node_id_t node2, node_id_t ancestor, std::vector< mems::AbstractMatch* >& ancestral_matches );
+	
+	// functions for recursive anchor search
+	
+	template<class GappedAlignmentType>
+	void recurseOnPairs( const std::vector<node_id_t>& node1_seqs, 
+		const std::vector<node_id_t>& node2_seqs, const GappedAlignmentType& iv, 
+		Matrix<mems::MatchList>& matches, Matrix< std::vector< mems::search_cache_t > >& search_cache_db, 
+		Matrix< std::vector< mems::search_cache_t > >& new_cache_db,
+		boost::multi_array< std::vector< std::vector< int64 > >, 2 >& iv_regions);
+	void pairwiseAnchorSearch( mems::MatchList& r_list, mems::Match* r_begin, mems::Match* r_end, const mems::AbstractMatch* iv, uint oseqI, uint oseqJ );
+
+	void translateGappedCoordinates( std::vector<mems::AbstractMatch*>& ml, uint seqI, node_id_t extant, node_id_t ancestor );
+
+	void doGappedAlignment( node_id_t ancestor, bool profile_aln );
+	void refineAlignment( mems::GappedAlignment& gal, node_id_t ancestor, bool profile_aln, AlnProgressTracker& apt );
+	void FixLeftEnds( node_id_t ancestor );
+	void ConstructSuperIntervalFromMSA( node_id_t ancestor, size_t ans_siv, mems::GappedAlignment& gal );
+
+	// determines LCBs among each pair of genomes using a somewhat stringent homology 
+	// criteria.  fills the distance matrix with the number of breakpoints between each pair
+	void CreatePairwiseBPDistance( boost::multi_array<double, 2>& bp_distmat );
+
+	void constructLcbTrackingMatches( node_id_t ancestral_node, std::vector< mems::AbstractMatch* >& ancestral_matches, std::vector< mems::LcbTrackingMatch< mems::AbstractMatch* > >& tracking_matches );
+
+	void pairwiseScoreTrackingMatches( 
+						std::vector< mems::TrackingMatch >& tracking_matches, 
+						std::vector<node_id_t>& node1_descendants, 
+						std::vector<node_id_t>& node2_descendants,
+						boost::multi_array< double, 3 >& tm_score_array
+						);
+
+	void computeAvgAncestralMatchScores( 
+						std::vector< TrackingMatch >& tracking_matches, 
+						std::vector<node_id_t>& node1_descendants,
+						std::vector<node_id_t>& node2_descendants,
+						boost::multi_array< double, 3 >& tm_score_array
+						);
+
+	void computeInternalNodeDistances( 
+						boost::multi_array<double, 2>& bp_dist_mat, 
+						boost::multi_array<double, 2>& cons_dist_mat, 
+						std::vector<node_id_t>& node1_descendants,
+						std::vector<node_id_t>& node2_descendants);
+
+	bool validateSuperIntervals(node_id_t node1, node_id_t node2, node_id_t ancestor);
+	bool validatePairwiseIntervals(node_id_t node1, node_id_t node2, std::vector<mems::Interval*>& pair_iv);
+
+
+	void alignPP(mems::IntervalList& prof1, mems::IntervalList& prof2, mems::IntervalList& interval_list );
+
+protected:
+	void getAlignment( mems::IntervalList& interval_list );
+
+	mems::MatchList original_ml;	/**< The list of matches calculated among all sequences.  Also contains the full sequences and sorted mer lists */
+	PhyloTree< AlignmentTreeNode > alignment_tree;
+	std::vector< uint > node_sequence_map;
+	double breakpoint_penalty;
+	double min_breakpoint_penalty;
+	std::string input_guide_tree_fname;
+	std::string output_guide_tree_fname;
+	boolean debug;
+	boolean refine;
+	bool using_cache_db;
+
+	std::vector< SeedOccurrenceList > sol_list;
+	boost::multi_array<double, 2> bp_distance;	/**< pairwise breakpoint distances.  dims will be [seq_count][seq_count] */
+	boost::multi_array<double, 2> conservation_distance;	/**< pairwise genome conservation distances.  dims will be [seq_count][seq_count] */
+
+	LcbScoringScheme scoring_scheme;
+	bool use_weight_scaling;
+	bool use_seed_families;
+
+	double bp_dist_scale;
+	double conservation_dist_scale;
+
+	double bp_dist_estimate_score;	/**< the minimum LCB score to use when estimating BP distance.  should be conservative (high) */
+
+	size_t max_gapped_alignment_length;
+
+	mems::PairwiseScoringScheme subst_scoring;
+};
+
+extern bool debug_aligner;
+
+	/** Select the next pair of nodes to align
+	 *  The chosen pair will either be unaligned extant sequences or unaligned
+	 *  ancestral sequences whose descendants have all been aligned.  The chosen pair has
+	 *  the shortest path on the tree
+	 *  When no sequences remain to be aligned, returns node1 == node2
+	 */
+void chooseNextAlignmentPair( PhyloTree< AlignmentTreeNode >& alignment_tree, node_id_t& node1, node_id_t& node2, node_id_t& ancestor );
+
+void markAligned( PhyloTree< AlignmentTreeNode >& alignment_tree, node_id_t subject_node, node_id_t neighbor );
+
+node_id_t createAlignmentTreeRoot( PhyloTree< AlignmentTreeNode >& alignment_tree, node_id_t node1, node_id_t node2 );
+
+// homogenizes an alignment tree and ordering to prepare for alignment
+void prepareAlignmentTree( PhyloTree< AlignmentTreeNode >& alignment_tree );
+
+inline
+ProgressiveAligner::~ProgressiveAligner()	// releases every match held in original_ml
+{
+	for( size_t mI = 0; mI < original_ml.size(); mI++ )
+		original_ml[mI]->Free();	// each match is released through its own Free() method
+}
+
+template<class T>
+class AbsolutComparator	// comparison functor: orders two values by genome::absolut() of each
+{
+public:
+	boolean operator()(const T& a, const T& b) const	// true when absolut(a) is strictly less than absolut(b)
+	{
+		return (genome::absolut(a) < genome::absolut(b));
+	}
+};
+
+
+
+template <class MatchVector>
+void processNewMatch( uint seqI, MatchVector& new_matches, typename MatchVector::value_type& new_match )
+{	// zeroes new_match's start in seqI, then either stores it in new_matches or frees it and NULLs the caller's pointer
+	new_match->SetStart( seqI, 0 );
+	const bool keep = new_match->Multiplicity() > 1 && new_match->Length(seqI) > 0;
+	if( !keep ){
+		new_match->Free();
+		new_match = NULL;
+		return;
+	}
+	new_matches.push_back( new_match );
+}
+inline
+bool checkConsistent(const AbstractMatch* a, const AbstractMatch* b)
+{	// true when a and b share more than one sequence and b's start minus a's start is the same in all shared sequences
+	const int64 unset = (std::numeric_limits<int64>::max)();	// marks "no offset recorded yet"
+	int64 offset = unset;
+	int64 shared_seqs = 0;
+	bool same_offset = true;
+	const uint seq_count = a->SeqCount();
+	for( size_t seqI = 0; seqI < seq_count; seqI++ )
+	{
+		if( b->LeftEnd(seqI) != 0 && a->LeftEnd(seqI) != 0 )	// only sequences where both left ends are nonzero take part
+		{
+			++shared_seqs;
+			const int64 cur_offset = b->Start(seqI) - a->Start(seqI);
+			if( offset == unset ) offset = cur_offset;	// first shared sequence fixes the reference offset
+			if( offset != cur_offset ) same_offset = false;
+		}
+	}
+	return same_offset && shared_seqs > 1;
+}
+
+/**
+ * Delete overlapping regions in favor of the larger match (higher Multiplicity(), ties broken by Length in the current sequence).
+ * This code isn't perfect, it can delete too many base pairs in some cases.  ml is re-sorted once per entry of seq_ids.
+ * @param	ml	The vector of matches; entries may be cropped or freed, and leftover pieces accepted by processNewMatch() are appended
+ * @param	seq_ids	The indexes of sequences in which overlaps should be eliminated
+ * @param	eliminate_both	When an overlap is not consistent (see checkConsistent), trim both of the overlapping matches instead of only the smaller one
+ */
+template <class MatchVector>
+void EliminateOverlaps_v2( MatchVector& ml, const std::vector< uint >& seq_ids, bool eliminate_both = false ){
+	if( ml.size() < 2 )
+		return;
+	uint seq_count = ml[0]->SeqCount();	// NOTE(review): not used anywhere in this overload
+	for( uint sidI = 0; sidI < seq_ids.size(); sidI++ ){
+		uint seqI = seq_ids[ sidI ];
+		mems::SingleStartComparator<mems::AbstractMatch> msc( seqI );
+		std::sort( ml.begin(), ml.end(), msc );
+		int64 matchI = 0;
+		int64 nextI = 0;
+		int64 deleted_count = 0;
+		MatchVector new_matches;	// pieces split off in this pass; appended to ml after the scan
+
+		// scan forward to first defined match
+		for(; matchI != ml.size(); matchI++ )
+			if( ml[ matchI ]->Start( seqI ) != mems::NO_MATCH )
+				break;
+
+		for(; matchI < ml.size(); matchI++ ){
+			if( ml[ matchI ] == NULL )	// slot was freed earlier in this pass
+				continue;
+			
+			for( nextI = matchI + 1; nextI < ml.size(); nextI++ ){
+				if( ml[ nextI ] == NULL )
+					continue;
+
+				boolean deleted_matchI = false;
+				// check for overlaps
+				int64 startI = ml[ matchI ]->Start( seqI );
+				int64 lenI = ml[ matchI ]->Length( seqI );
+				int64 startJ = ml[ nextI ]->Start( seqI );
+				int64 diff =  genome::absolut( startJ ) - genome::absolut( startI ) - lenI;	// negative when nextI starts before matchI ends
+
+				if( diff >= 0 )
+					break;	// there are no more overlaps
+
+				diff = -diff;	// from here on diff is the number of overlapping positions in seqI
+				typename MatchVector::value_type new_match;
+				bool mem_iter_smaller = ( ml[ nextI ]->Multiplicity() > ml[ matchI ]->Multiplicity() ) ||
+					( ml[ nextI ]->Multiplicity() == ml[ matchI ]->Multiplicity() && ml[ nextI ]->Length(seqI) > ml[ matchI ]->Length(seqI) );
+
+				bool consistent_overlap = checkConsistent( ml[ matchI ], ml[ nextI ] );
+
+				// delete bases from the smaller match
+				if( (!consistent_overlap && eliminate_both) || mem_iter_smaller )
+				{
+					// ml[matchI] is treated as the smaller match: the overlap comes off its right side
+					new_match = ml[matchI]->Copy();
+					// erase base pairs from new_match
+					if( diff >= lenI ){
+//							cerr << "Deleting " << **mem_iter << " at the hands of\n" << **next_iter << endl;
+						ml[ matchI ]->Free();
+						ml[ matchI ] = NULL;
+						matchI--;	// the outer loop's matchI++ lands on this now-NULL slot again and skips past it
+						deleted_matchI = true;
+						deleted_count++;
+					}else{
+						ml[ matchI ]->CropRight( diff, seqI );
+						new_match->CropLeft( new_match->Length(seqI) - diff, seqI );
+					}
+					processNewMatch( seqI, new_matches, new_match );
+				}
+				if( (!consistent_overlap && eliminate_both) || !mem_iter_smaller )
+				{
+					// ml[nextI] is treated as the smaller match: the overlap comes off its left side
+					new_match = ml[nextI]->Copy();
+					// erase base pairs from new_match
+					if( diff >= ml[ nextI ]->Length(seqI) ){
+//							cerr << "Deleting " << **next_iter << " at the hands of\n" << **mem_iter << endl;
+						ml[ nextI ]->Free();
+						ml[ nextI ] = NULL;
+						deleted_count++;
+					}else{
+						ml[ nextI ]->CropLeft( diff, seqI );
+						new_match->CropRight( new_match->Length(seqI) - diff, seqI );
+					}
+					processNewMatch( seqI, new_matches, new_match );
+				}
+				if( deleted_matchI )
+					break;	// ml[matchI] no longer exists, so stop comparing against it
+			}
+		}
+
+		if( deleted_count > 0 ){
+			size_t cur = 0;	// compact ml in place, dropping the NULL slots
+			for( size_t mI = 0; mI < ml.size(); ++mI )
+				if( ml[mI] != NULL )
+					ml[cur++] = ml[mI];
+			ml.erase( ml.begin() + cur, ml.end() );
+		}
+		ml.insert( ml.end(), new_matches.begin(), new_matches.end() );
+		new_matches.clear();
+	}
+}
+
+template <class MatchVector>
+void EliminateOverlaps_v2( MatchVector& ml, bool eliminate_both = false )
+{	// convenience overload: eliminates overlaps in every sequence (ids 0 .. SeqCount()-1 of the first match)
+	if( ml.size() < 2 )
+		return;	// fewer than 2 matches cannot overlap
+	const uint total_seqs = ml[0]->SeqCount();
+	std::vector< uint > all_seq_ids;
+	while( all_seq_ids.size() < total_seqs )
+		all_seq_ids.push_back( (uint)all_seq_ids.size() );
+	EliminateOverlaps_v2( ml, all_seq_ids, eliminate_both );
+}
+
+template< class MatchVector >
+uint64 SimpleGetLCBCoverage( MatchVector& lcb ){
+	typename MatchVector::iterator match_iter = lcb.begin();
+	uint64 coverage = 0;
+	// Returns the sum, over every match in lcb, of exp( (AlignmentLength - longest) / (total_len - longest) ) * total_len, truncated to uint64.
+	for( ; match_iter != lcb.end(); ++match_iter ){
+		double total_len = 0;	// sum of the match's lengths over the sequences in which it is defined
+		double longest = 0;	// largest of those per-sequence lengths
+		for( uint seqI = 0; seqI < (*match_iter)->SeqCount(); seqI++ )
+		{
+			if( (*match_iter)->LeftEnd(seqI) != mems::NO_MATCH )
+			{
+				total_len += (double)(*match_iter)->Length(seqI);
+				if( (*match_iter)->Length(seqI) > longest )
+					longest = (double)(*match_iter)->Length(seqI);
+			}
+		}
+		double score = exp( ((*match_iter)->AlignmentLength() - longest) / (total_len - longest) );	// NOTE(review): divides by zero when the match is defined in at most one sequence (total_len == longest)
+		score *= total_len;
+		coverage += (uint64)score;
+	}
+	return coverage;
+}
+
+template< class MatchVectorType >
+void addUnalignedIntervals_v2( MatchVectorType& iv_list, std::set< uint > seq_set, std::vector<gnSeqI> seq_lengths )
+{
+	std::vector< mems::LCB > adjacencies;
+	uint lcbI;
+	uint seqI;
+	uint seq_count = seq_lengths.size();
+	// Appends to iv_list one single-sequence interval for every positive-length gap reported by getGapBounds()
+	// around the intervals already in iv_list, for each sequence in seq_set (seq_set is a by-value copy, filled below when empty).
+	if( seq_set.size() == 0 )
+	{
+		// if an empty seq set was passed then assume all seqs
+		// should be processed
+		for( seqI = 0; seqI < seq_count; seqI++ )
+			seq_set.insert( seqI );
+	}
+	std::vector< std::vector< typename MatchVectorType::value_type > > ymmv;	// each existing interval wrapped in its own one-element list
+	for( size_t ivI = 0; ivI < iv_list.size(); ++ivI )
+		ymmv.push_back( std::vector< typename MatchVectorType::value_type >( 1, iv_list[ivI] ) );
+
+	std::vector< double > scores( iv_list.size(), 0 );
+	computeLCBAdjacencies_v3( ymmv, scores, adjacencies );
+
+	std::vector< int > rightmost;	// per sequence: index of the entry whose right_adjacency is -1, or -1 if none seen yet
+	for( seqI = 0; seqI < seq_count; seqI++ ){
+		rightmost.push_back( -1 );
+	}
+
+	for( lcbI = 0; lcbI <= adjacencies.size(); lcbI++ ){	// one extra pass (lcbI == size) handles the gap after the rightmost entry
+		std::set< uint >::iterator seq_set_iterator = seq_set.begin();
+		for( ; seq_set_iterator != seq_set.end(); seq_set_iterator++ ){
+			seqI = *seq_set_iterator;
+			// scan left
+			int leftI;
+			if( lcbI < adjacencies.size() ){
+// left is always to the left!!
+				leftI = adjacencies[ lcbI ].left_adjacency[ seqI ];
+			}else
+				leftI = rightmost[ seqI ];
+
+			int rightI = lcbI < adjacencies.size() ? lcbI : -1;	// -1 on the extra pass: nothing lies to the right
+// right is always to the right!!
+			if( lcbI < adjacencies.size() )
+				if( adjacencies[ lcbI ].right_adjacency[ seqI ] == -1 )
+					rightmost[ seqI ] = lcbI;
+			
+			int64 left_start, right_start;
+			mems::getGapBounds( seq_lengths, adjacencies, seqI, leftI, rightI, left_start, right_start );
+			int64 gap_len =  genome::absolut( right_start ) - genome::absolut( left_start );
+			if( gap_len > 0 ){
+				mems::Match mm( seq_count );
+				mems::Match* m = mm.Copy();
+				for( uint seqJ = 0; seqJ < seq_count; seqJ++ ){
+					m->SetStart( seqJ, 0 );	// start 0 in every sequence first ...
+				}
+				m->SetStart( seqI, left_start );	// ... then only seqI gets a real start
+				m->SetLength( gap_len );
+				mems::Interval iv;
+				std::vector< mems::AbstractMatch* > tmpvec(1, m);
+				iv.SetMatches( tmpvec );	// NOTE(review): whether iv takes ownership of m is not visible here -- confirm against Interval::SetMatches
+				iv_list.push_back( iv.Copy() );
+			}
+		}
+	}
+}
+
+inline
+void projectIntervalList( mems::IntervalList& iv_list, std::vector< uint >& projection, std::vector< std::vector< mems::MatchProjectionAdapter* > >& LCB_list, std::vector< mems::LCB >& projected_adjs )
+{
+	// the adapters take their sequence indices as size_t, so widen the projection first
+	std::vector< size_t > proj( projection.begin(), projection.end() );
+	std::vector< mems::MatchProjectionAdapter* > projected;
+	for( size_t ivI = 0; ivI < iv_list.size(); ivI++ )
+	{
+		// an interval is projected only when it is defined in every requested sequence
+		bool defined_in_all = true;
+		for( size_t pI = 0; defined_in_all && pI < projection.size(); ++pI )
+			defined_in_all = iv_list[ivI].LeftEnd(projection[pI]) != mems::NO_MATCH;
+		if( !defined_in_all )
+			continue;
+		mems::MatchProjectionAdapter adapter( &iv_list[ivI], proj );
+		projected.push_back( adapter.Copy() );
+		// invert any projection whose first sequence is in reverse orientation
+		if( projected.back()->Orientation(0) == mems::AbstractMatch::reverse )
+			projected.back()->Invert();
+	}
+	// hand the projections to the breakpoint, LCB and adjacency helpers
+	std::vector< gnSeqI > breakpoints;
+	IdentifyBreakpoints( projected, breakpoints );
+	ComputeLCBs_v2( projected, breakpoints, LCB_list );
+	std::vector< double > lcb_scores( LCB_list.size(), 0 );
+	computeLCBAdjacencies_v3( LCB_list, lcb_scores, projected_adjs );
+}
+
+
+template< class MatchType = mems::AbstractMatch >
+class GenericMatchSeqManipulator	// exposes one sequence (m_seq) of a MatchType* through the LeftEnd/Length/CropLeft/CropRight/AddCopy calls that applyBreakpoints() makes on its manip argument
+{
+public:
+	GenericMatchSeqManipulator( uint seq ) : m_seq(seq) {}	// seq: index of the sequence every call below operates on
+	gnSeqI LeftEnd(MatchType*& m) const{ return m->LeftEnd(m_seq); }
+	gnSeqI Length(MatchType*& m) const{ return m->Length(m_seq); }
+	void CropLeft(MatchType*& m, gnSeqI amount ) const{ m->CropLeft(amount, m_seq); }
+	void CropRight(MatchType*& m, gnSeqI amount ) const{ m->CropRight(amount, m_seq); }
+	template< typename ContainerType >
+	void AddCopy(ContainerType& c, MatchType*& m) const{ c.push_back( m->Copy() ); }	// appends a new copy of *m made with Copy()
+private:
+	uint m_seq;	// sequence index passed to every forwarded call
+};
+
+typedef GenericMatchSeqManipulator<> AbstractMatchSeqManipulator;
+
+class SuperIntervalManipulator	// same manipulator interface as GenericMatchSeqManipulator, but for SuperInterval values held directly (not via pointer)
+{
+public:
+	gnSeqI LeftEnd(const SuperInterval& siv) const{ return siv.LeftEnd(); }
+	gnSeqI Length(const SuperInterval& siv) const{ return siv.Length(); }
+	void CropLeft( SuperInterval& siv, gnSeqI amount ) const{ siv.CropLeft( amount );}
+	void CropRight( SuperInterval& siv, gnSeqI amount ) const{ siv.CropRight( amount );}
+	template< typename ContainerType >
+	void AddCopy(ContainerType& c, const SuperInterval& siv) const{ c.push_back( siv ); }	// appends a by-value copy of siv
+};
+
+
+// Splits every entry of iv_list that has a breakpoint from bp_list strictly inside it into two pieces at that breakpoint.
+// iv_list holds intervals or matches (or pointers to them) that manip knows how to query, crop and copy.
+// precondition: both bp_list and intervals *must* be sorted
+template< class T, class Maniplator >
+void applyBreakpoints( std::vector< gnSeqI >& bp_list, std::vector<T>& iv_list, Maniplator& manip )
+{
+	// only the entries present on entry are scanned; split-off left pieces are appended after them and never revisited
+	size_t iv_count = iv_list.size();
+	size_t bpI = 0;
+	size_t ivI = 0;
+	while( ivI < iv_count && bpI < bp_list.size() )
+	{
+		if( manip.LeftEnd(iv_list[ivI]) == NO_MATCH )
+		{
+			++ivI;
+			continue;	// undefined in seqI, so no breakpoint here
+		}
+		//  -(ivI)----
+		//  -------|--
+		if( manip.LeftEnd(iv_list[ivI]) + manip.Length(iv_list[ivI]) <= bp_list[bpI] )
+		{
+			++ivI;	// the interval ends at or before the breakpoint: move to the next interval
+			continue;
+		}
+		//  -----(ivI)-
+		//  --|--------
+		if( bp_list[bpI] <= manip.LeftEnd(iv_list[ivI]) )
+		{
+			++bpI;	// the breakpoint lies at or before the interval's left end: move to the next breakpoint
+			continue;
+		}
+
+		// the breakpoint falls strictly inside iv_list[ivI]: append a copy that becomes
+		// the left piece, and crop the original in place so that it becomes the right piece
+		gnSeqI crop_amt = bp_list[bpI] - manip.LeftEnd(iv_list[ivI]);
+		manip.AddCopy( iv_list, iv_list[ivI] );
+		T& left_iv = iv_list.back();	// stays valid for the rest of this iteration: nothing else is appended before the next AddCopy
+
+		manip.CropLeft( iv_list[ivI], crop_amt );
+		manip.CropRight( left_iv, manip.Length(left_iv)-crop_amt );
+		// restore ordering
+		size_t nextI = ivI + 1;
+		while( nextI < iv_count && manip.LeftEnd( iv_list[nextI-1] ) > manip.LeftEnd( iv_list[nextI] ) )
+		{
+			std::swap( iv_list[nextI-1], iv_list[nextI] );	// bubble the cropped entry rightwards past entries with smaller left ends
+			nextI++;
+		}
+
+// assume that crop works correctly and that it's okay to pass matches with NO_MATCH		
+/**/
+		if( manip.Length( iv_list[ivI] ) == 0 )
+		{
+			std::cerr << "Big fat generic zero 1\n";
+			genome::breakHere();
+		}
+		if( manip.Length( left_iv ) == 0 )
+		{
+			std::cerr << "Big fat generic zero 2\n";
+			genome::breakHere();
+		}
+		if( manip.LeftEnd( iv_list[ivI] ) == 0 )
+		{
+			std::cerr << "uh oh\n";
+			genome::breakHere();
+		}
+		if( manip.LeftEnd( left_iv ) == 0 )
+		{
+			std::cerr << "uh oh 2\n";
+			genome::breakHere();
+		}
+/**/
+	}
+}
+
+
+}
+
+//namespace std {
+//	void swap( PhyloTree<mems::AlignmentTreeNode>& a, PhyloTree<mems::AlignmentTreeNode>& b);
+//}
+
+#endif // _ProgressiveAligner_h_
diff --git a/libMems/RepeatHash.cpp b/libMems/RepeatHash.cpp
new file mode 100755
index 0000000..dd4c02d
--- /dev/null
+++ b/libMems/RepeatHash.cpp
@@ -0,0 +1,64 @@
+/*******************************************************************************
+ * $Id: RepeatHash.cpp,v 1.13 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/RepeatHash.h"
+#include <list>
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+
+RepeatHash* RepeatHash::Clone() const{	// returns a copy-constructed duplicate of this object allocated with new
+	return new RepeatHash(*this);
+}
+
+boolean RepeatHash::CreateMatches(){
+	// the seed search is only run when exactly one sequence is present;
+	// for any other sequence count nothing is searched and false is returned
+	if(seq_count != 1)
+		return false;
+	MatchFinder::FindMatchSeeds();
+	return true;
+}
+
+boolean RepeatHash::EnumerateMatches( IdmerList& match_list ){	// forwards match_list unchanged to HashMatch() and returns its result
+	return HashMatch(match_list);
+}
+
+//why have separate hash tables?
+// RepeatHashEntries use GENETICIST coordinates.  They start at 1, not 0.
+boolean RepeatHash::HashMatch(IdmerList& match_list){
+	// order the seed occurrences by position
+	match_list.sort(&idmer_position_lessthan);
+	// the entry gets one slot per occurrence and the seed length as its length
+	MatchHashEntry mhe = MatchHashEntry( match_list.size(), GetSar(0)->SeedLength());
+	mhe.SetLength( GetSar(0)->SeedLength() );
+
+	// copy the positions in, shifted by one because entries use 1-based coordinates
+	uint32 repeatI = 0;
+	for( IdmerList::iterator hit = match_list.begin(); hit != match_list.end(); ++hit, ++repeatI )
+		mhe.SetStart(repeatI, hit->position + 1);
+
+	SetDirection( mhe );
+	mhe.CalculateOffset();
+
+	// entries with a multiplicity of at least 2 are stored; anything less is only reported
+	if(mhe.Multiplicity() >= 2)
+		AddHashEntry(mhe);
+	else
+		cout << "red flag " << mhe << "\n";
+
+	return true;
+}
+
+} // namespace mems
diff --git a/libMems/RepeatHash.h b/libMems/RepeatHash.h
new file mode 100755
index 0000000..09888c4
--- /dev/null
+++ b/libMems/RepeatHash.h
@@ -0,0 +1,54 @@
+/*******************************************************************************
+ * $Id: RepeatHash.h,v 1.8 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _RepeatHash_h_
+#define _RepeatHash_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/MemHash.h"
+
+namespace mems {
+
+/**
+ * Finds repeats within a single sequence.
+ * This class extends the functionality of MemHash to search for repetitive
+ * matches within a single sequence.
+ */
+class RepeatHash : public MemHash{
+public:
+	virtual RepeatHash* Clone() const;	/**< returns a new copy of this object allocated with new */
+	virtual boolean CreateMatches();	/**< runs the seed search when exactly one sequence is present; returns false otherwise */
+protected:
+
+	virtual boolean EnumerateMatches( IdmerList& match_list );	/**< forwards match_list to HashMatch() */
+	virtual boolean HashMatch(IdmerList& match_list);	/**< builds one hash entry from the seed occurrences in match_list */
+	virtual SortedMerList* GetSar(uint32 sarI) const;	/**< always returns sar_table[0], whatever sarI is */
+};
+
+inline
+SortedMerList* RepeatHash::GetSar(uint32 sarI) const{	// sarI is ignored: the first sorted mer list is returned for every index
+	return sar_table[0];
+}
+
+inline
+bool idmer_greaterthan(idmer& a_v, idmer& m_v){	// NOTE(review): despite the name, this is true when a_v.mer is LESS than m_v.mer -- confirm against callers before renaming or flipping
+	return (a_v.mer < m_v.mer);// ? true : false;
+};
+
+inline
+bool idmer_position_lessthan(idmer& a_v, idmer& m_v){	// true when a_v's position is strictly smaller than m_v's
+	return (a_v.position < m_v.position);// ? true : false;
+};
+
+}
+
+#endif //_RepeatHash_h_
diff --git a/libMems/RepeatMatch.cpp b/libMems/RepeatMatch.cpp
new file mode 100644
index 0000000..2cea740
--- /dev/null
+++ b/libMems/RepeatMatch.cpp
@@ -0,0 +1,51 @@
+/*******************************************************************************
+ * $Id: Match.cpp,v 1.9 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/RepeatMatch.h"
+#include "libGenome/gnException.h"
+#include "libGenome/gnDebug.h"
+
+namespace mems {
+
+RepeatMatch::RepeatMatch() : MatchHashEntry()	// default-constructs the MatchHashEntry base; m_seq_id starts out empty
+{
+}
+
+RepeatMatch::~RepeatMatch(){	// nothing to release explicitly; m_seq_id is destroyed automatically
+
+}
+
+void RepeatMatch::FromSeq( uint32 match_id, uint32 seq_id ){
+	// Record seq_id at index match_id so that SeqId( match_id ) can return it;
+	// the table grows on demand and any newly created slots are zero-filled.
+	if( match_id >= this->m_seq_id.size() )
+		this->m_seq_id.resize( (size_t)match_id + 1 );
+	this->m_seq_id[ match_id ] = seq_id; }
+
+uint32 RepeatMatch::SeqId( uint32 match_id )	// returns the entry of m_seq_id at index match_id
+{
+	return this->m_seq_id.at(match_id);	// vector::at() throws std::out_of_range when match_id is not a valid index
+
+}
+
+std::ostream& operator<<(std::ostream& os, const RepeatMatch& mhe){ //write to stream: the length, then a tab and the start for each sequence
+	os << mhe.Length();
+	uint32 seqI = 0;
+	while( seqI < mhe.SeqCount() )
+	{
+		os << '\t' << mhe.Start(seqI);
+		++seqI;
+	}
+	return os;
+}
+
+}	// namespace mems
diff --git a/libMems/RepeatMatch.h b/libMems/RepeatMatch.h
new file mode 100644
index 0000000..ecbd9e2
--- /dev/null
+++ b/libMems/RepeatMatch.h
@@ -0,0 +1,51 @@
+/*******************************************************************************
+ * $Id: Match.h,v 1.10 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _RepeatMatch_h_
+#define _RepeatMatch_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnClone.h"
+#include <iostream>
+#include <vector>
+#include <set>
+#include "libMems/MatchHashEntry.h"
+
+namespace mems {
+
+/**
+ * A MatchHashEntry that additionally carries a table of sequence identifiers
+ * (m_seq_id), indexed by match_id and read back through SeqId().
+ * NOTE(review): RepeatMatch.cpp defines only the default constructor and the destructor;
+ * the two other constructors declared below have no definition there -- confirm before using them.
+ */
+class RepeatMatch : public MatchHashEntry {
+
+public:
+	RepeatMatch();
+	RepeatMatch( const uint32 seq_count, const gnSeqI mersize, const MemType m_type = seed );
+	RepeatMatch(const RepeatMatch& mhe);
+	~RepeatMatch();
+	void FromSeq( uint32 match_id, uint32 seq_id );	/**< meant to associate seq_id with match_id; see RepeatMatch.cpp for what is actually implemented */
+	uint32 SeqId( uint32 match_id );	/**< returns m_seq_id.at( match_id ) */
+protected:
+	std::vector<uint32> m_seq_id;	/**< sequence identifiers, indexed by match_id */
+
+private:
+
+
+};
+std::ostream& operator<<(std::ostream& os, const RepeatMatch& mhe); //write to stream.
+
+}	// namespace mems
+
+#endif // _RepeatMatch_h_
+
diff --git a/libMems/RepeatMatchList.cpp b/libMems/RepeatMatchList.cpp
new file mode 100644
index 0000000..b1384d0
--- /dev/null
+++ b/libMems/RepeatMatchList.cpp
@@ -0,0 +1,300 @@
+/*******************************************************************************
+ * $Id: MatchList.cpp,v 1.22 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/RepeatMatchList.h"
+#include "libMems/DNAFileSML.h"
+#include "libMems/DNAMemorySML.h"
+#include "libMems/MemHash.h"
+#include <map>
+#include <sstream>
+#include <ctime>
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+typedef void* MatchID_t;	// match ids are serialized as object addresses (see WriteList)
+
+// Default constructor: delegates to MatchList(); no additional state is set up here.
+RepeatMatchList::RepeatMatchList() :MatchList()
+{
+}
+
+
+void RepeatMatchList::LoadSequences( ostream* log_stream ){
+	// Loads every file named in seq_filename into a new gnSequence and appends
+	// it to seq_table.  The first load failure is reported on cerr and ends the
+	// call; sequences appended before the failure are left in seq_table.
+	if( seq_filename.empty() )
+		return;
+
+	gnSeqI total_len = 0;	// running sum of the loaded lengths; only tallied here
+	for( uint fileI = 0; fileI < seq_filename.size(); ++fileI ){
+		gnSequence* loaded = new gnSequence();
+		bool load_ok = false;
+		try{
+			loaded->LoadSource( seq_filename[ fileI ] );
+			load_ok = true;
+		}catch( gnException& gne ){
+			if( gne.GetCode() == FileNotOpened() )
+				cerr << "Error loading " << seq_filename[ fileI ] << endl;
+			else
+				cerr << gne;
+		}catch( exception& e ){
+			cerr << "Unhandled exception loading " << seq_filename[ fileI ] << endl;
+			cerr << "At: " << __FILE__ << ":" << __LINE__ << endl;
+			cerr << e.what();
+		}catch( ... ){
+			cerr << "Unknown exception when loading " << seq_filename[ fileI ] << endl;
+		}
+		if( !load_ok ){
+			delete loaded;
+			return;
+		}
+
+		total_len += loaded->length();
+		seq_table.push_back( loaded );
+		if( log_stream == NULL )
+			continue;
+		(*log_stream) << "Sequence loaded successfully.\n";
+		(*log_stream) << seq_filename[ fileI ] << " " << loaded->length() << " base pairs.\n";
+	}
+}
+
+void RepeatMatchList::LoadSMLs( uint mer_size, ostream* log_stream ){
+	// Loads one sorted mer list (SML) per seq_table entry from sml_filename and creates any list that fails to load or whose seed differs from the solid seed of weight mer_size.
+	// if the mer_size parameter is 0 then calculate a default mer size for these sequences
+	if( mer_size == 0 ){
+		mer_size = GetDefaultMerSize( seq_table );
+		if( log_stream != NULL ){
+			(*log_stream) << "Using weight " << mer_size << " mers for initial seeds\n";
+		}
+	}
+
+	// load and creates SMLs as necessary
+	// punt (tjt): a solid seed is used; the spaced-seed call is kept below for reference
+	//uint64 default_seed = getSeed( mer_size );
+	uint64 default_seed = getSolidSeed( mer_size );
+	vector< uint > create_list;
+	uint seqI = 0;
+	for( seqI = 0; seqI < seq_table.size(); seqI++ ){
+		// define a DNAFileSML to store a sorted mer list
+		DNAFileSML* file_sml = new DNAFileSML();
+		sml_table.push_back( file_sml );
+		// a gnException from LoadFile puts the sequence on create_list so its SML is built below
+		boolean success = true;
+		try{
+			file_sml->LoadFile( sml_filename[ seqI ] );
+		}catch( gnException& gne ){
+			success = false;
+			create_list.push_back( seqI );
+		}
+		boolean recreate = false;
+		if(success && (file_sml->Seed() != default_seed )){
+			if( log_stream != NULL )
+				(*log_stream) << "Default seed mismatch.  A new sorted mer list will be created.\n";
+			recreate = true;
+			create_list.push_back( seqI );
+		}
+		// success is only reported for lists that loaded and already use default_seed
+		if( success && !recreate && log_stream != NULL )
+			(*log_stream) << "Sorted mer list loaded successfully\n";
+	}
+
+	// free up memory before creating any SMLs
+	if( create_list.size() > 0 )
+		for( seqI = 0; seqI < sml_table.size(); seqI++ ){
+			sml_table[ seqI ]->Clear();
+			delete sml_table[ seqI ];
+			sml_table[ seqI ] = NULL;
+		}
+	// NOTE(review): the loop above clears every sml_table entry, including any present before this call -- confirm sml_table is empty on entry
+	// create any SMLs that need to be created
+	for( uint createI = 0; createI < create_list.size(); createI++ ){
+		if( log_stream != NULL )
+			(*log_stream) << "Creating sorted mer list\n";
+		try{
+		// the creation is timed so that the duration can be written to the log
+		time_t start_time = time(NULL);
+		sml_table[ create_list[ createI ] ] = new DNAFileSML( sml_filename[ create_list[ createI ] ] );
+		sml_table[ create_list[ createI ] ]->Create( *seq_table[ create_list[ createI ] ], default_seed );
+		time_t end_time = time(NULL);
+	 	if( log_stream != NULL )
+			(*log_stream) << "Create time was: " << end_time - start_time << " seconds.\n";
+		
+		}catch(...){
+			cerr << "Error creating sorted mer list\n";
+			throw;
+		}
+	}
+	// entries that were cleared above but not recreated are still NULL at this point
+	// reload the other SMLs now that creation has completed
+	if( create_list.size() > 0 ){
+		for( seqI = 0; seqI < sml_filename.size(); seqI++ ){
+			if( sml_table[ seqI ] != NULL )
+				continue;
+			sml_table[ seqI ] = new DNAFileSML( sml_filename[ seqI ] );
+			try{
+				((DNAFileSML*)sml_table[ seqI ])->LoadFile( sml_filename[ seqI ] );
+			}catch( gnException& gne ){
+				cerr << "Error loading sorted mer list\n";
+				throw;
+			}
+		}
+	}
+}
+void RepeatMatchList::ReadList(istream& match_file){
+	// Parses a FormatVersion 3 match list from match_file: a header holding the
+	// sequence count, a file name and length per sequence and the match count,
+	// followed by one whitespace-delimited match record per line.  File names
+	// are appended to seq_filename and every record is appended to this list.
+	// Throws gnException (InvalidFileFormat) for a malformed header or record or
+	// when the number of records differs from the declared match count, and
+	// throws a C string when a record declares subset or superset links.
+	string tag;
+	gnSeqI len = 0;
+	int64 start = 0;
+	// zero-initialized so that a failed extraction is rejected by the < 2 test
+	unsigned int seq_count = 0;
+
+	match_file >> tag;	//format version tag
+	if( tag != "FormatVersion" ){
+		Throw_gnEx(InvalidFileFormat());
+	}
+	match_file >> tag;	//format version
+	if( tag != "3" ){
+		Throw_gnEx(InvalidFileFormat());
+	}
+	match_file >> tag;	//sequence count tag
+	if( tag != "SequenceCount" ){
+		Throw_gnEx(InvalidFileFormat());
+	}
+	match_file >> seq_count;	//sequence count
+	if(seq_count < 2){
+		Throw_gnEx(InvalidFileFormat());
+	}
+
+	// read the sequence file names and lengths
+	for( unsigned int seqI = 0; seqI < seq_count; seqI++ ){
+		match_file >> tag;	// name tag
+		getline( match_file, tag );
+		// skip the tab character; substr(1) would throw on an empty remainder
+		if( !tag.empty() )
+			tag = tag.substr( 1 );
+		seq_filename.push_back(tag);
+		match_file >> tag;	// length tag
+		gnSeqI seq_len = 0;
+		match_file >> seq_len;	// length
+		if( seqI < seq_table.size() && seq_table[ seqI ]->length() != seq_len ){
+			cerr << "Warning: Genome sizes in the match list differ.\n";
+			cerr << "seq_table[ " << seqI << " ]->length() " << seq_table[ seqI ]->length() << " seq_len: " << seq_len << endl;
+		}
+	}
+
+	// read the number of matches
+	unsigned int match_count = 0;
+	match_file >> tag;	// match count tag
+	match_file >> match_count;	// match count
+
+	// read the matches, one record per line
+	string cur_line;
+	getline( match_file, cur_line );	// discard the rest of the match count line
+	while( getline( match_file, cur_line ) ){
+		MatchHashEntry mhe( seq_count, 0 );
+		stringstream line_stream( cur_line );
+
+		// a record without a readable length and seq_count starts is malformed
+		if( !(line_stream >> len) ){
+			Throw_gnEx(InvalidFileFormat());
+		}
+		mhe.SetLength(len);
+		for(uint32 seqI = 0; seqI < seq_count; seqI++){
+			if( !(line_stream >> start) ){
+				Throw_gnEx(InvalidFileFormat());
+			}
+			mhe.SetStart(seqI, start);
+		}
+		mhe.CalculateOffset();
+
+		// The next column is consumed as the match id and not used any further.  NOTE(review): WriteList emits Multiplicity before the id -- confirm the column order.
+		MatchID_t match_id = NULL;
+		line_stream >> match_id;
+
+		// Subset and superset links are not supported by this reader.  Both
+		// counts start at 0 so that a short record is never read as garbage.
+		uint sub_count = 0;
+		line_stream >> sub_count;
+		if(sub_count > 0)
+			throw "Unable to read file, invalid format, cannot read subset information\n";
+		uint sup_count = 0;
+		line_stream >> sup_count;
+		if(sup_count > 0)
+			throw "Unable to read file, invalid format, cannot read superset information\n";
+
+		Match* new_match = mhe.Copy();
+		push_back( new_match );
+	}
+	if( match_count != size() ){
+		Throw_gnEx(InvalidFileFormat());
+	}
+}
+
+void RepeatMatchList::WriteList(ostream& match_file) const{
+	// Writes this list in FormatVersion 3 text form: a header with the sequence
+	// count, a file name and length per sequence and the match count, then one
+	// tab-delimited record per match.  Nothing is written for an empty list.
+	if( size() == 0 )
+		return;
+	Match* first_mem = *(begin());
+	unsigned int seq_count = first_mem->SeqCount();
+
+	match_file << "FormatVersion" << '\t' << 3 << "\n";
+	match_file << "SequenceCount" << '\t' << seq_count << "\n";
+	for(unsigned int seqI = 0; seqI < seq_count; seqI++){
+		// "null" and "0" stand in for a sequence without a known file or length
+		match_file << "Sequence" << seqI << "File" << '\t';
+		if( seq_filename.size() > seqI )
+			match_file << seq_filename[seqI];
+		else
+			match_file << "null";
+		match_file << "\n";
+		match_file << "Sequence" << seqI << "Length" << '\t';
+		if( seq_table.size() > seqI )
+			match_file << seq_table[seqI]->length();
+		else
+			match_file << "0";
+		match_file << "\n";
+	}
+
+	match_file << "MatchCount" << '\t' << size() << endl;
+
+	// One record per match: the match itself, its multiplicity, its address
+	// (used as the match id) and zero subset / superset counts.
+	// NOTE(review): ReadList in this file does not read a multiplicity column
+	// between the starts and the id -- confirm which layout is intended.
+	vector<Match*>::const_iterator match_iter = begin();
+	for(; match_iter != end(); ++match_iter){
+		// print the match
+		match_file << **match_iter << '\t';
+		// print the Multiplicity
+		match_file << (*match_iter)->Multiplicity() << '\t';
+		// print the match address
+		match_file << (MatchID_t)(*match_iter) << '\t';
+		// print subset id's
+		match_file << 0;
+		// print superset id's
+		match_file << '\t' << 0;
+		match_file << endl;
+	}
+}
+
+}	// namespace mems
diff --git a/libMems/RepeatMatchList.h b/libMems/RepeatMatchList.h
new file mode 100644
index 0000000..fbb2cf8
--- /dev/null
+++ b/libMems/RepeatMatchList.h
@@ -0,0 +1,66 @@
+/*******************************************************************************
+ * $Id: MatchList.h,v 1.10 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _RepeatMatchList_h_
+#define _RepeatMatchList_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <iostream>
+#include <list>
+#include "libMems/SortedMerList.h"
+#include "libGenome/gnSequence.h"
+#include "libMems/Match.h"
+#include "libMems/MatchList.h"
+//#include <valarray>
+
+namespace mems {
+
+// punt: Need to subclass AbstractMatchList, which can be a MatchList or RepeatMatchList
+
+class RepeatMatchList : public MatchList {
+public:
+	RepeatMatchList();
+	RepeatMatchList( const RepeatMatchList& ml );	// NOTE(review): RepeatMatchList.cpp in this commit contains no definition of this constructor -- confirm it is defined elsewhere
+	// LoadSequences loads each seq_filename entry into seq_table; LoadSMLs loads or builds one sorted mer list per sequence
+	void LoadSequences( std::ostream* log_stream );
+	void LoadSMLs( uint mer_size, std::ostream* log_stream );
+
+	/**
+	 * Reads a MatchList from an input stream.
+	 * Sequence file names are read into the seq_filename vector,
+	 * but the actual files are not opened.  The calling function
+	 * should load them after using this method.
+	 * NOTE(review): the implementation does not fill sml_filename.
+	 * @param match_stream The input stream to read from
+	 */
+	void ReadList( std::istream& match_stream );
+
+	/**
+	 *  Writes a MatchList to the designated output stream
+	 * @param match_stream The output stream to write to
+	 */
+	void WriteList( std::ostream& match_stream ) const;
+	// The members below are commented out here; the .cpp uses them, so they are presumably inherited from MatchList.
+	//vector<string> sml_filename;		/**< The file names of the sorted mer list for each sequence, may be empty or null */
+	//vector<string> seq_filename;		/**< The file names of the sequence data, may be empty or null */
+	//vector<SortedMerList*> sml_table;	/**< The sorted mer list associated with each sequence, may be empty or null */
+	//vector<genome::gnSequence*> seq_table;		/**< The actual sequences associated with the matches stored in this list.  Should not be empty or null. */
+
+
+protected:
+	// no protected members are declared
+};
+
+}	// namespace mems
+
+#endif 
+
+
diff --git a/libMems/Scoring.h b/libMems/Scoring.h
new file mode 100644
index 0000000..712f6ce
--- /dev/null
+++ b/libMems/Scoring.h
@@ -0,0 +1,335 @@
+/*******************************************************************************
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef __Scoring_h__
+#define __Scoring_h__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/SubstitutionMatrix.h"
+#include <string>
+#include <vector>
+
+namespace mems {
+
+static const score_t INVALID_SCORE = (std::numeric_limits<score_t>::max)();
+
+//tjtaed: function to compute the SP column score, and cumulative SP score from an alignment
+void computeSPScore( const std::vector<std::string>& alignment, const PairwiseScoringScheme& pss, std::vector<score_t>& scores, score_t& score );
+//tjt: function to compute the consensus column score, consensus sequence, and cumulative consensus score from an alignment
+void computeConsensusScore( const std::vector<std::string>& alignment, const PairwiseScoringScheme& pss, std::vector<score_t>& scores, std::string& consensus, score_t& score );
+void computeMatchScores( const std::string& seq1, const std::string& seq2, const PairwiseScoringScheme& scoring, std::vector<score_t>& scores );
+void computeGapScores( const std::string& seq1, const std::string& seq2, const PairwiseScoringScheme& scoring, std::vector<score_t>& scores );
+
+
+// tjt: computes, for every alignment column, the consensus base (A, G, C or T)
+// with the highest summed score, that score, and the total over all columns.
+//   alignment  aligned rows; row 0 defines the column count (.at(0) throws
+//              std::out_of_range if there are no rows) and no row may be longer
+//   pss        scoring scheme handed to computeMatchScores / computeGapScores
+//   scores     resized to the column count; receives the best score per column
+//   consensus  cleared, then receives one base per scored column
+//   score      sum of the valid column scores, INVALID_SCORE if there are none
+inline
+void computeConsensusScore( const std::vector<std::string>& alignment, const PairwiseScoringScheme& pss, 
+						   std::vector<score_t>& scores, std::string& consensus, score_t& score )
+{
+	// candidate bases, in the order used for columnbp: 0 = A, 1 = G, 2 = C, 3 = T
+	static const char bases[] = { 'A', 'G', 'C', 'T' };
+	const size_t base_count = sizeof(bases) / sizeof(bases[0]);
+	const size_t col_count = alignment.at(0).size();
+
+	consensus.clear();
+	scores.resize( col_count );
+	std::fill(scores.begin(), scores.end(), INVALID_SCORE);
+
+	// allscores[b][c]: score of column c summed over all rows when every row is
+	// compared against a sequence consisting only of base b
+	std::vector< std::vector< score_t > > allscores;
+	for( size_t i = 0; i < base_count; i++)
+	{
+		const std::string candidate( col_count, bases[i] );
+		std::vector< score_t > consensus_scores( col_count, 0 );
+
+		for( size_t j = 0; j < alignment.size(); j++)
+		{
+			std::vector< score_t > tscores( col_count, 0 );
+
+			computeMatchScores( alignment.at(j), candidate, pss, tscores );
+			for( size_t k = 0; k < alignment.at(j).size(); k++)
+				if( tscores.at(k) != INVALID_SCORE )
+					consensus_scores.at(k) += tscores.at(k);
+
+			// NOTE(review): tscores still holds the match scores at this point
+			// and computeGapScores only writes columns that belong to a gap run,
+			// so ungapped columns are added a second time below.  computeSPScore
+			// uses separate, freshly reset buffers -- confirm what is intended.
+			computeGapScores( alignment.at(j), candidate, pss, tscores );
+			for( size_t k = 0; k < alignment.at(j).size(); k++)
+				if( tscores.at(k) != INVALID_SCORE )
+					consensus_scores.at(k) += tscores.at(k);
+		}
+		allscores.push_back(consensus_scores);
+	}
+
+	// tjt: find maxvalue for each column; on a tie the earlier base is kept
+	std::vector< int > columnbp( col_count, (std::numeric_limits<int>::min)());
+	for( size_t i = 0; i < base_count; i++)
+	{
+		for( size_t j = 0; j < col_count; j++)
+		{
+			if( allscores.at(i).at(j) == INVALID_SCORE )
+				continue;
+			if( i == 0 || allscores.at(i).at(j) > scores.at(j) )
+			{
+				scores.at(j) = allscores.at(i).at(j);
+				columnbp.at(j) = (int)i;
+			}
+		}
+	}
+
+	// Sum the valid column scores starting from zero.  The total used to start
+	// at INVALID_SCORE, the largest score_t value, so it was never the plain
+	// sum of the column scores and overflowed as soon as a score was positive.
+	score_t total = 0;
+	bool have_valid_column = false;
+	for( size_t j = 0; j < col_count; j++)
+	{
+		if( scores.at(j) != INVALID_SCORE )
+		{
+			total += scores.at(j);
+			have_valid_column = true;
+		}
+		// a column that never received a base adds nothing to the consensus
+		const int bp = columnbp.at(j);
+		if( bp >= 0 && bp < (int)base_count )
+			consensus.append( 1, bases[bp] );
+	}
+
+	// an alignment without any valid column keeps the INVALID_SCORE marker
+	score = have_valid_column ? total : INVALID_SCORE;
+}
+
+// Scores each column in which both seq1 and seq2 hold a residue, using scoring.matrix
+// indexed through SortedMerList::BasicDNATable(); columns with a '-' in either
+// sequence stay at INVALID_SCORE.  seq2 must be at least as long as seq1.
+inline
+void computeMatchScores( const std::string& seq1, const std::string& seq2, 
+						const PairwiseScoringScheme& scoring, std::vector<score_t>& scores )
+{
+	scores.resize( seq1.size() );
+	std::fill(scores.begin(), scores.end(), INVALID_SCORE);
+	const uint8* table = SortedMerList::BasicDNATable();
+
+	for (unsigned uColIndex = 0; uColIndex < seq1.size(); ++uColIndex)
+	{
+		const char c1 = seq1[uColIndex];
+		const char c2 = seq2[uColIndex];
+		if( c1 == '-' || c2 == '-' )
+			continue;
+		// index through unsigned char: plain char may be signed, and a negative index would read before the table
+		scores[uColIndex] = scoring.matrix[ table[(unsigned char)c1] ][ table[(unsigned char)c2] ];
+	}
+}
+
+// Computes affine gap penalties for the aligned pair seq1 / seq2 and spreads the
+// penalty of each gap run evenly over the columns of that run.
+//   seq1, seq2  aligned rows using '-' for gaps; seq2 must be at least as long as seq1
+//   scoring     supplies gap_open and gap_extend
+//   scores      resized to seq1.size(); only columns that belong to a gap run are
+//               written.  Callers are expected to pre-fill it with INVALID_SCORE:
+//               an internal gap column holding another value triggers a diagnostic.
+// Columns in which both rows hold '-' are ignored throughout.
+inline
+void computeGapScores( const std::string& seq1, const std::string& seq2, const PairwiseScoringScheme& scoring, 
+					  std::vector<score_t>& scores )
+{
+	scores.resize(seq1.size());
+	const unsigned uColCount = seq1.size();
+	// Nothing to score.  Without this guard uColCount - 1 below wraps around and
+	// the main loop would index far beyond the end of both strings.
+	if( uColCount == 0 )
+		return;
+
+	bool bGapping1 = false;
+	bool bGapping2 = false;
+	const score_t gap_open_score = scoring.gap_open;
+	const score_t gap_extend_score = scoring.gap_extend;
+	// terminal gaps are currently charged exactly like an internal gap open
+	const score_t term_gap_score = gap_open_score;
+
+	// first column that is not a gap in both rows
+	unsigned uColStart = 0;
+	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
+	{
+		if (seq1[uColIndex] != '-' || seq2[uColIndex] != '-')
+		{
+			uColStart = uColIndex;
+			break;
+		}
+	}
+
+	// last column that is not a gap in both rows
+	unsigned uColEnd = uColCount - 1;
+	for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
+	{
+		if (seq1[iColIndex] != '-' || seq2[iColIndex] != '-')
+		{
+			uColEnd = (unsigned) iColIndex;
+			break;
+		}
+	}
+
+	unsigned gap_left_col = 0;	// first column of the gap run being accumulated
+	score_t cur_gap_score = 0;	// penalty accumulated for that run so far
+	for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
+	{
+		const bool bGap1 = seq1[uColIndex] == '-';
+		const bool bGap2 = seq2[uColIndex] == '-';
+
+		if (bGap1 && bGap2)
+			continue;
+
+		if (bGap1)
+		{
+			if (!bGapping1)
+			{
+				// a run starting in the first scored column is a terminal gap
+				gap_left_col = uColIndex;
+				if (uColIndex == uColStart)
+					cur_gap_score += term_gap_score;
+				else
+					cur_gap_score += gap_open_score;
+				bGapping1 = true;
+			}
+			else
+				cur_gap_score += gap_extend_score;
+			continue;
+		}
+
+		if (bGap2)
+		{
+			if (!bGapping2)
+			{
+				gap_left_col = uColIndex;
+				if (uColIndex == uColStart)
+					cur_gap_score += term_gap_score;
+				else
+					cur_gap_score += gap_open_score;
+				bGapping2 = true;
+			}
+			else
+				cur_gap_score += gap_extend_score;
+			continue;
+		}
+
+		// neither row has a gap here, so an open run ended in the previous column
+		if( bGapping1 || bGapping2 )
+		{
+			score_t valid_cols = 0;
+			for( unsigned uGapIndex = gap_left_col; uGapIndex < uColIndex; ++uGapIndex )
+				if( seq1[uGapIndex] != '-' || seq2[uGapIndex] != '-' )
+					valid_cols++;
+			// spread the total gap penalty evenly across all columns; whatever
+			// the division leaves over is added to the leftmost column
+			score_t per_site_penalty = cur_gap_score / valid_cols;
+			score_t extra = cur_gap_score - (per_site_penalty * valid_cols);
+			for( unsigned uGapIndex = gap_left_col; uGapIndex < uColIndex; ++uGapIndex )
+			{
+				if( seq1[uGapIndex] == '-' && seq2[uGapIndex] == '-' )
+					continue;
+				if( scores[uGapIndex] != INVALID_SCORE )
+				{
+					genome::breakHere();
+					std::cerr << "computeGapScores: overwriting a gap column that was not INVALID_SCORE\n";
+				}
+				scores[uGapIndex] = per_site_penalty;
+			}
+			if( scores[gap_left_col] == INVALID_SCORE )
+			{
+				std::cerr << "computeGapScores: per-column gap penalty equals INVALID_SCORE\n";
+				genome::breakHere();
+			}
+			scores[gap_left_col] += extra;
+			gap_left_col = (std::numeric_limits<unsigned>::max)();
+			cur_gap_score = 0;
+		}
+		bGapping1 = false;
+		bGapping2 = false;
+	}
+
+	// a run that is still open extends to the last scored column: terminal gap
+	if (bGapping1 || bGapping2)
+	{
+		cur_gap_score -= gap_open_score;
+		cur_gap_score += term_gap_score;
+		score_t valid_cols = 0;
+		for( unsigned uGapIndex = gap_left_col; uGapIndex < uColCount; ++uGapIndex )
+			if( seq1[uGapIndex] != '-' || seq2[uGapIndex] != '-' )
+				valid_cols++;
+		// spread the total gap penalty evenly across all columns
+		score_t per_site_penalty = cur_gap_score / valid_cols;
+		score_t extra = cur_gap_score - (per_site_penalty * valid_cols);
+		for( unsigned uGapIndex = gap_left_col; uGapIndex < uColCount; ++uGapIndex )
+		{
+			if( seq1[uGapIndex] == '-' && seq2[uGapIndex] == '-' )
+				continue;
+			scores[uGapIndex] = per_site_penalty;
+		}
+		if( valid_cols > 0 )
+		{
+			if( scores[gap_left_col] == INVALID_SCORE )
+			{
+				std::cerr << "computeGapScores: per-column gap penalty equals INVALID_SCORE\n";
+				genome::breakHere();
+			}
+			scores[gap_left_col] += extra;
+		}
+	}
+}
+
+// Sum-of-pairs column scores and their total; rows must share one length, and an empty alignment yields empty scores and score 0.
+inline
+void computeSPScore( const std::vector<std::string>& alignment, const PairwiseScoringScheme& pss, 
+					std::vector<score_t>& scores, score_t& score )
+{
+	const size_t col_count = alignment.empty() ? 0 : alignment[0].size();	// alignment[0] must not be read without rows
+	std::vector< score_t > cur_m_scores( col_count, INVALID_SCORE );
+	std::vector< score_t > cur_g_scores( col_count, INVALID_SCORE );
+	scores.resize( col_count );
+	std::fill(scores.begin(), scores.end(), 0);
+	score = 0;
+	const double w = 1;	// weight, to be determined later...
+	for( size_t i = 0; i < alignment.size(); ++i )
+		for( size_t j = i+1; j < alignment.size(); ++j )
+		{
+			std::fill( cur_m_scores.begin(), cur_m_scores.end(), INVALID_SCORE );
+			std::fill( cur_g_scores.begin(), cur_g_scores.end(), INVALID_SCORE );
+			computeMatchScores( alignment.at(i), alignment.at(j), pss, cur_m_scores );
+			computeGapScores( alignment.at(i), alignment.at(j), pss, cur_g_scores );
+			for( size_t k = 0; k < cur_m_scores.size(); ++k )
+			{
+				score_t s = 0;
+				if( cur_m_scores[k] != INVALID_SCORE )
+					s += cur_m_scores[k];
+				if( cur_g_scores[k] != INVALID_SCORE )
+					s += cur_g_scores[k];
+				scores[k] += (score_t)(w * (double)s);
+			}
+		}
+	for( size_t k = 0; k < scores.size(); ++k )
+		score += scores[k];
+}
+
+
+}	// namespace mems
+
+
+#endif	// __Scoring_h__
+
diff --git a/libMems/SeedMasks.h b/libMems/SeedMasks.h
new file mode 100644
index 0000000..64d65e3
--- /dev/null
+++ b/libMems/SeedMasks.h
@@ -0,0 +1,404 @@
+/*******************************************************************************
+ * $Id: SortedMerList.h,v 1.13 2004/02/27 23:08:55 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _SeedMasks_h_
+#define _SeedMasks_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+#include "libGenome/gnDefs.h"
+
+/* Seed patterns taken from: AE Darling, T Treangen, L Zhang, C Kuiken, X Messeguer, NT Perna
+ * "Procrastination leads to efficient match filtration for local multiple alignment" 
+ * Lecture Notes in Bioinformatics 4175:126-137 Springer-Verlag 2006
+ */
+
+/**
+ * returns the array of default seed mask patterns
+ * Each seed is represented by a pair of 32 bit integers
+ */
+#ifdef __cplusplus
+static
+#endif
+uint32** seedMasks();
+
+/**
+ * Table of seed patterns indexed by weight (0..31): per weight at least six zero-padded (high word, low word) pairs.
+ * The first three seed masks in each of the following are 'good' seeds according to Louxin Zhang.
+ */
+#ifdef __cplusplus
+inline static
+#endif
+uint32** seedMasks(){
+	static uint32 seed_masks_3[] = 
+	{
+		0,0xb, //0b1011
+		0,0,		0,0,		0,0,		0,0,		0,0,	};
+	static uint32 seed_masks_4[] = 
+	{
+		0,0x2b, //0b101011, (was 0x3b, which has weight 5)
+		0,0,		0,0,		0,0,		0,0,		0,0,	};
+	static uint32 seed_masks_5[] = 
+	{
+		0,0x6b,		//0b1101011,
+		0,0x139,	//0b100111001,
+		0,0x193,	//0b110010011,
+		0,0x6b,		//0b1101011,
+		0,0,		0,0,	};
+	static uint32 seed_masks_6[] = 
+	{
+		0,0x58D, //0b10110001101,
+		0,0x653, //0b11001010011,
+		0,0x1AB, //0b110101011,
+		0,0xdb,	//0b11011011,
+		0,0,		0,0,	};
+	static uint32 seed_masks_7[] = 
+	{
+		0,0x1953,	//0b1100101010011
+		0,0x588d,	//0b101100010001101
+		0,0x688b,	//0b110100010001011
+		0,0x17d,	//0b101111101,
+		0,0x164d,	//0b1011001001101,
+		0,0,		0,0,	};
+	static uint32 seed_masks_8[] = 
+	{
+		0,0x3927, //0b11100100100111,
+		0,0x1CA7, //0b1110010100111,
+		0,0x6553, //0b110010101010011,
+		0,0xb6d,	//0b101101101101,
+		0,0,		0,0,	};
+	static uint32 seed_masks_9[] = 
+	{
+		0,0x7497,	//0b111010010010111,
+		0,0x1c927,	//0b11100100100100111,
+		0,0x72a7,	//0b111001010100111,
+		0,0x6fb,	//0b11011111011,
+		0,0x16ed,	//0b1011011101101,
+		0,0,	};
+	static uint32 seed_masks_10[] = 
+	{
+		0,0x1d297,	//		0,0b11101001010010111,
+		0,0x3A497,  //		0,0b111010010010010111,
+		0,0xE997,  //		0,0b1110100110010111,
+		0,0x6D5B,  //		0,0b110110101011011,
+		0,0,		0,0,	};
+	static uint32 seed_masks_11[] = 
+	{
+		0,0x1e54f,	//0b11110010101001111, (was 0x7954f, the weight 12 pattern)
+		0,0x75257,	//0b1110101001001010111,
+		0,0x1c9527,	//0b111001001010100100111,
+		0,0x5bed,	//0b101101111101101,  // third b.p. coding pattern
+		0,0x5b26d,	//0b1011011001001101101,
+		0,0,	};
+	static uint32 seed_masks_12[] = 
+	{
+		0,0x7954f,	//		0,0b1111001010101001111,
+		0,0x3D32F,  //		0,0b111101001100101111,
+		0,0x768B7,  //		0,0b1110110100010110111,
+		0,0x5B56D,  //		0,0b1011011010101101101,
+		0,0,		0,0,	};
+	static uint32 seed_masks_13[] = 
+	{
+		0,0x792a4f,	//0b11110010010101001001111,
+		0,0x1d64d7,	//0b111010110010011010111,
+		0,0x1d3597,	//0b111010011010110010111,
+		0,0x1b7db,	//0b11011011111011011,  // third b.p. coding pattern
+		0,0x75ad7,	//0b1110101101011010111,
+		0,0,	};
+	static uint32 seed_masks_14[] = 
+	{
+		0,0x1e6acf,  //		0,0b111100110101011001111,
+		0,0xF59AF,   //		0,0b11110101100110101111,
+		0,0x3D4CAF,  //		0,0b1111010100110010101111,
+		0,0x35AD6B,  //		0,0b1101011010110101101011,
+		0,0,		0,0,	};
+	static uint32 seed_masks_15[] = 
+	{
+		0,0x7ac9af,	//0b11110101100100110101111,
+		0,0x7b2a6f,	//0b11110110010101001101111,
+		0,0x79aacf,	//0b11110011010101011001111,
+		0,0x16df6d,	//0b101101101111101101101,	// third b.p. coding pattern
+		0,0x6b5d6b,	//0b11010110101110101101011,
+		0,0,	};
+	static uint32 seed_masks_16[] = 
+	{
+		0,0xf599af,  //		0,0b111101011001100110101111,
+		0,0xEE5A77,  //		0,0b111011100101101001110111,
+		0,0x7CD59F,  //		0,0b11111001101010110011111,
+		0,0xEB5AD7,  //		0,0b111010110101101011010111,
+		0,0,		0,0,	};
+	static uint32 seed_masks_17[] =
+	{
+		0,0x6dbedb,	//0b11011011011111011011011,	// third b.p. coding pattern
+		0,0,		0,0,		0,0,		0,0,		0,0,	};
+	static uint32 seed_masks_18[] =
+	{
+		0,0x3E6B59F,//		0,0b11111001101011010110011111,
+		0,0x3EB335F,//		0,0b11111010110011001101011111,
+		0,0x7B3566F,//		0,0b111101100110101011001101111,
+		0,0,		0,0,		0,0,	};
+
+	static uint32 seed_masks_19[] =
+	{
+		0,0x7b974ef,	//0b111101110010111010011101111
+		0,0x7d6735f,	//0b111110101100111001101011111
+		0,0x1edd76f,	//0b1111011011101011101101111 (was 0x1edd74f, which has weight 18)
+		0,0,		0,0,		0,0,	};
+	static uint32 seed_masks_20[] =
+	{
+		0,0x1F59B35F,	//0b11111010110011011001101011111,
+		0,0x3EDCEDF,	//0b11111011011100111011011111,
+		0,0xFAE675F,	//0b1111101011100110011101011111,
+		0,0,		0,0,		0,0,	};
+	static uint32 seed_masks_21[] =
+	{
+		0,0x7ddaddf,	//0b111110111011010110111011111,
+		0,0x1f9aeb3f,	//0b11111100110101110101100111111, (was 0xaeb3f: the upper bits were dropped)
+		0,0x7eb76bf,	//0b111111010110111011010111111,
+		0,0,		0,0,		0,0,	};
+	// default to solid seeds for weight 22+
+	static uint32 seed_masks_22[] =
+	{
+		0,0x003fffff,
+		0,0,		0,0,		0,0,		0,0,		0,0,	};
+	static uint32 seed_masks_23[] =
+	{
+		0,0x007fffff,
+		0,0,		0,0,		0,0,		0,0,		0,0,	};
+	static uint32 seed_masks_24[] =
+	{
+		0,0x00ffffff,
+		0,0,		0,0,		0,0,		0,0,		0,0,	};
+	static uint32 seed_masks_25[] =
+	{
+		0,0x01ffffff,
+		0,0,		0,0,		0,0,		0,0,		0,0,	};
+	static uint32 seed_masks_26[] =
+	{
+		0,0x03ffffff,
+		0,0,		0,0,		0,0,		0,0,		0,0,	};
+	static uint32 seed_masks_27[] =
+	{
+		0,0x07ffffff,
+		0,0,		0,0,		0,0,		0,0,		0,0,	};
+	static uint32 seed_masks_28[] =
+	{
+		0,0x0fffffff,
+		0,0,		0,0,		0,0,		0,0,		0,0,	};
+	static uint32 seed_masks_29[] =
+	{
+		0,0x1fffffff,
+		0,0,		0,0,		0,0,		0,0,		0,0,	};
+	static uint32 seed_masks_30[] =
+	{
+		0,0x3fffffff,
+		0,0,		0,0,		0,0,		0,0,		0,0,	};
+	static uint32 seed_masks_31[] =
+	{
+		0,0x7fffffff,
+		0,0,		0,0,		0,0,		0,0,		0,0,	};
+
+	static uint32 no_seeds[] = 
+	{
+		0,0,	
+		0,0,	
+		0,0,	
+		0,0,	
+		0,0,	
+		0,0,	
+	};
+	
+	static uint32* seed_masks[] = 
+	{
+	no_seeds,
+	no_seeds,
+	no_seeds,
+	seed_masks_3,
+	seed_masks_4,
+	seed_masks_5,
+	seed_masks_6,
+	seed_masks_7,
+	seed_masks_8,
+	seed_masks_9,
+	seed_masks_10,
+	seed_masks_11,
+	seed_masks_12,
+	seed_masks_13,
+	seed_masks_14,
+	seed_masks_15,
+	seed_masks_16,
+	seed_masks_17,
+	seed_masks_18,
+	seed_masks_19,
+	seed_masks_20,
+	seed_masks_21,
+	seed_masks_22,
+	seed_masks_23,
+	seed_masks_24,
+	seed_masks_25,
+	seed_masks_26,
+	seed_masks_27,
+	seed_masks_28,
+	seed_masks_29,
+	seed_masks_30,
+	seed_masks_31,
+	};
+	
+	return seed_masks;
+}
+
+static const int CODING_SEED = 3;
+static const int SOLID_SEED = INT_MAX;
+
+/**
+ * Returns a solid seed of a given weight.
+ */
+#ifdef __cplusplus
+static
+#endif
+int64 getSolidSeed( int weight );
+
+#ifdef __cplusplus
+inline static
+#endif
+int64 getSolidSeed( int weight ){
+	/* shifting by a negative count or by 64 or more bits is undefined, so clamp first */
+	if( weight <= 0 )
+		return 0;
+	return weight >= 64 ? (int64)-1 : (((int64)1) << weight) - 1;
+}
+
+
+
+/**
+ * returns a seed of a given weight.  Setting seed_rank > 0 will select a seed
+ * of a lower sensitivity rank according to Choi et. al. 2004
+ */
+#ifdef __cplusplus
+static int64 getSeed( int weight, int seed_rank = 0 );
+#else
+int64 getSeed( int weight, int seed_rank );
+#endif
+
+#ifdef __cplusplus
+inline static
+#endif
+int64 getSeed( int weight, int seed_rank ){
+	uint32** masks;
+	uint32 high, low;	/* unsigned so that OR-ing them into seed cannot sign-extend */
+	int64 seed = 0;
+	/* a negative weight can neither index the table nor form a shift count: no seed */
+	if( weight < 0 )
+		return 0;
+	if( seed_rank == SOLID_SEED )
+		return getSolidSeed( weight );
+	masks = seedMasks();
+	if(weight > 31)
+		return getSolidSeed(32);
+	/* ranks outside 0..5 have no guaranteed table entry; fall back to a solid seed */
+	if( seed_rank < 0 || seed_rank > 5 )
+		return getSolidSeed(weight);
+	if( masks[weight][seed_rank*2+1] == 0 )
+		return getSolidSeed(weight);
+	high = masks[ weight ][ seed_rank*2 ];
+	low = masks[ weight ][ seed_rank*2 + 1 ];
+	seed |= high;
+	seed <<= 32;
+	seed |= low;
+	return seed;
+}
+
+
+/**
+ * calculates the length of a seed pattern
+ */
+#ifdef __cplusplus
+static
+#endif
+int getSeedLength( int64 seed );
+
+#ifdef __cplusplus
+inline static
+#endif
+int getSeedLength( int64 seed ){
+	/* span from the lowest to the highest set bit, inclusive; 0 for an empty seed */
+	int lowest = -1;
+	int highest = -1;
+	uint pos;
+	for( pos = 0; pos < 64; ++pos, seed >>= 1 ){
+		if( (seed & 1) == 0 )
+			continue;
+		if( lowest == -1 )
+			lowest = pos;
+		highest = pos;
+	}
+	if( highest == -1 )
+		return 0;
+	return highest - lowest + 1;
+}
+
+/**
+ * calculates the weight of a seed pattern
+ */
+#ifdef __cplusplus
+static
+#endif
+int getSeedWeight( int64 seed );
+
+#ifdef __cplusplus
+inline static
+#endif
+int getSeedWeight( int64 seed ){
+	/* counts the set bits: tally the low bit while shifting all 64 positions past it */
+	int ones = 0;
+	uint remaining = 64;
+	while( remaining > 0 ){
+		ones += (int)(seed & 1);
+		seed >>= 1;
+		--remaining;
+	}
+	return ones;
+}
+
+const uint MIN_DNA_SEED_WEIGHT = 5;
+const uint MAX_DNA_SEED_WEIGHT = 31;
+
+/**
+ * Calculate the default seed weight based on sequence length
+ */
+#ifdef __cplusplus
+static
+#endif
+uint getDefaultSeedWeight( gnSeqI avg_sequence_length );
+
+#ifdef __cplusplus
+inline static
+#endif
+uint getDefaultSeedWeight( gnSeqI avg_sequence_length ){
+	uint mer_size;
+	// log(0) is -infinity and converting that to uint is undefined, so answer 0 up front
+	if( avg_sequence_length == 0 )
+		return 0;
+	mer_size = (uint)ceil((log( (double)avg_sequence_length ) / log( 2.0 ))/1.5);
+	// don't allow even weights-- they can be palindromic
+	if( !(mer_size & 0x1 ) )
+		++mer_size;
+	mer_size = mer_size < MIN_DNA_SEED_WEIGHT ? 0 : mer_size;
+	// 31 is the maximum DNA seed weight
+	return mer_size > MAX_DNA_SEED_WEIGHT ? MAX_DNA_SEED_WEIGHT : mer_size;
+}
+
+
+#endif // _SeedMasks_h_
diff --git a/libMems/SeedOccurrenceList.h b/libMems/SeedOccurrenceList.h
new file mode 100644
index 0000000..e2cbcb0
--- /dev/null
+++ b/libMems/SeedOccurrenceList.h
@@ -0,0 +1,100 @@
+#ifndef __SeedOccurrenceList_h__
+#define __SeedOccurrenceList_h__
+
+#include <vector>
+#include "libMems/SortedMerList.h"
+#include <boost/iostreams/device/mapped_file.hpp>
+#include <boost/filesystem.hpp>
+#include <fstream>
+#include "libMems/Files.h"
+
+namespace mems
+{
+
+/**
+ * Stores, for every sequence position, how often the seed starting there
+ * occurs in a sorted mer list, smoothed over the seeds covering the position.
+ */
+class SeedOccurrenceList
+{
+public:
+	typedef float32 frequency_type;
+
+	SeedOccurrenceList(){}
+
+	/**
+	 * Fills the frequency table from a sorted mer list.
+	 * Runs of consecutive entries that are equal under the seed mask are
+	 * counted, and the run length is stored at the position of each entry.
+	 * The table is then smoothed and any remaining zero is replaced by 1.
+	 * @param sml	must provide Length(), SMLLength(), GetSeedMask(),
+	 *		SeedLength() and operator[] returning a bmer
+	 */
+	template< typename SMLType >
+	void construct( SMLType& sml )
+	{
+		const size_t total_len = sml.Length();
+		count.resize(total_len);
+		// an empty sequence has no positions to count or smooth
+		if( total_len == 0 )
+			return;
+		size_t seed_start = 0;
+		size_t cur_seed_count = 1;
+		uint64 mer_mask = sml.GetSeedMask();
+		size_t seedI = 1;
+		bmer prevmer;
+		bmer merI; 
+		const size_t sml_length = sml.SMLLength();
+		if( sml_length > 0 )
+			merI = sml[0];
+		for( seedI = 1; seedI < sml_length; seedI++ )
+		{
+			prevmer = merI;
+			merI = sml[seedI];
+			if( (merI.mer & mer_mask) == (prevmer.mer & mer_mask) )
+			{
+				++cur_seed_count;
+				continue;
+			}
+			// the run ended: set seed frequencies for all of its members
+			for( size_t i = seed_start; i < seedI; ++i )
+				count[sml[i].position] = (frequency_type)cur_seed_count;
+			seed_start = seedI;
+			cur_seed_count = 1;
+		}
+		// set seed frequencies for the last few
+		for( size_t i = seed_start; i < seedI && i < sml_length; ++i )
+			count[sml[i].position] = (frequency_type)cur_seed_count;
+		// hack: fudge the last few values on the end of the sequence, necessary when sequence isn't circular
+		for( ; seedI < total_len; ++seedI )
+			count[seedI]=1;
+
+		smoothFrequencies( sml );
+
+		// wipe out any stray zeros
+		for( size_t i = 0; i < total_len; ++i )
+			if( count[i]== 0 )
+				count[i] = 1;
+	}
+
+
+	/** @return the stored frequency for a position; the index is not range-checked */
+	frequency_type getFrequency( gnSeqI position )
+	{
+		return count[position];
+	}
+
+protected:
+	/**
+	 * converts position freqs to the average freq of all k-mers containing that position
+	 */
+	template< typename SMLType >
+	void smoothFrequencies( const SMLType& sml )
+	{
+		size_t seed_length = sml.SeedLength();
+		// without a window or without data there is nothing to average, and
+		// going on would divide by zero or read count[0] of an empty vector
+		if( seed_length == 0 || count.empty() )
+			return;
+		// hack: for beginning (seed_length) positions assume that previous
+		// containing seeds were unique
+		double sum = seed_length - 1 + count[0];
+		// circular buffer holding the unsmoothed values inside the window
+		std::vector<frequency_type> buf(seed_length, 1);
+		buf[0] = count[0];
+		for( size_t i = 1; i < sml.Length(); i++ )
+		{
+			count[i-1] = sum / seed_length;
+			sum += count[i];
+			size_t bufI = i % seed_length;
+			sum -= buf[bufI];
+			buf[bufI] = count[i];
+		}
+	}
+	
+	std::vector<frequency_type> count;
+};
+
+}
+
+#endif	// __SeedOccurrenceList_h__
+
diff --git a/libMems/SlotAllocator.cpp b/libMems/SlotAllocator.cpp
new file mode 100644
index 0000000..8df67a8
--- /dev/null
+++ b/libMems/SlotAllocator.cpp
@@ -0,0 +1,5 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include "SlotAllocator.h"
+
diff --git a/libMems/SlotAllocator.h b/libMems/SlotAllocator.h
new file mode 100644
index 0000000..59bd687
--- /dev/null
+++ b/libMems/SlotAllocator.h
@@ -0,0 +1,170 @@
+/*******************************************************************************
+ * $Id: SlotAllocator.h,v 1.6 2004/02/27 23:08:55 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _SlotAllocator_h_
+#define _SlotAllocator_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <vector>
+#include <list>
+#include <stdexcept>
+#include <iostream>
+#include "libMUSCLE/threadstorage.h"
+
+
+namespace mems {
+
+
+/** When more space is needed to store a datatype, the memory pool will grow by this factor */
+const double POOL_GROWTH_RATE = 1.6;
+	
+/**
+ * This class allocates memory according to the slot allocation scheme for
+ * fixed size objects.  Each time all slots are in use it allocates a new
+ * block that is POOL_GROWTH_RATE times the size of the previous one.
+ * Slots handed back through Free() are kept on a free list and reused by
+ * later calls to Allocate(); the underlying blocks are only returned to the
+ * system by Purge() (which the destructor calls).
+ * All bookkeeping members are wrapped in TLS<>.
+ * NOTE(review): TLS presumably gives each thread its own pool -- confirm
+ * against libMUSCLE/threadstorage.h.
+ */
+template< class T >
+class SlotAllocator {
+public:
+	/** @return the shared allocator instance for type T */
+	static SlotAllocator<T>& GetSlotAllocator();
+	/** @return a pointer to raw storage for one T; no constructor is run */
+	T* Allocate();
+	/** Runs the destructor of *t and puts its slot on the free list */
+	void Free( T* t );
+	/** Frees every element of chunk as above, then empties chunk */
+	void Free( std::vector<T*>& chunk );
+	~SlotAllocator(){ 
+		Purge();
+	};
+	/**
+	 * Releases every block with free() and resets the bookkeeping.
+	 * Destructors of objects still living in the blocks are not run here.
+	 */
+	void Purge(){
+//#pragma omp critical
+//{
+	std::vector<T*>& data = this->data.get();
+	unsigned& tail_free = this->tail_free.get();
+	unsigned& n_elems = this->n_elems.get();
+	std::vector< T* >& free_list = this->free_list.get();
+		for( unsigned dataI = 0; dataI < data.size(); dataI++ )
+			free(data[dataI]);
+		data.clear();
+		free_list.clear();
+		tail_free = 0;
+		n_elems = 0;
+//}
+	}
+
+protected:
+	TLS< std::vector<T*> > data;	/**< the allocated blocks, most recent last */
+	TLS< unsigned > tail_free;	/**< number of never-used slots left at the end of the most recent block */
+	TLS< unsigned > n_elems;	/**< number of T in the most recently allocated block */
+
+	TLS< std::vector< T* > > free_list;	/**< slots returned through Free() and available for reuse */
+
+private:
+	// construction and copying are private: instances come from GetSlotAllocator()
+	SlotAllocator() : tail_free(0), n_elems(0) {};
+	SlotAllocator& operator=( SlotAllocator& sa ){ n_elems = sa.n_elems; data = sa.data; tail_free = sa.tail_free; return *this;};
+	SlotAllocator( SlotAllocator& sa ){ *this = sa; };
+		
+};
+
+/**
+ * Returns the allocator shared by all users of type T.
+ * The instance is created on first use with new and never deleted, so its
+ * destructor (and therefore Purge()) is not run at program exit.
+ */
+template< class T >
+inline
+SlotAllocator< T >& SlotAllocator< T >::GetSlotAllocator(){
+	static SlotAllocator< T >* sa = new SlotAllocator< T >();
+	return *sa;
+}
+
+
+/**
+ * Hands out raw storage for one T.  No constructor is run.
+ * Slots are taken, in order of preference, from the free list, from the
+ * unused tail of the most recent block, or from a newly allocated block
+ * that is POOL_GROWTH_RATE times larger than the previous one.
+ * If the system cannot provide a block of that size, the request is halved
+ * until it succeeds.
+ * @throws std::out_of_range when not even a single slot can be allocated
+ */
+template< class T >
+inline
+T* SlotAllocator< T >::Allocate(){
+	std::vector<T*>& data = this->data.get();
+	unsigned& tail_free = this->tail_free.get();
+	unsigned& n_elems = this->n_elems.get();
+	std::vector< T* >& free_list = this->free_list.get();
+//	omp_guard rex( locker );
+
+	// cheapest option: recycle a slot that was handed back earlier
+	if( !free_list.empty() ){
+		T* t_ptr = free_list.back();
+		free_list.pop_back();
+		return t_ptr;
+	}
+
+	// next: take the first never-used slot of the most recent block
+	if( tail_free > 0 ){
+		int T_index = n_elems - tail_free--;
+		return &(data.back()[ T_index ]);
+	}
+
+	// Last resort:
+	// increase the size of the data array
+	unsigned new_size = (unsigned)(((double)n_elems * POOL_GROWTH_RATE)+0.5);
+	if( new_size == 0 )
+		new_size++;
+	T* new_data = NULL;
+	// malloc reports failure by returning NULL (it never throws), so the
+	// fallback to smaller blocks has to test the returned pointer
+	while( new_size > 0 ){
+		new_data = (T*)malloc(sizeof(T)*new_size);
+		if( new_data != NULL )
+			break;
+		new_size = new_size / 2;
+	}
+	if( new_data == NULL ){
+		throw std::out_of_range( "SlotAllocator::Allocate(): Unable to allocate more memory" );
+	}
+	data.push_back( new_data );
+	// slot 0 is handed out right now, the rest of the block stays in the tail
+	tail_free = new_size - 1;
+	n_elems = new_size;
+	return new_data;
+}
+
+/**
+ * Destroys the object at t and makes its slot available to Allocate() again.
+ * The memory itself is not returned to the system.
+ * There is no check for a pointer being freed twice (see the disabled
+ * debugging code below).
+ */
+template< class T >
+inline
+void SlotAllocator< T >::Free( T* t ){
+	// for debugging double free
+/*	for(size_t i = 0; i < free_list.size(); i++ )
+		if( free_list[i] == t )
+			std::cerr << "ERROR DOUBLE FREE\n";
+*/	
+	// run the destructor explicitly; the storage was obtained with malloc
+	t->~T();
+{
+//	omp_guard rex( locker );
+	std::vector< T* >& free_list = this->free_list.get();
+
+	free_list.push_back( t );
+}
+}
+
+/**
+ * Bulk version of Free(): destroys every object pointed to by chunk, appends
+ * all of the slots to the free list in one step and then empties chunk.
+ */
+template< class T >
+inline
+void SlotAllocator< T >::Free( std::vector<T*>& chunk ){
+	// for debugging double free
+	// (the disabled code below was copied from the single-pointer overload
+	// and refers to a variable 't' that does not exist in this function)
+/*	for(size_t i = 0; i < free_list.size(); i++ )
+		if( free_list[i] == t )
+			std::cerr << "ERROR DOUBLE FREE\n";
+*/	
+	for( size_t i = 0; i < chunk.size(); i++ )
+		chunk[i]->~T();
+{
+//	omp_guard rex( locker );
+	std::vector< T* >& free_list = this->free_list.get();
+	free_list.insert(free_list.end(), chunk.begin(), chunk.end());
+}
+	chunk.clear();
+}
+
+}
+
+#endif	// _SlotAllocator_h_
diff --git a/libMems/SortedMerList.cpp b/libMems/SortedMerList.cpp
new file mode 100644
index 0000000..dcfe808
--- /dev/null
+++ b/libMems/SortedMerList.cpp
@@ -0,0 +1,826 @@
+/*******************************************************************************
+ * $Id: SortedMerList.cpp,v 1.23 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/SortedMerList.h"
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+/**
+ * Returns the shared character-to-code table for DNA.
+ * The table is built once, on the first call, and never freed.
+ */
+const uint8* SortedMerList::BasicDNATable(){
+	static const uint8* const bdt = SortedMerList::CreateBasicDNATable();
+	return bdt;
+}
+
+/**
+ * Returns the shared character-to-code table for amino acids.
+ * The table is built once, on the first call, and never freed.
+ */
+const uint8* SortedMerList::ProteinTable(){
+	static const uint8* const bdt = SortedMerList::CreateProteinTable();
+	return bdt;
+}
+
+/**
+ * Allocates a UINT8_MAX-entry table mapping characters to 2-bit DNA codes.
+ * Every character not listed below maps to 0.  The caller owns the array.
+ */
+const uint8* SortedMerList::CreateBasicDNATable(){
+	// code_members[k] lists the characters that are encoded as k + 1
+	static const char* const code_members[] = { "cCbByY", "gGsSkK", "tT" };
+
+	uint8* table = new uint8[UINT8_MAX];
+	memset(table, 0, UINT8_MAX);
+	for( uint8 code = 1; code <= 3; ++code ){
+		for( const char* member = code_members[code - 1]; *member != 0; ++member )
+			table[(unsigned char)*member] = code;
+	}
+	return table;
+}
+
+/**
+ * Allocates a UINT8_MAX-entry table mapping amino acid letters to codes 0-19.
+ * Upper and lower case letters share a code; every other character maps
+ * to 0.  The caller owns the array.
+ */
+const uint8* SortedMerList::CreateProteinTable(){
+	// the position of a letter in these strings is its code
+	static const char upper_case[] = "ARNDCQEGHILKMFPSTWYV";
+	static const char lower_case[] = "arndcqeghilkmfpstwyv";
+
+	uint8* table = new uint8[UINT8_MAX];
+	memset(table, 0, UINT8_MAX);
+	for( uint8 code = 0; upper_case[code] != 0; ++code ){
+		table[(unsigned char)upper_case[code]] = code;
+		table[(unsigned char)lower_case[code]] = code;
+	}
+	return table;
+}
+
+/**
+ * Creates an empty list configured for 2-bit DNA with the default mer size.
+ * NOTE(review): header.version, header.seed and header.seed_weight are not
+ * assigned here -- confirm that they are set elsewhere before being read.
+ */
+SortedMerList::SortedMerList(){
+	//default to BasicDNA settings
+	header.length = 0;
+	header.alphabet_bits = 2;
+	header.unique_mers = NO_UNIQUE_COUNT;
+	memcpy(header.translation_table, BasicDNATable(), UINT8_MAX);
+	header.description[0] = 0;
+	header.seed_length = DNA_MER_SIZE;
+	header.id = 0;
+	header.circular = false;
+	mask_size = DNA_MER_SIZE;
+	mer_mask = 0;
+	seed_mask = 0;
+	// init sequence data to null
+	sequence = NULL;
+	binary_seq_len = 0;
+}
+
+/**
+ * Copy constructor; delegates to operator=.
+ */
+SortedMerList::SortedMerList( const SortedMerList& sa ){
+	// operator= inspects 'sequence', so it must hold a defined value first
+	sequence = NULL;
+	*this = sa;
+}
+
+/**
+ * Copies header, masks and a deep copy of the binary sequence from sa.
+ * Assigning an object to itself is a no-op.
+ */
+SortedMerList& SortedMerList::operator=(const SortedMerList& sa)
+{
+	// without this check the buffer would be deleted and then copied from
+	if( this == &sa )
+		return *this;
+
+	// build the copy before touching our own state, so that a failed
+	// allocation leaves this object unchanged
+	uint32* new_sequence = NULL;
+	if( sa.sequence != NULL ){
+		new_sequence = new uint32[sa.binary_seq_len];
+		memcpy(new_sequence, sa.sequence, sizeof(uint32) * sa.binary_seq_len);
+	}
+
+	header = sa.header;
+	mer_mask = sa.mer_mask;
+	seed_mask = sa.seed_mask;
+	mask_size = sa.mask_size;
+	binary_seq_len = sa.binary_seq_len;
+
+	// release the old buffer in every case, also when sa has no sequence
+	delete[] sequence;
+	sequence = new_sequence;
+
+	return *this;
+}
+
+/** Releases the binary sequence buffer. */
+SortedMerList::~SortedMerList(){
+	// delete[] on a null pointer does nothing, so no test is needed
+	delete[] sequence;
+}
+
+/**
+ * Returns the object to the state the default constructor leaves it in
+ * and releases the binary sequence.
+ */
+void SortedMerList::Clear(){
+	// drop the sequence data (delete[] accepts a null pointer)
+	delete[] sequence;
+	sequence = NULL;
+	binary_seq_len = 0;
+
+	// masks back to their initial values
+	seed_mask = 0;
+	mer_mask = 0;
+	mask_size = DNA_MER_SIZE;
+
+	// header back to the BasicDNA defaults
+	header.circular = false;
+	header.id = 0;
+	header.seed_length = DNA_MER_SIZE;
+	header.description[0] = 0;
+	memcpy(header.translation_table, BasicDNATable(), UINT8_MAX);
+	header.unique_mers = NO_UNIQUE_COUNT;
+	header.alphabet_bits = 2;
+	header.length = 0;
+}
+
+/**
+ * @return the number of characters of header.alphabet_bits bits each that
+ *         fit into the mer field of a bmer
+ */
+uint32 SortedMerList::CalculateMaxMerSize() const{
+	// tmp is only used to ask for the size of its mer field
+	bmer tmp;
+	return (sizeof(tmp.mer) * 8) / header.alphabet_bits;
+}
+
+/**
+ * Binary-searches the list for a mer.
+ * @param query_mer	the encoded mer to look for
+ * @param result	receives the index bsearch() stopped at; untouched
+ *			when the list holds no mers
+ * @return true if the entry at that index equals query_mer
+ */
+boolean SortedMerList::FindMer(const uint64 query_mer, gnSeqI& result){
+	const gnSeqI seq_len = Length();
+	const bool too_short = !header.circular && seq_len < header.seed_length;
+	if( seq_len == 0 || too_short )
+		return false;
+
+	// index of the last mer in the list
+	const gnSeqI last_pos = seq_len - (header.circular ? 1 : header.seed_length);
+
+	bmer probe;
+	probe.mer = query_mer;
+	result = bsearch(probe, 0, last_pos );
+	return ((*this)[result].mer == query_mer);
+}
+
+/**
+ * Encodes the start of query_seq as a mer and looks it up with FindMer().
+ * At most as many characters as fit into 64 bits are used.
+ */
+boolean SortedMerList::Find(const string& query_seq, gnSeqI& result) {
+	//check the length to make sure it is small enough
+	gnSeqI len;
+	if( query_seq.length() * header.alphabet_bits < 64 )
+		len = query_seq.length();
+	else
+		len = 64 / header.alphabet_bits;
+
+	struct bmer probe;
+	probe.mer = 0;
+	translate((uint8*)&probe.mer, query_seq.c_str(), len);
+	return FindMer( probe.mer, result );
+}
+
+/**
+ * Collects the indices of all list entries equal to the encoded query.
+ * NOTE(review): 'result' is taken by value, so the caller never sees the
+ * indices collected here.  Fixing that needs a matching change to the
+ * declaration in the header, which is why the signature is left alone.
+ */
+void SortedMerList::FindAll(const string& query_seq, vector<gnSeqI> result) {
+	struct bmer merle;
+	merle.mer = 0;
+
+	//check the length to make sure it is small enough
+	gnSeqI len = query_seq.length() * header.alphabet_bits < 64 ? 
+		query_seq.length() : 64 / header.alphabet_bits;
+		
+	translate((uint8*)&merle.mer, query_seq.c_str(), len);
+	
+	// same guard as FindMer(): without it last_pos would wrap around
+	gnSeqI last_pos = Length();
+	if( last_pos == 0 || (last_pos < header.seed_length && !header.circular) )
+		return;
+	last_pos -= header.circular ? 1 : header.seed_length;
+
+	//find one match, then widen the range in both directions
+	gnSeqI matchI = bsearch(merle, 0, last_pos);
+	if( (*this)[matchI].mer != merle.mer )
+		return;	// the query does not occur at all
+
+	//first seek backwards, never reading in front of index 0
+	gnSeqI first_matchI = matchI;
+	while( first_matchI > 0 && (*this)[first_matchI - 1].mer == merle.mer )
+		first_matchI--;
+
+	//now seek forwards, never reading behind the last mer
+	gnSeqI end_matchI = matchI + 1;
+	while( end_matchI <= last_pos && (*this)[end_matchI].mer == merle.mer )
+		end_matchI++;
+
+	//fill the result array
+	for(matchI = first_matchI; matchI < end_matchI; matchI++)
+		result.push_back(matchI);
+}
+
+/** @return a copy of the free-form description stored in the header */
+string SortedMerList::Description() const{
+	return header.description;
+}
+
+/**
+ * Stores a free-form description in the header.
+ * Text longer than DESCRIPTION_SIZE-1 characters is truncated.
+ */
+void SortedMerList::SetDescription(const string& d){
+	strncpy(header.description, d.c_str(), DESCRIPTION_SIZE-1);
+	// strncpy does not write a terminator when the source fills the whole
+	// count, so the last byte of the field is set explicitly
+	header.description[DESCRIPTION_SIZE-1] = 0;
+}
+
+/**
+ * Returns the length of the seed pattern stored in the header.
+ */
+uint SortedMerList::SeedLength() const{
+	return header.seed_length;
+}
+/**
+ * Returns the weight of the seed that this SML was sorted on.
+ */
+uint SortedMerList::SeedWeight() const{
+	return header.seed_weight;
+}
+/**
+ * Returns the seed pattern that this SML was sorted on.
+ */
+uint64 SortedMerList::Seed() const{
+	return header.seed;
+}
+
+/**
+ * Returns the circularity flag stored in the header.
+ */
+boolean SortedMerList::IsCircular() const{
+	return header.circular;
+}
+
+/**
+ * Returns the mask most recently computed by SetMerMaskSize().
+ */
+uint64 SortedMerList::GetMerMask() const{
+	return mer_mask;
+}
+
+/**
+ * Returns the seed mask; Create() sets it to the mer mask for seed_weight characters.
+ */
+uint64 SortedMerList::GetSeedMask() const{
+	return seed_mask;
+}
+
+/**
+ * Returns the mask size recorded by SetMerMaskSize(), in characters.
+ */
+uint32 SortedMerList::GetMerMaskSize() const{
+	return mask_size;
+}
+
+/**
+ * Sets the number of leading characters that the mer mask keeps.
+ * @param mer_size	requested size in characters; values larger than the
+ *			seed length are clamped to the seed length
+ * The mask has its top (alphabet_bits * size) bits set.
+ */
+void SortedMerList::SetMerMaskSize(uint32 mer_size){
+	mask_size = mer_size > header.seed_length ? header.seed_length : mer_size;
+
+	// calculate the mer mask from the clamped size, so that mask and
+	// mask_size always agree
+	const uint64 all_ones = ~(uint64)0;
+	const uint64 mask_bits = (uint64)header.alphabet_bits * mask_size;
+	// shifting a 64-bit value by 64 or more is undefined, so both ends of
+	// the range are handled without a shift
+	if( mask_bits == 0 )
+		mer_mask = 0;
+	else if( mask_bits >= 64 )
+		mer_mask = all_ones;
+	else
+		mer_mask = all_ones << (64 - mask_bits);
+}
+
+/** @return the sequence length stored in the header, in characters */
+gnSeqI SortedMerList::Length() const{
+	return header.length;
+}
+
+/**
+ * @return the number of seeds in the list: one per position for circular
+ *         sequences, otherwise one per position at which a complete seed
+ *         fits, and 0 if the sequence is shorter than the seed
+ */
+gnSeqI SortedMerList::SMLLength() const{
+	// make sure there was at least one seed
+	if( header.length < header.seed_length )
+		return 0;
+	if( !header.circular )
+		return header.length - header.seed_length + 1;
+	return header.length;
+}
+
+/** @return the identifier stored in the header */
+sarID_t SortedMerList::GetID() const{
+	return header.id;
+}
+/** Stores an identifier in the header */
+void SortedMerList::SetID(const sarID_t d){
+	header.id = d;
+}
+
+#define OPT_HEADER_ALPHABET_BITS DNA_ALPHA_BITS
+
+/**
+ * Replaces the stored binary sequence with an encoding of seq_buf.
+ * @param seq_buf	the characters to encode
+ * @param seq_len	number of characters in seq_buf
+ * The buffer is sized to hold the encoded characters plus two zero words.
+ */
+void SortedMerList::SetSequence(gnSeqC* seq_buf, gnSeqI seq_len){
+	binary_seq_len = (seq_len * header.alphabet_bits) / 32;
+	if((seq_len * header.alphabet_bits) % 32 != 0)
+		binary_seq_len++;
+
+	binary_seq_len+=2;	// zero-pad the end for extra working room
+
+	// reset the pointer first so that it never dangles if new throws
+	delete[] sequence;
+	sequence = NULL;
+	sequence = new uint32[binary_seq_len];
+	// new[] leaves the words uninitialised and translate32() only writes
+	// the words it encodes into, so the padding must be cleared here
+	memset(sequence, 0, sizeof(uint32) * binary_seq_len);
+	translate32(sequence, seq_buf, seq_len);
+}
+
+// this should return a mer containing all characters covered by the
+// spaced seed
+// The result is the 64 bits of the binary sequence starting at 'position',
+// restricted to the bits selected by mer_mask.
+// No bounds check is made: up to three consecutive words of 'sequence' are read.
+uint64 SortedMerList::GetMer(gnSeqI position) const
+{
+	//check this for access violations.
+	uint64 mer_a;
+	gnSeqI mer_word, mer_bit;
+	uint32 merle;
+	//get mer_a
+	mer_a = 0;
+	// word index and bit offset of the first character inside that word
+	mer_word = (position * (gnSeqI)OPT_HEADER_ALPHABET_BITS) / (gnSeqI)32;
+	mer_bit = (position * (gnSeqI)OPT_HEADER_ALPHABET_BITS) % (gnSeqI)32;
+	// join two consecutive words into one 64-bit value
+	mer_a |= sequence[mer_word++];
+	mer_a <<= 32;
+	mer_a |= sequence[mer_word++];
+	if(mer_bit > 0){
+		// not word aligned: drop the leading mer_bit bits and refill the
+		// low end from the top of the third word
+		merle = sequence[mer_word];
+		merle >>= 32 - mer_bit;
+		mer_a <<= mer_bit;
+		mer_a |= merle;
+	}
+	mer_a &= mer_mask;
+	return mer_a;
+}
+
+//potential buffer overflows here.  make dest extra big.
+// Copies the binary encoding of 'len' characters starting at 'offset' into
+// dest, shifted so that the first character starts at the top of dest[0].
+// The read is cut short at the end of the sequence.
+// Throws IndexOutOfBounds (via Throw_gnEx) when offset is not inside the sequence.
+void SortedMerList::GetBSequence(uint32* dest, const gnSeqI len, const gnSeqI offset){
+	//first determine the byte offset of the sequence within the file.
+	if(offset >= header.length){
+		Throw_gnEx( IndexOutOfBounds() );
+	}
+	// word that holds the first character, and its bit offset in that word
+	uint64 startpos = (offset * OPT_HEADER_ALPHABET_BITS) / 32;
+	int begin_remainder = (offset * OPT_HEADER_ALPHABET_BITS) % 32;
+	uint64 readlen = offset + len < header.length ? len : header.length - offset;
+
+	// number of whole words to copy, plus partial words at either end
+	gnSeqI word_read_len = (readlen * OPT_HEADER_ALPHABET_BITS) / 32;
+	int end_remainder = (readlen * OPT_HEADER_ALPHABET_BITS) % 32;
+	if(begin_remainder + (readlen * OPT_HEADER_ALPHABET_BITS) > 32
+	   && end_remainder > 0)
+		word_read_len++;
+	if(begin_remainder > 0)
+		word_read_len++;
+	
+	//now do the actual read
+	memcpy((char*)dest, (char*)sequence + (startpos * 4), word_read_len * 4);
+	
+	//now shift if needed
+	// a negative bit count makes ShiftWords shift to the left
+	ShiftWords(dest, word_read_len, -begin_remainder);
+	
+	//now mask if needed
+	// clears the bits behind the last requested character
+	// NOTE(review): the second branch indexes dest[word_read_len-2]; confirm
+	// that word_read_len is at least 2 whenever that branch is taken
+	if(end_remainder > begin_remainder){
+		uint32 mask = 0xFFFFFFFF;
+		mask <<= 32 - (end_remainder - begin_remainder);
+		dest[word_read_len-1] &= mask;
+	}else if(end_remainder < begin_remainder){
+		uint32 mask = 0xFFFFFFFF;
+		mask <<= (begin_remainder - end_remainder);
+		dest[word_read_len-2] &= mask;
+	}
+}
+
+/**
+ * Binary search over the inclusive index range [start, end].
+ * @return the index of an entry equal to query_mer, or, if there is none,
+ *         the index at which the search ended
+ */
+gnSeqI SortedMerList::bsearch(const struct bmer& query_mer, const gnSeqI start, const gnSeqI end) {
+	gnSeqI lo = start;
+	gnSeqI hi = end;
+	for(;;){
+		const gnSeqI mid = (lo + hi) / 2;
+		const uint64 mid_value = (*this)[mid].mer;
+		if( (mid_value < query_mer.mer) && (mid < hi) )
+			lo = mid + 1;	// continue in the upper half
+		else if( (mid_value > query_mer.mer) && (lo < mid) )
+			hi = mid - 1;	// continue in the lower half
+		else
+			// either an exact hit, or the range is exhausted and this is
+			// where the mer would be if it existed
+			return mid;
+	}
+}
+
+//translate the character sequence to binary form based on the
+//translation table.
+// Characters are packed most significant bits first, one byte at a time.
+// dest needs room for ceil(len * alphabet bits / 8) bytes (at least one);
+// nothing beyond that is written.
+void SortedMerList::translate(uint8* dest, const gnSeqC* src, const gnSeqI len) const{
+	uint8 start_bit = 0;
+	gnSeqI cur_byte = 0;
+	const uint32 alpha_bits = OPT_HEADER_ALPHABET_BITS;
+	dest[cur_byte] = 0;
+	for(gnSeqI i=0; i < len; i++){
+		// index with an unsigned value: a plain char may be negative
+		uint8 tmp = header.translation_table[(uint8)src[i]];
+		if(start_bit + alpha_bits <= 8){
+			tmp <<= 8 - start_bit - alpha_bits;
+			dest[cur_byte] |= tmp;
+			start_bit += alpha_bits;
+			// only open the next byte if another character will be stored
+			// in it; otherwise a byte past the end of dest would be written
+			if(start_bit >= 8 && i + 1 < len){
+				start_bit = 0;
+				cur_byte++;
+				dest[cur_byte] = 0;
+			}
+		}else{
+			// the character straddles a byte boundary: the high part
+			// completes the current byte, the low part starts the next
+			uint8 over_bits = (start_bit + alpha_bits) % 8;
+			uint8 tmp2 = tmp;
+			tmp2 <<= 8 - over_bits;
+			tmp >>= over_bits;
+			dest[cur_byte] |= tmp;
+			cur_byte++;
+			dest[cur_byte] = tmp2;
+			start_bit = over_bits;
+		}
+	}
+}
+
+// Same as translate(), but packs the characters into 32-bit words.
+// Does nothing for an empty input.  A '-' in the input is reported on
+// cerr and a C string is thrown.
+void SortedMerList::translate32(uint32* dest, const gnSeqC* src, const gnSeqI len) const{
+	if( len == 0 )
+		return;
+	uint8 start_bit = 0;
+	gnSeqI cur_word = 0;
+	const uint32 alpha_bits = OPT_HEADER_ALPHABET_BITS;
+	dest[cur_word] = 0;
+	// the counter has the same type as len, so it cannot wrap around before
+	// reaching it
+	for(gnSeqI i=0; i < len; i++){
+		if(src[i]=='-'){
+			cerr << "ERROR! gap character encountered at genome sequence position " << i << std::endl;
+			cerr << "Input sequences must be unaligned and ungapped!\n";
+			throw "Gap in genome sequence\n";
+		}
+		// index with an unsigned value: a plain char may be negative
+		uint32 tmp = header.translation_table[(uint8)src[i]];
+		if(start_bit + alpha_bits <= 32){
+			tmp <<= 32 - start_bit - alpha_bits;
+			dest[cur_word] |= tmp;
+			start_bit += alpha_bits;
+			// open the next word only if another character follows
+			if(start_bit >= 32 && i < len - 1){
+				start_bit %= 32;
+				cur_word++;
+				dest[cur_word] = 0;
+			}
+		}else{
+			// the character straddles a word boundary
+			uint8 over_bits = (start_bit + alpha_bits) % 32;
+			uint32 tmp2 = tmp;
+			tmp2 <<= 32 - over_bits;
+			tmp >>= over_bits;
+			dest[cur_word] |= tmp;
+			cur_word++;
+			dest[cur_word] = 0;
+			dest[cur_word] |= tmp2;
+			start_bit = over_bits;
+		}
+	}
+}
+/** @return a copy of the complete header */
+SMLHeader SortedMerList::GetHeader() const{
+	return header;
+}
+
+/**
+ * Counts the distinct mers (compared under mer_mask) in the list.
+ * The list is read in buffers of MER_BUFFER_SIZE entries through Read().
+ * The result is cached in header.unique_mers; progress is written to cout.
+ * @return the number of distinct mers, 0 if no mer could be read
+ */
+gnSeqI SortedMerList::UniqueMerCount(){
+	if(header.unique_mers != NO_UNIQUE_COUNT)
+		return header.unique_mers;
+
+	uint32 MER_BUFFER_SIZE = 16384;  //not quite arbitrary (2^14)
+	gnSeqI cur_pos = 0;
+	vector<bmer> mer_vector;
+	// last mer of the previous buffer, compared with the first of the next
+	bmer prev_mer;
+	prev_mer.mer = 0;
+	prev_mer.position = 0;
+	gnSeqI m_unique = 0;
+	gnSeqI report_interval = MER_BUFFER_SIZE * 212;
+	while(cur_pos < header.length){
+		if(!Read(mer_vector, MER_BUFFER_SIZE, cur_pos)){
+			break;
+//			DebugMsg("SortedMerList::UniqueMerCount: Error reading bmer vector.");
+//			return NO_UNIQUE_COUNT;
+		}
+		uint32 mer_count = mer_vector.size();
+		if(mer_count == 0)
+			break;
+		// the comparison across the buffer boundary has to use the same
+		// mask as the comparisons inside a buffer
+		if(cur_pos > 0 && (prev_mer.mer & mer_mask) != (mer_vector[0].mer & mer_mask))
+			m_unique++;
+		
+		//count them up: every change between neighbours starts a new mer
+		for(uint32 j = 1; j < mer_count; j++){
+			if((mer_vector[j-1].mer & mer_mask) != (mer_vector[j].mer & mer_mask) )
+				m_unique++;
+		}
+		prev_mer = mer_vector[mer_count - 1];
+		cur_pos += mer_count;
+		if( cur_pos % report_interval == 0 ){
+//			cout << "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b";
+			cout << m_unique << "/" << cur_pos << endl;
+		}
+	}
+	cout << endl;
+	// the changes counted above are boundaries between runs; the first run
+	// itself is added here, but only if there was any mer at all
+	if( cur_pos > 0 )
+		m_unique++;
+	header.unique_mers = m_unique;
+	return header.unique_mers;
+}
+
+//will not handle more than 8GB sequence on 32-bit systems
+// Shifts an array of words as if it were one long bit string with data[0]
+// at the most significant end.
+// bits > 0 shifts towards higher indices, bits < 0 towards lower indices.
+// Zeros are shifted in; a shift of a whole word or more is ignored.
+// Only data[0] .. data[length-1] are accessed.
+void SortedMerList::ShiftWords(unsigned int* data, uint32 length, int32 bits)
+{
+	// nothing to do, and length - 1 below would wrap around
+	if( length == 0 )
+		return;
+	int32 word_bits = 8 * sizeof(unsigned int);
+	if(bits > 0 && bits < word_bits){
+		//shift everything right starting at the end
+		data[length - 1] >>= bits;
+		for(int i=length-2; i >= 0; i--){
+			// the low bits of word i move into the top of word i+1
+			uint32 tmp = data[i];
+			tmp <<= word_bits - bits;
+			data[i+1] |= tmp;
+			data[i] >>= bits;
+		}
+	}else if(bits < 0 && bits > (-1)*word_bits){
+		bits *= -1;
+		//shift everything left
+		data[0] <<= bits;
+		// stop one word early: the last word has no successor to borrow
+		// bits from, and data[length] is outside the array
+		for(uint32 i=0; i + 1 < length; i++){
+			uint32 tmp = data[i+1];
+			tmp >>= word_bits - bits;
+			data[i] |= tmp;
+			data[i+1] <<= bits;
+		}
+	}
+}
+
+/**
+ * Appends one forward-strand mer per start position to sml_array.
+ * @param seq_buf	the characters; for circular sequences the buffer must
+ *			be followed by seed_length-1 wrap-around characters
+ * @param seq_len	length of the sequence (without wrap-around)
+ * @param circular	whether mers starting near the end wrap around
+ * @param sml_array	receives the mers, in position order
+ * Nothing is appended when the sequence is shorter than the seed.
+ */
+void SortedMerList::FillSML(gnSeqC* seq_buf, gnSeqI seq_len, boolean circular, vector<bmer>& sml_array){
+	const uint32 alpha_bits = OPT_HEADER_ALPHABET_BITS;
+	const uint32 mer_size = header.seed_length;
+	// can't have an sml if there ain't enough sequence; without this check
+	// the length computed below would wrap around
+	if( mer_size == 0 || seq_len < mer_size )
+		return;
+	gnSeqI sar_len = seq_len;
+	if(!circular)
+		sar_len -= mer_size - 1;
+	sml_array.reserve(sar_len);
+
+	bmer cur_suffix;
+	cur_suffix.mer = 0;
+	cur_suffix.position = 0;
+
+	/* now fill in the suffix array with the forward sequence*/
+	for(gnSeqI i=0; i < mer_size; i++){
+		cur_suffix.mer <<= alpha_bits;
+		// index with an unsigned value: a plain char may be negative
+		cur_suffix.mer |= header.translation_table[(uint8)seq_buf[i]];
+	}
+	// unused low bits; the mer is kept left-aligned in the 64-bit word
+	uint8 dead_bits = 64 - (mer_size * alpha_bits);
+	cur_suffix.mer <<= dead_bits;
+
+	sml_array.push_back(cur_suffix);
+
+	//fill sml_array with mers
+	for(gnSeqI seqI = 1; seqI < sar_len; seqI++){//already added the
+													//first one
+		// slide the window: drop the first character, append the next one
+		cur_suffix.position++;
+		cur_suffix.mer <<= alpha_bits;
+		uint64 new_mer = header.translation_table[(uint8)seq_buf[seqI+(mer_size-1)]];
+		new_mer <<= dead_bits;
+		cur_suffix.mer |= new_mer;
+		sml_array.push_back(cur_suffix);
+	}
+}
+
+/**
+ * Convenience overload: extracts the characters of seq and appends one
+ * forward-strand mer per start position to sml_array.
+ * Nothing is appended when the sequence is shorter than the seed.
+ */
+void SortedMerList::FillSML(const gnSequence& seq, vector<bmer>& sml_array){
+	gnSeqI seq_len = seq.length();
+	if( header.seed_length == 0 || seq_len < header.seed_length )
+		return;	// can't have an sml if there ain't enough sequence
+	// mers that start near the end of a circular sequence read up to
+	// seed_length-1 characters past it, so the buffer needs the wrap-around
+	// characters appended (the same scheme Create() uses)
+	const gnSeqI wrap_len = seq.isCircular() ? header.seed_length - 1 : 0;
+	Array<gnSeqC> seq_buf( seq_len + wrap_len );
+	seq.ToArray(seq_buf.data, seq_len);
+	if( wrap_len > 0 )
+		seq.ToArray(seq_buf.data + seq_len, wrap_len);
+	FillSML(seq_buf.data, seq_len, seq.isCircular(), sml_array);
+}
+
+/**
+ * Replaces the contents of pos_array with the positions 0 .. seq_len-1.
+ */
+void SortedMerList::FillSML(gnSeqI seq_len, vector<gnSeqI>& pos_array){
+	pos_array.clear();
+	pos_array.resize( seq_len );
+	gnSeqI next_pos = 0;
+	for( vector<gnSeqI>::iterator slot = pos_array.begin(); slot != pos_array.end(); ++slot )
+		*slot = next_pos++;
+}
+
+/**
+ * Returns the mer at 'offset' or its reverse complement, whichever
+ * compares smaller.
+ */
+uint64 SortedMerList::GetDnaMer(gnSeqI offset) const
+{
+	// forward orientation, and its reverse complement
+	const uint64 forward = SortedMerList::GetMer( offset );
+	const uint64 reverse = RevCompMer( forward, header.seed_length );
+
+	if( forward < reverse )
+		return forward;
+	return reverse;
+}
+
+#define OPT_ALPHA_MASQ 0x00000003
+
+// Computes the reverse complement of a left-aligned 2-bit encoded mer.
+// mer_a is the forward mer, mer_length its length in characters.
+// The result is left-aligned as well and always has its lowest bit set.
+// NOTE(review): the set low bit presumably marks the mer as a reverse
+// complement -- confirm against the code that reads it.
+uint64 SortedMerList::RevCompMer( uint64 mer_a, int mer_length ) const
+{
+	//find the reverse complement of mer_a and return it if it's
+	//smaller
+	uint64 mer_b, mer_c = 0;	//mer_c will be the reverse complement
+	// inverting all bits turns every 2-bit code x into 3 - x
+	mer_b = ~mer_a;
+//	uint32 masq = 0xffffffff;
+//	masq >>= 32 - header.alphabet_bits;
+	// move the characters one by one from the low end of mer_b to the low
+	// end of mer_c, which reverses their order.  mer_c is shifted after
+	// every copy, so the first character copied ends up shifted out
+	for(uint32 i = 0; i < 64; i += OPT_HEADER_ALPHABET_BITS){
+		mer_c |= mer_b & OPT_ALPHA_MASQ;
+//		mer_c |= mer_b & masq;
+		mer_b >>= OPT_HEADER_ALPHABET_BITS;
+		mer_c <<= OPT_HEADER_ALPHABET_BITS;
+	}
+	// shift out the characters that came from the unused low bits of mer_a,
+	// leaving the mer_length real characters at the top
+	mer_c <<= 64 - (OPT_HEADER_ALPHABET_BITS * (mer_length+1));
+	mer_c |= 1;
+	return mer_c;
+}
+
+
+// Appends to sml_array, for every start position, the smaller of the
+// forward mer and its reverse complement.  Reverse complement entries have
+// the lowest bit of their mer set.
+// The characters are taken from the binary 'sequence' member, seq is only
+// asked for its length and circularity.
+// Nothing is appended when the sequence is shorter than the seed.
+void SortedMerList::FillDnaSML(const gnSequence& seq, vector<bmer>& sml_array){
+	/* now fill in the suffix array with the forward sequence*/
+	const uint32 alpha_bits = OPT_HEADER_ALPHABET_BITS;
+	const uint32 mer_size = header.seed_length;
+	gnSeqI sar_len = seq.length();
+	if( sar_len < header.seed_length )
+		return;	// can't have an sml if there ain't enough sequence
+	if( !seq.isCircular() )
+		sar_len -= ( header.seed_length - 1);
+	sml_array.reserve(sar_len);
+
+	// number of unused low bits in a left-aligned mer, and a mask that
+	// keeps only the used high bits
+	uint32 dead_bits = 64 - (mer_size * alpha_bits);
+	uint64 create_mask = UINT32_MAX;
+	create_mask <<= 32;
+	create_mask |= UINT32_MAX;
+	create_mask <<= dead_bits;
+
+	// the first forward mer is the masked start of the binary sequence
+	bmer cur_suffix, rcur_suffix;
+	cur_suffix.mer = sequence[0];
+	cur_suffix.mer <<= 32;
+	cur_suffix.mer |= sequence[1];
+	cur_suffix.mer &= create_mask;
+	cur_suffix.position = 0;
+	rcur_suffix.mer = 0;
+	rcur_suffix.position = 0;
+	
+	//find the reverse complement of cur_suffix.mer and return it if it's
+	//smaller
+	// (same procedure as RevCompMer: complement, then reverse the characters)
+	uint64 mer_b = 0;
+	mer_b = ~cur_suffix.mer;
+//	uint32 masq = 0xffffffff;
+//	masq >>= 32 - alpha_bits;
+	for(uint32 i = 0; i < 64; i += alpha_bits){
+//		rcur_suffix.mer |= mer_b & masq;
+		rcur_suffix.mer |= mer_b & OPT_ALPHA_MASQ;
+		mer_b >>= alpha_bits;
+		rcur_suffix.mer <<= alpha_bits;
+	}
+	rcur_suffix.mer <<= dead_bits - alpha_bits;
+	rcur_suffix.mer |= 1;
+
+	//add the first mer
+	if(cur_suffix.mer < rcur_suffix.mer)
+		sml_array.push_back(cur_suffix);
+	else
+		sml_array.push_back(rcur_suffix);
+
+	//fill sml_array with mers
+	// NOTE(review): for circular sequences endI grows by mer_size, so the
+	// loop below appears to push mer_size more entries than sar_len, which
+	// is what was reserved -- confirm that this is intended
+	gnSeqI 	endI = sar_len + mer_size;
+	if(seq.isCircular())
+		endI += mer_size;
+
+	// state for reading one character at a time from the binary sequence:
+	// seqI is the current word, cur_seq holds its bits, cur_bit is the
+	// right-shift that brings the next character to the low end
+	uint32 rdead_bits = 64 - alpha_bits - dead_bits;
+	uint64 tmp_rseq = 0;
+	uint32 seqI = (mer_size * alpha_bits) / 32;
+	int32 cur_bit = 32 - alpha_bits - ((mer_size * alpha_bits) % 32);
+	uint32 cur_seq = sequence[seqI];
+	uint64 tmp_seq;
+//	uint32 alpha_mask = 0xFFFFFFFF;
+//	alpha_mask >>= 32 - alpha_bits;
+	uint64 revalpha_mask = OPT_ALPHA_MASQ;
+	revalpha_mask <<= dead_bits;
+
+	//which is slower? a memory operation or a conditional?
+	//probably a memory operation.
+	for(gnSeqI cur_pos = mer_size + 1; cur_pos < endI; cur_pos++){//already added the
+													//first one
+		//increment positions
+		cur_suffix.position++;
+		rcur_suffix.position++;
+		//extract the next character
+		// and move it to the lowest used character slot of a mer
+		tmp_seq = cur_seq;
+		tmp_seq >>= cur_bit;
+		tmp_seq &= OPT_ALPHA_MASQ;
+		tmp_seq <<= dead_bits;
+		
+		//add it to the forward mer
+		cur_suffix.mer <<= alpha_bits;
+		cur_suffix.mer |= tmp_seq;
+
+		//do the reverse complement mer
+		// the complemented character enters at the top, the mer moves one
+		// character to the right, and the marker bit is set again
+		tmp_seq = ~tmp_seq;
+		tmp_seq &= revalpha_mask;
+		tmp_rseq = tmp_seq;
+		tmp_rseq <<= rdead_bits;
+		rcur_suffix.mer >>= alpha_bits;
+		rcur_suffix.mer |= tmp_rseq;
+		rcur_suffix.mer &= create_mask;
+		rcur_suffix.mer |= 1;
+		if(cur_suffix.mer < rcur_suffix.mer)
+			sml_array.push_back(cur_suffix);
+		else
+			sml_array.push_back(rcur_suffix);
+
+		cur_bit -= alpha_bits;
+		if(cur_bit < 0){
+			// the current word is used up: load the next one
+			// NOTE(review): the two-step shift below looks like a way to
+			// shift by up to 32 bits without a single 32-bit shift -- confirm
+			cur_bit += alpha_bits;
+			cur_seq <<= 16;		//trade bitwise ops for conditional
+			cur_seq <<= 16 - (cur_bit);
+			seqI++;
+			tmp_seq = sequence[seqI];
+			tmp_seq >>= cur_bit;
+			cur_seq |= tmp_seq;
+			cur_bit += 32 - alpha_bits;
+		}
+	}
+}
+
+
+// Extracts the characters selected by the spaced seed pattern (header.seed)
+// from the sequence starting at 'offset' and packs them, left-aligned, into
+// a mer of header.seed_weight characters.
+// The pattern is read from its highest bit (bit seed_length-1) downwards.
+uint64 SortedMerList::GetSeedMer( gnSeqI offset ) const
+{
+	//check this for access violations.
+	uint64 mer_a = SortedMerList::GetMer( offset );
+	uint64 mer_b = SortedMerList::GetMer( offset + 1 );
+	uint64 seed_mer = 0;
+	// mask selecting the topmost character of a 64-bit mer
+	uint64 alpha_mask = 1;
+	alpha_mask <<= OPT_HEADER_ALPHABET_BITS;
+	alpha_mask--;
+	alpha_mask <<= 62;
+	uint64 cur_alpha_mask = alpha_mask;
+	// mask selecting the current bit of the seed pattern
+	uint64 char_mask = 1;
+	char_mask <<= header.seed_length - 1;
+	uint64 cur_mer = mer_a;
+	const int mer_transition = 64 / OPT_HEADER_ALPHABET_BITS;
+	int patternI = 0;
+	int rshift_amt = 64 - OPT_HEADER_ALPHABET_BITS;
+	for( ; patternI < header.seed_length; patternI++ ){
+		// NOTE(review): after mer_transition characters the loop switches to
+		// mer_b, which was read from offset + 1 rather than from
+		// offset + mer_transition -- confirm whether this branch can be
+		// reached and whether the offset is intended
+		if( patternI == mer_transition ){
+			cur_mer = mer_b;
+			cur_alpha_mask = alpha_mask;
+			rshift_amt = 64 - OPT_HEADER_ALPHABET_BITS;
+		}
+		if( (header.seed & char_mask) != 0 ){
+			// this position is part of the seed: append its character
+			uint64 char_tmp = cur_mer & cur_alpha_mask;
+			char_tmp >>= rshift_amt;
+			seed_mer <<= OPT_HEADER_ALPHABET_BITS;
+			seed_mer |= char_tmp;
+		}
+		// advance to the next character and the next pattern bit
+		cur_alpha_mask >>= OPT_HEADER_ALPHABET_BITS;
+		char_mask >>= 1;
+		rshift_amt -= OPT_HEADER_ALPHABET_BITS;
+	}
+
+	// left-align the collected characters
+	seed_mer <<= 64 - (OPT_HEADER_ALPHABET_BITS * header.seed_weight);
+	return seed_mer;
+}
+
+/**
+ * Returns the spaced seed mer at 'offset' or its reverse complement,
+ * whichever compares smaller.
+ */
+uint64 SortedMerList::GetDnaSeedMer( gnSeqI offset ) const
+{
+	const uint64 forward = SortedMerList::GetSeedMer( offset );
+	const uint64 reverse = RevCompMer( forward, header.seed_weight );
+	if( forward < reverse )
+		return forward;
+	return reverse;
+}
+
+/**
+ * Resizes sml_array to SMLLength() entries and fills entry i with
+ * position i and the mer returned by GetDnaSeedMer(i).
+ * The seq argument is not used.  sml_array is left untouched when the
+ * list holds no seeds.
+ */
+void SortedMerList::FillDnaSeedSML(const gnSequence& seq, vector<bmer>& sml_array){
+	const gnSeqI seed_count = SMLLength();
+	if( seed_count == 0 )
+		return;	// can't have an sml if there ain't enough sequence
+	sml_array.resize(seed_count);
+
+	gnSeqI seedI = 0;
+	while( seedI < seed_count ){
+		bmer& entry = sml_array[seedI];
+		entry.position = seedI;
+		entry.mer = GetDnaSeedMer( seedI );
+		++seedI;
+	}
+}
+
+
+/**
+ * Prepares this list for the given sequence and seed pattern: fills in the
+ * header, computes the masks and stores the binary encoded sequence.
+ * @param seq	the sequence to index
+ * @param seed	the seed pattern, one bit per sequence position
+ * Throws SMLCreateError (via Throw_gnExMsg) if the alphabet does not fit
+ * into a mer, or if the seed is empty or too long.
+ */
+void SortedMerList::Create(const gnSequence& seq, const uint64 seed){
+	
+	if(CalculateMaxMerSize() == 0)
+		Throw_gnExMsg( SMLCreateError(), "Alphabet size is too large" );
+
+	int seed_length = getSeedLength( seed );
+	int seed_weight = getSeedWeight( seed );
+	
+	if(seed_length > CalculateMaxMerSize())
+		Throw_gnExMsg( SMLCreateError(), "Mer size is too large" );
+
+	if(seed_length == 0)
+		Throw_gnExMsg( SMLCreateError(), "Can't have 0 seed length" );
+
+	//determine sequence and sar length and read in sequence
+	gnSeqI seq_len = seq.length();
+	if(!seq.isCircular()){
+		header.circular = false;
+	}else
+		header.circular = true;
+	// use the nifty Array class as a wrapper for the buffer to ensure correct deallocation
+	gnSeqI buf_len = seq.isCircular() ? seq_len + seed_length : seq_len;
+	Array<gnSeqC> seq_buf( buf_len );
+	seq.ToArray(seq_buf.data, seq_len);
+	if( seq.isCircular() ){
+		// append the start of the sequence so that seeds can wrap around
+		seq.ToArray(seq_buf.data + seq_len, seed_length-1);
+		// the buffer is one character longer than what was copied, and all
+		// of it is encoded below.  Give the last slot the character that
+		// follows in the circular sequence instead of leaving it unset
+		if( seq_len > 0 )
+			seq_buf.data[buf_len - 1] = seq_buf.data[(seed_length - 1) % seq_len];
+	}
+
+	// set header information
+	header.length = seq_len;
+	header.seed_length = seed_length;
+	header.seed_weight = seed_weight;
+	header.seed = seed;
+
+	// the seed mask covers seed_weight characters, the mer mask seed_length
+	SetMerMaskSize( seed_weight );
+	seed_mask = mer_mask;
+	SetMerMaskSize( seed_length );
+
+	SetSequence( seq_buf.data, buf_len );
+}
+
+} // namespace mems
diff --git a/libMems/SortedMerList.h b/libMems/SortedMerList.h
new file mode 100644
index 0000000..3caa91c
--- /dev/null
+++ b/libMems/SortedMerList.h
@@ -0,0 +1,323 @@
+/*******************************************************************************
+ * $Id: SortedMerList.h,v 1.13 2004/02/27 23:08:55 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef _SortedMerList_h_
+#define _SortedMerList_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnDefs.h"
+#include "libGenome/gnClone.h"
+#include "libGenome/gnDebug.h"
+#include "libGenome/gnSequence.h"
+#include "libGenome/gnException.h"
+#include "stdlib.h"
+#include <string>
+#include <vector>
+#include "libMems/SeedMasks.h"
+
+namespace mems {
+
+#define DNA_ALPHA_BITS 2	/**< number of bits to represent each nucleotide of DNA */
+#define DNA_MER_SIZE 31		/**< largest possible number of characters in each dna mer ALWAYS ODD */
+
+#define PROTEIN_ALPHA_BITS 5	/**< number of bits to represent each amino acid */
+#define PROTEIN_MER_SIZE 12		/**< default number of characters in each protein mer */
+
+#define DESCRIPTION_SIZE 2048	/**< Number of bytes for the freeform text description of an SML */
+
+#define NO_UNIQUE_COUNT UINT32_MAX
+
+typedef int16 sarID_t;
+
+typedef uint32 smlSeqI_t;
+
+// sizeof(bmer) is typically 16 rather than 4 + 8 = 12: aligning the uint64 member adds padding
+struct bmer{
+	smlSeqI_t position;	/**< starting position of this mer in the sequence */
+	uint64 mer; 		/**< the actual binary encoded mer */
+};
+
+struct SMLHeader{
+	uint32 version;						/**< Format version - 4 bytes */
+	uint32 alphabet_bits;				/**< Bits per character in the alphabet - 4 bytes */
+//	uint32 mer_size;					/**< Size of mers used for sorting the list - 4 bytes */
+	uint64 seed;						/**< The pattern used in each seed */
+	uint32 seed_length;					/**< The length of the seed mask */
+	uint32 seed_weight;					/**< The weight of the seed mask */
+	uint64 length;						/**< length of the sequence before circularity - 8 bytes */
+	uint32 unique_mers;					/**< Number of unique mers in the sequence 4 bytes */
+	uint32 word_size;					/**< Word size on the machine the sequence was translated */
+	boolean little_endian;				/**< Is the byte order little endian?  0==no, !0==yes */
+	sarID_t id;							/**< Obsolete ID value - sarID_t is a 16-bit integer (int16) */
+	boolean circular;					/**< Circularity of sequence - 1 byte */
+	uint8 translation_table[UINT8_MAX];	/**< Translation table for ascii characters to binary values -- UINT8_MAX entries (255 with the standard constant, not 256) */
+	char description[DESCRIPTION_SIZE]; /**< Freeform text description of sequence data -- 2048 bytes */
+};
+
+
+/**
+ * A base class which defines an interface common to all sorted mer lists
+ */
+class SortedMerList : public genome::gnClone
+{
+public:
+	SortedMerList();
+	SortedMerList( const SortedMerList& sa );
+	SortedMerList& operator=(const SortedMerList& sa);
+	~SortedMerList();
+	
+	/**
+	 * Set data structures to default values
+	 */
+	virtual void Clear();
+	
+	/**
+	 * Initializes this sorted mer list from a sequence and a seed pattern.
+	 * The base implementation validates the seed, records the header fields and
+	 * stores the sequence; it raises SMLCreateError when the seed is unusable.
+	 * @param seq The sequence to create an SML for.
+	 * @param seed The seed pattern; its length and weight are derived from it.
+	 */
+	virtual void Create(const genome::gnSequence& seq, const uint64 seed);
+	/**
+	 * Read a range of mers in the sorted mer list.
+	 * This function reads a section of data from the sorted mer list starting at 'offset'
+	 * and continuing for 'size' mers.  The mers are placed into readVector.  Anything
+	 * already in readVector is cleared.  Returns false if there was a problem completing the
+	 * read.  If the end of the list is reached, all mers which could be read will be placed
+	 * into readVector and false will be returned
+	 * @param readVector the vector to read bmers into.
+	 * @param size The number of bmers to read.
+	 * @param offset The mer index in the sorted mer list to start reading from. 
+	 * @return false if a problem was encountered while reading.
+	 */
+	virtual boolean Read(std::vector<bmer>& readVector, gnSeqI size, gnSeqI offset) = 0;
+	/**
+	 * Merges two SortedMerLists.
+	 */
+	virtual void Merge(SortedMerList& sa, SortedMerList& sa2) = 0;
+	
+	/**
+	 * Get the mer at the specified index in the sorted mer list.
+	 * @param index The index of the mer to return.
+	 * @return The specified mer.
+	 */
+	virtual bmer operator[](gnSeqI index) = 0;
+	/**
+	 * Get the mer at the specified index in the sorted mer list.
+	 * @param position The index of the mer to return.
+	 * @return The specified mer.
+	 */
+	virtual uint64 GetMer(gnSeqI position) const;
+	/**
+	 * Searches the SML for a subsequence which matches the query string.
+	 * Returns true if one is found, false otherwise.
+	 * If no matching mer is found, 'result' contains the index that the query
+	 * sequence would be in if it existed in the SML.
+	 */
+	virtual boolean Find(const std::string& query_seq, gnSeqI& result);
+	/**
+	 * Searches the SML for a mer which matches the query mer.
+	 * Returns true if one is found, false otherwise.
+	 * If no matching mer is found, 'result' contains the index that the query
+	 * mer would be in if it existed in the SML.
+	 */
+	virtual boolean FindMer(const uint64 query_mer, gnSeqI& result);
+	/**
+	 * Searches the SML for mers which match the query mer and puts their indices in 'result'.
+	 * NOTE(review): 'result' is passed by value, so the caller's vector is never updated.
+	 */
+	virtual void FindAll(const std::string& query_seq, std::vector<gnSeqI> result);
+	/**
+	 * Returns the number of unique mers in the sequence
+	 */
+	virtual gnSeqI UniqueMerCount();
+	
+	/**
+	 * Returns a freeform text description of the SML.
+	 */
+	virtual std::string Description() const;
+	/**
+	 * Sets the freeform text description of the SML.
+	 */
+	virtual void SetDescription(const std::string& d);
+	/**
+	 * Returns the length of the seed pattern that this SML was sorted on.
+	 */
+	virtual uint SeedLength() const;
+	/**
+	 * Returns the weight of the seed that this SML was sorted on.
+	 */
+	virtual uint SeedWeight() const;
+	/**
+	 * Returns the seed pattern that this SML was sorted on.
+	 */
+	virtual uint64 Seed() const;
+	/**
+	 * Returns the length of the mer mask.
+	 * Some types of sorted mer list support a configurable mer mask size, allowing
+	 * the same sorted mer list to behave as though it were sorted on a shorter mer size.
+	 * DNA sorted mer lists do not support this feature.
+	 */
+	virtual uint32 GetMerMaskSize() const;
+	/**
+	 * Sets the length of the mer mask.
+	 * Some types of sorted mer list support a configurable mer mask size, allowing
+	 * the same sorted mer list to behave as though it were sorted on a shorter mer size.
+	 * DNA sorted mer lists do not support this feature.
+	 */
+	virtual void SetMerMaskSize(uint32 mer_size);
+	/**
+	 * Returns the length of the sequence encoded in this sorted mer list.
+	 */
+	gnSeqI Length() const;
+	/**
+	 * Returns the length of the sorted mer list itself.  This value will be less
+	 * than the sequence length if the sequence isn't circular
+	 */
+	gnSeqI SMLLength() const;
+	/**
+	 * Ignore this.
+	 */
+	virtual sarID_t GetID() const;
+	/**
+	 * Ignore this.
+	 */
+	virtual void SetID(const sarID_t d);
+	/**
+	 * Returns true if this SML is circular.  False otherwise.
+	 */
+	virtual boolean IsCircular() const;
+	/**
+	 * Returns a mask which can be bitwise AND'ed to a mer in order to
+	 * get only the relevant bits of sequence data without direction bits.
+	 */
+	virtual uint64 GetMerMask() const;
+	/**
+	 * Returns a mask which can be bitwise AND'ed to a seed mer in order to
+	 * get only the relevant bits of sequence data without direction bits.
+	 */
+	virtual uint64 GetSeedMask() const;
+	/**
+	 * Returns a copy of the header information for this SML.
+	 */
+	virtual SMLHeader GetHeader() const;
+	/**
+	 * Returns a translation table for DNA sequence which disambiguates each nucleotide.
+	 */
+	static const uint8* BasicDNATable();
+	/**
+	 * Returns a translation table for Protein sequence.
+	 */
+	static const uint8* ProteinTable();
+	/** 
+	 * Places a copy of the binary encoded sequence data into dest.
+	 * @param len The length in sequence characters to copy
+	 * @param offset The sequence offset to start copying from
+	 * @throws IndexOutOfBounds if offset or len are invalid
+	 */
+	virtual void GetBSequence(uint32* dest, const gnSeqI len, const gnSeqI offset);
+	
+	/**
+	 * Returns the reverse complement of a mer
+	 */
+	virtual uint64 RevCompMer( uint64 mer_a, int mer_length ) const;
+	/**
+	 * Applies the seed mask to the sequence at the given offset and returns the resulting
+	 * seed.
+	 */
+	virtual uint64 GetSeedMer( gnSeqI offset ) const;
+	/**
+	 * Returns the lesser of the forward and reverse complement seeds at the given offset.
+	 * Note: The seed pattern should be palindromic, otherwise the returned rev. complement
+	 * match will be under a different pattern.
+	 */
+	virtual uint64 GetDnaSeedMer( gnSeqI offset ) const;
+
+	/**
+	 * Resizes sml_array to SMLLength() entries and stores, for each offset, the
+	 * GetDnaSeedMer() value together with that offset.  Does nothing for an empty SML.
+	 */
+	virtual void FillDnaSeedSML(const genome::gnSequence& seq, std::vector<bmer>& sml_array);
+
+protected:
+	struct SMLHeader header; /**< stores general information about this sorted mer list */
+	uint64 mer_mask;	/**< a mask for the used bits in a mer */
+	uint64 seed_mask;	/**< a mask covering only the number of characters a seed covers */
+	uint32 mask_size;   /**< the number of characters covered by the mask */
+	uint32 *sequence;	/**< Stores the sequence data */
+	gnSeqI binary_seq_len;	/**< Stores the length in 32 bit words of the sequence */
+
+	/** Set the sequence data to the seq_len characters in seq_buf */
+	virtual void SetSequence(gnSeqC* seq_buf, gnSeqI seq_len);
+	/** Fill in the vector of bmers with the initial unsorted bmers for the sequence in seq_buf  */
+	virtual void FillSML(gnSeqC* seq_buf, gnSeqI seq_len, boolean circular, std::vector<bmer>& sml_array);
+	virtual void FillSML(const genome::gnSequence& seq, std::vector<bmer>& sml_array);
+	virtual void FillDnaSML(const genome::gnSequence& seq, std::vector<bmer>& sml_array);
+	/** Fill in the vector of positions with the initial unsorted positions for the sequence in seq_buf  */
+	virtual void FillSML(gnSeqI seq_len, std::vector<gnSeqI>& sml_array);
+	virtual uint64 GetDnaMer(gnSeqI offset) const;
+
+	virtual gnSeqI bsearch(const struct bmer& query_mer, const gnSeqI start, const gnSeqI end);
+	virtual void translate(uint8* dest, const gnSeqC* src, const gnSeqI len) const;
+	virtual void translate32(uint32* dest, const gnSeqC* src, const gnSeqI len) const;
+	/**
+	 * Shifts an entire array of words left or right by a few bits
+	 * @param data A pointer to the array of words
+	 * @param bits The number of bits to shift by.  A positive number shifts right and a negative number shifts left.
+	 */
+	virtual void ShiftWords(uint32* data, uint32 length, int32 bits);
+	virtual uint32 CalculateMaxMerSize() const;
+
+	static const uint8* CreateBasicDNATable();
+	static const uint8* CreateProteinTable();
+};
+
+/**
+ * Thrown when there is an error creating a sorted mer list.
+ */
+CREATE_EXCEPTION(SMLCreateError);
+
+/**
+ * Thrown when there is an error merging two sorted mer lists.
+ */
+CREATE_EXCEPTION(SMLMergeError);
+
+class MerCompare {	// functor: orders two positions by the value GetMer() returns for each
+public:
+	MerCompare( SortedMerList* sa ) : sar( sa ) {}
+	boolean operator()(const gnSeqI a, const gnSeqI b) const{
+		return sar->GetMer(a) < sar->GetMer(b);
+	}
+protected:
+	SortedMerList* sar;	// list queried for the mers; not owned by this object
+};
+
+bool bmer_lessthan(const bmer& a_v, const bmer& m_v);
+bool bmer_id_lessthan(const bmer& a_v, const bmer& m_v);
+
+int bmer_compare(const void* a_v, const void* m_v);
+// (bmer_id_lessthan is declared once above; its definition is not in this header)
+
+/** Less-than predicate for STL sort functions: true when a_v's mer is smaller than m_v's. */
+inline
+bool bmer_lessthan(const bmer& a_v, const bmer& m_v){
+	return a_v.mer < m_v.mer;
+}
+
+inline
+int bmer_compare(const void* a_v, const void* m_v){	// qsort-style comparator: returns -1, 0 or 1 by unsigned mer value
+	const uint64 a = ((const bmer*)a_v)->mer, m = ((const bmer*)m_v)->mer; return a < m ? -1 : (a > m ? 1 : 0);	// compared directly: truncating a 64-bit difference to int can change its sign or yield 0
+}
+
+}
+
+#endif   //_SortedMerList_h_
diff --git a/libMems/SparseAbstractMatch.h b/libMems/SparseAbstractMatch.h
new file mode 100644
index 0000000..e42844c
--- /dev/null
+++ b/libMems/SparseAbstractMatch.h
@@ -0,0 +1,250 @@
+/*******************************************************************************
+ * $Id: SparseAbstractMatch.h,v 1.8 2004/02/27 23:08:55 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef __SparseAbstractMatch_h__
+#define __SparseAbstractMatch_h__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnClone.h"
+#include "libGenome/gnDefs.h"
+#include "libMems/AbstractMatch.h"
+#include <vector>
+#include <limits>
+
+namespace mems {
+
+//template< class gnSeqIAlloc=boost::pool_allocator<gnSeqI>, class uintAlloc=boost::pool_allocator<uint> >
+/**
+ * The SparseAbstractMatch implements the AbstractMatch interface in a way
+ * that allows matches with a large SeqCount and low Multiplicity to be stored efficiently
+ */
+template< class gnSeqIAlloc=std::allocator<gnSeqI>, class uintAlloc=std::allocator<uint> >
+class SparseAbstractMatch : public AbstractMatch {
+public:
+	SparseAbstractMatch() : m_seq_count(0) {}
+	/**
+	 * Creates a new SparseAbstractMatch.
+	 * @param seq_count The total number of sequences in the alignment
+	 */
+	SparseAbstractMatch(const uint seq_count );
+
+	// use compiler-generated copy constructor, assignment operator, and destructor
+
+	// see AbstractMatch base class documentation for these functions
+
+	int64 Start(uint seqI) const;
+	void SetStart(uint seqI, int64 startI);
+	uint Multiplicity() const{return (uint)seq_ids.size();}	// number of sequences that currently have an entry
+	uint SeqCount() const{return m_seq_count;}
+	uint FirstStart() const;	// smallest sequence id with an entry, or the maximum uint when there is none
+	virtual void Invert();
+
+	gnSeqI LeftEnd(uint seqI) const;
+	orientation Orientation(uint seqI) const;
+	void SetLeftEnd(uint seqI, gnSeqI position);
+	void SetOrientation(uint seqI, orientation o);
+	
+	// MoveStart shifts the left end of forward entries, MoveEnd that of reverse entries
+	virtual void MoveStart(int64 move_amount);
+	virtual void MoveEnd(int64 move_amount);
+
+	virtual boolean operator==( const SparseAbstractMatch& sam ) const;
+
+	virtual uint UsedSeq( uint seqI ) const;
+protected:
+	// seq_ids[i], leftend[i] and orient[i] together describe one stored sequence entry
+	std::vector<uint, uintAlloc > seq_ids;
+	uint m_seq_count;
+	std::vector<gnSeqI, gnSeqIAlloc > leftend;
+	bitset_t orient;	// bitset_t has its own allocator; a set bit means reverse orientation
+	uint SeqToIndex( uint seqI ) const;	// index of seqI within seq_ids, or seq_ids.size() when absent
+
+	// for use by derived classes in order to swap contents
+	void swap( SparseAbstractMatch* other );	
+};
+
+
+template< class gnSeqIAlloc, class uintAlloc >
+SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::SparseAbstractMatch(const uint seq_count ) :
+m_seq_count(seq_count)
+{}	// only the sequence count is recorded; the per-sequence containers start out empty
+
+template< class gnSeqIAlloc, class uintAlloc >
+void SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::swap( SparseAbstractMatch* other )
+{	// exchanges every data member with *other; other is dereferenced unconditionally
+	std::swap(m_seq_count, other->m_seq_count);
+	std::swap(seq_ids, other->seq_ids);
+	std::swap(orient, other->orient);
+	std::swap(leftend, other->leftend);
+}
+
+template< class gnSeqIAlloc, class uintAlloc >
+uint SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::FirstStart() const
+{
+	uint smallest_id = (std::numeric_limits<uint>::max)();	// returned unchanged when seq_ids is empty
+	for( std::size_t idx = seq_ids.size(); idx-- > 0; )
+		if( seq_ids[idx] < smallest_id ) smallest_id = seq_ids[idx];
+	return smallest_id;
+}
+
+template< class gnSeqIAlloc, class uintAlloc >
+uint SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::SeqToIndex( uint seqI ) const
+{
+	// linear scan of seq_ids; the entry count doubles as the "not found" result
+	const uint entry_count = (uint)seq_ids.size();
+	for( uint idx = 0; idx < entry_count; ++idx )
+		if( seq_ids[idx] == seqI ) return idx;
+	return entry_count;
+}
+
+
+template< class gnSeqIAlloc, class uintAlloc >
+int64 SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::Start(uint seqI) const
+{
+	const uint idx = SeqToIndex( seqI );
+	if( !(idx < seq_ids.size()) )
+		return NO_MATCH;	// seqI has no entry in this match
+	const int64 magnitude = leftend[idx];	// the sign of the result encodes the orientation
+	return orient.test(idx) ? -magnitude : magnitude;
+}
+
+
+template< class gnSeqIAlloc, class uintAlloc >
+void SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::SetStart(uint seqI, int64 startI)
+{
+	uint posI = SeqToIndex( seqI );	// equals seq_ids.size() when seqI has no entry yet
+	if( startI == NO_MATCH && posI >= seq_ids.size() )
+		return;	// asked to clear an entry that does not exist
+	if( startI == NO_MATCH )
+	{	// remove the entry from all three parallel containers
+		seq_ids.erase( seq_ids.begin() + posI );
+		leftend.erase( leftend.begin() + posI );
+		for( size_t i = posI; i + 1 < orient.size(); ++i )
+			orient.set( i, orient.test( i + 1 ) );	// shift the later orientation bits down by one
+		orient.resize( orient.size()-1 );
+		return;
+	}
+	if( posI >= seq_ids.size() )
+	{	// new entry: store the magnitude and keep the sign as the orientation bit
+		seq_ids.push_back(seqI);
+		leftend.push_back(genome::absolut(startI));
+		orient.resize( orient.size() + 1, (startI < 0) );
+	}else{	// existing entry: overwrite magnitude and orientation in place
+		leftend[posI] = genome::absolut(startI); 
+		orient.set(posI, startI < 0);
+	}
+}
+
+
+template< class gnSeqIAlloc, class uintAlloc >
+void SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::Invert()
+{	// flips the orientation bit of every stored entry; left ends are not touched
+	orient.flip();
+}
+
+
+
+template< class gnSeqIAlloc, class uintAlloc >
+gnSeqI SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::LeftEnd(uint seqI) const
+{	// 0 is returned when seqI has no entry
+	const uint idx = SeqToIndex( seqI );
+	return idx >= leftend.size() ? 0 : leftend[idx];
+}
+
+
+template< class gnSeqIAlloc, class uintAlloc >
+AbstractMatch::orientation SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::Orientation(uint seqI) const
+{
+	const uint idx = SeqToIndex( seqI );
+	if( idx >= leftend.size() || leftend[idx] == NO_MATCH )
+		return undefined;	// no entry, or an entry whose left end equals NO_MATCH
+	return orient.test(idx) ? reverse : forward;
+}
+
+
+template< class gnSeqIAlloc, class uintAlloc >
+void SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::SetLeftEnd(uint seqI, gnSeqI position)
+{ 
+	uint posI = SeqToIndex( seqI );	// equals seq_ids.size() when seqI has no entry yet
+	if( position == NO_MATCH && posI >= seq_ids.size() )
+		return;	// asked to clear an entry that does not exist
+	if( posI >= leftend.size() )
+	{	// new entry, appended to all three parallel containers
+		seq_ids.push_back(seqI);
+		leftend.push_back(position);
+		orient.resize( orient.size() + 1 );	// defaults to false
+	}else if( position == NO_MATCH )
+	{	// existing entry being cleared: erase it and close the gap in the orientation bits
+		seq_ids.erase( seq_ids.begin() + posI );
+		leftend.erase( leftend.begin() + posI );
+		for( size_t i = posI; i + 1 < orient.size(); ++i )
+			orient.set( i, orient.test( i + 1 ) );
+		orient.resize( orient.size()-1 );
+		return;
+	}
+	// reached for both new and existing entries (a repeat of the push_back value for new ones)
+	leftend[posI]=position; 
+}
+
+template< class gnSeqIAlloc, class uintAlloc >
+void SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::SetOrientation(uint seqI, orientation o)
+{
+	const uint idx = SeqToIndex( seqI );
+	// a sequence without an entry cannot be given an orientation: report it by throwing
+	if( !(idx < orient.size()) )
+		throw "ArrayIndexOutOfBounds!\n";
+	orient.set(idx, o == reverse);
+}
+
+template< class gnSeqIAlloc, class uintAlloc >
+void SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::MoveStart(int64 move_amount)
+{	// only forward-oriented entries with a left end other than NO_MATCH are shifted
+	for( std::size_t idx = 0; idx != leftend.size(); ++idx )
+		if( !orient.test(idx) && leftend[idx] != NO_MATCH )
+			leftend[idx] += move_amount;
+}
+
+template< class gnSeqIAlloc, class uintAlloc >
+void SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::MoveEnd(int64 move_amount)
+{	// only reverse-oriented entries with a left end other than NO_MATCH are shifted
+	for( std::size_t idx = 0; idx != leftend.size(); ++idx )
+		if( leftend[idx] != NO_MATCH && orient.test(idx) )
+			leftend[idx] += move_amount;
+}
+
+template< class gnSeqIAlloc, class uintAlloc >
+boolean SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::operator==( const SparseAbstractMatch< gnSeqIAlloc, uintAlloc >& sam ) const
+{	// entries are compared position by position; orientation only matters where the left end is nonzero
+	if( leftend.size() != sam.leftend.size() ) return false;	// also keeps the loop from indexing past sam's entries
+	for( uint i=0; i < leftend.size(); ++i )
+		if( leftend[i] != sam.leftend[i] ||
+			(leftend[i] != 0 && orient.test(i) != sam.orient.test(i)))
+			return false;
+	return true;	// NOTE(review): seq_ids are not compared, so entry order is assumed to correspond
+}
+
+template< class gnSeqIAlloc, class uintAlloc >
+uint SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::UsedSeq( uint seqI ) const
+{
+	// walk the entries until seqI+1 of them with a nonzero left end have been seen
+	uint defined_seen = 0;
+	for( uint idx = 0; idx < leftend.size(); ++idx )
+	{
+		defined_seen += (leftend[idx] != 0) ? 1 : 0;
+		if( defined_seen > seqI )
+			return idx;
+	}
+	return (std::numeric_limits<uint>::max)();
+}
+
+}
+
+#endif // __SparseAbstractMatch_h__
diff --git a/libMems/SubstitutionMatrix.h b/libMems/SubstitutionMatrix.h
new file mode 100644
index 0000000..07c5cd2
--- /dev/null
+++ b/libMems/SubstitutionMatrix.h
@@ -0,0 +1,111 @@
+/*******************************************************************************
+ * $Id: SubstitutionMatrix.h,v 1.7 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef __SubstitutionMatrix_h__
+#define __SubstitutionMatrix_h__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnSequence.h"
+#include <iostream>
+#include <sstream>
+
+namespace mems {
+
+typedef int score_t;
+static const score_t hoxd_matrix[4][4] = 	// scores used by PairwiseScoringScheme's default constructor; rows labelled A, C, G, T
+{ 
+	{91,	-114,	-31,	-123}, // A
+
+	{-114,	100,	-125,	-31}, // C
+
+	{-31,	-125,	100,	-114}, // G
+
+	{-123,	-31,	-114,	91}, // T
+};
+
+static const score_t default_gap_open = -400;
+static const score_t default_gap_extend = -30;
+
+class PairwiseScoringScheme
+{
+public:
+	score_t matrix[4][4];	/**< 4x4 nucleotide substitution matrix */
+	score_t gap_open;	/**< gap open penalty */
+	score_t gap_extend;	/**< gap extend penalty */
+
+	/** Copies the given matrix and stores both gap penalties. */
+	PairwiseScoringScheme( const score_t matrix[4][4], score_t gap_open, score_t gap_extend )
+		: gap_open( gap_open ), gap_extend( gap_extend )
+	{
+		setMatrix(matrix);
+	}
+	/** Default scheme: hoxd_matrix with default_gap_open and default_gap_extend. */
+	PairwiseScoringScheme() : gap_open( default_gap_open ), gap_extend( default_gap_extend ){ setMatrix( hoxd_matrix ); }
+	PairwiseScoringScheme& operator=( const PairwiseScoringScheme& pss )
+	{
+		gap_open = pss.gap_open;
+		gap_extend = pss.gap_extend;
+		setMatrix(pss.matrix);
+		return *this;
+	}
+
+	void setMatrix( const score_t matrix[4][4] )
+	{	// the parameter shadows the member, hence the explicit this->
+		for( int cell = 0; cell < 16; ++cell )
+			this->matrix[cell / 4][cell % 4] = matrix[cell / 4][cell % 4];
+	}
+};
+
+static PairwiseScoringScheme& getDefaultScoringScheme()	// NOTE(review): 'static' in a header gives every translation unit its own copy of pss
+{
+	static PairwiseScoringScheme pss( hoxd_matrix, default_gap_open, default_gap_extend );	// constructed on first call
+	return pss;	// non-const reference: callers are able to modify the returned scheme
+}
+
+void readSubstitutionMatrix( std::istream& is, score_t matrix[4][4] );
+/** Reads a 4x4 substitution matrix: a header line, an "A C G T N" label line, then four rows of
+ *  <label> <4 scores> <N score>.  Throws a const char* message when the labels or a score cannot be read. */
+inline
+void readSubstitutionMatrix( std::istream& is, score_t matrix[4][4] )
+{
+	static const char* const labels[5] = { "A", "C", "G", "T", "N" };
+	const char* const bad_format = "Invalid substitution matrix format\n";
+	std::string tmp;
+	std::getline( is, tmp );	// first line contains header info
+	std::getline( is, tmp );	// second line contains sub mat column labels
+	std::stringstream ss( tmp );
+	std::string letter;
+	bool format_ok = true;
+	for( int labelI = 0; labelI < 5; labelI++ ){
+		ss >> letter;
+		format_ok = format_ok && letter == labels[labelI];
+	}
+	if( !format_ok ){
+		std::cerr << bad_format;
+		throw bad_format;
+	}
+
+	for( int i = 0; i < 4; i++ )
+	{
+		is >> letter;	// the first character on each line should be a letter
+		for( int j = 0; j < 4; j++ )
+			is >> matrix[i][j];
+		if( !is ){	// a missing or non-numeric score must not leave the matrix silently half-filled
+			std::cerr << bad_format;
+			throw bad_format;
+		}
+		is >> letter;	// this should be the N sub score (which gets ignored)
+	}
+}
+
+}
+
+#endif // __SubstitutionMatrix_h__
diff --git a/libMems/SuperInterval.cpp b/libMems/SuperInterval.cpp
new file mode 100644
index 0000000..d2d5577
--- /dev/null
+++ b/libMems/SuperInterval.cpp
@@ -0,0 +1,124 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "libMems/SuperInterval.h"
+
+using namespace std;
+using namespace genome;
+
+namespace mems {
+// working in mems
+
+bool debug_aligner = false;
+
+SuperInterval::SuperInterval() :
+c1_siv((std::numeric_limits<size_t>::max)()),
+c2_siv((std::numeric_limits<size_t>::max)()),
+parent_siv((std::numeric_limits<size_t>::max)()),
+left_end(0),
+length(0)
+{}	// initializers follow the member declaration order; all three indices start at the largest size_t
+
+SuperInterval::SuperInterval( const Interval& reference_iv ) :
+reference_iv(reference_iv),
+c1_siv((std::numeric_limits<size_t>::max)()),
+c2_siv((std::numeric_limits<size_t>::max)()),
+parent_siv((std::numeric_limits<size_t>::max)()),
+left_end(0),
+length(0)
+{	// copies the interval; left end and length start at 0, the indices at the largest size_t
+}
+
+SuperInterval::SuperInterval(const SuperInterval& siv) :
+reference_iv( siv.reference_iv ),
+c1_siv(siv.c1_siv),
+c2_siv(siv.c2_siv),
+parent_siv(siv.parent_siv),
+left_end(siv.left_end),
+length( siv.length )
+{	// member-wise copy, listed in member declaration order
+}
+SuperInterval& SuperInterval::operator=(const SuperInterval& siv)
+{	// member-wise assignment of every data member
+	left_end = siv.left_end;
+	length = siv.length;
+	reference_iv = siv.reference_iv;
+	c1_siv = siv.c1_siv;
+	c2_siv = siv.c2_siv;
+	parent_siv = siv.parent_siv;
+	return *this;
+}
+
+
+
+/** Sets the stored length. @param len the new length; reference_iv is not modified */
+void SuperInterval::SetLength( gnSeqI len )
+{
+	length = len;
+}
+
+void SuperInterval::CropLeft( gnSeqI amount )
+{
+	reference_iv.CropStart(amount);
+	// the left end advances by the cropped amount and the length shrinks by it
+	left_end += amount;
+	length -= amount;
+	// consistency checks run only when the debug_aligner flag is set
+	if(debug_aligner)
+		ValidateSelf();
+}
+
+void SuperInterval::CropRight( gnSeqI amount )
+{
+	reference_iv.CropEnd(amount);
+	length -= amount;	// the left end stays where it is; only the length shrinks
+	// consistency checks run only when the debug_aligner flag is set
+	if(debug_aligner)
+		ValidateSelf();
+}
+
+void SuperInterval::ValidateSelf() const
+{	// debugging aid: prints a message and calls breakHere() for each inconsistency found in reference_iv
+	vector< bitset_t > aln_mat;
+	reference_iv.GetAlignment(aln_mat);
+	if( !aln_mat.empty() && aln_mat[0].size() != reference_iv.AlignmentLength() )	// row 0 is only read when it exists
+	{
+		breakHere();
+		cerr << "trouble! aln_mat[0].size() is: " << aln_mat[0].size() << " while reference_iv.AlignmentLength() is: " << reference_iv.AlignmentLength() << endl;
+		cerr << "mult: " << reference_iv.Multiplicity() << endl;
+		cerr << "matches.size(): " << reference_iv.GetMatches().size() << endl;
+	}
+	for( size_t i = 0; i < aln_mat.size(); i++ )
+	{
+		gnSeqI lenny = 0;	// number of set bits in row i of the alignment matrix
+		for( size_t j = 0; j < aln_mat[i].size(); j++ )
+			if( aln_mat[i][j] )
+				lenny++;
+		if( lenny != reference_iv.Length(i) )
+		{
+			cerr << "krudunkle, ref_iv.Length(" << i << "): " << reference_iv.Length(i) << "\n";
+			cerr << "should be: " << lenny << endl;
+			breakHere();
+		}
+	}
+	if( reference_iv.LeftEnd(0) != NO_MATCH && reference_iv.Length(0) == 0 )	// a defined left end with zero length
+	{
+		cerr << "brozooka\n";
+		breakHere();
+	}
+	if( reference_iv.LeftEnd(1) != NO_MATCH && reference_iv.Length(1) == 0 )	// NOTE(review): assumes sequence index 1 is valid for reference_iv
+	{
+		cerr << "brokazooka\n";
+		breakHere();
+	}
+	// the stored length must agree with the alignment length of reference_iv
+	if( Length() != reference_iv.AlignmentLength() )
+	{
+		breakHere();
+		cerr << "crapola\n";
+	}
+}
+
+} // namespace mems
diff --git a/libMems/SuperInterval.h b/libMems/SuperInterval.h
new file mode 100644
index 0000000..fefcc07
--- /dev/null
+++ b/libMems/SuperInterval.h
@@ -0,0 +1,81 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef __SuperInterval_h__
+#define __SuperInterval_h__
+
+#include "libMems/Interval.h"
+
+namespace mems {
+
+/**
+ * A class that stores an alignment and coordinate mapping between collinear segments of an ancestral genome and two
+ * descendant genomes.
+ */
+class SuperInterval
+{
+public:
+
+	SuperInterval();
+	/**
+	 * Creates a new SuperInterval holding a copy of reference_iv.
+	 */
+	SuperInterval( const mems::Interval& reference_iv );
+	SuperInterval(const SuperInterval& siv);
+	SuperInterval& operator=(const SuperInterval& siv);
+	virtual ~SuperInterval(){}	// virtual because the class already exposes virtual member functions
+	
+	/** Returns the length */
+	virtual gnSeqI Length() const { return length; }
+
+	/** Sets the length to @param len */
+	virtual void SetLength( gnSeqI len );
+
+	/** Returns the left end coordinate */
+	virtual int64 LeftEnd() const { return left_end; }
+	/** Sets the left end coordinate to @param left_end */
+	virtual void SetLeftEnd( const int64& left_end ) { this->left_end = left_end; }
+
+	mems::Interval reference_iv;
+
+	/** the index of the SuperInterval this is aligned to in c1 */
+	size_t c1_siv;
+	/** the index of the SuperInterval this is aligned to in c2 */
+	size_t c2_siv;
+	/** the index of the SuperInterval this is aligned to in the parent */
+	size_t parent_siv;
+
+	void CropLeft( gnSeqI amount );
+	void CropRight( gnSeqI amount );
+	/** orders SuperIntervals by their left end coordinate */
+	bool operator<( const SuperInterval& si ) const{ return left_end < si.left_end; }
+
+	void ValidateSelf() const;
+	/** exchanges every data member with other */
+	void swap( SuperInterval& other )
+	{
+		reference_iv.swap(other.reference_iv);
+		std::swap(c1_siv, other.c1_siv);
+		std::swap(c2_siv, other.c2_siv);
+		std::swap(parent_siv, other.parent_siv);
+		std::swap(left_end, other.left_end);
+		std::swap(length, other.length);
+	}
+
+protected:
+	int64 left_end;
+	int64 length;
+};
+
+
+} // namespace mems
+
+namespace std {
+template<> inline
+void swap( mems::SuperInterval& a, mems::SuperInterval& b )
+{	// specialization that forwards std::swap to the member swap
+	a.swap(b);
+}
+}
+
+#endif //__SuperInterval_h__
diff --git a/libMems/TreeUtilities.h b/libMems/TreeUtilities.h
new file mode 100644
index 0000000..26148ca
--- /dev/null
+++ b/libMems/TreeUtilities.h
@@ -0,0 +1,138 @@
+#ifndef __TreeUtilities_h__
+#define __TreeUtilities_h__
+
+#include <algorithm>
+#include <stack>
+namespace mems {
+
+template<class T, class S>
+void findAndErase( T& container, S& item )
+{
+	T kept;	// rebuilt copy without the elements equal to item; item may refer to an element of container
+	typename T::iterator cur = container.begin();
+	for( ; cur != container.end(); ++cur )
+		if( *cur != item ) kept.push_back( *cur );
+	container = kept;
+}
+
+/**
+ * Iterative depth-first search: reports whether query_nodeI occurs in the subtree rooted at subtree_nodeI
+ */
+template<class Tree>
+bool containsNode( Tree& t, node_id_t subtree_nodeI, node_id_t query_nodeI )
+{
+	std::stack< node_id_t > pending;	// nodes discovered but not yet examined
+	pending.push( subtree_nodeI );
+	while( !pending.empty() )
+	{
+		const node_id_t visiting = pending.top();
+		pending.pop();
+		if( visiting == query_nodeI )
+			return true;
+
+		// schedule every child; a node without children contributes nothing
+		const size_t child_count = t[visiting].children.size();
+		for( size_t childI = 0; childI < child_count; childI++ )
+			pending.push( t[visiting].children[childI] );
+	}
+	return false;
+}
+
+
+/** re-roots the tree at the internal node new_root; throws a const char* message if new_root is a leaf
+ */
+template<class Tree>
+void rerootTree( Tree& t, node_id_t new_root )
+{
+	// new root must be an internal node
+	if( t[new_root].children.size() == 0 )
+		throw "Can't root on a leaf node";
+	if( new_root == t.root )
+		return;	// nothing to do: the tree is already rooted at new_root
+
+	// change the old root node to an internal node
+	uint childI = 0;
+	for( ; childI < t[t.root].children.size(); childI++ ){
+		if( containsNode( t, t[t.root].children[childI], new_root ) )
+		{	// the child whose subtree holds new_root becomes the old root's parent
+			t[t.root].parents.push_back( t[t.root].children[childI] );
+			findAndErase( t[t.root].children, t[t.root].children[childI] );
+			break;
+		}
+	}
+	// shake the tree out on the new root node
+	t.root = new_root;
+	t[t.root].children.insert( t[t.root].children.end(), t[t.root].parents.begin(), t[t.root].parents.end() );
+	t[t.root].parents.clear();
+
+	std::stack<node_id_t> node_stack;
+	node_stack.push(t.root);
+	while( node_stack.size() > 0 )
+	{
+		// for each child of the current node: remove the current node from the
+		// child's children and parents lists, turn the child's remaining parents
+		// into its children, make the current node its sole parent,
+		// and push the child so its own children are processed the same way
+		node_id_t cur_node = node_stack.top();
+		node_stack.pop();
+		for( uint childI = 0; childI < t[cur_node].children.size(); childI++ )
+		{
+			findAndErase( t[t[cur_node].children[childI]].children, cur_node );
+			findAndErase( t[t[cur_node].children[childI]].parents, cur_node );
+			t[t[cur_node].children[childI]].children.insert( t[t[cur_node].children[childI]].children.end(), t[t[cur_node].children[childI]].parents.begin(), t[t[cur_node].children[childI]].parents.end() );
+			t[t[cur_node].children[childI]].parents.clear();
+			t[t[cur_node].children[childI]].parents.push_back(cur_node);
+			node_stack.push(t[cur_node].children[childI]);
+		}
+	}
+}
+
+/**
+ * takes a rooted tree and moves the root onto the branch joining left_node and right_node (no-op unless they are adjacent)
+ */
+template<class Tree>
+void moveRootToBranch( Tree& t, node_id_t left_node, node_id_t right_node )
+{
+	// this function has no effect if left_node or right_node are already the root
+	if( left_node == t.root || right_node == t.root )
+		return;
+	// left_node and right_node must be adjacent
+	if( (t[left_node].parents.size() == 0 || t[right_node].parents.size() == 0 ) ||
+		(t[left_node].parents[0] != right_node && t[right_node].parents[0] != left_node ) )
+		return;
+
+	if( t[left_node].children.size() == 0 )
+		std::swap( left_node, right_node );	// left node was a leaf so root on right node; qualified so no using-directive is needed
+
+	// save the root
+	node_id_t old_root = t.root;
+	// reroot the tree on left_node, then move the old root on the branch leading to right_node
+	rerootTree( t, left_node );
+	// remove old_root: its children are handed to its parent rp, each absorbing old_root's distance
+	node_id_t rp = t[old_root].parents[0];
+	findAndErase( t[rp].children, old_root );
+	for( size_t cI = 0; cI < t[old_root].children.size(); cI++ )
+	{
+		t[t[old_root].children[cI]].parents[0] = rp;
+		t[t[old_root].children[cI]].distance += t[old_root].distance;
+		t[rp].children.push_back( t[old_root].children[cI] );
+	}
+	t[old_root].children.clear();
+
+	// link old_root in between left_node and right_node, splitting right_node's distance in half
+	findAndErase( t[left_node].children, right_node );
+	t[left_node].children.push_back( old_root );
+	t[old_root].parents[0] = left_node;
+	t[right_node].parents[0] = old_root;
+	t[old_root].children.push_back( right_node );
+	t[old_root].distance = t[right_node].distance / 2.0;
+	t[right_node].distance /= 2.0;
+
+	// finally reroot on old_root
+	rerootTree( t, old_root );
+}
+
+
+}	// namespace mems
+
+#endif // __TreeUtilities_h__
diff --git a/libMems/UngappedLocalAlignment.h b/libMems/UngappedLocalAlignment.h
new file mode 100644
index 0000000..3bea0d2
--- /dev/null
+++ b/libMems/UngappedLocalAlignment.h
@@ -0,0 +1,227 @@
+/*******************************************************************************
+ * $Id: UngappedLocalAlignment.h,v 1.10 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifndef __UngappedLocalAlignment_h__
+#define __UngappedLocalAlignment_h__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnClone.h"
+#include "libGenome/gnException.h"
+#include "libMems/AbstractMatch.h"
+
+namespace mems {
+
+/**
+ * The UngappedLocalAlignment class stores the location of an <b>equal size</b>
+ * (exactly or inexactly) matching region between several sequences.  A single
+ * length, m_length, is shared by every sequence that takes part in the match.
+ * This class can use one of several storage schemes
+ * such as DenseAbstractMatch or SparseAbstractMatch, selected through the
+ * AbstractMatchImpl template parameter that it derives from.
+ */
+template< class AbstractMatchImpl >
+class UngappedLocalAlignment : public AbstractMatchImpl 
+{
+
+public:
+	UngappedLocalAlignment();
+	/**
+	 * Creates a new UngappedLocalAlignment.
+	 * @param seq_count The total number of sequences in the alignment
+	 */
+	UngappedLocalAlignment( const uint seq_count );
+
+	// use trivial copy constructor, destructor, and operator =
+
+	UngappedLocalAlignment* Clone() const;	// copy created with operator new
+	UngappedLocalAlignment* Copy() const;	// copy created with m_allocateAndCopy(); pair with Free()
+	virtual void Free();	// hands this object to m_free()
+
+	/** comparison operator, compares two UngappedLocalAlignments to see if they are the same */
+	boolean operator==(const UngappedLocalAlignment& mhe) const;
+
+	gnSeqI Length( uint seqI = (std::numeric_limits<uint>::max)() ) const	// 0 for a sequence whose LeftEnd is NO_MATCH, m_length otherwise
+	{
+		if( seqI == (std::numeric_limits<uint>::max)() ) 
+			return m_length;
+		if( this->LeftEnd(seqI) == NO_MATCH )
+			return 0;
+		return m_length;
+	}
+	void SetLength(gnSeqI len, uint seqI = 0){m_length = len;}	// seqI is ignored: the length is shared by all sequences
+	gnSeqI AlignmentLength() const{return m_length;}
+	
+	//warning:  none of the following do bounds checking.
+	virtual void Move( int64 distance );
+	virtual void CropStart(gnSeqI crop_amount);
+	virtual void CropEnd(gnSeqI crop_amount);
+	virtual void ExtendStart(gnSeqI extend_amount);
+	virtual void ExtendEnd(gnSeqI extend_amount);
+
+	virtual void CropLeft(gnSeqI crop_amount, uint seqI);
+	virtual void CropRight(gnSeqI crop_amount, uint seqI);
+
+	void GetAlignment( std::vector< bitset_t >& align_matrix ) const;
+
+	void GetColumn( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column ) const;
+
+	/**
+	 * Writes the location of this UngappedLocalAlignment to the specified output stream (e.g. cout).
+	 */
+	template<typename AMImpl> friend std::ostream& operator<<(std::ostream& os, const UngappedLocalAlignment<AMImpl>& ula); // write to an output stream
+
+	bool IsGap( uint seqI, gnSeqI col ) const {
+		return (this->LeftEnd(seqI) != NO_MATCH && col < m_length);	// NOTE(review): true when seqI is defined and col is in range -- looks inverted for a method named IsGap; confirm against callers
+	}
+
+protected:
+
+	gnSeqI m_length;	// number of columns, shared by every sequence in the match
+};
+
+
+/**
+ * Default constructor.  Builds the base match with its own default
+ * constructor and starts with a length of zero, so that Length() and
+ * operator== never read an indeterminate m_length.
+ */
+template< class AbstractMatchImpl >
+UngappedLocalAlignment< AbstractMatchImpl >::UngappedLocalAlignment() : AbstractMatchImpl(), m_length( 0 )
+{
+}
+
+
+/**
+ * Creates an alignment able to hold seq_count sequences.  The length starts
+ * at zero so that Length() and operator== never read an indeterminate
+ * m_length before SetLength() is called.
+ * @param seq_count The total number of sequences in the alignment
+ */
+template< class AbstractMatchImpl >
+UngappedLocalAlignment< AbstractMatchImpl >::UngappedLocalAlignment(uint seq_count)
+ : AbstractMatchImpl( seq_count ), m_length( 0 )
+{
+}
+
+
+/** Returns a copy of this alignment allocated with operator new. */
+template< class AbstractMatchImpl >
+UngappedLocalAlignment< AbstractMatchImpl >*
+UngappedLocalAlignment< AbstractMatchImpl >::Clone() const
+{
+	UngappedLocalAlignment* duplicate = new UngappedLocalAlignment( *this );
+	return duplicate;
+}
+
+/** Returns a copy of this alignment obtained from m_allocateAndCopy(). */
+template< class AbstractMatchImpl >
+UngappedLocalAlignment<AbstractMatchImpl>*
+UngappedLocalAlignment<AbstractMatchImpl>::Copy() const
+{
+	const UngappedLocalAlignment& self = *this;
+	return m_allocateAndCopy( self );
+}
+/** Hands this object to m_free(). */
+template< class AbstractMatchImpl >
+void UngappedLocalAlignment<AbstractMatchImpl>::Free()
+{
+	UngappedLocalAlignment* self = this;
+	m_free( self );
+}
+
+/**
+ * Two alignments are equal when their lengths agree and the base-class
+ * comparison also reports equality.
+ */
+template< class AbstractMatchImpl >
+boolean UngappedLocalAlignment<AbstractMatchImpl>::operator==(const UngappedLocalAlignment& ula) const
+{
+	if( m_length == ula.m_length )
+	{
+		// lengths agree, so the base class has the final word
+		return AbstractMatchImpl::operator==(ula);
+	}
+	return false;
+}
+
+/** Shifts the start of every defined sequence by distance. */
+template< class AbstractMatchImpl >
+void UngappedLocalAlignment<AbstractMatchImpl>::Move( int64 distance )
+{
+	uint32 seqI = 0;
+	while( seqI < AbstractMatchImpl::SeqCount() )
+	{
+		const int64 cur_start = AbstractMatchImpl::Start(seqI);
+		// sequences without a match keep their NO_MATCH marker
+		if( cur_start != NO_MATCH )
+			AbstractMatchImpl::SetStart( seqI, cur_start + distance );
+		++seqI;
+	}
+}
+
+/**
+ * Removes crop_amount columns from the start of the alignment.
+ * Raises SeqIndexOutOfBounds when crop_amount exceeds the current length.
+ */
+template< class AbstractMatchImpl >
+void UngappedLocalAlignment<AbstractMatchImpl>::CropStart(gnSeqI crop_amount)
+{
+	if( m_length < crop_amount )
+	{
+		Throw_gnEx( genome::SeqIndexOutOfBounds() );
+	}
+	m_length -= crop_amount;
+	AbstractMatchImpl::MoveStart( crop_amount );
+}
+
+/**
+ * Removes crop_amount columns from the end of the alignment.
+ * Raises SeqIndexOutOfBounds when crop_amount exceeds the current length.
+ */
+template< class AbstractMatchImpl >
+void UngappedLocalAlignment<AbstractMatchImpl>::CropEnd(gnSeqI crop_amount)
+{
+	if( m_length < crop_amount )
+	{
+		Throw_gnEx( genome::SeqIndexOutOfBounds() );
+	}
+	m_length -= crop_amount;
+	AbstractMatchImpl::MoveEnd( crop_amount );
+}
+
+/** Grows the alignment by extend_amount columns at its start. */
+template< class AbstractMatchImpl >
+void UngappedLocalAlignment<AbstractMatchImpl>::ExtendStart(gnSeqI extend_amount)
+{
+	m_length += extend_amount;
+	// convert to a signed type before negating
+	AbstractMatchImpl::MoveStart( -static_cast<int64>( extend_amount ) );
+}
+
+/** Grows the alignment by extend_amount columns at its end. */
+template< class AbstractMatchImpl >
+void UngappedLocalAlignment<AbstractMatchImpl>::ExtendEnd(gnSeqI extend_amount)
+{
+	m_length += extend_amount;
+	// convert to a signed type before negating
+	AbstractMatchImpl::MoveEnd( -static_cast<int64>( extend_amount ) );
+}
+
+/**
+ * Crops crop_amount columns from the left side as seen by sequence seqI:
+ * the start when seqI is in forward orientation, the end otherwise.
+ */
+template< class AbstractMatchImpl >
+void UngappedLocalAlignment<AbstractMatchImpl>::CropLeft(gnSeqI crop_amount, uint seqI)
+{
+	const bool is_forward = ( AbstractMatchImpl::Orientation(seqI) == AbstractMatch::forward );
+	if( !is_forward )
+	{
+		CropEnd( crop_amount );
+		return;
+	}
+	CropStart( crop_amount );
+}
+
+/**
+ * Crops crop_amount columns from the right side as seen by sequence seqI:
+ * the end when seqI is in forward orientation, the start otherwise.
+ */
+template< class AbstractMatchImpl >
+void UngappedLocalAlignment<AbstractMatchImpl>::CropRight(gnSeqI crop_amount, uint seqI)
+{
+	const bool is_forward = ( AbstractMatchImpl::Orientation(seqI) == AbstractMatch::forward );
+	if( !is_forward )
+	{
+		CropStart( crop_amount );
+		return;
+	}
+	CropEnd( crop_amount );
+}
+
+/**
+ * Fills align_matrix with one bitset per sequence, each AlignmentLength()
+ * bits wide.  The row of every sequence whose LeftEnd is not NO_MATCH has
+ * all of its bits set; the remaining rows stay cleared.
+ */
+template< class AbstractMatchImpl >
+void UngappedLocalAlignment< AbstractMatchImpl >::GetAlignment( std::vector< bitset_t >& align_matrix ) const
+{
+	const uint seq_total = this->SeqCount();
+	align_matrix.assign( seq_total, bitset_t( this->AlignmentLength(), false ) );
+	for( uint row = 0; row < seq_total; ++row )
+	{
+		if( this->LeftEnd(row) == NO_MATCH )
+			continue;	// undefined sequence: leave the row cleared
+		align_matrix[row].flip();
+	}
+}
+
+template< typename AbstractMatchImpl >
+std::ostream& operator<<(std::ostream& os, const UngappedLocalAlignment< AbstractMatchImpl >& ula);
+
+/**
+ * Writes the alignment length followed by one tab-separated start
+ * coordinate per sequence.
+ */
+template< typename AbstractMatchImpl >
+std::ostream& operator<<(std::ostream& os, const UngappedLocalAlignment< AbstractMatchImpl >& ula)
+{
+	os << ula.m_length;
+	uint seqI = 0;
+	while( seqI < ula.SeqCount() )
+	{
+		os << '\t';
+		os << ula.Start(seqI);
+		++seqI;
+	}
+	return os;
+}
+
+
+/**
+ * Reports, for alignment column col, the position of that column in every
+ * sequence.  A forward sequence gets LeftEnd + col and a reverse sequence
+ * gets RightEnd - col.  Any other orientation leaves pos at NO_MATCH and
+ * clears the matching entry of column.
+ */
+template< class AbstractMatchImpl >
+void UngappedLocalAlignment< AbstractMatchImpl >::GetColumn( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column ) const
+{
+	const uint seq_total = this->SeqCount();
+	pos = std::vector<gnSeqI>( seq_total, NO_MATCH );
+	column = std::vector<bool>( seq_total, true );
+	for( uint seqI = 0; seqI < seq_total; ++seqI )
+	{
+		if( this->Orientation(seqI) == AbstractMatch::forward )
+		{
+			pos[seqI] = this->LeftEnd(seqI) + col;
+			continue;
+		}
+		if( this->Orientation(seqI) == AbstractMatch::reverse )
+		{
+			pos[seqI] = this->RightEnd(seqI) - col;
+			continue;
+		}
+		// neither forward nor reverse: the sequence is absent from this column
+		column[seqI] = false;
+	}
+}
+
+}
+
+#endif // _UngappedLocalAlignment_h_
diff --git a/libMems/configuration.h b/libMems/configuration.h
new file mode 100644
index 0000000..15928b7
--- /dev/null
+++ b/libMems/configuration.h
@@ -0,0 +1,37 @@
+#ifndef __libMems_configuration_h__
+#define __libMems_configuration_h__
+
+#if defined(WIN32)||defined(WIN64)
+
+// set the mems library name to include based on the configuration...
+
+#if defined(WIN64)&&defined(NDEBUG)&&!defined(FASTDEBUG)&&defined(_OPENMP)
+#pragma comment(lib, "mems64omp.lib")
+#endif
+#if defined(WIN64)&&defined(FASTDEBUG)&&defined(_OPENMP)
+#pragma comment(lib, "mems64fdomp.lib")
+#endif
+#if defined(WIN32)&&!defined(WIN64)&&defined(NDEBUG)&&!defined(FASTDEBUG)&&defined(_OPENMP)
+#pragma comment(lib, "memsomp.lib")
+#endif
+#if defined(WIN32)&&!defined(WIN64)&&defined(FASTDEBUG)&&defined(_OPENMP)
+#pragma comment(lib, "memsfdomp.lib")
+#endif
+#if defined(WIN64)&&defined(NDEBUG)&&!defined(FASTDEBUG)&&!defined(_OPENMP)
+#pragma comment(lib, "mems64.lib")
+#endif
+#if defined(WIN64)&&defined(FASTDEBUG)&&!defined(_OPENMP)
+#pragma comment(lib, "mems64fd.lib")
+#endif
+#if defined(WIN32)&&!defined(WIN64)&&defined(NDEBUG)&&!defined(FASTDEBUG)&&!defined(_OPENMP)
+#pragma comment(lib, "mems.lib")
+#endif
+#if defined(WIN32)&&!defined(WIN64)&&defined(FASTDEBUG)&&!defined(_OPENMP)
+#pragma comment(lib, "memsfd.lib")
+#endif
+
+
+#endif
+
+#endif // __libMems_configuration_h__
+
diff --git a/libMems/dmSML/Makefile.am b/libMems/dmSML/Makefile.am
new file mode 100644
index 0000000..7df4a8c
--- /dev/null
+++ b/libMems/dmSML/Makefile.am
@@ -0,0 +1,22 @@
+AM_CFLAGS = -DUSE_POSIX_AIO
+
+DMSML_H = \
+asyncio.h alinuxaio.h aPOSIXaio.h \
+alibc.h awin32aio.h buffer.h \
+util.h sorting.h dmsort.h \
+timing.h sml.h
+
+DMSML_SRC = \
+asyncio.c alinuxaio.c aPOSIXaio.c \
+alibc.c awin32aio.c buffer.c \
+util.c sorting.c dmsort.c \
+timing.c sml.c
+
+library_includedir=$(includedir)/$(GENERIC_LIBRARY_NAME)-$(GENERIC_API_VERSION)/$(GENERIC_LIBRARY_NAME)/dmSML
+
+library_include_HEADERS = $(DMSML_H)
+
+noinst_LTLIBRARIES = libdmSML.la
+libdmSML_la_SOURCES = $(DMSML_SRC)
+
+INCLUDES = -I$(top_srcdir) $(DEPS_CFLAGS)
diff --git a/libMems/dmSML/aPOSIXaio.c b/libMems/dmSML/aPOSIXaio.c
new file mode 100644
index 0000000..9d5851c
--- /dev/null
+++ b/libMems/dmSML/aPOSIXaio.c
@@ -0,0 +1,124 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/dmSML/aPOSIXaio.h"
+#ifdef USE_POSIX_AIO
+
+#include "libMems/dmSML/asyncio.h"
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+
+// Opens path for the POSIX AIO backend and stores the descriptor in file.
+// A_READ opens read-only; any other mode opens read/write, creating and
+// truncating the file.  Returns 1 on success, 0 on failure (after perror).
+int OpenPAIO( aFILE * file, const char *path, int mode ){
+	const int perms = S_IREAD | S_IWRITE | S_IRGRP | S_IWGRP;
+	int oflags = 0;
+#ifdef O_LARGEFILE
+	oflags |= O_LARGEFILE;
+#endif
+	if( mode == A_READ )
+		oflags |= O_RDONLY;
+	else
+		oflags |= O_RDWR | O_CREAT | O_TRUNC;
+
+	file->file_descriptor = open( path, oflags, perms );
+	if( file->file_descriptor < 0 )
+		perror( path );
+	return file->file_descriptor >= 0;
+}
+
+// Closes the descriptor held by file.  Returns 1 on success, 0 on failure.
+int ClosePAIO( aFILE * file ){
+	const int rc = close( file->file_descriptor );
+	return rc == 0;
+}
+
+// Allocates and fills the POSIX aiocb that describes the request in rec.
+// When rec->pos is not CURRENT_POS the file pointer stored in file is first
+// moved to rec->pos.  Returns 1 on success, 0 when the allocation fails.
+int FillAIOStruct( aFILE * file, aIORec * rec ){
+// fill the request data structure
+	rec->aio_cb = (aiocb_t*) malloc( sizeof(aiocb_t));
+	if(rec->aio_cb == 0)
+		return 0;
+	// zero the whole control block (including aio_sigevent and any
+	// implementation-private members) so no field is handed to the AIO
+	// layer uninitialised; the Linux backend does the same
+	memset( rec->aio_cb, 0, sizeof(aiocb_t) );
+
+	if( rec->pos != CURRENT_POS ){
+		// split the 64-bit position into the two halves kept in the file
+		offset_t tmppos = rec->pos;
+		tmppos >>= 32;
+		file->filep_high = tmppos;
+		// clear high bits.  Is this really necessary?
+		tmppos = rec->pos;
+		tmppos <<= 32;
+		tmppos >>= 32;
+		file->filep_low = tmppos;
+	}
+
+	rec->aio_cb->aio_fildes = file->file_descriptor;
+	// rebuild the 64-bit offset from the stored halves
+	rec->aio_cb->aio_offset = file->filep_high;
+	rec->aio_cb->aio_offset <<= 32;
+	rec->aio_cb->aio_offset |= file->filep_low;
+	rec->aio_cb->aio_buf = rec->buf;
+	rec->aio_cb->aio_nbytes = rec->size * rec->count;
+	rec->aio_cb->aio_reqprio = 0;
+	return 1;
+}
+
+// Submits the write described by rec with aio_write().
+// Returns 1 when the request was queued, 0 otherwise.
+int WritePAIO( aFILE * file, aIORec * rec ){
+	int req_error;
+	if( !FillAIOStruct( file, rec ) )
+		return 0;
+
+	// request the io
+	rec->aio_cb->aio_lio_opcode = LIO_WRITE;
+	req_error = aio_write(rec->aio_cb);
+	if(req_error == -1){
+		perror("write");
+		printf( "aiocb->aio_reqprio = %d\n", rec->aio_cb->aio_reqprio );
+		// the request was not queued: release the control block now,
+		// because a retry goes through FillAIOStruct() again and would
+		// otherwise leak this allocation
+		free( rec->aio_cb );
+		rec->aio_cb = NULL;
+	}
+	return req_error == 0;
+}
+
+// Submits the read described by rec with aio_read().
+// Returns 1 when the request was queued, 0 otherwise.
+int ReadPAIO( aFILE * file, aIORec * rec ){
+	int req_error;
+	// fill the request data structure
+	if( !FillAIOStruct( file, rec ) )
+		return 0;
+
+	// request the io
+	rec->aio_cb->aio_lio_opcode = LIO_READ;
+	req_error = aio_read(rec->aio_cb);
+	if(req_error == -1){
+		// report the failure against the operation that actually failed
+		perror("read");
+		printf( "aiocb->aio_reqprio = %d\n", rec->aio_cb->aio_reqprio );
+		// the request was not queued: release the control block now,
+		// because a retry goes through FillAIOStruct() again and would
+		// otherwise leak this allocation
+		free( rec->aio_cb );
+		rec->aio_cb = NULL;
+	}
+	return req_error == 0;
+}
+
+// PRECONDITION:  file->queuetail is not null
+// Polls, without waiting, whether the request at the tail of the file's
+// queue has completed.  Returns 1 when aio_suspend() reports completion and
+// 0 in every other case (errors from aio_suspend() are not reported).
+int QueryLastCompletePAIO( aFILE * file ){
+	struct aiocb *pending[1];
+	struct timespec no_wait;
+
+	pending[0] = file->queuetail->aio_cb;
+	no_wait.tv_sec = 0;
+	no_wait.tv_nsec = 0;
+
+	//why, shouldnt we tell the caller what finished?
+	return aio_suspend(pending, 1, &no_wait) == 0 ? 1 : 0;
+}
+
+#endif /* USE_POSIX_AIO */
diff --git a/libMems/dmSML/aPOSIXaio.h b/libMems/dmSML/aPOSIXaio.h
new file mode 100644
index 0000000..410eb8b
--- /dev/null
+++ b/libMems/dmSML/aPOSIXaio.h
@@ -0,0 +1,18 @@
+#ifndef _aPOSIXaio_h_
+#define _aPOSIXaio_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/dmSML/asyncio.h"
+
+int OpenPAIO( aFILE * file, const char *path, int mode );
+int ClosePAIO( aFILE * file );
+
+int WritePAIO( aFILE * file, aIORec * rec );
+int ReadPAIO( aFILE * file, aIORec * rec );
+
+int QueryLastCompletePAIO( aFILE * file );
+
+#endif /* _aPOSIXaio_h_ */
diff --git a/libMems/dmSML/alibc.c b/libMems/dmSML/alibc.c
new file mode 100644
index 0000000..b14bf7a
--- /dev/null
+++ b/libMems/dmSML/alibc.c
@@ -0,0 +1,47 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/dmSML/asyncio.h"
+#include "libMems/dmSML/alibc.h"
+
+#if defined USE_LIBC
+
+// Opens path with fopen() ("rb" for A_READ, "wb" otherwise) and stores the
+// stream in file->libchandle.  Returns 1 on success, 0 on failure.
+int OpenLibC( aFILE * file, const char *path, int mode ) {
+    const char * how = "wb";
+    if( mode == A_READ ) {
+        how = "rb";
+    }
+    file->libchandle = fopen( path, how );
+    return( file->libchandle != NULL ? 1 : 0 );
+}
+
+
+// Closes the stdio stream held by file.
+// Returns 1 on success and 0 when fclose() reports an error (for example a
+// failed flush of buffered data), matching the other backends' close calls.
+int CloseLibC( aFILE * file ) {
+    return( fclose( file->libchandle ) == 0 );
+}
+
+
+// Writes rec->count items of rec->size bytes from rec->buf to the stream.
+// The fwrite() result is discarded and 1 is always returned.
+int WriteLibC( aFILE * file, aIORec * rec ) {
+    FILE * stream = file->libchandle;
+    (void) fwrite( rec->buf, rec->size, rec->count, stream );
+    return( 1 );
+}
+
+// Reads rec->count items of rec->size bytes from the stream into rec->buf.
+// The fread() result is discarded and 1 is always returned.
+int ReadLibC( aFILE * file, aIORec * rec ) {
+    FILE * stream = file->libchandle;
+    (void) fread( rec->buf, rec->size, rec->count, stream );
+    return( 1 );
+}
+
+
+// Always reports completion.
+int OperationCompleteLibC( aFILE * file ) {
+    // libc operations are atomic
+    (void) file;
+    return( 1 );
+}
+
+// Always returns 1.
+// NOTE(review): the comment says libc operations are atomic, which suggests
+// "never busy"; confirm whether 1 is the intended answer here.
+int FileBusyLibC( aFILE * file ) {
+    // libc operations are atomic
+    (void) file;
+    return( 1 );
+}
+
+#endif /* USE_LIBC */
diff --git a/libMems/dmSML/alibc.h b/libMems/dmSML/alibc.h
new file mode 100644
index 0000000..e9e626e
--- /dev/null
+++ b/libMems/dmSML/alibc.h
@@ -0,0 +1,15 @@
+#ifndef _alibc_h_
+#define _alibc_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+int OpenLibC( aFILE * file, const char *path, int mode );
+int CloseLibC( aFILE * file );
+int WriteLibC( aFILE * file, aIORec * rec );
+int ReadLibC( aFILE * file, aIORec * rec );
+
+/* Line ending test modification... */
+
+#endif /* _alibc_h_ */
diff --git a/libMems/dmSML/alinuxaio.c b/libMems/dmSML/alinuxaio.c
new file mode 100644
index 0000000..15aee68
--- /dev/null
+++ b/libMems/dmSML/alinuxaio.c
@@ -0,0 +1,283 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/dmSML/alinuxaio.h"
+#ifdef USE_LINUX_AIO
+
+#include <libaio.h>
+
+#include "libMems/dmSML/asyncio.h"
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+/*
+#define __NR_io_setup		245
+#define __NR_io_destroy		246
+#define __NR_io_getevents	247
+#define __NR_io_submit		248
+#define __NR_io_cancel		249
+*/
+
+io_context_t ctx_id = NULL;
+
+#ifndef __u64
+typedef unsigned long long __u64;
+#endif
+
+__u64 current_id = 0;
+
+unsigned event_max = 10000;
+
+// error = sys_io_destroy( ctx_id );
+
+
+typedef struct completion_id_s {
+	__u64 data;	// copy of io_event.data for one harvested completion
+	struct completion_id_s* next;	// following node in the circular list
+	struct completion_id_s* last;	// preceding node in the circular list
+} completion_id_t;
+
+typedef struct completion_id_list_s { 
+    int nitems;	// number of nodes currently linked into the list
+    completion_id_t * head;	// first node, or NULL when the list is empty
+} completion_id_list_t;
+
+// buffer list manipulations
+// returns argument
+completion_id_list_t * InitListComp( completion_id_list_t * list );
+void PushHeadComp( completion_id_list_t * list, completion_id_t * item );
+void PushTailComp( completion_id_list_t * list, completion_id_t * item );
+completion_id_t * PopHeadComp( completion_id_list_t * list );
+completion_id_t * PopTailComp( completion_id_list_t * list );
+// returns second argument
+completion_id_t * RemoveItemComp( completion_id_list_t * list, completion_id_t * item );
+
+
+// completion list manipulations
+// Resets list to the empty state and returns it.
+completion_id_list_t * InitListComp( completion_id_list_t * list ) {
+    list->nitems = 0;
+    list->head = NULL;
+    return( list );
+}
+
+
+// Links item in front of the current head of the circular list and makes it
+// the new head.
+void PushHeadComp( completion_id_list_t * list, completion_id_t * item ) {
+    completion_id_t * old_head = list->head;
+    completion_id_t * tail;
+
+    // an empty list becomes a one-element ring that points at itself
+    if( old_head == NULL ) {
+        item->next = item;
+        item->last = item;
+        list->head = item;
+        list->nitems = 1;
+        return;
+    }
+
+    // otherwise splice the item in between the tail and the old head
+    tail = old_head->last;
+    item->last = tail;
+    item->next = old_head;
+    tail->next = item;
+    old_head->last = item;
+    list->head = item;
+    list->nitems += 1;
+}
+
+// Appends item at the tail of the circular list.
+void PushTailComp( completion_id_list_t * list, completion_id_t * item ) {
+    // insert in front of the head first ...
+    PushHeadComp( list, item );
+    // ... then rotate the head pointer one step onwards, which leaves the
+    // new item in the last position of the ring
+    list->head = list->head->last;
+}
+
+// Unlinks and returns the head of the list, or NULL when the list is empty.
+completion_id_t * PopHeadComp( completion_id_list_t * list ) {
+    completion_id_t * victim = list->head;
+    completion_id_t * succ;
+    completion_id_t * pred;
+
+    if( victim == NULL ) {
+        return( NULL );
+    }
+    // close the ring around the node being removed
+    succ = victim->next;
+    pred = victim->last;
+    succ->last = pred;
+    pred->next = succ;
+    // detach the removed node completely
+    victim->next = NULL;
+    victim->last = NULL;
+    list->nitems -= 1;
+    list->head = ( list->nitems == 0 ) ? NULL : succ;
+    return( victim );
+}
+
+// Unlinks and returns the tail of the list, or NULL when the list is empty.
+completion_id_t * PopTailComp( completion_id_list_t * list ) {
+    completion_id_t * first = list->head;
+    if( first == NULL ) {
+        return( NULL );
+    }
+    // step the head back onto the tail so that the tail can be removed
+    // with an ordinary head pop
+    list->head = first->last;
+    return( PopHeadComp( list ) );
+}
+
+// Unlinks item from list and returns it (the second argument).
+completion_id_t * RemoveItemComp( completion_id_list_t * list, completion_id_t * item ) {
+    completion_id_t * succ;
+    completion_id_t * pred;
+
+    // FIXME: handle NULL cases in a reasonable way?
+    if( list->head == item ) {
+        return( PopHeadComp( list ) );
+    }
+    // interior node: bridge its two neighbours
+    succ = item->next;
+    pred = item->last;
+    succ->last = pred;
+    pred->next = succ;
+    item->next = NULL;
+    item->last = NULL;
+    list->nitems -= 1;
+    if( list->nitems == 0 ) {
+        list->head = NULL;
+    }
+    return( item );
+}
+
+
+completion_id_list_t *completion_list = NULL;
+
+// Opens path for the Linux AIO backend and stores the descriptor in file.
+// The first call also creates the shared AIO context and the completion
+// list.  Returns 1 on success and 0 when the context, the completion list or
+// the file itself could not be set up.
+int OpenLinux( aFILE * file, const char *path, int mode ){
+	long error;
+	if( ctx_id == 0 ){
+		error = io_queue_init( event_max, &ctx_id );
+		if( error != 0 ){
+			// libaio reports failures as a negative errno value rather
+			// than through errno, so perror() would print the wrong text
+			fprintf( stderr, "io_setup: %s\n", strerror( (int)-error ) );
+			// without a context every later io_submit() would fail
+			file->file_descriptor = -1;
+			return 0;
+		}
+	}
+	if( completion_list == NULL ){
+		completion_list = (completion_id_list_t*)malloc( sizeof( completion_id_list_t ) );
+		if( completion_list == NULL ){
+			perror( "malloc" );
+			file->file_descriptor = -1;
+			return 0;
+		}
+		completion_list = InitListComp( completion_list );
+	}
+		
+	if(mode == A_READ){
+		file->file_descriptor = open(path, O_LARGEFILE | O_RDONLY, S_IREAD | S_IWRITE | S_IRGRP | S_IWGRP );
+	}else{
+		file->file_descriptor = open(path, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE,  S_IREAD | S_IWRITE | S_IRGRP | S_IWGRP);
+	}
+	if(file->file_descriptor < 0){
+		
+		perror(path);
+	}
+	return file->file_descriptor >= 0;
+}
+
+// Closes the descriptor held by file.  Returns 1 on success, 0 on failure.
+int CloseLinux( aFILE * file ){
+	const int rc = close( file->file_descriptor );
+	return rc == 0;
+}
+
+// Releases everything OpenLinux() set up: the completion records that were
+// never consumed, the completion list itself and the kernel AIO context.
+// Safe to call when nothing was initialised.
+void CleanupLinux(){
+	if( completion_list != NULL ){
+		completion_id_t *node;
+		// free the nodes still linked into the list before the list
+		while( (node = PopHeadComp( completion_list )) != NULL )
+			free( node );
+		free( completion_list );
+		completion_list = NULL;
+	}
+	if( ctx_id != NULL ){
+		// counterpart of io_queue_init(); dropping the handle without
+		// this call leaks the kernel context
+		io_queue_release( ctx_id );
+		ctx_id = NULL;
+	}
+}
+
+// Allocates a zeroed iocb for the request in rec and fills in descriptor,
+// offset, buffer and byte count.  When rec->pos is not CURRENT_POS the file
+// pointer stored in file is first moved to rec->pos.
+// Returns 1 on success, 0 when the allocation fails.
+int FillAIOStruct( aFILE * file, aIORec * rec ){
+	iocb_t *cb = (iocb_t*) malloc( sizeof(iocb_t) );
+	rec->aio_cb = cb;
+	if( cb == 0 )
+		return 0;
+	memset( cb, 0, sizeof(iocb_t) );
+
+	if( rec->pos != CURRENT_POS ){
+		// split the 64-bit position into the two halves kept in the file
+		const offset_t whole = rec->pos;
+		file->filep_high = whole >> 32;
+		// clear high bits.  Is this really necessary?
+		file->filep_low = (whole << 32) >> 32;
+	}
+
+//	cb->aio_data = current_id++;
+	cb->aio_fildes = file->file_descriptor;
+	// rebuild the 64-bit offset from the stored halves
+	cb->u.c.offset = file->filep_high;
+	cb->u.c.offset <<= 32;
+	cb->u.c.offset |= file->filep_low;
+	cb->u.c.buf = rec->buf;
+	cb->u.c.nbytes = rec->size * rec->count;
+
+	return 1;
+}
+
+// Submits the write described by rec with io_submit().
+// Returns 1 when the request was queued, 0 otherwise.
+int WriteLinux( aFILE * file, aIORec * rec ){
+	int req_error;
+	if( !FillAIOStruct( file, rec ) )
+		return 0;
+
+	// request the io
+	rec->aio_cb->aio_lio_opcode = IO_CMD_PWRITE;
+	req_error = io_submit( ctx_id, 1, &rec->aio_cb );
+	if(req_error != 1){
+		printf("write_submit: io_submit res=%d [%s]\n", req_error, strerror(-req_error));
+		printf( "aiocb->aio_fildes = %d\n", rec->aio_cb->aio_fildes );
+		// explicit casts keep the arguments in step with the conversions
+		printf( "aiocb->u.c.offset = %llu\n", (unsigned long long)rec->aio_cb->u.c.offset );
+		printf( "aiocb->u.c.buf = %p\n", (void*)rec->aio_cb->u.c.buf );
+		printf( "aiocb->u.c.nbytes = %llu\n", (unsigned long long)rec->aio_cb->u.c.nbytes );
+		printf( "aiocb->aio_reqprio = %d\n", rec->aio_cb->aio_reqprio );
+		// the request was not queued: release the control block now,
+		// because a retry goes through FillAIOStruct() again and would
+		// otherwise leak this allocation
+		free( rec->aio_cb );
+		rec->aio_cb = NULL;
+	}
+	return req_error == 1;
+}
+
+// Submits the read described by rec with io_submit().
+// Returns 1 when the request was queued, 0 otherwise.
+int ReadLinux( aFILE * file, aIORec * rec ){
+	int req_error;
+	// fill the request data structure
+	if( !FillAIOStruct( file, rec ) )
+		return 0;
+
+	// request the io
+	rec->aio_cb->aio_lio_opcode = IO_CMD_PREAD;
+	req_error = io_submit( ctx_id, 1, &rec->aio_cb );
+	if(req_error != 1){
+		printf("read_submit: io_submit res=%d [%s]\n", req_error, strerror(-req_error));
+		printf( "aiocb->aio_reqprio = %d\n", rec->aio_cb->aio_reqprio );
+		// the request was not queued: release the control block now,
+		// because a retry goes through FillAIOStruct() again and would
+		// otherwise leak this allocation
+		free( rec->aio_cb );
+		rec->aio_cb = NULL;
+	}
+	return req_error == 1;
+}
+
+
+// PRECONDITION:  file->queuetail is not null
+// Polls the shared AIO context for at most one finished request, records it
+// in completion_list, then consumes one recorded completion whose data value
+// matches.  Returns 1 when a completion was consumed and 0 when nothing has
+// completed yet.
+// NOTE(review): completions are matched on io_event.data only, and
+// FillAIOStruct() leaves that field zero, so a completion is not tied to
+// this particular file -- confirm this is acceptable for multi-file use.
+int QueryLastCompleteLinux( aFILE * file ){
+	int rval;
+	int compI;
+	completion_id_t *comp;
+	struct io_event ioe;
+	struct timespec zero_wait;
+
+	// start from a known state: ioe is compared below even when
+	// io_getevents() harvests nothing
+	memset( &ioe, 0, sizeof(ioe) );
+
+	zero_wait.tv_sec = 0;
+	zero_wait.tv_nsec = 10000000;
+	
+	rval = io_getevents( ctx_id, 0, 1, &ioe, &zero_wait );
+	if( rval == 1 ){
+		completion_id_t *completion = (completion_id_t*)malloc( sizeof(completion_id_t) );
+		if( completion == NULL )
+			return 1;	// cannot record it, so report this completion directly
+		completion->data = ioe.data;
+		PushTailComp( completion_list, completion );
+	}
+	// look for a recorded completion carrying the same data value
+	comp = completion_list->head;
+	for( compI = 0; compI < completion_list->nitems; compI++ ){
+		if( comp->data == ioe.data )
+			break;
+		comp = comp->next;	// advance, otherwise only the head is ever examined
+	}
+	if( compI != completion_list->nitems ){
+		RemoveItemComp( completion_list, comp );
+		free( comp );	// the record was allocated above and is no longer referenced
+		return 1; // success
+	}
+	return 0;	// hasn't completed yet
+}
+
+#endif
diff --git a/libMems/dmSML/alinuxaio.h b/libMems/dmSML/alinuxaio.h
new file mode 100644
index 0000000..9474c61
--- /dev/null
+++ b/libMems/dmSML/alinuxaio.h
@@ -0,0 +1,19 @@
+#ifndef _alinuxaio_h_
+#define _alinuxaio_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/dmSML/asyncio.h"
+
+
+int OpenLinux( aFILE * file, const char *path, int mode );
+int CloseLinux( aFILE * file );
+
+int WriteLinux( aFILE * file, aIORec * rec );
+int ReadLinux( aFILE * file, aIORec * rec );
+
+int QueryLastCompleteLinux( aFILE * file );
+
+#endif /* _alinuxaio_h_ */
diff --git a/libMems/dmSML/asyncio.c b/libMems/dmSML/asyncio.c
new file mode 100644
index 0000000..218631d
--- /dev/null
+++ b/libMems/dmSML/asyncio.c
@@ -0,0 +1,358 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/dmSML/asyncio.h"
+
+#include "libMems/dmSML/alibc.h"
+#include "libMems/dmSML/awin32aio.h"
+#include "libMems/dmSML/aPOSIXaio.h"
+
+#include "libMems/dmSML/util.h"
+
+#include "libMems/dmSML/buffer.h"
+#include <string.h>
+
+#if defined(USE_POSIX_AIO)||defined(USE_LINUX_AIO)
+#include <unistd.h>
+#include <sys/stat.h>
+#endif
+
+static int OperationNumber = 0;
+
+int QueueEmpty( aFILE * file );
+void RemoveOperation( aFILE * file );
+void FreeQueue( aFILE * file );
+int ExecuteWrite( aFILE * file, aIORec * rec );
+int ExecuteRead( aFILE * file, aIORec * rec );
+int EnqueueOperation( char * buffer, offset_t size, offset_t count, aFILE * file, offset_t pos );
+void ExecuteOperation( aFILE * file );
+int QueryOpComplete( aFILE * file );
+int aAct( void * buffer, offset_t size, offset_t count, aFILE * file, offset_t pos );
+
+// Returns 1 when the file has no queued operations (both queue ends NULL).
+int QueueEmpty( aFILE * file ) {
+    if( file->queuehead != NULL ) {
+        return( 0 );
+    }
+    return( file->queuetail == NULL );
+}
+
+// Drops the record at the tail of the queue and frees it together with its
+// backend-specific control block.  Does nothing on an empty queue.
+void RemoveOperation( aFILE * file ) {
+    aIORec * done;
+    if( QueueEmpty( file ) ) {
+        return;
+    }
+    done = file->queuetail;
+    if( done != file->queuehead ) {
+        // more records remain: the next one becomes the new tail
+        aIORec * successor = done->next;
+        successor->last = NULL;
+        file->queuetail = successor;
+    } else {
+        // that was the only record
+        file->queuehead = NULL;
+        file->queuetail = NULL;
+    }
+    // FIXME: ack hack from my poor design for win32
+#if defined USE_WIN32
+    free( done->w32overlapped );
+#elif defined(USE_POSIX_AIO)||defined(USE_LINUX_AIO)
+    free( done->aio_cb );
+#endif
+    free( done );
+}
+
+
+// Removes and frees every record still queued on the file.
+void FreeQueue( aFILE * file ) {
+    for( ; !QueueEmpty( file ); ) {
+        RemoveOperation( file );
+    }
+}
+
+
+
+// Opens a file through the backend selected at compile time.
+// Returns a newly allocated aFILE, or NULL when memory is exhausted or the
+// backend could not open path.
+aFILE * aOpen( const char * path, int mode ) {
+    int err = 0;
+    aFILE *ret = malloc( sizeof( *ret ) );
+
+    // without this check the memset below would write through NULL
+    if( ret == NULL ) {
+        return( NULL );
+    }
+    
+    memset( ret, 0, sizeof( *ret ) );
+    ret->mode = mode;
+    ret->busy = 0;
+#if defined USE_LINUX_AIO
+    err = !OpenLinux( ret, path, mode );
+#elif defined USE_POSIX_AIO
+    err = !OpenPAIO( ret, path, mode );
+#elif defined USE_LIBC
+    err = !OpenLibC( ret, path, mode );
+#elif defined USE_WIN32
+    err = !OpenWIN32( ret, path, mode );
+#endif
+    if( err ) {
+        free( ret );
+        ret = NULL;
+    }
+    return( ret );
+}
+
+
+// Waits until the file is idle, closes it through the active backend, then
+// frees the remaining queue and the aFILE itself.  Returns the backend's
+// close result.
+int aClose( aFILE * file ) {
+    int result = 0;
+
+    // block until the file is no longer busy.
+    aWaitNotBusy( file );
+
+#if defined USE_LINUX_AIO
+    result = CloseLinux( file );
+#elif defined USE_POSIX_AIO
+    result = ClosePAIO( file );
+#elif defined USE_LIBC
+    result = CloseLibC( file );
+#elif defined USE_WIN32
+    result = CloseWIN32( file );
+#endif
+
+    FreeQueue( file );
+    free( file );
+    return( result );
+}
+
+
+
+// Starts the write in rec through the active backend and marks the file
+// busy when the backend accepted it.  Returns non-zero on failure.
+int ExecuteWrite( aFILE * file, aIORec * rec ) {
+    int failed = 0;
+#if defined USE_LINUX_AIO
+    failed = !WriteLinux( file, rec );
+#elif defined USE_POSIX_AIO
+    failed = !WritePAIO( file, rec );
+#elif defined USE_LIBC
+    failed = !WriteLibC( file, rec );
+#elif defined USE_WIN32
+    failed = !WriteWIN32( file, rec );
+#endif
+    if( !failed ) {
+        file->busy = 1;
+    }
+    return( failed );
+}
+
+
+// Starts the read in rec through the active backend and marks the file busy
+// when the backend accepted it.  Returns non-zero on failure.
+int ExecuteRead( aFILE * file, aIORec * rec ) {
+    int failed = 0;
+#if defined USE_LINUX_AIO
+    failed = !ReadLinux( file, rec );
+#elif defined USE_POSIX_AIO
+    failed = !ReadPAIO( file, rec );
+#elif defined USE_LIBC
+    failed = !ReadLibC( file, rec );
+#elif defined USE_WIN32
+    failed = !ReadWIN32( file, rec );
+#endif
+    if( !failed ) {
+        file->busy = 1;
+    }
+    return( failed );
+}
+
+
+
+// Appends a new request record to the head of the file's queue.
+// Returns the operation number assigned to the request, or 0 when the
+// record could not be allocated (the queue is left untouched in that case).
+int EnqueueOperation( char * buffer, offset_t size, offset_t count, aFILE * file, offset_t pos ) {
+    aIORec * rec = malloc( sizeof( *rec ) );
+    if( rec == NULL ) {
+        // 0 is the documented failure code of aWrite()/aRead()
+        return( 0 );
+    }
+    memset( rec, 0, sizeof( *rec ) );
+    rec->buf = buffer;
+    rec->size = size;
+    rec->count = count;
+    rec->pos = pos;
+    rec->operation = ++OperationNumber;
+    rec->next = NULL;
+
+    // link the fully initialised record in only once allocation succeeded
+    if( QueueEmpty( file ) ) {
+        rec->last = NULL;
+        file->queuehead = file->queuetail = rec;
+    } else {
+        rec->last = file->queuehead;
+        file->queuehead->next = rec;
+        file->queuehead = rec;
+    }
+    return( rec->operation );
+}
+
+
+
+// Starts the request at the tail of the queue, unless the queue is empty or
+// the file is already busy.  After a successful start the stored file
+// pointer is advanced by the size of the request.
+void ExecuteOperation( aFILE * file ) {
+    int failed;
+
+    // if file is busy or there are no pending ops, we can't do much.
+    if( QueueEmpty( file ) || file->busy ) {
+        return;
+    }
+
+    if( file->mode == A_WRITE ) {
+        failed = ExecuteWrite( file, file->queuetail );
+    } else {
+        failed = ExecuteRead( file, file->queuetail );
+    }
+    if( failed ) {
+        return;
+    }
+
+    // advance file pointer
+    AddTo64( file->queuetail->size * file->queuetail->count, &(file->filep_high), &(file->filep_low) );
+}
+
+
+// Asks the active backend whether the request at the tail of the queue has
+// finished.  Returns 1 when it has, or when there is nothing queued.
+int QueryOpComplete( aFILE * file ) {
+    // nothing queued counts as complete
+    if( file->queuetail == NULL ) {
+        return( 1 );
+    }
+    // check to see if the last operation was completed.
+#if defined USE_LINUX_AIO
+    return( QueryLastCompleteLinux( file ) );
+#elif defined USE_POSIX_AIO
+    return( QueryLastCompletePAIO( file ) );
+#elif defined USE_LIBC
+    return( 1 );
+#elif defined USE_WIN32
+    return( QueryLastCompleteWIN32( file ) );
+#else
+    return( 1 );
+#endif
+}
+
+
+
+
+// Asks the active backend to push any buffered data for the file to disk
+// and reports a failure on the console.
+void aFlush( aFILE *file ) {
+#if defined(USE_POSIX_AIO)||defined(USE_LINUX_AIO)
+	const int rc = fsync( file->file_descriptor );
+	if( rc != 0 )
+		perror("fsync");
+#elif defined USE_LIBC
+    const int rc = fflush( file->libchandle );
+    if( rc != 0 ) {
+        printf( "error flushing stdio libc file\n" );
+    }
+#elif defined USE_WIN32
+    if( FlushFileBuffers( file->w32handle ) == 0 ) {
+        printf( "error flushing win32 file\n" );
+    }
+#endif
+}
+
+// Returns the size in bytes of the file at path, or 0 when the file cannot
+// be examined (an error message is printed in that case).
+unsigned long long aStatFileSize( const char * path ) {
+#if defined(USE_POSIX_AIO)||defined(USE_LINUX_AIO)
+	struct stat stat_data;
+	if( stat( path , &stat_data) ){
+		perror(path);
+		return 0;
+	}
+	return stat_data.st_size;
+#elif defined USE_LIBC
+#error "libc aStatSize not implemented"
+#elif defined USE_WIN32
+	WIN32_FILE_ATTRIBUTE_DATA file_data;
+	unsigned long long f_size;
+	// on failure file_data is not filled in, so do not read it
+	if( !GetFileAttributesEx( path, GetFileExInfoStandard, (void*)&file_data ) ){
+		fprintf( stderr, "%s: unable to read file attributes\n", path );
+		return 0;
+	}
+	// combine the two 32-bit halves of the size
+	f_size = file_data.nFileSizeHigh;
+	f_size <<= 32;
+	f_size += file_data.nFileSizeLow;
+	return f_size;
+#endif
+}
+
+
+// Returns the size of the file at path measured in records
+// (bytes / sizeof(record_t)), or 0 when the file cannot be examined.
+// used when skipping the binning phase
+unsigned long aStatSize( const char * path ) {
+#if defined(USE_POSIX_AIO)||defined(USE_LINUX_AIO)
+	struct stat stat_data;
+	if( stat( path , &stat_data) ){
+		perror(path);
+		return 0;
+	}
+	return stat_data.st_size / sizeof(record_t);
+#elif defined USE_LIBC
+#error "libc aStatSize not implemented"
+#elif defined USE_WIN32
+	// the unreachable "Implement me" printf that followed this return
+	// has been dropped
+	return aStatFileSize( path ) / sizeof(record_t);
+#endif
+}
+
+
+// Advances the file's queue by one step: retires the tail request if it has
+// finished, then starts the next queued request if there is one.
+void aUpdateOperations( aFILE * file ) {
+    // always poll the backend first
+    const int finished = QueryOpComplete( file );
+
+    // retire the running request once the backend reports it done
+    if( !QueueEmpty( file ) && file->busy && finished ) {
+        RemoveOperation( file );
+        file->busy = 0;
+    }
+
+    // if the queue is still not empty, start the next one up.
+    if( QueueEmpty( file ) ) {
+        return;
+    }
+    ExecuteOperation( file );
+}
+
+
+
+
+// Queues a request on the file and tries to start whatever can run now.
+// Returns the operation number handed out by EnqueueOperation().
+int aAct( void * buffer, offset_t size, offset_t count, aFILE * file, offset_t pos ) {
+    // enqueue the op.
+    const int op_id = EnqueueOperation( buffer, size, count, file, pos );
+    // execute operations
+    ExecuteOperation( file );
+    return( op_id );
+}
+
+
+// aWrite and aRead queue a write or a read on a file.
+// They return 0 for a failure, or an operation
+// code that can be checked for completion with
+// aOperationComplete.
+int aWrite( void * buffer, offset_t size, offset_t count, aFILE * file, offset_t pos ) {
+    const int op_id = aAct( buffer, size, count, file, pos );
+    return( op_id );
+}
+int aRead( void * buffer, offset_t size, offset_t count, aFILE * file, offset_t pos ) {
+    // same path as aWrite: the file's mode decides what the request does
+    const int op_id = aAct( buffer, size, count, file, pos );
+    return( op_id );
+}
+
+
+// Returns 1 if the operation was completed, 0 otherwise.
+// An operation counts as completed once it is no longer in the file's queue.
+int aOperationComplete( aFILE * file, int operation ) {
+    aIORec *cursor = file->queuetail;
+    // walk from the tail towards the head looking for the operation
+    while( cursor != NULL ) {
+        if( cursor->operation == operation ) {
+            return( 0 );    // still queued
+        }
+        cursor = cursor->next;
+    }
+    return( 1 );
+}
+
+
+// Returns the file's busy flag, which is set to 1 while a request is running.
+int aFileBusy( aFILE * file ) {
+    const int busy_now = file->busy;
+    return( busy_now );
+}
+
+
+// Blocks, driving the file's queue, until the specified operation is
+// complete.
+void aWaitComplete( aFILE * file, int operation ) {
+    for( ;; ) {
+        if( aOperationComplete( file, operation ) ) {
+            break;
+        }
+        aUpdateOperations( file );
+    }
+}
+
+
+// Blocks, driving the file's queue, until the file's busy flag is clear.
+void aWaitNotBusy( aFILE * file ) {
+    for( ;; ) {
+        if( !file->busy ) {
+            break;
+        }
+        aUpdateOperations( file );
+    }
+}
+
+
diff --git a/libMems/dmSML/asyncio.h b/libMems/dmSML/asyncio.h
new file mode 100644
index 0000000..03caa7d
--- /dev/null
+++ b/libMems/dmSML/asyncio.h
@@ -0,0 +1,166 @@
+#ifndef _asyncio_h_
+#define _asyncio_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+//#define USE_LINUX_AIO
+//#define USE_LIBC_AIO	// don't use kaio
+
+#ifdef WIN32
+#   define WIN32_LEAN_AND_MEAN
+#   include <windows.h>
+#   define USE_WIN32
+#else 
+#   ifndef _LARGEFILE64_SOURCE
+#	define _FILE_OFFSET_BITS 64
+#	define _LARGEFILE_SOURCE
+#	define _LARGEFILE64_SOURCE
+#   endif
+// use kaio by default
+#	if defined(USE_LIBC_AIO) || defined(USE_POSIX_AIO)
+#		ifdef HAVE_SYS_TYPES_H
+#			include <sys/types.h>
+#		endif
+#		if defined HAVE_SYS_AIO_H
+#			include <sys/aio.h>
+#		elif HAVE_AIO_H
+#			include <aio.h>
+#		endif
+#		ifdef HAVE_FEATURES_H
+#			include <features.h>
+#		endif
+typedef struct aiocb aiocb_t;
+#	endif
+#	ifdef USE_LINUX_AIO
+#		define _FILE_OFFSET_BITS 64
+#		define _LARGEFILE_SOURCE
+#		define _LARGEFILE64_SOURCE
+#		include <libaio.h>
+typedef struct iocb iocb_t;
+#	endif
+#	ifdef HAVE_FEATURES_H
+#		include <features.h>
+#	endif
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+
+
+#define CURRENT_POS -1
+typedef unsigned long long offset_t;
+
+// One queued asynchronous I/O request.  Requests are chained into a
+// per-file queue through the next/last links (aFILE holds the
+// queuehead / queuetail pointers).
+typedef struct _aIORec {
+#if defined USE_POSIX_AIO
+// posix aio uses the aiocb_t type to describe aio requests
+	aiocb_t *aio_cb;
+#elif defined USE_LINUX_AIO
+	iocb_t* aio_cb;
+#elif defined USE_LIBC
+#elif defined USE_WIN32
+    // win32-specific data.
+    // this is a pointer because windows needs it to
+    // be in a fixed spot.  But we have to resize the
+    // data structure that contains these, so we need
+    // to allocate them separately.
+    // unfortunately, this means we need to do linear
+    // search to figure out what thing in the queue some
+    // completion corresponds to.  Fortunately, this
+    // rarely needs to be done.  I think this is The
+    // Right Thing, given the tools and our goals.
+    OVERLAPPED * w32overlapped;
+#endif
+    // operation code of this request; finding a specific operation
+    // means a linear search of the queue, but no big deal.
+    int operation;
+    char * buf;         // data buffer of the request
+    offset_t size;      // item size in bytes (fread-style size/count pair)
+    offset_t count;	// number of items; the Win32 backend transfers size*count bytes
+    offset_t pos;       // file offset of the request, or CURRENT_POS
+    struct _aIORec * next;  // queue links; aOperationComplete walks next starting at queuetail
+    struct _aIORec * last;
+    
+} aIORec;
+
+
+// per-file state behind aOpen/aClose; users don't need to concern themselves with this.
+typedef struct _aFILE {
+#if defined(USE_POSIX_AIO)||defined(USE_LINUX_AIO)
+	int file_descriptor;
+#elif defined USE_LIBC
+    FILE * libchandle;
+#elif defined USE_WIN32
+    HANDLE w32handle;
+#endif
+    // A_READ or A_WRITE (the Win32 backend refuses writes on A_READ files and reads on A_WRITE files)
+    int mode;
+    // file seek pointer, kept as the high and low 32 bits of a 64-bit offset
+    unsigned int filep_high;
+    unsigned int filep_low;
+    // is a read/write operation in progress?
+    int busy;
+    // operation serial number (to ensure serial operation).
+    int op;
+    // are we to be closed?
+    int toclose;
+    // queue of io operations
+    aIORec *queuehead, *queuetail;
+} aFILE;
+
+
+enum {              // values for the mode argument of aOpen and for aFILE.mode
+    A_READ,
+    A_WRITE
+};
+
+
+// these work just like fopen and fclose
+aFILE * aOpen( const char * path, int mode );
+// close will block until all operations
+// on the file are complete.
+int aClose( aFILE * file );
+
+// These allow you to queue reads and writes.
+// They return 0 for a failure, or an operation
+// code that can be checked for completion with
+// aOperationComplete.
+int aWrite( void * buffer, offset_t size, offset_t count, aFILE * file, offset_t pos );
+int aRead( void * buffer, offset_t size, offset_t count, aFILE * file, offset_t pos );
+
+// returns 1 if the operation was completed, 0 otherwise.
+int aOperationComplete( aFILE * file, int operation );
+
+// returns 1 if the file is doing IO, 0 otherwise.
+int aFileBusy( aFILE * file );
+
+// blocks and waits for the specified operation to
+// complete.
+void aWaitComplete( aFILE * file, int operation );
+    
+// blocks and waits for the file to not be busy
+// and for *all* IO operations to complete.
+void aWaitNotBusy( aFILE * file );
+
+// polls the aio file to see if anything's completed, and
+// starts the next queued up jobs if they are.  does not
+// block.
+void aUpdateOperations( aFILE * file );
+
+
+// for files open for writing, ensures that all data is
+// safely on disk (flushes buffer cache).
+void aFlush( aFILE *file );
+
+// get the size in records of a particular file
+// used when skipping the binning phase
+unsigned long aStatSize( const char * path );
+
+// get the size in bytes of a particular file
+unsigned long long aStatFileSize( const char * path );
+
+#endif /* _asyncio_h_ */
diff --git a/libMems/dmSML/awin32aio.c b/libMems/dmSML/awin32aio.c
new file mode 100644
index 0000000..93b4d39
--- /dev/null
+++ b/libMems/dmSML/awin32aio.c
@@ -0,0 +1,160 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/dmSML/awin32aio.h"
+#include "libMems/dmSML/util.h"
+#ifdef USE_WIN32
+
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+
+static VOID CALLBACK DummyCompletionRoutine( DWORD err, DWORD nbytes, LPOVERLAPPED lpo ) {
+    // completion is detected by polling; this callback exists only to make windows happy.
+    fputs( "completion routine!\n", stdout );
+}
+
+
+// Opens path for overlapped (asynchronous) I/O and stores the handle in
+// file->w32handle.  A_READ opens an existing file for reading; any other
+// mode creates (or truncates) the file for writing.
+// Returns 1 on success, 0 on failure (the Windows error code is printed).
+int OpenWIN32( aFILE * file, const char *path, int mode ) {
+    DWORD access = mode == A_READ ? GENERIC_READ : GENERIC_WRITE;
+    DWORD disposition = mode == A_READ ? OPEN_EXISTING : CREATE_ALWAYS;
+    // path is a narrow (char) string, so call the ANSI entry point
+    // explicitly; the CreateFile macro maps to CreateFileW under UNICODE.
+    HANDLE result = CreateFileA( path, access,
+        FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+        NULL, disposition, FILE_FLAG_OVERLAPPED, NULL );
+    if( result == INVALID_HANDLE_VALUE ) {
+        // the error code is a DWORD (unsigned long): print it with %lu.
+        printf( "Error opening %s, code %lu\n", path, (unsigned long)GetLastError() );
+        return( 0 );
+    }
+    file->w32handle = result;
+    return( 1 );
+}
+
+
+int CloseWIN32( aFILE * file ) {
+    return CloseHandle( file->w32handle );  // CloseHandle's result is passed straight through
+}
+
+
+// Issues an overlapped write of rec->size * rec->count bytes from rec->buf.
+// If rec->pos is not CURRENT_POS, the file's stored seek position is first
+// replaced by rec->pos; the write then targets the stored position.
+// A fresh OVERLAPPED is allocated into rec->w32overlapped for the request.
+// Returns 1 if the write was issued, 0 otherwise (file not opened for
+// writing, request too large, out of memory, or WriteFileEx failed).
+// NOTE(review): ownership of rec->w32overlapped on failure is not visible here.
+int WriteWIN32( aFILE * file, aIORec * rec ) {
+    offset_t nbytes;
+    if( file->mode != A_WRITE ) {
+        return( 0 );
+    }
+
+    // WriteFileEx takes a 32-bit byte count; refuse anything larger
+    // instead of silently truncating the request.
+    nbytes = rec->size * rec->count;
+    if( nbytes > 0xFFFFFFFFULL ) {
+        printf( "error with WriteFileEx: request of %llu bytes is too large\n", nbytes );
+        return( 0 );
+    }
+
+    // zero-initialised OVERLAPPED; heap allocated because Windows needs
+    // it to stay at a fixed address.  calloc can fail, so check it.
+    rec->w32overlapped = calloc( 1, sizeof( *(rec->w32overlapped) ) );
+    if( rec->w32overlapped == NULL ) {
+        return( 0 );
+    }
+
+    if( rec->pos != CURRENT_POS ) {
+        // split the 64-bit offset into the two 32-bit halves we store.
+        file->filep_high = (unsigned int)( rec->pos >> 32 );
+        file->filep_low = (unsigned int)( rec->pos & 0xFFFFFFFFULL );
+    }
+    rec->w32overlapped->OffsetHigh = file->filep_high;
+    rec->w32overlapped->Offset = file->filep_low;
+
+    if( WriteFileEx( file->w32handle, rec->buf, (DWORD)nbytes,
+        rec->w32overlapped, DummyCompletionRoutine ) == 0 ) {
+        // GetLastError returns a DWORD (unsigned long): print with %lu.
+        printf( "error with WriteFileEx: %lu\n", (unsigned long)GetLastError() );
+        return( 0 );
+    }
+    return( 1 );
+}
+
+
+// Issues an overlapped read of rec->size * rec->count bytes into rec->buf,
+// at rec->pos, or at the file's stored position when rec->pos is CURRENT_POS.
+// Returns 1 if the read was issued (or hit EOF immediately), 0 otherwise.
+// NOTE(review): ownership of rec->w32overlapped on failure is not visible here.
+int ReadWIN32( aFILE * file, aIORec * rec ) {
+    DWORD err;
+    offset_t nbytes;
+    if( file->mode != A_READ ) {
+        return( 0 );
+    }
+    // ReadFileEx takes a 32-bit byte count; refuse anything larger
+    // instead of silently truncating the request.
+    nbytes = rec->size * rec->count;
+    if( nbytes > 0xFFFFFFFFULL ) {
+        printf( "error with ReadFileEx: request of %llu bytes is too large\n", nbytes );
+        return( 0 );
+    }
+    // zero-initialised OVERLAPPED; heap allocated because Windows needs
+    // it to stay at a fixed address.  calloc can fail, so check it.
+    rec->w32overlapped = calloc( 1, sizeof( *(rec->w32overlapped) ) );
+    if( rec->w32overlapped == NULL ) {
+        return( 0 );
+    }
+    if( rec->pos != CURRENT_POS ) {
+        // split the 64-bit offset into the two 32-bit halves we store.
+        file->filep_high = (unsigned int)( rec->pos >> 32 );
+        file->filep_low = (unsigned int)( rec->pos & 0xFFFFFFFFULL );
+    }
+    rec->w32overlapped->OffsetHigh = file->filep_high;
+    rec->w32overlapped->Offset = file->filep_low;
+    if( ReadFileEx( file->w32handle, rec->buf, (DWORD)nbytes,
+        rec->w32overlapped, DummyCompletionRoutine ) == 0 ) {
+        err = GetLastError();
+        if( err == ERROR_HANDLE_EOF ) {
+            printf( "readfileex says EOF -- we'll pretend it worked\n" );
+            return( 1 );
+        }
+        // print each argument with a conversion that matches its type.
+        printf( "error with ReadFileEx -- Last Error: %lu\n", (unsigned long)err );
+        printf( "called:  ReadFileEx( %p, %p, %lu, %p )\n", (void *)file->w32handle,
+            (void *)rec->buf, (unsigned long)nbytes, (void *)rec->w32overlapped );
+        return( 0 );
+    }
+    return( 1 );
+}
+
+
+int QueryLastCompleteWIN32( aFILE * file ) {
+    const aIORec *tail = file->queuetail;
+    DWORD waited;
+
+    // An empty queue, or a tail request that was never issued (its
+    // OVERLAPPED is still NULL), cannot have completed.
+    if( tail == NULL || tail->w32overlapped == NULL ) {
+        return 0;
+    }
+
+    // A wait of 0 msec is a plain poll of the file handle.  Every
+    // outcome except WAIT_TIMEOUT is reported as "complete".
+    // (disabled alternative:
+    //  HasOverlappedIoCompleted( file->queuetail->w32overlapped ) )
+    waited = WaitForSingleObject( file->w32handle, 0 );
+    return ( waited != WAIT_TIMEOUT ) ? 1 : 0;
+}
+
+
+
+#endif /* USE_WIN32 */
diff --git a/libMems/dmSML/awin32aio.h b/libMems/dmSML/awin32aio.h
new file mode 100644
index 0000000..87aa604
--- /dev/null
+++ b/libMems/dmSML/awin32aio.h
@@ -0,0 +1,18 @@
+#ifndef _awin32_h_
+#define _awin32_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/dmSML/asyncio.h"
+
+int OpenWIN32( aFILE * file, const char *path, int mode );
+int CloseWIN32( aFILE * file );
+
+int WriteWIN32( aFILE * file, aIORec * rec );
+int ReadWIN32( aFILE * file, aIORec * rec );
+
+int QueryLastCompleteWIN32( aFILE * file );
+
+#endif /* _awin32_h_ */
diff --git a/libMems/dmSML/buffer.c b/libMems/dmSML/buffer.c
new file mode 100644
index 0000000..d5d7219
--- /dev/null
+++ b/libMems/dmSML/buffer.c
@@ -0,0 +1,407 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <time.h>
+#include <stddef.h>
+#include "libMems/dmSML/buffer.h"
+#include <string.h>
+
+// portably fills an int with reasonably random, non-negative bits.
+// one assumption is that RAND_MAX is at least 255, so that every
+// rand() call contributes at least one fresh byte.
+static int BigRandom() {
+    static char firsttime = 1;
+    unsigned int bits = 0;  // unsigned, so the shifts below cannot overflow
+    size_t i;
+    if( firsttime ) {
+        firsttime = 0;
+        srand( 0 );  // fixed seed: the sequence is the same on every run.
+        //srand( time( NULL ) );
+    }
+    for( i = 0; i < sizeof( bits ); i++ ) {
+        // make room for one more byte: 8 bits, not sizeof( int ) bits.
+        bits <<= 8;
+        bits ^= (unsigned int)rand();
+    }
+    // clear the sign bit, so the value always fits a non-negative int.
+    return( (int)( bits & ( ~0u >> 1 ) ) );
+}
+
+
+// Working Set support.
+// returns resulting size of the entire structure.
+int MakeWorkingSet( working_set_t * ws, offset_t goalsize, offset_t minrecs, offset_t maxrecs ) {
+    // Plans a set of randomly sized buffers (minrecs..maxrecs records each)
+    // totalling at most goalsize bytes, then allocates them as ONE chunk:
+    // all buffer_t headers first, all record storage after.  If the chunk
+    // cannot be allocated, the goal is halved and planning starts over.
+    // Returns the number of bytes allocated, or 0 on failure.
+    // NOTE(review): the byte count is returned as int; confirm callers never
+    // ask for a working set too large for that.
+    const offset_t overhead = sizeof( ws->bufs[0] );
+    const offset_t minsize = overhead + minrecs * sizeof( record_t );
+    const offset_t maxsize = overhead + maxrecs * sizeof( record_t );
+    offset_t maxbufs = 256;  // capacity of buflist (doubles when full)
+    offset_t *buflist;       // planned record count of each buffer
+    offset_t *grown;
+    record_t *recordptr;
+    offset_t cursize, nbufs, i;
+
+    // if we can't possibly do anything useful
+    if( goalsize < minsize || maxrecs < minrecs ) {
+        return( 0 );
+    }
+    buflist = malloc( sizeof( *buflist ) * maxbufs );
+    if( !buflist ) {
+        return( 0 );
+    }
+
+    // the plan list is allocated once and reused by every retry, so a
+    // failed attempt does not leak it.
+    for( ;; goalsize /= 2 ) {
+        if( goalsize < minsize ) {
+            // the goal shrank below the size of a single minimal buffer.
+            free( buflist );
+            return( 0 );
+        }
+        cursize = 0;
+        nbufs = 0;
+        // just start allocating buffers until we can't anymore
+        while( goalsize - cursize >= maxsize ) {
+            offset_t randrecs = BigRandom() % (maxrecs - minrecs + 1) + minrecs;
+            if( nbufs == maxbufs ) {
+                // grow through a temporary, so that a failed realloc
+                // neither leaks the list nor leaves us writing to NULL.
+                grown = realloc( buflist, sizeof( *buflist ) * maxbufs * 2 );
+                if( !grown ) {
+                    free( buflist );
+                    return( 0 );
+                }
+                buflist = grown;
+                maxbufs *= 2;
+            }
+            buflist[nbufs++] = randrecs;
+            // update the number of bytes we've currently decided to allocate.
+            cursize += overhead + randrecs * sizeof( record_t );
+        }
+        printf( "allocating %llu bytes for working set (%llu bufs)\n", cursize, nbufs );
+        ws->bufs = malloc( cursize );
+        if( ws->bufs ) {
+            break;
+        }
+    }
+    ws->size = cursize;
+    ws->nbufs = nbufs;
+    memset( ws->bufs, 0, cursize );
+    // record storage starts right after the nbufs buffer_t headers.
+    recordptr = (record_t *)( (char *)ws->bufs + nbufs * sizeof( ws->bufs[0] ) );
+    for( i = 0; i < nbufs; i++ ) {
+        ws->bufs[i].totalrecs = buflist[i];
+        ws->bufs[i].recs = recordptr;
+        recordptr += ws->bufs[i].totalrecs;
+    }
+    free( buflist );
+    return( cursize );
+}
+
+
+
+
+
+// Working Set support.
+// Reorganize the working set with a different distribution of buffers.
+void ReorganizeWorkingSet( working_set_t * ws, offset_t minrecs, offset_t maxrecs ) {
+    // Re-partitions the memory the working set already owns (ws->size bytes
+    // at ws->bufs) into a new random mix of buffers of minrecs..maxrecs
+    // records each, plus one smaller buffer made from whatever is left over.
+    // Nothing is allocated for the working set itself.  If maxrecs < minrecs,
+    // or the temporary plan list cannot be allocated or grown, ws is left
+    // untouched.
+    offset_t goalsize = ws->size;
+    offset_t cursize = 0;
+    offset_t overhead = sizeof( ws->bufs[0] );
+    offset_t minsize = overhead + minrecs * sizeof( record_t );
+    offset_t maxsize = overhead + maxrecs * sizeof( record_t );
+    offset_t nbufs = 0;      // number of buffers planned so far
+    offset_t maxbufs = 256;  // capacity of buflist (doubles when full)
+    offset_t *buflist, *grown;
+    offset_t leftovers, i;
+    record_t *recordptr;
+
+    // if we can't possibly do anything useful
+    if( maxrecs < minrecs ) {
+        return;
+    }
+    buflist = malloc( sizeof( *buflist ) * maxbufs );
+    if( !buflist ) {
+        return;
+    }
+    // NOTE(review): when this branch is taken, goalsize < maxsize as well, so
+    // the loop below cannot run and the adjusted minrecs appears to be unused.
+    if( goalsize < minsize ) {
+        minsize = goalsize;
+        minrecs = (minsize-overhead) / sizeof( record_t );
+    }
+
+    // just start allocating buffers until we can't anymore
+    while( goalsize - cursize >= maxsize ) {
+        offset_t randrecs = BigRandom() % (maxrecs - minrecs + 1) + minrecs;
+        if( nbufs == maxbufs ) {
+            // grow through a temporary, so a failed realloc is never written to.
+            grown = realloc( buflist, sizeof( *buflist ) * maxbufs * 2 );
+            if( !grown ) {
+                free( buflist );
+                return;
+            }
+            buflist = grown;
+            maxbufs *= 2;
+        }
+        buflist[nbufs++] = randrecs;
+        cursize += overhead + randrecs * sizeof( record_t );
+    }
+    // clean up the last bit: one final buffer from what remains.
+    if( goalsize - cursize > overhead ) {
+        leftovers = (goalsize - cursize - overhead) / sizeof( record_t );
+        if( leftovers ) {
+            if( nbufs == maxbufs ) {
+                grown = realloc( buflist, sizeof( *buflist ) * maxbufs * 2 );
+                if( !grown ) {
+                    free( buflist );
+                    return;
+                }
+                buflist = grown;
+                maxbufs *= 2;
+            }
+            buflist[nbufs++] = leftovers;
+            cursize += overhead + leftovers * sizeof( record_t );
+        }
+    }
+    ws->nbufs = nbufs;
+    memset( ws->bufs, 0, cursize );
+    // record storage starts right after the nbufs buffer_t headers.
+    recordptr = (record_t *)( (char *)ws->bufs + nbufs * sizeof( ws->bufs[0] ) );
+    for( i = 0; i < nbufs; i++ ) {
+        ws->bufs[i].totalrecs = buflist[i];
+        ws->bufs[i].recs = recordptr;
+        recordptr += ws->bufs[i].totalrecs;
+    }
+    free( buflist );
+}
+
+
+
+
+
+
+
+
+
+// this updates all the IO on the working set buffers, querying those that
+// are not in OP_FINISHED or OP_NONE and putting those that finish into OP_FINISHED
+void UpdateWSIOFinishedState( working_set_t * ws ) {
+    // Sweep every buffer in the working set.  Genuine operation
+    // numbers are positive (> OP_NONE); the pending/finished markers
+    // are not, so buffers carrying those are skipped.  Any buffer
+    // whose operation the async layer reports as complete is moved
+    // to OP_FINISHED; incomplete ones are left alone.
+    int idx;
+    for( idx = 0; idx < ws->nbufs; idx++ ) {
+        buffer_t *cur = &ws->bufs[idx];
+        if( cur->operation <= OP_NONE ) {
+            continue;
+        }
+        // a real operation number: ask the file whether it is done.
+        if( aOperationComplete( cur->file, cur->operation ) ) {
+            cur->operation = OP_FINISHED;
+        }
+    }
+
+}
+
+
+
+// buffer list manipulations
+// returns argument
+buffer_list_t * InitList( buffer_list_t * list ) {
+    list->nitems = 0;       // an empty list has no items ...
+    list->head = NULL;      // ... and no head node.
+    return list;
+}
+
+
+void PushHead( buffer_list_t * list, buffer_t * item ) {
+    buffer_t *old_head = list->head;
+    if( old_head != NULL ) {
+        // splice item in between the current tail and the current head, then promote it.
+        buffer_t *tail = old_head->last;
+        item->last = tail;
+        item->next = old_head;
+        tail->next = item;
+        old_head->last = item;
+        list->head = item;
+        list->nitems++;
+        return;
+    }
+    // empty list: item becomes a ring of one, linked to itself.
+    item->next = item;
+    item->last = item;
+    list->head = item;
+    list->nitems = 1;
+}
+
+void PushTail( buffer_list_t * list, buffer_t * item ) {
+    // In a circular list the tail is the node just before the
+    // head.  So: insert item as the new head, then step the head
+    // pointer back by one (to item's predecessor), which leaves
+    // item sitting at the tail.  If item is alone, it stays head.
+    PushHead( list, item );
+    list->head = item->last;
+}
+
+buffer_t * PopHead( buffer_list_t * list ) {
+    buffer_t *victim = list->head;
+    if( victim == NULL ) {
+        return NULL;
+    }
+    // unlink the head from the ring and advance head to its successor.
+    victim->next->last = victim->last;
+    victim->last->next = victim->next;
+    list->head = victim->next;
+    victim->next = NULL;
+    victim->last = NULL;
+    // removing the only node leaves the list empty.
+    if( --list->nitems == 0 ) {
+        list->head = NULL;
+    }
+    return victim;
+}
+
+buffer_t * PopTail( buffer_list_t * list ) {
+    // nothing to pop from an empty list.
+    if( list->head == NULL ) {
+        return NULL;
+    }
+    // the tail is head->last: make it the head for a moment and let
+    // PopHead remove it, which moves head on to the original head.
+    list->head = list->head->last;
+    return PopHead( list );
+}
+
+// returns second argument
+buffer_t * RemoveItem( buffer_list_t * list, buffer_t * item ) {
+    // FIXME: NULL arguments are not handled; removing the head is exactly a PopHead.
+    if( list->head != item ) {
+        item->last->next = item->next;
+        item->next->last = item->last;
+        item->next = item->last = NULL;
+        list->nitems -= 1;
+        if( list->nitems == 0 ) {
+            list->head = NULL;
+        }
+        return item;
+    }
+    return PopHead( list );
+}
+
+
+
+
+
+
+int CompareKeys_qsort_wrapper( const void *r1, const void *r2 ) {
+    // Adapter with the comparator signature qsort expects: both untyped
+    // arguments are treated as records.  The casts keep the const qualifier.
+    return( CompareKeys( (const record_t *)r1, (const record_t *)r2 ) );
+}
+
+
+
+int CompareKeys( const record_t *r1, const record_t *r2 ) {
+    // byte-by-byte comparison of the two keys via COMPARE_KEYS, starting
+    // at key[0]; the result is negative, zero or positive, like memcmp.
+    const int order = COMPARE_KEYS( *r1, *r2 );
+    return order;
+}
+
+
+
+
+
+
+
+// This *must* enforce a serialized order for reading and writing, lest
+// we write sorted data out in the wrong order!
+void UpdateDeviceIOExecuteState( working_set_t * ws, iodevice_t * dev ) {
+    // act only if the device is idle: no buffer attached, marked free, or its buffer's op has finished.
+    if( !dev->buf || dev->state == DEV_FREE || dev->buf->operation == OP_FINISHED ) {
+        // find another job to take its place and execute it.
+        buffer_t *b;
+        buffer_t *found_buf = NULL;
+        dev->state = DEV_FREE;
+        dev->buf = NULL;
+        // simply walk all of them, find the operation on this device
+        // that has the lowest op number for its file.  This is made "more fair"
+        // by picking the first pending buffer that matches the device, then
+        // preferring any buffer on that same file with a lower fileop.
+        for( b = ws->bufs; b - ws->bufs < ws->nbufs; b++ ) {
+            // is this one that should be executed next?
+            // (only buffers still waiting, i.e. OP_PENDING, on this device qualify)
+            if( b->operation == OP_PENDING && b->device == dev ) {
+                if( !found_buf ) {
+                    found_buf = b;
+                } else if( (b->file == found_buf->file) && // same file as the candidate ...
+                    (b->fileop < found_buf->fileop) ) {    // ... but scheduled earlier
+                    found_buf = b;
+                }
+            }
+            // (end of candidate selection for this buffer)
+            /* Older variant, kept for reference; it predates the pos argument of aRead/aWrite:
+            if( b->operation == OP_PENDING && b->device == dev ) {
+            dev->buf = b;
+            b->operation = b->file->mode == A_READ 
+            ? aRead( b->recs, sizeof( b->recs[0] ), b->numrecs, b->file )
+            : aWrite( b->recs, sizeof( b->recs[0] ), b->numrecs, b->file );
+            dev->state = DEV_BUSY;
+            //printf( "* Created operation %d on device %x\n", b->operation, b->device );
+            // found one, so quit.
+            break;
+            }
+            */
+        }
+        // issue the chosen request, if any, and mark the device busy.
+        if( found_buf ) {
+            dev->buf = found_buf;
+            found_buf->operation = found_buf->file->mode == A_READ // direction follows the file's open mode
+                ? aRead( found_buf->recs, 1, found_buf->io_size, found_buf->file, found_buf->io_pos )
+                : aWrite( found_buf->recs, 1, found_buf->io_size, found_buf->file, found_buf->io_pos );
+            dev->state = DEV_BUSY;
+        }
+        // if nothing was pending, the device simply stays DEV_FREE.
+    }
+}
+
+// read and write to/from disk.
+void ReadBuffer( buffer_t * buffer, offset_t num_recs, iodevice_t * dev ) {
+    // schedules (does not start) a transfer of num_recs records at the file's current position.
+    buffer->device = dev;
+    buffer->numrecs = num_recs;
+    buffer->io_size = num_recs * sizeof( record_t );
+    buffer->io_pos = CURRENT_POS;
+    buffer->fileop = buffer->file->op++;
+    if( buffer->operation == OP_NONE ) {
+        buffer->operation = OP_PENDING;
+    } else {
+        printf( "weird!\n" );
+    }
+}
+
+void WriteBuffer( buffer_t * buffer, offset_t num_recs, iodevice_t * dev ) {
+    // Scheduling a write is identical to scheduling a read: the
+    // request is only marked pending here.  Whether it becomes a
+    // read or a write is decided later, when it is executed, from
+    // the mode the file was opened with.
+    // Hence the plain delegation:
+    ReadBuffer( buffer, num_recs, dev );
+}
+
diff --git a/libMems/dmSML/buffer.h b/libMems/dmSML/buffer.h
new file mode 100644
index 0000000..69f8370
--- /dev/null
+++ b/libMems/dmSML/buffer.h
@@ -0,0 +1,203 @@
+#ifndef _buffer_h_
+#define _buffer_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/dmSML/asyncio.h"
+
+// forward decl for the benefit of iodevice_t
+// (can't be avoided)
+typedef struct buffer_s buffer_t;
+
+enum {              // values of iodevice_t.state
+    DEV_FREE,       // no I/O job is assigned to the device
+    DEV_BUSY,       // a buffer's operation has been issued on the device
+};
+
+/*
+================
+iodevice_t
+An IO device represents a physical disk.  This is used to make
+sure that we're not doing more than one operation on any disk
+at a time.  The reason is if we are, the OS threads that do the
+asynchronous IO will contend for the disk and we'll start seeking.
+Seeking is bad.
+================
+*/
+typedef struct iodevice_s {
+    int         op;     // an operation number used to enforce serial operation on each device (buffer.c itself orders requests by aFILE.op instead).
+    int         state;  // either DEV_FREE or DEV_BUSY as above.
+    buffer_t    *buf;   // if we're DEV_BUSY, the buffer we're operating on; NULL when no job is assigned.
+} iodevice_t;
+
+
+/*
+================
+buffer_state_t
+Buffers are used for IO, and the IO is asynchronous.  Buffers
+have a bit of state to indicate what their current IO status is.
+Ordinarily, buffers are in an OP_NONE state, to indicate no
+operation is being performed.  When an operation is initiated,
+the buffer transitions to OP_PENDING and changes to a valid operation
+code when the operation actually starts.  When the operation completes,
+the buffer is transitioned to the OP_FINISHED state, so that the
+program may determine when buffers have completed their operations.
+Then the app should transition the state back to OP_NONE.
+================
+*/
+enum {
+    OP_PENDING = -2,    // scheduled by ReadBuffer/WriteBuffer, not yet issued
+    OP_FINISHED = -1,   // set by UpdateWSIOFinishedState once the operation completed
+    OP_NONE = 0,        // idle; real operation codes are greater than this
+};
+// CompareKeyPtrs( a, b ): a and b are pointers to records.  Yields the
+// difference of the first pair of key bytes (0..9) that differ, as an
+// int, or 0 if all ten are equal.  Arguments are evaluated repeatedly.
+#define CompareKeyPtrs( a, b ) \
+((int)(a)->key[0]-(int)(b)->key[0] ? (int)(a)->key[0]-(int)(b)->key[0] :    \
+(int)(a)->key[1]-(int)(b)->key[1] ? (int)(a)->key[1]-(int)(b)->key[1] :     \
+(int)(a)->key[2]-(int)(b)->key[2] ? (int)(a)->key[2]-(int)(b)->key[2] :     \
+(int)(a)->key[3]-(int)(b)->key[3] ? (int)(a)->key[3]-(int)(b)->key[3] :     \
+(int)(a)->key[4]-(int)(b)->key[4] ? (int)(a)->key[4]-(int)(b)->key[4] :     \
+(int)(a)->key[5]-(int)(b)->key[5] ? (int)(a)->key[5]-(int)(b)->key[5] :     \
+(int)(a)->key[6]-(int)(b)->key[6] ? (int)(a)->key[6]-(int)(b)->key[6] :     \
+(int)(a)->key[7]-(int)(b)->key[7] ? (int)(a)->key[7]-(int)(b)->key[7] :     \
+(int)(a)->key[8]-(int)(b)->key[8] ? (int)(a)->key[8]-(int)(b)->key[8] :     \
+(int)(a)->key[9]-(int)(b)->key[9] ? (int)(a)->key[9]-(int)(b)->key[9] : 0)
+// COMPARE_KEYS( a, b ): same comparison as CompareKeyPtrs, but a and b are records, not pointers.
+#define COMPARE_KEYS( a, b ) \
+((a).key[0]!=(b).key[0] ? (a).key[0]-(b).key[0] :    \
+(a).key[1]!=(b).key[1] ? (a).key[1]-(b).key[1] :     \
+(a).key[2]!=(b).key[2] ? (a).key[2]-(b).key[2] :     \
+(a).key[3]!=(b).key[3] ? (a).key[3]-(b).key[3] :     \
+(a).key[4]!=(b).key[4] ? (a).key[4]-(b).key[4] :     \
+(a).key[5]!=(b).key[5] ? (a).key[5]-(b).key[5] :     \
+(a).key[6]!=(b).key[6] ? (a).key[6]-(b).key[6] :     \
+(a).key[7]!=(b).key[7] ? (a).key[7]-(b).key[7] :     \
+(a).key[8]!=(b).key[8] ? (a).key[8]-(b).key[8] :     \
+(a).key[9]!=(b).key[9] ? (a).key[9]-(b).key[9] : 0)
+
+
+
+
+// this is the record as in the files to be sorted
+typedef struct record_s {
+    unsigned char key[10];      // sort key; all 10 bytes are compared by COMPARE_KEYS
+    unsigned char num[1];       // NOTE(review): meaning not evident from this header -- confirm
+    unsigned char payload[1];   // NOTE(review): meaning not evident from this header -- confirm
+} record_t;
+
+
+
+
+
+int CompareKeys_qsort_wrapper( const void *r1, const void *r2 );
+int CompareKeys( const record_t *r1, const record_t *r2 );
+
+
+
+
+
+/*
+================
+buffer_t
+This is the unit of information most commonly dealt with.
+We read into these, and write these out, and use these for
+binning.  A single Working Set of these buffers should be
+used for the duration of the program, and they should be
+managed with the buffer lists below.
+NOTE: the struct holds five 64-bit fields, so it is larger than 32 bytes.
+================
+*/
+struct buffer_s {
+    
+    aFILE           *file;      // the file this buffer is attached to for IO ops
+    iodevice_t      *device;    // which IO device this is on (for scheduling IO)
+    int             operation;  // OP_NONE, OP_PENDING, OP_FINISHED, or the op #.
+    offset_t        numrecs;    // the number of valid records in this buffer.
+    offset_t        totalrecs;  // number of real records in recs
+    int             fileop;     // per-file sequence number (taken from file->op) to ensure serialized ops.
+    record_t        *recs;      // actual record storage
+    struct buffer_s *next;      // for chaining lists together.
+    struct buffer_s *last;
+    offset_t        io_size;	// amount of data for i/o in bytes, need not be equal to numrecs
+    long long		input_pos;	// the sequence offset that this data was read from, only valid during binning phase
+    offset_t		io_pos;		// the file offset for I/O, set to CURRENT_POS to use the current file seek pointer
+};
+
+
+/*
+================
+buffer_list_t
+Buffer lists are used to manage pools, like the free list,
+the reading list, the to process list, and a list for each bin.
+We use circular lists because they're simpler.
+================
+*/
+typedef struct buffer_list_s { // circular, doubly linked list of buffers
+    int nitems;         // number of buffers currently linked in
+    buffer_t * head;    // first buffer, or NULL when empty; head->last is the tail
+} buffer_list_t;
+
+
+/*
+================
+working_set_t
+Working sets are collections of buffers.  They are useful so that
+you can use a fixed amount of memory to deal with things.  The
+problem then becomes internal working set management.
+================
+*/
+typedef struct working_set_s {
+    offset_t    size;       // actual size of working set in bytes
+    int         nbufs;      // number of buffer_t headers at the start of bufs
+    buffer_t    *bufs;      // the single allocation: nbufs headers followed by the record storage
+} working_set_t;
+
+    
+
+
+// Working Set support.
+// returns resulting size of the entire structure.
+// goalsize is the desired size of the working set in bytes, minrecs and maxrecs
+// are the minimum and maximum desired number of records in buffers.  The buffers will
+// be allocated with random sizes in this range until the desired goalsize is reached.
+// this will return 0 in the case of a malloc error or if the goalsize is too small to
+// have any buffers allocated for it.
+int MakeWorkingSet( working_set_t * ws, offset_t goalsize, offset_t minrecs, offset_t maxrecs );
+
+// Working Set support.
+// Reorganize the working set with a different distribution of buffers.
+void ReorganizeWorkingSet( working_set_t * ws, offset_t minrecs, offset_t maxrecs );
+
+    
+// this updates all the IO on the working set buffers, querying those that
+// are not in OP_FINISHED or OP_NONE and putting those that finish into OP_FINISHED
+void UpdateWSIOFinishedState( working_set_t * ws );
+
+
+// this updates the IO on a particular device.  This routine should probably
+// be called after UpdateWSIOFinishedState above and, in addition, be
+// called once for every device in the system.
+void UpdateDeviceIOExecuteState( working_set_t * ws, iodevice_t * dev );
+
+
+// buffer list manipulations
+// returns argument
+buffer_list_t * InitList( buffer_list_t * list );
+void PushHead( buffer_list_t * list, buffer_t * item );
+void PushTail( buffer_list_t * list, buffer_t * item );
+buffer_t * PopHead( buffer_list_t * list );
+buffer_t * PopTail( buffer_list_t * list );
+// returns second argument
+buffer_t * RemoveItem( buffer_list_t * list, buffer_t * item );
+
+// read and write to/from disk.
+void ReadBuffer( buffer_t * buffer, offset_t num_recs, iodevice_t * dev );
+void WriteBuffer( buffer_t * buffer, offset_t num_recs, iodevice_t * dev );
+
+
+#endif /* _buffer_h_ */
+
diff --git a/libMems/dmSML/dmsort.c b/libMems/dmSML/dmsort.c
new file mode 100644
index 0000000..4c99215
--- /dev/null
+++ b/libMems/dmSML/dmsort.c
@@ -0,0 +1,1942 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "libMems/dmSML/util.h"
+#include "libMems/dmSML/timing.h"
+#include "libMems/dmSML/asyncio.h"
+#include "libMems/dmSML/buffer.h"
+#include "libMems/dmSML/sorting.h"
+#include "libMems/dmSML/sml.h"
+#include "libMems/dmSML/dmsort.h"
+
+// define this if you're using the ASCII sortgen data.
+// don't define if you're using random data (dmsortgen)
+//#define ASCII_KEYBYTES
+
+// define this if using dmSML with sequences that have large
+// stretches of NNNNN...  such as an unfinished eukaryote
+//#define NNNNN_KEYBYTES
+
+// define this if you want to measure the overlapping
+// of your sorting with I/O in the sorting phase --
+// this makes the sort routine do nothing.
+//#define NO_SORT_PERF_TEST
+
+// define the following if you don't want to write
+// data during the sort phase in order to get timings
+//#define NO_WRITE_PERF_TEST
+
+// define this to skip the binning phase in order to
+// perform measurements on the sort phase.  The bin
+// files to use during sorting must already exist (duh!)
+// #define NO_BINNING_PERF_TEST
+
+// define this to test the performance of binning and
+// restructuring without bin writing
+//#define NO_BIN_WRITE_PERF_TEST
+
+// define this to test the performance without restructuring
+// each SML bin
+//#define NO_RESTRUCTURE_PERF_TEST
+
+/*
+
+#define NELEMS(x) \
+    ( sizeof((x)) / sizeof((x)[0]) )
+
+
+#define MIN(x,y)    ((x)<(y)?(x):(y))
+#define MINRECS     (1311)
+#define MAXRECS     (1311)
+
+
+// this is somewhat less appealing than a config file,
+// but speed is critical and parsing a config file at
+// startup is just inconvenient.  Besides, specifying
+// what we care about is easy enough this way.
+typedef struct device_s {
+    const char      *devname;
+    const char      *path;
+    iodevice_t      dev;
+} device_t;
+*/
+
+device_t *Devices;
+int NumDevices;
+
+
+
+// ugly hack
+//#define BIN_SPECIAL     (-10000)
+
+int NSortBufs;
+sort_buf_t *SortBufs;
+
+
+// how the working set is allocated originally.
+offset_t BufferSizeMin;
+offset_t BufferSizeMax;
+
+
+
+/*
+// what we use to represent a bin.
+typedef struct bin_s {
+    aFILE               *file;      // File we write/read on.
+    int                 dev;        // This is an index into the Devices table.
+    offset_t            nrecs;      // Number of records written to bin.
+    buffer_list_t       bufs;       // Our list of buffers that holds our data.
+} bin_t;
+
+*/
+
+// number specified by a cmdline param at runtime.
+bin_t   *Bins;
+int     NumBins;
+int     NumBinDevs;  // number of binning devices
+
+/*
+typedef struct seqbuf_s {
+	aFILE				*file;		// Output file
+	int					dev;		// device table index for output file
+	offset_t			bufpos;		// position in current buffer
+	uint64				seq_pos;	// position in sequence that is next to translate
+	buffer_list_t		bufs;		// list of buffers for data
+} seqbuf_t;
+*/
+
+seqbuf_t Seqbuf;
+    
+aFILE   *Data;       // the data to sort
+int     DataDev;     // the device the data file is on.
+
+
+const char *OutFileName = "unset";     // the output file name.
+aFILE   *Output;     // the output file (sorted data goes here)
+int     OutputDev;   // the device the output goes on.
+
+
+
+int BinToRead, BinToWrite, BinToSort;
+
+
+
+working_set_t   WS;      // the Working Set we use to do our sorting.
+
+offset_t NumRecs;        // the total number of blocks to process
+offset_t RecsProcessed;  // number of blocks processed (put in bins to write out)
+offset_t RecsRead;       // number of records fully read in.
+offset_t RecsUnread;     // number of blocks on disk (not yet had 'read' called)
+offset_t RecsCommitted;  // number of records committed to be written.
+offset_t RecsWritten;    // number of records actually written on disk.
+
+
+// timers
+double RunningTime;
+dmtimer_t *RunningTimer;
+double BinningTime;
+dmtimer_t *BinningTimer;
+double SortingTime;
+dmtimer_t *SortingTimer;
+
+double QSortTime;
+dmtimer_t *QSortTimer;
+
+double ReadIdleTime;
+dmtimer_t *ReadIdleTimer;
+double SortIdleTime;
+dmtimer_t *SortIdleTimer;
+double WriteIdleTime;
+dmtimer_t *WriteIdleTimer;
+
+// buffer lists
+buffer_list_t   Free;           // the free list
+buffer_list_t   ToProcess;      // list read and to be processed
+buffer_list_t   Reading;        // the list that's waiting on stuff to read.
+buffer_list_t   Restructure;	// buffers that need post-read and pre-binning processing
+
+
+// Take a buffer off the head of the Free list and hand it back with its
+// per-use bookkeeping cleared.  Prints an error and returns NULL when the
+// Free list is empty.
+static buffer_t * AllocateFree( void ) {
+    buffer_t * buf;
+    if( !Free.nitems ) {
+        printf( "error: called AllocateFree but free list is empty\n" );
+        return( NULL );
+    }
+    buf = PopHead( &Free );
+    // detach the buffer from any previous file/device/list and mark it idle
+    buf->operation = OP_NONE;
+    buf->numrecs = 0;
+    buf->next = NULL;
+    buf->last = NULL;
+    buf->file = NULL;
+    buf->device = NULL;
+    return( buf );
+}
+
+
+static unsigned int divisor = 0;
+
+// Map a record key onto a bin index, assuming keys are evenly distributed.
+// The first three key bytes are read as a base-256 number (0 .. 256^3 - 1;
+// three places is all we handle without overflowing) and divided by the
+// file-level `divisor`, which is computed on first use as the ceiling of
+// 256^3 / NumBins.  The integral part of the quotient is the bin number.
+static int ComputeBinNumber( const unsigned char key[10] ) {
+    const unsigned int key_space = 16777216;    // 256^3
+    unsigned int prefix;
+    if( divisor == 0 ) {
+        // first time through: divisor = ceil( key_space / NumBins )
+        divisor = key_space / (unsigned)NumBins;
+        if( key_space % (unsigned)NumBins ) {
+            divisor += 1;
+        }
+        printf( "Divisor is: %u\n", divisor );
+    }
+    // most significant byte first
+    prefix = ((unsigned int)key[0] << 16) |
+             ((unsigned int)key[1] << 8) |
+              (unsigned int)key[2];
+    return( prefix / divisor );
+}
+
+// Variant of ComputeBinNumber that reserves bin 0 for keys whose first three
+// bytes are all zero.  Every other key is spread over the remaining
+// NumBins - 1 bins, which are numbered starting at 1.  Shares the file-level
+// `divisor`, computed on first use as the ceiling of 256^3 / (NumBins - 1).
+static int ComputeNNNNNBinNumber( const unsigned char key[10] ) {
+    const unsigned int key_space = 16777216;    // 256^3
+    unsigned int prefix;
+    if( divisor == 0 ) {
+        unsigned int data_bins = (unsigned)NumBins - 1;
+        // first time through: divisor = ceil( key_space / data_bins )
+        divisor = key_space / data_bins;
+        if( key_space % data_bins ) {
+            divisor += 1;
+        }
+        printf( "Divisor is: %u\n", divisor );
+    }
+    // most significant byte first
+    prefix = ((unsigned int)key[0] << 16) |
+             ((unsigned int)key[1] << 8) |
+              (unsigned int)key[2];
+    if( prefix == 0 ) {
+        return 0;
+    }
+    return ( prefix / divisor ) + 1;
+}
+
+
+
+// Bin selection for ASCII keys.  The first four key characters are read as a
+// base-95 number, each digit being the character's offset from ' ', and the
+// result is divided by the file-level `divisor`.  The divisor is computed on
+// first use as the ceiling of 95^4 / NumBins, where 95^4 = 81450625 is one
+// more than the largest four-digit base-95 value.
+static int ComputeAsciiBinNumber( const unsigned char key[10] ) {
+    int pos;
+    unsigned int value = 0;
+    if( divisor == 0 ) {
+        // first time through: divisor = ceil( 95^4 / NumBins )
+        divisor = 81450625 / NumBins;
+        if( 81450625 % NumBins ) {
+            divisor += 1;
+        }
+    }
+    // Horner evaluation of the four leading characters
+    for( pos = 0; pos < 4; pos++ ) {
+        value = value * 95 + ( key[pos] - ' ' );
+    }
+    return( value / divisor );
+}
+
+
+
+static offset_t         consumed_recs = 0;
+static buffer_t         *toprocess = NULL;
+
+// Drain the ToProcess list: every record of every restructured buffer is
+// copied into the head buffer of the bin selected by its key, and a bin
+// buffer is written out as soon as it fills up.
+//
+// The buffer currently being drained and the position inside it live in the
+// file-level statics `toprocess` / `consumed_recs`, so a call that has to stop
+// early resumes at the same record on the next call.
+//
+// Returns when ToProcess is empty, or when a bin needs a fresh buffer and the
+// Free list is empty (binning blocks until a buffer is released).
+static void DoBinning( void ) {
+    while( 1 ) {
+        int bin = -1;
+        // if we don't already have a buffer to process, see if we
+        // can get one.
+        if( toprocess == NULL ) {
+            if( !ToProcess.nitems ) {
+                // we can't get anything to process
+                return;
+            }
+            toprocess = PopHead( &(ToProcess) );
+            consumed_recs = 0;
+        }
+        // try to process all the records in the toprocess buffer.
+        for( ; consumed_recs < toprocess->numrecs; consumed_recs++, RecsProcessed++ ) {
+
+            buffer_t *headbuf;
+            record_t *rec = &(toprocess->recs[consumed_recs]);
+
+            // find what bin this next record belongs in.
+#ifdef ASCII_KEYBYTES
+            bin = ComputeAsciiBinNumber( rec->key );
+#else
+#ifdef NNNNN_KEYBYTES
+            bin = ComputeNNNNNBinNumber( rec->key );
+#else
+            bin = ComputeBinNumber( rec->key );
+#endif
+#endif
+            if( (bin >= NumBins) || (bin < 0) ) {
+                // NOTE(review): the record is still binned below, which indexes
+                // Bins[] out of range.  Left as is because dropping the record
+                // would change the record accounting -- decide on a policy.
+                printf( "error: invalid bin from ComputeBinNumber: %d\n", bin );
+            }
+
+            // now, let's see what the situation is with that bin and its
+            // buffers.  In particular, do we have a spot to put this record?
+            headbuf = Bins[bin].bufs.head;
+            // if there's no buffer at all, or the buffer is full or has an
+            // operation pending, let's try to get one
+            if( !headbuf ||
+                headbuf->numrecs == headbuf->totalrecs ||
+                headbuf->operation != OP_NONE ) {
+                // first see if this is our 'special' buffer (the bin's only
+                // buffer, already written) and if we can reuse it.  headbuf
+                // may be NULL here, so it must be tested before it is read.
+                if( headbuf && headbuf->operation == BIN_SPECIAL ) {
+                    headbuf->numrecs = 0;
+                    headbuf->operation = OP_NONE;
+                } else {
+                    if( Free.nitems ) {
+                        PushHead( &(Bins[bin].bufs), AllocateFree() );
+                        headbuf = Bins[bin].bufs.head;
+                    } else {
+                        // no free buffers to use for bin -- binning BLOCKS!
+                        return;
+                    }
+                }
+            }
+            // now headbuf must exist, and it must be non-full so we can
+            // add our item.
+            headbuf->recs[headbuf->numrecs++] = *rec;
+            Bins[bin].nrecs++;
+            // if we made it full, write the thing
+            if( headbuf->numrecs >= headbuf->totalrecs ) {
+                headbuf->file = Bins[bin].file;
+                headbuf->device = &(Devices[Bins[bin].dev].dev);
+                RecsCommitted += headbuf->numrecs;
+#ifdef NO_BIN_WRITE_PERF_TEST
+                // just put it in the finished state
+                headbuf->operation = OP_FINISHED;
+#else
+                WriteBuffer( headbuf, headbuf->numrecs, headbuf->device );
+#endif
+            }
+
+        }
+
+        // if we hit the end of this buffer,
+        // put it back on the free list, and start the loop over
+        if( consumed_recs >= toprocess->numrecs ) {
+            PushTail( &Free, toprocess );
+            toprocess = NULL;
+        }
+
+    }
+}
+
+
+
+
+
+// Flush every bin at the end of the binning phase.  Each bin's buffer list is
+// emptied; any buffer that still holds records and has no operation recorded
+// (OP_NONE) is written to the bin's file.  RecsCommitted grows by the number
+// of records flushed here.
+void FinishBinning() {
+    int bin;
+    offset_t flushed = 0;
+    for( bin = 0; bin < NumBins; bin++ ) {
+        buffer_list_t *list = &(Bins[bin].bufs);
+        while( list->nitems ) {
+            buffer_t *buf = PopHead( list );
+            // nothing to do for empty buffers or ones whose state is not OP_NONE
+            if( buf->operation != OP_NONE || !buf->numrecs ) {
+                continue;
+            }
+            flushed += buf->numrecs;
+            buf->file = Bins[bin].file;
+            buf->device = &(Devices[Bins[bin].dev].dev);
+#ifdef NO_BIN_WRITE_PERF_TEST
+            // just put it in the finished state
+            buf->operation = OP_FINISHED;
+#else
+            WriteBuffer( buf, buf->numrecs, buf->device );
+#endif
+        }
+    }
+    RecsCommitted += flushed;
+}
+
+
+
+// I/O size for the next read into buffer b: the smaller of the buffer's
+// capacity and the records still unread, each extended by mask_length - 1.
+// (The traditional dmsort version was
+//  MIN(b->totalrecs, RecsUnread) * sizeof( record_t ).)
+offset_t CalculateDataReadSize( buffer_t* b ){
+	offset_t by_capacity = b->totalrecs + mask_length - 1;
+	offset_t by_remaining = RecsUnread + mask_length - 1;
+	return MIN( by_capacity, by_remaining );
+}
+
+// Issue one read of input data if there are records left to read and a free
+// buffer to read them into.  The buffer is tagged with its position in the
+// input and its I/O size, RecsUnread is reduced by the number of records
+// requested, and the buffer is appended to the Reading list.
+static void DoReading( void ) {
+    buffer_t * buf;
+    offset_t nrecs;
+    if( !RecsUnread || !Free.nitems ) {
+        return;
+    }
+
+    // allocate a buffer and start reading into it.
+    buf = AllocateFree();
+    nrecs = MIN(buf->totalrecs, RecsUnread);
+    buf->file = Data;
+    ReadBuffer( buf, nrecs, &(Devices[DataDev].dev) );
+
+    // record where in the input this buffer starts and how much it covers;
+    // the size must be computed before RecsUnread is decremented
+    buf->input_pos = NumRecs - RecsUnread;
+    buf->io_pos = buf->input_pos;
+    buf->io_size = CalculateDataReadSize( buf );
+    RecsUnread -= nrecs;
+
+    // put the thing on the Reading list.
+    PushTail( &Reading, buf );
+}
+
+
+
+
+
+// Scan every bin for buffers whose operation has reached OP_FINISHED.  Their
+// records are added to RecsWritten.  A finished buffer goes back to the Free
+// list unless it is the bin's only buffer, in which case it stays with the
+// bin and is marked BIN_SPECIAL.
+static void HandleBinWriteCompletions( void ) {
+    int bin;
+    for( bin = 0; bin < NumBins; bin++ ) {
+        buffer_list_t *list = &(Bins[bin].bufs);
+        buffer_t *cur = list->head;
+        while( cur ) {
+            // remember the successor before cur can be unlinked
+            buffer_t *succ = cur->next;
+            if( cur->operation == OP_FINISHED ) {
+                RecsWritten += cur->numrecs;
+                if( list->nitems > 1 ) {
+                    cur->operation = OP_NONE;
+                    PushHead( &Free, RemoveItem( list, cur ) );
+                } else {
+                    cur->operation = BIN_SPECIAL;
+                }
+            }
+            cur = succ;
+            // stop when we are back at the head or only one buffer is left
+            if( cur == list->head || list->nitems <= 1 ) {
+                break;
+            }
+        }
+    }
+}
+
+// Walk the Seqbuf buffer list and return buffers whose operation has reached
+// OP_FINISHED to the Free list.  The last remaining buffer is never removed;
+// it is left in the OP_FINISHED state.
+static void HandleSeqbufWriteCompletions( void ) {
+    buffer_list_t *list = &(Seqbuf.bufs);
+    buffer_t *cur = list->head;
+    while( cur ) {
+        // remember the successor before cur can be unlinked
+        buffer_t *succ = cur->next;
+        if( cur->operation == OP_FINISHED && list->nitems > 1 ) {
+            cur->operation = OP_NONE;
+            PushHead( &Free, RemoveItem( list, cur ) );
+        }
+        cur = succ;
+        // stop when we are back at the head or only one buffer is left
+        if( cur == list->head || list->nitems <= 1 ) {
+            break;
+        }
+    }
+}
+
+#define ALPHA_BITS 2
+
+// Pack `len` characters from src into dest at ALPHA_BITS bits per character,
+// converting each one through DNA_TABLE.  Characters are shifted in from the
+// right, so the first character of a word ends up in its most significant
+// bits.  A partially filled final word is shifted left so that its unused low
+// bits are zero.  dest must have room for every 32-bit word that is touched.
+// Does nothing when len is 0.
+static void Translate32(uint32* dest, const char* src, const unsigned len){
+	uint8 start_bit = 0;	// number of bits already used in the current word
+	unsigned cur_word = 0;
+	uint32 word_mer = 0;
+	uint32 i = 0;
+	if( len == 0 )
+		return;
+	for(i=0; i < len; i++){
+		if(start_bit + ALPHA_BITS <= 32){
+			word_mer <<= ALPHA_BITS;
+			// index with an unsigned value: plain char may be signed, and a
+			// byte above 0x7F would otherwise give a negative table index
+			word_mer |= DNA_TABLE[(unsigned char)src[i]];
+			dest[cur_word] = word_mer;
+			start_bit += ALPHA_BITS;
+			// move on to the next word, unless this was the last character
+			if(start_bit >= 32 && i < len - 1){
+				word_mer = 0;
+				start_bit %= 32;
+				cur_word++;
+			}
+		}else{
+			// only reachable if ALPHA_BITS does not divide 32 evenly
+			printf("Error, this should never happen with DNA sequence\n" );
+		}
+	}
+	// left-align the last word (a shift of 0 when the word is exactly full)
+	if( start_bit != 0 ){
+		dest[cur_word] <<= 32 - start_bit;
+	}
+}
+
+
+// Restructure buffers whose read has completed, strictly in input order.
+//
+// Only a buffer whose input_pos equals Seqbuf.seq_pos (the next stretch of
+// sequence to translate) is handled; others on the Restructure list are
+// skipped until their turn comes.  For such a buffer b:
+//   1. its characters are translated to 2 bits per base and appended to the
+//      head buffer of Seqbuf, which is written out when it is full or when b
+//      is the last read of the input;
+//   2. the raw characters in b are replaced in place by one record per
+//      sequence position: the seed-masked mer starting at that position, or
+//      its reverse complement, whichever key compares lower, together with
+//      the position in the input;
+//   3. b is moved from the Restructure list to the ToProcess list.
+//
+// Returns early, leaving b on the Restructure list, when Seqbuf needs a new
+// buffer and the Free list is empty.
+void RestructureReadSMLBins( void ) {
+	char little_endian = 1;
+	mask_t bit;
+	mask_t mer, rc_mer;
+	record_t forward, reverse;
+	record_t begin[6];	// the first six records could potentially overwrite the sequence
+	int i;
+	offset_t seqI, extras;
+	char* sequence;
+	sml_t *sml;
+
+    buffer_t *b, *tmpnext;
+
+	// variables for translation to 2-bit per base
+    buffer_t *headbuf;
+	int seq_bit;
+	int seq_word;
+	int word_remainder;
+	offset_t translate_length;
+    // go through and see if any have completed.
+    b = Restructure.head;
+    do {
+        if( !b ) {
+            break;
+        }
+		// is this the buffer we need to translate next?
+		if( b->input_pos != Seqbuf.seq_pos ){
+			b = b->next;
+			continue;
+		}
+
+        tmpnext = b->next;
+		sequence = (char *)b->recs;
+		sml = (sml_t*)b->recs;
+
+		// translate the sequence that was just read and write it out
+        headbuf = Seqbuf.bufs.head;
+        // if there's no buffer at all, or the buffer has an operation
+        // pending or finished, let's try to get one
+        if( !headbuf ||
+            headbuf->operation != OP_NONE ) {
+            // first see if the head buffer has finished its write and can be
+            // reused.  headbuf may be NULL here, so test it before reading it.
+            if( headbuf && headbuf->operation == OP_FINISHED ) {
+                headbuf->numrecs = 0;
+                headbuf->operation = OP_NONE;
+	            Seqbuf.bufpos = 0;
+            } else {
+                if( Free.nitems ) {
+                    PushHead( &(Seqbuf.bufs), AllocateFree() );
+                    headbuf = Seqbuf.bufs.head;
+		            Seqbuf.bufpos = 0;
+                } else {
+                    // no free buffers to use for Seqbuf -- restructuring BLOCKS!
+                    return;
+                }
+            }
+        }
+
+		// locate the first whole 32-bit word that Translate32 may start at
+		seq_bit = Seqbuf.bufpos * 2;
+		seq_word = seq_bit / 32;
+		word_remainder = seq_bit % 32;
+		if( word_remainder != 0 ){
+			seq_word++;
+		}
+
+		translate_length = b->io_size - mask_length + 1 - (word_remainder / 2);
+		if( b->io_size + b->input_pos >= NumRecs ){
+			// this is the last I/O, translate the whole thing
+			translate_length += mask_length - 1;
+		}
+
+		// The number of bytes in headbuf->recs must ALWAYS be divisible by 4 when using
+		// Translate32, otherwise corruption will result
+#ifndef NO_RESTRUCTURE_PERF_TEST
+		Translate32( (uint32*)(headbuf->recs) + seq_word, ((char*)b->recs) + (word_remainder / 2), translate_length );
+#endif
+
+		// need to fill in beginning
+		// NOTE(review): this packs word_remainder / 2 bases into the partial
+		// word, while that word has (32 - word_remainder) / 2 free slots; the
+		// two only agree when word_remainder is 16.  Confirm the intent.
+		if( word_remainder != 0 ){
+			int begin_mer = 0;
+			for( seqI = 0; seqI < word_remainder / 2; seqI++ ){
+				begin_mer <<= 2;
+				// unsigned index: plain char may be signed
+				begin_mer |= DNA_TABLE[ (unsigned char)sequence[ seqI ] ];
+			}
+			((uint32*)headbuf->recs)[ seq_word - 1 ] |= begin_mer;
+		}
+
+		Seqbuf.bufpos += translate_length + (word_remainder / 2);
+		Seqbuf.seq_pos += translate_length + (word_remainder / 2);
+
+        // if we made it full, write the thing
+        // each buf will consume headbuf->totalrecs / 4 bytes.
+        // there are headbuf->totalrecs * sizeof( record_t ) bytes available in the Seqbuf.
+        // thus we can fit 4 * sizeof( record_t ) bufs in each Seqbuf
+        if( Seqbuf.bufpos == headbuf->totalrecs * sizeof( record_t ) * 4 ||
+			b->io_size + b->input_pos >= NumRecs ) {
+            headbuf->file = Seqbuf.file;
+            headbuf->device = &(Devices[Seqbuf.dev].dev);
+            WriteBuffer( headbuf, headbuf->totalrecs, headbuf->device );
+            headbuf->io_size = Seqbuf.bufpos / 4;
+            if( b->io_size + b->input_pos >= NumRecs ){
+            	// last write: round the size up to a multiple of 4 bytes and
+            	// append 8 zero bytes
+            	offset_t offI = 0;
+            	offI = headbuf->io_size % 4;
+            	if( offI != 0 )
+	            	headbuf->io_size += 4 - offI;
+            	for( offI = 0; offI < 8; offI++ )
+            		((char*)headbuf->recs)[ headbuf->io_size + offI ] = 0;
+            	headbuf->io_size += 8;
+            }
+        }else if( Seqbuf.bufpos > headbuf->totalrecs * sizeof( record_t ) * 4 ){
+        	printf( "Error.  Over filled Seqbuf\n" );
+        }
+
+
+		// translate the sequence according to the current sequence mask.
+		// positions are visited from last to first, and the first six records
+		// are staged in begin[] (see its declaration) and copied in afterwards.
+#ifndef NO_RESTRUCTURE_PERF_TEST
+		for( seqI = b->io_size - mask_length + 1; seqI > 0; seqI-- ){
+			bit = 1;
+			bit <<= mask_length - 1;
+			mer = 0;
+			for( i = 0; i < mask_length; i++ ){
+				if( bit & seed_mask ){
+					mer <<= 2;
+					// unsigned index: plain char may be signed
+					mer |= DNA_TABLE[ (unsigned char)sequence[ seqI + i - 1 ] ];
+				}
+				bit >>= 1;
+			}
+			// copy the mer from the 64-bit integer based on the endian-ness of the system
+			// copy mer to forward key
+			mer <<= 64 - (2 * mask_weight);
+			if( little_endian ){
+				for( i = 0; i < MASK_T_BYTES; i++ )
+					forward.key[i] = ((char*)(&mer))[ sizeof( mer ) - i - 1 ];
+
+			}else{
+				for( i = 0; i < MASK_T_BYTES; i++ )
+					forward.key[i] = ((char*)(&mer))[ i ];
+			}
+
+			// reverse complement the mer: complement every base, then reverse
+			// the order of the 2-bit groups.  rc_mer must start from zero --
+			// it used to be read before it was ever written.
+			mer = ~mer;
+			rc_mer = 0;
+			for( i = 0; i < 64; i += 2 ){
+				rc_mer <<= 2;
+				rc_mer |= mer & 3;
+				mer >>= 2;
+			}
+			rc_mer <<= 64 - (2 * mask_weight);
+			// copy mer to reverse key
+			if( little_endian ){
+				for( i = 0; i < MASK_T_BYTES; i++ )
+					reverse.key[i] = ((char*)(&rc_mer))[ sizeof( mer ) - i - 1 ];
+			}else{
+				for( i = 0; i < MASK_T_BYTES; i++ )
+					reverse.key[i] = (((char*)(&rc_mer))[i]);
+			}
+			// put the lesser key in forward
+			if( COMPARE_KEYS( forward, reverse ) > 0)
+				forward = reverse;
+
+			// watch out for the first 6 records
+			if( seqI <= 6 ){
+				begin[ seqI - 1] = forward;
+			}else{
+				b->recs[ seqI - 1 ] = forward;
+				// set the position
+				sml[ seqI - 1 ].pos = b->input_pos + seqI - 1;
+			}
+		}
+
+		extras = b->io_size - mask_length + 1 < 6 ? b->io_size - mask_length + 1 : 6;
+
+		// fill in the first six records (seqI is 0 after the loop above)
+		for(; seqI < extras; seqI++ ){
+			b->recs[ seqI ] = begin[ seqI ];
+			// set the position
+			sml[ seqI ].pos = b->input_pos + seqI;
+		}
+#else
+	if(1){	// define a new scope so the variables can be local
+	// simulate random data in each bin
+    int i;
+    unsigned int keyval = 0;
+	unsigned int tmpval = 0;
+    if( divisor == 0 ) {
+        divisor = (unsigned)16777216 / (unsigned)NumBins;
+        // need ceiling of this
+        divisor += (unsigned)16777216 % (unsigned)NumBins ? 1 : 0;
+        printf( "Divisor is: %u\n", divisor );
+    }
+	for( seqI = 0; seqI < b->numrecs; seqI++ ){
+		tmpval = keyval;
+	    for( i = 3; i > 0; i-- ) {
+			b->recs[ seqI ].key[ i - 1 ] = (tmpval & 0xFF);
+			b->recs[ seqI ].key[ i - 1 ] = 0;
+			tmpval >>= 8;
+		}
+		keyval += divisor;
+	}
+	}
+#endif
+
+		// b has been restructured, add it to the ToProcess list
+        PushTail( &ToProcess, RemoveItem( &Restructure, b ) );
+
+
+        b = tmpnext;
+    } while( b != Restructure.head && Restructure.nitems );
+}
+
+// Walk the Reading list and hand every buffer whose read has reached
+// OP_FINISHED over to the Restructure list (its state is reset to OP_NONE),
+// adding its record count to RecsRead.
+static void HandleReadingCompletions( void ) {
+    buffer_t *cur = Reading.head;
+    while( cur ) {
+        // remember the successor before cur can be unlinked
+        buffer_t *succ = cur->next;
+        if( cur->operation == OP_FINISHED ) {
+            // migrate this to the Restructure list
+            cur->operation = OP_NONE;
+            PushTail( &Restructure, RemoveItem( &Reading, cur ) );
+            // bookkeeping
+            RecsRead += cur->numrecs;
+        }
+        cur = succ;
+        // stop when we are back at the head or the list has emptied
+        if( cur == Reading.head || !Reading.nitems ) {
+            break;
+        }
+    }
+}
+
+
+
+// Print the command line synopsis to stdout; pname is the program name shown
+// at the start of the usage line.
+void print_usage( const char* pname ){
+	static const char usage_fmt[] =
+		"Usage: %s <-m Working set size in MB> <-b buffer size> <-i input file> <-o output file> [-n number of records] <bin directory> <num bins> ... [bin directory] [num bins]\n";
+	printf( usage_fmt, pname );
+}
+
+// Set up all global state for a dmSML run: timers, the device table, the
+// input and output files, the SML header, the working set and its Free list,
+// the Seqbuf, and the bins (one file and one buffer each, spread round-robin
+// over the scratch devices).
+//
+//   working_mb      working set size in MB; 0 means use half of physical
+//                   memory, capped at 2GB
+//   buffer_size     records per buffer; 0 means the smallest power of two for
+//                   which the working set holds at most 2048 buffers
+//   input_filename  file holding the data to sort
+//   output_filename file the output is written to
+//   scratch_paths   NULL-terminated array of 1 to 8 path prefixes; bin files
+//                   are named "<prefix>out<NNNNN>.binned"
+//   seed            passed through to InitSML
+//
+// Returns 0 on success, otherwise one of TOO_FEW_BINS, TOO_MANY_BINS,
+// INVALID_WS_SIZE, INPUT_NOT_OPENED, OUTPUT_NOT_OPENED, SEQUENCE_TOO_SHORT,
+// INVALID_NUMRECS, NO_FREE_BUFFERS or BIN_NOT_OPENED.
+int InitdmSML( long working_mb, long buffer_size, const char* input_filename, const char* output_filename, const char* const* scratch_paths, uint64 seed ) {
+    int i, j;
+    // stays 0 (and is rejected below) if the memory size cannot be determined
+    offset_t desired_ws_size = 0, actual_ws_size;
+    offset_t bin_divisor;
+    SMLHeader_t header;
+    struct {
+        const char * bin_dev;
+        int devnum;
+        int nbins;
+    } bins[8];
+
+	char *bin_name;
+	int scratchI = 0;
+
+    // initialize the timing stuff
+    InitTime();
+
+    // start the running timer now.
+    RunningTime = 0;
+    RunningTimer = StartTimer();
+
+	if( working_mb != 0 ){
+
+	desired_ws_size = working_mb;
+	desired_ws_size *= 1024 * 1024;	// convert to bytes
+
+	}else{
+	// set desired working set size to half of physical memory...
+#ifdef WIN32
+	{
+	MEMORYSTATUS ms;
+	memset( &ms, 0, sizeof( MEMORYSTATUS ) );
+	GlobalMemoryStatus( &ms );
+	desired_ws_size = ms.dwTotalPhys / 2;
+	}
+#else
+
+    {
+	// get it from /proc/meminfo
+	FILE *fp = fopen("/proc/meminfo", "r");
+	if ( fp )
+	{
+		long memTotal = 0;
+
+		char buf[1024];
+		if ( fgets(buf, sizeof(buf), fp) )
+		{
+			if( sscanf(buf, "MemTotal: %ld kB", &memTotal) != 1 )
+				memTotal = 0;
+			// echo the line as data, never as a format string
+			fputs( buf, stderr );
+		}
+		fclose(fp);
+		// MemTotal is in kB, so kB * 512 is half of physical memory in
+		// bytes; leave the rest for buffer cache.  Multiply in offset_t so
+		// the product is not computed in long.
+		desired_ws_size = memTotal;
+		desired_ws_size *= 512;
+	}
+	}
+
+#endif
+	// never allocate more than 2GB
+	if( desired_ws_size / 1024  > 2048 * 1024 ){
+		desired_ws_size = 1024 * 1024;
+		desired_ws_size *= 2048;
+	}
+	}
+
+	if( buffer_size == 0 ){
+		buffer_size = 1;
+		while( desired_ws_size / (buffer_size*sizeof(record_t)) > 2048 ){
+			buffer_size *= 2;
+		}
+	}
+
+	BufferSizeMin = BufferSizeMax = buffer_size;
+	OutFileName = output_filename;
+
+	// find out how many scratch paths were given before the null terminator
+	for( ; ; scratchI++ ){
+		if( !scratch_paths || scratch_paths[ scratchI ] == NULL )
+			break;
+	}
+
+	NumBinDevs = scratchI;
+	NumDevices = 2 + NumBinDevs;
+	Devices = (device_t*)malloc( NumDevices * sizeof(device_t) );
+	DataDev = 0;
+	OutputDev = 1;
+	Devices[DataDev].devname = "Input device";
+	Devices[DataDev].path = input_filename;
+	Devices[DataDev].dev.buf = NULL;
+	Devices[OutputDev].devname = "Output device";
+	Devices[OutputDev].path = OutFileName;
+	Devices[OutputDev].dev.buf = NULL;
+
+    // bins[] below has room for 8 devices
+    if( NumBinDevs == 0 ) {
+    	return TOO_FEW_BINS;
+    } else if( NumBinDevs > 8 ) {
+    	return TOO_MANY_BINS;
+    }
+
+    // validate the working set size before it is used as a divisor below
+	bin_divisor = desired_ws_size / (200 * NumBinDevs);
+    if( bin_divisor <= 0 ) {
+        printf( "invalid working set size (%llu) -- must be at least %d bytes\n", desired_ws_size, 200 * NumBinDevs );
+    	return INVALID_WS_SIZE;
+    }
+
+	NumRecs = aStatFileSize( input_filename );
+
+	// calculate number of bins using nrecs and ws_size
+	NumBins = NumRecs / bin_divisor;
+	NumBins = NumBins < 5 * NumBinDevs ? 5 * NumBinDevs : NumBins;	// don't allow fewer than 5 bins per dev
+	// round for equal number of bins per dev
+	if( NumBins % NumBinDevs != 0 )
+		NumBins = ( (NumBins / NumBinDevs) + 1 ) * NumBinDevs;
+	printf( "Creating %d bin files\n", NumBins );
+	for( i = 2; i < NumDevices; i++ ){
+		// "bin dev__" is 9 characters plus the terminator
+		bin_name = (char*)malloc( 10 );
+		strcpy( bin_name, "bin dev__" );
+		bin_name[8] = 0x40 + i - 2;
+		Devices[i].devname = bin_name;
+		Devices[i].path = scratch_paths[ i - 2 ];
+		Devices[i].dev.buf = NULL;
+		bins[i - 2].bin_dev = bin_name;
+		bins[i - 2].nbins = NumBins / NumBinDevs;	// allocate an even portion of bins per device
+		bins[i - 2].devnum = i;
+	}
+
+    // get buffer size.
+    if( BufferSizeMin == 0 ) {
+        BufferSizeMin = MINRECS;
+        BufferSizeMax = MAXRECS;
+    }
+
+
+    // open the input file
+    Data = aOpen( input_filename, A_READ );
+	if( Data == NULL ) {
+	        printf( "couldn't open data file\n" );
+		return INPUT_NOT_OPENED;
+	}
+
+	// init translation table
+	DNA_TABLE = CreateBasicDNATable();
+
+    // open the output file
+    Output = aOpen( OutFileName, A_WRITE );
+    if( !Output ) {
+        printf( "couldn't open output file!\n" );
+    	return OUTPUT_NOT_OPENED;
+    }
+
+	header = InitSML( Output, NumRecs, seed );
+	seed_mask = header.seed;
+	mask_length = header.seed_length;
+	mask_weight = header.seed_weight;
+
+	if( NumRecs <= mask_length - 1 ){
+	        printf( "Sequence must be at least %d characters in length\n", mask_length );
+		return SEQUENCE_TOO_SHORT;
+	}
+
+	// one record per seed position: the last mask_length - 1 characters
+	// cannot start a full-length seed
+	NumRecs -= mask_length - 1;
+	printf( "NumRecs is: %llu \n", NumRecs );
+    // get the number of records we should process
+    RecsProcessed = 0;
+    RecsUnread = NumRecs;
+    if( NumRecs <= 0 ) {
+        printf( "invalid NumRecs: %llu\n", NumRecs );
+    	return INVALID_NUMRECS;
+    }
+
+
+
+    // go ahead and create the working set.
+    actual_ws_size = MakeWorkingSet( &WS, desired_ws_size, BufferSizeMin, BufferSizeMax );
+    printf( "desired working set: %llu, actual working set: %llu\n",
+        desired_ws_size, actual_ws_size );
+
+    // initialize the Free list -- just put all the buffers on it.
+    for( i = 0; i < WS.nbufs; i++ ) {
+        PushHead( &Free, &(WS.bufs[i]) );
+    }
+
+    printf( "working set size        : %llu\n", actual_ws_size );
+    printf( "total buffers           : %d\n", WS.nbufs );
+    // FIXME: can any touching of the memory here help us?
+    // toprocess and reading list empty to start
+    ToProcess.nitems = Reading.nitems = 0;
+    ToProcess.head = Reading.head = NULL;
+	Restructure.nitems = 0;
+	Restructure.head = NULL;
+
+	// allocate Seqbuf
+	Seqbuf.file = Output;
+	Seqbuf.dev = OutputDev;
+	Seqbuf.bufpos = 0;
+	Seqbuf.seq_pos = 0;
+    if( Free.nitems ) {
+        PushHead( &(Seqbuf.bufs), AllocateFree() );
+    } else {
+        printf( "error: could not give a buffer to Seqbuf\n" );
+        return NO_FREE_BUFFERS;
+    }
+
+    // allocate the bins.
+    Bins = malloc( sizeof( *Bins ) * NumBins );
+    memset( Bins, 0, sizeof( *Bins ) * NumBins );
+
+    // allocate the bins in a round-robin fashion, so when we read
+    // things back for sorting, we're not swamping one device at a time --
+    // instead, things are spread out.
+    printf( "opening %d bins\n", NumBins );
+    j = -1;
+    for( i = 0; i < NumBins; i++ ) {
+        // find a bin on the next device.
+        while( 1 ) {
+            j = (j+1) % NumBinDevs;
+            if( bins[j].nbins ) {
+                // make this bin on that device, and
+                // round-robin switch to the next device.
+                const char *fname = Fmt("%sout%05d.binned",Devices[bins[j].devnum].path,i);
+                Bins[i].dev = bins[j].devnum;
+                // keep our own copy of the name returned by Fmt
+                Bins[i].fname = malloc( strlen( fname ) + 1 );
+                strcpy( Bins[i].fname, fname );
+
+#ifndef NO_BINNING_PERF_TEST
+                Bins[i].file = aOpen( fname, A_WRITE );
+                if( Bins[i].file == NULL ) {
+                    printf( "couldn't open output bin file '%s'\n", fname );
+					return BIN_NOT_OPENED;
+                }
+#else
+                Bins[i].nrecs = aStatSize( fname );
+		if( Bins[i].nrecs == 0 ){
+			// just make sure the file exists
+	                Bins[i].file = aOpen( fname, A_WRITE );
+			aClose( Bins[i].file );
+			Bins[i].file = NULL;
+		}
+#endif // NO_BINNING_PERF_TEST
+                bins[j].nbins--;
+                break;
+            }
+        }
+    }
+
+    // now we allocate one buffer for each bin
+    // and each bin will hold onto at least one buffer
+    // so that we can guarantee no locking cases
+    for( i = 0; i < NumBins; i++ ) {
+        if( Free.nitems ) {
+            PushHead( &(Bins[i].bufs), AllocateFree() );
+        } else {
+            printf( "error: could not give one buffer to each bin\n" );
+	        return NO_FREE_BUFFERS;
+        }
+    }
+
+	// all went well
+	return 0;
+}
+
+
+
+// Print the column titles for the one-line status samples that
+// DisplayStatus() emits.
+void DisplayStatusHeader( void ) {
+    static const char columns[] =
+        "time recs_read recs_processed recs_committed recs_written binning_rate free reading toprocess bins restructure\n";
+
+    fputs( columns, stdout );
+}
+
+
+// Print one space-separated status sample; the fields appear in the same
+// order as the titles written by DisplayStatusHeader().
+void DisplayStatus( void ) {
+
+    // working-set buffers that are on none of the Free, Reading, ToProcess
+    // or Restructure lists are reported in the "bins" column.
+    int in_bins = WS.nbufs - Free.nitems - Reading.nitems - ToProcess.nitems - Restructure.nitems;
+    double binning_rate = RecsProcessed/RunningTime;
+
+    printf( "%f %llu %llu %llu %llu %f %d %d %d %d %d\n",
+        RunningTime, RecsRead, RecsProcessed, RecsCommitted, RecsWritten,
+        binning_rate, Free.nitems, Reading.nitems, ToProcess.nitems,
+        in_bins, Restructure.nitems );
+
+    // Verbose multi-line report, kept for reference but disabled.
+    /*
+    int i;
+    printf( "-----------------------------------------------------------\n" );
+    printf( "Records Processed : %d/%d\n", RecsProcessed, NumRecs );
+    printf( "Records Committed : %d\n", RecsCommitted );
+    printf( "Records Written   : %d\n", RecsWritten );
+    printf( "Records Read      : %d\n", RecsRead );
+    printf( "Running Time      : %f seconds\n", RunningTime );
+    printf( "Binning Rate      : %f records/sec  (%f bytes/sec)\n",
+        RecsProcessed / RunningTime, RecsProcessed * sizeof(record_t) / RunningTime );
+    printf( "Freelist entries  : %d\n", Free.nitems );
+    printf( "Reading entries   : %d\n", Reading.nitems );
+    printf( "ToProcess entries : %d\n", ToProcess.nitems );
+    printf( "Bin entries:\n" );
+    for( i = 0; i < NumBins; i++ ) {
+        printf( "  %4d : %4d\n", i, Bins[i].bufs.nitems );
+    }    
+    printf( "Device status:\n" );
+    for( i = 0; i < NumDevices; i++ ) {
+        printf( "  %d : '%16s' : '%16s' : %s\n", i, Devices[i].devname,
+            Devices[i].path, Devices[i].dev.state == DEV_FREE ? "FREE" : "BUSY" );
+    }
+    */
+}
+
+
+// Give every file, the working set and every device one chance to advance
+// their outstanding asynchronous I/O during the binning phase.
+void UpdateIOState( void ) {
+    int bin;
+    int dev;
+
+    // poll the aio layer: input data file, each bin file, then the output file
+    aUpdateOperations( Data );
+    for( bin = 0; bin < NumBins; bin++ ) {
+        aUpdateOperations( Bins[bin].file );
+    }
+    aUpdateOperations( Output );
+
+    // let the working set adjust the operation state of its buffers
+    UpdateWSIOFinishedState( &WS );
+
+    // finally, let each device start new operations if it can
+    for( dev = 0; dev < NumDevices; dev++ ) {
+        UpdateDeviceIOExecuteState( &WS, &(Devices[dev].dev) );
+    }
+}
+
+
+// Spin on UpdateIOState() until no working-set buffer that has both a device
+// and a file attached still has an operation pending or in progress, then
+// report how long the wait took.
+void EnsureAllOperationsComplete( void ) {
+    dmtimer_t *elapsed = StartTimer();
+    int pending;
+
+    do {
+        int b;
+
+        UpdateIOState();
+
+        // scan for the first buffer that is still busy
+        pending = 0;
+        for( b = 0; b < WS.nbufs && !pending; b++ ) {
+            if( WS.bufs[b].device &&
+                WS.bufs[b].file &&
+                (WS.bufs[b].operation == OP_PENDING || WS.bufs[b].operation > OP_NONE) ) {
+                pending = 1;
+            }
+        }
+    } while( pending );
+
+    printf( "Ensure All Operations Complete: %d msec\n", ReadTimer( elapsed ) );
+    StopTimer( elapsed );
+}
+
+
+
+
+
+// RunningTime at which BinningPhase() last printed a status sample.
+static double lasttime = 0;
+
+// First pass: keep the asynchronous read / bin / write pipeline turning until
+// every record has been processed, then drain all outstanding I/O and close
+// the input file and the bin files.
+void BinningPhase( void ) {
+
+    int bin;
+    // number of trips through the main loop, reported when it ends
+    int iter = 0;
+
+    printf( "----------------- Starting -----------------\n" );
+    printf( "working set buffers : %d\n", WS.nbufs );
+    printf( "number of bins      : %d\n", NumBins );
+    DisplayStatusHeader();
+
+    for( ; RecsProcessed < NumRecs; iter++ ) {
+
+        // print a status sample once RunningTime has advanced by 2 seconds
+        if( (RunningTime - lasttime) >= 2.0f ) {
+            DisplayStatus();
+            lasttime = RunningTime;
+        }
+
+        // keep the async io running
+        UpdateIOState();
+
+        // handle read and write completions
+        // (reads move on towards ToProcess, writes go back to Free)
+        HandleReadingCompletions();
+        HandleSeqbufWriteCompletions();
+        RestructureReadSMLBins();
+        HandleBinWriteCompletions();
+
+        // schedule more reading and do the binning
+        DoReading();
+        DoBinning();
+
+        RunningTime = (double)ReadTimer( RunningTimer ) / 1000.0;
+    }
+
+    printf( "total iters: %d\n", iter );
+
+    // All writes *must* have completed before the files are closed.
+    // aClose alone is not enough: it waits for the operations currently
+    // scheduled on that file, but the device method allows only one
+    // operation per device at a time, so the device managers have to be
+    // asked to complete their own IO first.
+    // FIXME: this could potentially move into the buffer code as a
+    // DeviceClose type of call, but unless such a call knew about more than
+    // one device at a time it would make the IO here effectively
+    // synchronous, hence the ugly hack for now.
+    FinishBinning();
+    EnsureAllOperationsComplete();
+
+    // close the input file, then every bin file
+    aClose( Data );
+    Data = NULL;
+    for( bin = 0; bin < NumBins; bin++ ) {
+        aClose( Bins[bin].file );
+        Bins[bin].file = NULL;
+    }
+    printf( "Finally, RecsCommitted: %llu\n", RecsCommitted );
+
+    DisplayStatus();
+
+}
+
+
+
+
+
+// Schedule the read of the next unread bin file (BinToRead) into the first
+// sort buffer that is in WAIT_READ.  At most one read is scheduled per call.
+void SortReading( void ) {
+
+    int slot;
+
+    // every bin has already been handed to a buffer -- nothing to do
+    if( BinToRead >= NumBins ) {
+        return;
+    }
+
+    for( slot = 0; slot < NSortBufs; slot++ ) {
+        sort_buf_t *sb = &SortBufs[slot];
+        const char *fname;
+        aFILE *in;
+
+        if( sb->state != WAIT_READ ) {
+            continue;
+        }
+
+        // open the bin file that was written during the binning phase
+        fname = Fmt("%sout%05d.binned",Devices[Bins[BinToRead].dev].path,BinToRead);
+        in = aOpen( fname, A_READ );
+        // NOTE(review): both failure cases below only print a message and
+        // then carry on with the read -- confirm that ReadBuffer tolerates a
+        // NULL file and an oversized record count.
+        if( !in ) {
+            printf( "couldn't open '%s' to read!\n", fname );
+        }
+        if( Bins[BinToRead].nrecs > sb->buf->totalrecs ) {
+            printf( "buffer not big enough to hold bin!\n" );
+        }
+
+        // bind the buffer to this bin and its device, then start the read
+        sb->bin = BinToRead;
+        sb->dev = &(Devices[Bins[BinToRead].dev].dev);
+        sb->state = BUSY_READ;
+        sb->buf->file = in;
+        ReadBuffer( sb->buf, Bins[BinToRead].nrecs, sb->dev );
+        printf( "scheduled read of bin %d\n", BinToRead );
+        BinToRead++;
+        return;
+    }
+
+}
+
+
+
+#ifdef USE_QSORT_ONLY
+
+// Compare two records by key using the COMPARE_KEYS macro.
+// Returns the macro's result unchanged; callers in this file treat a
+// negative value as "a sorts before b" and a positive value as the reverse.
+int comp_keys( record_t a, record_t b ){
+	return COMPARE_KEYS( a, b );
+}
+
+// Sort the inclusive range a[lo..hi] by direct comparison when it holds two,
+// three or four records; any other range size is left untouched.
+void QBrute( record_t a[], int lo, int hi ) {
+    record_t tmp;
+    int smallest;
+    int largest;
+
+    switch( hi - lo ) {
+
+    case 1:
+        // two records: swap them if they are out of order
+        if( comp_keys( a[hi], a[lo] ) < 0 ) {
+            tmp = a[lo];
+            a[lo] = a[hi];
+            a[hi] = tmp;
+        }
+        break;
+
+    case 2:
+        // three records: move the smallest to the front, sort the other two
+        smallest = ( comp_keys( a[lo], a[lo+1] ) < 0 ) ? lo : lo+1;
+        if( comp_keys( a[smallest], a[lo+2] ) >= 0 ) {
+            smallest = lo+2;
+        }
+        if( smallest != lo ) {
+            tmp = a[lo];
+            a[lo] = a[smallest];
+            a[smallest] = tmp;
+        }
+        QBrute( a, lo+1, hi );
+        break;
+
+    case 3:
+        // four records: smallest to the front, largest of the remaining
+        // three to the back, then sort the middle pair
+        smallest = ( comp_keys( a[lo], a[lo+1] ) < 0 ) ? lo : lo+1;
+        if( comp_keys( a[smallest], a[lo+2] ) >= 0 ) {
+            smallest = lo+2;
+        }
+        if( comp_keys( a[smallest], a[lo+3] ) >= 0 ) {
+            smallest = lo+3;
+        }
+        if( smallest != lo ) {
+            tmp = a[lo];
+            a[lo] = a[smallest];
+            a[smallest] = tmp;
+        }
+
+        largest = ( comp_keys( a[hi], a[hi-1] ) > 0 ) ? hi : hi-1;
+        if( comp_keys( a[largest], a[hi-2] ) <= 0 ) {
+            largest = hi-2;
+        }
+        if( largest != hi ) {
+            tmp = a[hi];
+            a[hi] = a[largest];
+            a[largest] = tmp;
+        }
+        QBrute( a, lo+1, hi-1 );
+        break;
+
+    default:
+        break;
+    }
+}
+
+
+
+// Recursive quicksort of the inclusive range a[lo0..hi0], ordered by
+// comp_keys().  Ranges of four records or fewer are handed to QBrute().
+void QSort( record_t a[], int lo0, int hi0 ) {
+    
+    int lo = lo0;
+    int hi = hi0;
+    int mid;
+    
+    record_t pivot;
+
+    if ((hi-lo) <= 3) {
+        QBrute(a, lo, hi);
+        return;
+    }
+    
+    // Pick the middle element as pivot and park it at the end of the range.
+    // lo + (hi - lo) / 2 gives the same index as (lo + hi) / 2 but cannot
+    // overflow int when both bounds are large.
+    mid = lo + (hi - lo) / 2;
+    pivot = a[mid];
+    a[mid] = a[hi];
+    a[hi] = pivot;
+    
+    while( lo < hi ) {
+
+        // Search forward from a[lo] until an element is found that
+        // is greater than the pivot or lo >= hi 
+        while( (comp_keys( a[lo], pivot ) <= 0) && lo < hi ) {
+            lo++;
+        }
+        
+        // Search backward from a[hi] until an element is found that
+        // is less than the pivot, or hi <= lo 
+        while( (comp_keys( pivot, a[hi] ) <= 0) && lo < hi ) {
+            hi--;
+        }
+        
+        // Swap the two out-of-place elements
+        if( lo < hi ) {
+            record_t T = a[lo];
+            a[lo] = a[hi];
+            a[hi] = T;
+        }
+    }
+    
+    // lo == hi here: move the pivot from the end of the range to its
+    // final position
+    a[hi0] = a[hi];
+    a[hi] = pivot;
+    
+    // Recursive calls: elements a[lo0] to a[lo-1] are less than or
+    // equal to pivot, elements a[hi+1] to a[hi0] are greater than or
+    // equal to pivot.
+    QSort( a, lo0, lo-1 );
+    QSort( a, hi+1, hi0 );
+}
+
+
+
+
+
+// Sort the first nelems records of a[] with QSort().
+void RecSort( record_t a[], int nelems ) {
+    const int last = nelems-1;
+
+    QSort( a, 0, last );
+}
+
+
+// Sort all buf->numrecs records held in buf->recs.
+// Always returns 1; SortSorting() reads that as "finished".
+int SortBuffer( buffer_t * buf ) {
+    RecSort( buf->recs, buf->numrecs );
+    return 1;
+}
+
+
+// USE_QSORT_ONLY variant: among the buffers in SORTING, sort the one holding
+// the lowest-numbered bin and move it on to WRITE_RESTRUCTURE.  The time
+// spent is added to QSortTime.
+void SortSorting( void ) {
+
+    int slot;
+    int pick = -1;
+    QSortTimer = StartTimer();
+
+    // find the SORTING buffer with the smallest bin number
+    for( slot = 0; slot < NSortBufs; slot++ ) {
+        if( SortBufs[slot].state != SORTING ) {
+            continue;
+        }
+        if( pick == -1 || SortBufs[slot].bin < SortBufs[pick].bin ) {
+            pick = slot;
+        }
+    }
+
+    if( pick != -1 ) {
+        printf( "sorting bin %d\n", SortBufs[pick].bin );
+        if( SortBuffer( SortBufs[pick].buf ) ) {
+            SortBufs[pick].state = WRITE_RESTRUCTURE;
+        }
+    }
+
+    QSortTime += ReadTimer( QSortTimer ) / 1000.0;
+    StopTimer( QSortTimer );
+
+}
+
+#elif defined NO_SORT_PERF_TEST
+
+
+
+// NO_SORT_PERF_TEST variant: no sorting is done; every buffer in SORTING is
+// moved straight to WAIT_WRITE.  The time spent is still added to QSortTime.
+void SortSorting( void ) {
+
+    int slot = 0;
+
+    QSortTimer = StartTimer();
+
+    while( slot < NSortBufs ) {
+        if( SortBufs[slot].state == SORTING ) {
+            SortBufs[slot].state = WAIT_WRITE;
+        }
+        slot++;
+    }
+
+    QSortTime += ReadTimer( QSortTimer ) / 1000.0;
+    StopTimer( QSortTimer );
+
+}
+
+
+
+#else 
+
+sort_buf_t* CurrentSortBuf;
+buffer_t* SortScratchBuffer;
+
+// Radix-sort variant: bins are sorted strictly in order (BinToSort), one
+// buffer at a time, with one RadixSort() call per invocation.  The time spent
+// is added to QSortTime.
+void SortSorting( void ) {
+
+    int slot;
+
+    QSortTimer = StartTimer();
+
+    // No sort in progress: look for the buffer that holds the next bin to
+    // sort and set it up with the scratch buffer.
+    if( CurrentSortBuf == NULL ){
+        for( slot = 0; slot < NSortBufs; slot++ ) {
+            if( SortBufs[slot].state != SORTING || SortBufs[slot].bin != BinToSort ) {
+                continue;
+            }
+            CurrentSortBuf = &SortBufs[slot];
+            InitRadixSort( CurrentSortBuf, SortScratchBuffer );
+            printf( "scheduling sort of bin %d\n", BinToSort );
+            break;
+        }
+    }
+
+    // A sort is in progress and has not finished yet: advance it.
+    if( CurrentSortBuf != NULL && CurrentSortBuf->state != WRITE_RESTRUCTURE ){
+
+        // presumably RadixSort moves the buffer to WRITE_RESTRUCTURE once
+        // the sort is complete -- that is the state tested below
+        RadixSort( CurrentSortBuf );
+
+        // finished: release the slot and move on to the next bin
+        if( CurrentSortBuf->state == WRITE_RESTRUCTURE ){
+            CurrentSortBuf = NULL;
+            BinToSort++;
+        }
+    }
+
+    QSortTime += ReadTimer( QSortTimer ) / 1000.0;
+    StopTimer( QSortTimer );
+
+}
+
+#endif
+
+// For every sort buffer in WRITE_RESTRUCTURE, overwrite the start of the
+// buffer with the `pos` field of each of its records, in order, then mark the
+// buffer WAIT_WRITE.
+void RestructureSMLBinsForWrite( void ) {
+    int slot;
+
+    for( slot = 0; slot < NSortBufs; slot++ ) {
+        sort_buf_t *sb = &SortBufs[slot];
+        position_t *dest;
+        sml_t *src;
+        offset_t count;
+        offset_t rec;
+
+        if( sb->state != WRITE_RESTRUCTURE ) {
+            continue;
+        }
+
+        printf( "restructuring bin %d\n", sb->bin );
+
+        // source and destination are the same memory; the copy runs front
+        // to back.
+        // NOTE(review): relies on a position_t being no larger than an
+        // sml_t so the write never overtakes the read -- confirm.
+        dest = (position_t*)sb->buf->recs;
+        src = (sml_t*)sb->buf->recs;
+        count = Bins[sb->bin].nrecs;
+        for( rec = 0; rec < count; rec++ ){
+            dest[ rec ] = src[ rec ].pos;
+        }
+
+        // ready to be written out
+        sb->state = WAIT_WRITE;
+    }
+}
+
+// use this version if no pre-write modifications are required
+/*
+void RestructureSMLBinsForWrite( void ) {
+    int i;
+
+    for( i = 0; i < NSortBufs; i++ ) {
+        // if this one is ready to be restructured...
+        if( SortBufs[i].state == WRITE_RESTRUCTURE ) {
+            // set its state for writing
+            SortBufs[i].state = WAIT_WRITE;
+        }
+    }
+}
+*/
+
+// Number of bytes to write for the bin held by sort buffer sortI: one
+// position_t per record in that bin.
+// NOTE(review): the product is returned as int -- confirm bins can never be
+// large enough for this to truncate.
+int CalculateSortWriteSize( int sortI ){
+     const bin_t *bin = &Bins[SortBufs[sortI].bin];
+
+     return bin->nrecs * sizeof( position_t );
+}
+
+// Schedule output-file writes for sort buffers that are in WAIT_WRITE,
+// strictly in bin order: a buffer is written only when it holds bin
+// BinToWrite, which then advances.
+void SortWriting( void ) {
+
+    int slot;
+
+    for( slot = 0; slot < NSortBufs; slot++ ) {
+        sort_buf_t *sb = &SortBufs[slot];
+
+        // only the buffer holding the next bin in sequence may be written
+        if( sb->state != WAIT_WRITE || sb->bin != BinToWrite ) {
+            continue;
+        }
+#ifdef NO_WRITE_PERF_TEST
+        // skip the write entirely; the buffer is free to be read into again
+        sb->state = WAIT_READ;
+#else
+        printf( "scheduling write of bin %d\n", BinToWrite );
+        // retarget the buffer at the output file and device and write it
+        sb->dev = &(Devices[OutputDev].dev);
+        sb->state = BUSY_WRITE;
+        sb->buf->file = Output;
+        WriteBuffer( sb->buf, Bins[sb->bin].nrecs, &(Devices[OutputDev].dev) );
+        // set after WriteBuffer, as in the original ordering, so the
+        // restructured (position-only) size is what ends up in io_size
+        sb->buf->io_size = CalculateSortWriteSize( slot );
+#endif // NO_WRITE_PERF_TEST
+        BinToWrite++;
+    }
+
+}
+
+
+
+
+
+// Advance the state of every sort buffer whose read or write has finished:
+// a finished read moves to SORTING, a finished write back to WAIT_READ.
+void SortHandleCompletions( void ) {
+
+    int slot;
+
+    for( slot = 0; slot < NSortBufs; slot++ ) {
+        sort_buf_t *sb = &SortBufs[slot];
+
+        // only buffers with I/O in flight are of interest
+        if( sb->state != BUSY_READ && sb->state != BUSY_WRITE ) {
+            continue;
+        }
+        if( sb->buf->operation != OP_FINISHED ) {
+            continue;
+        }
+
+        sb->buf->operation = OP_NONE;
+        if( sb->state == BUSY_READ ) {
+            sb->state = SORTING;
+        } else {
+            sb->state = WAIT_READ;
+        }
+#ifdef NNNNN_KEYBYTES
+        // bin 0 doesn't need to be sorted
+        // NOTE(review): this also bypasses WRITE_RESTRUCTURE, so bin 0 is
+        // not passed through RestructureSMLBinsForWrite -- confirm intended.
+        if( sb->bin == 0 && sb->state == SORTING )
+            sb->state = WAIT_WRITE;
+#endif
+    }
+
+}
+
+
+
+
+
+// Sorting-phase counterpart of UpdateIOState(): advance outstanding
+// asynchronous I/O on the output file, on every sort buffer's file, in the
+// working set and on every device.
+void SortUpdateIOState() {
+
+    int slot;
+    int dev;
+
+    // poll the aio layer for the output file first
+    aUpdateOperations( Output );
+
+    // then for each sort buffer that currently has a file attached
+    for( slot = 0; slot < NSortBufs; slot++ ) {
+        aFILE *f = SortBufs[slot].buf->file;
+        if( f ) {
+            aUpdateOperations( f );
+        }
+    }
+
+    // let the working set adjust the operation state of its buffers
+    UpdateWSIOFinishedState( &WS );
+
+    // finally, let each device start new operations if it can
+    for( dev = 0; dev < NumDevices; dev++ ) {
+        UpdateDeviceIOExecuteState( &WS, &(Devices[dev].dev) );
+    }
+
+}
+
+
+
+
+
+
+// Spin on SortUpdateIOState() until no working-set buffer that has both a
+// device and a file attached still has an operation pending or in progress,
+// flush the output file, and report how long the wait took.
+void SortingEnsureAllOperationsComplete() {
+    dmtimer_t *elapsed = StartTimer();
+    int pending;
+
+    do {
+        int b;
+
+        SortUpdateIOState();
+
+        // scan for the first buffer that is still busy
+        pending = 0;
+        for( b = 0; b < WS.nbufs && !pending; b++ ) {
+            if( WS.bufs[b].device &&
+                WS.bufs[b].file &&
+                (WS.bufs[b].operation == OP_PENDING || WS.bufs[b].operation > OP_NONE) ) {
+                pending = 1;
+            }
+        }
+    } while( pending );
+
+    // flush the output file to disk.
+    aFlush( Output );
+
+    printf( "Sort Ensure All Operations Complete: %d msec\n", ReadTimer( elapsed ) );
+    StopTimer( elapsed );
+}
+
+
+
+
+
+
+
+// Second pass: re-partition the working set into buffers big enough to hold
+// the largest bin, then read each bin file back, sort it, restructure it and
+// append it to the output file, strictly in bin order.
+//
+// Entire bin files are read into a slot at a time.  Slots are sorted as
+// their reads finish, and written out in order; once a slot's write has
+// completed it becomes available for the next unread bin.
+void SortingPhase( void ) {
+
+    int i;
+    offset_t recs_per_buffer;
+    offset_t biggest_nrecs = 0;
+
+    // every sort buffer must be able to hold the largest bin
+    for( i = 0; i < NumBins; i++ ) {
+        if( Bins[i].nrecs > biggest_nrecs ) {
+            biggest_nrecs = Bins[i].nrecs;
+        }
+    }
+    
+    //recs_per_buffer = (WS.size / sizeof( record_t )) / NSortBufs;
+
+    recs_per_buffer = biggest_nrecs;
+
+    if( (WS.size / sizeof( record_t )) < (unsigned)recs_per_buffer ) {
+        printf( "working set holds %llu recs, but we need %llu\n", 
+            (WS.size / sizeof( record_t )), recs_per_buffer );
+    }
+
+    NSortBufs = (WS.size / sizeof( record_t )) / recs_per_buffer;
+
+    printf( "NSortBufs = %d\n", NSortBufs );
+
+    // these go from 0 to NumBins-1 as bins move through the pipeline.
+    BinToRead = 0;
+    BinToWrite = 0;
+    BinToSort = 0;
+
+    printf( "reorganizing working set: %llu recs per buffer, %d sort bufs\n", recs_per_buffer, NSortBufs );
+    ReorganizeWorkingSet( &WS, recs_per_buffer, recs_per_buffer );
+
+#if !defined USE_QSORT_ONLY && !defined NO_SORT_PERF_TEST
+    // steal the last buffer for scratch space
+    NSortBufs--;
+    SortScratchBuffer = &(WS.bufs[NSortBufs]);
+    SortScratchBuffer->operation = SORTING_SCRATCH;
+#endif
+    
+    printf( "reorganized working set has %d buffers of %llu bytes\n", WS.nbufs, recs_per_buffer * sizeof(record_t) );
+    SortBufs = malloc( sizeof( *SortBufs ) * NSortBufs );
+    memset( SortBufs, 0, sizeof( *SortBufs ) * NSortBufs );
+
+    // put everything in WAIT_READ;
+    for( i = 0; i < NSortBufs; i++ ) {
+        SortBufs[i].state = WAIT_READ;
+        SortBufs[i].buf = &(WS.bufs[i]);
+        SortBufs[i].dev = NULL;
+    }
+#ifdef NNNNN_KEYBYTES
+    // process the first bin then restructure the working set again
+    
+    while( BinToWrite < 1 ) {
+        SortReading();
+        SortSorting();
+        RestructureSMLBinsForWrite();
+        SortWriting();
+        SortUpdateIOState();
+        SortHandleCompletions();
+    }
+    SortingEnsureAllOperationsComplete();
+
+    // Re-size the buffers for the remaining bins only.  The running maximum
+    // has to start from zero again, otherwise bin 0's count is carried over
+    // and this second sizing pass can never change anything.
+    biggest_nrecs = 0;
+    for( i = 1; i < NumBins; i++ ) {
+        if( Bins[i].nrecs > biggest_nrecs ) {
+            biggest_nrecs = Bins[i].nrecs;
+        }
+    }
+    // keep the previous buffer size if the remaining bins are all empty, so
+    // the division below never sees zero because of the reset above
+    if( biggest_nrecs > 0 ) {
+        recs_per_buffer = biggest_nrecs;
+    }
+    if( (WS.size / sizeof( record_t )) < (unsigned)recs_per_buffer ) {
+        printf( "working set holds %llu recs, but we need %llu\n", 
+            (WS.size / sizeof( record_t )), recs_per_buffer );
+    }
+    NSortBufs = (WS.size / sizeof( record_t )) / recs_per_buffer;
+    printf( "NSortBufs = %d\n", NSortBufs );
+    // bin 0 is done; everything restarts at bin 1.
+    BinToRead = 1;
+    BinToWrite = 1;
+    BinToSort = 1;
+
+    printf( "reorganizing working set: %llu recs per buffer, %d sort bufs\n", recs_per_buffer, NSortBufs );
+    ReorganizeWorkingSet( &WS, recs_per_buffer, recs_per_buffer );
+
+#if !defined USE_QSORT_ONLY && !defined NO_SORT_PERF_TEST
+    // steal the last buffer for scratch space
+    NSortBufs--;
+    SortScratchBuffer = &(WS.bufs[NSortBufs]);
+    SortScratchBuffer->operation = SORTING_SCRATCH;
+#endif
+    
+    printf( "reorganized working set has %d buffers of %llu bytes\n", WS.nbufs, recs_per_buffer * sizeof(record_t) );
+    // release the table built for bin 0 before allocating its replacement
+    free( SortBufs );
+    SortBufs = malloc( sizeof( *SortBufs ) * NSortBufs );
+    memset( SortBufs, 0, sizeof( *SortBufs ) * NSortBufs );
+
+    // put everything in WAIT_READ;
+    for( i = 0; i < NSortBufs; i++ ) {
+        SortBufs[i].state = WAIT_READ;
+        SortBufs[i].buf = &(WS.bufs[i]);
+        SortBufs[i].dev = NULL;
+    }
+#endif    
+    
+
+    while( BinToWrite < NumBins ) {
+        
+        // ReadFiles -- schedule reading operations if we can (are any buffers
+        // in WAIT_READ?)
+        SortReading();
+        
+        // SortData -- sort whatever is in SORTING; a finished sort moves on
+        // to the restructure step.
+        SortSorting();
+        
+        // Perform any necessary post-sort processing on the data to prepare it for
+        // writing out to the sorted file
+        RestructureSMLBinsForWrite();
+
+        // WriteFiles -- schedule writing operations for everything in WAIT_WRITE, if
+        // it is the next file we need to write (make sure to schedule in order).
+        SortWriting();
+        
+        // update io state
+        SortUpdateIOState();
+
+        // HandleCompletions -- if something finishes,
+        // if it was reading, transition to SORTING
+        // if it was writing, transition to WAIT_READ.
+        SortHandleCompletions();
+        
+    }
+
+    SortingEnsureAllOperationsComplete();
+
+    printf( "QSort took %f seconds\n", QSortTime );
+
+}
+
+
+
+
+
+
+
+// Run the two passes (binning, then sorting), time each of them and print a
+// timing / throughput summary.  Always returns 0.
+int dmsort( void ) {
+
+    // total number of megabytes of record data handled, for the rate report
+    double total_mb;
+
+    // Do the first pass binning stuff
+    BinningTimer = StartTimer();
+#ifndef NO_BINNING_PERF_TEST
+    BinningPhase();
+    BinningTime = ReadTimer( BinningTimer ) / 1000.0;
+#endif // NO_BINNING_PERF_TEST
+    StopTimer( BinningTimer );
+
+
+    // Do the second pass sort
+    SortingTimer = StartTimer();
+    SortingPhase();
+    SortingTime = ReadTimer( SortingTimer ) / 1000.0;
+    StopTimer( SortingTimer );
+
+
+    RunningTime = ReadTimer( RunningTimer ) / 1000.0;
+    StopTimer( RunningTimer );
+
+    // share of the total run time, as a percentage
+    printf( "total time      : %f sec\n", RunningTime );
+    printf( "binning time    : %f sec (%f%%)\n", BinningTime, BinningTime/RunningTime * 100.0 );
+    printf( "sorting time    : %f sec (%f%%)\n", SortingTime, SortingTime/RunningTime * 100.0 );
+    
+    // use the real record size rather than assuming 100-byte records
+    total_mb = ((double)NumRecs) * sizeof(record_t) / 1048576.0;
+    printf( "total rate      : %f MB/sec\n", total_mb/RunningTime );
+    printf( "total bin rate  : %f MB/sec\n", total_mb/BinningTime );
+    printf( "total sort rate : %f MB/sec\n", total_mb/SortingTime );
+
+    return 0;
+}
+
+
+// Build a sorted mer list for input_file and write it to output_file, using
+// scratch_paths for the temporary bin files.
+//
+// Returns the non-zero InitdmSML() error code if initialisation fails (in
+// which case nothing below is run), otherwise the result of dmsort().
+// After a run, the scratch bin files are removed and the module-level state
+// is reset so the function can be called again.
+int dmSML( const char* input_file, const char* output_file, const char* const* scratch_paths, uint64 seed ) {
+	int rval = 0;
+	int i = 0;
+	rval = InitdmSML( 0, 0, input_file, output_file, scratch_paths, seed );
+	if( rval != 0 )
+		return rval;
+	rval = dmsort();
+	
+	// delete the scratch bin files and release the bin table; the table is
+	// checked before it is walked, not after
+	if( Bins ){
+		for( i = 0; i < NumBins; i++ ){
+			if( Bins[ i ].fname ){
+				removeFile( Bins[ i ].fname, FALSE );
+				free( Bins[ i ].fname );
+			}
+		}
+		free( Bins );
+	}
+	Bins = NULL;
+	NumBins = 0;
+//	for( i = 0; i < NumDevices; i++ )
+//		free( Devices[i].devname );
+	NumDevices = 0;
+	if( Devices )
+		free( Devices );
+	Devices = NULL;
+	if( SortBufs )
+		free( SortBufs );
+	SortBufs = NULL;
+
+	NSortBufs = 0;
+
+	BufferSizeMin = 0;
+	BufferSizeMax = 0;
+	
+	memset( &Seqbuf, 0, sizeof( seqbuf_t ) );
+
+	DataDev = 0;
+
+	OutFileName = "unset";
+
+	// close the sorted file
+	aClose( Output );
+	Output = NULL;
+	OutputDev = 0;
+
+	BinToRead = 0;
+	BinToWrite = 0;
+	BinToSort = 0;
+
+	free( WS.bufs );
+	memset( &WS, 0, sizeof( working_set_t ) );
+
+	// record counters
+	NumRecs = 0;
+	RecsProcessed = 0;
+	RecsRead = 0;
+	RecsUnread = 0;
+	RecsCommitted = 0;
+	RecsWritten = 0;
+	
+
+	// timers
+	RunningTime = 0;
+	RunningTimer= NULL;
+	BinningTime = 0;
+	BinningTimer= NULL;
+	SortingTime = 0;
+	SortingTimer = NULL;
+
+	QSortTime = 0;
+	QSortTimer = NULL;
+
+	ReadIdleTime = 0;
+	ReadIdleTimer = NULL;
+	SortIdleTime = 0;
+	SortIdleTimer = NULL;
+	WriteIdleTime = 0;
+	WriteIdleTimer = NULL;
+	
+	
+	// buffer lists
+	memset( &Free, 0, sizeof( buffer_list_t ) );
+	memset( &ToProcess, 0, sizeof( buffer_list_t ) );
+	memset( &Reading, 0, sizeof( buffer_list_t ) );
+	memset( &Restructure, 0, sizeof( buffer_list_t ) );
+
+	// static variables
+	divisor = 0;
+	consumed_recs = 0;
+	toprocess = NULL;
+	lasttime = 0;
+	
+	// from asyncio.c
+//	OperationNumber = 0;
+	
+	return rval;
+}
+
diff --git a/libMems/dmSML/dmsort.h b/libMems/dmSML/dmsort.h
new file mode 100644
index 0000000..2f3cf09
--- /dev/null
+++ b/libMems/dmSML/dmsort.h
@@ -0,0 +1,197 @@
+#ifndef __DMSORT_H__
+#define __DMSORT_H__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "libMems/dmSML/util.h"
+#include "libMems/dmSML/timing.h"
+#include "libMems/dmSML/asyncio.h"
+#include "libMems/dmSML/buffer.h"
+#include "libMems/dmSML/sorting.h"
+#include "libMems/dmSML/sml.h"
+
+// define this if you're using the ASCII sortgen data.
+// don't define if you're using random data (dmsortgen)
+//#define ASCII_KEYBYTES
+
+// define this if using dmSML with sequences that have large
+// stretches of NNNNN...  such as an unfinished eukaryote
+//#define NNNNN_KEYBYTES
+
+// define this if you want to measure the overlapping
+// of your sorting with I/O in the sorting phase --
+// this makes the sort routine do nothing.
+//#define NO_SORT_PERF_TEST
+
+// define the following if you don't want to write
+// data during the sort phase in order to get timings
+//#define NO_WRITE_PERF_TEST
+
+// define this to skip the binning phase in order to
+// perform measurements on the sort phase.  The bin
+// files to use during sorting must already exist (duh!)
+//#define NO_BINNING_PERF_TEST
+
+// define this to test the performance of binning and
+// restructuring without bin writing
+//#define NO_BIN_WRITE_PERF_TEST
+
+// define this to test the performance without restructuring
+// each SML bin
+//#define NO_RESTRUCTURE_PERF_TEST
+
+#ifndef NELEMS
+#define NELEMS(x) \
+    ( sizeof((x)) / sizeof((x)[0]) )
+#endif
+
+#define MIN(x,y)    ((x)<(y)?(x):(y))
+#define MINRECS     (1311)
+#define MAXRECS     (1311)
+
+
+// this is somewhat less appealing than a config file,
+// but speed is critical and parsing a config file at
+// startup is just inconvenient.  Besides, specifying
+// what we care about is easy enough this way.
+// One entry of the Devices table.
+typedef struct device_s {
+    // device name (only referenced by diagnostic output)
+    const char      *devname;
+    // prefix prepended to "outNNNNN.binned" when naming bin files on this device
+    const char      *path;
+    // I/O scheduling state handed to UpdateDeviceIOExecuteState()
+    iodevice_t      dev;
+} device_t;
+
+
+// ugly hack
+#define BIN_SPECIAL     (-10000)
+
+
+
+// what we use to represent a bin.
+// One scratch bin: written during the binning phase, read back (by index and
+// device path) during the sorting phase, and removed via fname at cleanup.
+typedef struct bin_s {
+    aFILE               *file;      // File we write/read on.
+    int                 dev;        // This is an index into the Devices table.
+    offset_t            nrecs;      // Number of records written to bin.
+    buffer_list_t       bufs;       // Our list of buffers that holds our data.
+    char*				fname;		/**< The file name of this bin */
+} bin_t;
+
+// Buffering state for data written to the output file; initialised with the
+// Output file and OutputDev and given one buffer from the free list.
+typedef struct seqbuf_s {
+	aFILE				*file;		// Output file
+	int					dev;		// device table index for output file
+	offset_t			bufpos;		// position in current buffer
+	uint64				seq_pos;	// position in sequence that is next to translate
+	buffer_list_t		bufs;		// list of buffers for data
+} seqbuf_t;
+
+// Status codes returned by the initialisation code and passed through by
+// dmSML().  SUCCESS is 0; every other value is an error.
+enum dm_errors {
+	SUCCESS,
+	TOO_FEW_BINS,
+	TOO_MANY_BINS,
+	INPUT_NOT_OPENED,
+	INVALID_WS_SIZE,
+	SEQUENCE_TOO_SHORT,
+	OUTPUT_NOT_OPENED,
+	INVALID_NUMRECS,	// the computed record count was <= 0
+	NO_FREE_BUFFERS,	// no free buffer left for Seqbuf or for one of the bins
+	BIN_NOT_OPENED,		// a bin file could not be opened for writing
+};
+
+
+void print_usage( const char* pname );
+
+
+static buffer_t * AllocateFree( void );
+
+static int ComputeBinNumber( const unsigned char key[10] );
+
+// just like ComputeBinNumber except we reserve one bin for zero keys.
+static int ComputeNNNNNBinNumber( const unsigned char key[10] );
+
+static int ComputeAsciiBinNumber( const unsigned char key[10] );
+
+static void DoBinning( void );
+
+void FinishBinning();
+
+offset_t CalculateDataReadSize( buffer_t* b );
+
+static void DoReading( void );
+
+static void HandleBinWriteCompletions( void );
+
+static void HandleSeqbufWriteCompletions( void );
+
+#define ALPHA_BITS 2
+
+static void Translate32(uint32* dest, const char* src, const unsigned len);
+
+void RestructureReadSMLBins( void );
+
+static void HandleReadingCompletions( void );
+
+int InitdmSML( long working_mb, long buffer_size, const char* input_filename, const char* output_filename, const char* const* scratch_paths, uint64 seed );
+
+void DisplayStatusHeader( void );
+
+void DisplayStatus( void );
+
+void UpdateIOState( void );
+
+void EnsureAllOperationsComplete( void );
+
+void BinningPhase( void );
+
+void SortReading( void );
+
+#ifdef USE_QSORT_ONLY
+
+int comp_keys( record_t a, record_t b );
+
+void QBrute( record_t a[], int lo, int hi );
+
+void QSort( record_t a[], int lo0, int hi0 );
+
+void RecSort( record_t a[], int nelems );
+
+int SortBuffer( buffer_t * buf );
+
+void SortSorting( void );
+
+#elif defined NO_SORT_PERF_TEST
+
+void SortSorting( void );
+
+#else 
+
+// State of the radix-sort variant of SortSorting().  These are declarations
+// only: the objects themselves are defined next to SortSorting() in the
+// implementation file, so including this header from several translation
+// units does not produce duplicate definitions.
+extern sort_buf_t* CurrentSortBuf;
+extern buffer_t* SortScratchBuffer;
+
+void SortSorting( void );
+
+#endif
+
+void RestructureSMLBinsForWrite( void );
+
+int CalculateSortWriteSize( int sortI );
+
+void SortWriting( void );
+
+void SortHandleCompletions( void );
+
+void SortUpdateIOState();
+
+void SortingEnsureAllOperationsComplete();
+
+void SortingPhase( void );
+
+int dmsort( void );
+
+int dmSML( const char* input_file, const char* output_file, const char* const* scratch_paths, uint64 seed );
+
+
+#endif // __DMSORT_H__
diff --git a/libMems/dmSML/sml.c b/libMems/dmSML/sml.c
new file mode 100644
index 0000000..310d879
--- /dev/null
+++ b/libMems/dmSML/sml.c
@@ -0,0 +1,55 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/dmSML/sml.h"
+#include "libMems/SeedMasks.h"
+
+
+/**
+ * Builds a version-5 SML header for a sequence of file_size characters and
+ * the given seed pattern, writes it to `file` with aWrite(), waits for that
+ * write with aWaitComplete(), and returns the header by value.
+ *
+ * @param file       aFILE handle the header is written to
+ * @param file_size  sequence length, stored in header.length
+ * @param seed       seed pattern; seed_length and seed_weight are derived from it
+ * @return the header that was handed to aWrite()
+ */
+SMLHeader_t InitSML( aFILE* file, uint64 file_size, uint64 seed ){
+	SMLHeader_t header;
+	uint8* dna_table;
+	int retcode;
+
+	// zero the whole struct first so padding bytes and the unused tail of
+	// description[] reach the file as zeros instead of stack garbage
+	memset( &header, 0, sizeof( header ) );
+
+	header.version = 5;
+	header.alphabet_bits = 2;
+	header.seed = seed;
+	header.seed_length = getSeedLength( seed );
+	header.seed_weight = getSeedWeight( seed );
+	header.length = file_size;
+	header.unique_mers = -1;	// wraps to the largest uint32 value
+	header.word_size = 32;
+	header.little_endian = 1;
+	header.id = 0;
+	header.circular = 0;
+
+	// CreateBasicDNATable() returns a heap buffer owned by the caller:
+	// copy it into the header, then release it instead of leaking it
+	dna_table = CreateBasicDNATable();
+	if( dna_table != NULL ){
+		memcpy( header.translation_table, dna_table, UINT8_MAX );
+		free( dna_table );
+	}else
+		printf( "Error allocating DNA translation table\n" );
+	header.description[ 0 ] = 0;
+
+	retcode = aWrite( (void*)&header, sizeof( header ), 1, file, 0 );
+	if( retcode == 0 )
+		printf( "Error writing to SML\n" );
+	aWaitComplete( file, retcode );
+	return header;
+}
+
+/*
+// use this version of RestructureReadSMLBins when no restructuring is necessary
+void RestructureReadSMLBins( void ) {
+    buffer_t *b, *tmpnext;
+    // go through and see if any have completed.
+    b = Restructure.head;
+    do {
+        if( !b ) {
+            break;
+        }
+
+        tmpnext = b->next;
+		
+		// b has been restructured, add it to the ToProcess list
+        PushTail( &ToProcess, RemoveItem( &Restructure, b ) );
+        // bookkeeping
+        RecsRead += b->numrecs;
+		
+        b = tmpnext;
+    } while( b != Restructure.head && Restructure.nitems );
+}
+*/
diff --git a/libMems/dmSML/sml.h b/libMems/dmSML/sml.h
new file mode 100644
index 0000000..0af1cf9
--- /dev/null
+++ b/libMems/dmSML/sml.h
@@ -0,0 +1,79 @@
+#ifndef _sml_h_
+#define _sml_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/dmSML/asyncio.h"
+#include <string.h>
+#include "libGenome/gnDefs.h"
+
+#ifndef UINT8_MAX
+
+#define UINT8_MAX 256
+typedef unsigned char uint8;
+typedef unsigned uint32;
+typedef unsigned long long uint64;
+
+#endif
+
+/**
+ * Allocates a UINT8_MAX-entry lookup table from ASCII character to the
+ * values 0-3: c/b/y -> 1, g/s/k -> 2, t -> 3 (either case), everything
+ * else -> 0.  The caller owns the returned buffer and must free() it.
+ *
+ * @return the new table, or NULL when the allocation fails
+ */
+static uint8* CreateBasicDNATable(){
+	// calloc returns zeroed memory, so every character not listed below maps to 0
+	uint8* bdt = (uint8*)calloc( UINT8_MAX, sizeof(uint8) );
+	if( bdt == NULL )
+		return NULL;
+	bdt['c'] = 1;
+	bdt['C'] = 1;
+	bdt['b'] = 1;
+	bdt['B'] = 1;
+	bdt['y'] = 1;
+	bdt['Y'] = 1;
+	bdt['g'] = 2;
+	bdt['G'] = 2;
+	bdt['s'] = 2;
+	bdt['S'] = 2;
+	bdt['k'] = 2;
+	bdt['K'] = 2;
+	bdt['t'] = 3;
+	bdt['T'] = 3;
+	return bdt;
+}
+
+// NOTE: the variables below are declared `static` inside a header, so every
+// source file that includes sml.h gets its own private copy of each of them.
+static uint8* DNA_TABLE;	// no initializer here, so it starts out NULL
+typedef unsigned position_t;
+typedef unsigned long long mask_t;
+#define MASK_T_BYTES 8	// presumably sizeof(mask_t) -- TODO confirm on the target platform
+static mask_t seed_mask = 0x7FFFFFFF;	// the low 31 bits set
+static int mask_length = 31;
+static int mask_weight = 31;
+
+#define DESCRIPTION_SIZE 2048	/**< Number of bytes for the freeform text description of an SML */
+
+
+typedef signed short sarID_t;
+
+/**
+ * Header record of an SML file.  InitSML() in sml.c fills one of these and
+ * passes the struct itself to aWrite(), so the in-memory layout (including
+ * any compiler padding) is what ends up in the file.
+ */
+typedef struct SMLHeader_s{
+	uint32 version;						/**< Format version - 4 bytes */
+	uint32 alphabet_bits;				/**< Bits per character in the alphabet - 4 bytes */
+	uint64 seed;						/**< The pattern used in each seed */
+	uint32 seed_length;					/**< The length of the seed mask */
+	uint32 seed_weight;					/**< The weight of the seed mask */
+	uint64 length;						/**< length of the sequence before circularity - 8 bytes */
+	uint32 unique_mers;					/**< Number of unique mers in the sequence - 4 bytes */
+	uint32 word_size;					/**< Word size on the machine the sequence was translated */
+	boolean little_endian;				/**< Is the byte order little endian?  0==no, !0==yes */
+	signed short id;					/**< Obsolete ID value - declared as a signed short, not a single byte */
+	boolean circular;					/**< Circularity of sequence (size depends on how `boolean` is defined) */
+	uint8 translation_table[UINT8_MAX];	/**< Translation table for ascii characters to binary values -- UINT8_MAX bytes */
+	char description[DESCRIPTION_SIZE]; /**< Freeform text description of sequence data -- DESCRIPTION_SIZE (2048) bytes */
+} SMLHeader_t;
+
+
+/** One SML record: an 8-byte key followed by a position_t. */
+typedef struct sml_s {
+		char key[8];		/**< key bytes; presumably the translated seed match -- TODO confirm */
+		position_t pos;		/**< position value stored with the key */
+} sml_t;
+
+SMLHeader_t InitSML( aFILE* file, uint64 file_size, uint64 seed );
+
+
+#endif /* _sml_h_ */
diff --git a/libMems/dmSML/sorting.c b/libMems/dmSML/sorting.c
new file mode 100644
index 0000000..172e310
--- /dev/null
+++ b/libMems/dmSML/sorting.c
@@ -0,0 +1,323 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/dmSML/sorting.h"
+#include "math.h"
+#include <string.h>
+
+#ifndef USE_QSORT_ONLY
+
+
+// Other helper functions in this file:
+void RadixHistogram( sort_buf_t* sortbuf );
+void RadixCopy( sort_buf_t* sortbuf );
+void QSortPointers( sort_buf_t* sortbuf );
+void QBrute( record_t* a[], int lo, int hi );
+void QSort( record_t* a[], int lo0, int hi0 );
+void CopySortedData ( sort_buf_t* sortbuf );
+
+
+/*
+ * Prepares `sortbuf` for an incremental radix sort of sortbuf->buf.
+ *
+ * Allocates the histogram, the per-bucket write offsets and the record
+ * pointer array, derives base_number and divisor from the first record's
+ * three leading key bytes, and sets the stage to CalculateHistogram.
+ * `scratch_buffer` is stored as radix_tmp.
+ *
+ * NumBins must be non-zero (it is used as a divisor), and sortbuf->buf->recs[0]
+ * is read, so the buffer is expected to hold at least one record.
+ * The malloc() results are not checked.
+ */
+void InitRadixSort( sort_buf_t* sortbuf, buffer_t* scratch_buffer )
+{
+	const unsigned prefix_space = (unsigned)16777216;	// 2^24 possible 3-byte key prefixes
+	unsigned prefixes_per_bin;
+	unsigned first_prefix = 0;
+	unsigned byteI;
+
+	// stage bookkeeping
+	sortbuf->radix_tmp = scratch_buffer;
+	sortbuf->sort_state = CalculateHistogram;
+	sortbuf->cur_position = 0;
+
+	// histogram and offset arrays: one slot per radix bucket
+	sortbuf->histogram_size = 1;
+	sortbuf->histogram_size <<= RADIX_BITS;
+	sortbuf->histogram = (unsigned*) malloc( sortbuf->histogram_size * sizeof(unsigned) );
+	sortbuf->cur_ptr_offsets = (unsigned*) malloc( sortbuf->histogram_size * sizeof(unsigned) );
+	memset( sortbuf->histogram, 0, sortbuf->histogram_size * sizeof(unsigned) );
+
+	// ceil( 2^24 / NumBins )
+	prefixes_per_bin = prefix_space / (unsigned)NumBins;
+	if( prefix_space % (unsigned)NumBins )
+		prefixes_per_bin++;
+
+	// first three key bytes of the first record
+	for( byteI = 0; byteI < 3; byteI++ )
+		first_prefix = (first_prefix << 8) + sortbuf->buf->recs[0].key[byteI];
+
+	// round the prefix down to a multiple of prefixes_per_bin
+	sortbuf->base_number = first_prefix - (first_prefix % prefixes_per_bin);
+
+	// ceil( prefixes_per_bin / histogram_size )
+	sortbuf->divisor = prefixes_per_bin / (unsigned)sortbuf->histogram_size;
+	if( prefixes_per_bin % (unsigned)sortbuf->histogram_size )
+		sortbuf->divisor++;
+
+	// one pointer slot per record
+	sortbuf->rec_ptrs = (record_t**) malloc( sortbuf->buf->numrecs * sizeof(record_t*) );
+}
+
+/*
+ * Runs one slice of work for whichever stage sortbuf->sort_state names and
+ * returns.  An unknown stage only prints an error message.
+ */
+void RadixSort( sort_buf_t* sortbuf )
+{
+	const int stage = sortbuf->sort_state;
+
+	if( stage == CalculateHistogram )
+		RadixHistogram( sortbuf );
+	else if( stage == CopyPointers )
+		RadixCopy( sortbuf );
+	else if( stage == QsortPointers )
+		QSortPointers( sortbuf );
+	else if( stage == CopyData )
+		CopySortedData( sortbuf );
+	else
+		printf("Error in sort_state\n");
+}
+
+/*
+ * Stage 1: counts, for at most HISTOGRAM_CHUNK_SIZE further records, how many
+ * fall into each bucket.  The bucket of a record is
+ * (3-byte key prefix - base_number) / divisor.
+ * Once every record has been counted the counts are turned into starting
+ * offsets (exclusive prefix sum) and the stage advances to CopyPointers.
+ */
+void RadixHistogram( sort_buf_t* sortbuf ){
+	const unsigned total = (unsigned)sortbuf->buf->numrecs;
+	unsigned stop = sortbuf->cur_position + HISTOGRAM_CHUNK_SIZE;
+	unsigned recI;
+	unsigned bucketI;
+	unsigned running;
+
+	// never run past the end of the buffer
+	if( stop >= total )
+		stop = total;
+
+	for( recI = sortbuf->cur_position; recI < stop; recI++ ){
+		record_t* rec = &(sortbuf->buf->recs[ recI ]);
+		unsigned bucket = rec->key[0];
+		bucket = (bucket << 8) + rec->key[1];
+		bucket = (bucket << 8) + rec->key[2];
+		bucket = (bucket - sortbuf->base_number) / sortbuf->divisor;
+		sortbuf->histogram[ bucket ]++;
+	}
+	sortbuf->cur_position = recI;
+
+	// more records left: come back on the next call
+	if( sortbuf->cur_position != total )
+		return;
+
+	// counts -> starting offsets
+	running = 0;
+	for( bucketI = 0; bucketI < sortbuf->histogram_size; bucketI++ ){
+		const unsigned count = sortbuf->histogram[ bucketI ];
+		sortbuf->histogram[ bucketI ] = running;
+		running += count;
+	}
+
+	sortbuf->cur_position = 0;
+	sortbuf->sort_state = CopyPointers;
+}
+
+/*
+ * Stage 2: for at most PTR_COPY_CHUNK_SIZE further records, stores a pointer
+ * to the record in rec_ptrs at the next free slot of the record's bucket.
+ * On the first call of the stage cur_ptr_offsets is seeded from the
+ * histogram offsets.  When all records are placed the stage advances to
+ * QsortPointers.
+ */
+void RadixCopy( sort_buf_t* sortbuf ){
+	const unsigned total = (unsigned)sortbuf->buf->numrecs;
+	unsigned stop = sortbuf->cur_position + PTR_COPY_CHUNK_SIZE;
+	unsigned recI;
+
+	if( stop >= total )
+		stop = total;
+
+	// first slice of this stage: every bucket starts writing at its offset
+	if( sortbuf->cur_position == 0 )
+		memcpy(sortbuf->cur_ptr_offsets, sortbuf->histogram, sortbuf->histogram_size * sizeof(unsigned) );
+
+	for( recI = sortbuf->cur_position; recI < stop; recI++ ){
+		record_t* rec = &(sortbuf->buf->recs[ recI ]);
+		unsigned bucket = rec->key[0];
+		bucket = (bucket << 8) + rec->key[1];
+		bucket = (bucket << 8) + rec->key[2];
+		bucket = (bucket - sortbuf->base_number) / sortbuf->divisor;
+
+		// claim the bucket's next slot
+		sortbuf->rec_ptrs[ sortbuf->cur_ptr_offsets[ bucket ]++ ] = rec;
+	}
+	sortbuf->cur_position = recI;
+
+	if( sortbuf->cur_position == total ){
+		sortbuf->cur_position = 0;
+		sortbuf->sort_state = QsortPointers;
+	}
+}
+
+/*
+ * Stage 3: quicksorts the record pointers inside each histogram bucket,
+ * handling at most SORT_BINS_SIZE buckets per call.  Bucket b occupies
+ * rec_ptrs[ histogram[b] .. histogram[b+1]-1 ]; the last bucket has no
+ * successor and therefore runs to the end of the record array.
+ * After the last bucket the stage advances to CopyData.
+ */
+void QSortPointers( sort_buf_t* sortbuf )
+{
+	const unsigned last_bin = sortbuf->histogram_size - 1;
+	const unsigned total = (unsigned)sortbuf->buf->numrecs;
+	unsigned binI = sortbuf->cur_position;
+	unsigned maxI = binI + SORT_BINS_SIZE;
+
+	// the last bucket is handled separately below, so stop in front of it
+	if( maxI > last_bin )
+		maxI = last_bin;
+
+	for(; binI < maxI; binI++){
+		if( sortbuf->histogram[binI + 1] - sortbuf->histogram[binI] > 1 )
+			QSort( sortbuf->rec_ptrs, sortbuf->histogram[binI], sortbuf->histogram[binI + 1] - 1 );
+	}
+	sortbuf->cur_position = binI;
+
+	if( binI == last_bin ){
+		const unsigned start = sortbuf->histogram[binI];
+		// The last bucket holds (total - start) records.  The previous test,
+		// (numrecs - 1) - start > 1, skipped a bucket of exactly two records
+		// and left them unsorted.
+		if( total > start && total - start > 1 )
+			QSort( sortbuf->rec_ptrs, start, total - 1 );
+		sortbuf->sort_state = CopyData;
+		sortbuf->cur_position = 0;
+	}
+}
+
+
+/*
+ * Stage 4: copies at most COPY_CHUNK_SIZE further records, in sorted pointer
+ * order, into radix_tmp->recs.  When the last record has been copied the
+ * record arrays of buf and radix_tmp are exchanged (so buf now holds the
+ * sorted data), state becomes WRITE_RESTRUCTURE and the helper arrays
+ * allocated by InitRadixSort are freed.
+ */
+void CopySortedData ( sort_buf_t* sortbuf ){
+	const unsigned total = (unsigned)sortbuf->buf->numrecs;
+	unsigned stop = sortbuf->cur_position + COPY_CHUNK_SIZE;
+	unsigned recordI;
+	record_t* sorted_recs;
+
+	// limit this slice to the end of the buffer
+	if( stop >= total )
+		stop = total;
+
+	for( recordI = sortbuf->cur_position; recordI < stop; recordI++ )
+		sortbuf->radix_tmp->recs[recordI] = *(sortbuf->rec_ptrs[recordI]);
+
+	sortbuf->cur_position = recordI;
+
+	// not finished yet: the next call continues where this one stopped
+	if( recordI != total )
+		return;
+
+	// exchange the two record arrays
+	sorted_recs = sortbuf->radix_tmp->recs;
+	sortbuf->radix_tmp->recs = sortbuf->buf->recs;
+	sortbuf->buf->recs = sorted_recs;
+
+	// sorting of this buffer is complete
+	sortbuf->state = WRITE_RESTRUCTURE;
+
+	// the helper arrays are no longer needed
+	free( sortbuf->rec_ptrs );
+	free( sortbuf->histogram );
+	free( sortbuf->cur_ptr_offsets );
+}
+
+
+// QBrute sorts the pointer range a[lo..hi] when it holds 2, 3 or 4 elements
+// (hi-lo of 1, 2 or 3), ordering by CompareKeyPtrs.  Any other range size is
+// left untouched.
+void QBrute( record_t* a[], int lo, int hi ) {
+    if ((hi-lo) == 1) {
+        // two elements: swap when the second compares below the first
+        if( CompareKeyPtrs( a[hi], a[lo] ) < 0 ) {
+            record_t* T = a[lo];
+            a[lo] = a[hi];
+            a[hi] = T;
+        }
+    }else
+    if ((hi-lo) == 2) {
+        // three elements: move the minimum to a[lo], then sort the remaining pair
+        int pmin = CompareKeyPtrs( a[lo], a[lo+1] ) < 0 ? lo : lo+1;
+        pmin = CompareKeyPtrs( a[pmin], a[lo+2] ) < 0 ? pmin : lo+2;
+        if (pmin != lo) {
+            record_t* T = a[lo];
+            a[lo] = a[pmin];
+            a[pmin] = T;
+        }
+        QBrute(a, lo+1, hi);
+    }else
+    if ((hi-lo) == 3) {
+        // four elements: minimum to a[lo], maximum of the rest to a[hi],
+        // then sort the middle pair
+        int pmin, pmax;
+        pmin = CompareKeyPtrs( a[lo], a[lo+1] ) < 0 ? lo : lo+1;
+        pmin = CompareKeyPtrs( a[pmin], a[lo+2] ) < 0 ? pmin : lo+2;
+        pmin = CompareKeyPtrs( a[pmin], a[lo+3] ) < 0 ? pmin : lo+3;
+        if (pmin != lo) {
+            record_t* T = a[lo];
+            a[lo] = a[pmin];
+            a[pmin] = T;
+        }
+        pmax = CompareKeyPtrs( a[hi], a[hi-1] ) > 0 ? hi : hi-1;
+        pmax = CompareKeyPtrs( a[pmax], a[hi-2] ) > 0 ? pmax : hi-2;
+        if (pmax != hi) {
+            record_t* T = a[hi];
+            a[hi] = a[pmax];
+            a[pmax] = T;
+        }
+        QBrute(a, lo+1, hi-1);
+    }
+}
+
+
+
+// Recursive quicksort of the pointer range a[lo0..hi0] (both inclusive),
+// ordered by CompareKeyPtrs.  Ranges of at most four elements are handed to
+// QBrute.  The pivot is the middle element of the range.
+// NOTE(review): both halves are sorted by recursion, so the recursion depth
+// depends on how evenly the pivots split the data.
+void QSort( record_t* a[], int lo0, int hi0 ) {
+    
+    int lo = lo0;
+    int hi = hi0;
+    
+    record_t* pivot;
+
+    if ((hi-lo) <= 3) {
+        QBrute(a, lo, hi);
+        return;
+    }
+    
+    /*
+    *  Pick a pivot and move it out of the way
+    *  (it is parked in the last slot, a[hi0], while partitioning)
+    */
+    pivot = a[(lo + hi) / 2];
+    a[(lo + hi) / 2] = a[hi];
+    a[hi] = pivot;
+    
+    while( lo < hi ) {
+    /*
+    *  Search forward from a[lo] until an element is found that
+    *  is greater than the pivot or lo >= hi 
+        */
+        //while( a[lo] <= pivot && lo < hi ) {
+        while( (CompareKeyPtrs( a[lo], pivot ) <= 0) && lo < hi ) {
+            lo++;
+        }
+        
+        /*
+        *  Search backward from a[hi] until element is found that
+        *  is less than the pivot, or hi <= lo 
+        */
+        //while (pivot <= a[hi] && lo < hi ) {
+        while( (CompareKeyPtrs( pivot, a[hi] ) <= 0) && lo < hi ) {
+            hi--;
+        }
+        
+        /*
+        *  Swap elements a[lo] and a[hi]
+        */
+        if( lo < hi ) {
+            record_t* T = a[lo];
+            a[lo] = a[hi];
+            a[hi] = T;
+        }
+    }
+    
+    /*
+    *  Put the pivot back between the two partitions (lo == hi here):
+    *  the element at the meeting point goes to the last slot and the
+    *  pivot takes its place
+    */
+    a[hi0] = a[hi];
+    a[hi] = pivot;
+    
+    /*
+    *  Recursive calls, elements a[lo0] to a[lo-1] are less than or
+    *  equal to pivot, elements a[hi+1] to a[hi0] are greater than
+    *  or equal to pivot.
+    */
+    QSort( a, lo0, lo-1 );
+    QSort( a, hi+1, hi0 );
+}
+
+
+#endif /* !USE_QSORT_ONLY */
diff --git a/libMems/dmSML/sorting.h b/libMems/dmSML/sorting.h
new file mode 100644
index 0000000..fe527ec
--- /dev/null
+++ b/libMems/dmSML/sorting.h
@@ -0,0 +1,81 @@
+#ifndef _sorting_h_
+#define _sorting_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/dmSML/buffer.h"
+
+
+// define this if you want to use the qsort only version
+// of dmsort.
+#define USE_QSORT_ONLY
+
+
+
+
+// START configurable values
+
+// the number of bits in each radix.
+#define RADIX_BITS 12
+
+// the number of bins to qsort during each call to RadixSort during the qsort phase
+#define SORT_BINS_SIZE 1000
+// the number of records to copy into sorted order during each call to CopySortedData
+#define COPY_CHUNK_SIZE 50000
+#define HISTOGRAM_CHUNK_SIZE 50000
+#define PTR_COPY_CHUNK_SIZE 50000
+
+// END configurable values
+
+// sorting states -- this is for the second phase, after binning
+#define WAIT_WRITE      (-100)
+#define WAIT_READ       (-200)
+#define SORTING         (-300)
+#define BUSY_READ       (-400)
+#define BUSY_WRITE      (-500)
+#define SORTING_SCRATCH (-600)
+#define WRITE_RESTRUCTURE (-700)
+
+enum{
+	CalculateHistogram = 0,	// At this stage we compute a histogram on the current radix
+	CopyPointers = 1,		// This stage copies the pointers into (more) sorted order
+	QsortPointers = 2,	// This stage qsorts the pointers
+	CopyData = 3		// This stage copies the data into totally sorted order
+};
+
+// Working state for sorting one buffer of records.  The radix sort in
+// sorting.c advances through the stages of the enum above, doing a bounded
+// amount of work per RadixSort() call.
+typedef struct sort_buf_s {
+    int state;          // one of the WAIT_*/BUSY_*/SORTING* codes above; CopySortedData sets WRITE_RESTRUCTURE when the sort is done
+    int bin;            // what bin this buffer holds right now.
+    iodevice_t *dev;
+    buffer_t *buf;		// the buffer where records live
+    buffer_t *radix_tmp;		// temp space for the radix sort copy
+    record_t **rec_ptrs;		// array of pointers to records
+	
+	unsigned base_number;	// subtracted from a record's 3-byte key prefix before bucketing
+	unsigned divisor;		// (key prefix - base_number) / divisor is the histogram bucket
+	unsigned histogram_size;	// number of buckets: 1 << RADIX_BITS
+    unsigned *histogram;		// the histogram of bins; holds counts first, then starting offsets
+    unsigned *cur_ptr_offsets;	// the locations to copy data in each histogram bucket
+	unsigned cur_position;	// the current record or bin position in the current stage.
+	int sort_state;			// current state of the sort algorithm
+} sort_buf_t;
+
+// Need NumBins so that we can compute the amount already sorted
+extern int NumBins;
+
+//typedef unsigned long long uint64;
+
+/* Prepares an existing sort_buf_t for sorting sortbuf->buf: allocates the
+ * histogram, offset and record-pointer arrays, computes base_number and
+ * divisor, and sets the stage to CalculateHistogram.  Nothing is returned;
+ * scratch_buffer is stored as the temporary space for the final copy.
+ */
+void InitRadixSort( sort_buf_t* sortbuf, buffer_t* scratch_buffer );
+
+/* Checks the current state of the radix sort and performs a fixed
+ * amount of sorting computation before returning.
+ * Call repeatedly until sortbuffer->state is set to WRITE_RESTRUCTURE.
+ */
+void RadixSort( sort_buf_t* sortbuffer );
+
+#endif /* _sorting_h_ */
diff --git a/libMems/dmSML/timing.c b/libMems/dmSML/timing.c
new file mode 100644
index 0000000..d651d9a
--- /dev/null
+++ b/libMems/dmSML/timing.c
@@ -0,0 +1,164 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef WIN32
+#include <sys/time.h>
+#include <unistd.h>
+//#include <malloc.h>
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+
+
+#include "libMems/dmSML/util.h"
+#include "libMems/dmSML/timing.h"
+
+
+struct dmtimer_s {
+#ifdef WIN32    
+    unsigned int last;
+#else
+    struct timeval tv;
+#endif
+};
+
+
+
+typedef int Int;
+typedef unsigned int UInt;
+typedef double Float64;
+
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <mmsystem.h>
+// keep this many significant bits from the PerformanceCounter values.
+#define NUM_FREQ_BITS   (14)
+static Int ShiftAmt;
+static Int TicksPerSecond;
+static Int LastReadValue;
+static Int BaseTime;
+#endif /* WIN32 */
+
+
+// Allocates a timer and records the current time in it.
+// Returns the new timer, or NULL when the allocation fails.  Release it
+// with StopTimer().
+dmtimer_t * StartTimer() {
+    dmtimer_t * t = malloc( sizeof( *t ) );
+    if( t == NULL ) {
+        // nothing to stamp; let the caller see the failure
+        return( NULL );
+    }
+#ifdef WIN32
+    t->last = timeGetTime();
+#else
+    gettimeofday( &(t->tv), NULL );
+#endif /* WIN32 */
+    return( t );
+}
+
+
+
+// Returns the number of milliseconds elapsed since StartTimer() stamped `t`.
+// The result is an unsigned int and wraps around like one.
+unsigned int ReadTimer( dmtimer_t * t ) {
+#ifdef WIN32
+    /*
+    Int ticks;
+    LARGE_INTEGER pcnow;
+    Float64 seconds;
+    QueryPerformanceCounter( &pcnow );
+    Shift64( ShiftAmt, (int*)&pcnow.HighPart, (int*)&pcnow.LowPart );
+    ticks = pcnow.LowPart;
+    LastReadValue = ticks;
+    if( ticks < BaseTime ) {
+        // handle wraparound.
+        ticks += ((1 << NUM_FREQ_BITS)) - BaseTime;
+    } else {
+        ticks -= BaseTime;
+    }
+    seconds = (Float64)ticks / (Float64)TicksPerSecond;
+    return( (int)(seconds * 10000 + 0.5) );
+    */
+    unsigned int cur = timeGetTime();
+    return( cur - t->last );
+#else
+    struct timeval current;
+    unsigned int elapsed;
+    // the timezone argument is obsolete; pass NULL as StartTimer() does
+    gettimeofday( &current, NULL );
+    // Subtract the seconds before scaling to msec: multiplying an absolute
+    // tv_sec by 1000 overflows a 32-bit time_t.  The millisecond parts are
+    // still truncated separately, exactly as before.
+    elapsed = 1000u * (unsigned int)( current.tv_sec - t->tv.tv_sec );
+    elapsed += (unsigned int)( current.tv_usec / 1000 );
+    elapsed -= (unsigned int)( t->tv.tv_usec / 1000 );
+    return( elapsed );
+#endif
+}
+
+
+
+// Releases a timer obtained from StartTimer(); `t` must not be used afterwards.
+void StopTimer( dmtimer_t * t )
+{
+    free( t );
+}
+
+
+
+#ifdef WIN32
+// Windows-only timer setup: calls timeBeginPeriod( 1 ).  The large
+// commented-out section is an older QueryPerformanceCounter-based
+// implementation that is no longer compiled.
+static void InitTimeWIN32() {
+
+    timeBeginPeriod( 1 );
+
+    /*
+    LARGE_INTEGER pcfreq;
+    UInt pchi, pclow, hihibit, lowhibit, highbit;
+    UInt i;
+    ShiftAmt = 0;
+    QueryPerformanceFrequency( &pcfreq );
+    pchi = pcfreq.HighPart;
+    pclow = pcfreq.LowPart;
+    // we want to look at the most significant 14 bits of the counter,
+    // so we get about 1/10000th second accuracy
+    // (between 8192ths - 16383ths second accuracy to be exact).
+    // find the highest bit set in the high part.
+    for( i = sizeof( pchi ) * 8; i ; i-- ) {
+        if( pchi & 0x80000000 ) {
+            break;
+        }
+        pchi = pchi << 1;
+    }
+    hihibit = i;
+    // find the highest bit set in the low part.
+    for( i = sizeof( pclow ) * 8; i ; i-- ) {
+        if( pclow & 0x80000000 ) {
+            break;
+        }
+        pclow = pclow << 1;
+    }
+    lowhibit = i;
+    if( hihibit ) {
+        highbit = hihibit + 32;
+    } else {
+        highbit = lowhibit;
+    }
+    pchi = pcfreq.HighPart;
+    pclow = pcfreq.LowPart;
+    if( highbit <= NUM_FREQ_BITS ) {
+        ShiftAmt = 0;
+    } else {
+        ShiftAmt = highbit - NUM_FREQ_BITS;
+    }
+    Shift64( ShiftAmt, (int*)&pchi, (int*)&pclow );
+    // now we have the most significant 14 bits of frequency.
+    TicksPerSecond = pclow;
+    // now actually read the counter, compute the ticks and store it away
+    // so we have a base for the first call.
+    QueryPerformanceCounter( &pcfreq );
+    // this demonstrates the procedure for converting a LARGE_INTEGER
+    // to ticks.
+    Shift64( ShiftAmt, (int*)&pcfreq.HighPart, (int*)&pcfreq.LowPart );
+    LastReadValue = pcfreq.LowPart;
+    BaseTime = LastReadValue;
+    */
+}
+#endif /* WIN32 */
+
+
+// One-time setup for the timing code.  Only WIN32 builds have any work to
+// do; elsewhere this is an empty function.
+void InitTime()
+{
+#if defined( WIN32 )
+    InitTimeWIN32();
+#endif
+}
diff --git a/libMems/dmSML/timing.h b/libMems/dmSML/timing.h
new file mode 100644
index 0000000..28d921e
--- /dev/null
+++ b/libMems/dmSML/timing.h
@@ -0,0 +1,24 @@
+#ifndef _timing_h_
+#define _timing_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+// an opaque timer type.
+typedef struct dmtimer_s dmtimer_t;
+
+// starts the timer
+dmtimer_t * StartTimer();
+
+// reads the timer (msec)
+unsigned int ReadTimer( dmtimer_t * t );
+
+// stops the timer.
+void StopTimer( dmtimer_t * t );
+
+// initialize the timing code.
+void InitTime();
+
+
+#endif /* _timing_h_ */
diff --git a/libMems/dmSML/util.c b/libMems/dmSML/util.c
new file mode 100644
index 0000000..14f285e
--- /dev/null
+++ b/libMems/dmSML/util.c
@@ -0,0 +1,132 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include "libMems/dmSML/util.h"
+
+#define FMT_BUFFER_SIZE     (32)
+#define FMT_MAX_STRING      (1024)
+
+static char FmtBuffer[FMT_BUFFER_SIZE][FMT_MAX_STRING];
+static int FmtIdx;
+
+// printf-style convenience wrapper around VFmt(): formats the arguments and
+// returns a pointer to the resulting temporary string.
+const char * Fmt( const char * fmt, ... ) {
+    va_list ap;
+    const char * formatted;
+
+    va_start( ap, fmt );
+    formatted = VFmt( fmt, ap );
+    va_end( ap );
+
+    return formatted;
+}
+
+
+// Formats into the next of FMT_BUFFER_SIZE static slots (used round-robin)
+// and returns a pointer to that slot.  The text is cut off at
+// FMT_MAX_STRING-1 characters and is overwritten once the slot comes round
+// again.
+const char * VFmt( const char * fmt, va_list args ) {
+    char * slot;
+
+    // step to the next slot, wrapping back to the first
+    FmtIdx++;
+    if( FmtIdx >= FMT_BUFFER_SIZE ) {
+        FmtIdx = 0;
+    }
+    slot = FmtBuffer[FmtIdx];
+
+    // silly windows....
+#ifdef WIN32
+    _vsnprintf( slot, FMT_MAX_STRING, fmt, args );
+#else
+    vsnprintf( slot, FMT_MAX_STRING, fmt, args );
+#endif
+    // force termination even when the output was truncated
+    slot[FMT_MAX_STRING-1] = '\0';
+    return( slot );
+}
+
+
+/// shifts a 64-bit value (in two 32 bit parts) either right or left.
+/// amt negative -> left, positive -> right
+///
+/// Right shifts fill from the sign of *hi, as the original `*hi >>= amt` did.
+/// The bit moves are done on unsigned copies: shifting the signed *lo right
+/// smeared its sign bit over the bits arriving from *hi, and on left shifts
+/// over the bits carried into *hi.  Shift amounts of a whole word or more are
+/// handled explicitly (they were undefined before).
+void Shift64( int amt, int * hi, int * lo ) {
+    const unsigned bits = (unsigned)( sizeof( *lo ) * 8 );
+    const unsigned fill = ( *hi < 0 ) ? ~0u : 0u;
+    unsigned uhi = (unsigned)*hi;
+    unsigned ulo = (unsigned)*lo;
+    unsigned n;
+
+    if( amt == 0 ) {
+        return;
+    }
+    if( amt > 0 ) {
+        n = (unsigned)amt;
+        // move whole words first so the remaining shift is below `bits`
+        if( n >= 2 * bits ) {
+            ulo = fill;
+            uhi = fill;
+            n = 0;
+        } else if( n >= bits ) {
+            ulo = uhi;
+            uhi = fill;
+            n -= bits;
+        }
+        if( n > 0 ) {
+            ulo = (ulo >> n) | (uhi << (bits - n));
+            uhi = (uhi >> n) | (fill << (bits - n));
+        }
+    } else {
+        // 0u - x also copes with amt == INT_MIN
+        n = 0u - (unsigned)amt;
+        if( n >= 2 * bits ) {
+            uhi = 0;
+            ulo = 0;
+            n = 0;
+        } else if( n >= bits ) {
+            uhi = ulo;
+            ulo = 0;
+            n -= bits;
+        }
+        if( n > 0 ) {
+            uhi = (uhi << n) | (ulo >> (bits - n));
+            ulo <<= n;
+        }
+    }
+    *hi = (int)uhi;
+    *lo = (int)ulo;
+}
+
+
+
+
+// Adds `amt` to the two-word unsigned value held in *hi (upper word) and
+// *lo (lower word).  A carry out of the low word is added to the high word;
+// a carry out of the high word is dropped.
+//
+// This replaces a byte-by-byte adder that built the result with
+// `out[3] << 24` on plain ints, which overflows int for byte values above
+// 0x7F.  Like the old code it assumes a 32-bit unsigned int.
+void AddTo64( unsigned int amt, unsigned int *hi, unsigned int *lo ) {
+    // read both inputs before writing, as the old code did
+    const unsigned int old_lo = *lo;
+    const unsigned int new_lo = old_lo + amt;
+    // unsigned addition wrapped exactly when the sum is below an operand
+    const unsigned int new_hi = *hi + ( new_lo < old_lo ? 1u : 0u );
+
+    *lo = new_lo;
+    *hi = new_hi;
+}
+
+/**
+ * Utility function to delete a file.
+ *
+ * The non-Windows branch used to run "/bin/rm -f <filename>" through
+ * system(), which breaks on names containing spaces and lets shell
+ * metacharacters in the name run as commands.  It now calls remove()
+ * directly and keeps the `rm -f` convention that a file which does not
+ * exist is not an error.
+ * NOTE(review): on POSIX, remove() also deletes an empty directory, which
+ * `rm -f` would have refused.
+ *
+ * @param filename  path of the file to delete
+ * @param verbose   non-zero prints a line for each file actually removed
+ *                  (ignored in the WIN32 branch, as before)
+ * @return 0 on success, non-zero on failure
+ */
+int removeFile( const char* filename, int verbose )
+{
+#ifdef WIN32
+		return remove( filename );
+//        return !DeleteFile( filename );
+#else
+        if( remove( filename ) == 0 ){
+                if( verbose )
+                        printf( "removed '%s'\n", filename );
+                return 0;
+        }
+        // like `rm -f`: nothing to delete counts as success
+        if( errno == ENOENT )
+                return 0;
+        return -1;
+#endif
+}
+
diff --git a/libMems/dmSML/util.h b/libMems/dmSML/util.h
new file mode 100644
index 0000000..5d2acc8
--- /dev/null
+++ b/libMems/dmSML/util.h
@@ -0,0 +1,28 @@
+#ifndef _util_h_
+#define _util_h_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdarg.h>
+
+// these just let you get a temporary string -- don't hang onto it for very
+// long though -- it will get overwritten at some point.  This is useful for
+// passing as parms and such.
+const char * Fmt( const char * fmt, ... );
+const char * VFmt( const char * fmt, va_list args );
+
+
+/// shifts a 64-bit value (in two 32 bit parts) either right or left.
+/// amt negative -> left, positive -> right
+void Shift64( int amt, int * hi, int * lo );
+
+
+void AddTo64( unsigned int amt, unsigned int *hi, unsigned int *lo );
+
+/** cross-platform file deletion */
+int removeFile( const char* filename, int verbose );
+
+
+#endif /* _util_h_ */
diff --git a/libMems/gnAlignedSequences.cpp b/libMems/gnAlignedSequences.cpp
new file mode 100644
index 0000000..b00cd28
--- /dev/null
+++ b/libMems/gnAlignedSequences.cpp
@@ -0,0 +1,1570 @@
+/*******************************************************************************
+ * $Id: gnAlignedSequences.cpp,v 1.11 2004/03/01 02:40:08 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * Please see the file called COPYING for licensing, copying, and modification
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libMems/gnAlignedSequences.h"
+#include <sstream>
+
+using namespace std;
+using namespace genome;
+namespace mems {
+
+// Creates an empty alignment that is not associated with any file.
+gnAlignedSequences::gnAlignedSequences()
+{
+	this->alignedSequenceFileName = "";
+}
+
+
+// Copy constructor: duplicates the file name, consensus, names, sequences
+// and positions of `toCopy`.
+// NOTE(review): indexPositions and alignedSequences, which other methods in
+// this file use, are not copied here -- confirm that this is intended.
+gnAlignedSequences::gnAlignedSequences(const gnAlignedSequences &toCopy)
+{
+	// the alignment data itself
+	this->names = toCopy.names;
+	this->sequences = toCopy.sequences;
+	this->positions = toCopy.positions;
+
+	// where it came from and its consensus
+	this->alignedSequenceFileName = toCopy.alignedSequenceFileName;
+	this->consensus = toCopy.consensus;
+}
+
+
+// Loads the ClustalW alignment stored in alignedFileName: records the file
+// name, calls readClustalWAlignment() and buildConsensus(), then fills
+// indexPositions with 1, 2, 3, ... (one entry per consensus column).
+void gnAlignedSequences::constructFromClustalW(string alignedFileName)
+{
+	this->alignedSequenceFileName = alignedFileName;
+	this->readClustalWAlignment();
+	this->buildConsensus();
+
+	// entry k holds the 1-based column number k+1
+	indexPositions.resize( consensus.size() );
+	int column = 0;
+	while( column < consensus.size() ){
+		indexPositions[column] = column + 1;
+		++column;
+	}
+}
+
+
+// Loads the Phylip alignment stored in alignedFileName: records the file
+// name, calls readPhylipAlignment() and buildConsensus(), then fills
+// indexPositions with 1, 2, 3, ... (one entry per consensus column).
+void gnAlignedSequences::constructFromPhylip(string alignedFileName)
+{
+	this->alignedSequenceFileName = alignedFileName;
+	this->readPhylipAlignment();
+	this->buildConsensus();
+
+	// entry k holds the 1-based column number k+1
+	indexPositions.resize( consensus.size() );
+	int column = 0;
+	while( column < consensus.size() ){
+		indexPositions[column] = column + 1;
+		++column;
+	}
+}
+
+
+// Loads the MSF alignment stored in alignedFileName: records the file
+// name, calls readMSFAlignment() and buildConsensus(), then fills
+// indexPositions with 1, 2, 3, ... (one entry per consensus column).
+void gnAlignedSequences::constructFromMSF(string alignedFileName)
+{
+	this->alignedSequenceFileName = alignedFileName;
+	this->readMSFAlignment();
+	this->buildConsensus();
+
+	// entry k holds the 1-based column number k+1
+	indexPositions.resize( consensus.size() );
+	int column = 0;
+	while( column < consensus.size() ){
+		indexPositions[column] = column + 1;
+		++column;
+	}
+}
+
+
+// Reads a relaxed-Nexus alignment from align_stream via
+// readRelaxedNexusAlignment().  Unlike the file-based constructFrom*()
+// methods, this one does not build the consensus or fill indexPositions;
+// that code is disabled:
+//	buildConsensus();
+//	indexPositions.resize(consensus.size());
+//	for (int i=0; i<consensus.size(); i++)
+//		indexPositions[i] = i+1;
+void gnAlignedSequences::constructFromRelaxedNexus( istream& align_stream )
+{
+	this->readRelaxedNexusAlignment( align_stream );
+}
+
+// Loads the Nexus alignment stored in alignedFileName: records the file
+// name, calls readNexusAlignment() and buildConsensus(), then fills
+// indexPositions with 1, 2, 3, ... (one entry per consensus column).
+void gnAlignedSequences::constructFromNexus(string alignedFileName)
+{
+	this->alignedSequenceFileName = alignedFileName;
+	this->readNexusAlignment();
+	this->buildConsensus();
+
+	// entry k holds the 1-based column number k+1
+	indexPositions.resize( consensus.size() );
+	int column = 0;
+	while( column < consensus.size() ){
+		indexPositions[column] = column + 1;
+		++column;
+	}
+}
+
+
+// Loads the Mega alignment stored in alignedFileName: records the file
+// name, calls readMegaAlignment() and buildConsensus(), then fills
+// indexPositions with 1, 2, 3, ... (one entry per consensus column).
+void gnAlignedSequences::constructFromMega(string alignedFileName)
+{
+	this->alignedSequenceFileName = alignedFileName;
+	this->readMegaAlignment();
+	this->buildConsensus();
+
+	// entry k holds the 1-based column number k+1
+	indexPositions.resize( consensus.size() );
+	int column = 0;
+	while( column < consensus.size() ){
+		indexPositions[column] = column + 1;
+		++column;
+	}
+}
+
+// Returns the names of the output formats understood by output().  The list
+// is built on the first call and the same vector is returned afterwards.
+const vector< string >& gnAlignedSequences::getSupportedFormats()
+{
+	static vector< string > formats;
+	if( formats.empty() ){
+		static const char* const format_names[] = {
+			"phylip", "clustal", "msf", "nexus", "mega", "codon"
+		};
+		const int name_count = sizeof( format_names ) / sizeof( format_names[0] );
+		for( int nameI = 0; nameI < name_count; nameI++ )
+			formats.push_back( format_names[ nameI ] );
+	}
+	return formats;
+}
+
+// Returns true when format_name exactly matches one of the names returned
+// by getSupportedFormats(), false otherwise.
+boolean gnAlignedSequences::isSupportedFormat( const string& format_name )
+{
+	const vector< string >& formats = getSupportedFormats();
+	vector< string >::const_iterator cur = formats.begin();
+	for( ; cur != formats.end(); ++cur ){
+		if( *cur == format_name )
+			return true;
+	}
+	return false;
+}
+// Writes the alignment to `os` in the format named by format_name by
+// dispatching to the matching output*() method.  Throws the C string
+// "Error writing alignment\n" when that method returns false -- and also
+// when format_name matches none of the known formats.
+void gnAlignedSequences::output( const string& format_name, ostream& os ) const
+{
+	bool written = false;
+
+	// a name can match at most one branch
+	if( format_name == "phylip" )
+		written = outputPhylip( os );
+	else if( format_name == "clustal" )
+		written = outputClustalW( os );
+	else if( format_name == "msf" )
+		written = outputMSF( os );
+	else if( format_name == "nexus" )
+		written = outputNexus( os );
+	else if( format_name == "mega" )
+		written = outputMega( os );
+	else if( format_name == "codon" )
+		written = outputCodon( os );
+
+	if( written )
+		return;
+	throw "Error writing alignment\n";
+}
+
+// Writes the alignment to `os` in a Phylip-like layout: a header line with
+// the number of sequences and the length of the first one, then for each
+// sequence its name (cut or padded to 10 characters plus one space) followed
+// by the bases in groups of 10, with a line break before every 50th base.
+// An empty alignment produces only the header, reporting 0 bases.
+// Always returns true.
+bool gnAlignedSequences::outputPhylip(ostream& os) const
+{
+	// sequences[0] does not exist for an empty alignment
+	const string::size_type base_count = sequences.empty() ? 0 : sequences[0].length();
+
+	os << "Sequences in Alignment: " << sequences.size()
+		<< "  Bases in Each Aligned Sequence: " << base_count << endl;
+	
+	int offset = 10;
+	uint seqI;
+	for( seqI = 0; seqI < sequences.size(); seqI++ )
+	{
+		int position = 0;
+		const string& seq = sequences[ seqI ];
+		// name column: at most `offset` characters, padded to offset + 1
+		string seqName = names[ seqI ].substr( 0, offset );
+		seqName.append( offset - seqName.length() + 1, ' ' ); 
+		
+		os << seqName;
+
+		// full groups of `offset` bases
+		for ( position=0; position + offset < seq.size(); position += offset){
+			if ( position % 50 == 0)
+				os << endl;
+			os.write( seq.data() + position, offset );
+			os << ' ';
+		}
+
+		if ( position % 50 == 0)
+			os << endl;
+
+		// whatever is left (between 0 and `offset` bases)
+		os.write( seq.data() + position, seq.size() - position );
+		os << endl;
+	}
+	
+	return true;
+}
+
+uint64 countGaps( string& seq );
+// Returns how many '-' (gap) characters seq contains.
+uint64 countGaps( string& seq ){
+	uint gaps = 0;
+	for( string::iterator ch = seq.begin(); ch != seq.end(); ++ch ){
+		if( *ch == '-' )
+			++gaps;
+	}
+	return gaps;
+}
+
+// Writes the alignment to `os` in ClustalW layout: a title line, then blocks
+// of up to 60 columns.  Each block line holds the sequence name (cut to 30
+// characters and padded to the longest name plus 6 spaces), the columns, and
+// the running count of non-gap characters written for that sequence so far.
+// The running counts start from `positions` when it has one entry per
+// sequence, otherwise from 0.  Always returns true.
+//
+// The last, possibly shorter, block used to be a copy of the loop body that
+// still added 60 to the running count; the count now grows by the number of
+// characters actually written on the line.
+bool gnAlignedSequences::outputClustalW(ostream& os) const
+{
+	const int line_width = 60;
+	boolean output_positions = true;
+	
+	os << "Clustal W multiple sequence alignment" << endl;
+	
+	vector< int64 > seq_pos( sequences.size(), 0 );
+	if( positions.size() == sequences.size() )
+		seq_pos = positions;
+
+	// name column: cut to 30 characters, remember the longest
+	vector< string > seq_names;
+	string::size_type longestNameSize = 0;
+	uint seqI;
+	for( seqI = 0; seqI < sequences.size(); seqI++ )
+	{
+		seq_names.push_back( names[ seqI ].substr( 0, 30 ) );
+		if( seq_names.back().length() > longestNameSize )
+			longestNameSize = seq_names.back().length();
+	}
+	// add space padding to the names
+	for( seqI = 0; seqI < seq_names.size(); seqI++ )
+		seq_names[ seqI ] += string( (longestNameSize - seq_names[ seqI ].length()) + 6, ' ' ); 
+
+	for( int pos = 0; pos < alignedSeqsSize(); pos += line_width )
+	{
+		os << endl
+		   << endl;
+		for( seqI = 0; seqI < sequences.size(); seqI++ )
+		{
+			os << seq_names[ seqI ];
+			// substr() shortens the final block by itself
+			string cur_seq = sequences[ seqI ].substr( pos, line_width );
+			os << cur_seq;
+			if( output_positions ){
+				seq_pos[ seqI ] += cur_seq.length() - countGaps( cur_seq );
+				os << " " << seq_pos[ seqI ];
+			}
+			os << endl;
+		}
+	}
+	return true;
+}
+
+
+// Writes the (name, sequence) pairs held in alignedSequences to `os` in MSF
+// layout: a "//" line, then blocks of up to 60 columns.  Each block starts
+// with an ordinate line giving the first and last column number, followed by
+// one right-aligned name and one row per sequence with '-' printed as '.'.
+// The alignment width is taken from the first sequence in the list.
+//
+// Returns true after writing, false (writing nothing) when alignedSequences
+// is empty.  Earlier versions returned false unconditionally, which made
+// output( "msf", ... ) throw after a successful write, and dereferenced
+// alignedSequences.begin() even when the list was empty.
+// NOTE(review): the other output*() methods in view read `sequences`/`names`
+// rather than alignedSequences -- confirm this list is still populated.
+bool gnAlignedSequences::outputMSF(ostream& os) const
+{
+	typedef list <pair <string*, string*> >::const_iterator pair_iter;
+	const string::size_type line_width = 60;
+
+	if( alignedSequences.empty() )
+		return false;
+
+	os << "//" << endl;
+
+	// width of the name column
+	string::size_type longestSeqNameLength = 0;
+	for( pair_iter it = alignedSequences.begin(); it != alignedSequences.end(); ++it )
+	{
+		if( it->first->length() > longestSeqNameLength )
+			longestSeqNameLength = it->first->length();
+	}
+
+	const string::size_type align_len = alignedSequences.front().second->size();
+
+	for( string::size_type pos = 0; pos < align_len; pos += line_width )
+	{
+		// a block is "full" while more columns follow it; the final block
+		// (even one of exactly 60 columns) keeps its own ordinate spacing
+		const bool full_block = pos + line_width < align_len;
+		const string::size_type block_len = full_block ? line_width : align_len - pos;
+
+		// ordinate line: indent, first column, spacing, last column
+		os << string( longestSeqNameLength + 2, ' ' );
+		os << pos + 1;
+		if( full_block )
+			os << string( 54, ' ' ) << pos + line_width << endl;
+		else
+			os << string( align_len - pos, ' ' ) << align_len << endl;
+
+		for( pair_iter it = alignedSequences.begin(); it != alignedSequences.end(); ++it )
+		{
+			// right-align the name
+			os << string( longestSeqNameLength - it->first->length(), ' ' );
+			os << *(it->first) << "  ";
+
+			// only the characters that exist are printed, so a sequence
+			// shorter than the block is no longer read past its end
+			string seq = it->second->substr( pos, block_len );
+			for( string::size_type i = 0; i < seq.length(); i++ )
+			{
+				if( seq[i] == '-' )
+					os << ".";
+				else
+					os << seq[i];
+			}
+			os << endl;
+		}
+
+		os << endl;
+	}
+
+	return true;
+}
+
+
+
+/**
+ * Writes the sequences/names vectors to os as an interleaved NEXUS data
+ * block: a "begin data;" header with ntax/nchar, then groups of up to 60
+ * columns.  Each group starts with a bracketed ordinate comment giving the
+ * 1-based first and last column, followed by one line per sequence (name,
+ * padding to the longest name plus two spaces, then the columns) and a
+ * blank line.
+ *
+ * The alignment width is taken from sequences[0].  Nothing after the matrix
+ * rows (no terminating ';' or end statement) is written by this function.
+ * @param os	The stream to write to
+ * @return	Always true
+ */
+bool gnAlignedSequences::outputNexus(ostream& os) const
+{
+	const size_t ROW_WIDTH = 60;
+
+	os << "begin data;" << endl
+	   << "  dimensions ntax=" << sequences.size();
+	if( sequences.size() == 0 )
+		return true;
+
+	const size_t alignLen = sequences[0].length();
+	os << " nchar=" 
+	   << alignLen << ";" << endl
+	   << "  ;" << endl
+	   << "  matrix" << endl;
+
+	size_t longestSeqNameLength = 0;
+	for( size_t seqI = 0; seqI < sequences.size(); seqI++ ){
+		if( names[ seqI ].length() > longestSeqNameLength )
+			longestSeqNameLength = names[ seqI ].length();
+	}
+
+	// start is the 0-based index of the first column in the group; the
+	// ordinates that are printed are 1-based
+	for( size_t start = 0; start < alignLen; start += ROW_WIDTH )
+	{
+		const bool fullRow = start + ROW_WIDTH < alignLen;
+		const size_t rowLen = fullRow ? ROW_WIDTH : alignLen - start;
+
+		os << "[" << string( longestSeqNameLength + 2, ' ' ) << start + 1
+		   << string( fullRow ? 54 : rowLen, ' ' )
+		   << start + rowLen << "]" << endl;
+
+		// every sequence is written exactly once per group
+		for( size_t seqI = 0; seqI < sequences.size(); seqI++ )
+		{
+			const string& name = names[ seqI ];
+			const string& seq = sequences[ seqI ];
+			os << name << string( longestSeqNameLength - name.length() + 2, ' ' );
+
+			// substr() takes a 0-based offset; guard against sequences that
+			// are shorter than the first one
+			if( start < seq.size() )
+				os << seq.substr( start, rowLen );
+			os << endl;
+		}
+
+		os << endl;
+	}
+
+	return true;
+}
+/*
+bool gnAlignedSequences::outputNexus(ostream& os) const
+{
+	os << "begin data;" << endl
+	   << "  dimensions ntax=" << alignedSequences.size() << " nchar=" 
+	   << alignedSequences.begin()->second->size() << ";" << endl
+	   << "  ;" << endl
+	   << "  matrix" << endl;
+	   
+	list <pair <string*, string*> >::const_iterator sequenceItr = alignedSequences.begin();
+	int i;
+	int longestSeqNameLength = 0;
+	for ( ; sequenceItr != alignedSequences.end(); sequenceItr++)
+	{
+		if ( sequenceItr->first->length() > longestSeqNameLength )
+			longestSeqNameLength = sequenceItr->first->length();
+	}
+	
+	int pos = 1;
+	for ( ; pos+59 < alignedSequences.begin()->second->size(); pos+=60)
+	{
+		os << "[";
+		// output spaces until sequence ordinates
+		for (i = 0; i < longestSeqNameLength+2; i++)
+			os << " ";
+		
+		os << pos;
+		for (i = 0; i < 54; i++) // output appropriate number of spaces on ordinate line
+			os << " ";
+		os << pos+59 << "]" << endl;
+		
+		for (sequenceItr=alignedSequences.begin(); sequenceItr != alignedSequences.end(); sequenceItr++)
+		{
+			os << (*(*sequenceItr).first);
+			
+			int spaces = longestSeqNameLength - sequenceItr->first->length();
+			for (i = 0; i < spaces + 2; i++)
+				os << " ";
+				
+			string seq = sequenceItr->second->substr( pos, 60 );
+			for (i = 0; i < 60; i++)
+				os << seq[i];
+			os << endl;
+		}
+		
+		os << endl;
+	}
+	
+	if (pos - 1 < alignedSequences.begin()->second->size())
+	{
+		// output spaces until sequence ordinates
+		os << "[";
+		// output spaces until sequence ordinates
+		for (i = 0; i < longestSeqNameLength + 2; i++)
+			os << " ";
+		
+		os << pos;
+		for (i=0; i < alignedSequences.begin()->second->size() - pos + 1; i++) // output appropriate number of spaces on ordinate line
+			os << " ";
+		os << pos+59 << "]" << endl;
+		
+		for (sequenceItr = alignedSequences.begin(); sequenceItr != alignedSequences.end(); sequenceItr++)
+		{
+			os << *(sequenceItr->first);
+			
+			int spaces = longestSeqNameLength-(*(*sequenceItr).first).length();
+			for (i=0; i<spaces+2; i++)
+				os << " ";
+				
+			string seq = (*(*sequenceItr).second).substr( pos, (*(*alignedSequences.begin()).second).size()-pos+1 );
+			for (i=0; i<seq.length(); i++)
+				os << seq[i];
+			os << endl;
+		}
+		
+		os << endl;
+	}
+	
+	return false;
+}
+*/
+/**
+ * Writes the alignment stored in alignedSequences to os in MEGA layout:
+ * a "#MEGA" line, a "TITLE:" line, then groups of up to MEGA_ALIGN_COLUMNS
+ * columns.  Each group is preceded and followed by a blank line and holds
+ * one line per sequence: '#', the name, padding to the longest name plus
+ * five spaces, and the columns of that group.
+ *
+ * The alignment width is taken from the first sequence in the list.
+ * @param os	The stream to write to
+ * @return	Always true
+ */
+bool gnAlignedSequences::outputMega(ostream& os) const
+{
+	typedef list <pair <string*, string*> >::const_iterator SeqItr;
+
+	os << "#MEGA" << endl
+	   << "TITLE:" << endl;
+
+	// nothing more to write, and no first sequence to measure
+	if( alignedSequences.empty() )
+		return true;
+
+	size_t longestSeqNameLength = 0;
+	for( SeqItr itr = alignedSequences.begin(); itr != alignedSequences.end(); ++itr ){
+		if( itr->first->length() > longestSeqNameLength )
+			longestSeqNameLength = itr->first->length();
+	}
+
+	const gnSeqI alignLen = alignedSequences.front().second->size();
+	gnSeqI pos = 0;	// 0-based offset of the next column to write
+	while( pos < alignLen ){
+		os << endl;
+		const gnSeqI remaining_len = alignLen - pos;
+		const gnSeqI write_chars = MEGA_ALIGN_COLUMNS < remaining_len ? MEGA_ALIGN_COLUMNS : remaining_len;
+
+		//write each sequence's line
+		for( SeqItr itr = alignedSequences.begin(); itr != alignedSequences.end(); ++itr ){
+			const string& name = *itr->first;
+			const string& seq = *itr->second;
+			os << "#" << name << string( longestSeqNameLength - name.length() + 5, ' ' );
+
+			// substr() clamps the count, so a short sequence just yields a short row
+			if( pos < seq.size() )
+				os << seq.substr( pos, write_chars );
+			os << endl;
+		}
+		os << endl;
+
+		pos += write_chars;
+	}
+	return true;
+}
+
+
+/**
+ * Writes the alignment stored in alignedSequences to os as codon triplets.
+ * The first line holds a tab, the number of sequences, a tab and the length
+ * of the first sequence (0 when the alignment is empty).  Each sequence then
+ * follows: its name padded or truncated to 10 characters, then groups of
+ * three characters each followed by a space, with a line break inserted
+ * after every 20 groups.  The final 1-3 characters are written without a
+ * trailing space and the sequence ends with a newline.
+ * @param os	The stream to write to
+ * @return	Always false.  NOTE(review): outputNexus() and outputMega() return
+ *		true on success; confirm what callers expect before changing this.
+ */
+bool gnAlignedSequences::outputCodon(ostream& os) const
+{
+	typedef list <pair <string*, string*> >::const_iterator SeqItr;
+	const size_t NAME_WIDTH = 10;
+	const size_t CODON_WIDTH = 3;
+	const int CODONS_PER_LINE = 20;
+
+	// only look at the first sequence when there is one
+	const size_t alignLen = alignedSequences.empty() ? 0 : alignedSequences.front().second->size();
+	os << '\t' << alignedSequences.size() << '\t' << alignLen << endl;
+
+	for( SeqItr itr = alignedSequences.begin(); itr != alignedSequences.end(); ++itr )
+	{
+		// resize() both pads short names with spaces and truncates long ones
+		string seqName = *itr->first;
+		seqName.resize( NAME_WIDTH, ' ' );
+		os << seqName;
+
+		const string& seq = *itr->second;
+		size_t position = 0;
+		int count = 0;
+		for( ; position + CODON_WIDTH < seq.size(); position += CODON_WIDTH )
+		{
+			if( count == CODONS_PER_LINE )
+			{
+				count = 0;
+				os << endl;
+			}
+			os << seq.substr( position, CODON_WIDTH ) << ' ';
+			count++;
+		}
+
+		// whatever is left (at most one codon) goes out without a trailing space
+		os << seq.substr( position ) << endl;
+	}
+
+	return false;
+}
+
+
+/**
+ * Writes the alignment stored in alignedSequences, followed by the consensus
+ * member, to os.  The first line holds a tab, the number of sequences, a tab
+ * and the length of the first sequence (0 when the alignment is empty).
+ * Each sequence is written as its name padded or truncated to 10 characters
+ * followed by groups of ten characters, each followed by a space, with a
+ * line break after every five groups; the final 1-10 characters are written
+ * without a trailing space.  The consensus is written the same way behind
+ * the label "Consensus:".
+ * @param os	The stream to write to
+ * @return	Always false.  NOTE(review): outputNexus() and outputMega() return
+ *		true on success; confirm what callers expect before changing this.
+ */
+bool gnAlignedSequences::outputWithConsensus(ostream& os)
+{
+	typedef list <pair <string*, string*> >::iterator SeqItr;
+	const size_t NAME_WIDTH = 10;
+	const size_t GROUP_WIDTH = 10;
+	const int GROUPS_PER_LINE = 5;
+
+	// only look at the first sequence when there is one
+	const size_t alignLen = alignedSequences.empty() ? 0 : alignedSequences.front().second->size();
+	os << '\t' << alignedSequences.size() << '\t' << alignLen << endl;
+
+	for( SeqItr itr = alignedSequences.begin(); itr != alignedSequences.end(); ++itr )
+	{
+		// resize() both pads short names with spaces and truncates long ones
+		string seqName = *itr->first;
+		seqName.resize( NAME_WIDTH, ' ' );
+		os << seqName;
+
+		const string& seq = *itr->second;
+		size_t position = 0;
+		int count = 0;
+		for( ; position + GROUP_WIDTH < seq.size(); position += GROUP_WIDTH )
+		{
+			if( count == GROUPS_PER_LINE )
+			{
+				count = 0;
+				os << endl;
+			}
+			os << seq.substr( position, GROUP_WIDTH ) << ' ';
+			count++;
+		}
+
+		os << seq.substr( position ) << endl;
+	}
+
+	// the consensus uses the same grouping as the sequences above
+	os << "Consensus:";
+	size_t position = 0;
+	int count = 0;
+	for( ; position + GROUP_WIDTH < consensus.size(); position += GROUP_WIDTH )
+	{
+		if( count == GROUPS_PER_LINE )
+		{
+			count = 0;
+			os << endl;
+		}
+		os << consensus.substr( position, GROUP_WIDTH ) << ' ';
+		count++;
+	}
+	os << consensus.substr( position ) << endl;
+
+	return false;
+}
+
+
+/**
+ * Builds a new gnAlignedSequences object for the requested range.
+ * A default-constructed alignment is handed to addAllSegments() together
+ * with start and stop, its consensus is rebuilt, and it is returned by value.
+ * @param start	passed through to addAllSegments()
+ * @param stop	passed through to addAllSegments()
+ * @return	the newly built alignment
+ */
+gnAlignedSequences gnAlignedSequences::getAlignedSegment(unsigned start, unsigned stop)
+{
+	gnAlignedSequences segment;
+	addAllSegments( segment, start, stop );
+	segment.buildConsensus();
+	return segment;
+}
+
+
+/**
+ * Collects every codonMultiple-th codon into a new alignment.
+ * The first base index is ((startCodon*3)-2)+(readingFrame-1); from there
+ * three-base windows [index, index+2] are passed to
+ * addAllSegmentsReplaceGaps() while index+2 is below the length of the first
+ * sequence, stepping by codonMultiple*3 each time.
+ *
+ * An empty alignment or a codonMultiple below 1 yields an empty result
+ * (a step of 0 would otherwise never terminate).
+ * @param readingFrame	reading frame used in the start-base formula above
+ * @param startCodon	codon number used in the start-base formula above
+ * @param codonMultiple	step between collected codons, in codons
+ * @return	the alignment built from the collected windows
+ */
+gnAlignedSequences gnAlignedSequences::getCodons(int readingFrame, int startCodon, int codonMultiple)
+{
+	gnAlignedSequences toReturn;
+	if( alignedSequences.empty() || codonMultiple <= 0 )
+		return toReturn;
+
+	const int startBase = ((startCodon*3)-2)+(readingFrame-1);
+	const size_t alignLen = alignedSequences.front().second->size();
+
+	for (int index=startBase; (index+2)<alignLen; index+=(codonMultiple*3))
+		addAllSegmentsReplaceGaps(toReturn, index, index+2);
+
+	// buildConsensus() reads the first list entry, so only call it when
+	// something was actually collected
+	if( !toReturn.alignedSequences.empty() )
+		toReturn.buildConsensus();
+
+	return toReturn;
+}
+
+
+/**
+ * Reports the length of the alignment held in the sequences vector.
+ * @return	the size of sequences[0], or 0 when there are no sequences
+ */
+gnSeqI gnAlignedSequences::alignedSeqsSize() const
+{
+	if( sequences.empty() )
+		return 0;
+
+	return sequences.front().size();
+}
+
+
+/**
+ * Removes the first entry of alignedSequences whose name equals seqName.
+ * Only the list node is erased; the name and sequence strings the entry
+ * points to are not deleted here.
+ * @param seqName	The name to look for
+ * @return	true if an entry was removed, false if no entry has that name
+ */
+bool gnAlignedSequences::removeAlignedSeq(string seqName)
+{
+	list <pair <string*, string*> >::iterator entry = alignedSequences.begin();
+	while( entry != alignedSequences.end() && !( *entry->first == seqName ) )
+		++entry;
+
+	if( entry == alignedSequences.end() )
+		return false;
+
+	alignedSequences.erase( entry );
+	return true;
+}
+
+
+/**
+ * Removes the entry at the given 0-based position of alignedSequences.
+ * Only the list node is erased; the name and sequence strings the entry
+ * points to are not deleted here.
+ * @param index	0-based position of the entry to remove
+ * @return	true if an entry was removed, false if index is past the end
+ */
+bool gnAlignedSequences::removeAlignedSeq(unsigned index)
+{
+	// walk the list while counting down the number of entries still to skip
+	unsigned stepsLeft = index;
+	list <pair <string*, string*> >::iterator entry = alignedSequences.begin();
+	while( entry != alignedSequences.end() )
+	{
+		if( stepsLeft == 0 )
+		{
+			alignedSequences.erase( entry );
+			return true;
+		}
+		--stepsLeft;
+		++entry;
+	}
+
+	return false;
+}
+
+
+/**
+ * Appends the sequences of toConcat to the sequences of this alignment.
+ * Each sequence in toConcat is appended to the first sequence of this
+ * alignment that has the same name; sequences of toConcat whose name is not
+ * present here are ignored.  Afterwards every sequence that is shorter than
+ * the longest one is padded at the end with '-', and the consensus is
+ * rebuilt.
+ * @param toConcat	The alignment whose sequences are appended
+ */
+void gnAlignedSequences::concatenateAlignedSequences(gnAlignedSequences toConcat)
+{
+	typedef list <pair <string*, string*> >::iterator SeqItr;
+
+	for( SeqItr toConcatItr = toConcat.alignedSequences.begin();
+		 toConcatItr != toConcat.alignedSequences.end(); ++toConcatItr )
+	{
+		for( SeqItr originalItr = alignedSequences.begin();
+			 originalItr != alignedSequences.end(); ++originalItr )
+		{
+			if( *toConcatItr->first == *originalItr->first ){
+				originalItr->second->append( *toConcatItr->second );
+				break;
+			}
+		}
+	}
+
+	// Measure the longest sequence only after all appends are done, so that
+	// sequences which received nothing are padded up to the new length.
+	size_t largestSeqSize = 0;
+	for( SeqItr originalItr = alignedSequences.begin();
+		 originalItr != alignedSequences.end(); ++originalItr )
+	{
+		if( originalItr->second->size() > largestSeqSize )
+			largestSeqSize = originalItr->second->size();
+	}
+
+	for( SeqItr originalItr = alignedSequences.begin();
+		 originalItr != alignedSequences.end(); ++originalItr )
+	{
+		string& seq = *originalItr->second;
+		if( seq.size() < largestSeqSize )
+			seq.append( largestSeqSize - seq.size(), '-' );
+	}
+
+	buildConsensus();
+}
+
+
+/**
+ * Finds the columns of alignedSequences at which the sequences differ and
+ * adds them to variableSites.
+ *
+ * Columns are scanned from the last to the first.  The reference base of a
+ * column is the base of the first sequence, or, when countGapsAsMismatches
+ * is false, of the first sequence that does not have '-' there.  A column
+ * is variable when any later sequence differs from the reference; with
+ * countGapsAsMismatches false a '-' never counts as a difference.  Columns
+ * that hold nothing but gaps are skipped.
+ *
+ * The 1-based position of every variable column is pushed onto
+ * variableSites.indexPositions; those positions are then passed one by one,
+ * in ascending order, to addAllSegments() and the consensus of
+ * variableSites is rebuilt.  This object's own indexPositions is emptied.
+ * @param variableSites	receives the variable columns
+ * @param countGapsAsMismatches	whether '-' takes part in the comparison
+ */
+void gnAlignedSequences::extractVariableSites(gnAlignedSequences &variableSites, bool countGapsAsMismatches)
+{
+	typedef list <pair <string*, string*> >::iterator SeqItr;
+
+	indexPositions.resize(0);
+
+	// without a first sequence there is no alignment width to scan
+	if( alignedSequences.empty() )
+		return;
+
+	const int alignedSeqSize = alignedSequences.front().second->size();
+
+	for (int position=alignedSeqSize; position > 0; position--)
+	{
+		const size_t column = position - 1;
+
+		// find the sequence that supplies the reference base; the end of the
+		// list is tested before the iterator is dereferenced
+		SeqItr originalItr = alignedSequences.begin();
+		if( !countGapsAsMismatches )
+		{
+			while( originalItr != alignedSequences.end() &&
+				   (*originalItr->second)[column] == '-' )
+				++originalItr;
+		}
+
+		// an all-gap column is not a variable site; keep scanning the
+		// remaining columns instead of abandoning the whole search
+		if( originalItr == alignedSequences.end() )
+			continue;
+
+		const char positionBase = (*originalItr->second)[column];
+		bool mismatch = false;
+		for( ; originalItr != alignedSequences.end(); ++originalItr )
+		{
+			const char base = (*originalItr->second)[column];
+			if( base == positionBase )
+				continue;
+			if( !countGapsAsMismatches && base == '-' )
+				continue;
+			mismatch = true;
+			break;
+		}
+
+		if( mismatch )
+			variableSites.indexPositions.push_back(position);
+	}
+
+	// positions were collected back to front; walk them in reverse so the
+	// columns are added in ascending order
+	for (int i=variableSites.indexPositions.size()-1; i>=0; i--)
+		addAllSegments(variableSites, variableSites.indexPositions[i], variableSites.indexPositions[i]);
+
+	variableSites.buildConsensus();
+}
+
+
+/**
+ * Removes entries of alignedSequences whose sequence text duplicates that of
+ * another entry.  Every entry is compared against all entries from the
+ * second list position onwards; a different entry with identical sequence
+ * text is erased from the list.  The first list entry is never erased.
+ * Only list nodes are erased; the strings they point to are not deleted here.
+ * @return	true if at least one entry was removed
+ */
+bool gnAlignedSequences::collapseIdenticalSequences()
+{
+	typedef list <pair <string*, string*> >::iterator SeqItr;
+	bool removedAny = false;
+
+	for( SeqItr keeper = alignedSequences.begin(); keeper != alignedSequences.end(); ++keeper )
+	{
+		SeqItr candidate = alignedSequences.begin();
+		++candidate;
+		while( candidate != alignedSequences.end() )
+		{
+			const bool sameText = ( *keeper->second == *candidate->second );
+			if( sameText && keeper != candidate )
+			{
+				// erase() hands back the entry after the removed one
+				candidate = alignedSequences.erase( candidate );
+				removedAny = true;
+			}
+			else
+				++candidate;
+		}
+	}
+
+	return removedAny;
+}
+
+
+/**
+ * Returns one column of the alignment stored in alignedSequences.
+ * @param offset	0-based column index; it is not range-checked
+ * @return	the character at offset of every sequence, in list order
+ */
+vector <char> gnAlignedSequences::operator[]( const int offset ) //const
+{
+	vector <char> column;
+
+	list <pair <string*, string*> >::iterator seqItr = alignedSequences.begin();
+	while( seqItr != alignedSequences.end() )
+	{
+		string& seq = *seqItr->second;
+		column.push_back( seq[offset] );
+		++seqItr;
+	}
+
+	return column;
+}
+
+
+/**
+ * Opens the file named by alignedSequenceFileName in binary mode, discards
+ * its first three lines and hands the rest of the stream to
+ * constructClustalWAlignedSequenceList().
+ * If the file cannot be opened a message is printed to standard output and
+ * the process exits with status -1.
+ * @return	the value returned by constructClustalWAlignedSequenceList()
+ */
+bool gnAlignedSequences::readClustalWAlignment()
+{
+	ifstream alnStream( alignedSequenceFileName.c_str(), ios::in | ios::binary );
+	if( !alnStream.is_open() )
+	{
+		cout << "Unable to open " << alignedSequenceFileName << ".\n"
+			 << "Exiting.\n";
+		exit(-1);
+	}
+
+	// the sequence data of a .aln file begins on line 4
+	string skipped;
+	for( int lineI = 0; lineI < 3; lineI++ )
+		getline( alnStream, skipped );
+
+	const bool constructSuccess = constructClustalWAlignedSequenceList( alnStream );
+	alnStream.close();
+
+	return constructSuccess;
+}
+
+
+/**
+ * Opens the file named by alignedSequenceFileName in binary mode, discards
+ * its first line (sequence count and sequence length) and hands the rest of
+ * the stream to constructPhylipAlignedSequenceList().
+ * If the file cannot be opened a message is printed to standard output and
+ * the process exits with status -1.
+ * @return	the value returned by constructPhylipAlignedSequenceList()
+ */
+bool gnAlignedSequences::readPhylipAlignment()
+{
+	ifstream phylipStream( alignedSequenceFileName.c_str(), ios::in | ios::binary );
+	if( !phylipStream.is_open() )
+	{
+		cout << "Unable to open " << alignedSequenceFileName << ".\n"
+			 << "Exiting.\n";
+		exit(-1);
+	}
+
+	// drop the header line that carries the sequence count and length
+	string header;
+	getline( phylipStream, header );
+
+	const bool constructSuccess = constructPhylipAlignedSequenceList( phylipStream );
+	phylipStream.close();
+
+	return constructSuccess;
+}
+
+
+/**
+ * Opens the file named by alignedSequenceFileName in binary mode, skips the
+ * leading annotation up to and including the first line that contains "//",
+ * and hands the rest of the stream to constructMSFAlignedSequenceList().
+ * If the file cannot be opened a message is printed to standard output and
+ * the process exits with status -1.
+ * @return	false if the file holds no "//" line, otherwise the value
+ *		returned by constructMSFAlignedSequenceList()
+ */
+bool gnAlignedSequences::readMSFAlignment()
+{
+	ifstream alignmentFile;
+
+	alignmentFile.open(alignedSequenceFileName.c_str(), ios::in | ios::binary);
+
+	if (!(alignmentFile.is_open()))
+	{
+		cout << "Unable to open " << alignedSequenceFileName << ".\n"
+			 << "Exiting.\n";
+
+		exit(-1);
+	}
+
+	// remove format's initial annotation; stop at end of file instead of
+	// spinning forever when the separator is missing
+	string line;
+	bool foundSeparator = false;
+	while( !foundSeparator && getline( alignmentFile, line ) )
+		foundSeparator = line.find("//") != string::npos;
+
+	if( !foundSeparator )
+	{
+		alignmentFile.close();
+		return false;
+	}
+
+	const bool constructSuccess = constructMSFAlignedSequenceList(alignmentFile);
+
+	alignmentFile.close();
+
+	return constructSuccess;
+}
+
+
+/**
+ * Reads a single aligned sequences entry in relaxed NEXUS layout from
+ * align_stream into the sequences and names vectors.
+ *
+ * A leading "#NEXUS" line is accepted but not required.  If the next line
+ * starts with '[', lines are skipped up to and including the line that
+ * starts with ']'.  Blank lines are skipped, then the alignment info line
+ * (sequence count followed by alignment length) is parsed and that many
+ * whitespace-separated name/sequence pairs are read.  The rest of the line
+ * after the last pair is consumed.
+ *
+ * Both sequences and names are replaced by what was read.
+ * @param align_stream	The stream to read from
+ * @return	true if the info line and all name/sequence pairs could be read,
+ *		false if the stream ended early or the info line has no sequence count
+ */
+bool gnAlignedSequences::readRelaxedNexusAlignment( istream& align_stream ){
+
+	string line;
+	if( !getline( align_stream, line ) )
+		return false;
+	if( line == "#NEXUS" ){
+		if( !getline( align_stream, line ) )
+			return false;
+	}
+	if( !line.empty() && line[0] == '[' ){
+		// skip the comment block; its closing line starts with ']'
+		bool closed = false;
+		while( !closed && getline( align_stream, line ) )
+			closed = !line.empty() && line[0] == ']';
+		if( !closed )
+			return false;
+		getline( align_stream, line );	// possibly empty line
+	}
+	// skip blank lines in front of the alignment info line
+	while( align_stream && line.empty() )
+		getline( align_stream, line );
+	if( !align_stream )
+		return false;
+
+	// this is the alignment info line
+	stringstream align_info( line );
+	uint seq_count = 0;
+	gnSeqI align_len = 0;
+	if( !( align_info >> seq_count ) )
+		return false;
+	align_info >> align_len;	// parsed for completeness, not used below
+
+	// names must stay index-aligned with sequences, so both are replaced
+	sequences = vector< string >( seq_count );
+	names.clear();
+	// now read in each alignment line
+	for( uint seqI = 0; seqI < seq_count; seqI++ ){
+		string name;
+		align_stream >> name;
+		names.push_back( name );
+		align_stream >> sequences[ seqI ];
+	}
+	const bool read_ok = !align_stream.fail();
+
+	// read off the trailing newline
+	getline( align_stream, line );
+	return read_ok;
+}
+
+
+/**
+ * Opens the file named by alignedSequenceFileName in binary mode, skips the
+ * leading annotation up to and including the first line that contains
+ * "begin data;", and hands the rest of the stream to
+ * constructNexusAlignedSequenceList().
+ * If the file cannot be opened a message is printed to standard output and
+ * the process exits with status -1.
+ * @return	false if the file holds no "begin data;" line, otherwise the
+ *		value returned by constructNexusAlignedSequenceList()
+ */
+bool gnAlignedSequences::readNexusAlignment()
+{
+	ifstream alignmentFile;
+
+	alignmentFile.open(alignedSequenceFileName.c_str(), ios::in | ios::binary);
+
+	if (!(alignmentFile.is_open()))
+	{
+		cout << "Unable to open " << alignedSequenceFileName << ".\n"
+			 << "Exiting.\n";
+
+		exit(-1);
+	}
+
+	// remove format's initial annotation; stop at end of file instead of
+	// spinning forever when the marker is missing
+	string line;
+	bool foundDataBlock = false;
+	while( !foundDataBlock && getline( alignmentFile, line ) )
+		foundDataBlock = line.find("begin data;") != string::npos;
+
+	if( !foundDataBlock )
+	{
+		alignmentFile.close();
+		return false;
+	}
+
+	const bool constructSuccess = constructNexusAlignedSequenceList(alignmentFile);
+
+	alignmentFile.close();
+
+	return constructSuccess;
+}
+
+
+/**
+ * Opens the file named by alignedSequenceFileName in binary mode, discards
+ * its first three lines and hands the rest of the stream to
+ * constructMegaAlignedSequenceList().
+ * If the file cannot be opened a message is printed to standard output and
+ * the process exits with status -1.
+ * @return	the value returned by constructMegaAlignedSequenceList()
+ */
+bool gnAlignedSequences::readMegaAlignment()
+{
+	ifstream megaStream( alignedSequenceFileName.c_str(), ios::in | ios::binary );
+	if( !megaStream.is_open() )
+	{
+		cout << "Unable to open " << alignedSequenceFileName << ".\n"
+			 << "Exiting.\n";
+		exit(-1);
+	}
+
+	// the three lines in front of the sequence data are not needed
+	string skipped;
+	for( int lineI = 0; lineI < 3; lineI++ )
+		getline( megaStream, skipped );
+
+	const bool constructSuccess = constructMegaAlignedSequenceList( megaStream );
+	megaStream.close();
+
+	return constructSuccess;
+}
+
+
+/**
+ * Reads the body of a ClustalW alignment from alignmentFile into
+ * alignedSequences.
+ *
+ * Every line that is not empty and does not start with a space, a NUL or a
+ * carriage return is treated as a sequence line: the text up to the first
+ * space is the sequence name and every later character accepted by
+ * gnFilter::fullDNASeqFilter() is sequence data.  Data for a name that is
+ * already in the list is appended to that entry, otherwise a new entry is
+ * added.  All other lines are skipped.
+ * @param alignmentFile	stream positioned at the first sequence line
+ * @return	Always false.  NOTE(review): readClustalWAlignment() forwards this
+ *		value as its success flag; confirm whether true was intended.
+ */
+bool gnAlignedSequences::constructClustalWAlignedSequenceList(ifstream& alignmentFile)
+{
+	const gnFilter* newFilter = gnFilter::fullDNASeqFilter();
+	string line;
+
+	// reading line by line ends cleanly at end of file and also handles a
+	// final line that has no trailing newline
+	while( getline( alignmentFile, line ) )
+	{
+		if( line.empty() || line[0] == ' ' || line[0] == '\0' || line[0] == '\r' )
+			continue;
+
+		// the name runs up to the first space, or to the end of the line
+		// when the line holds no space at all
+		const string sequenceName = line.substr( 0, line.find( ' ' ) );
+
+		string sequenceBases;
+		for( string::size_type i = sequenceName.size(); i < line.length(); i++ ){
+			if( newFilter->IsValid( line[i] ) )
+				sequenceBases += line[i];
+		}
+
+		list <pair <string*, string*> >::iterator sequenceItr;
+		if( sequenceNameInList( sequenceName, sequenceItr ) )
+			sequenceItr->second->append( sequenceBases );
+		else
+		{
+			pair <string*, string*> sequence;
+			sequence.first = new string( sequenceName );
+			sequence.second = new string( sequenceBases );
+			alignedSequences.push_back( sequence );
+		}
+	}
+
+	return false;
+}
+
+
+/**
+ * Reads the body of a PHYLIP alignment from alignmentFile into
+ * alignedSequences.
+ *
+ * A line whose character at index 10 is not a space starts a new sequence:
+ * its first 10 characters are the name (which is also echoed to standard
+ * output) and the characters from index 10 on that are accepted by
+ * gnFilter::fullDNASeqFilter() are its data.  A line whose character at
+ * index 10 is a space is a continuation line: all of its accepted
+ * characters are appended to the most recently added sequence.  Lines
+ * shorter than 10 characters, including blank lines, are skipped.
+ * @param alignmentFile	stream positioned at the first sequence line
+ * @return	Always false.  NOTE(review): readPhylipAlignment() forwards this
+ *		value as its success flag; confirm whether true was intended.
+ */
+bool gnAlignedSequences::constructPhylipAlignedSequenceList(ifstream& alignmentFile)
+{
+	const string::size_type NAME_WIDTH = 10;
+	const gnFilter* newFilter = gnFilter::fullDNASeqFilter();
+	string line;
+
+	// reading line by line ends cleanly at end of file and also handles a
+	// final line that has no trailing newline
+	while( getline( alignmentFile, line ) )
+	{
+		// too short to hold a name field; never index past the end
+		if( line.length() < NAME_WIDTH )
+			continue;
+
+		const bool continuation = line.length() > NAME_WIDTH && line[ NAME_WIDTH ] == ' ';
+
+		// name lines contribute the columns after the name field,
+		// continuation lines contribute the whole line
+		string sequenceBases;
+		for( string::size_type i = continuation ? 0 : NAME_WIDTH; i < line.length(); i++ )
+		{
+			if( newFilter->IsValid( line[i] ) )
+				sequenceBases += line[i];
+		}
+
+		if( !continuation )
+		{
+			const string sequenceName = line.substr( 0, NAME_WIDTH );
+			cout << sequenceName << endl;
+
+			pair <string*, string*> sequence;
+			sequence.first = new string( sequenceName );
+			sequence.second = new string( sequenceBases );
+			alignedSequences.push_back( sequence );
+		}
+		// NOT THE 1st LINE IN SEQUENCE (CONTAINS SEQ NAME)
+		else if( !alignedSequences.empty() )
+			*alignedSequences.back().second += sequenceBases;
+	}
+
+	return false;
+}
+
+
+/**
+ * Reads the body of an MSF alignment from alignmentFile into
+ * alignedSequences.
+ *
+ * The first line is discarded.  After that every line for which
+ * coordinates() returns true is skipped; every other line is a sequence
+ * line: leading spaces are ignored, the text up to the next space is the
+ * sequence name, and from there on characters accepted by
+ * gnFilter::fullDNASeqFilter() are kept while '.' and '~' are stored as '-'.
+ * Data for a name that is already in the list is appended to that entry,
+ * otherwise a new entry is added.
+ * @param alignmentFile	stream positioned just after the "//" line
+ * @return	Always false.  NOTE(review): readMSFAlignment() forwards this
+ *		value as its success flag; confirm whether true was intended.
+ */
+bool gnAlignedSequences::constructMSFAlignedSequenceList(ifstream& alignmentFile)
+{
+	const gnFilter* newFilter = gnFilter::fullDNASeqFilter();
+	string line;
+
+	// clear coordinate line
+	getline(alignmentFile, line);
+
+	// reading line by line ends cleanly at end of file
+	while( getline( alignmentFile, line ) )
+	{
+		if( coordinates( line ) )
+			continue;
+
+		// coordinates() returned false, so the line holds at least one
+		// character that is not a space
+		const string::size_type nameStart = line.find_first_not_of( ' ' );
+		string::size_type nameEnd = line.find( ' ', nameStart );
+		if( nameEnd == string::npos )
+			nameEnd = line.length();	// name only, no data on this line
+		const string sequenceName = line.substr( nameStart, nameEnd - nameStart );
+
+		string sequenceBases;
+		for( string::size_type i = nameEnd; i < line.length(); i++ ){
+			if( newFilter->IsValid( line[i] ) )
+				sequenceBases += line[i];
+			else if( line[i] == '.' || line[i] == '~' )
+				sequenceBases += '-';
+		}
+
+		list <pair <string*, string*> >::iterator sequenceItr;
+		if( sequenceNameInList( sequenceName, sequenceItr ) )
+			sequenceItr->second->append( sequenceBases );
+		else
+		{
+			pair <string*, string*> sequence;
+			sequence.first = new string( sequenceName );
+			sequence.second = new string( sequenceBases );
+			alignedSequences.push_back( sequence );
+		}
+	}
+
+	return false;
+}
+
+
+/**
+ * Reads the matrix of a NEXUS data block from alignmentFile into
+ * alignedSequences.
+ *
+ * Reading stops at the first line that contains "endblock;" or at end of
+ * file.  Empty lines and lines that start with '[', a space, a newline or a
+ * carriage return are skipped.  Every other line is a sequence line: the
+ * text up to the first space is the sequence name, and all later characters
+ * other than space, carriage return and newline are sequence data.  Data
+ * for a name that is already in the list is appended to that entry,
+ * otherwise a new entry is added.
+ * @param alignmentFile	stream positioned just after the "begin data;" line
+ * @return	Always false.  NOTE(review): readNexusAlignment() forwards this
+ *		value as its success flag; confirm whether true was intended.
+ */
+bool gnAlignedSequences::constructNexusAlignedSequenceList(ifstream& alignmentFile)
+{
+	string line;
+
+	while( getline( alignmentFile, line ) )
+	{
+		// searching for "endblock;"
+		if( line.find("endblock;") != string::npos )
+			break;
+
+		// blank lines separate the interleaved groups and carry no data
+		if( line.empty() || line[0] == '[' || line[0] == ' ' || line[0] == '\n' || line[0] == '\r' )
+			continue;
+
+		// the name runs up to the first space, or to the end of the line
+		// when the line holds no space at all
+		const string sequenceName = line.substr( 0, line.find( ' ' ) );
+
+		string sequenceBases;
+		for( string::size_type i = sequenceName.size(); i < line.length(); i++ ){
+			if( line[i] != '\r' && line[i] != '\n' && line[i] != ' ' )
+				sequenceBases += line[i];
+		}
+
+		list <pair <string*, string*> >::iterator sequenceItr;
+		if( sequenceNameInList( sequenceName, sequenceItr ) )
+			sequenceItr->second->append( sequenceBases );
+		else
+		{
+			pair <string*, string*> sequence;
+			sequence.first = new string( sequenceName );
+			sequence.second = new string( sequenceBases );
+			alignedSequences.push_back( sequence );
+		}
+	}
+
+	return false;
+}
+
+
+/**
+ * Reads the body of a MEGA alignment from alignmentFile into
+ * alignedSequences.
+ *
+ * Every line that starts with '#' is a sequence line: the text between the
+ * '#' and the first space is the sequence name.  After that first space,
+ * characters accepted by gnFilter::fullDNASeqFilter() are kept, and a '.'
+ * is replaced by the character of the first sequence in the list at the
+ * same column (the column is the number of characters collected on this
+ * line plus the length the first sequence had when the previous group
+ * ended).  A '.' whose column lies outside the first sequence is dropped.
+ * Data for a name that is already in the list is appended to that entry,
+ * otherwise a new entry is added.
+ *
+ * Every line that does not start with '#' ends a group: the current length
+ * of the first sequence is remembered as the column offset for the next one.
+ * @param alignmentFile	stream positioned at the first sequence line
+ * @return	Always false.  NOTE(review): readMegaAlignment() forwards this
+ *		value as its success flag; confirm whether true was intended.
+ */
+bool gnAlignedSequences::constructMegaAlignedSequenceList(ifstream& alignmentFile)
+{
+	const gnFilter* newFilter = gnFilter::fullDNASeqFilter();
+	string line;
+	string::size_type previousLineLength = 0;
+
+	// reading line by line ends cleanly at end of file
+	while( getline( alignmentFile, line ) )
+	{
+		if( line.empty() || line[0] != '#' )
+		{
+			// group separator: later '.' columns are offset by what the
+			// first sequence holds so far
+			if( !alignedSequences.empty() )
+				previousLineLength = alignedSequences.front().second->size();
+			continue;
+		}
+
+		// the name sits between the '#' and the first space, or runs to the
+		// end of the line when there is no space
+		const string::size_type nameEnd = line.find( ' ', 1 );
+		const string sequenceName =
+			nameEnd == string::npos ? line.substr( 1 ) : line.substr( 1, nameEnd - 1 );
+
+		// snapshot of the first sequence, taken before anything is appended
+		const bool haveReference = !alignedSequences.empty();
+		string consensusSequenceBases;
+		if( haveReference )
+			consensusSequenceBases = *alignedSequences.front().second;
+
+		string sequenceBases;
+		bool isInSeqName = true;
+		for( string::size_type i = sequenceName.size(); i < line.length(); i++ )
+		{
+			// allow only valid characters to be placed - if '.' replace letter
+			// with consensus data
+			if( newFilter->IsValid( line[i] ) && !isInSeqName )
+				sequenceBases += line[i];
+
+			else if( line[i] == ' ' ) isInSeqName = false;
+
+			else if( line[i] == '.' && haveReference && !isInSeqName ) // a reference to the consensus
+			{
+				const string::size_type column = sequenceBases.size() + previousLineLength;
+				if( column < consensusSequenceBases.size() )
+					sequenceBases += consensusSequenceBases[ column ];
+			}
+		}
+
+		list <pair <string*, string*> >::iterator sequenceItr;
+		if( sequenceNameInList( sequenceName, sequenceItr ) )
+			sequenceItr->second->append( sequenceBases );
+		else
+		{
+			pair <string*, string*> sequence;
+			sequence.first = new string( sequenceName );
+			sequence.second = new string( sequenceBases );
+			alignedSequences.push_back( sequence );
+		}
+	}
+
+	return false;
+}
+
+
+/**
+ * Looks up a sequence name in the names vector.
+ * @param sequenceName	The name to look for
+ * @return	the index of the first entry of names equal to sequenceName,
+ *		or -1 if there is none
+ */
+int gnAlignedSequences::sequenceNameInList( string& sequenceName ){
+	uint nameI = 0;
+	while( nameI < names.size() && !( sequenceName == names[ nameI ] ) )
+		nameI++;
+
+	if( nameI < names.size() )
+		return nameI;
+	return -1;
+}
+
+/**
+ * Looks up a sequence name in the alignedSequences list.
+ * @param sequenceName	The name to look for
+ * @param sequenceItr	set to the first entry with that name, or to
+ *		alignedSequences.end() when there is none
+ * @return	true if an entry with that name exists
+ */
+bool gnAlignedSequences::sequenceNameInList(string sequenceName, list <pair <string*, string*> >::iterator &sequenceItr)
+{
+	sequenceItr = alignedSequences.begin();
+	while( sequenceItr != alignedSequences.end() )
+	{
+		if( sequenceName == *sequenceItr->first )
+			return true;
+		++sequenceItr;
+	}
+
+	return false;
+}
+
+
+/**
+ * Rebuilds the consensus member from the alignedSequences list.
+ *
+ * For every column of the first sequence the letters of all sequences are
+ * counted case-insensitively via determineBaseIndex(); '-' and characters
+ * that do not map to one of the 26 letters are not counted.  Counting of a
+ * column stops at the first sequence after the first one that has '.' in
+ * that column (MEGA '.' notation, meaning "same as the first sequence").
+ * The upper-case letter with the highest count is appended to consensus;
+ * ties go to the letter that comes first in the alphabet, so a column
+ * without any counted letter yields 'A'.
+ *
+ * An empty alignment yields an empty consensus.
+ * @return	Always false
+ */
+bool gnAlignedSequences::buildConsensus()
+{
+	typedef list <pair <string*, string*> >::iterator SeqItr;
+	const int LETTER_COUNT = 26;
+
+	consensus = "";
+
+	// no first sequence means no columns to summarise
+	if( alignedSequences.empty() )
+		return false;
+
+	const size_t alignLen = alignedSequences.front().second->size();
+	for( size_t index = 0; index < alignLen; index++ )
+	{
+		vector <int> baseCounts( LETTER_COUNT, 0 );
+		for( SeqItr itr = alignedSequences.begin(); itr != alignedSequences.end(); ++itr )
+		{
+			const string& seq = *itr->second;
+			// a sequence shorter than the first one has nothing in this column
+			if( index >= seq.size() )
+				continue;
+			const char base = seq[ index ];
+
+			// consensus already established if in MEGA '.' format - the 1st seq
+			if( itr != alignedSequences.begin() && base == '.' )
+				break;
+
+			if( base == '-' )
+				continue;
+
+			// only letters have a slot in baseCounts
+			const int baseIndex = determineBaseIndex( base );
+			if( baseIndex >= 0 && baseIndex < LETTER_COUNT )
+				baseCounts[ baseIndex ]++;
+		}
+
+		// strictly alphabetic - count ties are broken lexicographically
+		int toAppendToConsensus = 0;
+		for( int i = 1; i < LETTER_COUNT; i++ )
+		{
+			if( baseCounts[i] > baseCounts[ toAppendToConsensus ] )
+				toAppendToConsensus = i;
+		}
+
+		consensus += (char)( toAppendToConsensus + 65 );
+	}
+
+	return false;
+}
+
+
+/**
+ * Appends a sequence and its name to the sequences and names vectors.
+ * @param seqToAdd	The sequence text, copied onto the end of sequences
+ * @param seqName	The sequence name, copied onto the end of names
+ */
+void gnAlignedSequences::addSequence(string& seqToAdd, string& seqName)
+{
+	// the two vectors are filled in step, sequence first
+	sequences.insert( sequences.end(), seqToAdd );
+	names.insert( names.end(), seqName );
+}
+
+
+/**
+ * Appends the text of a gnSequence and its name to the sequences and names
+ * vectors.  ErrorMsg() is called first with a reminder that this function
+ * still needs fixing.  An earlier implementation that added to, or extended
+ * an entry of, the alignedSequences list is no longer in use.
+ * @param seqToAdd	The sequence whose ToString() result is stored
+ * @param seqName	The sequence name, copied onto the end of names
+ */
+void gnAlignedSequences::addSequence(gnSequence& seqToAdd, string& seqName)
+{
+	ErrorMsg( "Fix gnAlignedSequences::addSequence()" );
+
+	// the two vectors are filled in step, sequence first
+	sequences.insert( sequences.end(), seqToAdd.ToString() );
+	names.insert( names.end(), seqName );
+}
+
+
+/**
+ * Adds a sequence segment to the alignedSequences list, replacing its gaps
+ * with consensus characters.
+ *
+ * Every '-' at offset i of the segment is replaced by
+ * originalConsensus[consensusStart+i-1] when that index lies inside
+ * originalConsensus; otherwise the '-' is kept.  If the list already has an
+ * entry named seqName the segment is appended to it, otherwise a new entry
+ * is added.
+ * @param seqToAdd	The segment to add; its ToString() result is used
+ * @param seqName	The name of the sequence the segment belongs to
+ * @param consensusStart	offset of the segment within originalConsensus,
+ *		used in the index formula above
+ * @param originalConsensus	The consensus that supplies replacement characters
+ */
+void gnAlignedSequences::addSequence(gnSequence seqToAdd, string seqName, int consensusStart, string originalConsensus)
+{
+	// replace gaps in the new segment only, before it is stored
+	string segment = seqToAdd.ToString();
+	for( string::size_type i = 0; i < segment.size(); i++ )
+	{
+		if( segment[i] != '-' )
+			continue;
+		const long consensusIndex = (long)consensusStart + (long)i - 1;
+		if( consensusIndex >= 0 && (string::size_type)consensusIndex < originalConsensus.size() )
+			segment[i] = originalConsensus[ consensusIndex ];
+	}
+
+	list <pair <string*, string*> >::iterator itr;
+	if( sequenceNameInList( seqName, itr ) )
+		itr->second->append( segment );
+	else
+	{
+		pair <string*, string*> toAdd;
+		toAdd.first = new string( seqName );
+		toAdd.second = new string( segment );
+		alignedSequences.push_back( toAdd );
+	}
+}
+
+
+/**
+ * Copies the range [start, stop] of every sequence of this alignment into
+ * another alignment.
+ *
+ * For each entry of this object's sequences vector the substring starting
+ * at start with length stop-start+1 is added to alignment under the same
+ * name via addSequence().  A stop of 0, or a stop equal to the last index
+ * of a sequence, is replaced by that sequence's size and stays replaced for
+ * the sequences that follow.
+ * @param alignment	The alignment that receives the segments
+ * @param start	offset of the first character to copy
+ * @param stop	offset of the last character to copy, or 0 for "to the end"
+ */
+void gnAlignedSequences::addAllSegments(gnAlignedSequences &alignment, unsigned start, unsigned stop)
+{
+	// The segments come from this object, not from the destination.  The
+	// count is fixed up front and the name is copied so that passing this
+	// object as the destination cannot disturb the loop.
+	const uint seq_count = sequences.size();
+	for ( uint seqI = 0; seqI < seq_count; seqI++ ){
+		if (stop == 0 || stop == sequences[ seqI ].size()-1)
+			stop = sequences[ seqI ].size();
+		string seq = sequences[ seqI ].substr(start, stop-start+1);
+		string name = names[ seqI ];
+		alignment.addSequence( seq, name );
+	}
+}
+
+
+/**
+ * Passes the range [start, stop] of every entry of alignedSequences to the
+ * four-argument addSequence() of another alignment, together with the
+ * entry's name, start and this object's consensus.
+ *
+ * A stop of 0, or a stop equal to the last index of a sequence, is replaced
+ * by that sequence's size and stays replaced for the entries that follow.
+ * @param alignment	The alignment that receives the segments
+ * @param start	offset of the first character of the segment
+ * @param stop	offset of the last character of the segment, or 0
+ */
+void gnAlignedSequences::addAllSegmentsReplaceGaps(gnAlignedSequences &alignment, unsigned start, unsigned stop)
+{
+	typedef list <pair <string*, string*> >::iterator SeqItr;
+
+	for( SeqItr entry = alignedSequences.begin(); entry != alignedSequences.end(); ++entry )
+	{
+		const string& seq = *entry->second;
+		const bool toEnd = ( stop == 0 || stop == seq.size()-1 );
+		if( toEnd )
+			stop = seq.size();
+
+		alignment.addSequence( seq.substr( start, stop-start+1 ),
+							   *entry->first, start, consensus );
+	}
+}
+
+
+/**
+ * Erases stop-start+1 characters, beginning at start, from every sequence in
+ * alignedSequences.  A stop of 0 is replaced by the size of the first
+ * sequence visited and stays replaced for the rest.  Finally start, stop
+ * and the erased length are printed to standard output.
+ * @param start	offset of the first character to erase
+ * @param stop	offset of the last character to erase, or 0
+ */
+void gnAlignedSequences::removeAllSegments(unsigned start, unsigned stop)
+{
+	typedef list <pair <string*, string*> >::iterator SeqItr;
+
+	SeqItr entry = alignedSequences.begin();
+	while( entry != alignedSequences.end() )
+	{
+		string& seq = *entry->second;
+		if( stop == 0 )
+			stop = seq.size();
+
+		seq.erase( start, stop-start+1 );
+		++entry;
+	}
+
+	cout << start << " " << stop << ": " << stop-start+1 << endl;
+}
+
+
+/**
+ * Maps a character to an alphabet index by subtracting a character code.
+ * Characters with a code below 91 are treated as upper case (65 is
+ * subtracted), all others as lower case (97 is subtracted).  The result is
+ * not range-checked, so characters that are not letters give values
+ * outside 0..25.
+ * @param base	The character to map
+ * @return	base minus 65 or base minus 97, as described above
+ */
+int gnAlignedSequences::determineBaseIndex(char base)
+{
+	const int UPPER_CASE_ORIGIN = 65;
+	const int LOWER_CASE_ORIGIN = 97;
+
+	const bool isUpperCase = base < 91;
+	return base - ( isUpperCase ? UPPER_CASE_ORIGIN : LOWER_CASE_ORIGIN );
+}
+
+
+/**
+ * Tells whether a line consists only of spaces, carriage returns, newlines
+ * and characters with codes 48 to 57 (the decimal digits).
+ * @param line	The line to examine
+ * @return	true if every character is one of those (also for an empty
+ *		line), false as soon as any other character is found
+ */
+bool gnAlignedSequences::coordinates(string line)
+{
+	for( int i = 0; i < line.length(); i++ )
+	{
+		const char c = line[i];
+		const bool isBlank = ( c == ' ' || c == '\r' || c == '\n' );
+		const bool isDigit = ( c >= 48 && c <= 57 );
+		if( !isBlank && !isDigit )
+			return false;
+	}
+
+	return true;
+}
+
+}
diff --git a/libMems/gnAlignedSequences.h b/libMems/gnAlignedSequences.h
new file mode 100644
index 0000000..9a1f644
--- /dev/null
+++ b/libMems/gnAlignedSequences.h
@@ -0,0 +1,401 @@
+/*******************************************************************************
+ * $Id: gnAlignedSequences.h,v 1.5 2004/02/27 23:08:55 darling Exp $
+ * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
+ * This file is licensed under the GPL.
+ * Please see the file called COPYING for licensing details.
+ * **************
+ ******************************************************************************/
+
+/////////////////////////////////////////////////////////////////////////////
+// File:            gnAlignedSequences.h
+// Purpose:         Aligned Sequences class
+// Description:     Provides an alignment interface for any number of alignable
+//					sequences (the data of each of which is contained in a 
+//                  genome::gnSequence object).
+//                  Currently only compatible with ClustalW alignment files.
+// Revisions:       
+// Version:         A
+// Created:         August 3, 2000, 11:55am
+// Author:          Brian Gettler
+// Last Edited:     May 3, 2001, 4:25pm
+// Modified by:     
+// Copyright:       (c)
+// Licences:         
+/////////////////////////////////////////////////////////////////////////////
+#ifndef __gnAlignedSequences_h__
+#define __gnAlignedSequences_h__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "libGenome/gnSequence.h"
+#include "libGenome/gnFilter.h"
+#include <list>
+#include <fstream>
+#include <vector>
+
+namespace mems {
+
+// the number of characters in each row of an alignment file
+const int MEGA_ALIGN_COLUMNS = 60;
+
+/**
+ * gnAlignedSequences allows for the manipulation of aligned sequence
+ * data. 
+ */
+
+class gnAlignedSequences// : blClone
+{
+public:
+	/**
+	 * Empty Constructor, creates a default gnAlignedSequences.
+	 */
+	gnAlignedSequences();
+	/**
+	 * Copy Constructor, creates a copy of toCopy.
+	 */
+	gnAlignedSequences(const gnAlignedSequences &toCopy);
+
+
+	/**
+	 * Returns a vector of supported format names
+	 */
+	static const std::vector< std::string >& getSupportedFormats();
+
+	/**
+	 * Checks whether a particular format name is supported
+	 */
+	static boolean isSupportedFormat( const std::string& format_name );
+
+	/**
+	 * Writes out this sequence alignment in the specified format, 
+	 * assuming the format is supported
+	 */
+	void output( const std::string& format_name, std::ostream& os ) const;
+
+// sequence alignment loading
+	/**
+	 * Loads the data held in file alignedFileName (in ClustalW format).
+	 * @param alignedFileName name of a file containing an alignment.
+	 */
+	void constructFromClustalW(std::string alignedFileName);
+	/**
+	 * Loads the data held in file alignedFileName (in Phylip format).
+	 * @param alignedFileName name of a file containing an alignment.
+	 */
+	void constructFromPhylip(std::string alignedFileName);
+	/**
+	 * Loads the data held in file alignedFileName (in MSF format).
+	 * @param alignedFileName name of a file containing an alignment.
+	 */
+	void constructFromMSF(std::string alignedFileName);
+	/**
+	 * Loads the data held in file alignedFileName (in Nexus format).
+	 * @param alignedFileName name of a file containing an alignment.
+	 */
+	void constructFromNexus(std::string alignedFileName);
+	/**
+	 * Loads the data held in file alignedFileName (in Mega format).
+	 * @param alignedFileName name of a file containing an alignment.
+	 */
+	void constructFromMega(std::string alignedFileName);
+	
+	/**
+	 * Reads a single sequence entry in relaxed NEXUS format.  Assumes that
+	 * the #NEXUS has already been read off.
+	 * @param align_stream	The stream to read data from
+	 */
+	void constructFromRelaxedNexus( std::istream& align_stream );
+
+	/**
+	 * Assigns a file name to the alignment data for purposes of output.
+	 * @param name the name of the file.
+	 */
+	void assignFileName(std::string name);
+
+// output
+	/**
+	 * Writes alignment using the given output stream (in Phylip format).
+	 * @param os the output stream.
+	 * @return true if successful.
+	 */
+	bool outputPhylip(std::ostream& os) const;
+	/**
+	 * Writes alignment using the given output stream (in ClustalW format).
+	 * @param os the output stream.
+	 * @return true if successful.
+	 */
+	bool outputClustalW(std::ostream& os) const;
+	/**
+	 * Writes alignment using the given output stream (in MSF format).
+	 * @param os the output stream.
+	 * @return true if successful.
+	 */
+	bool outputMSF(std::ostream& os) const;
+	/**
+	 * Writes alignment using the given output stream (in Nexus format).
+	 * @param os the output stream.
+	 * @return true if successful.
+	 */
+	bool outputNexus(std::ostream& os) const;
+	/**
+	 * Writes alignment using the given output stream (in Mega format).
+	 * @param os the output stream.
+	 * @return true if successful.
+	 */
+	bool outputMega(std::ostream& os) const;
+	/**
+	 * Writes alignment in 3-base, codon segments using the given output
+	 * stream (in Phylip format).
+	 * @param os the output stream.
+	 * @return true if successful.
+	 */
+	bool outputCodon(std::ostream& os) const;
+	/**
+	 * Writes alignment with consensus using the given output stream
+	 * (in Phylip format).
+	 * @param os the output stream.
+	 * @return true if successful.
+	 */
+	bool outputWithConsensus(std::ostream& os);
+
+// alignment manipulators that create new gnAlignedSequences
+	/**
+	 * Create a new alignment that is comprised of all of the segments
+	 * in the initial alignment from start to stop (inclusive)
+	 * if stop == 0, the alignment ends at the end
+	 * @param start the beginning point of the segment.
+	 * @param stop the end point of the segment.
+	 * @return the new gnAlignedSequences that is created
+	 */
+	gnAlignedSequences getAlignedSegment(unsigned start, unsigned stop);
+	/**
+	 * Extracts every codonMultiple-th codon in the reading
+	 * frame readingFrame beginning with startCodon
+	 * reading frames supported: 1, 2 & 3 (no reverse complementing)
+	 * @param readingFrame the codon reading frame.
+	 * @param startCodon the number codon in readingFrame with which to begin.
+	 * @param codonMultiple the multiple with which codons in readingFrame
+	 *        are selected.
+	 * @return the new gnAlignedSequences that is created
+	 */
+	gnAlignedSequences getCodons(int readingFrame, int startCodon, int codonMultiple);
+	
+	/**
+	 * Returns the name of the file associated with this gnAlignedSequences.
+	 * @return the alignment file name.
+	 */
+	std::string getAlignedSequenceFileName();
+	/**
+	 * Returns the size of each sequence in the alignment (all are identical).
+	 * @return the size of the aligned sequences.
+	 */
+	gnSeqI alignedSeqsSize() const;
+
+	/**
+	 * Removes a single sequence from the alignment.
+	 * @param seqName the name of the sequence to remove.
+	 * @return true if successful (a sequence called seqName exists).
+	 */
+	bool removeAlignedSeq(std::string seqName);
+	/**
+	 * Removes a single sequence from the alignment.
+	 * @param index the position in the alignment of the sequence to be removed.
+	 * @return true if successful (a sequence at index exists).
+	 */
+	bool removeAlignedSeq(unsigned index);
+
+	/**
+	 * Concatenates 2 alignments.
+	 * @param toConcat the sequence which is appended to *this.
+	 */
+	void concatenateAlignedSequences(gnAlignedSequences toConcat);
+	
+	/**
+	 * Extracts the variable sites from *this.
+	 * @param variableSites the alignment consisting of all variable sites.
+	 * @param countGapsAsMismatches true if gaps are to be considered mismatches.
+	 */
+	void extractVariableSites(gnAlignedSequences &variableSites, bool countGapsAsMismatches);
+
+	/**
+	 * Collapses the alignment across all sequences.
+	 * Sequences are compared in terms of base content -
+	 * if the sequences of base pairs are equal, the sequences are identical
+	 * @return true if there exist identical sequences that are collapsed.
+	 */
+	bool collapseIdenticalSequences();
+	/**
+	 * Accesses the alignment and returns the bases at that position in all
+	 * sequences.
+	 * @param offset the position in the alignment to access.
+	 * @return a vector of characters at position offset.
+	 */
+	std::vector <char> operator[]( const int offset ); //const;
+	
+	/**
+	 * Adds a sequence to the alignment.
+	 * @param seqToAdd the sequence data.
+	 * @param seqName the sequence's name.
+	 */
+	void addSequence(std::string& seqToAdd, std::string& seqName);
+	/**
+	 * Adds a sequence to the alignment.
+	 * @param seqToAdd the sequence data.
+	 * @param seqName the sequence's name.
+	 */
+	void addSequence(genome::gnSequence& seqToAdd, std::string& seqName);
+
+	std::list <std::pair <std::string*, std::string*> > alignedSequences;
+	std::vector< std::string > sequences;
+	std::vector< std::string > names;
+	std::vector< int64 > positions;		/**< If this is part of a larger alignment this vector stores start positions within that alignment */
+	void seq( uint seqI );
+
+private:
+
+	/**
+	 * Reads a relaxed NEXUS format alignment.
+	 * @return true if successful.
+	 */
+	bool readRelaxedNexusAlignment( std::istream& align_stream );
+	/**
+	 * Aids constructFromClustalW.
+	 * @return true if successful.
+	 */
+	bool readClustalWAlignment();
+	/**
+	 * Aids constructFromPhylip.
+	 * @return true if successful.
+	 */
+	bool readPhylipAlignment();
+	/**
+	 * Aids constructFromMSF.
+	 * @return true if successful.
+	 */
+	bool readMSFAlignment();
+	/**
+	 * Aids constructFromNexus.
+	 * @return true if successful.
+	 */
+	bool readNexusAlignment();
+	/**
+	 * Aids constructFromMega.
+	 * @return true if successful.
+	 */
+	bool readMegaAlignment();
+
+	/**
+	 * Aids readClustalWAlignment.
+	 * @param alignmentFile the file that contains the alignment.
+	 * @return true if successful.
+	 */
+	bool constructClustalWAlignedSequenceList(std::ifstream& alignmentFile);
+	/**
+	 * Aids readPhylipAlignment.
+	 * @param alignmentFile the file that contains the alignment.
+	 * @return true if successful.
+	 */
+	bool constructPhylipAlignedSequenceList(std::ifstream& alignmentFile);
+	/**
+	 * Aids readMSFAlignment.
+	 * @param alignmentFile the file that contains the alignment.
+	 * @return true if successful.
+	 */
+	bool constructMSFAlignedSequenceList(std::ifstream& alignmentFile);
+	/**
+	 * Aids readNexusAlignment.
+	 * @param alignmentFile the file that contains the alignment.
+	 * @return true if successful.
+	 */
+	bool constructNexusAlignedSequenceList(std::ifstream& alignmentFile);
+	/**
+	 * Aids readMegaAlignment.
+	 * @param alignmentFile the file that contains the alignment.
+	 * @return true if successful.
+	 */
+	bool constructMegaAlignedSequenceList(std::ifstream& alignmentFile);
+
+	/**
+	 * Determines whether a sequence of the given name is present in the list.
+	 * @param sequenceName the name to be found.
+	 * @param sequenceItr the list iterator to be employed.
+	 * @return true if sequenceName is present.
+	 */
+	bool sequenceNameInList(std::string sequenceName, std::list <std::pair <std::string*, std::string*> >::iterator &sequenceItr);
+
+	/**
+	 * Determines whether a sequence of the given name is present in the list.
+	 * @param sequenceName the name to be found.
+	 * @return the index in the list or -1 if not present
+	 */
+	int sequenceNameInList( std::string& sequenceName );
+
+	/**
+	 * Reads all sequences in the alignment and creates a consensus.
+	 * @return true if successful.
+	 */
+	bool buildConsensus();
+
+	/**
+	 * Adds a sequence to the alignment.
+	 * @param seqToAdd the sequence data.
+	 * @param seqName the sequence's name.
+	 * @param consensusStart position in consensus to add sequence.
+	 * @param originalConsensus the alignment's consensus.
+	 */
+	void addSequence(genome::gnSequence seqToAdd, std::string seqName, int consensusStart, std::string originalConsensus);
+
+	/**
+	 * Adds all segments in *this to the given alignment.
+	 * @param alignment sequences to add.
+	 * @param start segment start point.
+	 * @param stop segment stop point.
+	 */
+	void addAllSegments(gnAlignedSequences &alignment, unsigned start, unsigned stop);
+	/**
+	 * Adds all segments in *this to the given alignment -
+	 * replaces gaps with consensus data.
+	 * @param alignment sequences to add.
+	 * @param start segment start point.
+	 * @param stop segment stop point.
+	 */
+	void addAllSegmentsReplaceGaps(gnAlignedSequences &alignment, unsigned start, unsigned stop);
+	/**
+	 * Removes all segments across all sequences in *this.
+	 * @param start segment start point.
+	 * @param stop segment stop point.
+	 */
+	void removeAllSegments(unsigned start, unsigned stop);
+	
+	/**
+	 * Computes an index for a given base (0-25: a-z).
+	 * @param base base to be converted.
+	 * @return an index.
+	 */
+	int determineBaseIndex(char base);
+	
+	/**
+	 * Searches given line for coordinates.
+	 * @param line line to search.
+	 * @return true if coordinates.
+	 */
+	bool coordinates(std::string line);
+
+	std::string alignedSequenceFileName;
+//	list <pair <string*, genome::gnSequence*> > alignedSequences;
+	std::string consensus;
+	std::vector <int> indexPositions; // 1->n if a standard alignment, variable for varible sites
+}; // gnAlignedSequences
+
+
+/** Stores `name` in alignedSequenceFileName; nothing else is touched. */
+inline
+void gnAlignedSequences::assignFileName(std::string name) {alignedSequenceFileName=name;}
+
+/** Returns a copy of alignedSequenceFileName as last set by assignFileName(). */
+inline
+std::string gnAlignedSequences::getAlignedSequenceFileName() {return alignedSequenceFileName;}
+
+}
+
+#endif	// __gnAlignedSequences_h__
diff --git a/libMems/gnRAWSequence.h b/libMems/gnRAWSequence.h
new file mode 100644
index 0000000..02e33da
--- /dev/null
+++ b/libMems/gnRAWSequence.h
@@ -0,0 +1,202 @@
+/////////////////////////////////////////////////////////////////////////////
+// File:            gnRAWSequence.h
+// Purpose:         Optimized Sequence class for RAW sequence files
+// Description:     Provides a high level sequence interface to all types of
+//					sequence data.
+// Changes:        
+// Version:         libGenome 0.5.1 
+// Author:          Aaron Darling 
+// Modified by:     
+// Copyright:       (c) Aaron Darling 
+// Licenses:        See COPYING file for details 
+/////////////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef _gnRAWSequence_h_
+#define _gnRAWSequence_h_
+
+#include "libGenome/gnDefs.h"
+
+#include <string>
+#include <iostream>
+#include <list>
+#include "libGenome/gnSequence.h"
+
+namespace genome {
+
+
+/**
+ * gnRAWSequence is a bastardization of gnSequence that creates a lightweight wrapper
+ * around a memory-mapped file of raw sequence data
+ */
+class GNDLLEXPORT gnRAWSequence : public gnSequence
+{
+public:
+	/**
+	 * Empty Constructor, creates an empty gnRAWSequence.
+	 */
+	gnRAWSequence();
+	/**
+	 * Creates a gnRAWSequence based on the file specified by filename.
+	 * The name is remembered and the file is opened through `data`.
+	 */
+	gnRAWSequence( const std::string& filename )
+	{
+		this->filename = filename;
+		data.open( filename );
+	}
+
+	/** Returns a heap-allocated copy of this object (caller owns it). */
+	gnRAWSequence* Clone() const {return new gnRAWSequence(*this);}
+
+	// ---- contig interface -------------------------------------------------
+	// The whole mapped file is presented as exactly one contig, index 0.
+	// Any other contig index raises FragmentIndexOutOfBounds, any base index
+	// at or beyond data.size() raises SeqIndexOutOfBounds.
+	gnSeqI contigListSize() const {return 1;}
+	gnSeqI contigListLength() const {return 1;}
+	uint32 contigIndexByBase( const gnSeqI baseI) const {
+		if(baseI >= data.size()) Throw_gnEx(SeqIndexOutOfBounds());
+		return 0;
+	}
+	gnRAWSequence contig( const uint32 contigI) const { 
+		if(contigI>0) Throw_gnEx(FragmentIndexOutOfBounds()); 
+		return *this;
+	}
+	gnRAWSequence contigByBase( const gnSeqI baseI) const {
+		if(baseI >= data.size()) Throw_gnEx(SeqIndexOutOfBounds());
+		return *this;
+	}
+	virtual gnSeqI contigStart( const uint32 contigI) const {
+		if(contigI>0) Throw_gnEx(FragmentIndexOutOfBounds()); 
+		return 0;
+	}
+	virtual gnSeqI contigLength( const uint32 contigI) const {
+		if(contigI>0) Throw_gnEx(FragmentIndexOutOfBounds()); 
+		return data.size();
+	}
+	/** Always answers 0: contig names are not tracked. */
+	virtual uint32 contigIndexByName( std::string& contigName) const {
+		return 0;
+	}
+	/** The single contig has an empty name. */
+	virtual std::string contigName( const uint32 contigI) const {
+		if(contigI>0) Throw_gnEx(FragmentIndexOutOfBounds()); 
+		return "";
+	}
+	/** Lookup by name is not supported; always raises FragmentIndexOutOfBounds. */
+	virtual gnSequence contigByName( std::string& contigName) const {
+		Throw_gnEx(FragmentIndexOutOfBounds());
+	}
+	// ---- unsupported mutators ---------------------------------------------
+	// NOTE(review): a bare `throw;` executed while no exception is being
+	// handled calls std::terminate(), so these calls abort the program rather
+	// than raising something a caller can catch - confirm that is intended.
+	virtual void merge(const gnSeqI startI, const gnSeqI endI){ throw; }
+	virtual void mergeContigs(const uint32 startC, const uint32 endC){ throw; }
+	virtual void splitContig(const gnSeqI splitI, const uint32 contigI=ALL_CONTIGS) { throw; }
+
+	virtual void setContigName( const uint32 contigI, const std::string& contig_name) { throw; }
+
+	// ---- features and headers: a raw sequence carries none ----------------
+	virtual uint32 getFeatureListLength() const {
+		return 0;
+	}
+	virtual gnBaseFeature* getFeature(const uint32 featureI) const{ Throw_gnEx(FeatureIndexOutOfBounds()); }
+	virtual void getContainedFeatures(const gnLocation& lt, std::vector<gnBaseFeature*>& feature_vector, std::vector<uint32>& index_vector) const {}
+	virtual void getIntersectingFeatures(const gnLocation& lt, std::vector<gnBaseFeature*>& feature_vector, std::vector<uint32>& index_vector) const {}
+	virtual uint32 addFeature(gnBaseFeature* feature) { throw; }
+	virtual void removeFeature(const uint32 featureI){ Throw_gnEx(FeatureIndexOutOfBounds()); }
+	virtual void getBrokenFeatures(const gnLocation& lt, std::vector<gnBaseFeature*>& feature_vector) const{};
+	virtual uint32 getHeaderListLength(const uint32 contigI) const{ return 0; }
+	virtual gnBaseHeader* getHeader(const uint32 contigI, const uint32 headerI) const{Throw_gnEx(HeaderIndexOutOfBounds());};
+	virtual void addHeader(const uint32 contigI, gnBaseHeader* header, const uint32 headerI){Throw_gnEx(FragmentIndexOutOfBounds());}
+	virtual void removeHeader(const uint32 contigI, const uint32 headerI){ Throw_gnEx(HeaderIndexOutOfBounds()); }
+	virtual void setReverseComplement( const boolean revComp, const uint32 contigI=ALL_CONTIGS){Throw_gnEx(FragmentIndexOutOfBounds());};
+	virtual boolean isReverseComplement( const uint32 contigI=ALL_CONTIGS ){return false;}
+	virtual boolean isCircular() const{ return false; }
+	virtual void setCircular( const boolean value ) {}
+	
+	// ---- coordinate translation: all four are no-ops here ------------------
+	virtual void globalToLocal(uint32& contigI, gnSeqI& baseI) const{};
+	virtual void localToGlobal(const uint32 contigI, gnSeqI& baseI) const {};
+	virtual void globalToSource(uint32& contigI, gnSeqI& baseI) const{};
+	virtual void localToSource(uint32& contigI, gnSeqI& baseI) const{};
+	/**
+	 * Opens sourcename through `data` and remembers the name.
+	 * @return always true; a failed open is not reported through the
+	 * return value.
+	 */
+	virtual bool LoadSource(const std::string sourcename){
+		data.open( sourcename );
+		filename = sourcename;
+		return true;
+	}
+
+	/**
+	 * Appends the bases in "seq" to this sequence.
+	 */
+	gnRAWSequence& operator+=(const gnRAWSequence& seq);
+
+	/**
+	 * Compares the bases in "seq" to this sequence.
+	 * @param seq The sequence to compare this sequence to.
+	 * @return Negative if this sequence is lesser, 0 if the two sequences are
+	 * equal, and positive if this sequence is greater.
+	 */
+/*	virtual int compare(const gnRAWSequence& seq) const;
+	virtual int compare(const std::string& str) const;
+
+	virtual void append( const gnRAWSequence& seq);
+	virtual void insert( const gnSeqI offset, const gnSeqC *bases, const gnSeqI length);
+	virtual void insert( const gnSeqI offset, const gnRAWSequence& seq);
+	virtual void insert( const gnSeqI offset, const gnGenomeSpec& gnbs);
+	gnRAWSequence const operator+(const gnRAWSequence& seq) const;
+	virtual void erase( const gnSeqI offset=0, const gnSeqI length=GNSEQI_END );
+*/
+	/**
+	 * Returns a new gnRAWSequence whose `data` maps `length` bytes of the
+	 * same file starting at byte offset-1 (offset is 1-based).
+	 * NOTE(review): the result's `filename` is left empty, so calling
+	 * subseq() or Clone-then-subseq on it cannot reopen the file; also
+	 * confirm whether mapped_file_source::open() accepts an offset that is
+	 * not a multiple of the mapping alignment.
+	 */
+	gnRAWSequence subseq(const gnSeqI offset, const gnSeqI length) const
+	{
+		gnRAWSequence gnrs;
+		gnrs.data.open(filename, length, offset - 1);
+		return gnrs;
+	}
+//	friend std::istream& operator>>(std::istream& is, gnRAWSequence& gns);	//read from source.
+	/**
+	 * Writes the bases in this sequence to the specified output stream (e.g. cout).
+	 */
+//	friend std::ostream& operator<<(std::ostream& os, const gnRAWSequence& gns); //write to source.
+
+	/** Both length() and size() report the number of mapped bytes. */
+	virtual gnSeqI length() const { return data.size(); }
+	virtual gnSeqI size() const { return data.size(); }
+
+	/**
+	 * Returns `length` bases starting at the 1-based position `offset`.
+	 * With length == GNSEQI_END everything from `offset` to the end of the
+	 * data is returned.  No bounds checking is performed on an explicit
+	 * length or on offset.
+	 */
+	virtual std::string ToString( const gnSeqI length=GNSEQI_END, const gnSeqI offset=1 ) const
+	{
+		// offset is 1-based, so size - offset + 1 bases remain from it.
+		// (The former "size - offset - 1" dropped the last two bases.)
+		gnSeqI len = length == GNSEQI_END ?  data.size() - offset + 1 : length;
+		std::string asdf(data.data()+offset-1, len);
+		return asdf;
+	}
+
+	/**
+	 * Same selection as the ToString() overload above, but the bases are
+	 * assigned to `str`.
+	 * @return always true.
+	 */
+	virtual boolean ToString( std::string& str, const gnSeqI length=GNSEQI_END, const gnSeqI offset=1 ) const
+	{
+		gnSeqI len = length == GNSEQI_END ? data.size() - offset + 1 : length;
+		str.assign(data.data()+offset-1,len);
+		return true;
+	}
+	/**
+	 * Copies the selected bases into the caller-supplied buffer pSeqC, which
+	 * must be large enough; no terminator is written.
+	 * @return always true.
+	 */
+	virtual boolean ToArray( gnSeqC* pSeqC, gnSeqI length, const gnSeqI offset=1 ) const
+	{
+		gnSeqI len = length == GNSEQI_END ? data.size() - offset + 1 : length;
+		memcpy(pSeqC, data.data()+offset-1, len);
+		return true;
+	}
+	/** Returns the base at 1-based position `offset`; no bounds check. */
+	virtual gnSeqC GetSeqC( const gnSeqI offset ) const
+	{
+		return *(data.data()+(offset-1));
+	}
+	/** Same as GetSeqC(): 1-based, unchecked access. */
+	gnSeqC operator[]( const gnSeqI offset ) const
+	{
+		return *(data.data()+(offset-1));
+	}
+
+	/** Searching is not implemented; always returns GNSEQI_ERROR. */
+	virtual gnSeqI find(const gnRAWSequence& search, const gnSeqI offset=0) const {return GNSEQI_ERROR;}
+	
+private:
+	boost::iostreams::mapped_file_source data;	// read-only mapping of the sequence file
+	std::string filename;	// name passed to the constructor or LoadSource()
+}; // class gnRAWSequence
+
+/*
+GNDLLEXPORT
+std::istream& operator>>(std::istream& is, gnRAWSequence& gns);	//read from source.
+GNDLLEXPORT
+std::ostream& operator<<(std::ostream& os, const gnRAWSequence& gns); //write to source.
+*/
+
+
+
+}	// end namespace genome
+
+#endif
+	// _gnRAWSequence_h_
diff --git a/libMems/twister.c b/libMems/twister.c
new file mode 100644
index 0000000..977cf39
--- /dev/null
+++ b/libMems/twister.c
@@ -0,0 +1,224 @@
+/* 
+   A C-program for MT19937, with initialization improved 2002/1/26.
+   Coded by Takuji Nishimura and Makoto Matsumoto.
+
+   Before using, initialize the state by using init_genrand(seed)  
+   or init_by_array(init_key, key_length).
+
+   Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
+   All rights reserved.                          
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+     1. Redistributions of source code must retain the above copyright
+        notice, this list of conditions and the following disclaimer.
+
+     2. Redistributions in binary form must reproduce the above copyright
+        notice, this list of conditions and the following disclaimer in the
+        documentation and/or other materials provided with the distribution.
+
+     3. The names of its contributors may not be used to endorse or promote 
+        products derived from this software without specific prior written 
+        permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+   Any feedback is very welcome.
+   http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
+   email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
+*/
+
+#include <stdio.h>
+#include <time.h>
+#include <limits.h>
+#include "twister.h"
+
+
+/* Period parameters */  
+#define N 624
+#define M 397
+#define MATRIX_A 0x9908b0dfUL   /* constant vector a */
+#define UPPER_MASK 0x80000000UL /* most significant w-r bits */
+#define LOWER_MASK 0x7fffffffUL /* least significant r bits */
+
+static unsigned long mt[N]; /* the array for the state vector  */
+static int mti=N+1; /* mti==N+1 means mt[N] is not initialized */
+
+/* initializes mt[N] with a seed */
+/* mt[0] is the low 32 bits of s; every later word is derived from the
+   word before it.  The file-scope index mti is used as the loop counter
+   and is left equal to N on return. */
+void init_genrand(unsigned long s)
+{
+    unsigned long prev;
+
+    mt[0]= s & 0xffffffffUL;
+    mti = 1;
+    while (mti < N) {
+        prev = mt[mti-1];
+        /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
+        /* In the previous versions, MSBs of the seed affect   */
+        /* only MSBs of the array mt[].                        */
+        /* 2002/01/09 modified by Makoto Matsumoto             */
+        /* The mask keeps the word to 32 bits on >32 bit machines. */
+        mt[mti] = (1812433253UL * (prev ^ (prev >> 30)) + mti) & 0xffffffffUL;
+        mti++;
+    }
+}
+
+/* initialize by an array with array-length */
+/* init_key is the array for initializing keys */
+/* key_length is its length */
+/* slight change for C++, 2004/2/26 */
+/* The state is first seeded with init_genrand(19650218UL), then mixed in
+   two passes: the first pass folds in the key words, the second pass
+   runs N-1 further steps without the key.  Finally mt[0] is forced to
+   0x80000000UL.
+   NOTE(review): with key_length <= 0 the first pass still reads
+   init_key[0] - callers presumably always pass key_length >= 1; confirm. */
+void init_by_array(unsigned long init_key[], int key_length)
+{
+    int i, j, k;
+    init_genrand(19650218UL);
+    i=1; j=0;
+    /* first pass runs max(N, key_length) steps so every key word is used */
+    k = (N>key_length ? N : key_length);
+    for (; k; k--) {
+        mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1664525UL))
+          + init_key[j] + j; /* non linear */
+        mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
+        i++; j++;
+        /* i wraps from N back to 1 (carrying mt[N-1] into mt[0]); j wraps to 0 */
+        if (i>=N) { mt[0] = mt[N-1]; i=1; }
+        if (j>=key_length) j=0;
+    }
+    /* second pass: N-1 steps, continuing from the current i */
+    for (k=N-1; k; k--) {
+        mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1566083941UL))
+          - i; /* non linear */
+        mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
+        i++;
+        if (i>=N) { mt[0] = mt[N-1]; i=1; }
+    }
+
+    mt[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */ 
+}
+
+/* generates a random number on [0,0xffffffff]-interval */
+/* Returns the next word of the state array mt[] after tempering.  When
+   all N words have been handed out (mti >= N) the whole array is
+   regenerated first; if the generator was never seeded (mti == N+1) it
+   seeds itself with init_genrand(5489UL).  Uses and updates the
+   file-scope state mt[] and mti. */
+unsigned long genrand_int32(void)
+{
+    unsigned long y;
+    static unsigned long mag01[2]={0x0UL, MATRIX_A};
+    /* mag01[x] = x * MATRIX_A  for x=0,1 */
+
+    if (mti >= N) { /* generate N words at one time */
+        int kk;
+
+        if (mti == N+1)   /* if init_genrand() has not been called, */
+            init_genrand(5489UL); /* a default initial seed is used */
+
+        /* words 0 .. N-M-1 combine with the word M places ahead */
+        for (kk=0;kk<N-M;kk++) {
+            y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
+            mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & 0x1UL];
+        }
+        /* words N-M .. N-2 wrap around: kk+(M-N) indexes the new front */
+        for (;kk<N-1;kk++) {
+            y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
+            mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & 0x1UL];
+        }
+        /* last word pairs with mt[0] */
+        y = (mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK);
+        mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & 0x1UL];
+
+        mti = 0;
+    }
+  
+    y = mt[mti++];
+
+    /* Tempering */
+    y ^= (y >> 11);
+    y ^= (y << 7) & 0x9d2c5680UL;
+    y ^= (y << 15) & 0xefc60000UL;
+    y ^= (y >> 18);
+
+    return y;
+}
+
+/* generates a random number on [0,0x7fffffff]-interval */
+/* Takes one genrand_int32() draw and shifts out its lowest bit. */
+long genrand_int31(void)
+{
+    return (long)(genrand_int32()>>1);
+}
+
+/* generates a random number on [0,1]-real-interval */
+/* One genrand_int32() draw scaled by 1/(2^32-1), so the largest draw
+   maps to exactly 1.0. */
+double genrand_real1(void)
+{
+    return genrand_int32()*(1.0/4294967295.0); 
+    /* divided by 2^32-1 */ 
+}
+
+/* generates a random number on [0,1)-real-interval */
+/* One genrand_int32() draw scaled by 1/2^32, so 1.0 is never reached. */
+double genrand_real2(void)
+{
+    return genrand_int32()*(1.0/4294967296.0); 
+    /* divided by 2^32 */
+}
+
+/* generates a random number on (0,1)-real-interval */
+/* One genrand_int32() draw shifted up by 0.5 before scaling by 1/2^32,
+   which keeps the result away from both 0.0 and 1.0. */
+double genrand_real3(void)
+{
+    return (((double)genrand_int32()) + 0.5)*(1.0/4294967296.0); 
+    /* divided by 2^32 */
+}
+
+/* generates a random number on [0,1) with 53-bit resolution*/
+/* Consumes two genrand_int32() draws: a keeps the first draw shifted
+   right by 5, b the second shifted right by 6.  a*2^26 + b (67108864 is
+   2^26) is then divided by 2^53 (9007199254740992). */
+double genrand_res53(void) 
+{ 
+    unsigned long a=genrand_int32()>>5, b=genrand_int32()>>6; 
+    return(a*67108864.0+b)*(1.0/9007199254740992.0); 
+} 
+/* These real versions are due to Isaku Wada, 2002/01/09 added */
+
+/* Public seeding entry point: forwards seed unchanged to init_genrand(). */
+void SetTwisterSeed(unsigned long seed)
+{
+	init_genrand(seed);
+}
+
+/* Folds count raw bytes into one unsigned long, one byte at a time:
+   h = h * (UCHAR_MAX + 2) + byte. */
+static unsigned long twister_fold_bytes(const unsigned char *bytes, size_t count)
+{
+	unsigned long h = 0;
+	size_t k;
+
+	for( k = 0; k < count; ++k )
+		h = h * (UCHAR_MAX + 2U) + bytes[k];
+	return h;
+}
+
+/* Builds a seed from the current time() and clock() values.
+   Get a uint32 from t and c
+   Better than uint32(x) in case x is floating point in [0,1]
+   Based on code by Lawrence Kirby (fred at genesis.demon.co.uk)
+   A static counter is added to the time hash and incremented on every
+   call so that time-based seeds change from call to call. */
+unsigned long CreateTwisterSeed( )
+{
+	static unsigned long differ = 0;
+
+	const time_t now = time(NULL);
+	const clock_t ticks = clock();
+
+	const unsigned long timeHash =
+		twister_fold_bytes((const unsigned char *) &now, sizeof(now));
+	const unsigned long clockHash =
+		twister_fold_bytes((const unsigned char *) &ticks, sizeof(ticks));
+
+	return ( timeHash + differ++ ) ^ clockHash;
+}
+
+/* Returns genrand_real1(), i.e. a double on the closed interval [0,1]. */
+double RandTwisterDouble()
+{
+	return genrand_real1();
+}
+
+/* Returns one genrand_int32() draw, converted from unsigned long to
+   unsigned on return. */
+unsigned RandTwisterUnsigned()
+{
+	return genrand_int32();
+}
+
+
diff --git a/libMems/twister.h b/libMems/twister.h
new file mode 100644
index 0000000..24c5e6b
--- /dev/null
+++ b/libMems/twister.h
@@ -0,0 +1,18 @@
+#ifndef __TWISTER_H__
+#define __TWISTER_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Seeds the generator with the given value. */
+void SetTwisterSeed (unsigned long seed);
+/* Builds a seed from time(), clock() and a per-call counter. */
+unsigned long CreateTwisterSeed(void);
+/* Returns a double on the closed interval [0,1]. */
+double RandTwisterDouble (void);
+/* Returns one 32-bit draw of the generator as an unsigned. */
+unsigned RandTwisterUnsigned(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //  __TWISTER_H__
+
diff --git a/m4/ac_cxx_namespaces.m4 b/m4/ac_cxx_namespaces.m4
new file mode 100644
index 0000000..2f18477
--- /dev/null
+++ b/m4/ac_cxx_namespaces.m4
@@ -0,0 +1,25 @@
+dnl @synopsis AC_CXX_NAMESPACES
+dnl
+dnl If the compiler can prevent names clashes using namespaces, define
+dnl HAVE_NAMESPACES.
+dnl
+dnl @category Cxx
+dnl @author Todd Veldhuizen
+dnl @author Luc Maisonobe <luc at spaceroots.org>
+dnl @version 2004-02-04
+dnl @license AllPermissive
+
+dnl Compiles a small nested-namespace program as C++ and caches the
+dnl outcome in ac_cv_cxx_namespaces; HAVE_NAMESPACES is defined when the
+dnl program compiles. The previously selected language is restored.
+AC_DEFUN([AC_CXX_NAMESPACES],
+[AC_CACHE_CHECK(whether the compiler implements namespaces,
+ac_cv_cxx_namespaces,
+[AC_LANG_SAVE
+ AC_LANG_CPLUSPLUS
+ AC_TRY_COMPILE([namespace Outer { namespace Inner { int i = 0; }}],
+                [using namespace Outer::Inner; return i;],
+ ac_cv_cxx_namespaces=yes, ac_cv_cxx_namespaces=no)
+ AC_LANG_RESTORE
+])
+if test "$ac_cv_cxx_namespaces" = yes; then
+  AC_DEFINE(HAVE_NAMESPACES,,[define if the compiler implements namespaces])
+fi
+])
diff --git a/m4/ax_openmp.m4 b/m4/ax_openmp.m4
new file mode 100644
index 0000000..91a2ee3
--- /dev/null
+++ b/m4/ax_openmp.m4
@@ -0,0 +1,104 @@
+##### http://autoconf-archive.cryp.to/ax_openmp.html
+#
+# SYNOPSIS
+#
+#   AX_OPENMP([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
+#
+# DESCRIPTION
+#
+#   This macro tries to find out how to compile programs that use
+#   OpenMP, a standard API and set of compiler directives for parallel
+#   programming (see https://www.openmp.org/).
+#
+#   On success, it sets the
+#   OPENMP_CFLAGS/OPENMP_CXXFLAGS/OPENMP_FFLAGS output variable to
+#   the flag (e.g. -omp) used both to compile *and* link OpenMP
+#   programs in the current language.
+#
+#   NOTE: You are assumed to not only compile your program with these
+#   flags, but also link it with them as well.
+#
+#   If you want to compile everything with OpenMP, you should set:
+#
+#       CFLAGS="$CFLAGS $OPENMP_CFLAGS" 
+#       #OR#  CXXFLAGS="$CXXFLAGS $OPENMP_CXXFLAGS" 
+#       #OR#  FFLAGS="$FFLAGS $OPENMP_FFLAGS" 
+#
+#   (depending on the selected language).
+#
+#   The user can override the default choice by setting the
+#   corresponding environment variable (e.g. OPENMP_CFLAGS).
+#
+#   ACTION-IF-FOUND is a list of shell commands to run if an OpenMP
+#   flag is found, and ACTION-IF-NOT-FOUND is a list of commands to run
+#   if it is not found. If ACTION-IF-FOUND is not specified, the
+#   default action will define HAVE_OPENMP.
+#
+# LAST MODIFICATION
+#
+#   2006-01-24
+#
+# COPYLEFT
+#
+#   Copyright (c) 2006 Steven G. Johnson <stevenj at alum.mit.edu>
+#
+#   This program is free software; you can redistribute it and/or
+#   modify it under the terms of the GNU General Public License as
+#   published by the Free Software Foundation; either version 2 of the
+#   License, or (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+#   General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the Free Software
+#   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+#   02111-1307, USA.
+#
+#   As a special exception, the respective Autoconf Macro's copyright
+#   owner gives unlimited permission to copy, distribute and modify the
+#   configure scripts that are the output of Autoconf when processing
+#   the Macro. You need not follow the terms of the GNU General Public
+#   License when using or distributing such scripts, even though
+#   portions of the text of the Macro appear in them. The GNU General
+#   Public License (GPL) does govern all other use of the material that
+#   constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the
+#   Autoconf Macro released by the Autoconf Macro Archive. When you
+#   make and distribute a modified version of the Autoconf Macro, you
+#   may extend this special exception to the GPL to apply to your
+#   modified version as well.
+
+dnl Tries each candidate OpenMP flag in turn by linking a call to
+dnl omp_set_num_threads with the flag appended to the saved compiler flags,
+dnl and caches the first flag that links. The compiler flags are restored
+dnl afterwards. Argument 1 runs when a flag (or "none") works, argument 2
+dnl when the result stays "unknown".
+AC_DEFUN([AX_OPENMP], [
+AC_PREREQ(2.59) dnl for _AC_LANG_PREFIX
+
+AC_CACHE_CHECK([for OpenMP flag of _AC_LANG compiler], ax_cv_[]_AC_LANG_ABBREV[]_openmp, [save[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
+ax_cv_[]_AC_LANG_ABBREV[]_openmp=unknown
+# Flags to try:  -fopenmp (gcc), -openmp (icc), -mp (SGI & PGI),
+#                -xopenmp (Sun), -omp (Tru64), -qsmp=omp (AIX), none
+ax_openmp_flags="-fopenmp -openmp -mp -xopenmp -omp -qsmp=omp none"
+if test "x$OPENMP_[]_AC_LANG_PREFIX[]FLAGS" != x; then
+  ax_openmp_flags="$OPENMP_[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flags"
+fi
+for ax_openmp_flag in $ax_openmp_flags; do
+  case $ax_openmp_flag in
+    # none: probe with exactly the saved flags. The variable written by the
+    # save step above ends in FLAGS, so the same name must be read back here.
+    none) []_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[]FLAGS ;;
+    *) []_AC_LANG_PREFIX[]FLAGS="$save[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flag" ;;
+  esac
+  AC_TRY_LINK_FUNC(omp_set_num_threads,
+	[ax_cv_[]_AC_LANG_ABBREV[]_openmp=$ax_openmp_flag; break])
+done
+[]_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[]FLAGS
+])
+if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" = "xunknown"; then
+  m4_default([$2],:)
+else
+  if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" != "xnone"; then
+    OPENMP_[]_AC_LANG_PREFIX[]FLAGS=$ax_cv_[]_AC_LANG_ABBREV[]_openmp
+  fi
+  m4_default([$1], [AC_DEFINE(HAVE_OPENMP,1,[Define if OpenMP is enabled])])
+fi
+])dnl AX_OPENMP
diff --git a/m4/ax_prog_doxygen.m4 b/m4/ax_prog_doxygen.m4
new file mode 100644
index 0000000..14c31cb
--- /dev/null
+++ b/m4/ax_prog_doxygen.m4
@@ -0,0 +1,535 @@
+##### http://autoconf-archive.cryp.to/ax_prog_doxygen.html
+#
+# SYNOPSIS
+#
+#   DX_INIT_DOXYGEN(PROJECT-NAME, DOXYFILE-PATH, [OUTPUT-DIR])
+#   DX_DOXYGEN_FEATURE(ON|OFF)
+#   DX_DOT_FEATURE(ON|OFF)
+#   DX_HTML_FEATURE(ON|OFF)
+#   DX_CHM_FEATURE(ON|OFF)
+#   DX_CHI_FEATURE(ON|OFF)
+#   DX_MAN_FEATURE(ON|OFF)
+#   DX_RTF_FEATURE(ON|OFF)
+#   DX_XML_FEATURE(ON|OFF)
+#   DX_PDF_FEATURE(ON|OFF)
+#   DX_PS_FEATURE(ON|OFF)
+#
+# DESCRIPTION
+#
+#   The DX_*_FEATURE macros control the default setting for the given
+#   Doxygen feature. Supported features are 'DOXYGEN' itself, 'DOT' for
+#   generating graphics, 'HTML' for plain HTML, 'CHM' for compressed
+#   HTML help (for MS users), 'CHI' for generating a separate .chi file
+#   by the .chm file, and 'MAN', 'RTF', 'XML', 'PDF' and 'PS' for the
+#   appropriate output formats. The environment variable
+#   DOXYGEN_PAPER_SIZE may be specified to override the default
+#   'a4wide' paper size.
+#
+#   By default, HTML, PDF and PS documentation is generated as this
+#   seems to be the most popular and portable combination. MAN pages
+#   created by Doxygen are usually problematic, though by picking an
+#   appropriate subset and doing some massaging they might be better
+#   than nothing. CHM and RTF are specific for MS (note that you can't
+#   generate both HTML and CHM at the same time). The XML is rather
+#   useless unless you apply specialized post-processing to it.
+#
+#   The macros mainly control the default state of the feature. The user
+#   can override the default by specifying --enable or --disable. The
+#   macros ensure that contradictory flags are not given (e.g.,
+#   --enable-doxygen-html and --enable-doxygen-chm,
+#   --enable-doxygen-anything with --disable-doxygen, etc.) Finally,
+#   each feature will be automatically disabled (with a warning) if the
+#   required programs are missing.
+#
+#   Once all the feature defaults have been specified, call
+#   DX_INIT_DOXYGEN with the following parameters: a one-word name for
+#   the project for use as a filename base etc., an optional
+#   configuration file name (the default is 'Doxyfile', the same as
+#   Doxygen's default), and an optional output directory name (the
+#   default is 'doxygen-doc').
+#
+#   Automake Support
+#
+#   The following is a template aminclude.am file for use with
+#   Automake. Make targets and variables values are controlled by the
+#   various DX_COND_* conditionals set by autoconf.
+#
+#   The provided targets are:
+#
+#     doxygen-doc: Generate all doxygen documentation.
+#
+#     doxygen-run: Run doxygen, which will generate some of the
+#                  documentation (HTML, CHM, CHI, MAN, RTF, XML)
+#                  but will not do the post processing required
+#                  for the rest of it (PS, PDF, and some MAN).
+#
+#     doxygen-man: Rename some doxygen generated man pages.
+#
+#     doxygen-ps:  Generate doxygen PostScript documentation.
+#
+#     doxygen-pdf: Generate doxygen PDF documentation.
+#
+#   Note that by default these are not integrated into the automake
+#   targets. If doxygen is used to generate man pages, you can achieve
+#   this integration by setting man3_MANS to the list of man pages
+#   generated and then adding the dependency:
+#
+#     $(man3_MANS): doxygen-doc
+#
+#   This will cause make to run doxygen and generate all the
+#   documentation.
+#
+#   The following variable is intended for use in Makefile.am:
+#
+#     DX_CLEANFILES = everything to clean.
+#
+#   Then add this variable to MOSTLYCLEANFILES.
+#
+#     ----- begin aminclude.am -------------------------------------
+#
+#     ## --------------------------------- ##
+#     ## Format-independent Doxygen rules. ##
+#     ## --------------------------------- ##
+#
+#     if DX_COND_doc
+#
+#     ## ------------------------------- ##
+#     ## Rules specific for HTML output. ##
+#     ## ------------------------------- ##
+#
+#     if DX_COND_html
+#
+#     DX_CLEAN_HTML = @DX_DOCDIR@/html
+#
+#     endif DX_COND_html
+#
+#     ## ------------------------------ ##
+#     ## Rules specific for CHM output. ##
+#     ## ------------------------------ ##
+#
+#     if DX_COND_chm
+#
+#     DX_CLEAN_CHM = @DX_DOCDIR@/chm
+#
+#     if DX_COND_chi
+#
+#     DX_CLEAN_CHI = @DX_DOCDIR@/@PACKAGE@.chi
+#
+#     endif DX_COND_chi
+#
+#     endif DX_COND_chm
+#
+#     ## ------------------------------ ##
+#     ## Rules specific for MAN output. ##
+#     ## ------------------------------ ##
+#
+#     if DX_COND_man
+#
+#     DX_CLEAN_MAN = @DX_DOCDIR@/man
+#
+#     endif DX_COND_man
+#
+#     ## ------------------------------ ##
+#     ## Rules specific for RTF output. ##
+#     ## ------------------------------ ##
+#
+#     if DX_COND_rtf
+#
+#     DX_CLEAN_RTF = @DX_DOCDIR@/rtf
+#
+#     endif DX_COND_rtf
+#
+#     ## ------------------------------ ##
+#     ## Rules specific for XML output. ##
+#     ## ------------------------------ ##
+#
+#     if DX_COND_xml
+#
+#     DX_CLEAN_XML = @DX_DOCDIR@/xml
+#
+#     endif DX_COND_xml
+#
+#     ## ----------------------------- ##
+#     ## Rules specific for PS output. ##
+#     ## ----------------------------- ##
+#
+#     if DX_COND_ps
+#
+#     DX_CLEAN_PS = @DX_DOCDIR@/@PACKAGE@.ps
+#
+#     DX_PS_GOAL = doxygen-ps
+#
+#     doxygen-ps: @DX_DOCDIR@/@PACKAGE@.ps
+#
+#     @DX_DOCDIR@/@PACKAGE@.ps: @DX_DOCDIR@/@PACKAGE@.tag
+#   	  cd @DX_DOCDIR@/latex; \
+#   	  rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out; \
+#   	  $(DX_LATEX) refman.tex; \
+#   	  $(DX_MAKEINDEX) refman.idx; \
+#   	  $(DX_LATEX) refman.tex; \
+#   	  countdown=5; \
+#   	  while $(DX_EGREP) 'Rerun (LaTeX|to get cross-references right)' \
+#   			    refman.log > /dev/null 2>&1 \
+#   	     && test $$countdown -gt 0; do \
+#   	      $(DX_LATEX) refman.tex; \
+#   	      countdown=`expr $$countdown - 1`; \
+#   	  done; \
+#   	  $(DX_DVIPS) -o ../@PACKAGE@.ps refman.dvi
+#
+#     endif DX_COND_ps
+#
+#     ## ------------------------------ ##
+#     ## Rules specific for PDF output. ##
+#     ## ------------------------------ ##
+#
+#     if DX_COND_pdf
+#
+#     DX_CLEAN_PDF = @DX_DOCDIR@/@PACKAGE@.pdf
+#
+#     DX_PDF_GOAL = doxygen-pdf
+#
+#     doxygen-pdf: @DX_DOCDIR@/@PACKAGE@.pdf
+#
+#     @DX_DOCDIR@/@PACKAGE@.pdf: @DX_DOCDIR@/@PACKAGE@.tag
+#   	  cd @DX_DOCDIR@/latex; \
+#   	  rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out; \
+#   	  $(DX_PDFLATEX) refman.tex; \
+#   	  $(DX_MAKEINDEX) refman.idx; \
+#   	  $(DX_PDFLATEX) refman.tex; \
+#   	  countdown=5; \
+#   	  while $(DX_EGREP) 'Rerun (LaTeX|to get cross-references right)' \
+#   			    refman.log > /dev/null 2>&1 \
+#   	     && test $$countdown -gt 0; do \
+#   	      $(DX_PDFLATEX) refman.tex; \
+#   	      countdown=`expr $$countdown - 1`; \
+#   	  done; \
+#   	  mv refman.pdf ../@PACKAGE@.pdf
+#
+#     endif DX_COND_pdf
+#
+#     ## ------------------------------------------------- ##
+#     ## Rules specific for LaTeX (shared for PS and PDF). ##
+#     ## ------------------------------------------------- ##
+#
+#     if DX_COND_latex
+#
+#     DX_CLEAN_LATEX = @DX_DOCDIR@/latex
+#
+#     endif DX_COND_latex
+#
+#     .PHONY: doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL)
+#
+#     .INTERMEDIATE: doxygen-run $(DX_PS_GOAL) $(DX_PDF_GOAL)
+#
+#     doxygen-run: @DX_DOCDIR@/@PACKAGE@.tag
+#
+#     doxygen-doc: doxygen-run $(DX_PS_GOAL) $(DX_PDF_GOAL)
+#
+#     @DX_DOCDIR@/@PACKAGE@.tag: $(DX_CONFIG) $(pkginclude_HEADERS)
+#   	  rm -rf @DX_DOCDIR@
+#   	  $(DX_ENV) $(DX_DOXYGEN) $(srcdir)/$(DX_CONFIG)
+#
+#     DX_CLEANFILES = \
+#         @DX_DOCDIR@/@PACKAGE@.tag \
+#         -r \
+#         $(DX_CLEAN_HTML) \
+#         $(DX_CLEAN_CHM) \
+#         $(DX_CLEAN_CHI) \
+#         $(DX_CLEAN_MAN) \
+#         $(DX_CLEAN_RTF) \
+#         $(DX_CLEAN_XML) \
+#         $(DX_CLEAN_PS) \
+#         $(DX_CLEAN_PDF) \
+#         $(DX_CLEAN_LATEX)
+#
+#     endif DX_COND_doc
+#
+#     ----- end aminclude.am ---------------------------------------
+#
+# LAST MODIFICATION
+#
+#   2007-08-04
+#
+# COPYLEFT
+#
+#   Copyright (c) 2007 Oren Ben-Kiki <oren at ben-kiki.org>
+#
+#   Copying and distribution of this file, with or without
+#   modification, are permitted in any medium without royalty provided
+#   the copyright notice and this notice are preserved.
+
+## ----------##
+## Defaults. ##
+## ----------##
+
+# DX_ENV collects the VAR='VALUE' settings appended by DX_ENV_APPEND.
+DX_ENV=""
+# Static (M4-time) default state of each feature, as read by DX_IF_FEATURE.
+# Each macro expands to ON or OFF; the public DX_*_FEATURE macros redefine them.
+AC_DEFUN([DX_FEATURE_doc],  ON)
+AC_DEFUN([DX_FEATURE_dot],  ON)
+AC_DEFUN([DX_FEATURE_man],  OFF)
+AC_DEFUN([DX_FEATURE_html], ON)
+AC_DEFUN([DX_FEATURE_chm],  OFF)
+AC_DEFUN([DX_FEATURE_chi],  OFF)
+AC_DEFUN([DX_FEATURE_rtf],  OFF)
+AC_DEFUN([DX_FEATURE_xml],  OFF)
+AC_DEFUN([DX_FEATURE_pdf],  ON)
+AC_DEFUN([DX_FEATURE_ps],   ON)
+
+## --------------- ##
+## Private macros. ##
+## --------------- ##
+
+# DX_ENV_APPEND(VARIABLE, VALUE)
+# ------------------------------
+# Append VARIABLE='VALUE' to DX_ENV for invoking doxygen.  DX_ENV is
+# registered with AC_SUBST; VALUE is placed between single quotes inside a
+# double-quoted shell string, so shell variables in VALUE are expanded when
+# configure runs.
+AC_DEFUN([DX_ENV_APPEND], [AC_SUBST([DX_ENV], ["$DX_ENV $1='$2'"])])
+
+# DX_DIRNAME_EXPR(PATH)
+# ---------------------
+# Expand into a shell expression that prints the directory part of PATH.
+# The first expr alternative yields "." when PATH contains no slash; the
+# second yields everything before the last slash.
+AC_DEFUN([DX_DIRNAME_EXPR],
+         [[expr ".$1" : '\(\.\)[^/]*$' \| "x$1" : 'x\(.*\)/[^/]*$']])
+
+# DX_IF_FEATURE(FEATURE, IF-ON, IF-OFF)
+# -------------------------------------
+# Expands according to the M4 (static) status of the feature: IF-ON when
+# DX_FEATURE_<FEATURE> expands to ON, IF-OFF for any other value.
+AC_DEFUN([DX_IF_FEATURE], [ifelse(DX_FEATURE_$1, ON, [$2], [$3])])
+
+# DX_REQUIRE_PROG(VARIABLE, PROGRAM)
+# ----------------------------------
+# Require the specified program to be found for the DX_CURRENT_FEATURE to work.
+# PROGRAM is looked up with AC_PATH_TOOL and the result stored in VARIABLE.
+# The test concatenates the feature flag with the value of VARIABLE, so it
+# equals 1 only when the feature is on and VARIABLE is empty (not found); in
+# that case a warning is printed and the feature flag is reset to 0.
+AC_DEFUN([DX_REQUIRE_PROG], [
+AC_PATH_TOOL([$1], [$2])
+if test "$DX_FLAG_[]DX_CURRENT_FEATURE$$1" = 1; then
+    AC_MSG_WARN([$2 not found - will not DX_CURRENT_DESCRIPTION])
+    AC_SUBST([DX_FLAG_]DX_CURRENT_FEATURE, 0)
+fi
+])
+
+# DX_TEST_FEATURE(FEATURE)
+# ------------------------
+# Expand to a shell expression testing whether the feature is active, i.e.
+# whether the shell variable DX_FLAG_<FEATURE> equals 1.
+AC_DEFUN([DX_TEST_FEATURE], [test "$DX_FLAG_$1" = 1])
+
+# DX_CHECK_DEPEND(REQUIRED_FEATURE, REQUIRED_STATE)
+# -------------------------------------------------
+# Verify that a required feature has the right state before trying to turn on
+# the DX_CURRENT_FEATURE.  If DX_FLAG_<REQUIRED_FEATURE> differs from
+# REQUIRED_STATE, configure aborts with a message naming the current feature
+# and the feature it requires (state 1) or contradicts (any other state).
+AC_DEFUN([DX_CHECK_DEPEND], [
+test "$DX_FLAG_$1" = "$2" \
+|| AC_MSG_ERROR([doxygen-DX_CURRENT_FEATURE ifelse([$2], 1,
+                            requires, contradicts) doxygen-$1])
+])
+
+# DX_CLEAR_DEPEND(REQUIRED_FEATURE, REQUIRED_STATE)
+# -------------------------------------------------
+# Turn off the DX_CURRENT_FEATURE unless the required feature is in the
+# required state, i.e. unless DX_FLAG_<REQUIRED_FEATURE> equals REQUIRED_STATE.
+AC_DEFUN([DX_CLEAR_DEPEND], [
+test "$DX_FLAG_$1" = "$2" || AC_SUBST([DX_FLAG_]DX_CURRENT_FEATURE, 0)
+])
+
+# DX_ARG_ABLE(FEATURE, DESCRIPTION,
+#             CHECK_DEPEND, CLEAR_DEPEND,
+#             REQUIRE, DO-IF-ON, DO-IF-OFF)
+# --------------------------------------------
+# Parse the command-line option controlling a feature. CHECK_DEPEND is called
+# if the user explicitly turns the feature on (and invokes DX_CHECK_DEPEND),
+# otherwise CLEAR_DEPEND is called to turn off the default state if a required
+# feature is disabled (using DX_CLEAR_DEPEND). REQUIRE performs additional
+# requirement tests (DX_REQUIRE_PROG). Finally, an automake flag is set and
+# DO-IF-ON or DO-IF-OFF are called according to the final state of the feature.
+#
+# The macro first records FEATURE and DESCRIPTION in DX_CURRENT_FEATURE and
+# DX_CURRENT_DESCRIPTION for use by the helper macros.  CLEAR_DEPEND only runs
+# when the option was not given at all; an explicit "no" just sets the flag to
+# 0.  The feature is tested twice because REQUIRE may itself reset the flag.
+AC_DEFUN([DX_ARG_ABLE], [
+    AC_DEFUN([DX_CURRENT_FEATURE], [$1])
+    AC_DEFUN([DX_CURRENT_DESCRIPTION], [$2])
+    AC_ARG_ENABLE(doxygen-$1,
+                  [AS_HELP_STRING(DX_IF_FEATURE([$1], [--disable-doxygen-$1],
+                                                      [--enable-doxygen-$1]),
+                                  DX_IF_FEATURE([$1], [don't $2], [$2]))],
+                  [
+case "$enableval" in
+#(
+y|Y|yes|Yes|YES)
+    AC_SUBST([DX_FLAG_$1], 1)
+    $3
+;; #(
+n|N|no|No|NO)
+    AC_SUBST([DX_FLAG_$1], 0)
+;; #(
+*)
+    AC_MSG_ERROR([invalid value '$enableval' given to doxygen-$1])
+;;
+esac
+], [
+AC_SUBST([DX_FLAG_$1], [DX_IF_FEATURE([$1], 1, 0)])
+$4
+])
+if DX_TEST_FEATURE([$1]); then
+    $5
+    :
+fi
+if DX_TEST_FEATURE([$1]); then
+    AM_CONDITIONAL(DX_COND_$1, :)
+    $6
+    :
+else
+    AM_CONDITIONAL(DX_COND_$1, false)
+    $7
+    :
+fi
+])
+
+## -------------- ##
+## Public macros. ##
+## -------------- ##
+
+# DX_XXX_FEATURE(DEFAULT_STATE)
+# -----------------------------
+# Each public macro redefines the matching DX_FEATURE_xxx default to
+# DEFAULT_STATE (ON or OFF).  Call them before DX_INIT_DOXYGEN, which reads
+# the defaults while declaring the configure options.
+AC_DEFUN([DX_DOXYGEN_FEATURE], [AC_DEFUN([DX_FEATURE_doc],  [$1])])
+AC_DEFUN([DX_DOT_FEATURE],     [AC_DEFUN([DX_FEATURE_dot],  [$1])])
+AC_DEFUN([DX_MAN_FEATURE],     [AC_DEFUN([DX_FEATURE_man],  [$1])])
+AC_DEFUN([DX_HTML_FEATURE],    [AC_DEFUN([DX_FEATURE_html], [$1])])
+AC_DEFUN([DX_CHM_FEATURE],     [AC_DEFUN([DX_FEATURE_chm],  [$1])])
+AC_DEFUN([DX_CHI_FEATURE],     [AC_DEFUN([DX_FEATURE_chi],  [$1])])
+AC_DEFUN([DX_RTF_FEATURE],     [AC_DEFUN([DX_FEATURE_rtf],  [$1])])
+AC_DEFUN([DX_XML_FEATURE],     [AC_DEFUN([DX_FEATURE_xml],  [$1])])
+AC_DEFUN([DX_PDF_FEATURE],     [AC_DEFUN([DX_FEATURE_pdf],  [$1])])
+AC_DEFUN([DX_PS_FEATURE],      [AC_DEFUN([DX_FEATURE_ps],   [$1])])
+
+# DX_INIT_DOXYGEN(PROJECT, [CONFIG-FILE], [OUTPUT-DOC-DIR])
+# ---------------------------------------------------------
+# PROJECT also serves as the base name for the documentation files.
+# The default CONFIG-FILE is "Doxyfile" and OUTPUT-DOC-DIR is "doxygen-doc".
+#
+# Substitutes DX_PROJECT, DX_CONFIG and DX_DOCDIR, seeds DX_ENV with the
+# settings passed to doxygen, and declares one --enable/--disable-doxygen-*
+# option per feature through DX_ARG_ABLE.  The features are declared in
+# dependency order: doc first, chm before chi and html.
+AC_DEFUN([DX_INIT_DOXYGEN], [
+
+# Files:
+AC_SUBST([DX_PROJECT], [$1])
+AC_SUBST([DX_CONFIG], [ifelse([$2], [], Doxyfile, [$2])])
+AC_SUBST([DX_DOCDIR], [ifelse([$3], [], doxygen-doc, [$3])])
+
+# Environment variables used inside doxygen.cfg:
+DX_ENV_APPEND(SRCDIR, $srcdir)
+DX_ENV_APPEND(PROJECT, $DX_PROJECT)
+DX_ENV_APPEND(DOCDIR, $DX_DOCDIR)
+DX_ENV_APPEND(VERSION, $PACKAGE_VERSION)
+
+# Doxygen itself:
+DX_ARG_ABLE(doc, [generate any doxygen documentation],
+            [],
+            [],
+            [DX_REQUIRE_PROG([DX_DOXYGEN], doxygen)
+             DX_REQUIRE_PROG([DX_PERL], perl)],
+            [DX_ENV_APPEND(PERL_PATH, $DX_PERL)])
+
+# Dot for graphics:
+DX_ARG_ABLE(dot, [generate graphics for doxygen documentation],
+            [DX_CHECK_DEPEND(doc, 1)],
+            [DX_CLEAR_DEPEND(doc, 1)],
+            [DX_REQUIRE_PROG([DX_DOT], dot)],
+            [DX_ENV_APPEND(HAVE_DOT, YES)
+             DX_ENV_APPEND(DOT_PATH, [`DX_DIRNAME_EXPR($DX_DOT)`])],
+            [DX_ENV_APPEND(HAVE_DOT, NO)])
+
+# Man pages generation:
+DX_ARG_ABLE(man, [generate doxygen manual pages],
+            [DX_CHECK_DEPEND(doc, 1)],
+            [DX_CLEAR_DEPEND(doc, 1)],
+            [],
+            [DX_ENV_APPEND(GENERATE_MAN, YES)],
+            [DX_ENV_APPEND(GENERATE_MAN, NO)])
+
+# RTF file generation:
+DX_ARG_ABLE(rtf, [generate doxygen RTF documentation],
+            [DX_CHECK_DEPEND(doc, 1)],
+            [DX_CLEAR_DEPEND(doc, 1)],
+            [],
+            [DX_ENV_APPEND(GENERATE_RTF, YES)],
+            [DX_ENV_APPEND(GENERATE_RTF, NO)])
+
+# XML file generation:
+DX_ARG_ABLE(xml, [generate doxygen XML documentation],
+            [DX_CHECK_DEPEND(doc, 1)],
+            [DX_CLEAR_DEPEND(doc, 1)],
+            [],
+            [DX_ENV_APPEND(GENERATE_XML, YES)],
+            [DX_ENV_APPEND(GENERATE_XML, NO)])
+
+# (Compressed) HTML help generation:
+DX_ARG_ABLE(chm, [generate doxygen compressed HTML help documentation],
+            [DX_CHECK_DEPEND(doc, 1)],
+            [DX_CLEAR_DEPEND(doc, 1)],
+            [DX_REQUIRE_PROG([DX_HHC], hhc)],
+            [DX_ENV_APPEND(HHC_PATH, $DX_HHC)
+             DX_ENV_APPEND(GENERATE_HTML, YES)
+             DX_ENV_APPEND(GENERATE_HTMLHELP, YES)],
+            [DX_ENV_APPEND(GENERATE_HTMLHELP, NO)])
+
+# Separate CHI file generation.
+DX_ARG_ABLE(chi, [generate doxygen seperate compressed HTML help index file],
+            [DX_CHECK_DEPEND(chm, 1)],
+            [DX_CLEAR_DEPEND(chm, 1)],
+            [],
+            [DX_ENV_APPEND(GENERATE_CHI, YES)],
+            [DX_ENV_APPEND(GENERATE_CHI, NO)])
+
+# Plain HTML pages generation:
+DX_ARG_ABLE(html, [generate doxygen plain HTML documentation],
+            [DX_CHECK_DEPEND(doc, 1) DX_CHECK_DEPEND(chm, 0)],
+            [DX_CLEAR_DEPEND(doc, 1) DX_CLEAR_DEPEND(chm, 0)],
+            [],
+            [DX_ENV_APPEND(GENERATE_HTML, YES)],
+            [DX_TEST_FEATURE(chm) || DX_ENV_APPEND(GENERATE_HTML, NO)])
+
+# PostScript file generation:
+DX_ARG_ABLE(ps, [generate doxygen PostScript documentation],
+            [DX_CHECK_DEPEND(doc, 1)],
+            [DX_CLEAR_DEPEND(doc, 1)],
+            [DX_REQUIRE_PROG([DX_LATEX], latex)
+             DX_REQUIRE_PROG([DX_MAKEINDEX], makeindex)
+             DX_REQUIRE_PROG([DX_DVIPS], dvips)
+             DX_REQUIRE_PROG([DX_EGREP], egrep)])
+
+# PDF file generation:
+DX_ARG_ABLE(pdf, [generate doxygen PDF documentation],
+            [DX_CHECK_DEPEND(doc, 1)],
+            [DX_CLEAR_DEPEND(doc, 1)],
+            [DX_REQUIRE_PROG([DX_PDFLATEX], pdflatex)
+             DX_REQUIRE_PROG([DX_MAKEINDEX], makeindex)
+             DX_REQUIRE_PROG([DX_EGREP], egrep)])
+
+# LaTeX generation for PS and/or PDF:
+if DX_TEST_FEATURE(ps) || DX_TEST_FEATURE(pdf); then
+    AM_CONDITIONAL(DX_COND_latex, :)
+    DX_ENV_APPEND(GENERATE_LATEX, YES)
+else
+    AM_CONDITIONAL(DX_COND_latex, false)
+    DX_ENV_APPEND(GENERATE_LATEX, NO)
+fi
+
+# Paper size for PS and/or PDF:
+AC_ARG_VAR(DOXYGEN_PAPER_SIZE,
+           [a4wide (default), a4, letter, legal or executive])
+case "$DOXYGEN_PAPER_SIZE" in
+#(
+"")
+    AC_SUBST(DOXYGEN_PAPER_SIZE, "")
+;; #(
+a4wide|a4|letter|legal|executive)
+    DX_ENV_APPEND(PAPER_SIZE, $DOXYGEN_PAPER_SIZE)
+;; #(
+*)
+    AC_MSG_ERROR([unknown DOXYGEN_PAPER_SIZE='$DOXYGEN_PAPER_SIZE'])
+;;
+esac
+
+#For debugging:
+#echo DX_FLAG_doc=$DX_FLAG_doc
+#echo DX_FLAG_dot=$DX_FLAG_dot
+#echo DX_FLAG_man=$DX_FLAG_man
+#echo DX_FLAG_html=$DX_FLAG_html
+#echo DX_FLAG_chm=$DX_FLAG_chm
+#echo DX_FLAG_chi=$DX_FLAG_chi
+#echo DX_FLAG_rtf=$DX_FLAG_rtf
+#echo DX_FLAG_xml=$DX_FLAG_xml
+#echo DX_FLAG_pdf=$DX_FLAG_pdf
+#echo DX_FLAG_ps=$DX_FLAG_ps
+#echo DX_ENV=$DX_ENV
+])
diff --git a/m4/boost.m4 b/m4/boost.m4
new file mode 100644
index 0000000..11a623d
--- /dev/null
+++ b/m4/boost.m4
@@ -0,0 +1,1343 @@
+# boost.m4: Locate Boost headers and libraries for autoconf-based projects.
+# Copyright (C) 2007-2011, 2014  Benoit Sigoure <tsuna at lrde.epita.fr>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Additional permission under section 7 of the GNU General Public
+# License, version 3 ("GPLv3"):
+#
+# If you convey this file as part of a work that contains a
+# configuration script generated by Autoconf, you may do so under
+# terms of your choice.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# _BOOST_SERIAL expands to the text of the serial line below with the '#' and
+# newline characters removed by m4_translit.  BOOST_REQUIRE echoes it to the
+# configure log.
+m4_define([_BOOST_SERIAL], [m4_translit([
+# serial 23
+], [#
+], [])])
+
+# Original sources can be found at http://github.com/tsuna/boost.m4
+# You can fetch the latest version of the script by doing:
+#   wget http://github.com/tsuna/boost.m4/raw/master/build-aux/boost.m4
+
+# ------ #
+# README #
+# ------ #
+
+# This file provides several macros to use the various Boost libraries.
+# The first macro is BOOST_REQUIRE.  It will simply check if it's possible to
+# find the Boost headers of a given (optional) minimum version and it will
+# define BOOST_CPPFLAGS accordingly.  It will add an option --with-boost to
+# your configure so that users can specify non standard locations.
+# If the user's environment contains BOOST_ROOT and --with-boost was not
+# specified, --with-boost=$BOOST_ROOT is implicitly used.
+# For more README and documentation, go to http://github.com/tsuna/boost.m4
+# Note: THESE MACROS ASSUME THAT YOU USE LIBTOOL.  If you don't, don't worry,
+# simply read the README, it will show you what to do step by step.
+
+# Forbid tokens starting with BOOST_ or Boost_ (optionally preceded by an
+# underscore) in the output, so that unexpanded macros of this file are caught.
+m4_pattern_forbid([^_?(BOOST|Boost)_])
+
+
+# _BOOST_SED_CPP(SED-PROGRAM, PROGRAM,
+#                [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+# --------------------------------------------------------
+# Same as AC_EGREP_CPP, but leave the result in conftest.i.
+#
+# PROGRAM is written to a C++ conftest file and run through the preprocessor;
+# carriage returns are deleted and the output is filtered by
+# `$SED -n -e SED-PROGRAM' into conftest.i.  ACTION-IF-FOUND runs when that
+# pipeline succeeds, ACTION-IF-NOT-FOUND otherwise.  conftest* is removed
+# afterwards, so conftest.i must be read from within the actions.
+#
+# SED-PROGRAM is *not* overquoted, as in AC_EGREP_CPP.  It is expanded
+# in double-quotes, so escape your double quotes.
+#
+# It could be useful to turn this into a macro which extracts the
+# value of any macro.
+m4_define([_BOOST_SED_CPP],
+[AC_LANG_PUSH([C++])dnl
+AC_LANG_PREPROC_REQUIRE()dnl
+AC_REQUIRE([AC_PROG_SED])dnl
+AC_LANG_CONFTEST([AC_LANG_SOURCE([[$2]])])
+AS_IF([dnl eval is necessary to expand ac_cpp.
+dnl Ultrix and Pyramid sh refuse to redirect output of eval, so use subshell.
+dnl Beware of Windows end-of-lines, for instance if we are running
+dnl some Windows programs under Wine.  In that case, boost/version.hpp
+dnl is certainly using "\r\n", but the regular Unix shell will only
+dnl strip `\n' with backquotes, not the `\r'.  This results in
+dnl boost_cv_lib_version='1_37\r' for instance, which breaks
+dnl everything else.
+dnl Cannot use 'dnl' after [$4] because a trailing dnl may break AC_CACHE_CHECK
+(eval "$ac_cpp conftest.$ac_ext") 2>&AS_MESSAGE_LOG_FD |
+  tr -d '\r' |
+  $SED -n -e "$1" >conftest.i 2>&1],
+  [$3],
+  [$4])
+rm -rf conftest*
+AC_LANG_POP([C++])dnl
+])# _BOOST_SED_CPP
+
+
+
+# BOOST_REQUIRE([VERSION], [ACTION-IF-NOT-FOUND])
+# -----------------------------------------------
+# Look for Boost.  If version is given, it must either be a literal of the form
+# "X.Y.Z" where X, Y and Z are integers (the ".Z" part being optional) or a
+# variable "$var".
+# Defines the value BOOST_CPPFLAGS.  This macro only checks for headers with
+# the required version, it does not check for any of the Boost libraries.
+# On success, defines HAVE_BOOST.  On failure, calls the optional
+# ACTION-IF-NOT-FOUND action if one was supplied.
+# Otherwise aborts with an error message.
+#
+# The search result is cached in boost_cv_inc_path: "no" when nothing usable
+# was found, "yes" when the headers are usable without any extra -I, or the
+# include directory that must be added.
+AC_DEFUN([BOOST_REQUIRE],
+[AC_REQUIRE([AC_PROG_CXX])dnl
+AC_REQUIRE([AC_PROG_GREP])dnl
+echo "$as_me: this is boost.m4[]_BOOST_SERIAL" >&AS_MESSAGE_LOG_FD
+boost_save_IFS=$IFS
+boost_version_req=$1
+IFS=.
+set x $boost_version_req 0 0 0
+IFS=$boost_save_IFS
+shift
+boost_version_req=`expr "$[1]" '*' 100000 + "$[2]" '*' 100 + "$[3]"`
+boost_version_req_string=$[1].$[2].$[3]
+AC_ARG_WITH([boost],
+   [AS_HELP_STRING([--with-boost=DIR],
+                   [prefix of Boost $1 @<:@guess@:>@])])dnl
+AC_ARG_VAR([BOOST_ROOT],[Location of Boost installation])dnl
+# If BOOST_ROOT is set and the user has not provided a value to
+# --with-boost, then treat BOOST_ROOT as if the user supplied it.
+if test x"$BOOST_ROOT" != x; then
+  if test x"$with_boost" = x; then
+    AC_MSG_NOTICE([Detected BOOST_ROOT; continuing with --with-boost=$BOOST_ROOT])
+    with_boost=$BOOST_ROOT
+  else
+    AC_MSG_NOTICE([Detected BOOST_ROOT=$BOOST_ROOT, but overridden by --with-boost=$with_boost])
+  fi
+fi
+AC_SUBST([DISTCHECK_CONFIGURE_FLAGS],
+         ["$DISTCHECK_CONFIGURE_FLAGS '--with-boost=$with_boost'"])dnl
+boost_save_CPPFLAGS=$CPPFLAGS
+  AC_CACHE_CHECK([for Boost headers version >= $boost_version_req_string],
+    [boost_cv_inc_path],
+    [boost_cv_inc_path=no
+AC_LANG_PUSH([C++])dnl
+m4_pattern_allow([^BOOST_VERSION$])dnl
+    AC_LANG_CONFTEST([AC_LANG_PROGRAM([[#include <boost/version.hpp>
+#if !defined BOOST_VERSION
+# error BOOST_VERSION is not defined
+#elif BOOST_VERSION < $boost_version_req
+# error Boost headers version < $boost_version_req
+#endif
+]])])
+    # If the user provided a value to --with-boost, use it and only it.
+    case $with_boost in #(
+      ''|yes) set x '' /opt/local/include /usr/local/include /opt/include \
+                 /usr/include C:/Boost/include;; #(
+      *)      set x "$with_boost/include" "$with_boost";;
+    esac
+    shift
+    for boost_dir
+    do
+    # Without --layout=system, Boost (or at least some versions) installs
+    # itself in <prefix>/include/boost-<version>.  This inner loop helps to
+    # find headers in such directories.
+    #
+    # Any ${boost_dir}/boost-x_xx directories are searched in reverse version
+    # order followed by ${boost_dir}.  The final '.' is a sentinel for
+    # searching $boost_dir itself.  Entries are whitespace separated.
+    #
+    # I didn't indent this loop on purpose (to avoid over-indented code)
+    boost_layout_system_search_list=`cd "$boost_dir" 2>/dev/null \
+        && ls -1 | "${GREP}" '^boost-' | sort -rn -t- -k2 \
+        && echo .`
+    for boost_inc in $boost_layout_system_search_list
+    do
+      if test x"$boost_inc" != x.; then
+        boost_inc="$boost_dir/$boost_inc"
+      else
+        boost_inc="$boost_dir" # Uses sentinel in boost_layout_system_search_list
+      fi
+      if test x"$boost_inc" != x; then
+        # We are going to check whether the version of Boost installed
+        # in $boost_inc is usable by running a compilation that
+        # #includes it.  But if we pass a -I/some/path in which Boost
+        # is not installed, the compiler will just skip this -I and
+        # use other locations (either from CPPFLAGS, or from its list
+        # of system include directories).  As a result we would use
+        # header installed on the machine instead of the /some/path
+        # specified by the user.  So in that precise case (trying
+        # $boost_inc), make sure the version.hpp exists.
+        #
+        # Use test -e as there can be symlinks.
+        test -e "$boost_inc/boost/version.hpp" || continue
+        # Start from the saved flags for every candidate.  Appending to the
+        # current value would keep the -I of earlier rejected candidates in
+        # front, and their headers would shadow the directory being tested.
+        CPPFLAGS="$boost_save_CPPFLAGS -I$boost_inc"
+      fi
+      AC_COMPILE_IFELSE([], [boost_cv_inc_path=yes], [boost_cv_version=no])
+      if test x"$boost_cv_inc_path" = xyes; then
+        if test x"$boost_inc" != x; then
+          boost_cv_inc_path=$boost_inc
+        fi
+        break 2
+      fi
+    done
+    done
+AC_LANG_POP([C++])dnl
+    ])
+    case $boost_cv_inc_path in #(
+      no)
+        boost_errmsg="cannot find Boost headers version >= $boost_version_req_string"
+        m4_if([$2], [],  [AC_MSG_ERROR([$boost_errmsg])],
+                        [AC_MSG_NOTICE([$boost_errmsg])])
+        $2
+        ;;#(
+      yes)
+        BOOST_CPPFLAGS=
+        ;;#(
+      *)
+        AC_SUBST([BOOST_CPPFLAGS], ["-I$boost_cv_inc_path"])dnl
+        ;;
+    esac
+  if test x"$boost_cv_inc_path" != xno; then
+  AC_DEFINE([HAVE_BOOST], [1],
+            [Defined if the requested minimum BOOST version is satisfied])
+  AC_CACHE_CHECK([for Boost's header version],
+    [boost_cv_lib_version],
+    [m4_pattern_allow([^BOOST_LIB_VERSION$])dnl
+     _BOOST_SED_CPP([/^boost-lib-version = /{s///;s/\"//g;p;q;}],
+                    [#include <boost/version.hpp>
+boost-lib-version = BOOST_LIB_VERSION],
+    [boost_cv_lib_version=`cat conftest.i`])])
+    # e.g. "134" for 1_34_1 or "135" for 1_35
+    boost_major_version=`echo "$boost_cv_lib_version" | sed 's/_//;s/_.*//'`
+    case $boost_major_version in #(
+      '' | *[[!0-9]]*)
+        AC_MSG_ERROR([invalid value: boost_major_version=$boost_major_version])
+        ;;
+    esac
+fi
+CPPFLAGS=$boost_save_CPPFLAGS
+])# BOOST_REQUIRE
+
+
+# BOOST_STATIC()
+# --------------
+# Add the "--enable-static-boost" configure argument. If this argument is given
+# on the command line, static versions of the libraries will be looked up.
+# The result is stored in enable_static_boost (yes or no).  An explicit
+# --disable-static-boost or --enable-static-boost=no yields "no", like the
+# default; any other value yields "yes".
+AC_DEFUN([BOOST_STATIC],
+  [AC_ARG_ENABLE([static-boost],
+     [AS_HELP_STRING([--enable-static-boost],
+               [Prefer the static boost libraries over the shared ones [no]])],
+     [case $enableval in #(
+        no) enable_static_boost=no;; #(
+        *)  enable_static_boost=yes;;
+      esac],
+     [enable_static_boost=no])])# BOOST_STATIC
+
+
+# BOOST_FIND_HEADER([HEADER-NAME], [ACTION-IF-NOT-FOUND], [ACTION-IF-FOUND])
+# --------------------------------------------------------------------------
+# Wrapper around AC_CHECK_HEADER for Boost headers.  Useful to check for
+# some parts of the Boost library which are only made of headers and don't
+# require linking (such as Boost.Foreach).
+#
+# Default ACTION-IF-NOT-FOUND: Fail with a fatal error unless Boost couldn't be
+# found in the first place, in which case by default a notice is issued to the
+# user.  Presumably if we haven't died already it's because it's OK to not have
+# Boost, which is why only a notice is issued instead of a hard error.
+#
+# Default ACTION-IF-FOUND: define the preprocessor symbol HAVE_<HEADER-NAME> in
+# case of success (where HEADER-NAME is written LIKE_THIS, e.g.,
+# HAVE_BOOST_FOREACH_HPP).
+#
+# The header check runs as C++ with BOOST_CPPFLAGS appended to CPPFLAGS;
+# CPPFLAGS is restored afterwards.
+AC_DEFUN([BOOST_FIND_HEADER],
+[AC_REQUIRE([BOOST_REQUIRE])dnl
+if test x"$boost_cv_inc_path" = xno; then
+  m4_default([$2], [AC_MSG_NOTICE([Boost not available, not searching for $1])])
+else
+AC_LANG_PUSH([C++])dnl
+boost_save_CPPFLAGS=$CPPFLAGS
+CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
+AC_CHECK_HEADER([$1],
+  [m4_default([$3], [AC_DEFINE(AS_TR_CPP([HAVE_$1]), [1],
+                               [Define to 1 if you have <$1>])])],
+  [m4_default([$2], [AC_MSG_ERROR([cannot find $1])])])
+CPPFLAGS=$boost_save_CPPFLAGS
+AC_LANG_POP([C++])dnl
+fi
+])# BOOST_FIND_HEADER
+
+
+# BOOST_FIND_LIBS([COMPONENT-NAME], [CANDIDATE-LIB-NAMES],
+#                 [PREFERRED-RT-OPT], [HEADER-NAME], [CXX-TEST],
+#                 [CXX-PROLOGUE])
+# --------------------------------------------------------------
+# Look for the Boost library COMPONENT-NAME (e.g., `thread', for
+# libboost_thread) under the possible CANDIDATE-LIB-NAMES (e.g.,
+# "thread_win32 thread").  Check that HEADER-NAME works and check that
+# libboost_LIB-NAME can link with the code CXX-TEST.  The optional
+# argument CXX-PROLOGUE can be used to include some C++ code before
+# the `main' function.
+#
+# Invokes BOOST_FIND_HEADER([HEADER-NAME]) (see above).
+#
+# Boost libraries typically come compiled with several flavors (with different
+# runtime options) so PREFERRED-RT-OPT is the preferred suffix.  A suffix is one
+# or more of the following letters: sgdpn (in that order).  s = static
+# runtime, d = debug build, g = debug/diagnostic runtime, p = STLPort build,
+# n = (unsure) STLPort build without iostreams from STLPort (it looks like `n'
+# must always be used along with `p').  Additionally, PREFERRED-RT-OPT can
+# start with `mt-' to indicate that there is a preference for multi-thread
+# builds.  Some sample values for PREFERRED-RT-OPT: (nothing), mt, d, mt-d, gdp
+# ...  If you want to make sure you have a specific version of Boost
+# (eg, >= 1.33) you *must* invoke BOOST_REQUIRE before this macro.
+#
+# The search itself is delegated to _BOOST_FIND_LIBS and cached in
+# boost_cv_lib_<COMPONENT-NAME>.  On success the upper-cased variables
+# BOOST_<COMPONENT-NAME>_LDFLAGS, _LDPATH and _LIBS, plus BOOST_LDPATH, are
+# substituted; a result of "no" aborts configure.
+AC_DEFUN([BOOST_FIND_LIBS],
+[AC_REQUIRE([BOOST_REQUIRE])dnl
+AC_REQUIRE([_BOOST_FIND_COMPILER_TAG])dnl
+AC_REQUIRE([BOOST_STATIC])dnl
+AC_REQUIRE([_BOOST_GUESS_WHETHER_TO_USE_MT])dnl
+if test x"$boost_cv_inc_path" = xno; then
+  AC_MSG_NOTICE([Boost not available, not searching for the Boost $1 library])
+else
+dnl The else branch is huge and wasn't indented on purpose.
+AC_LANG_PUSH([C++])dnl
+AS_VAR_PUSHDEF([Boost_lib], [boost_cv_lib_$1])dnl
+AS_VAR_PUSHDEF([Boost_lib_LDFLAGS], [boost_cv_lib_$1_LDFLAGS])dnl
+AS_VAR_PUSHDEF([Boost_lib_LDPATH], [boost_cv_lib_$1_LDPATH])dnl
+AS_VAR_PUSHDEF([Boost_lib_LIBS], [boost_cv_lib_$1_LIBS])dnl
+BOOST_FIND_HEADER([$4])
+boost_save_CPPFLAGS=$CPPFLAGS
+CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
+AC_CACHE_CHECK([for the Boost $1 library], [Boost_lib],
+               [_BOOST_FIND_LIBS($@)])
+case $Boost_lib in #(
+  (no) _AC_MSG_LOG_CONFTEST
+    AC_MSG_ERROR([cannot find the flags to link with Boost $1])
+    ;;
+esac
+AC_SUBST(AS_TR_CPP([BOOST_$1_LDFLAGS]), [$Boost_lib_LDFLAGS])dnl
+AC_SUBST(AS_TR_CPP([BOOST_$1_LDPATH]), [$Boost_lib_LDPATH])dnl
+AC_SUBST([BOOST_LDPATH], [$Boost_lib_LDPATH])dnl
+AC_SUBST(AS_TR_CPP([BOOST_$1_LIBS]), [$Boost_lib_LIBS])dnl
+CPPFLAGS=$boost_save_CPPFLAGS
+AS_VAR_POPDEF([Boost_lib])dnl
+AS_VAR_POPDEF([Boost_lib_LDFLAGS])dnl
+AS_VAR_POPDEF([Boost_lib_LDPATH])dnl
+AS_VAR_POPDEF([Boost_lib_LIBS])dnl
+AC_LANG_POP([C++])dnl
+fi
+])
+
+
+# BOOST_FIND_LIB([LIB-NAME],
+#                [PREFERRED-RT-OPT], [HEADER-NAME], [CXX-TEST],
+#                [CXX-PROLOGUE])
+# --------------------------------------------------------------
+# Backward compatibility wrapper for BOOST_FIND_LIBS.  LIB-NAME is passed
+# twice: once as the library name and once as the only candidate library
+# name; the remaining arguments are forwarded unchanged.
+AC_DEFUN([BOOST_FIND_LIB],
+[BOOST_FIND_LIBS([$1], $@)])
+
+
+# _BOOST_FIND_LIBS([LIB-NAME], [CANDIDATE-LIB-NAMES],
+#                 [PREFERRED-RT-OPT], [HEADER-NAME], [CXX-TEST],
+#                 [CXX-PROLOGUE])
+# --------------------------------------------------------------
+# Real implementation of BOOST_FIND_LIBS: rely on these local macros:
+# Boost_lib, Boost_lib_LDFLAGS, Boost_lib_LDPATH, Boost_lib_LIBS
+#
+# The algorithm is as follows: first look for a given library name
+# according to the user's PREFERRED-RT-OPT.  For each library name, we
+# prefer to use the ones that carry the tag (toolset name).  Each
+# library is searched through the various standard paths where Boost is
+# usually installed.  If we can't find the standard variants, we try
+# to enforce -mt (for instance on MacOSX, libboost_thread.dylib
+# doesn't exist but there's -obviously- libboost_thread-mt.dylib).
+#
+# On return Boost_lib is `yes' or `no'.  On success Boost_lib_LIBS,
+# Boost_lib_LDFLAGS and Boost_lib_LDPATH describe how to link.
+AC_DEFUN([_BOOST_FIND_LIBS],
+[Boost_lib=no
+  # Split PREFERRED-RT-OPT into the multi-thread marker and the runtime suffix.
+  case "$3" in #(
+    (mt | mt-) boost_mt=-mt; boost_rtopt=;; #(
+    (mt* | mt-*) boost_mt=-mt; boost_rtopt=`expr "X$3" : 'Xmt-*\(.*\)'`;; #(
+    (*) boost_mt=; boost_rtopt=$3;;
+  esac
+  if test $enable_static_boost = yes; then
+    boost_rtopt="s$boost_rtopt"
+  fi
+  # Find the proper debug variant depending on what we've been asked to find.
+  case $boost_rtopt in #(
+    (*d*) boost_rt_d=$boost_rtopt;; #(
+    (*[[sgpn]]*) # Insert the `d' at the right place (in between `sg' and `pn')
+      boost_rt_d=`echo "$boost_rtopt" | sed 's/\(s*g*\)\(p*n*\)/\1d\2/'`;; #(
+    (*) boost_rt_d='-d';;
+  esac
+  # If the PREFERRED-RT-OPT are not empty, prepend a `-'.
+  test -n "$boost_rtopt" && boost_rtopt="-$boost_rtopt"
+  $boost_guess_use_mt && boost_mt=-mt
+  # Look for the abs path to the static archive.
+  # $libext is computed by Libtool but let's make sure it's non empty.
+  test -z "$libext" &&
+    AC_MSG_ERROR([the libext variable is empty, did you invoke Libtool?])
+  boost_save_ac_objext=$ac_objext
+  # Generate the test file.
+  AC_LANG_CONFTEST([AC_LANG_PROGRAM([#include <$4>
+$6], [$5])])
+dnl Optimization hacks: compiling C++ is slow, especially with Boost.  What
+dnl we're trying to do here is guess the right combination of link flags
+dnl (LIBS / LDFLAGS) to use a given library.  This can take several
+dnl iterations before it succeeds and is thus *very* slow.  So what we do
+dnl instead is that we compile the code first (and thus get an object file,
+dnl typically conftest.o).  Then we try various combinations of link flags
+dnl until we succeed to link conftest.o in an executable.  The problem is
+dnl that the various TRY_LINK / COMPILE_IFELSE macros of Autoconf always
+dnl remove all the temporary files including conftest.o.  So the trick here
+dnl is to temporarily change the value of ac_objext so that conftest.o is
+dnl preserved across tests.  This is obviously fragile and I will burn in
+dnl hell for not respecting Autoconf's documented interfaces, but in the
+dnl mean time, it optimizes the macro by a factor of 5 to 30.
+dnl Another small optimization: the first argument of AC_COMPILE_IFELSE left
+dnl empty because the test file is generated only once above (before we
+dnl start the for loops).
+  AC_COMPILE_IFELSE([],
+    [ac_objext=do_not_rm_me_plz],
+    [AC_MSG_ERROR([cannot compile a test that uses Boost $1])])
+  ac_objext=$boost_save_ac_objext
+  boost_failed_libs=
+# Don't bother to indent the following nested for loops, only the 2
+# innermost ones matter.
+for boost_lib_ in $2; do
+for boost_tag_ in -$boost_cv_lib_tag ''; do
+for boost_ver_ in -$boost_cv_lib_version ''; do
+for boost_mt_ in $boost_mt -mt ''; do
+for boost_rtopt_ in $boost_rtopt '' -d; do
+  for boost_lib in \
+    boost_$boost_lib_$boost_tag_$boost_mt_$boost_rtopt_$boost_ver_ \
+    boost_$boost_lib_$boost_tag_$boost_rtopt_$boost_ver_ \
+    boost_$boost_lib_$boost_tag_$boost_mt_$boost_ver_ \
+    boost_$boost_lib_$boost_tag_$boost_ver_
+  do
+    # Avoid testing twice the same lib
+    case $boost_failed_libs in #(
+      (*@$boost_lib@*) continue;;
+    esac
+    # If with_boost is empty, we'll search in /lib first, which is not quite
+    # right so instead we'll try to a location based on where the headers are.
+    boost_tmp_lib=$with_boost
+    test x"$with_boost" = x && boost_tmp_lib=${boost_cv_inc_path%/include}
+    for boost_ldpath in "$boost_tmp_lib/lib" '' \
+             /opt/local/lib* /usr/local/lib* /opt/lib* /usr/lib* \
+             "$with_boost" C:/Boost/lib /lib*
+    do
+      # Don't waste time with directories that don't exist.
+      if test x"$boost_ldpath" != x && test ! -e "$boost_ldpath"; then
+        continue
+      fi
+      boost_save_LDFLAGS=$LDFLAGS
+      # Are we looking for a static library?
+      case $boost_ldpath:$boost_rtopt_ in #(
+        (*?*:*s*) # Yes (Non empty boost_ldpath + s in rt opt)
+          Boost_lib_LIBS="$boost_ldpath/lib$boost_lib.$libext"
+          test -e "$Boost_lib_LIBS" || continue;; #(
+        (*) # No: use -lboost_foo to find the shared library.
+          Boost_lib_LIBS="-l$boost_lib";;
+      esac
+      boost_save_LIBS=$LIBS
+      LIBS="$Boost_lib_LIBS $LIBS"
+      test x"$boost_ldpath" != x && LDFLAGS="$LDFLAGS -L$boost_ldpath"
+dnl First argument of AC_LINK_IFELSE left empty because the test file is
+dnl generated only once above (before we start the for loops).
+      _BOOST_AC_LINK_IFELSE([],
+                            [Boost_lib=yes], [Boost_lib=no])
+      ac_objext=$boost_save_ac_objext
+      LDFLAGS=$boost_save_LDFLAGS
+      LIBS=$boost_save_LIBS
+      if test x"$Boost_lib" = xyes; then
+        # Check or use cached result of whether or not using -R or
+        # -rpath makes sense.  Some implementations of ld, such as for
+        # Mac OSX, require -rpath but -R is the flag known to work on
+        # other systems.  https://github.com/tsuna/boost.m4/issues/19
+        AC_CACHE_VAL([boost_cv_rpath_link_ldflag],
+          [case $boost_ldpath in
+           '') # Nothing to do.
+             boost_cv_rpath_link_ldflag=
+             boost_rpath_link_ldflag_found=yes;;
+           *)
+            for boost_cv_rpath_link_ldflag in -Wl,-R, -Wl,-rpath,; do
+              LDFLAGS="$boost_save_LDFLAGS -L$boost_ldpath $boost_cv_rpath_link_ldflag$boost_ldpath"
+              LIBS="$boost_save_LIBS $Boost_lib_LIBS"
+              _BOOST_AC_LINK_IFELSE([],
+                [boost_rpath_link_ldflag_found=yes
+                break],
+                [boost_rpath_link_ldflag_found=no])
+            done
+            ;;
+          esac
+          AS_IF([test "x$boost_rpath_link_ldflag_found" != "xyes"],
+            [AC_MSG_ERROR([Unable to determine whether to use -R or -rpath])])
+          LDFLAGS=$boost_save_LDFLAGS
+          LIBS=$boost_save_LIBS
+          ])
+        test x"$boost_ldpath" != x &&
+          Boost_lib_LDFLAGS="-L$boost_ldpath $boost_cv_rpath_link_ldflag$boost_ldpath"
+        Boost_lib_LDPATH="$boost_ldpath"
+        # Success: leave all seven nested loops at once.
+        break 7
+      else
+        boost_failed_libs="$boost_failed_libs@$boost_lib@"
+      fi
+    done
+  done
+done
+done
+done
+done
+done # boost_lib_
+rm -f conftest.$ac_objext
+])
+
+
+
+# --------------------------------------- #
+# Checks for the various Boost libraries. #
+# --------------------------------------- #
+
+# List of boost libraries: http://www.boost.org/libs/libraries.htm
+# The page http://beta.boost.org/doc/libs is useful: it gives the first release
+# version of each library (among other things).
+
+# BOOST_DEFUN(LIBRARY, CODE)
+# --------------------------
+# Define BOOST_<LIBRARY-UPPERCASE> as a macro that runs CODE.  While CODE is
+# being expanded, the m4 macro BOOST_Library is defined to LIBRARY (with its
+# original case); that definition is popped again once CODE has been expanded.
+#
+# Use indir to avoid the warning on underquoted macro name given to AC_DEFUN.
+m4_define([BOOST_DEFUN],
+[m4_indir([AC_DEFUN],
+          m4_toupper([BOOST_$1]),
+[m4_pushdef([BOOST_Library], [$1])dnl
+$2
+m4_popdef([BOOST_Library])dnl
+])
+])
+
+# BOOST_ARRAY()
+# -------------
+# Look for Boost.Array by checking for boost/array.hpp with
+# BOOST_FIND_HEADER.  Only the header is checked; no library is searched for.
+BOOST_DEFUN([Array],
+[BOOST_FIND_HEADER([boost/array.hpp])])
+
+
+# BOOST_ASIO()
+# ------------
+# Look for Boost.Asio (new in Boost 1.35).  Requires BOOST_SYSTEM to have
+# been expanded first (through AC_REQUIRE), then checks for boost/asio.hpp.
+BOOST_DEFUN([Asio],
+[AC_REQUIRE([BOOST_SYSTEM])dnl
+BOOST_FIND_HEADER([boost/asio.hpp])])
+
+
+# BOOST_BIND()
+# ------------
+# Look for Boost.Bind by checking for boost/bind.hpp with BOOST_FIND_HEADER.
+# Only the header is checked; no library is searched for.
+BOOST_DEFUN([Bind],
+[BOOST_FIND_HEADER([boost/bind.hpp])])
+
+
+# BOOST_CHRONO([PREFERRED-RT-OPT])
+# --------------------------------
+# Look for Boost.Chrono.  For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.  The Boost.System link flags are
+# added to LIBS and LDFLAGS while probing, and restored afterwards.
+BOOST_DEFUN([Chrono],
+[# Do we have to check for Boost.System?  Boost.System only exists as of
+# 1.35.0.  If we have a version <1.35, we must not attempt to find
+# Boost.System as it didn't exist by then.
+if test $boost_major_version -ge 135; then
+  BOOST_SYSTEM([$1])
+fi # end of the Boost.System check.
+dnl Use save variables of our own instead of the ones of BOOST_FILESYSTEM.
+boost_chrono_save_LIBS=$LIBS
+boost_chrono_save_LDFLAGS=$LDFLAGS
+m4_pattern_allow([^BOOST_SYSTEM_(LIBS|LDFLAGS)$])dnl
+LIBS="$LIBS $BOOST_SYSTEM_LIBS"
+LDFLAGS="$LDFLAGS $BOOST_SYSTEM_LDFLAGS"
+BOOST_FIND_LIB([chrono], [$1],
+                [boost/chrono.hpp],
+                [boost::chrono::thread_clock d;])
+dnl For static links, list the Boost.System libraries after the Chrono ones.
+dnl This must extend the Chrono variable, not the Filesystem one.
+if test $enable_static_boost = yes && test $boost_major_version -ge 135; then
+  BOOST_CHRONO_LIBS="$BOOST_CHRONO_LIBS $BOOST_SYSTEM_LIBS"
+fi
+LIBS=$boost_chrono_save_LIBS
+LDFLAGS=$boost_chrono_save_LDFLAGS
+])# BOOST_CHRONO
+
+
+# BOOST_CONVERSION()
+# ------------------
+# Look for Boost.Conversion (cast / lexical_cast) by checking for both
+# boost/cast.hpp and boost/lexical_cast.hpp.  Only headers are checked.
+BOOST_DEFUN([Conversion],
+[BOOST_FIND_HEADER([boost/cast.hpp])
+BOOST_FIND_HEADER([boost/lexical_cast.hpp])
+])# BOOST_CONVERSION
+
+
+# BOOST_CRC()
+# -----------
+# Look for Boost.CRC by checking for boost/crc.hpp with BOOST_FIND_HEADER.
+# Only the header is checked; no library is searched for.
+BOOST_DEFUN([CRC],
+[BOOST_FIND_HEADER([boost/crc.hpp])
+])# BOOST_CRC
+
+
+# BOOST_DATE_TIME([PREFERRED-RT-OPT])
+# -----------------------------------
+# Look for Boost.Date_Time.  For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.  The `date_time' library is probed
+# with a test program that includes boost/date_time/posix_time/posix_time.hpp
+# and declares a boost::posix_time::ptime object.
+BOOST_DEFUN([Date_Time],
+[BOOST_FIND_LIB([date_time], [$1],
+                [boost/date_time/posix_time/posix_time.hpp],
+                [boost::posix_time::ptime t;])
+])# BOOST_DATE_TIME
+
+
+# BOOST_FILESYSTEM([PREFERRED-RT-OPT])
+# ------------------------------------
+# Look for Boost.Filesystem.  For the documentation of PREFERRED-RT-OPT, see
+# the documentation of BOOST_FIND_LIB above.
+# Do not check for boost/filesystem.hpp because this file was introduced in
+# 1.34.
+# The Boost.System link flags are added to LIBS and LDFLAGS while probing and
+# both variables are restored before this macro returns.
+BOOST_DEFUN([Filesystem],
+[# Do we have to check for Boost.System?  This link-time dependency was
+# added as of 1.35.0.  If we have a version <1.35, we must not attempt to
+# find Boost.System as it didn't exist by then.
+if test $boost_major_version -ge 135; then
+  BOOST_SYSTEM([$1])
+fi # end of the Boost.System check.
+boost_filesystem_save_LIBS=$LIBS
+boost_filesystem_save_LDFLAGS=$LDFLAGS
+m4_pattern_allow([^BOOST_SYSTEM_(LIBS|LDFLAGS)$])dnl
+LIBS="$LIBS $BOOST_SYSTEM_LIBS"
+LDFLAGS="$LDFLAGS $BOOST_SYSTEM_LDFLAGS"
+BOOST_FIND_LIB([filesystem], [$1],
+                [boost/filesystem/path.hpp], [boost::filesystem::path p;])
+dnl For static links the Boost.System libraries are appended to the exported
+dnl Filesystem libraries; presumably because a static archive does not record
+dnl its own dependencies (TODO confirm).
+if test $enable_static_boost = yes && test $boost_major_version -ge 135; then
+  BOOST_FILESYSTEM_LIBS="$BOOST_FILESYSTEM_LIBS $BOOST_SYSTEM_LIBS"
+fi
+LIBS=$boost_filesystem_save_LIBS
+LDFLAGS=$boost_filesystem_save_LDFLAGS
+])# BOOST_FILESYSTEM
+
+
+# BOOST_FLYWEIGHT()
+# -----------------
+# Look for Boost.Flyweight by checking for boost/flyweight.hpp, and
+# substitute BOOST_FLYWEIGHT_LIBS with the pthread flag computed by
+# _BOOST_PTHREAD_FLAG.
+BOOST_DEFUN([Flyweight],
+[dnl There's a hidden dependency on pthreads.
+AC_REQUIRE([_BOOST_PTHREAD_FLAG])dnl
+BOOST_FIND_HEADER([boost/flyweight.hpp])
+AC_SUBST([BOOST_FLYWEIGHT_LIBS], [$boost_cv_pthread_flag])
+])
+
+
+# BOOST_FOREACH()
+# ---------------
+# Look for Boost.Foreach by checking for boost/foreach.hpp with
+# BOOST_FIND_HEADER.  Only the header is checked; no library is searched for.
+BOOST_DEFUN([Foreach],
+[BOOST_FIND_HEADER([boost/foreach.hpp])])
+
+
+# BOOST_FORMAT()
+# --------------
+# Look for Boost.Format by checking for boost/format.hpp.  Only the header is
+# checked; no library is searched for.
+# Note: we can't check for boost/format/format_fwd.hpp because the header isn't
+# standalone.  It can't be compiled because it triggers the following error:
+# boost/format/detail/config_macros.hpp:88: error: 'locale' in namespace 'std'
+#                                                  does not name a type
+BOOST_DEFUN([Format],
+[BOOST_FIND_HEADER([boost/format.hpp])])
+
+
+# BOOST_FUNCTION()
+# ----------------
+# Look for Boost.Function by checking for boost/function.hpp with
+# BOOST_FIND_HEADER.  Only the header is checked; no library is searched for.
+BOOST_DEFUN([Function],
+[BOOST_FIND_HEADER([boost/function.hpp])])
+
+
+# BOOST_GEOMETRY()
+# ----------------
+# Look for Boost.Geometry (new since 1.47.0) by checking for
+# boost/geometry.hpp.  Only the header is checked.
+BOOST_DEFUN([Geometry],
+[BOOST_FIND_HEADER([boost/geometry.hpp])
+])# BOOST_GEOMETRY
+
+
+# BOOST_GRAPH([PREFERRED-RT-OPT])
+# -------------------------------
+# Look for Boost.Graph.  For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.  The `graph' library is probed with
+# a test program that includes boost/graph/adjacency_list.hpp and declares a
+# boost::adjacency_list<> object.
+BOOST_DEFUN([Graph],
+[BOOST_FIND_LIB([graph], [$1],
+                [boost/graph/adjacency_list.hpp], [boost::adjacency_list<> g;])
+])# BOOST_GRAPH
+
+
+# BOOST_IOSTREAMS([PREFERRED-RT-OPT])
+# -----------------------------------
+# Look for Boost.IOStreams.  For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.  The `iostreams' library is probed
+# with a test program that includes boost/iostreams/device/file_descriptor.hpp
+# and closes a default-constructed boost::iostreams::file_descriptor.
+BOOST_DEFUN([IOStreams],
+[BOOST_FIND_LIB([iostreams], [$1],
+                [boost/iostreams/device/file_descriptor.hpp],
+                [boost::iostreams::file_descriptor fd; fd.close();])
+])# BOOST_IOSTREAMS
+
+
+# BOOST_HASH()
+# ------------
+# Look for Boost.Functional/Hash by checking for boost/functional/hash.hpp.
+# Only the header is checked; no library is searched for.
+BOOST_DEFUN([Hash],
+[BOOST_FIND_HEADER([boost/functional/hash.hpp])])
+
+
+# BOOST_LAMBDA()
+# --------------
+# Look for Boost.Lambda by checking for boost/lambda/lambda.hpp.
+# Only the header is checked; no library is searched for.
+BOOST_DEFUN([Lambda],
+[BOOST_FIND_HEADER([boost/lambda/lambda.hpp])])
+
+
+# BOOST_LOG([PREFERRED-RT-OPT])
+# -----------------------------
+# Look for Boost.Log.  For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.  The `log' library is probed with a
+# test program that includes boost/log/core/core.hpp and calls get_value()
+# on a boost::log::attribute.
+BOOST_DEFUN([Log],
+[BOOST_FIND_LIB([log], [$1],
+    [boost/log/core/core.hpp],
+    [boost::log::attribute a; a.get_value();])
+])# BOOST_LOG
+
+
+# BOOST_LOG_SETUP([PREFERRED-RT-OPT])
+# -----------------------------------
+# Look for the `log_setup' library of Boost.Log.  For the documentation of
+# PREFERRED-RT-OPT, see the documentation of BOOST_FIND_LIB above.
+# BOOST_LOG is required first (through AC_REQUIRE).  The probe includes
+# boost/log/utility/setup/from_settings.hpp and calls empty() on a
+# boost::log::basic_settings<char>.
+BOOST_DEFUN([Log_Setup],
+[AC_REQUIRE([BOOST_LOG])dnl
+BOOST_FIND_LIB([log_setup], [$1],
+    [boost/log/utility/setup/from_settings.hpp],
+    [boost::log::basic_settings<char> bs; bs.empty();])
+])# BOOST_LOG_SETUP
+
+
+# BOOST_MATH()
+# ------------
+# Look for Boost.Math.  For now only the boost/math/special_functions.hpp
+# header is checked; no library is searched for.
+# TODO: This library isn't header-only but it comes in multiple different
+# flavors that don't play well with BOOST_FIND_LIB (e.g, libboost_math_c99,
+# libboost_math_c99f, libboost_math_c99l, libboost_math_tr1,
+# libboost_math_tr1f, libboost_math_tr1l).  This macro must be fixed to do the
+# right thing anyway.
+BOOST_DEFUN([Math],
+[BOOST_FIND_HEADER([boost/math/special_functions.hpp])])
+
+
+# BOOST_MPI([PREFERRED-RT-OPT])
+# -----------------------------
+# Look for Boost MPI.  For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.  When the MPICXX variable is
+# non-empty it temporarily replaces CXX (and "MPICXX -E" replaces CXXCPP)
+# for the duration of the search; otherwise CXX is used as is.
+#
+BOOST_DEFUN([MPI],
+[# Remember the current compiler and preprocessor so they can be put back.
+boost_mpi_orig_CXX=$CXX
+boost_mpi_orig_CXXCPP=$CXXCPP
+# Prefer the MPI compiler wrapper when one was supplied.
+if test "x$MPICXX" != x; then
+  CXX=$MPICXX
+  CXXCPP="$MPICXX -E"
+fi
+BOOST_FIND_LIB([mpi], [$1],
+               [boost/mpi.hpp],
+               [int argc = 0;
+                char **argv = 0;
+                boost::mpi::environment env(argc,argv);])
+CXX=$boost_mpi_orig_CXX
+CXXCPP=$boost_mpi_orig_CXXCPP
+])# BOOST_MPI
+
+
+# BOOST_MULTIARRAY()
+# ------------------
+# Look for Boost.MultiArray by checking for boost/multi_array.hpp.
+# Only the header is checked; no library is searched for.
+BOOST_DEFUN([MultiArray],
+[BOOST_FIND_HEADER([boost/multi_array.hpp])])
+
+
+# BOOST_NUMERIC_UBLAS()
+# ---------------------
+# Look for Boost.NumericUblas (Basic Linear Algebra) by checking for
+# boost/numeric/ublas/vector.hpp.  Only the header is checked.
+BOOST_DEFUN([Numeric_Ublas],
+[BOOST_FIND_HEADER([boost/numeric/ublas/vector.hpp])
+])# BOOST_NUMERIC_UBLAS
+
+
+# BOOST_NUMERIC_CONVERSION()
+# --------------------------
+# Look for Boost.NumericConversion (policy-based numeric conversion) by
+# checking for boost/numeric/conversion/converter.hpp.  Only the header is
+# checked.
+BOOST_DEFUN([Numeric_Conversion],
+[BOOST_FIND_HEADER([boost/numeric/conversion/converter.hpp])
+])# BOOST_NUMERIC_CONVERSION
+
+
+# BOOST_OPTIONAL()
+# ----------------
+# Look for Boost.Optional by checking for boost/optional.hpp.
+# Only the header is checked; no library is searched for.
+BOOST_DEFUN([Optional],
+[BOOST_FIND_HEADER([boost/optional.hpp])])
+
+
+# BOOST_PREPROCESSOR()
+# --------------------
+# Look for Boost.Preprocessor by checking for boost/preprocessor/repeat.hpp.
+# Only the header is checked; no library is searched for.
+BOOST_DEFUN([Preprocessor],
+[BOOST_FIND_HEADER([boost/preprocessor/repeat.hpp])])
+
+
+# BOOST_UNORDERED()
+# -----------------
+# Look for Boost.Unordered by checking for boost/unordered_map.hpp.
+# Only the header is checked; no library is searched for.
+BOOST_DEFUN([Unordered],
+[BOOST_FIND_HEADER([boost/unordered_map.hpp])])
+
+
+# BOOST_UUID()
+# ------------
+# Look for Boost.Uuid by checking for boost/uuid/uuid.hpp.
+# Only the header is checked; no library is searched for.
+BOOST_DEFUN([Uuid],
+[BOOST_FIND_HEADER([boost/uuid/uuid.hpp])])
+
+
+# BOOST_PROGRAM_OPTIONS([PREFERRED-RT-OPT])
+# -----------------------------------------
+# Look for Boost.Program_options.  For the documentation of PREFERRED-RT-OPT,
+# see the documentation of BOOST_FIND_LIB above.  The `program_options'
+# library is probed with a test program that includes
+# boost/program_options.hpp and constructs an options_description.
+BOOST_DEFUN([Program_Options],
+[BOOST_FIND_LIB([program_options], [$1],
+                [boost/program_options.hpp],
+                [boost::program_options::options_description d("test");])
+])# BOOST_PROGRAM_OPTIONS
+
+
+
+# _BOOST_PYTHON_CONFIG(VARIABLE, FLAG)
+# ------------------------------------
+# Save VARIABLE, and define it via `python-config --FLAG`.
+# Substitute BOOST_PYTHON_VARIABLE.
+#
+# More precisely: BOOST_PYTHON_VARIABLE is substituted with the output of
+# `python-config --FLAG` (anything python-config writes to stderr is
+# discarded), the current value of VARIABLE is saved in
+# boost_python_save_VARIABLE, and the python-config output is appended to
+# VARIABLE.  Restoring VARIABLE from the saved copy is left to the caller.
+m4_define([_BOOST_PYTHON_CONFIG],
+[AC_SUBST([BOOST_PYTHON_$1],
+          [`python-config --$2 2>/dev/null`])dnl
+boost_python_save_$1=$$1
+$1="$$1 $BOOST_PYTHON_$1"])
+
+
+# BOOST_PYTHON([PREFERRED-RT-OPT])
+# --------------------------------
+# Look for Boost.Python.  For the documentation of PREFERRED-RT-OPT,
+# see the documentation of BOOST_FIND_LIB above.
+# CPPFLAGS, LDFLAGS and LIBS are extended with the output of python-config
+# while probing and restored afterwards.  Two candidate library names are
+# tried in order: `python' and `python3'.
+BOOST_DEFUN([Python],
+[_BOOST_PYTHON_CONFIG([CPPFLAGS], [includes])
+_BOOST_PYTHON_CONFIG([LDFLAGS],   [ldflags])
+_BOOST_PYTHON_CONFIG([LIBS],      [libs])
+m4_pattern_allow([^BOOST_PYTHON_MODULE$])dnl
+dnl The test body is empty; the prologue defines an empty Python module.
+BOOST_FIND_LIBS([python], [python python3], [$1],
+                [boost/python.hpp],
+                [], [BOOST_PYTHON_MODULE(empty) {}])
+CPPFLAGS=$boost_python_save_CPPFLAGS
+LDFLAGS=$boost_python_save_LDFLAGS
+LIBS=$boost_python_save_LIBS
+])# BOOST_PYTHON
+
+
+# BOOST_REF()
+# -----------
+# Look for Boost.Ref by checking for boost/ref.hpp with BOOST_FIND_HEADER.
+# Only the header is checked; no library is searched for.
+BOOST_DEFUN([Ref],
+[BOOST_FIND_HEADER([boost/ref.hpp])])
+
+
+# BOOST_REGEX([PREFERRED-RT-OPT])
+# -------------------------------
+# Look for Boost.Regex.  For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.  The `regex' library is probed with
+# a test program that includes boost/regex.hpp, constructs a boost::regex
+# and calls boost::regex_match.
+BOOST_DEFUN([Regex],
+[BOOST_FIND_LIB([regex], [$1],
+                [boost/regex.hpp],
+                [boost::regex exp("*"); boost::regex_match("foo", exp);])
+])# BOOST_REGEX
+
+
+# BOOST_SERIALIZATION([PREFERRED-RT-OPT])
+# ---------------------------------------
+# Look for Boost.Serialization.  For the documentation of PREFERRED-RT-OPT, see
+# the documentation of BOOST_FIND_LIB above.  The `serialization' library is
+# probed with a test program that includes boost/archive/text_oarchive.hpp.
+# The test body dereferences a null ostream pointer; it is meant to be
+# compiled and linked, presumably never executed (TODO confirm).
+BOOST_DEFUN([Serialization],
+[BOOST_FIND_LIB([serialization], [$1],
+                [boost/archive/text_oarchive.hpp],
+                [std::ostream* o = 0; // Cheap way to get an ostream...
+                boost::archive::text_oarchive t(*o);])
+])# BOOST_SERIALIZATION
+
+
+# BOOST_SIGNALS([PREFERRED-RT-OPT])
+# ---------------------------------
+# Look for Boost.Signals.  For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.  The `signals' library is probed
+# with a test program that includes boost/signal.hpp and declares a
+# boost::signal<void ()> object.
+BOOST_DEFUN([Signals],
+[BOOST_FIND_LIB([signals], [$1],
+                [boost/signal.hpp],
+                [boost::signal<void ()> s;])
+])# BOOST_SIGNALS
+
+
+# BOOST_SIGNALS2()
+# ----------------
+# Look for Boost.Signals2 (new since 1.39.0) by checking for
+# boost/signals2.hpp.  Only the header is checked.
+BOOST_DEFUN([Signals2],
+[BOOST_FIND_HEADER([boost/signals2.hpp])
+])# BOOST_SIGNALS2
+
+
+# BOOST_SMART_PTR()
+# -----------------
+# Look for Boost.SmartPtr by checking for both boost/scoped_ptr.hpp and
+# boost/shared_ptr.hpp.  Only headers are checked.
+BOOST_DEFUN([Smart_Ptr],
+[BOOST_FIND_HEADER([boost/scoped_ptr.hpp])
+BOOST_FIND_HEADER([boost/shared_ptr.hpp])
+])
+
+
+# BOOST_STATICASSERT()
+# --------------------
+# Look for Boost.StaticAssert by checking for boost/static_assert.hpp.
+# Only the header is checked; no library is searched for.
+BOOST_DEFUN([StaticAssert],
+[BOOST_FIND_HEADER([boost/static_assert.hpp])])
+
+
+# BOOST_STRING_ALGO()
+# -------------------
+# Look for Boost.StringAlgo by checking for boost/algorithm/string.hpp.
+# Only the header is checked; no library is searched for.
+BOOST_DEFUN([String_Algo],
+[BOOST_FIND_HEADER([boost/algorithm/string.hpp])
+])
+
+
+# BOOST_SYSTEM([PREFERRED-RT-OPT])
+# --------------------------------
+# Look for Boost.System.  For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.  This library was introduced in Boost
+# 1.35.0.  The `system' library is probed with a test program that includes
+# boost/system/error_code.hpp and clears a boost::system::error_code.
+BOOST_DEFUN([System],
+[BOOST_FIND_LIB([system], [$1],
+                [boost/system/error_code.hpp],
+                [boost::system::error_code e; e.clear();])
+])# BOOST_SYSTEM
+
+
+# BOOST_TEST([PREFERRED-RT-OPT])
+# ------------------------------
+# Look for Boost.Test.  For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.  The `unit_test_framework' library
+# is probed with a test program that includes boost/test/unit_test.hpp, uses
+# BOOST_CHECK in its body, and defines init_unit_test_suite in its prologue.
+BOOST_DEFUN([Test],
+[m4_pattern_allow([^BOOST_CHECK$])dnl
+BOOST_FIND_LIB([unit_test_framework], [$1],
+               [boost/test/unit_test.hpp], [BOOST_CHECK(2 == 2);],
+               [using boost::unit_test::test_suite;
+               test_suite* init_unit_test_suite(int argc, char ** argv)
+               { return NULL; }])
+])# BOOST_TEST
+
+
+# BOOST_THREAD([PREFERRED-RT-OPT])
+# ---------------------------------
+# Look for Boost.Thread.  For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+# On return BOOST_THREAD_LIBS additionally holds the Boost.System libraries
+# and the pthread flag, BOOST_THREAD_LDFLAGS additionally holds the
+# Boost.System linker flags, and the pthread flag is appended to
+# BOOST_CPPFLAGS.  LIBS, LDFLAGS and CPPFLAGS are left untouched.
+BOOST_DEFUN([Thread],
+[dnl Having the pthread flag is required at least on GCC3 where
+dnl boost/thread.hpp would complain if we try to compile without
+dnl -pthread on GNU/Linux.
+AC_REQUIRE([_BOOST_PTHREAD_FLAG])dnl
+boost_thread_save_LIBS=$LIBS
+boost_thread_save_LDFLAGS=$LDFLAGS
+boost_thread_save_CPPFLAGS=$CPPFLAGS
+# Link-time dependency from thread to system was added as of 1.49.0.
+if test $boost_major_version -ge 149; then
+BOOST_SYSTEM([$1])
+fi # end of the Boost.System check.
+m4_pattern_allow([^BOOST_SYSTEM_(LIBS|LDFLAGS)$])dnl
+LIBS="$LIBS $BOOST_SYSTEM_LIBS $boost_cv_pthread_flag"
+LDFLAGS="$LDFLAGS $BOOST_SYSTEM_LDFLAGS"
+CPPFLAGS="$CPPFLAGS $boost_cv_pthread_flag"
+
+# When compiling for the Windows platform, the threads library is named
+# differently.  Always assign the suffix so that a stale or inherited
+# value cannot leak into the candidate library name.
+case $host_os in
+  (*mingw*) boost_thread_lib_ext=_win32;;
+  (*) boost_thread_lib_ext=;;
+esac
+BOOST_FIND_LIBS([thread], [thread$boost_thread_lib_ext],
+                [$1],
+                [boost/thread.hpp], [boost::thread t; boost::mutex m;])
+
+BOOST_THREAD_LIBS="$BOOST_THREAD_LIBS $BOOST_SYSTEM_LIBS $boost_cv_pthread_flag"
+dnl Keep the flags that were just found for the thread library and add the
+dnl Boost.System ones; assigning only the latter would drop the -L and rpath
+dnl flags whenever Boost.System was not searched for.
+BOOST_THREAD_LDFLAGS="$BOOST_THREAD_LDFLAGS $BOOST_SYSTEM_LDFLAGS"
+BOOST_CPPFLAGS="$BOOST_CPPFLAGS $boost_cv_pthread_flag"
+LIBS=$boost_thread_save_LIBS
+LDFLAGS=$boost_thread_save_LDFLAGS
+CPPFLAGS=$boost_thread_save_CPPFLAGS
+])# BOOST_THREAD
+
+AU_ALIAS([BOOST_THREADS], [BOOST_THREAD])
+
+
+# BOOST_TOKENIZER()
+# -----------------
+# Look for Boost.Tokenizer by checking for boost/tokenizer.hpp.
+# Only the header is checked; no library is searched for.
+BOOST_DEFUN([Tokenizer],
+[BOOST_FIND_HEADER([boost/tokenizer.hpp])])
+
+
+# BOOST_TRIBOOL()
+# ---------------
+# Look for Boost.Tribool by checking for both boost/logic/tribool_fwd.hpp
+# and boost/logic/tribool.hpp.  Only headers are checked.
+BOOST_DEFUN([Tribool],
+[BOOST_FIND_HEADER([boost/logic/tribool_fwd.hpp])
+BOOST_FIND_HEADER([boost/logic/tribool.hpp])
+])
+
+
+# BOOST_TUPLE()
+# -------------
+# Look for Boost.Tuple by checking for boost/tuple/tuple.hpp.
+# Only the header is checked; no library is searched for.
+BOOST_DEFUN([Tuple],
+[BOOST_FIND_HEADER([boost/tuple/tuple.hpp])])
+
+
+# BOOST_TYPETRAITS()
+# ------------------
+# Look for Boost.TypeTraits by checking for boost/type_traits.hpp.
+# Only the header is checked; no library is searched for.
+BOOST_DEFUN([TypeTraits],
+[BOOST_FIND_HEADER([boost/type_traits.hpp])])
+
+
+# BOOST_UTILITY()
+# ---------------
+# Look for Boost.Utility (noncopyable, result_of, base-from-member idiom,
+# etc.) by checking for boost/utility.hpp.  Only the header is checked.
+BOOST_DEFUN([Utility],
+[BOOST_FIND_HEADER([boost/utility.hpp])])
+
+
+# BOOST_VARIANT()
+# ---------------
+# Look for Boost.Variant by checking for both boost/variant/variant_fwd.hpp
+# and boost/variant.hpp.  Only headers are checked.
+BOOST_DEFUN([Variant],
+[BOOST_FIND_HEADER([boost/variant/variant_fwd.hpp])
+BOOST_FIND_HEADER([boost/variant.hpp])])
+
+
+# BOOST_POINTER_CONTAINER()
+# -------------------------
+# Look for Boost.PointerContainer by checking for the ptr_deque, ptr_list,
+# ptr_vector, ptr_array, ptr_set and ptr_map headers under
+# boost/ptr_container/.  Only headers are checked.
+BOOST_DEFUN([Pointer_Container],
+[BOOST_FIND_HEADER([boost/ptr_container/ptr_deque.hpp])
+BOOST_FIND_HEADER([boost/ptr_container/ptr_list.hpp])
+BOOST_FIND_HEADER([boost/ptr_container/ptr_vector.hpp])
+BOOST_FIND_HEADER([boost/ptr_container/ptr_array.hpp])
+BOOST_FIND_HEADER([boost/ptr_container/ptr_set.hpp])
+BOOST_FIND_HEADER([boost/ptr_container/ptr_map.hpp])
+])# BOOST_POINTER_CONTAINER
+
+
+# BOOST_WAVE([PREFERRED-RT-OPT])
+# ------------------------------
+# Look for Boost.Wave.  For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+# NOTE: If you intend to use Wave/Spirit with thread support, make sure you
+# call BOOST_THREAD first.
+# The link flags of Boost.System, Boost.Filesystem, Boost.Date_Time and
+# Boost.Thread are added to LIBS and LDFLAGS while probing; both variables
+# are restored before this macro returns.
+BOOST_DEFUN([Wave],
+[AC_REQUIRE([BOOST_FILESYSTEM])dnl
+AC_REQUIRE([BOOST_DATE_TIME])dnl
+m4_pattern_allow([^BOOST_((FILE)?SYSTEM|DATE_TIME|THREAD)_(LIBS|LDFLAGS)$])dnl
+boost_wave_orig_LIBS=$LIBS
+boost_wave_orig_LDFLAGS=$LDFLAGS
+LIBS="$LIBS $BOOST_SYSTEM_LIBS $BOOST_FILESYSTEM_LIBS $BOOST_DATE_TIME_LIBS $BOOST_THREAD_LIBS"
+LDFLAGS="$LDFLAGS $BOOST_SYSTEM_LDFLAGS $BOOST_FILESYSTEM_LDFLAGS $BOOST_DATE_TIME_LDFLAGS $BOOST_THREAD_LDFLAGS"
+BOOST_FIND_LIB([wave], [$1],
+                [boost/wave.hpp],
+                [boost::wave::token_id id; get_token_name(id);])
+LIBS=$boost_wave_orig_LIBS
+LDFLAGS=$boost_wave_orig_LDFLAGS
+])# BOOST_WAVE
+
+
+# BOOST_XPRESSIVE()
+# -----------------
+# Look for Boost.Xpressive (new since 1.36.0) by checking for
+# boost/xpressive/xpressive.hpp.  Only the header is checked.
+BOOST_DEFUN([Xpressive],
+[BOOST_FIND_HEADER([boost/xpressive/xpressive.hpp])])
+
+
+# ----------------- #
+# Internal helpers. #
+# ----------------- #
+
+
+# _BOOST_PTHREAD_FLAG()
+# ---------------------
+# Internal helper for BOOST_THREAD.  Computes boost_cv_pthread_flag
+# which must be used in CPPFLAGS and LIBS.  The value is left empty when no
+# candidate flag is accepted, or when linking works without any extra flag.
+#
+# Yes, we *need* to put the -pthread thing in CPPFLAGS because with GCC3,
+# boost/thread.hpp will trigger a #error if -pthread isn't used:
+#   boost/config/requires_threads.hpp:47:5: #error "Compiler threading support
+#   is not turned on. Please set the correct command line options for
+#   threading: -pthread (Linux), -pthreads (Solaris) or -mthreads (Mingw32)"
+#
+# Based on ACX_PTHREAD: http://autoconf-archive.cryp.to/acx_pthread.html
+AC_DEFUN([_BOOST_PTHREAD_FLAG],
+[AC_REQUIRE([AC_PROG_CXX])dnl
+AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_LANG_PUSH([C++])dnl
+AC_CACHE_CHECK([for the flags needed to use pthreads], [boost_cv_pthread_flag],
+[ boost_cv_pthread_flag=
+  # The ordering *is* (sometimes) important.  Some notes on the
+  # individual items follow:
+  # (none): in case threads are in libc; should be tried before -Kthread and
+  #       other compiler flags to prevent continual compiler warnings
+  # -lpthreads: AIX (must check this before -lpthread)
+  # -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h)
+  # -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able)
+  # -llthread: LinuxThreads port on FreeBSD (also preferred to -pthread)
+  # -pthread: GNU Linux/GCC (kernel threads), BSD/GCC (userland threads)
+  # -pthreads: Solaris/GCC
+  # -mthreads: MinGW32/GCC, Lynx/GCC
+  # -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it
+  #      doesn't hurt to check since this sometimes defines pthreads too;
+  #      also defines -D_REENTRANT)
+  #      ... -mt is also the pthreads flag for HP/aCC
+  # -lpthread: GNU Linux, etc.
+  # --thread-safe: KAI C++
+  case $host_os in #(
+    *solaris*)
+      # On Solaris (at least, for some versions), libc contains stubbed
+      # (non-functional) versions of the pthreads routines, so link-based
+      # tests will erroneously succeed.  (We need to link with -pthreads/-mt/
+      # -lpthread.)  (The stubs are missing pthread_cleanup_push, or rather
+      # a function called by this macro, so we could check for that, but
+      # who knows whether they'll stub that too in a future libc.)  So,
+      # we'll just look for -pthreads and -lpthread first:
+      boost_pthread_flags="-pthreads -lpthread -mt -pthread";; #(
+    *)
+      boost_pthread_flags="-lpthreads -Kthread -kthread -llthread -pthread \
+                           -pthreads -mthreads -lpthread --thread-safe -mt";;
+  esac
+  # Generate the test file.
+  AC_LANG_CONFTEST([AC_LANG_PROGRAM([#include <pthread.h>],
+    [pthread_t th; pthread_join(th, 0);
+    pthread_attr_init(0); pthread_cleanup_push(0, 0);
+    pthread_create(0,0,0,0); pthread_cleanup_pop(0);])])
+  # Try linking without any extra flag first, then each candidate in order,
+  # and stop at the first attempt that is accepted.
+  for boost_pthread_flag in '' $boost_pthread_flags; do
+    boost_pthread_ok=false
+dnl Re-use the test file already generated.
+    boost_pthreads__save_LIBS=$LIBS
+    LIBS="$LIBS $boost_pthread_flag"
+dnl A successful link is still rejected when the flag can be found in
+dnl conftest.err, i.e. when the compiler mentioned it in its diagnostics.
+dnl NOTE(review): for the empty flag the pattern is just `.*', which matches
+dnl any line, so any diagnostic output at all rejects the no-flag attempt.
+dnl NOTE(review): the matching lines printed by grep are not redirected.
+    AC_LINK_IFELSE([],
+      [if grep ".*$boost_pthread_flag" conftest.err; then
+         echo "This flag seems to have triggered warnings" >&AS_MESSAGE_LOG_FD
+       else
+         boost_pthread_ok=:; boost_cv_pthread_flag=$boost_pthread_flag
+       fi])
+    LIBS=$boost_pthreads__save_LIBS
+    $boost_pthread_ok && break
+  done
+])
+AC_LANG_POP([C++])dnl
+])# _BOOST_PTHREAD_FLAG
+
+
+# _BOOST_gcc_test(MAJOR, MINOR)
+# -----------------------------
+# Internal helper for _BOOST_FIND_COMPILER_TAG.
+# Expands to one double-quoted shell word of the form "CONDITION @ TAG".
+# CONDITION is a preprocessor expression that holds for GCC MAJOR.MINOR when
+# __ICC is not defined, and TAG is `gcc' followed by MAJOR and MINOR.
+m4_define([_BOOST_gcc_test],
+["defined __GNUC__ && __GNUC__ == $1 && __GNUC_MINOR__ == $2 && !defined __ICC @ gcc$1$2"])dnl
+
+# _BOOST_mingw_test(MAJOR, MINOR)
+# -------------------------------
+# Internal helper for _BOOST_FIND_COMPILER_TAG.
+# Expands to one double-quoted shell word of the form "CONDITION @ TAG".
+# CONDITION is the same GCC MAJOR.MINOR test as in _BOOST_gcc_test with the
+# additional requirement that one of the Windows platform macros is defined,
+# and TAG is `mgw' followed by MAJOR and MINOR.
+m4_define([_BOOST_mingw_test],
+["defined __GNUC__ && __GNUC__ == $1 && __GNUC_MINOR__ == $2 && !defined __ICC && \
+  (defined WIN32 || defined WINNT || defined _WIN32 || defined __WIN32 \
+         || defined __WIN32__ || defined __WINNT || defined __WINNT__) @ mgw$1$2"])dnl
+
+
+# _BOOST_FIND_COMPILER_TAG()
+# --------------------------
+# Internal.  When Boost is installed without --layout=system, each library
+# filename will hold a suffix that encodes the compiler used during the
+# build.  The Boost build system seems to call this a `tag'.
+AC_DEFUN([_BOOST_FIND_COMPILER_TAG],
+[AC_REQUIRE([AC_PROG_CXX])dnl
+AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_CACHE_CHECK([for the toolset name used by Boost for $CXX],
+               [boost_cv_lib_tag],
+[boost_cv_lib_tag=unknown
+if test x$boost_cv_inc_path != xno; then
+  AC_LANG_PUSH([C++])dnl
+  # The following tests are mostly inspired by boost/config/auto_link.hpp
+  # The list is sorted from most recent/common to oldest compiler (in order
+  # to increase the likelihood of finding the right compiler with the
+  # least number of compilation attempts).
+  # Beware that some tests are sensitive to the order (for instance, we must
+  # look for MinGW before looking for GCC3).
+  # I used one compilation test per compiler with a #error to recognize
+  # each compiler so that it works even when cross-compiling (let me know
+  # if you know a better approach).
+  # Known missing tags (known from Boost's tools/build/v2/tools/common.jam):
+  #   como, edg, kcc, bck, mp, sw, tru, xlc
+  # I'm not sure about my test for `il' (be careful: Intel's ICC pre-defines
+  # the same defines as GCC's).
+  for i in \
+    _BOOST_mingw_test(4, 10) \
+    _BOOST_gcc_test(4, 10) \
+    _BOOST_mingw_test(4, 9) \
+    _BOOST_gcc_test(4, 9) \
+    _BOOST_mingw_test(4, 8) \
+    _BOOST_gcc_test(4, 8) \
+    _BOOST_mingw_test(4, 7) \
+    _BOOST_gcc_test(4, 7) \
+    _BOOST_mingw_test(4, 6) \
+    _BOOST_gcc_test(4, 6) \
+    _BOOST_mingw_test(4, 5) \
+    _BOOST_gcc_test(4, 5) \
+    _BOOST_mingw_test(4, 4) \
+    _BOOST_gcc_test(4, 4) \
+    _BOOST_mingw_test(4, 3) \
+    _BOOST_gcc_test(4, 3) \
+    _BOOST_mingw_test(4, 2) \
+    _BOOST_gcc_test(4, 2) \
+    _BOOST_mingw_test(4, 1) \
+    _BOOST_gcc_test(4, 1) \
+    _BOOST_mingw_test(4, 0) \
+    _BOOST_gcc_test(4, 0) \
+    "defined __GNUC__ && __GNUC__ == 3 && !defined __ICC \
+     && (defined WIN32 || defined WINNT || defined _WIN32 || defined __WIN32 \
+         || defined __WIN32__ || defined __WINNT || defined __WINNT__) @ mgw" \
+    _BOOST_gcc_test(3, 4) \
+    _BOOST_gcc_test(3, 3) \
+    "defined _MSC_VER && _MSC_VER >= 1500 @ vc90" \
+    "defined _MSC_VER && _MSC_VER == 1400 @ vc80" \
+    _BOOST_gcc_test(3, 2) \
+    "defined _MSC_VER && _MSC_VER == 1310 @ vc71" \
+    _BOOST_gcc_test(3, 1) \
+    _BOOST_gcc_test(3, 0) \
+    "defined __BORLANDC__ @ bcb" \
+    "defined __ICC && (defined __unix || defined __unix__) @ il" \
+    "defined __ICL @ iw" \
+    "defined _MSC_VER && _MSC_VER == 1300 @ vc7" \
+    _BOOST_gcc_test(2, 95) \
+    "defined __MWERKS__ && __MWERKS__ <= 0x32FF @ cw9" \
+    "defined _MSC_VER && _MSC_VER < 1300 && !defined UNDER_CE @ vc6" \
+    "defined _MSC_VER && _MSC_VER < 1300 && defined UNDER_CE @ evc4" \
+    "defined __MWERKS__ && __MWERKS__ <= 0x31FF @ cw8"
+  do
+    boost_tag_test=`expr "X$i" : 'X\([[^@]]*\) @ '`
+    boost_tag=`expr "X$i" : 'X[[^@]]* @ \(.*\)'`
+    AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+#if $boost_tag_test
+/* OK */
+#else
+# error $boost_tag_test
+#endif
+]])], [boost_cv_lib_tag=$boost_tag; break], [])
+  done
+AC_LANG_POP([C++])dnl
+  case $boost_cv_lib_tag in #(
+    # Some newer (>= 1.35?) versions of Boost seem to only use "gcc" as opposed
+    # to "gcc41" for instance.
+    *-gcc | *'-gcc ') :;; #(  Don't re-add -gcc: it's already in there.
+    gcc*)
+      boost_tag_x=
+      case $host_os in #(
+        darwin*)
+          if test $boost_major_version -ge 136; then
+            # The `x' added in r46793 of Boost.
+            boost_tag_x=x
+          fi;;
+      esac
+      # We can specify multiple tags in this variable because it's used by
+      # BOOST_FIND_LIB that does a `for tag in -$boost_cv_lib_tag' ...
+      boost_cv_lib_tag="$boost_tag_x$boost_cv_lib_tag -${boost_tag_x}gcc"
+      ;; #(
+    unknown)
+      AC_MSG_WARN([[could not figure out which toolset name to use for $CXX]])
+      boost_cv_lib_tag=
+      ;;
+  esac
+fi])dnl end of AC_CACHE_CHECK
+])# _BOOST_FIND_COMPILER_TAG
+
+
+# _BOOST_GUESS_WHETHER_TO_USE_MT()
+# --------------------------------
+# Compile a small test to try to guess whether we should favor MT (Multi
+# Thread) flavors of Boost.  Sets boost_guess_use_mt accordingly.
+AC_DEFUN([_BOOST_GUESS_WHETHER_TO_USE_MT],
+[# Check whether we had better use `mt' even though we weren't asked to.
+AC_LANG_PUSH([C++])dnl
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+#if defined _REENTRANT || defined _MT || defined __MT__
+/* use -mt */
+#else
+# error MT not needed
+#endif
+]])], [boost_guess_use_mt=:], [boost_guess_use_mt=false])
+AC_LANG_POP([C++])dnl
+])
+
+# _BOOST_AC_LINK_IFELSE(PROGRAM, [ACTION-IF-TRUE], [ACTION-IF-FALSE])
+# -------------------------------------------------------------------
+# Fork of _AC_LINK_IFELSE that preserves conftest.o across calls.  Fragile,
+# will break when Autoconf changes its internals.  Requires that you manually
+# rm -f conftest.$ac_objext in between two really different tests, otherwise
+# you will try to link a conftest.o left behind by a previous test.
+# Used to aggressively optimize BOOST_FIND_LIB (see the big comment in this
+# macro).
+# NOTE(review): boost_save_ac_objext is not set here; the caller must save it.
+# Don't use "break" in the actions, as it would short-circuit some code
+# this macro runs after the actions.
+m4_define([_BOOST_AC_LINK_IFELSE],
+[m4_ifvaln([$1], [AC_LANG_CONFTEST([$1])])dnl
+rm -f conftest$ac_exeext
+boost_save_ac_ext=$ac_ext
+boost_use_source=:
+# If we already have a .o, re-use it.  We change $ac_ext so that $ac_link
+# tries to link the existing object file instead of compiling from source.
+test -f conftest.$ac_objext && ac_ext=$ac_objext && boost_use_source=false &&
+  _AS_ECHO_LOG([re-using the existing conftest.$ac_objext])
+AS_IF([_AC_DO_STDERR($ac_link) && {
+         test -z "$ac_[]_AC_LANG_ABBREV[]_werror_flag" ||
+         test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+         test "$cross_compiling" = yes ||
+         $as_executable_p conftest$ac_exeext
+dnl FIXME: use AS_TEST_X instead when 2.61 is widespread enough.
+       }],
+      [$2],
+      [if $boost_use_source; then
+         _AC_MSG_LOG_CONFTEST
+       fi
+       $3])
+ac_objext=$boost_save_ac_objext
+ac_ext=$boost_save_ac_ext
+dnl Delete also the IPA/IPO (Inter Procedural Analysis/Optimization)
+dnl information created by the PGI compiler (conftest_ipa8_conftest.oo),
+dnl as it would interfere with the next link command.
+rm -f core conftest.err conftest_ipa8_conftest.oo \
+      conftest$ac_exeext m4_ifval([$1], [conftest.$ac_ext])[]dnl
+])# _BOOST_AC_LINK_IFELSE
+
+# Local Variables:
+# mode: autoconf
+# End:
diff --git a/m4/pkg.m4 b/m4/pkg.m4
new file mode 100644
index 0000000..cbb46db
--- /dev/null
+++ b/m4/pkg.m4
@@ -0,0 +1,156 @@
+# pkg.m4 - Macros to locate and utilise pkg-config.            -*- Autoconf -*-
+# 
+# Copyright © 2004 Scott James Remnant <scott at netsplit.com>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# PKG_PROG_PKG_CONFIG([MIN-VERSION])
+# ----------------------------------
+AC_DEFUN([PKG_PROG_PKG_CONFIG],
+[m4_pattern_forbid([^_?PKG_[A-Z_]+$])
+m4_pattern_allow([^PKG_CONFIG(_PATH)?$])
+AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])dnl
+if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
+	AC_PATH_TOOL([PKG_CONFIG], [pkg-config])
+fi
+if test -n "$PKG_CONFIG"; then
+	_pkg_min_version=m4_default([$1], [0.9.0])
+	AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version])
+	if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then
+		AC_MSG_RESULT([yes])
+	else
+		AC_MSG_RESULT([no])
+		PKG_CONFIG=""
+	fi
+		
+fi[]dnl
+])# PKG_PROG_PKG_CONFIG
+
+# PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+#
+# Check to see whether a particular set of modules exists.  Similar
+# to PKG_CHECK_MODULES(), but does not set variables or print errors.
+#
+# An empty ACTION-IF-FOUND expands to the shell no-op `:'.
+# Similar to PKG_CHECK_MODULES, make sure that the first instance of
+# this or PKG_CHECK_MODULES is called, or make sure to call
+# PKG_PROG_PKG_CONFIG manually
+# --------------------------------------------------------------
+AC_DEFUN([PKG_CHECK_EXISTS],
+[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
+if test -n "$PKG_CONFIG" && \
+    AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then
+  m4_ifval([$2], [$2], [:])
+m4_ifvaln([$3], [else
+  $3])dnl
+fi])
+
+
+# _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES])
+# ---------------------------------------------
+m4_define([_PKG_CONFIG],
+[if test -n "$PKG_CONFIG"; then
+    if test -n "$$1"; then
+        pkg_cv_[]$1="$$1"
+    else
+        PKG_CHECK_EXISTS([$3],
+                         [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`],
+			 [pkg_failed=yes])
+    fi
+else
+	pkg_failed=untried
+fi[]dnl
+])# _PKG_CONFIG
+
+# _PKG_SHORT_ERRORS_SUPPORTED
+# -----------------------------
+AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED],
+[AC_REQUIRE([PKG_PROG_PKG_CONFIG])
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+        _pkg_short_errors_supported=yes
+else
+        _pkg_short_errors_supported=no
+fi[]dnl
+])# _PKG_SHORT_ERRORS_SUPPORTED
+
+
+# PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
+# [ACTION-IF-NOT-FOUND])
+#
+# Set VARIABLE-PREFIX_CFLAGS and VARIABLE-PREFIX_LIBS for MODULES via pkg-config.
+# Note that if there is a possibility the first call to
+# PKG_CHECK_MODULES might not happen, you should be sure to include an
+# explicit call to PKG_PROG_PKG_CONFIG in your configure.ac
+#
+# Without ACTION-IF-NOT-FOUND, a failed or unavailable check aborts configure.
+# --------------------------------------------------------------
+AC_DEFUN([PKG_CHECK_MODULES],
+[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
+AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
+AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl
+
+pkg_failed=no
+AC_MSG_CHECKING([for $1])
+
+_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2])
+_PKG_CONFIG([$1][_LIBS], [libs], [$2])
+
+m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS
+and $1[]_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.])
+
+if test $pkg_failed = yes; then
+        _PKG_SHORT_ERRORS_SUPPORTED
+        if test $_pkg_short_errors_supported = yes; then
+	        $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "$2"`
+        else 
+	        $1[]_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "$2"`
+        fi
+	# Put the nasty error message in config.log where it belongs
+	echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD
+
+	ifelse([$4], , [AC_MSG_ERROR(dnl
+[Package requirements ($2) were not met:
+
+$$1_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+_PKG_TEXT
+])],
+		[$4])
+elif test $pkg_failed = untried; then
+	ifelse([$4], , [AC_MSG_FAILURE(dnl
+[The pkg-config script could not be found or is too old.  Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+_PKG_TEXT
+
+To get pkg-config, see <http://www.freedesktop.org/software/pkgconfig>.])],
+		[$4])
+else
+	$1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
+	$1[]_LIBS=$pkg_cv_[]$1[]_LIBS
+        AC_MSG_RESULT([yes])
+	ifelse([$3], , :, [$3])
+fi[]dnl
+])# PKG_CHECK_MODULES
diff --git a/projects/libMems.doxygen b/projects/libMems.doxygen
new file mode 100644
index 0000000..b13de44
--- /dev/null
+++ b/projects/libMems.doxygen
@@ -0,0 +1,212 @@
+# Doxyfile 1.3.7
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+PROJECT_NAME           = $(PROJECT)-$(VERSION)
+PROJECT_NUMBER         =
+OUTPUT_DIRECTORY       = $(DOCDIR)
+CREATE_SUBDIRS         = NO
+OUTPUT_LANGUAGE        = English
+USE_WINDOWS_ENCODING   = NO
+BRIEF_MEMBER_DESC      = YES
+REPEAT_BRIEF           = YES
+ABBREVIATE_BRIEF       = 
+ALWAYS_DETAILED_SEC    = NO
+INLINE_INHERITED_MEMB  = NO
+STRIP_FROM_INC_PATH    = $(SRCDIR)
+FULL_PATH_NAMES        = YES
+STRIP_FROM_PATH        = $(SRCDIR)
+SHORT_NAMES            = NO
+JAVADOC_AUTOBRIEF      = YES
+MULTILINE_CPP_IS_BRIEF = NO
+DETAILS_AT_TOP         = YES
+INHERIT_DOCS           = YES
+DISTRIBUTE_GROUP_DOC   = NO
+TAB_SIZE               = 8
+ALIASES                = 
+OPTIMIZE_OUTPUT_FOR_C  = YES
+OPTIMIZE_OUTPUT_JAVA   = NO
+SUBGROUPING            = YES
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+EXTRACT_ALL            = YES
+EXTRACT_PRIVATE        = YES
+EXTRACT_STATIC         = YES
+EXTRACT_LOCAL_CLASSES  = YES
+EXTRACT_LOCAL_METHODS  = NO
+HIDE_UNDOC_MEMBERS     = NO
+HIDE_UNDOC_CLASSES     = NO
+HIDE_FRIEND_COMPOUNDS  = NO
+HIDE_IN_BODY_DOCS      = NO
+INTERNAL_DOCS          = NO
+CASE_SENSE_NAMES       = NO
+HIDE_SCOPE_NAMES       = NO
+SHOW_INCLUDE_FILES     = YES
+INLINE_INFO            = YES
+SORT_MEMBER_DOCS       = YES
+SORT_BRIEF_DOCS        = NO
+SORT_BY_SCOPE_NAME     = NO
+GENERATE_TODOLIST      = YES
+GENERATE_TESTLIST      = YES
+GENERATE_BUGLIST       = YES
+GENERATE_DEPRECATEDLIST= YES
+ENABLED_SECTIONS       = 
+MAX_INITIALIZER_LINES  = 30
+SHOW_USED_FILES        = YES
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+QUIET                  = YES
+WARNINGS               = YES
+WARN_IF_UNDOCUMENTED   = YES
+WARN_IF_DOC_ERROR      = YES
+WARN_FORMAT            = "$file:$line: $text"
+WARN_LOGFILE           = 
+#---------------------------------------------------------------------------
+# configuration options related to the input files (C/C++ sources and headers)
+#---------------------------------------------------------------------------
+INPUT                  = $(SRCDIR)
+FILE_PATTERNS          = *.c *.cpp *.h
+RECURSIVE              = YES
+EXCLUDE                = 
+EXCLUDE_SYMLINKS       = NO
+EXCLUDE_PATTERNS       = 
+EXAMPLE_PATH           = $(SRCDIR)
+EXAMPLE_PATTERNS       = 
+EXAMPLE_RECURSIVE      = NO
+IMAGE_PATH             = 
+INPUT_FILTER           = 
+FILTER_SOURCE_FILES    = NO
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+SOURCE_BROWSER         = NO
+INLINE_SOURCES         = NO
+STRIP_CODE_COMMENTS    = YES
+REFERENCED_BY_RELATION = YES
+REFERENCES_RELATION    = YES
+VERBATIM_HEADERS       = YES
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+ALPHABETICAL_INDEX     = NO
+COLS_IN_ALPHA_INDEX    = 5
+IGNORE_PREFIX          = 
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+GENERATE_HTML          = $(GENERATE_HTML)
+HTML_OUTPUT            = html
+HTML_FILE_EXTENSION    = .html
+HTML_HEADER            = 
+HTML_FOOTER            = 
+HTML_STYLESHEET        = 
+HTML_ALIGN_MEMBERS     = YES
+GENERATE_HTMLHELP      = $(GENERATE_CHM)
+CHM_FILE               = ../$(PROJECT).chm
+HHC_LOCATION           = $(HHC_PATH)
+GENERATE_CHI           = $(GENERATE_CHI)
+BINARY_TOC             = NO
+TOC_EXPAND             = NO
+DISABLE_INDEX          = NO
+ENUM_VALUES_PER_LINE   = 4
+GENERATE_TREEVIEW      = YES
+TREEVIEW_WIDTH         = 250
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+GENERATE_LATEX         = $(GENERATE_LATEX)
+LATEX_OUTPUT           = latex
+LATEX_CMD_NAME         = latex
+MAKEINDEX_CMD_NAME     = makeindex
+COMPACT_LATEX          = NO
+PAPER_TYPE             = $(PAPER_SIZE)
+EXTRA_PACKAGES         = 
+LATEX_HEADER           = 
+PDF_HYPERLINKS         = NO
+USE_PDFLATEX           = NO
+LATEX_BATCHMODE        = YES
+LATEX_HIDE_INDICES     = NO
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+GENERATE_RTF           = $(GENERATE_RTF)
+RTF_OUTPUT             = rtf
+COMPACT_RTF            = NO
+RTF_HYPERLINKS         = NO
+RTF_STYLESHEET_FILE    = 
+RTF_EXTENSIONS_FILE    = 
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+GENERATE_MAN           = $(GENERATE_MAN)
+MAN_OUTPUT             = man
+MAN_EXTENSION          = .1
+MAN_LINKS              = NO
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+GENERATE_XML           = $(GENERATE_XML)
+XML_OUTPUT             = xml
+XML_SCHEMA             = 
+XML_DTD                = 
+XML_PROGRAMLISTING     = YES
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+GENERATE_AUTOGEN_DEF   = NO
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+GENERATE_PERLMOD       = NO
+PERLMOD_LATEX          = NO
+PERLMOD_PRETTY         = YES
+PERLMOD_MAKEVAR_PREFIX = 
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor   
+#---------------------------------------------------------------------------
+ENABLE_PREPROCESSING   = YES
+MACRO_EXPANSION        = NO
+EXPAND_ONLY_PREDEF     = NO
+SEARCH_INCLUDES        = YES
+INCLUDE_PATH           = 
+INCLUDE_FILE_PATTERNS  = 
+PREDEFINED             = 
+EXPAND_AS_DEFINED      = 
+SKIP_FUNCTION_MACROS   = YES
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references   
+#---------------------------------------------------------------------------
+TAGFILES               = 
+GENERATE_TAGFILE       = $(DOCDIR)/$(PROJECT).tag
+ALLEXTERNALS           = NO
+EXTERNAL_GROUPS        = YES
+PERL_PATH              = $(PERL_PATH)
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool   
+#---------------------------------------------------------------------------
+CLASS_DIAGRAMS         = YES
+HIDE_UNDOC_RELATIONS   = YES
+HAVE_DOT               = $(HAVE_DOT)
+CLASS_GRAPH            = YES
+COLLABORATION_GRAPH    = YES
+UML_LOOK               = NO
+TEMPLATE_RELATIONS     = NO
+INCLUDE_GRAPH          = YES
+INCLUDED_BY_GRAPH      = YES
+CALL_GRAPH             = NO
+GRAPHICAL_HIERARCHY    = YES
+DOT_IMAGE_FORMAT       = png
+DOT_PATH               = $(DOT_PATH)
+DOTFILE_DIRS           = 
+MAX_DOT_GRAPH_WIDTH    = 1024
+MAX_DOT_GRAPH_HEIGHT   = 1024
+MAX_DOT_GRAPH_DEPTH    = 0
+GENERATE_LEGEND        = YES
+DOT_CLEANUP            = YES
+#---------------------------------------------------------------------------
+# Configuration::additions related to the search engine   
+#---------------------------------------------------------------------------
+SEARCHENGINE           = NO
diff --git a/projects/libMems.kdevprj b/projects/libMems.kdevprj
new file mode 100644
index 0000000..3af1b86
--- /dev/null
+++ b/projects/libMems.kdevprj
@@ -0,0 +1,281 @@
+[AUTHORS]
+dist=true
+install=false
+install_location=
+type=DATA
+
+[COPYING]
+dist=true
+install=false
+install_location=
+type=DATA
+
+[ChangeLog]
+dist=true
+install=false
+install_location=
+type=DATA
+
+[Config for BinMakefileAm]
+bin_program=mormems
+cxxflags=-O0 -g3 -Wall
+ldflags=\s
+
+[General]
+author=Aaron Darling
+email=darling at cs.wisc.edu
+kdevprj_version=1.3
+lfv_open_groups=
+makefiles=Makefile.am,mormems/Makefile.am,mormems/docs/Makefile.am,mormems/docs/en/Makefile.am,po/Makefile.am
+project_name=Mormems
+project_type=normal_empty
+sub_dir=mormems/
+version=0.1
+version_control=CVS
+workspace=1
+
+[INSTALL]
+dist=true
+install=false
+install_location=
+type=DATA
+
+[LFV Groups]
+GNU=AUTHORS,COPYING,ChangeLog,INSTALL,README,TODO,NEWS
+Headers=*.h,*.hh,*.hxx,*.hpp,*.H
+Others=*
+Sources=*.cpp,*.c,*.cc,*.C,*.cxx,*.ec,*.ecpp,*.lxx,*.l++,*.ll,*.l
+User Interface=*.kdevdlg,*.ui,*.rc
+groups=Headers,Sources,User Interface,GNU,Others
+
+[Makefile.am]
+files=mormems.kdevprj,AUTHORS,COPYING,ChangeLog,INSTALL,README,TODO,mormems.lsm
+sub_dirs=mormems
+type=normal
+
+[README]
+dist=true
+install=false
+install_location=
+type=DATA
+
+[TODO]
+dist=true
+install=false
+install_location=
+type=DATA
+
+[mormems.kdevprj]
+dist=true
+install=false
+install_location=
+type=DATA
+
+[mormems.lsm]
+dist=true
+install=false
+install_location=
+type=DATA
+
+[mormems/BigDiskSuffixArray.cpp]
+dist=true
+install=false
+install_location=
+type=SOURCE
+
+[mormems/BigDiskSuffixArray.h]
+dist=true
+install=false
+install_location=
+type=HEADER
+
+[mormems/BigDnaSar.cpp]
+dist=true
+install=false
+install_location=
+type=SOURCE
+
+[mormems/BigDnaSar.h]
+dist=true
+install=false
+install_location=
+type=HEADER
+
+[mormems/DiskSuffixArray.cpp]
+dist=true
+install=false
+install_location=
+type=SOURCE
+
+[mormems/DiskSuffixArray.h]
+dist=true
+install=false
+install_location=
+type=HEADER
+
+[mormems/HashingMatchFinder.cpp]
+dist=true
+install=false
+install_location=
+type=SOURCE
+
+[mormems/HashingMatchFinder.h]
+dist=true
+install=false
+install_location=
+type=HEADER
+
+[mormems/Makefile.am]
+files=mormems/BigDiskSuffixArray.cpp,mormems/BigDnaSar.cpp,mormems/DiskSuffixArray.cpp,mormems/HashingMatchFinder.cpp,mormems/MatchFinder.cpp,mormems/MemHash.cpp,mormems/MemHashEntry.cpp,mormems/MemScorer.cpp,mormems/MimHash.cpp,mormems/MimHashEntry.cpp,mormems/SmallDiskSuffixArray.cpp,mormems/SmallDnaSar.cpp,mormems/SuffixArray.cpp,mormems/genomeApp.cpp,mormems/BigDiskSuffixArray.h,mormems/BigDnaSar.h,mormems/DiskSuffixArray.h,mormems/HashingMatchFinder.h,mormems/MatchFinder.h,mormems/M [...]
+sub_dirs=
+type=prog_main
+
+[mormems/MatchFinder.cpp]
+dist=true
+install=false
+install_location=
+type=SOURCE
+
+[mormems/MatchFinder.h]
+dist=true
+install=false
+install_location=
+type=HEADER
+
+[mormems/MemHash.cpp]
+dist=true
+install=false
+install_location=
+type=SOURCE
+
+[mormems/MemHash.h]
+dist=true
+install=false
+install_location=
+type=HEADER
+
+[mormems/MemHashEntry.cpp]
+dist=true
+install=false
+install_location=
+type=SOURCE
+
+[mormems/MemHashEntry.h]
+dist=true
+install=false
+install_location=
+type=HEADER
+
+[mormems/MemScorer.cpp]
+dist=true
+install=false
+install_location=
+type=SOURCE
+
+[mormems/MemScorer.h]
+dist=true
+install=false
+install_location=
+type=HEADER
+
+[mormems/MemorySuffixArray.h]
+dist=true
+install=false
+install_location=
+type=HEADER
+
+[mormems/MimHash.cpp]
+dist=true
+install=false
+install_location=
+type=SOURCE
+
+[mormems/MimHash.h]
+dist=true
+install=false
+install_location=
+type=HEADER
+
+[mormems/MimHashEntry.cpp]
+dist=true
+install=false
+install_location=
+type=SOURCE
+
+[mormems/MimHashEntry.h]
+dist=true
+install=false
+install_location=
+type=HEADER
+
+[mormems/RepeatFinder.h]
+dist=true
+install=false
+install_location=
+type=HEADER
+
+[mormems/SmallDiskSuffixArray.cpp]
+dist=true
+install=false
+install_location=
+type=SOURCE
+
+[mormems/SmallDiskSuffixArray.h]
+dist=true
+install=false
+install_location=
+type=HEADER
+
+[mormems/SmallDnaSar.cpp]
+dist=true
+install=false
+install_location=
+type=SOURCE
+
+[mormems/SmallDnaSar.h]
+dist=true
+install=false
+install_location=
+type=HEADER
+
+[mormems/SuffixArray.cpp]
+dist=true
+install=false
+install_location=
+type=SOURCE
+
+[mormems/SuffixArray.h]
+dist=true
+install=false
+install_location=
+type=HEADER
+
+[mormems/docs/Makefile.am]
+sub_dirs=
+type=normal
+
+[mormems/docs/en/Makefile.am]
+sub_dirs=
+type=normal
+
+[mormems/genomeApp.cpp]
+dist=true
+install=false
+install_location=
+type=SOURCE
+
+[mormems/precomp.h]
+dist=true
+install=false
+install_location=
+type=HEADER
+
+[mormems/precomp_d.h]
+dist=true
+install=false
+install_location=
+type=HEADER
+
+[po/Makefile.am]
+sub_dirs=
+type=po
diff --git a/projects/libMems.sln b/projects/libMems.sln
new file mode 100644
index 0000000..e2fb0d2
--- /dev/null
+++ b/projects/libMems.sln
@@ -0,0 +1,20 @@
+
+Microsoft Visual Studio Solution File, Format Version 9.00
+# Visual C++ Express 2005
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libMems", "libMems.vcproj", "{20FE3C39-9B04-4D5F-8249-115D8812B93E}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Win32 = Debug|Win32
+		Release|Win32 = Release|Win32
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{20FE3C39-9B04-4D5F-8249-115D8812B93E}.Debug|Win32.ActiveCfg = Debug|Win32
+		{20FE3C39-9B04-4D5F-8249-115D8812B93E}.Debug|Win32.Build.0 = Debug|Win32
+		{20FE3C39-9B04-4D5F-8249-115D8812B93E}.Release|Win32.ActiveCfg = Release|Win32
+		{20FE3C39-9B04-4D5F-8249-115D8812B93E}.Release|Win32.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/projects/libMems.vcproj b/projects/libMems.vcproj
new file mode 100644
index 0000000..946529e
--- /dev/null
+++ b/projects/libMems.vcproj
@@ -0,0 +1,1033 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="8.00"
+	Name="libMems"
+	ProjectGUID="{20FE3C39-9B04-4D5F-8249-115D8812B93E}"
+	RootNamespace="libMems"
+	Keyword="Win32Proj"
+	>
+	<Platforms>
+		<Platform
+			Name="Win32"
+		/>
+		<Platform
+			Name="x64"
+		/>
+	</Platforms>
+	<ToolFiles>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory="..\lib"
+			IntermediateDirectory="Release"
+			ConfigurationType="4"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="3"
+				EnableIntrinsicFunctions="true"
+				FavorSizeOrSpeed="1"
+				WholeProgramOptimization="true"
+				AdditionalIncludeDirectories="../../muscle/libMUSCLE;../../muscle;../../libGenome;../;..\..\boost\boost_1_34_0\;"
+				PreprocessorDefinitions="WIN32;NDEBUG;_LIB;_SCL_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_DEPRECATE"
+				RuntimeLibrary="0"
+				EnableEnhancedInstructionSet="1"
+				UsePrecompiledHeader="0"
+				WarningLevel="2"
+				Detect64BitPortabilityProblems="true"
+				DebugInformationFormat="3"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="$(OutDir)/mems.lib"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="FastDebug|Win32"
+			OutputDirectory="..\lib"
+			IntermediateDirectory="$(ConfigurationName)"
+			ConfigurationType="4"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				EnableIntrinsicFunctions="false"
+				WholeProgramOptimization="false"
+				AdditionalIncludeDirectories="../../muscle/libMUSCLE;../../muscle;../../libGenome;../;..\..\boost\boost_1_34_0\;"
+				PreprocessorDefinitions="WIN32;NDEBUG;FASTDEBUG;_LIB;_SCL_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_DEPRECATE"
+				RuntimeLibrary="0"
+				EnableEnhancedInstructionSet="1"
+				OpenMP="false"
+				UsePrecompiledHeader="0"
+				WarningLevel="2"
+				Detect64BitPortabilityProblems="true"
+				DebugInformationFormat="3"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="$(OutDir)/memsfd.lib"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release OpenMP|Win32"
+			OutputDirectory="..\lib"
+			IntermediateDirectory="$(ConfigurationName)"
+			ConfigurationType="4"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="3"
+				EnableIntrinsicFunctions="true"
+				FavorSizeOrSpeed="1"
+				WholeProgramOptimization="true"
+				AdditionalIncludeDirectories="../../muscle/libMUSCLE;../../muscle;../../libGenome;../;..\..\boost\boost_1_34_0\;"
+				PreprocessorDefinitions="WIN32;NDEBUG;_LIB;_SCL_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_DEPRECATE"
+				RuntimeLibrary="0"
+				EnableEnhancedInstructionSet="1"
+				OpenMP="true"
+				UsePrecompiledHeader="0"
+				WarningLevel="2"
+				Detect64BitPortabilityProblems="true"
+				DebugInformationFormat="3"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="$(OutDir)/memsomp.lib"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="FastDebug OpenMP|Win32"
+			OutputDirectory="..\lib"
+			IntermediateDirectory="$(ConfigurationName)"
+			ConfigurationType="4"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				EnableIntrinsicFunctions="false"
+				WholeProgramOptimization="false"
+				AdditionalIncludeDirectories="../../muscle/libMUSCLE;../../muscle;../../libGenome;../;..\..\boost\boost_1_34_0\;"
+				PreprocessorDefinitions="WIN32;NDEBUG;_LIB;_SCL_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_DEPRECATE"
+				RuntimeLibrary="0"
+				EnableEnhancedInstructionSet="1"
+				OpenMP="true"
+				UsePrecompiledHeader="0"
+				WarningLevel="2"
+				Detect64BitPortabilityProblems="true"
+				DebugInformationFormat="3"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="$(OutDir)/memsfdomp.lib"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|x64"
+			OutputDirectory="..\lib"
+			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+			ConfigurationType="4"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="3"
+				EnableIntrinsicFunctions="true"
+				FavorSizeOrSpeed="1"
+				WholeProgramOptimization="true"
+				AdditionalIncludeDirectories="../../muscle/libMUSCLE;../../muscle;../../libGenome;../;..\..\boost\boost_1_34_0\;"
+				PreprocessorDefinitions="WIN32;WIN64;NDEBUG;_LIB;_SCL_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_DEPRECATE"
+				RuntimeLibrary="0"
+				EnableEnhancedInstructionSet="0"
+				OpenMP="true"
+				UsePrecompiledHeader="0"
+				WarningLevel="2"
+				Detect64BitPortabilityProblems="true"
+				DebugInformationFormat="3"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="$(OutDir)/mems64omp.lib"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="FastDebug|x64"
+			OutputDirectory="..\lib"
+			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+			ConfigurationType="4"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				EnableIntrinsicFunctions="false"
+				WholeProgramOptimization="false"
+				AdditionalIncludeDirectories="../../muscle/libMUSCLE;../../muscle;../../libGenome;../;..\..\boost\boost_1_34_0\;"
+				PreprocessorDefinitions="WIN32;WIN64;NDEBUG;FASTDEBUG;_LIB;_SCL_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_DEPRECATE"
+				RuntimeLibrary="0"
+				EnableEnhancedInstructionSet="0"
+				OpenMP="false"
+				UsePrecompiledHeader="0"
+				WarningLevel="2"
+				Detect64BitPortabilityProblems="true"
+				DebugInformationFormat="3"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="$(OutDir)/mems64fd.lib"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release OpenMP|x64"
+			OutputDirectory="..\lib"
+			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+			ConfigurationType="4"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="3"
+				EnableIntrinsicFunctions="true"
+				FavorSizeOrSpeed="1"
+				WholeProgramOptimization="true"
+				AdditionalIncludeDirectories="../../muscle/libMUSCLE;../../muscle;../../libGenome;../;..\..\boost\boost_1_34_0\;"
+				PreprocessorDefinitions="WIN32;WIN64;NDEBUG;_LIB;_SCL_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_DEPRECATE"
+				RuntimeLibrary="0"
+				EnableEnhancedInstructionSet="0"
+				OpenMP="true"
+				UsePrecompiledHeader="0"
+				WarningLevel="2"
+				Detect64BitPortabilityProblems="true"
+				DebugInformationFormat="3"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="$(OutDir)/mems64omp.lib"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="FastDebug OpenMP|x64"
+			OutputDirectory="..\lib"
+			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+			ConfigurationType="4"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				EnableIntrinsicFunctions="false"
+				WholeProgramOptimization="false"
+				AdditionalIncludeDirectories="../../muscle/libMUSCLE;../../muscle;../../libGenome;../;..\..\boost\boost_1_34_0\;"
+				PreprocessorDefinitions="WIN32;WIN64;NDEBUG;_LIB;_SCL_SECURE_NO_DEPRECATE;_CRT_SECURE_NO_DEPRECATE"
+				RuntimeLibrary="0"
+				EnableEnhancedInstructionSet="0"
+				OpenMP="true"
+				UsePrecompiledHeader="0"
+				WarningLevel="2"
+				Detect64BitPortabilityProblems="true"
+				DebugInformationFormat="3"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="$(OutDir)/mems64fdomp.lib"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl;inc;xsd"
+			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
+			>
+			<Filter
+				Name="dmSML"
+				>
+				<File
+					RelativePath="..\libmems\dmsml\alibc.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\dmsml\alinuxaio.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\dmsml\aPOSIXaio.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\dmsml\asyncio.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\dmsml\awin32aio.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\dmsml\buffer.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\dmsml\dmsort.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\dmsml\sml.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\dmsml\sorting.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\dmsml\timing.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\dmsml\util.h"
+					>
+				</File>
+			</Filter>
+			<Filter
+				Name="SortedMerList"
+				>
+				<File
+					RelativePath="..\libmems\DNAFileSML.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\DNAMemorySML.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\FileSML.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\MemorySML.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\SortedMerList.h"
+					>
+				</File>
+			</Filter>
+			<Filter
+				Name="Matches"
+				>
+				<File
+					RelativePath="..\libMems\AbstractGappedAlignment.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\AbstractMatch.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\CompactGappedAlignment.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\DenseAbstractMatch.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\GappedAlignment.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\gnAlignedSequences.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\HybridAbstractMatch.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\Interval.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\IntervalList.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\Match.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\MatchHashEntry.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\MatchList.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\MatchProjectionAdapter.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\RepeatMatch.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\RepeatMatchList.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\SparseAbstractMatch.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\UngappedLocalAlignment.h"
+					>
+				</File>
+			</Filter>
+			<Filter
+				Name="MatchFinder"
+				>
+				<File
+					RelativePath="..\libmems\MaskedMemHash.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\MatchFinder.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\MemHash.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\RepeatHash.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\SeedMasks.h"
+					>
+				</File>
+			</Filter>
+			<Filter
+				Name="Aligner"
+				>
+				<File
+					RelativePath="..\libmems\Aligner.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\GappedAligner.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\Islands.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\LCB.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\MuscleInterface.h"
+					>
+				</File>
+			</Filter>
+			<Filter
+				Name="Utility"
+				>
+				<File
+					RelativePath=".\libmems\configuration.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\Matrix.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\Memory.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\NumericMatrix.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\PhyloTree.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\SlotAllocator.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\twister.h"
+					>
+				</File>
+			</Filter>
+			<Filter
+				Name="ProgressiveAligner"
+				>
+				<File
+					RelativePath="..\libMems\Backbone.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\GreedyBreakpointElimination.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\PairwiseMatchAdapter.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\PairwiseMatchFinder.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\ProgressiveAligner.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\SeedOccurrenceList.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\SubstitutionMatrix.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\SuperInterval.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\TreeUtilities.h"
+					>
+				</File>
+			</Filter>
+			<Filter
+				Name="HomologyHMM"
+				>
+				<File
+					RelativePath="..\libMems\HomologyHMM\algebras.cc"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\HomologyHMM\algebras.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\HomologyHMM\dptables.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\HomologyHMM\homology.cc"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\HomologyHMM\homology.h"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\HomologyHMM\homologymain.cc"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\HomologyHMM\parameters.h"
+					>
+				</File>
+			</Filter>
+		</Filter>
+		<Filter
+			Name="Resource Files"
+			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
+			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
+			>
+		</Filter>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
+			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
+			>
+			<Filter
+				Name="dmSML"
+				>
+				<File
+					RelativePath="..\libmems\dmsml\alibc.c"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\dmsml\alinuxaio.c"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\dmsml\aPOSIXaio.c"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\dmsml\asyncio.c"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\dmsml\awin32aio.c"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\dmsml\buffer.c"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\dmsml\dmsort.c"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\dmsml\sml.c"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\dmsml\sorting.c"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\dmsml\timing.c"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\dmsml\util.c"
+					>
+				</File>
+			</Filter>
+			<Filter
+				Name="SortedMerList"
+				>
+				<File
+					RelativePath="..\libmems\DNAFileSML.cpp"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\DNAMemorySML.cpp"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\FileSML.cpp"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\MemorySML.cpp"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\SortedMerList.cpp"
+					>
+				</File>
+			</Filter>
+			<Filter
+				Name="Matches"
+				>
+				<File
+					RelativePath="..\libMems\GappedAlignment.cpp"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\gnAlignedSequences.cpp"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\MatchHashEntry.cpp"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\RepeatMatch.cpp"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\RepeatMatchList.cpp"
+					>
+				</File>
+			</Filter>
+			<Filter
+				Name="MatchFinder"
+				>
+				<File
+					RelativePath="..\libmems\MaskedMemHash.cpp"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\MatchFinder.cpp"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\MemHash.cpp"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\RepeatHash.cpp"
+					>
+				</File>
+			</Filter>
+			<Filter
+				Name="Aligner"
+				>
+				<File
+					RelativePath="..\libmems\Aligner.cpp"
+					>
+				</File>
+				<File
+					RelativePath="..\libmems\Islands.cpp"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\MuscleInterface.cpp"
+					>
+				</File>
+			</Filter>
+			<Filter
+				Name="Utility"
+				>
+				<File
+					RelativePath="..\libMems\twister.c"
+					>
+				</File>
+			</Filter>
+			<Filter
+				Name="ProgressiveAligner"
+				>
+				<File
+					RelativePath="..\libMems\Backbone.cpp"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\GreedyBreakpointElimination.cpp"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\PairwiseMatchFinder.cpp"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\ProgressiveAligner.cpp"
+					>
+				</File>
+				<File
+					RelativePath="..\libMems\SuperInterval.cpp"
+					>
+				</File>
+			</Filter>
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/libmems.git