[med-svn] r82 - in trunk/packages: . muscle muscle/branches muscle/branches/upstream muscle/branches/upstream/current

Mon Aug 7 00:09:14 UTC 2006

Author: charles-guest
Date: 2006-08-07 00:08:59 +0000 (Mon, 07 Aug 2006)
New Revision: 82

Added:
   trunk/packages/muscle/
   trunk/packages/muscle/branches/
   trunk/packages/muscle/branches/upstream/
   trunk/packages/muscle/branches/upstream/current/
   trunk/packages/muscle/branches/upstream/current/Makefile
   trunk/packages/muscle/branches/upstream/current/aligngivenpath.cpp
   trunk/packages/muscle/branches/upstream/current/aligngivenpathsw.cpp
   trunk/packages/muscle/branches/upstream/current/aligntwomsas.cpp
   trunk/packages/muscle/branches/upstream/current/aligntwoprofs.cpp
   trunk/packages/muscle/branches/upstream/current/aln.cpp
   trunk/packages/muscle/branches/upstream/current/alpha.cpp
   trunk/packages/muscle/branches/upstream/current/alpha.h
   trunk/packages/muscle/branches/upstream/current/anchors.cpp
   trunk/packages/muscle/branches/upstream/current/bittraceback.cpp
   trunk/packages/muscle/branches/upstream/current/blosumla.cpp
   trunk/packages/muscle/branches/upstream/current/clust.cpp
   trunk/packages/muscle/branches/upstream/current/clust.h
   trunk/packages/muscle/branches/upstream/current/cluster.cpp
   trunk/packages/muscle/branches/upstream/current/cluster.h
   trunk/packages/muscle/branches/upstream/current/clustset.h
   trunk/packages/muscle/branches/upstream/current/clustsetdf.h
   trunk/packages/muscle/branches/upstream/current/clustsetmsa.h
   trunk/packages/muscle/branches/upstream/current/clwwt.cpp
   trunk/packages/muscle/branches/upstream/current/color.cpp
   trunk/packages/muscle/branches/upstream/current/cons.cpp
   trunk/packages/muscle/branches/upstream/current/diaglist.cpp
   trunk/packages/muscle/branches/upstream/current/diaglist.h
   trunk/packages/muscle/branches/upstream/current/diffobjscore.cpp
   trunk/packages/muscle/branches/upstream/current/diffpaths.cpp
   trunk/packages/muscle/branches/upstream/current/difftrees.cpp
   trunk/packages/muscle/branches/upstream/current/difftreese.cpp
   trunk/packages/muscle/branches/upstream/current/distcalc.cpp
   trunk/packages/muscle/branches/upstream/current/distcalc.h
   trunk/packages/muscle/branches/upstream/current/distfunc.cpp
   trunk/packages/muscle/branches/upstream/current/distfunc.h
   trunk/packages/muscle/branches/upstream/current/distpwkimura.cpp
   trunk/packages/muscle/branches/upstream/current/domuscle.cpp
   trunk/packages/muscle/branches/upstream/current/dosp.cpp
   trunk/packages/muscle/branches/upstream/current/dpregionlist.h
   trunk/packages/muscle/branches/upstream/current/dpreglist.cpp
   trunk/packages/muscle/branches/upstream/current/dpreglist.h
   trunk/packages/muscle/branches/upstream/current/drawtree.cpp
   trunk/packages/muscle/branches/upstream/current/edgelist.cpp
   trunk/packages/muscle/branches/upstream/current/edgelist.h
   trunk/packages/muscle/branches/upstream/current/enumopts.cpp
   trunk/packages/muscle/branches/upstream/current/enumopts.h
   trunk/packages/muscle/branches/upstream/current/enums.h
   trunk/packages/muscle/branches/upstream/current/enumtostr.cpp
   trunk/packages/muscle/branches/upstream/current/estring.cpp
   trunk/packages/muscle/branches/upstream/current/estring.h
   trunk/packages/muscle/branches/upstream/current/fasta.cpp
   trunk/packages/muscle/branches/upstream/current/fasta2.cpp
   trunk/packages/muscle/branches/upstream/current/fastclust.cpp
   trunk/packages/muscle/branches/upstream/current/fastdist.cpp
   trunk/packages/muscle/branches/upstream/current/fastdistjones.cpp
   trunk/packages/muscle/branches/upstream/current/fastdistkbit.cpp
   trunk/packages/muscle/branches/upstream/current/fastdistkmer.cpp
   trunk/packages/muscle/branches/upstream/current/fastdistmafft.cpp
   trunk/packages/muscle/branches/upstream/current/fastdistnuc.cpp
   trunk/packages/muscle/branches/upstream/current/fastscorepath2.cpp
   trunk/packages/muscle/branches/upstream/current/finddiags.cpp
   trunk/packages/muscle/branches/upstream/current/finddiagsn.cpp
   trunk/packages/muscle/branches/upstream/current/gapscoredimer.h
   trunk/packages/muscle/branches/upstream/current/glbalign.cpp
   trunk/packages/muscle/branches/upstream/current/glbalign352.cpp
   trunk/packages/muscle/branches/upstream/current/glbaligndiag.cpp
   trunk/packages/muscle/branches/upstream/current/glbalignle.cpp
   trunk/packages/muscle/branches/upstream/current/glbalignsimple.cpp
   trunk/packages/muscle/branches/upstream/current/glbalignsp.cpp
   trunk/packages/muscle/branches/upstream/current/glbalignspn.cpp
   trunk/packages/muscle/branches/upstream/current/glbalignss.cpp
   trunk/packages/muscle/branches/upstream/current/glbalndimer.cpp
   trunk/packages/muscle/branches/upstream/current/globals.cpp
   trunk/packages/muscle/branches/upstream/current/globalslinux.cpp
   trunk/packages/muscle/branches/upstream/current/globalswin32.cpp
   trunk/packages/muscle/branches/upstream/current/gonnet.cpp
   trunk/packages/muscle/branches/upstream/current/gonnet.h
   trunk/packages/muscle/branches/upstream/current/gotowt.cpp
   trunk/packages/muscle/branches/upstream/current/henikoffweight.cpp
   trunk/packages/muscle/branches/upstream/current/henikoffweightpb.cpp
   trunk/packages/muscle/branches/upstream/current/html.cpp
   trunk/packages/muscle/branches/upstream/current/hydro.cpp
   trunk/packages/muscle/branches/upstream/current/intmath.cpp
   trunk/packages/muscle/branches/upstream/current/intmath.h
   trunk/packages/muscle/branches/upstream/current/local.cpp
   trunk/packages/muscle/branches/upstream/current/main.cpp
   trunk/packages/muscle/branches/upstream/current/makerootmsa.cpp
   trunk/packages/muscle/branches/upstream/current/makerootmsab.cpp
   trunk/packages/muscle/branches/upstream/current/mhack.cpp
   trunk/packages/muscle/branches/upstream/current/mk
   trunk/packages/muscle/branches/upstream/current/mpam200.cpp
   trunk/packages/muscle/branches/upstream/current/msa.cpp
   trunk/packages/muscle/branches/upstream/current/msa.h
   trunk/packages/muscle/branches/upstream/current/msa2.cpp
   trunk/packages/muscle/branches/upstream/current/msadist.h
   trunk/packages/muscle/branches/upstream/current/msadistkimura.cpp
   trunk/packages/muscle/branches/upstream/current/msf.cpp
   trunk/packages/muscle/branches/upstream/current/muscle.cpp
   trunk/packages/muscle/branches/upstream/current/muscle.h
   trunk/packages/muscle/branches/upstream/current/muscleout.cpp
   trunk/packages/muscle/branches/upstream/current/nucmx.cpp
   trunk/packages/muscle/branches/upstream/current/nwdasimple.cpp
   trunk/packages/muscle/branches/upstream/current/nwdasimple2.cpp
   trunk/packages/muscle/branches/upstream/current/nwdasmall.cpp
   trunk/packages/muscle/branches/upstream/current/nwrec.cpp
   trunk/packages/muscle/branches/upstream/current/nwsmall.cpp
   trunk/packages/muscle/branches/upstream/current/objscore.cpp
   trunk/packages/muscle/branches/upstream/current/objscore.h
   trunk/packages/muscle/branches/upstream/current/objscore2.cpp
   trunk/packages/muscle/branches/upstream/current/objscoreda.cpp
   trunk/packages/muscle/branches/upstream/current/onexception.cpp
   trunk/packages/muscle/branches/upstream/current/options.cpp
   trunk/packages/muscle/branches/upstream/current/outweights.cpp
   trunk/packages/muscle/branches/upstream/current/pam200mafft.cpp
   trunk/packages/muscle/branches/upstream/current/params.cpp
   trunk/packages/muscle/branches/upstream/current/params.h
   trunk/packages/muscle/branches/upstream/current/phy.cpp
   trunk/packages/muscle/branches/upstream/current/phy2.cpp
   trunk/packages/muscle/branches/upstream/current/phy3.cpp
   trunk/packages/muscle/branches/upstream/current/phy4.cpp
   trunk/packages/muscle/branches/upstream/current/phyfromclust.cpp
   trunk/packages/muscle/branches/upstream/current/phyfromfile.cpp
   trunk/packages/muscle/branches/upstream/current/physeq.cpp
   trunk/packages/muscle/branches/upstream/current/phytofile.cpp
   trunk/packages/muscle/branches/upstream/current/posgap.cpp
   trunk/packages/muscle/branches/upstream/current/ppscore.cpp
   trunk/packages/muscle/branches/upstream/current/profdb.cpp
   trunk/packages/muscle/branches/upstream/current/profile.cpp
   trunk/packages/muscle/branches/upstream/current/profile.h
   trunk/packages/muscle/branches/upstream/current/profilefrommsa.cpp
   trunk/packages/muscle/branches/upstream/current/progalign.cpp
   trunk/packages/muscle/branches/upstream/current/progress.cpp
   trunk/packages/muscle/branches/upstream/current/progressivealign.cpp
   trunk/packages/muscle/branches/upstream/current/pwpath.cpp
   trunk/packages/muscle/branches/upstream/current/pwpath.h
   trunk/packages/muscle/branches/upstream/current/readmx.cpp
   trunk/packages/muscle/branches/upstream/current/realigndiffs.cpp
   trunk/packages/muscle/branches/upstream/current/realigndiffse.cpp
   trunk/packages/muscle/branches/upstream/current/refine.cpp
   trunk/packages/muscle/branches/upstream/current/refinehoriz.cpp
   trunk/packages/muscle/branches/upstream/current/refinesubfams.cpp
   trunk/packages/muscle/branches/upstream/current/refinetree.cpp
   trunk/packages/muscle/branches/upstream/current/refinetreee.cpp
   trunk/packages/muscle/branches/upstream/current/refinevert.cpp
   trunk/packages/muscle/branches/upstream/current/refinew.cpp
   trunk/packages/muscle/branches/upstream/current/savebest.cpp
   trunk/packages/muscle/branches/upstream/current/scoregaps.cpp
   trunk/packages/muscle/branches/upstream/current/scorehistory.cpp
   trunk/packages/muscle/branches/upstream/current/scorehistory.h
   trunk/packages/muscle/branches/upstream/current/scorepp.cpp
   trunk/packages/muscle/branches/upstream/current/seq.cpp
   trunk/packages/muscle/branches/upstream/current/seq.h
   trunk/packages/muscle/branches/upstream/current/seqvect.cpp
   trunk/packages/muscle/branches/upstream/current/seqvect.h
   trunk/packages/muscle/branches/upstream/current/setblosumweights.cpp
   trunk/packages/muscle/branches/upstream/current/setgscweights.cpp
   trunk/packages/muscle/branches/upstream/current/setnewhandler.cpp
   trunk/packages/muscle/branches/upstream/current/spfast.cpp
   trunk/packages/muscle/branches/upstream/current/sptest.cpp
   trunk/packages/muscle/branches/upstream/current/stabilize.cpp
   trunk/packages/muscle/branches/upstream/current/subfam.cpp
   trunk/packages/muscle/branches/upstream/current/subfams.cpp
   trunk/packages/muscle/branches/upstream/current/sw.cpp
   trunk/packages/muscle/branches/upstream/current/termgaps.cpp
   trunk/packages/muscle/branches/upstream/current/textfile.cpp
   trunk/packages/muscle/branches/upstream/current/textfile.h
   trunk/packages/muscle/branches/upstream/current/threewaywt.cpp
   trunk/packages/muscle/branches/upstream/current/timing.h
   trunk/packages/muscle/branches/upstream/current/traceback.cpp
   trunk/packages/muscle/branches/upstream/current/tracebackopt.cpp
   trunk/packages/muscle/branches/upstream/current/tracebacksw.cpp
   trunk/packages/muscle/branches/upstream/current/tree.h
   trunk/packages/muscle/branches/upstream/current/treefrommsa.cpp
   trunk/packages/muscle/branches/upstream/current/types.h
   trunk/packages/muscle/branches/upstream/current/typetostr.cpp
   trunk/packages/muscle/branches/upstream/current/unixio.h
   trunk/packages/muscle/branches/upstream/current/upgma2.cpp
   trunk/packages/muscle/branches/upstream/current/usage.cpp
   trunk/packages/muscle/branches/upstream/current/validateids.cpp
   trunk/packages/muscle/branches/upstream/current/vtml2.cpp
   trunk/packages/muscle/branches/upstream/current/writescorefile.cpp
   trunk/packages/muscle/tags/
Log:
[svn-inject] Installing original source of muscle

Added: trunk/packages/muscle/branches/upstream/current/Makefile
===================================================================

--- trunk/packages/muscle/branches/upstream/current/Makefile	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/Makefile	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,36 @@
+# Porting notes:
+# 1. On Solaris and other platforms where the logf function
+#    is missing from the math library, add the following line
+#    to the end of muscle.h:
+#        #define logf(x)	((float) log(x))
+# 2. Linking with -static increases the executable size and thus gives a
+#    very small increase in start time, but is more portable (the binding
+#    to dynamic libraries often breaks when a new library is released).
+# 3. On OSX, -static gives the error "ld: can't locate file for: -lcrt0.o";
+#    this is fixed by deleting "-static" from the LDLIBS line below.
+
+CFLAGS = -O3 -funroll-loops -Winline -DNDEBUG=1
+LDLIBS = -lm -static
+# LDLIBS = -lm        (alternative without -static, e.g. for OSX; see note 3)
+
+OBJ = .o
+EXE =
+
+RM = rm -f
+CP = cp
+
+GPP = g++
+LD = $(GPP) $(CFLAGS)
+CPP = $(GPP) -c $(CFLAGS) 
+
+all: muscle
+
+CPPSRC = $(sort $(wildcard *.cpp))
+CPPOBJ	= $(subst .cpp,.o,$(CPPSRC))
+
+$(CPPOBJ): %.o: %.cpp
+	$(CPP) $< -o $@
+
+muscle: $(CPPOBJ)
+	$(LD) -o muscle $(CPPOBJ) $(LDLIBS)
+	strip muscle

Added: trunk/packages/muscle/branches/upstream/current/aligngivenpath.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/aligngivenpath.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/aligngivenpath.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,802 @@
+#include "muscle.h"
+#include "msa.h"
+#include "pwpath.h"
+#include "profile.h"
+
+#define	TRACE	0
+
+static void LogPP(const ProfPos &PP)	// Debug helper: writes the main fields of one profile position via Log().
+	{
+	Log("ResidueGroup   %u\n", PP.m_uResidueGroup);
+	Log("AllGaps      %d\n", PP.m_bAllGaps);
+	Log("Occ          %.3g\n", PP.m_fOcc);
+	Log("LL=%.3g LG=%.3g GL=%.3g GG=%.3g\n", PP.m_LL, PP.m_LG, PP.m_GL, PP.m_GG);
+	Log("Freqs        ");
+	for (unsigned i = 0; i < 20; ++i)	// walks the first 20 entries of m_fcCounts
+		if (PP.m_fcCounts[i] > 0)	// entries that are not positive are left out of the listing
+			Log("%c=%.3g ", LetterToChar(i), PP.m_fcCounts[i]);
+	Log("\n");
+	}
+
+static void AssertProfPosEq(const ProfPos *PA, const ProfPos *PB, unsigned i)	// Calls Quit() if position i of PA and of PB differ in any checked field.
+	{
+	const ProfPos &PPA = PA[i];
+	const ProfPos &PPB = PB[i];
+#define	eq(x)	if (PPA.m_##x != PPB.m_##x) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }
+#define be(x)	if (!BTEq(PPA.m_##x, PPB.m_##x)) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }
+	eq(bAllGaps)	// eq: compared with !=; both positions are logged before quitting
+	eq(uResidueGroup)
+
+	be(LL)	// be: compared through BTEq (presumably a tolerant floating-point compare -- confirm)
+	be(LG)
+	be(GL)
+	be(GG)
+	be(fOcc)
+	be(scoreGapOpen)
+	be(scoreGapClose)
+
+	for (unsigned j = 0; j < 20; ++j)	// per-letter fields, first 20 entries
+		{
+#define	eqj(x)	if (PPA.m_##x != PPB.m_##x) Quit("AssertProfPosEq j=%u " #x, j);
+#define bej(x)	if (!BTEq(PPA.m_##x, PPB.m_##x)) Quit("AssertProfPosEq j=%u " #x, j);
+		bej(fcCounts[j]);	// unlike eq/be, the j-variants quit without logging the two positions
+//		eqj(uSortOrder[j]) // may differ due to ties, don't check?
+		bej(AAScores[j])
+#undef eqj
+#undef bej
+		}
+#undef eq
+#undef be
+	}
+
+void AssertProfsEq(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,	// Quits unless profiles PA and PB have equal length and match at every position.
+  unsigned uLengthB)
+	{
+	if (uLengthA != uLengthB)	// a length mismatch is reported before any position is compared
+		Quit("AssertProfsEq: lengths differ %u %u", uLengthA, uLengthB);
+	for (unsigned i = 0; i < uLengthB; ++i)
+		AssertProfPosEq(PA, PB, i);	// quits on the first differing position
+	}
+
+#if	DEBUG
+static void ValidateProf(const ProfPos *Prof, unsigned uLength)	// DEBUG builds only: asserts that the LL/LG/GL/GG values of a profile are self-consistent.
+	{
+	for (unsigned i = 0; i < uLength; ++i)
+		{
+		const ProfPos &PP = Prof[i];
+
+		FCOUNT s1 = PP.m_LL + PP.m_LG + PP.m_GL + PP.m_GG;	// the four pair frequencies of one position...
+		assert(BTEq(s1, 1.0));	// ...must add up to 1
+
+		if (i > 0)
+			{
+			const ProfPos &PPPrev = Prof[i-1];
+			FCOUNT s2 = PPPrev.m_LL + PPPrev.m_GL;	// pairs at i-1 whose second symbol is a letter
+			FCOUNT s3 = PP.m_LL + PP.m_LG;	// pairs at i whose first symbol is a letter
+			assert(BTEq(s2, s3));	// both describe column i-1, so they must agree
+			}
+		if (i < uLength - 1)	// uLength >= 1 inside the loop, so the subtraction cannot wrap
+			{
+			const ProfPos &PPNext = Prof[i+1];
+			FCOUNT s4 = PP.m_LL + PP.m_GL;	// same consistency check, looking forward from i to i+1
+			FCOUNT s5 = PPNext.m_LL + PPNext.m_LG;
+			assert(BTEq(s4, s5));
+			}
+		}
+	}
+#else
+#define ValidateProf(Prof, Length)	/* expands to nothing when DEBUG is off */
+#endif
+
+static void ScoresFromFreqsPos(ProfPos *Prof, unsigned uLength, unsigned uPos)	// Fills the derived fields of Prof[uPos] from its counts and LL/LG/GL/GG values.
+	{
+	ProfPos &PP = Prof[uPos];
+	SortCounts(PP.m_fcCounts, PP.m_uSortOrder);
+	PP.m_uResidueGroup = ResidueGroupFromFCounts(PP.m_fcCounts);
+
+// "Occupancy": frequency of pairs whose second symbol is a letter
+	PP.m_fOcc = PP.m_LL + PP.m_GL;
+
+// Frequency of gap-opens in this position (i)
+// Gap open 	= letter in i-1 and gap in i
+//				= iff LG in i
+	FCOUNT fcOpen = PP.m_LG;
+
+// Frequency of gap-closes in this position
+// Gap close	= gap in i and letter in i+1
+//				= iff GL in i+1
+	FCOUNT fcClose;
+	if (uPos + 1 < uLength)
+		fcClose = Prof[uPos + 1].m_GL;
+	else
+		fcClose = PP.m_GG + PP.m_LG;	// last position: every pair ending in a gap here counts as a close
+
+	PP.m_scoreGapOpen = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen/2.0);	// half of g_scoreGapOpen, scaled by the fraction not opening a gap here
+	PP.m_scoreGapClose = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen/2.0);	// the close score is also derived from g_scoreGapOpen
+#if	DOUBLE_AFFINE
+	PP.m_scoreGapOpen2 = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen2/2.0);
+	PP.m_scoreGapClose2 = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen2/2.0);
+#endif
+
+	for (unsigned i = 0; i < g_AlphaSize; ++i)	// m_AAScores[i] = sum over j of m_fcCounts[j] * ScoreMatrix[i][j]
+		{
+		SCORE scoreSum = 0;
+		for (unsigned j = 0; j < g_AlphaSize; ++j)
+			scoreSum += PP.m_fcCounts[j]*(*g_ptrScoreMatrix)[i][j];
+		PP.m_AAScores[i] = scoreSum;
+		}
+	}
+
+void ProfScoresFromFreqs(ProfPos *Prof, unsigned uLength)	// Recomputes the derived fields of every position of Prof, in place.
+	{
+	for (unsigned i = 0; i < uLength; ++i)
+		ScoresFromFreqsPos(Prof, uLength, i);	// position i may read Prof[i+1].m_GL, hence the whole profile is passed
+	}
+
+static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,	// Emits one combined column: A's current column on top, '-' for every B row.
+  unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
+  unsigned &uColIndexCombined)
+	{
+#if	TRACE
+	Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",
+	  uColIndexA, uColIndexCombined);
+#endif
+	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)	// rows 0..uSeqCountA-1 are copied from A
+		{
+		char c = msaA.GetChar(uSeqIndexA, uColIndexA);
+		msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
+		}
+
+	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)	// B's rows follow A's rows and receive '-'
+		msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, '-');
+
+	++uColIndexCombined;	// both indices are references, so the caller's counters advance by one
+	++uColIndexA;
+	}
+
+static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,	// Emits one combined column: '-' for every A row, B's current column below.
+  unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
+  unsigned &uColIndexCombined)
+	{
+#if	TRACE
+	Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",
+	  uColIndexB, uColIndexCombined);
+#endif
+	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)	// rows 0..uSeqCountA-1 (A's rows) receive '-'
+		msaCombined.SetChar(uSeqIndexA, uColIndexCombined, '-');
+
+	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)	// B's rows are stored after A's rows
+		{
+		char c = msaB.GetChar(uSeqIndexB, uColIndexB);
+		msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
+		}
+
+	++uColIndexCombined;	// both indices are references, so the caller's counters advance by one
+	++uColIndexB;
+	}
+
+static void AppendTplInserts(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,	// Copies uColCountA columns of A and uColCountB columns of B side by side, padding the shorter run with '.'.
+  const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,
+  unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)
+	{
+#if	TRACE
+	Log("AppendTplInserts ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
+	  uColIndexA, uColIndexB, uColIndexCombined);
+#endif
+	const unsigned uLengthA = msaA.GetColCount();	// not read anywhere below
+	const unsigned uLengthB = msaB.GetColCount();	// not read anywhere below
+
+	unsigned uNewColCount = uColCountA;	// number of combined columns emitted = max(uColCountA, uColCountB)
+	if (uColCountB > uNewColCount)
+		uNewColCount = uColCountB;
+
+	for (unsigned n = 0; n < uColCountA; ++n)	// A's columns, each character passed through UnalignChar first
+		{
+		for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
+			{
+			char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);
+			c = UnalignChar(c);
+			msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);
+			}
+		}
+	for (unsigned n = uColCountA; n < uNewColCount; ++n)	// pad A's rows with '.' when B's run is longer
+		{
+		for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
+			msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');
+		}
+
+	for (unsigned n = 0; n < uColCountB; ++n)	// B's columns, stored in the rows after A's
+		{
+		for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
+			{
+			char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);
+			c = UnalignChar(c);
+			msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);
+			}
+		}
+	for (unsigned n = uColCountB; n < uNewColCount; ++n)	// pad B's rows with '.' when A's run is longer
+		{
+		for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
+			msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');
+		}
+
+	uColIndexCombined += uNewColCount;	// with both counts 0 nothing is written and no index moves
+	uColIndexA += uColCountA;
+	uColIndexB += uColCountB;
+	}
+
+static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,	// Emits one combined column made of A's current column stacked on B's current column.
+  unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,
+  MSA &msaCombined, unsigned &uColIndexCombined)
+	{
+#if	TRACE
+	Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
+	  uColIndexA, uColIndexB, uColIndexCombined);
+#endif
+
+	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)	// rows 0..uSeqCountA-1 come from A
+		{
+		char c = msaA.GetChar(uSeqIndexA, uColIndexA);
+		msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
+		}
+
+	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)	// the following uSeqCountB rows come from B
+		{
+		char c = msaB.GetChar(uSeqIndexB, uColIndexB);
+		msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
+		}
+
+	++uColIndexA;	// all three indices are references; each advances by one column
+	++uColIndexB;
+	++uColIndexCombined;
+	}
+
+void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB,	// Builds msaCombined (A's rows, then B's rows) by replaying the M/D/I edges of Path.
+  MSA &msaCombined)
+	{
+	msaCombined.Clear();	// any previous content of the output is discarded
+
+#if	TRACE
+	Log("FastAlignProfiles\n");
+	Log("Template A:\n");
+	msaA.LogMe();
+	Log("Template B:\n");
+	msaB.LogMe();
+#endif
+
+	const unsigned uColCountA = msaA.GetColCount();	// NOTE: shadowed inside the edge loop by a local of the same name
+	const unsigned uColCountB = msaB.GetColCount();	// NOTE: shadowed inside the edge loop by a local of the same name
+
+	const unsigned uSeqCountA = msaA.GetSeqCount();
+	const unsigned uSeqCountB = msaB.GetSeqCount();
+
+	msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);
+
+// Copy sequence names and ids into combined MSA (A's sequences first, then B's)
+	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
+		{
+		msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));
+		msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));
+		}
+
+	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
+		{
+		msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));
+		msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));
+		}
+
+	unsigned uColIndexA = 0;	// next unconsumed column of A
+	unsigned uColIndexB = 0;	// next unconsumed column of B
+	unsigned uColIndexCombined = 0;	// next column to write in the output
+	const unsigned uEdgeCount = Path.GetEdgeCount();
+	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
+		{
+		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
+#if	TRACE
+		Log("\nEdge %u %c%u.%u\n",
+		  uEdgeIndex,
+		  Edge.cType,
+		  Edge.uPrefixLengthA,
+		  Edge.uPrefixLengthB);
+#endif
+		const char cType = Edge.cType;
+		const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
+		unsigned uColCountA = 0;	// columns of A lying between uColIndexA and this edge's column (shadows the outer constant)
+		if (uPrefixLengthA > 0)
+			{
+			const unsigned uNodeIndexA = uPrefixLengthA - 1;	// prefix length n corresponds to column n-1
+			const unsigned uTplColIndexA = uNodeIndexA;
+			if (uTplColIndexA > uColIndexA)
+				uColCountA = uTplColIndexA - uColIndexA;
+			}
+
+		const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
+		unsigned uColCountB = 0;	// same as above, for B (shadows the outer constant)
+		if (uPrefixLengthB > 0)
+			{
+			const unsigned uNodeIndexB = uPrefixLengthB - 1;
+			const unsigned uTplColIndexB = uNodeIndexB;
+			if (uTplColIndexB > uColIndexB)
+				uColCountB = uTplColIndexB - uColIndexB;
+			}
+
+// TODO: This code looks like a hangover from HMM estimation -- can we delete it?
+		assert(uColCountA == 0);	// the path is expected to leave no skipped columns...
+		assert(uColCountB == 0);	// ...in which case the call below writes nothing
+		AppendTplInserts(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,
+		  uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
+
+		switch (cType)
+			{
+		case 'M':	// match: one column from each of A and B
+			{
+			assert(uPrefixLengthA > 0);
+			assert(uPrefixLengthB > 0);
+			const unsigned uColA = uPrefixLengthA - 1;
+			const unsigned uColB = uPrefixLengthB - 1;
+			assert(uColIndexA == uColA);
+			assert(uColIndexB == uColB);
+			AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
+			  msaCombined, uColIndexCombined);
+			break;
+			}
+		case 'D':	// delete: a column of A against gaps in B
+			{
+			assert(uPrefixLengthA > 0);
+			const unsigned uColA = uPrefixLengthA - 1;
+			assert(uColIndexA == uColA);
+			AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
+			break;
+			}
+		case 'I':	// insert: gaps in A against a column of B
+			{
+			assert(uPrefixLengthB > 0);
+			const unsigned uColB = uPrefixLengthB - 1;
+			assert(uColIndexB == uColB);
+			AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
+			break;
+			}
+		default:
+			assert(false);	// compiled out when NDEBUG is defined; an unknown edge type then emits no column
+			}
+		}
+	unsigned uInsertColCountA = uColCountA - uColIndexA;	// here uColCountA/B are the outer constants: columns left unconsumed
+	unsigned uInsertColCountB = uColCountB - uColIndexB;
+
+// TODO: This code looks like a hangover from HMM estimation -- can we delete it?
+	assert(uInsertColCountA == 0);
+	assert(uInsertColCountB == 0);
+	AppendTplInserts(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,
+	  uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
+
+	assert(msaCombined.GetColCount() == uEdgeCount);	// one output column per path edge
+	}
+
+static const ProfPos PPStart =	// Stand-in position used by SetGaps*/SetFreqs when a prefix length is 0: zero counts, all mass on LL.
+	{
+	false,		//m_bAllGaps;
+	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_uSortOrder[21];
+	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_fcCounts[20];
+	1.0,	// m_LL;
+	0.0,	// m_LG;
+	0.0,	// m_GL;
+	0.0,	// m_GG;
+	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_AAScores[20];
+	0,		// m_uResidueGroup;
+	1.0,	// m_fOcc;
+	0.0,	// m_fcStartOcc;
+	0.0,	// m_fcEndOcc;
+	0.0,	// m_scoreGapOpen;
+	0.0,	// m_scoreGapClose;
+	};
+
+// MM
+//  Ai–1	Ai		Out
+//  X		X	LL	LL
+//  X		-	LG	LG
+//  -		X	GL	GL
+//  -		-	GG	GG
+//  
+//  Bj–1	Bj
+//  X		X	LL	LL
+//  X		-	LG	LG
+//  -		X	GL	GL
+//  -		-	GG	GG
+static void SetGapsMM(	// LL/LG/GL/GG of an output column for an 'M' edge that follows an 'M' edge.
+  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,	// A's column is PA[uPrefixLengthA-1] (PPStart when the length is 0), weighted by wA
+  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,	// B's column is PB[uPrefixLengthB-1] (PPStart when the length is 0), weighted by wB
+  ProfPos *POut, unsigned uColIndexOut)	// writes only m_LL, m_LG, m_GL and m_GG of POut[uColIndexOut]
+	{
+	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
+	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
+	ProfPos &PPO = POut[uColIndexOut];
+
+	PPO.m_LL = wA*PPA.m_LL + wB*PPB.m_LL;	// both profiles advance normally, so each value is a plain weighted sum
+	PPO.m_LG = wA*PPA.m_LG + wB*PPB.m_LG;
+	PPO.m_GL = wA*PPA.m_GL + wB*PPB.m_GL;
+	PPO.m_GG = wA*PPA.m_GG + wB*PPB.m_GG;
+	}
+
+// MD
+//  Ai–1	Ai		Out
+//  X		X	LL	LL
+//  X		-	LG	LG
+//  -		X	GL	GL
+//  -		-	GG	GG
+//  
+//  Bj		(-)
+//  X		-	?L	LG
+//  -		-	?G	GG
+static void SetGapsMD(	// LL/LG/GL/GG of an output column for a 'D' edge that follows an 'M' edge.
+  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,	// A's column is PA[uPrefixLengthA-1] (PPStart when the length is 0), weighted by wA
+  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,	// B's column is PB[uPrefixLengthB-1] (PPStart when the length is 0), weighted by wB
+  ProfPos *POut, unsigned uColIndexOut)	// writes only m_LL, m_LG, m_GL and m_GG of POut[uColIndexOut]
+	{
+	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
+	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
+	ProfPos &PPO = POut[uColIndexOut];
+
+	PPO.m_LL = wA*PPA.m_LL;	// B is all gaps in this column, so it cannot contribute to LL or GL
+	PPO.m_LG = wA*PPA.m_LG + wB*(PPB.m_LL + PPB.m_GL);	// B: letter in its column (LL or GL) followed by the new gap
+	PPO.m_GL = wA*PPA.m_GL;
+	PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);	// B: gap in its column (LG or GG) followed by the new gap
+	}
+
+// DD
+//  Ai–1	Ai		Out
+//  X		X	LL	LL
+//  X		-	LG	LG
+//  -		X	GL	GL
+//  -		-	GG	GG
+//  
+//  (-)		(-)
+//  -		-	??	GG
+static void SetGapsDD(	// LL/LG/GL/GG of an output column for a 'D' edge that follows a 'D' edge.
+  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,	// A's column is PA[uPrefixLengthA-1] (PPStart when the length is 0), weighted by wA
+  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,	// PB and uPrefixLengthB are not read here; only the weight wB is used
+  ProfPos *POut, unsigned uColIndexOut)	// writes only m_LL, m_LG, m_GL and m_GG of POut[uColIndexOut]
+	{
+	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
+	ProfPos &PPO = POut[uColIndexOut];
+
+	PPO.m_LL = wA*PPA.m_LL;
+	PPO.m_LG = wA*PPA.m_LG;
+	PPO.m_GL = wA*PPA.m_GL;
+	PPO.m_GG = wA*PPA.m_GG + wB;	// B is gap in both the previous and this column: all of its weight goes to GG
+	}
+
+// MI
+//  Ai		(-)		Out
+//  X		-	?L	LG
+//  -		-	?G	GG
+
+//  Bj–1	Bj
+//  X		X	LL	LL
+//  X		-	LG	LG
+//  -		X	GL	GL
+//  -		-	GG	GG
+static void SetGapsMI(	// LL/LG/GL/GG of an output column for an 'I' edge that follows an 'M' edge.
+  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,	// A's column is PA[uPrefixLengthA-1] (PPStart when the length is 0), weighted by wA
+  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,	// B's column is PB[uPrefixLengthB-1] (PPStart when the length is 0), weighted by wB
+  ProfPos *POut, unsigned uColIndexOut)	// writes only m_LL, m_LG, m_GL and m_GG of POut[uColIndexOut]
+	{
+	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
+	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
+	ProfPos &PPO = POut[uColIndexOut];
+
+	PPO.m_LL = wB*PPB.m_LL;	// A is all gaps in this column, so it cannot contribute to LL or GL
+	PPO.m_LG = wB*PPB.m_LG + wA*(PPA.m_LL + PPA.m_GL);	// A: letter in its column (LL or GL) followed by the new gap
+	PPO.m_GL = wB*PPB.m_GL;
+	PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);	// A: gap in its column (LG or GG) followed by the new gap
+	}
+
+// DM
+//  Ai–1	Ai		Out
+//  X		X	LL	LL
+//  X		-	LG	LG
+//  -		X	GL	GL
+//  -		-	GG	GG
+//  
+//  (-)		Bj		
+//  -		X		?L	GL
+//  -		-		?G	GG
+static void SetGapsDM(	// LL/LG/GL/GG of an output column for an 'M' edge that follows a 'D' edge.
+  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,	// A's column is PA[uPrefixLengthA-1] (PPStart when the length is 0), weighted by wA
+  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,	// B's column is PB[uPrefixLengthB-1] (PPStart when the length is 0), weighted by wB
+  ProfPos *POut, unsigned uColIndexOut)	// writes only m_LL, m_LG, m_GL and m_GG of POut[uColIndexOut]
+	{
+	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
+	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
+	ProfPos &PPO = POut[uColIndexOut];
+
+	PPO.m_LL = wA*PPA.m_LL;	// B was all gaps in the previous output column, so it cannot contribute to LL or LG
+	PPO.m_LG = wA*PPA.m_LG;
+	PPO.m_GL = wA*PPA.m_GL + wB*(PPB.m_LL + PPB.m_GL);	// B: previous gap followed by a letter in its column (LL or GL)
+	PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);	// B: previous gap followed by a gap in its column (LG or GG)
+	}
+
+// IM
+//  (-)		Ai		Out		
+//  -		X	?L	GL
+//  -		-	?G	GG
+
+//  Bj–1	Bj
+//  X		X	LL	LL
+//  X		-	LG	LG
+//  -		X	GL	GL
+//  -		-	GG	GG
+static void SetGapsIM(	// LL/LG/GL/GG of an output column for an 'M' edge that follows an 'I' edge.
+  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,	// A's column is PA[uPrefixLengthA-1] (PPStart when the length is 0), weighted by wA
+  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,	// B's column is PB[uPrefixLengthB-1] (PPStart when the length is 0), weighted by wB
+  ProfPos *POut, unsigned uColIndexOut)	// writes only m_LL, m_LG, m_GL and m_GG of POut[uColIndexOut]
+	{
+	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
+	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
+	ProfPos &PPO = POut[uColIndexOut];
+
+	PPO.m_LL = wB*PPB.m_LL;	// A was all gaps in the previous output column, so it cannot contribute to LL or LG
+	PPO.m_LG = wB*PPB.m_LG;
+	PPO.m_GL = wB*PPB.m_GL + wA*(PPA.m_LL + PPA.m_GL);	// A: previous gap followed by a letter in its column (LL or GL)
+	PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);	// A: previous gap followed by a gap in its column (LG or GG)
+	}
+
+// ID
+//  (-)		Ai		Out
+//  -		X	?L	GL
+//  -		-	?G	GG
+
+//  Bj		(-)
+//  X		-	?L	LG
+//  -		-	?G	GG
+static void SetGapsID(	// LL/LG/GL/GG of an output column for a 'D' edge that follows an 'I' edge.
+  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,	// A's column is PA[uPrefixLengthA-1] (PPStart when the length is 0), weighted by wA
+  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,	// B's column is PB[uPrefixLengthB-1] (PPStart when the length is 0), weighted by wB
+  ProfPos *POut, unsigned uColIndexOut)	// writes only m_LL, m_LG, m_GL and m_GG of POut[uColIndexOut]
+	{
+	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
+	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
+	ProfPos &PPO = POut[uColIndexOut];
+
+	PPO.m_LL = 0;	// A was gap in the previous column and B is gap in this one: no letter-letter pair is possible
+	PPO.m_LG = wB*PPB.m_GL + wB*PPB.m_LL;	// B: letter in its column followed by the new gap
+	PPO.m_GL = wA*PPA.m_GL + wA*PPA.m_LL;	// A: previous gap followed by a letter in its column
+	PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);	// both: gap in their own column
+	}
+
+// DI
+//  Ai		(-)		Out
+//  X		-	?L	LG
+//  -		-	?G	GG
+
+//  (-)		Bj
+//  -		X	?L	GL
+//  -		-	?G	GG
+static void SetGapsDI(	// LL/LG/GL/GG of an output column for an 'I' edge that follows a 'D' edge.
+  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,	// A's column is PA[uPrefixLengthA-1] (PPStart when the length is 0), weighted by wA
+  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,	// B's column is PB[uPrefixLengthB-1] (PPStart when the length is 0), weighted by wB
+  ProfPos *POut, unsigned uColIndexOut)	// writes only m_LL, m_LG, m_GL and m_GG of POut[uColIndexOut]
+	{
+	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
+	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
+	ProfPos &PPO = POut[uColIndexOut];
+
+	PPO.m_LL = 0;	// B was gap in the previous column and A is gap in this one: no letter-letter pair is possible
+	PPO.m_LG = wA*PPA.m_GL + wA*PPA.m_LL;	// A: letter in its column followed by the new gap
+	PPO.m_GL = wB*PPB.m_GL + wB*PPB.m_LL;	// B: previous gap followed by a letter in its column
+	PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);	// both: gap in their own column
+	}
+
+// II
+//  (-)		(-)		Out
+//  -		-	??	GG
+
+//  Bj–1	Bj
+//  X		X	LL	LL
+//  X		-	LG	LG
+//  -		X	GL	GL
+//  -		-	GG	GG
+static void SetGapsII(	// LL/LG/GL/GG of an output column for an 'I' edge that follows an 'I' edge.
+  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,	// PA and uPrefixLengthA are not read here; only the weight wA is used
+  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,	// B's column is PB[uPrefixLengthB-1] (PPStart when the length is 0), weighted by wB
+  ProfPos *POut, unsigned uColIndexOut)	// writes only m_LL, m_LG, m_GL and m_GG of POut[uColIndexOut]
+	{
+	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
+	ProfPos &PPO = POut[uColIndexOut];
+
+	PPO.m_LL = wB*PPB.m_LL;
+	PPO.m_LG = wB*PPB.m_LG;
+	PPO.m_GL = wB*PPB.m_GL;
+	PPO.m_GG = wB*PPB.m_GG + wA;	// A is gap in both the previous and this column: all of its weight goes to GG
+	}
+
+static void SetFreqs(	// Sets m_fcCounts of POut[uColIndexOut] to a weighted mix of the counts of one A column and one B column.
+  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,	// A's column is PA[uPrefixLengthA-1] (PPStart when the length is 0), weighted by wA
+  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,	// B's column is PB[uPrefixLengthB-1] (PPStart when the length is 0), weighted by wB
+  ProfPos *POut, unsigned uColIndexOut)
+	{
+	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
+	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
+	ProfPos &PPO = POut[uColIndexOut];
+
+	if (g_bNormalizeCounts)
+		{
+		const FCOUNT fA = PPA.m_fOcc*wA/(wA + wB);	// relative weight of A scaled by A's occupancy; wA + wB must be non-zero here
+		const FCOUNT fB = PPB.m_fOcc*wB/(wA + wB);
+		FCOUNT fTotal = 0;
+		for (unsigned i = 0; i < 20; ++i)
+			{
+			const FCOUNT f = fA*PPA.m_fcCounts[i] + fB*PPB.m_fcCounts[i];
+			PPO.m_fcCounts[i] = f;
+			fTotal += f;
+			}
+		if (fTotal > 0)	// rescale so the 20 counts sum to 1; an all-zero column is left as zeros
+			for (unsigned i = 0; i < 20; ++i)
+				PPO.m_fcCounts[i] /= fTotal;
+		}
+	else
+		{
+		for (unsigned i = 0; i < 20; ++i)	// without normalisation: plain weighted sum, occupancy not applied
+			PPO.m_fcCounts[i] = wA*PPA.m_fcCounts[i] + wB*PPB.m_fcCounts[i];
+		}
+	}
+
+void AlignTwoProfsGivenPath(const PWPath &Path,	// Merges profiles PA and PB along Path into a newly allocated profile with one position per edge.
+  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,	// NOTE: parameter uPrefixLengthA is never read; the loop declares a local of the same name
+  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,	// NOTE: parameter uPrefixLengthB is never read, for the same reason
+  ProfPos **ptrPOut, unsigned *ptruLengthOut)	// outputs: the new[]-allocated profile and its length (the edge count)
+	{
+#if	TRACE
+	Log("AlignTwoProfsGivenPath wA=%.3g wB=%.3g Path=\n", wA, wB);
+	Path.LogMe();
+#endif
+	assert(BTEq(wA + wB, 1.0));	// the two weights are expected to sum to 1
+
+	unsigned uColIndexA = 0;	// incremented per edge but only read by the TRACE logging
+	unsigned uColIndexB = 0;	// incremented per edge but only read by the TRACE logging
+	unsigned uColIndexOut = 0;	// next output position to fill
+	const unsigned uEdgeCount = Path.GetEdgeCount();
+	ProfPos *POut = new ProfPos[uEdgeCount];	// handed to the caller through *ptrPOut; not freed in this function
+	char cPrevType = 'M';	// the first edge is handled as if it followed a match
+	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
+		{
+		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
+		const char cType = Edge.cType;
+
+		const unsigned uPrefixLengthA = Edge.uPrefixLengthA;	// shadows the function parameter
+		const unsigned uPrefixLengthB = Edge.uPrefixLengthB;	// shadows the function parameter
+
+#if	TRACE
+		Log("\nEdge %u %c%u.%u ColA=%u ColB=%u\n",
+		  uEdgeIndex,
+		  Edge.cType,
+		  Edge.uPrefixLengthA,
+		  Edge.uPrefixLengthB,
+		  uColIndexA,
+		  uColIndexB);
+#endif
+
+		POut[uColIndexOut].m_bAllGaps = false;
+		switch (cType)	// outer switch: current edge type; inner switches: previous edge type
+			{
+		case 'M':
+			{
+			assert(uPrefixLengthA > 0);
+			assert(uPrefixLengthB > 0);
+			SetFreqs(
+			  PA, uPrefixLengthA, wA,
+			  PB, uPrefixLengthB, wB,
+			  POut, uColIndexOut);
+			switch (cPrevType)
+				{
+			case 'M':
+				SetGapsMM(
+				  PA, uPrefixLengthA, wA,
+				  PB, uPrefixLengthB, wB,
+				  POut, uColIndexOut);
+			  break;
+			case 'D':
+				SetGapsDM(
+				  PA, uPrefixLengthA, wA,
+				  PB, uPrefixLengthB, wB,
+				  POut, uColIndexOut);
+				break;
+			case 'I':
+				SetGapsIM(
+				  PA, uPrefixLengthA, wA,
+				  PB, uPrefixLengthB, wB,
+				  POut, uColIndexOut);
+				break;
+			default:
+				Quit("Bad cPrevType");
+				}
+			++uColIndexA;
+			++uColIndexB;
+			++uColIndexOut;
+			break;
+			}
+		case 'D':
+			{
+			assert(uPrefixLengthA > 0);
+			SetFreqs(
+			  PA, uPrefixLengthA, wA,
+			  PB, uPrefixLengthB, 0,	// B is given weight 0 for the residue counts of a 'D' column
+			  POut, uColIndexOut);
+			switch (cPrevType)
+				{
+			case 'M':
+				SetGapsMD(
+				  PA, uPrefixLengthA, wA,
+				  PB, uPrefixLengthB, wB,
+				  POut, uColIndexOut);
+			  break;
+			case 'D':
+				SetGapsDD(
+				  PA, uPrefixLengthA, wA,
+				  PB, uPrefixLengthB, wB,
+				  POut, uColIndexOut);
+				break;
+			case 'I':
+				SetGapsID(
+				  PA, uPrefixLengthA, wA,
+				  PB, uPrefixLengthB, wB,
+				  POut, uColIndexOut);
+				break;
+			default:
+				Quit("Bad cPrevType");
+				}
+			++uColIndexA;
+			++uColIndexOut;
+			break;
+			}
+		case 'I':
+			{
+			assert(uPrefixLengthB > 0);
+			SetFreqs(
+			  PA, uPrefixLengthA, 0,	// A is given weight 0 for the residue counts of an 'I' column
+			  PB, uPrefixLengthB, wB,
+			  POut, uColIndexOut);
+			switch (cPrevType)
+				{
+			case 'M':
+				SetGapsMI(
+				  PA, uPrefixLengthA, wA,
+				  PB, uPrefixLengthB, wB,
+				  POut, uColIndexOut);
+			  break;
+			case 'D':
+				SetGapsDI(
+				  PA, uPrefixLengthA, wA,
+				  PB, uPrefixLengthB, wB,
+				  POut, uColIndexOut);
+				break;
+			case 'I':
+				SetGapsII(
+				  PA, uPrefixLengthA, wA,
+				  PB, uPrefixLengthB, wB,
+				  POut, uColIndexOut);
+				break;
+			default:
+				Quit("Bad cPrevType");
+				}
+			++uColIndexB;
+			++uColIndexOut;
+			break;
+			}
+		default:
+			assert(false);	// compiled out when NDEBUG is defined; an unknown edge type then fills no output position
+			}
+		cPrevType = cType;
+		}
+	assert(uColIndexOut == uEdgeCount);	// every edge must have produced exactly one output position
+
+	ProfScoresFromFreqs(POut, uEdgeCount);	// derive occupancy, gap scores and letter scores from the values set above
+	ValidateProf(POut, uEdgeCount);	// a no-op unless DEBUG is set
+
+	*ptrPOut = POut;
+	*ptruLengthOut = uEdgeCount;
+
+#if	TRACE
+	Log("AlignTwoProfsGivenPath:\n");
+	ListProfile(POut, uEdgeCount, 0);
+#endif
+	}

Added: trunk/packages/muscle/branches/upstream/current/aligngivenpathsw.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/aligngivenpathsw.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/aligngivenpathsw.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,237 @@
+#include "muscle.h"
+#include "msa.h"
+#include "pwpath.h"
+#include "profile.h"
+
+#define	TRACE	0
+
+static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,
+  unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
+  unsigned &uColIndexCombined)
+	{
+#if	TRACE
+	Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",
+	  uColIndexA, uColIndexCombined);
+#endif
+	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
+		{
+		char c = msaA.GetChar(uSeqIndexA, uColIndexA);
+		msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
+		}
+
+	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
+		msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, '-');
+
+	++uColIndexCombined;
+	++uColIndexA;
+	}
+
+static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,
+  unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
+  unsigned &uColIndexCombined)
+	{
+#if	TRACE
+	Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",
+	  uColIndexB, uColIndexCombined);
+#endif
+	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
+		msaCombined.SetChar(uSeqIndexA, uColIndexCombined, '-');
+
+	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
+		{
+		char c = msaB.GetChar(uSeqIndexB, uColIndexB);
+		msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
+		}
+
+	++uColIndexCombined;
+	++uColIndexB;
+	}
+
+static void AppendUnalignedTerminals(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,
+  const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,
+  unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)
+	{
+#if	TRACE
+	Log("AppendUnalignedTerminals ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
+	  uColIndexA, uColIndexB, uColIndexCombined);
+#endif
+	const unsigned uLengthA = msaA.GetColCount();
+	const unsigned uLengthB = msaB.GetColCount();
+
+	unsigned uNewColCount = uColCountA;
+	if (uColCountB > uNewColCount)
+		uNewColCount = uColCountB;
+
+	for (unsigned n = 0; n < uColCountA; ++n)
+		{
+		for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
+			{
+			char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);
+			c = UnalignChar(c);
+			msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);
+			}
+		}
+	for (unsigned n = uColCountA; n < uNewColCount; ++n)
+		{
+		for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
+			msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');
+		}
+
+	for (unsigned n = 0; n < uColCountB; ++n)
+		{
+		for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
+			{
+			char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);
+			c = UnalignChar(c);
+			msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);
+			}
+		}
+	for (unsigned n = uColCountB; n < uNewColCount; ++n)
+		{
+		for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
+			msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');
+		}
+
+	uColIndexCombined += uNewColCount;
+	uColIndexA += uColCountA;
+	uColIndexB += uColCountB;
+	}
+
+static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,
+  unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,
+  MSA &msaCombined, unsigned &uColIndexCombined)
+	{
+#if	TRACE
+	Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
+	  uColIndexA, uColIndexB, uColIndexCombined);
+#endif
+
+	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
+		{
+		char c = msaA.GetChar(uSeqIndexA, uColIndexA);
+		msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
+		}
+
+	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
+		{
+		char c = msaB.GetChar(uSeqIndexB, uColIndexB);
+		msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
+		}
+
+	++uColIndexA;
+	++uColIndexB;
+	++uColIndexCombined;
+	}
+
+void AlignTwoMSAsGivenPathSW(const PWPath &Path, const MSA &msaA, const MSA &msaB,
+  MSA &msaCombined)
+	{
+	msaCombined.Clear();
+
+#if	TRACE
+	Log("AlignTwoMSAsGivenPathSW\n");
+	Log("Template A:\n");
+	msaA.LogMe();
+	Log("Template B:\n");
+	msaB.LogMe();
+#endif
+
+	const unsigned uColCountA = msaA.GetColCount();
+	const unsigned uColCountB = msaB.GetColCount();
+
+	const unsigned uSeqCountA = msaA.GetSeqCount();
+	const unsigned uSeqCountB = msaB.GetSeqCount();
+
+	msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);
+
+// Copy sequence names into combined MSA
+	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
+		{
+		msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));
+		msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));
+		}
+
+	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
+		{
+		msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));
+		msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));
+		}
+
+	unsigned uColIndexA = 0;
+	unsigned uColIndexB = 0;
+	unsigned uColIndexCombined = 0;
+	const unsigned uEdgeCount = Path.GetEdgeCount();
+	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
+		{
+		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
+#if	TRACE
+		Log("\nEdge %u %c%u.%u\n",
+		  uEdgeIndex,
+		  Edge.cType,
+		  Edge.uPrefixLengthA,
+		  Edge.uPrefixLengthB);
+#endif
+		const char cType = Edge.cType;
+		const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
+		unsigned uColCountA = 0;
+		if (uPrefixLengthA > 0)
+			{
+			const unsigned uNodeIndexA = uPrefixLengthA - 1;
+			const unsigned uTplColIndexA = uNodeIndexA;
+			if (uTplColIndexA > uColIndexA)
+				uColCountA = uTplColIndexA - uColIndexA;
+			}
+
+		const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
+		unsigned uColCountB = 0;
+		if (uPrefixLengthB > 0)
+			{
+			const unsigned uNodeIndexB = uPrefixLengthB - 1;
+			const unsigned uTplColIndexB = uNodeIndexB;
+			if (uTplColIndexB > uColIndexB)
+				uColCountB = uTplColIndexB - uColIndexB;
+			}
+
+		AppendUnalignedTerminals(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,
+		  uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
+
+		switch (cType)
+			{
+		case 'M':
+			{
+			assert(uPrefixLengthA > 0);
+			assert(uPrefixLengthB > 0);
+			const unsigned uColA = uPrefixLengthA - 1;
+			const unsigned uColB = uPrefixLengthB - 1;
+			assert(uColIndexA == uColA);
+			assert(uColIndexB == uColB);
+			AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
+			  msaCombined, uColIndexCombined);
+			break;
+			}
+		case 'D':
+			{
+			assert(uPrefixLengthA > 0);
+			const unsigned uColA = uPrefixLengthA - 1;
+			assert(uColIndexA == uColA);
+			AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
+			break;
+			}
+		case 'I':
+			{
+			assert(uPrefixLengthB > 0);
+			const unsigned uColB = uPrefixLengthB - 1;
+			assert(uColIndexB == uColB);
+			AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
+			break;
+			}
+		default:
+			assert(false);
+			}
+		}
+	unsigned uInsertColCountA = uColCountA - uColIndexA;
+	unsigned uInsertColCountB = uColCountB - uColIndexB;
+
+	AppendUnalignedTerminals(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,
+	  uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
+	}

Added: trunk/packages/muscle/branches/upstream/current/aligntwomsas.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/aligntwomsas.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/aligntwomsas.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,41 @@
+#include "muscle.h"
+#include "msa.h"
+#include "profile.h"
+#include "pwpath.h"
+#include "textfile.h"
+#include "timing.h"
+
+SCORE AlignTwoMSAs(const MSA &msa1, const MSA &msa2, MSA &msaOut, PWPath &Path,
+  bool bLockLeft, bool bLockRight)	// aligns msa1 to msa2 via their profiles; returns the GlobalAlign score
+	{
+	const unsigned uLengthA = msa1.GetColCount();
+	const unsigned uLengthB = msa2.GetColCount();
+
+	ProfPos *PA = ProfileFromMSA(msa1);	// released with delete[] below
+	ProfPos *PB = ProfileFromMSA(msa2);	// released with delete[] below
+
+	if (bLockLeft)	// gap-open score of the first position of both profiles becomes MINUS_INFINITY
+		{
+		PA[0].m_scoreGapOpen = MINUS_INFINITY;
+		PB[0].m_scoreGapOpen = MINUS_INFINITY;
+		}
+
+	if (bLockRight)	// gap-close score of the last position of both profiles becomes MINUS_INFINITY
+		{
+		PA[uLengthA-1].m_scoreGapClose = MINUS_INFINITY;	// NOTE(review): assumes uLengthA > 0
+		PB[uLengthB-1].m_scoreGapClose = MINUS_INFINITY;	// NOTE(review): assumes uLengthB > 0
+		}
+
+	float r = (float) uLengthA/ (float) (uLengthB + 1); // +1 to prevent div 0. NOTE(review): r is not used below
+	if (r < 1)
+		r = 1/r;
+
+	SCORE Score = GlobalAlign(PA, uLengthA, PB, uLengthB, Path);	// fills Path
+
+	AlignTwoMSAsGivenPath(Path, msa1, msa2, msaOut);	// builds msaOut from Path
+
+	delete[] PA;
+	delete[] PB;
+
+	return Score;
+	}

Added: trunk/packages/muscle/branches/upstream/current/aligntwoprofs.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/aligntwoprofs.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/aligntwoprofs.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,31 @@
+#include "muscle.h"
+#include "msa.h"
+#include "profile.h"
+#include "pwpath.h"
+
+SCORE GlobalAlign4(ProfPos *PA, unsigned uLengthA, ProfPos *PB,
+  unsigned uLengthB, PWPath &Path);
+
+SCORE AlignTwoProfs(
+  const ProfPos *PA, unsigned uLengthA, WEIGHT wA,
+  const ProfPos *PB, unsigned uLengthB, WEIGHT wB,
+  PWPath &Path, ProfPos **ptrPout, unsigned *ptruLengthOut)
+	{
+// Aligns profile PA (uLengthA positions, weight wA) to profile PB
+// (uLengthB positions, weight wB). Path receives the alignment path,
+// *ptrPout / *ptruLengthOut the output profile; returns the GlobalAlign score.
+	assert(uLengthA < 100000);
+	assert(uLengthB < 100000);
+
+	SCORE Score = GlobalAlign(PA, uLengthA, PB, uLengthB, Path);
+
+// Weights are scaled to sum to 1. The length passed with PA is uLengthA.
+	AlignTwoProfsGivenPath(Path, PA, uLengthA, wA/(wA + wB), PB, uLengthB, wB/(wA + wB),
+	  ptrPout, ptruLengthOut);
+
+#if	HYDRO
+	if (ALPHA_Amino == g_Alpha)
+		Hydro(*ptrPout, *ptruLengthOut);
+#endif
+	return Score;
+	}

Added: trunk/packages/muscle/branches/upstream/current/aln.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/aln.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/aln.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,170 @@
+#include "muscle.h"
+#include <stdio.h>
+#include <ctype.h>
+#include "msa.h"
+#include "textfile.h"
+
+const unsigned uCharsPerLine = 60;
+const int MIN_NAME = 10;
+const int MAX_NAME = 32;
+
+static char GetAlnConsensusChar(const MSA &a, unsigned uColIndex);
+
+void MSA::ToAlnFile(TextFile &File) const	// writes the alignment in CLUSTAL (.aln) block layout
+	{
+	if (g_bClwStrict)
+		File.PutString("CLUSTAL W (1.81) multiple sequence alignment\n");
+	else
+		{
+		File.PutString("MUSCLE ("
+		  MUSCLE_MAJOR_VERSION "." MUSCLE_MINOR_VERSION ")"
+		  " multiple sequence alignment\n");
+		File.PutString("\n");
+		}
+// Name column width: longest name (cut at its first blank), clamped to MIN_NAME..MAX_NAME.
+	int iLongestNameLength = 0;
+	for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
+		{
+		const char *ptrName = GetSeqName(uSeqIndex);
+		const char *ptrBlank = strchr(ptrName, ' ');
+		int iLength;
+		if (0 != ptrBlank)
+			iLength = (int) (ptrBlank - ptrName);
+		else
+			iLength = (int) strlen(ptrName);
+		if (iLength > iLongestNameLength)
+			iLongestNameLength = iLength;
+		}
+	if (iLongestNameLength > MAX_NAME)
+		iLongestNameLength = MAX_NAME;
+	if (iLongestNameLength < MIN_NAME)
+		iLongestNameLength = MIN_NAME;
+// Round up; an alignment with zero columns gives zero blocks instead of an unsigned underflow.
+	unsigned uLineCount = (GetColCount() + uCharsPerLine - 1)/uCharsPerLine;
+	for (unsigned uLineIndex = 0; uLineIndex < uLineCount; ++uLineIndex)
+		{
+		File.PutString("\n");
+		unsigned uStartColIndex = uLineIndex*uCharsPerLine;
+		unsigned uEndColIndex = uStartColIndex + uCharsPerLine - 1;	// inclusive
+		if (uEndColIndex >= GetColCount())
+			uEndColIndex = GetColCount() - 1;
+		char Name[MAX_NAME+1];
+		for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
+			{
+			const char *ptrName = GetSeqName(uSeqIndex);
+			const char *ptrBlank = strchr(ptrName, ' ');
+			int iLength;
+			if (0 != ptrBlank)
+				iLength = (int) (ptrBlank - ptrName);
+			else
+				iLength = (int) strlen(ptrName);
+			if (iLength > MAX_NAME)
+				iLength = MAX_NAME;
+			memset(Name, ' ', MAX_NAME);	// blank-pad, then overlay the name
+			memcpy(Name, ptrName, iLength);
+			Name[iLongestNameLength] = 0;
+
+			File.PutFormat("%s      ", Name);
+			for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
+			  ++uColIndex)
+				{
+				const char c = GetChar(uSeqIndex, uColIndex);
+				File.PutFormat("%c", toupper((unsigned char) c));	// ctype needs an unsigned char value
+				}
+			File.PutString("\n");
+			}
+// Consensus line: blank name column followed by one symbol per column.
+		memset(Name, ' ', MAX_NAME);
+		Name[iLongestNameLength] = 0;
+		File.PutFormat("%s      ", Name);
+		for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
+		  ++uColIndex)
+			{
+			const char c = GetAlnConsensusChar(*this, uColIndex);
+			File.PutChar(c);
+			}
+		File.PutString("\n");
+		}
+	}
+
+static char GetAlnConsensusChar(const MSA &a, unsigned uColIndex)	// CLUSTAL-style conservation symbol for one column
+	{
+	const unsigned uSeqCount = a.GetSeqCount();
+	unsigned BitMap = 0;	// one bit per distinct letter found in the column
+	unsigned Count = 0;	// number of distinct letters found
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		unsigned uLetter = a.GetLetterEx(uSeqIndex, uColIndex);
+		assert(uLetter < 32);
+		unsigned Bit = (1u << uLetter);	// unsigned shift: bit 31 is valid
+		if (!(BitMap & Bit))
+			++Count;
+		BitMap |= Bit;
+		}
+
+//	'*' indicates positions which have a single, fully conserved residue
+	if (1 == Count)
+		return '*';
+
+	if (ALPHA_Amino != g_Alpha)
+		return ' ';
+
+#define B(a)	(1 << AX_##a)
+#define S2(a, b)		S(B(a) | B(b))
+#define S3(a, b, c)		S(B(a) | B(b) | B(c))
+#define S4(a, b, c, d)	S(B(a) | B(b) | B(c) | B(d))
+#define S(w)	if (0 == (BitMap & ~(w)) && (BitMap & (w)) != 0) return ':';
+
+#define W3(a, b, c)				W(B(a) | B(b) | B(c))
+#define W4(a, b, c, d)			W(B(a) | B(b) | B(c) | B(d))
+#define W5(a, b, c, d, e)		W(B(a) | B(b) | B(c) | B(d) | B(e))
+#define W6(a, b, c, d, e, f)	W(B(a) | B(b) | B(c) | B(d) | B(e) | B(f))
+#define W(w)	if (0 == (BitMap & ~(w)) && (BitMap & (w)) != 0) return '.';
+
+//	':' indicates that one of the following 'strong'
+// groups is fully conserved
+//                 STA
+//                 NEQK
+//                 NHQK
+//                 NDEQ
+//                 QHRK
+//                 MILV
+//                 MILF
+//                 HY
+//                 FYW
+	S3(S, T, A)
+	S4(N, E, Q, K)
+	S4(N, H, Q, K)
+	S4(N, D, E, Q)
+	S4(Q, H, R, K)
+	S4(M, I, L, V)
+	S4(M, I, L, F)
+	S2(H, Y)
+	S3(F, Y, W)
+
+//	'.' indicates that one of the following 'weaker'
+// groups is fully conserved
+//                 CSA
+//                 ATV
+//                 SAG
+//                 STNK
+//                 STPA
+//                 SGND
+//                 SNDEQK
+//                 NDEQHK
+//                 NEQHRK
+//                 FVLIM
+//                 HFY
+	W3(C, S, A)
+	W3(A, T, V)
+	W3(S, A, G)
+	W4(S, T, N, K)
+	W4(S, T, P, A)
+	W4(S, G, N, D)
+	W6(S, N, D, E, Q, K)
+	W6(N, D, E, Q, H, K)
+	W6(N, E, Q, H, R, K)
+	W5(F, V, L, I, M)
+	W3(H, F, Y)
+	return ' ';	// no group is fully conserved
+	}

Added: trunk/packages/muscle/branches/upstream/current/alpha.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/alpha.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/alpha.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,283 @@
+#include "muscle.h"
+#include <ctype.h>
+
+/***
+From Bioperl docs:
+Extended DNA / RNA alphabet
+------------------------------------------
+Symbol       Meaning      Nucleic Acid
+------------------------------------------
+    A            A           Adenine
+    C            C           Cytosine
+    G            G           Guanine
+    T            T           Thymine
+    U            U           Uracil
+    M          A or C
+    R          A or G
+    W          A or T
+    S          C or G
+    Y          C or T
+    K          G or T
+    V        A or C or G
+    H        A or C or T
+    D        A or G or T
+    B        C or G or T
+    X      G or A or T or C
+    N      G or A or T or C
+
+IUPAC-IUB SYMBOLS FOR NUCLEOTIDE NOMENCLATURE:
+         Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030.
+***/
+
+unsigned g_CharToLetter[MAX_CHAR];
+unsigned g_CharToLetterEx[MAX_CHAR];
+
+char g_LetterToChar[MAX_ALPHA];
+char g_LetterExToChar[MAX_ALPHA_EX];
+
+char g_UnalignChar[MAX_CHAR];
+char g_AlignChar[MAX_CHAR];
+
+bool g_IsWildcardChar[MAX_CHAR];
+bool g_IsResidueChar[MAX_CHAR];
+
+ALPHA g_Alpha = ALPHA_Undefined;
+unsigned g_AlphaSize = 0;
+
+#define Res(c, Letter)												\
+	{																\
+	const unsigned char Upper = (unsigned char) toupper(c);			\
+	const unsigned char Lower = (unsigned char) tolower(c);			\
+	g_CharToLetter[Upper] = Letter;									\
+	g_CharToLetter[Lower] = Letter;									\
+	g_CharToLetterEx[Upper] = Letter;								\
+	g_CharToLetterEx[Lower] = Letter;								\
+	g_LetterToChar[Letter] = Upper;									\
+	g_LetterExToChar[Letter] = Upper;								\
+	g_IsResidueChar[Upper] = true;									\
+	g_IsResidueChar[Lower] = true;									\
+	g_AlignChar[Upper] = Upper;										\
+	g_AlignChar[Lower] = Upper;										\
+	g_UnalignChar[Upper] = Lower;									\
+	g_UnalignChar[Lower] = Lower;									\
+	}
+
+#define Wild(c, Letter)												\
+	{																\
+	const unsigned char Upper = (unsigned char) toupper(c);			\
+	const unsigned char Lower = (unsigned char) tolower(c);			\
+	g_CharToLetterEx[Upper] = Letter;								\
+	g_CharToLetterEx[Lower] = Letter;								\
+	g_LetterExToChar[Letter] = Upper;								\
+	g_IsResidueChar[Upper] = true;									\
+	g_IsResidueChar[Lower] = true;									\
+	g_AlignChar[Upper] = Upper;										\
+	g_AlignChar[Lower] = Upper;										\
+	g_UnalignChar[Upper] = Lower;									\
+	g_UnalignChar[Lower] = Lower;									\
+	g_IsWildcardChar[Lower] = true;									\
+	g_IsWildcardChar[Upper] = true;									\
+	}
+
+static unsigned GetAlphaSize(ALPHA Alpha)
+	{
+	switch (Alpha)
+		{
+	case ALPHA_Amino:
+		return 20;
+
+	case ALPHA_RNA:
+	case ALPHA_DNA:
+		return 4;
+		}
+	Quit("Invalid Alpha=%d", Alpha);
+	return 0;
+	}
+
+static void InitArrays()	// puts every lookup table back into its "nothing mapped" state
+	{
+	memset(g_CharToLetter, 0xff, sizeof(g_CharToLetter));
+	memset(g_CharToLetterEx, 0xff, sizeof(g_CharToLetterEx));
+
+	memset(g_LetterToChar, '?', sizeof(g_LetterToChar));
+	memset(g_LetterExToChar, '?', sizeof(g_LetterExToChar));
+
+	memset(g_AlignChar, '?', sizeof(g_AlignChar));	// size taken from the array being cleared
+	memset(g_UnalignChar, '?', sizeof(g_UnalignChar));
+	memset(g_IsResidueChar, 0, sizeof(g_IsResidueChar));	// otherwise flags from a previous alphabet survive
+	memset(g_IsWildcardChar, 0, sizeof(g_IsWildcardChar));
+	}
+
+static void SetGapChar(char c)
+	{
+	unsigned char u = (unsigned char) c;
+
+	g_CharToLetterEx[u] = AX_GAP;
+	g_LetterExToChar[AX_GAP] = u;
+	g_AlignChar[u] = u;
+	g_UnalignChar[u] = u;
+	}
+
+static void SetAlphaDNA()
+	{
+	Res('A', NX_A)
+	Res('C', NX_C)
+	Res('G', NX_G)
+	Res('T', NX_T)
+	Wild('M', NX_M)
+	Wild('R', NX_R)
+	Wild('W', NX_W)
+	Wild('S', NX_S)
+	Wild('Y', NX_Y)
+	Wild('K', NX_K)
+	Wild('V', NX_V)
+	Wild('H', NX_H)
+	Wild('D', NX_D)
+	Wild('B', NX_B)
+	Wild('X', NX_X)
+	Wild('N', NX_N)
+	}
+
+static void SetAlphaRNA()
+	{
+	Res('A', NX_A)
+	Res('C', NX_C)
+	Res('G', NX_G)
+	Res('U', NX_U)
+	Res('T', NX_T)
+	Wild('M', NX_M)
+	Wild('R', NX_R)
+	Wild('W', NX_W)
+	Wild('S', NX_S)
+	Wild('Y', NX_Y)
+	Wild('K', NX_K)
+	Wild('V', NX_V)
+	Wild('H', NX_H)
+	Wild('D', NX_D)
+	Wild('B', NX_B)
+	Wild('X', NX_X)
+	Wild('N', NX_N)
+	}
+
+static void SetAlphaAmino()
+	{
+	Res('A', AX_A)
+	Res('C', AX_C)
+	Res('D', AX_D)
+	Res('E', AX_E)
+	Res('F', AX_F)
+	Res('G', AX_G)
+	Res('H', AX_H)
+	Res('I', AX_I)
+	Res('K', AX_K)
+	Res('L', AX_L)
+	Res('M', AX_M)
+	Res('N', AX_N)
+	Res('P', AX_P)
+	Res('Q', AX_Q)
+	Res('R', AX_R)
+	Res('S', AX_S)
+	Res('T', AX_T)
+	Res('V', AX_V)
+	Res('W', AX_W)
+	Res('Y', AX_Y)
+
+	Wild('B', AX_B)
+	Wild('X', AX_X)
+	Wild('Z', AX_Z)
+	}
+
+void SetAlpha(ALPHA Alpha)	// makes Alpha the active alphabet and rebuilds all character tables
+	{
+	InitArrays();
+
+	SetGapChar('.');
+	SetGapChar('-');	// registered last, so g_LetterExToChar[AX_GAP] ends up '-'
+
+	switch (Alpha)
+		{
+	case ALPHA_Amino:
+		SetAlphaAmino();
+		break;
+
+	case ALPHA_DNA:
+		SetAlphaDNA();
+		break;	// was missing: DNA fell through and also applied the RNA table
+	case ALPHA_RNA:
+		SetAlphaRNA();
+		break;
+
+	default:
+		Quit("Invalid Alpha=%d", Alpha);
+		}
+
+	g_AlphaSize = GetAlphaSize(Alpha);
+	g_Alpha = Alpha;
+
+	if (g_bVerbose)
+		Log("Alphabet %s\n", ALPHAToStr(g_Alpha));
+	}
+
+char GetWildcardChar()
+	{
+	switch (g_Alpha)
+		{
+	case ALPHA_Amino:
+		return 'X';
+
+	case ALPHA_DNA:
+	case ALPHA_RNA:
+		return 'N';
+
+	default:
+		Quit("Invalid Alpha=%d", g_Alpha);
+		}
+	return '?';
+	}
+
+bool IsNucleo(char c)	// true if c is one of ACGTURYN, either case
+	{
+	return c != 0 && strchr("ACGTURYNacgturyn", c) != 0;	// strchr would also match the terminating NUL
+	}
+
+bool IsDNA(char c)	// true if c is one of AGCTN, either case
+	{
+	return c != 0 && strchr("AGCTNagctn", c) != 0;	// strchr would also match the terminating NUL
+	}
+
+bool IsRNA(char c)	// true if c is one of AGCUN, either case
+	{
+	return c != 0 && strchr("AGCUNagcun", c) != 0;	// strchr would also match the terminating NUL
+	}
+
+static char InvalidLetters[256];
+static int InvalidLetterCount = 0;
+
+void ClearInvalidLetterWarning()	// clears the per-character "invalid letter seen" flags
+	{
+	memset(InvalidLetters, 0, 256);	// NOTE(review): InvalidLetterCount is not reset here -- confirm this is intended
+	}
+
+void InvalidLetterWarning(char c, char w)
+	{
+	InvalidLetters[(unsigned char) c] = 1;
+	++InvalidLetterCount;
+	}
+
+void ReportInvalidLetters()
+	{
+	if (0 == InvalidLetterCount)
+		return;
+
+	char Str[257];
+	memset(Str, 0, 257);
+
+	int n = 0;
+	for (int i = 0; i < 256; ++i)
+		{
+		if (InvalidLetters[i])
+			Str[n++] = (char) i;
+		}
+	Warning("Assuming %s (see -seqtype option), invalid letters found: %s",
+	  ALPHAToStr(g_Alpha), Str);
+	}

Added: trunk/packages/muscle/branches/upstream/current/alpha.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/alpha.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/alpha.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,106 @@
+#ifndef	alpha_h
+#define	alpha_h
+
+bool StrHasAmino(const char *Str);
+bool StrHasGap(const char *Str);
+void ClearInvalidLetterWarning();
+void InvalidLetterWarning(char c, char w);
+void ReportInvalidLetters();
+
+extern unsigned g_CharToLetter[];
+extern unsigned g_CharToLetterEx[];
+
+extern char g_LetterToChar[];
+extern char g_LetterExToChar[];
+
+extern char g_UnalignChar[];
+extern char g_AlignChar[];
+
+extern bool g_IsWildcardChar[];
+extern bool g_IsResidueChar[];
+
+#define CharToLetter(c)		(g_CharToLetter[(unsigned char) (c)])
+#define CharToLetterEx(c)	(g_CharToLetterEx[(unsigned char) (c)])
+
+#define LetterToChar(u)		(g_LetterToChar[u])
+#define LetterExToChar(u)	(g_LetterExToChar[u])
+
+#define IsResidueChar(c)	(g_IsResidueChar[(unsigned char) (c)])
+#define IsGapChar(c)		('-' == (c) || '.' == (c))
+#define IsWildcardChar(c)	(g_IsWildcardChar[(unsigned char) (c)])
+
+#define AlignChar(c)		(g_AlignChar[(unsigned char) (c)])
+#define UnalignChar(c)		(g_UnalignChar[(unsigned char) (c)])
+
+// AX=Amino alphabet with eXtensions (B, Z and X)
+enum AX
+	{
+	AX_A,
+	AX_C,
+	AX_D,
+	AX_E,
+	AX_F,
+	AX_G,
+	AX_H,
+	AX_I,
+	AX_K,
+	AX_L,
+	AX_M,
+	AX_N,
+	AX_P,
+	AX_Q,
+	AX_R,
+	AX_S,
+	AX_T,
+	AX_V,
+	AX_W,
+	AX_Y,
+
+	AX_X,	// Any
+
+	AX_B,	// D or N
+	AX_Z,	// E or Q
+
+	AX_GAP,
+	};
+const unsigned AX_COUNT = AX_GAP + 1;
+
+// NX=Nucleotide alphabet with extensions
+enum NX
+	{
+	NX_A,
+	NX_C,
+	NX_G,
+	NX_T,
+	NX_U = NX_T,
+
+    NX_M, // AC
+    NX_R, // AG
+    NX_W, // AT
+    NX_S, // CG
+    NX_Y, // CT
+    NX_K, // GT
+    NX_V, // ACG
+    NX_H, // ACT
+    NX_D, // AGT
+    NX_B, // CGT
+    NX_X, // GATC
+    NX_N, // GATC
+	NX_GAP
+	};
+const unsigned NX_COUNT = NX_GAP + 1;
+
+const unsigned MAX_ALPHA = 20;
+const unsigned MAX_ALPHA_EX = AX_COUNT;
+const unsigned MAX_CHAR = 256;
+
+extern ALPHA g_Alpha;
+extern unsigned g_AlphaSize;
+
+void SetAlpha(ALPHA Alpha);
+char GetWildcardChar();
+bool IsNucleo(char c);
+bool IsDNA(char c);
+bool IsRNA(char c);
+
+#endif	// alpha_h

Added: trunk/packages/muscle/branches/upstream/current/anchors.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/anchors.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/anchors.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,218 @@
+#include "muscle.h"
+#include "msa.h"
+#include "objscore.h"
+
+#define	TRACE	0
+
+static void WindowSmooth(const SCORE Score[], unsigned uCount, unsigned uWindowLength,
+  SCORE SmoothScore[], double dCeil)
+	{
+#define	Ceil(x)	((SCORE) ((x) > dCeil ? dCeil : (x)))
+
+	if (1 != uWindowLength%2)
+		Quit("WindowSmooth, length=%u", uWindowLength);
+
+	if (uCount <= uWindowLength)
+		{
+		for (unsigned i = 0; i < uCount; ++i)
+			SmoothScore[i] = 0;
+		return;
+		}
+
+	const unsigned w2 = uWindowLength/2;
+	for (unsigned i = 0; i < w2; ++i)
+		{
+		SmoothScore[i] = 0;
+		SmoothScore[uCount - i - 1] = 0;
+		}
+
+	SCORE scoreWindowTotal = 0;
+	for (unsigned i = 0; i < uWindowLength; ++i)
+		{
+		scoreWindowTotal += Ceil(Score[i]);
+		}
+
+	for (unsigned i = w2; ; ++i)
+		{
+		SmoothScore[i] = scoreWindowTotal/uWindowLength;
+		if (i == uCount - w2 - 1)
+			break;
+
+		scoreWindowTotal -= Ceil(Score[i - w2]);
+		scoreWindowTotal += Ceil(Score[i + w2 + 1]);
+		}
+#undef Ceil
+	}
+
+// Find columns that score above the given threshold.
+// A range of scores is defined between the average
+// and the maximum. The threshold is a fraction 0.0 .. 1.0
+// within that range, where 0.0 is the average score
+// and 1.0 is the maximum score.
+// "Grade" is by analogy with grading on a curve.
+static void FindBestColsGrade(const SCORE Score[], unsigned uCount,
+  double dThreshold, unsigned BestCols[], unsigned *ptruBestColCount)
+	{
+	SCORE scoreTotal = 0;
+	for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
+		scoreTotal += Score[uIndex];
+	const SCORE scoreAvg = scoreTotal / uCount;
+
+	SCORE scoreMax = MINUS_INFINITY;
+	for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
+		if (Score[uIndex] > scoreMax)
+			scoreMax = Score[uIndex];
+
+	unsigned uBestColCount = 0;
+	for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
+		{
+		const SCORE s = Score[uIndex];
+		const double dHeight = (s - scoreAvg)/(scoreMax - scoreAvg);
+		if (dHeight >= dThreshold)
+			{
+			BestCols[uBestColCount] = uIndex;
+			++uBestColCount;
+			}
+		}
+	*ptruBestColCount = uBestColCount;
+	}
+
+// Best col only if all following criteria satisfied:
+// (1) Score >= min
+// (2) Smoothed score >= min
+// (3) No gaps.
+static void FindBestColsCombo(const MSA &msa, const SCORE Score[],
+  const SCORE SmoothScore[], double dMinScore, double dMinSmoothScore,
+  unsigned BestCols[], unsigned *ptruBestColCount)
+	{
+	const unsigned uColCount = msa.GetColCount();
+
+	unsigned uBestColCount = 0;
+	for (unsigned uIndex = 0; uIndex < uColCount; ++uIndex)
+		{
+		if (Score[uIndex] < dMinScore)
+			continue;
+		if (SmoothScore[uIndex] < dMinSmoothScore)
+			continue;
+		if (msa.ColumnHasGap(uIndex))
+			continue;
+		BestCols[uBestColCount] = uIndex;
+		++uBestColCount;
+		}
+	*ptruBestColCount = uBestColCount;
+	}
+
+static void ListBestCols(const MSA &msa, const SCORE Score[], const SCORE SmoothScore[],
+  unsigned BestCols[], unsigned uBestColCount)
+	{
+	const unsigned uColCount = msa.GetColCount();
+	const unsigned uSeqCount = msa.GetSeqCount();
+
+	Log("Col  ");
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		Log("%u", uSeqIndex%10);
+	Log("  ");
+
+	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
+		{
+		Log("%3u  ", uColIndex);
+		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+			Log("%c", msa.GetChar(uSeqIndex, uColIndex));
+
+		Log("  %10.3f", Score[uColIndex]);
+		Log("  %10.3f", SmoothScore[uColIndex]);
+
+		for (unsigned i = 0; i < uBestColCount; ++i)
+			if (BestCols[i] == uColIndex)
+				Log(" <-- Best");
+		Log("\n");
+		}
+	}
+
+// Thins out best columns: those closer than uWindowLength to BestCols[n]
+// form one group. A group of two keeps the higher-scoring column; a larger
+// group keeps the column nearest the window center. BestCols must be ascending.
+static void MergeBestCols(const SCORE Scores[], const unsigned BestCols[],
+  unsigned uBestColCount, unsigned uWindowLength, unsigned AnchorCols[],
+  unsigned *ptruAnchorColCount)
+	{
+	unsigned uAnchorColCount = 0;
+	for (unsigned n = 0; n < uBestColCount; /* update inside loop */)
+		{
+		unsigned uBestColIndex = BestCols[n];
+		unsigned uCountWithinWindow = 0;	// followers of BestCols[n] inside its window
+		for (unsigned i = n + 1; i < uBestColCount; ++i)
+			{
+			unsigned uBestColIndex2 = BestCols[i];
+			if (uBestColIndex2 - uBestColIndex >= uWindowLength)
+				break;
+			++uCountWithinWindow;
+			}
+		unsigned uAnchorCol = uBestColIndex;
+		if (1 == uCountWithinWindow)
+			{
+			unsigned uBestColIndex2 = BestCols[n+1];
+			if (Scores[uBestColIndex] > Scores[uBestColIndex2])
+				uAnchorCol = uBestColIndex;
+			else
+				uAnchorCol = uBestColIndex2;
+			}
+		else if (uCountWithinWindow > 1)
+			{
+			unsigned uWindowCenter = uBestColIndex + uWindowLength/2;
+			int iClosestDist = uWindowLength;	// larger than any distance inside the window
+			unsigned uClosestCol = uBestColIndex;
+			for (unsigned i = n; i <= n + uCountWithinWindow; ++i)	// every column of the group
+				{
+				unsigned uColIndex = BestCols[i];
+				int iDist = (int) uColIndex - (int) uWindowCenter;	// measured from the center
+				if (iDist < 0)
+					iDist = -iDist;
+				if (iDist < iClosestDist)
+					{
+					uClosestCol = uColIndex;
+					iClosestDist = iDist;
+					}
+				}
+			uAnchorCol = uClosestCol;
+			}
+		AnchorCols[uAnchorColCount] = uAnchorCol;
+		++uAnchorColCount;
+		n += uCountWithinWindow + 1;	// skip the whole group
+		}
+	*ptruAnchorColCount = uAnchorColCount;
+	}
+
+void FindAnchorCols(const MSA &msa, unsigned AnchorCols[],
+  unsigned *ptruAnchorColCount)
+	{
+	const unsigned uColCount = msa.GetColCount();
+	if (uColCount < 16)
+		{
+		*ptruAnchorColCount = 0;
+		return;
+		}
+
+	SCORE *MatchScore = new SCORE[uColCount];
+	SCORE *SmoothScore = new SCORE[uColCount];
+	unsigned *BestCols = new unsigned[uColCount];
+
+	GetLetterScores(msa, MatchScore);
+	WindowSmooth(MatchScore, uColCount, g_uSmoothWindowLength, SmoothScore,
+	  g_dSmoothScoreCeil);
+
+	unsigned uBestColCount;
+	FindBestColsCombo(msa, MatchScore, SmoothScore, g_dMinBestColScore, g_dMinSmoothScore,
+	  BestCols, &uBestColCount);
+
+#if	TRACE
+	ListBestCols(msa, MatchScore, SmoothScore, BestCols, uBestColCount);
+#endif
+
+	MergeBestCols(MatchScore, BestCols, uBestColCount, g_uAnchorSpacing, AnchorCols,
+	  ptruAnchorColCount);
+
+	delete[] MatchScore;
+	delete[] SmoothScore;
+	delete[] BestCols;
+	}

Added: trunk/packages/muscle/branches/upstream/current/bittraceback.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/bittraceback.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/bittraceback.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,206 @@
+#include "muscle.h"
+#include "pwpath.h"
+
+#define TRACE 0
+
+static char XlatEdgeType(char c)
+	{
+	if ('E' == c)
+		return 'D';
+	if ('J' == c)
+		return 'I';
+	return c;
+	}
+
+static const char *BitsToStr(char Bits)
+	{
+	static char Str[] = "xM xD xI";
+
+	switch (Bits & BIT_xM)
+		{
+	case BIT_MM:
+		Str[0] = 'M';
+		break;
+	case BIT_DM:
+		Str[0] = 'D';
+		break;
+	case BIT_IM:
+		Str[0] = 'I';
+		break;
+		}
+
+	switch (Bits & BIT_xD)
+		{
+	case BIT_MD:
+		Str[3] = 'M';
+		break;
+	case BIT_DD:
+		Str[3] = 'D';
+		break;
+		}
+
+	switch (Bits & BIT_xI)
+		{
+	case BIT_MI:
+		Str[6] = 'M';
+		break;
+	case BIT_II:
+		Str[6] = 'I';
+		break;
+		}
+
+	return Str;
+	}
+
+static inline char XChar(char Bits, char cType)
+	{
+	switch (cType)
+		{
+	case 'M':
+		{
+		switch (Bits & BIT_xM)
+			{
+		case BIT_MM:
+			return 'M';
+		case BIT_DM:
+			return 'D';
+		case BIT_IM:
+			return 'I';
+#if	DOUBLE_AFFINE
+		case BIT_EM:
+			return 'E';
+		case BIT_JM:
+			return 'J';
+#endif
+			}
+		Quit("Huh!?");
+		return '?';
+		}
+	case 'D':
+		{
+		switch (Bits & BIT_xD)
+			{
+		case BIT_MD:
+			return 'M';
+		case BIT_DD:
+			return 'D';
+			}
+		Quit("Huh!?");
+		return '?';
+		}
+	case 'I':
+		{
+		switch (Bits & BIT_xI)
+			{
+		case BIT_MI:
+			return 'M';
+		case BIT_II:
+			return 'I';
+			}
+		Quit("Huh!?");
+		return '?';
+		}
+#if	DOUBLE_AFFINE
+	case 'E':
+		{
+		switch (Bits & BIT_xE)
+			{
+		case BIT_ME:
+			return 'M';
+		case BIT_EE:
+			return 'E';
+			}
+		Quit("Huh!?");
+		return '?';
+		}
+	case 'J':
+		{
+		switch (Bits & BIT_xJ)
+			{
+		case BIT_MJ:
+			return 'M';
+		case BIT_JJ:
+			return 'J';
+			}
+		Quit("Huh!?");
+		return '?';
+		}
+#endif
+	default:
+		Quit("Huh?");
+		return '?';
+		}
+	}
+
+void BitTraceBack(char **TraceBack, unsigned uLengthA, unsigned uLengthB,
+  char LastEdge, PWPath &Path)	// rebuilds Path from the TraceBack bit matrix
+	{
+#if	TRACE
+	Log("BitTraceBack\n");
+#endif
+	Path.Clear();
+
+	PWEdge Edge;
+	Edge.uPrefixLengthA = uLengthA;
+	Edge.uPrefixLengthB = uLengthB;
+// Walk back from cell (uLengthA, uLengthB), prepending one edge per step.
+	Edge.cType = LastEdge;
+	for (;;)
+		{
+#if	TRACE
+		Log("Prepend %c%d.%d\n", Edge.cType, Edge.uPrefixLengthA, Edge.uPrefixLengthB);
+#endif
+		char cSave = Edge.cType;
+		Edge.cType = XlatEdgeType(cSave);	// the path stores E as D and J as I
+		Path.PrependEdge(Edge);
+		Edge.cType = cSave;	// keep the untranslated type for the lookup below
+
+		unsigned PLA = Edge.uPrefixLengthA;
+		unsigned PLB = Edge.uPrefixLengthB;
+		char Bits = TraceBack[PLA][PLB];
+		char NextEdgeType = XChar(Bits, Edge.cType);
+#if	TRACE
+		Log("XChar(%s, %c) = %c\n", BitsToStr(Bits), Edge.cType, NextEdgeType);
+#endif
+		switch (Edge.cType)
+			{
+		case 'M':
+			{
+			if (Edge.uPrefixLengthA == 0)
+				Quit("BitTraceBack MA=0");
+			if (Edge.uPrefixLengthB == 0)
+				Quit("BitTraceBack MB=0");
+			--(Edge.uPrefixLengthA);
+			--(Edge.uPrefixLengthB);
+			break;
+			}
+		case 'D':
+		case 'E':
+			{
+			if (Edge.uPrefixLengthA == 0)
+				Quit("BitTraceBack DA=0");
+			--(Edge.uPrefixLengthA);
+			break;
+			}
+		case 'I':
+		case 'J':
+			{
+			if (Edge.uPrefixLengthB == 0)
+				Quit("BitTraceBack IB=0");
+			--(Edge.uPrefixLengthB);
+			break;
+			}
+		default:
+			Quit("BitTraceBack: Invalid edge %c", Edge.cType);	// %c needs the char, not the struct
+			}
+
+		if (0 == Edge.uPrefixLengthA && 0 == Edge.uPrefixLengthB)
+			break;
+
+		Edge.cType = NextEdgeType;
+		}
+
+#if	TRACE
+	Path.LogMe();
+#endif
+	}

Added: trunk/packages/muscle/branches/upstream/current/blosumla.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/blosumla.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/blosumla.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,118 @@
+#include "muscle.h"
+
+// NOTE(review): GAPVAL and GAPGAPVAL are not referenced anywhere else in this
+// file. Presumably they are the factors for letter-vs-gap and gap-vs-gap
+// pairs -- confirm before relying on them.
+#define GAPVAL		0.3
+#define GAPGAPVAL	5.0
+
+// Blosum62 log-average factor matrix
+//
+// 20 x 20 table of float factors. Rows and columns both follow the letter
+// order given by the S_ROW parameter list:
+//   A C D E F G H I K L M N P Q R S T V W Y
+// The table is file-static and nothing in this file reads it.
+static float Blosum62LA[20][20] =
+	{
+// v(x) converts one literal to float. S_ROW emits one brace-enclosed row
+// followed by a comma; its first two arguments (row number and row letter)
+// are labels for the reader only and are dropped by the expansion.
+// Neither macro is #undef'd after the table.
+#define v(x)	((float) x)
+#define S_ROW(n, c, A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y) \
+	{ v(A), v(C), v(D), v(E), v(F), v(G), v(H), v(I), v(K), v(L), v(M), v(N), v(P), v(Q), \
+	v(R), v(S), v(T), v(V), v(W), v(Y) },
+
+// Blosum62	log	average matrix
+//				A			C			D			E			F
+//				G			H			I			K			L
+//				M			N			P			Q			R
+//				S			T			V			W			Y
+S_ROW( 0, 'A',  3.9029401,  0.8679881,  0.5446049,  0.7412640,  0.4648942, 
+                1.0568696,  0.5693654,  0.6324813,  0.7753898,  0.6019460, 
+                0.7231498,  0.5883077,  0.7541214,  0.7568035,  0.6126988, 
+                1.4721037,  0.9844022,  0.9364584,  0.4165484,  0.5426125)
+
+S_ROW( 1, 'C',  0.8679881, 19.5765802,  0.3014542,  0.2859347,  0.4389910, 
+                0.4203886,  0.3550472,  0.6534589,  0.3491296,  0.6422760, 
+                0.6113537,  0.3978026,  0.3795628,  0.3657796,  0.3089379, 
+                0.7384148,  0.7405530,  0.7558448,  0.4499807,  0.4342013)
+
+S_ROW( 2, 'D',  0.5446049,  0.3014542,  7.3979253,  1.6878109,  0.2989696, 
+                0.6343015,  0.6785593,  0.3390155,  0.7840905,  0.2866128, 
+                0.3464547,  1.5538520,  0.5987177,  0.8970811,  0.5732000, 
+                0.9135051,  0.6947898,  0.3365004,  0.2321050,  0.3456829)
+
+S_ROW( 3, 'E',  0.7412640,  0.2859347,  1.6878109,  5.4695276,  0.3307441, 
+                0.4812675,  0.9600400,  0.3305223,  1.3082782,  0.3728734, 
+                0.5003421,  0.9112983,  0.6792027,  1.9017376,  0.9607983, 
+                0.9503570,  0.7414260,  0.4289431,  0.3743021,  0.4964664)
+
+S_ROW( 4, 'F',  0.4648942,  0.4389910,  0.2989696,  0.3307441,  8.1287983, 
+                0.3406407,  0.6519893,  0.9457698,  0.3440433,  1.1545978, 
+                1.0043715,  0.3542882,  0.2874440,  0.3339729,  0.3807263, 
+                0.4399736,  0.4816930,  0.7450894,  1.3743775,  2.7693817)
+
+S_ROW( 5, 'G',  1.0568696,  0.4203886,  0.6343015,  0.4812675,  0.3406407, 
+                6.8763075,  0.4929663,  0.2750096,  0.5888716,  0.2845039, 
+                0.3954865,  0.8637114,  0.4773858,  0.5386498,  0.4499840, 
+                0.9035965,  0.5792712,  0.3369551,  0.4216898,  0.3487141)
+
+S_ROW( 6, 'H',  0.5693654,  0.3550472,  0.6785593,  0.9600400,  0.6519893, 
+                0.4929663, 13.5060070,  0.3262878,  0.7788884,  0.3806759, 
+                0.5841316,  1.2220028,  0.4728797,  1.1679835,  0.9170473, 
+                0.7367319,  0.5575021,  0.3394474,  0.4440859,  1.7979036)
+
+S_ROW( 7, 'I',  0.6324813,  0.6534589,  0.3390155,  0.3305223,  0.9457698, 
+                0.2750096,  0.3262878,  3.9979299,  0.3963730,  1.6944349, 
+                1.4777449,  0.3279345,  0.3846629,  0.3829375,  0.3547509, 
+                0.4431634,  0.7798163,  2.4175121,  0.4088732,  0.6303898)
+
+S_ROW( 8, 'K',  0.7753898,  0.3491296,  0.7840905,  1.3082782,  0.3440433, 
+                0.5888716,  0.7788884,  0.3963730,  4.7643359,  0.4282702, 
+                0.6253033,  0.9398419,  0.7037741,  1.5543233,  2.0768092, 
+                0.9319192,  0.7929060,  0.4565429,  0.3589319,  0.5321784)
+
+S_ROW( 9, 'L',  0.6019460,  0.6422760,  0.2866128,  0.3728734,  1.1545978, 
+                0.2845039,  0.3806759,  1.6944349,  0.4282702,  3.7966214, 
+                1.9942957,  0.3100430,  0.3711219,  0.4773261,  0.4739194, 
+                0.4288939,  0.6603292,  1.3142355,  0.5680359,  0.6920589)
+
+S_ROW(10, 'M',  0.7231498,  0.6113537,  0.3464547,  0.5003421,  1.0043715, 
+                0.3954865,  0.5841316,  1.4777449,  0.6253033,  1.9942957, 
+                6.4814549,  0.4745299,  0.4238960,  0.8642486,  0.6226249, 
+                0.5985578,  0.7938018,  1.2689365,  0.6103022,  0.7083636)
+
+S_ROW(11, 'N',  0.5883077,  0.3978026,  1.5538520,  0.9112983,  0.3542882, 
+                0.8637114,  1.2220028,  0.3279345,  0.9398419,  0.3100430, 
+                0.4745299,  7.0940964,  0.4999337,  1.0005835,  0.8586298, 
+                1.2315289,  0.9841525,  0.3690340,  0.2777841,  0.4860309)
+
+S_ROW(12, 'P',  0.7541214,  0.3795628,  0.5987177,  0.6792027,  0.2874440, 
+                0.4773858,  0.4728797,  0.3846629,  0.7037741,  0.3711219, 
+                0.4238960,  0.4999337, 12.8375452,  0.6412803,  0.4815348, 
+                0.7555033,  0.6888962,  0.4430825,  0.2818321,  0.3635216)
+
+S_ROW(13, 'Q',  0.7568035,  0.3657796,  0.8970811,  1.9017376,  0.3339729, 
+                0.5386498,  1.1679835,  0.3829375,  1.5543233,  0.4773261, 
+                0.8642486,  1.0005835,  0.6412803,  6.2444210,  1.4057958, 
+                0.9655559,  0.7913219,  0.4667781,  0.5093584,  0.6110951)
+
+S_ROW(14, 'R',  0.6126988,  0.3089379,  0.5732000,  0.9607983,  0.3807263, 
+                0.4499840,  0.9170473,  0.3547509,  2.0768092,  0.4739194, 
+                0.6226249,  0.8586298,  0.4815348,  1.4057958,  6.6655769, 
+                0.7671661,  0.6777544,  0.4200721,  0.3951049,  0.5559652)
+
+S_ROW(15, 'S',  1.4721037,  0.7384148,  0.9135051,  0.9503570,  0.4399736, 
+                0.9035965,  0.7367319,  0.4431634,  0.9319192,  0.4288939, 
+                0.5985578,  1.2315289,  0.7555033,  0.9655559,  0.7671661, 
+                3.8428476,  1.6139205,  0.5652240,  0.3853031,  0.5575206)
+
+S_ROW(16, 'T',  0.9844022,  0.7405530,  0.6947898,  0.7414260,  0.4816930, 
+                0.5792712,  0.5575021,  0.7798163,  0.7929060,  0.6603292, 
+                0.7938018,  0.9841525,  0.6888962,  0.7913219,  0.6777544, 
+                1.6139205,  4.8321048,  0.9809432,  0.4309317,  0.5731577)
+
+S_ROW(17, 'V',  0.9364584,  0.7558448,  0.3365004,  0.4289431,  0.7450894, 
+                0.3369551,  0.3394474,  2.4175121,  0.4565429,  1.3142355, 
+                1.2689365,  0.3690340,  0.4430825,  0.4667781,  0.4200721, 
+                0.5652240,  0.9809432,  3.6921553,  0.3744576,  0.6580390)
+
+S_ROW(18, 'W',  0.4165484,  0.4499807,  0.2321050,  0.3743021,  1.3743775, 
+                0.4216898,  0.4440859,  0.4088732,  0.3589319,  0.5680359, 
+                0.6103022,  0.2777841,  0.2818321,  0.5093584,  0.3951049, 
+                0.3853031,  0.4309317,  0.3744576, 38.1077830,  2.1098056)
+
+S_ROW(19, 'Y',  0.5426125,  0.4342013,  0.3456829,  0.4964664,  2.7693817, 
+                0.3487141,  1.7979036,  0.6303898,  0.5321784,  0.6920589, 
+                0.7083636,  0.4860309,  0.3635216,  0.6110951,  0.5559652, 
+                0.5575206,  0.5731577,  0.6580390,  2.1098056,  9.8322054)
+	};

Added: trunk/packages/muscle/branches/upstream/current/clust.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/clust.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/clust.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,666 @@
+#include "muscle.h"
+#include "clust.h"
+#include "clustset.h"
+#include <stdio.h>
+
+#define TRACE		0
+
+// Puts the object into an empty state: no nodes, no distance matrix, no
+// cluster list and no associated ClustSet.
+//
+// Every pointer that ~Clust() passes to delete[] is set to null here, so that
+// destroying a Clust on which Create() was never called is well defined.
+Clust::Clust()
+	{
+	m_JoinStyle = JOIN_Undefined;
+
+	m_Nodes = 0;
+	m_ClusterIndexToNodeIndex = 0;
+	m_NodeIndexToClusterIndex = 0;
+
+	m_uLeafCount = 0;
+	m_uNodeCount = 0;
+	m_uClusterCount = 0;
+	m_uTriangularMatrixSize = 0;
+
+	m_dDist = 0;
+	m_ptrSet = 0;
+	m_ptrClusterList = 0;
+	}
+
+// Frees the three arrays owned by this object. Destroying the node array
+// also runs ~ClustNode() on each element.
+Clust::~Clust()
+	{
+	delete[] m_ClusterIndexToNodeIndex;
+	delete[] m_dDist;
+	delete[] m_Nodes;
+	}
+
+// Builds the complete cluster tree for the leaves supplied by Set.
+//
+// Method selects the pair-selection rule (m_JoinStyle) and the rule used to
+// derive distances to a newly formed cluster (m_CentroidStyle). The leaves
+// occupy node indexes 0 .. m_uLeafCount-1 and the internal nodes follow; one
+// internal node is created per call to CreateCluster(). Quit() is called for
+// an unknown Method or when there are fewer than two leaves.
+void Clust::Create(ClustSet &Set, CLUSTER Method)
+	{
+	m_ptrSet = &Set;
+
+	// Records the leaf count and allocates the distance matrix.
+	SetLeafCount(Set.GetLeafCount());
+
+	switch (Method)
+		{
+	case CLUSTER_UPGMA:
+		m_JoinStyle = JOIN_NearestNeighbor;
+		m_CentroidStyle = LINKAGE_Avg;
+		break;
+
+	case CLUSTER_UPGMAMax:
+		m_JoinStyle = JOIN_NearestNeighbor;
+		m_CentroidStyle = LINKAGE_Max;
+		break;
+
+	case CLUSTER_UPGMAMin:
+		m_JoinStyle = JOIN_NearestNeighbor;
+		m_CentroidStyle = LINKAGE_Min;
+		break;
+
+	case CLUSTER_UPGMB:
+		m_JoinStyle = JOIN_NearestNeighbor;
+		m_CentroidStyle = LINKAGE_Biased;
+		break;
+
+	case CLUSTER_NeighborJoining:
+		m_JoinStyle = JOIN_NeighborJoining;
+		m_CentroidStyle = LINKAGE_NeighborJoining;
+		break;
+
+	default:
+		Quit("Clust::Create, invalid method %d", Method);
+		}
+
+	// SetLeafCount() has already rejected counts of 0 and 1.
+	if (m_uLeafCount <= 1)
+		Quit("Clust::Create: no leaves");
+
+	m_uNodeCount = 2*m_uLeafCount - 1;
+	m_Nodes = new ClustNode[m_uNodeCount];
+	m_ClusterIndexToNodeIndex = new unsigned[m_uLeafCount];
+
+	// Each leaf starts as a cluster of size one holding just itself; internal
+	// nodes stay empty (size 0) until JoinNodes() fills them in.
+	m_ptrClusterList = 0;
+	for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
+		{
+		ClustNode &Node = m_Nodes[uNodeIndex];
+		Node.m_uIndex = uNodeIndex;
+		if (uNodeIndex < m_uLeafCount)
+			{
+			Node.m_uSize = 1;
+			Node.m_uLeafIndexes = new unsigned[1];
+			Node.m_uLeafIndexes[0] = uNodeIndex;
+			AddToClusterList(uNodeIndex);
+			}
+		else
+			Node.m_uSize = 0;
+		}
+
+// Compute initial distance matrix between leaves
+	SetProgressDesc("Build dist matrix");
+	unsigned uPairIndex = 0;
+	const unsigned uPairCount = (m_uLeafCount*(m_uLeafCount - 1))/2;
+	// Only pairs with j < i are computed; progress is reported every 10000 pairs.
+	for (unsigned i = 0; i < m_uLeafCount; ++i)
+		for (unsigned j = 0; j < i; ++j)
+			{
+			const float dDist = (float) m_ptrSet->ComputeDist(*this, i, j);
+			SetDist(i, j, dDist);
+			if (0 == uPairIndex%10000)
+				Progress(uPairIndex, uPairCount);
+			++uPairIndex;
+			}
+	ProgressStepsDone();
+
+// Call CreateCluster once for each internal node in the tree
+	SetProgressDesc("Build guide tree");
+	m_uClusterCount = m_uLeafCount;
+	const unsigned uInternalNodeCount = m_uNodeCount - m_uLeafCount;
+	for (unsigned uNodeIndex = m_uLeafCount; uNodeIndex < m_uNodeCount; ++uNodeIndex)
+		{
+		// 1-based step number, used for progress reporting only.
+		unsigned i = uNodeIndex + 1 - m_uLeafCount;
+		Progress(i, uInternalNodeCount);
+		CreateCluster();
+		}
+	ProgressStepsDone();
+	}
+
+// Performs one merge step: picks the pair of clusters to join, links them
+// under the next unused internal node, decrements the cluster count and
+// fills in the distance from the new cluster to every other live cluster.
+void Clust::CreateCluster()
+	{
+	unsigned uLeft;
+	unsigned uRight;
+	float dLeftLen;
+	float dRightLen;
+	ChooseJoin(&uLeft, &uRight, &dLeftLen, &dRightLen);
+
+	// With m_uClusterCount clusters still live, this is the lowest internal
+	// node index that has not been used yet.
+	const unsigned uNew = m_uNodeCount - m_uClusterCount + 1;
+
+	JoinNodes(uLeft, uRight, dLeftLen, dRightLen, uNew);
+
+#if	TRACE
+	Log("Merge New=%u L=%u R=%u Ld=%7.2g Rd=%7.2g\n",
+	  uNew, uLeft, uRight, dLeftLen, dRightLen);
+#endif
+
+// Compute distances to other clusters
+	--m_uClusterCount;
+	unsigned uOther = GetFirstCluster();
+	while (uOther != uInsane)
+		{
+		const bool bSkip = (uOther == uLeft || uOther == uRight || uOther == uNew);
+		if (!bSkip)
+			{
+			const float dDist = ComputeDist(uNew, uOther);
+			SetDist(uNew, uOther, dDist);
+			}
+		uOther = GetNextCluster(uOther);
+		}
+
+#if	REDLACK
+	// Only meaningful in REDLACK builds: register the metric of each new pair.
+	for (uOther = GetFirstCluster(); uOther != uInsane;
+	  uOther = GetNextCluster(uOther))
+		{
+		if (uOther == uLeft || uOther == uRight || uOther == uNew)
+			continue;
+
+		const float dMetric = ComputeMetric(uNew, uOther);
+		InsertMetric(uNew, uOther, dMetric);
+		}
+#endif
+	}
+
+// Dispatches to the pair-selection routine that matches m_JoinStyle. The
+// chosen node indexes and their branch lengths are returned through the
+// four pointers. Quit() is called for any other join style.
+void Clust::ChooseJoin(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
+  float *ptrdLeftLength, float *ptrdRightLength)
+	{
+	if (JOIN_NearestNeighbor == m_JoinStyle)
+		ChooseJoinNearestNeighbor(ptruLeftIndex, ptruRightIndex, ptrdLeftLength,
+		  ptrdRightLength);
+	else if (JOIN_NeighborJoining == m_JoinStyle)
+		ChooseJoinNeighborJoining(ptruLeftIndex, ptruRightIndex, ptrdLeftLength,
+		  ptrdRightLength);
+	else
+		Quit("Clust::ChooseJoin, Invalid join style %u", m_JoinStyle);
+	}
+
+// Selects the pair of live clusters with the smallest metric and reports it
+// together with the branch lengths to the new parent.
+//
+// Each branch length is half the distance between the two clusters minus
+// the height (see GetHeight) already accumulated below that child.
+void Clust::ChooseJoinNearestNeighbor(unsigned *ptruLeftIndex,
+  unsigned *ptruRightIndex, float *ptrdLeftLength, float *ptrdRightLength)
+	{
+	unsigned uMinLeftNodeIndex;
+	unsigned uMinRightNodeIndex;
+	GetMinMetric(&uMinLeftNodeIndex, &uMinRightNodeIndex);
+
+	const float dMinDist = GetDist(uMinLeftNodeIndex, uMinRightNodeIndex);
+
+	const float dLeftHeight = GetHeight(uMinLeftNodeIndex);
+	const float dRightHeight = GetHeight(uMinRightNodeIndex);
+
+	*ptruLeftIndex = uMinLeftNodeIndex;
+	*ptruRightIndex = uMinRightNodeIndex;
+	*ptrdLeftLength = dMinDist/2 - dLeftHeight;
+	*ptrdRightLength = dMinDist/2 - dRightHeight;
+	}
+
+// Selects the pair of live clusters with the smallest metric and reports it
+// together with neighbor-joining branch lengths.
+//
+// With d the distance between the pair and rL, rR their Calc_r() values,
+// the left branch is (d + rL - rR)/2 and the right branch is (d - rL + rR)/2.
+// The pair search itself is delegated to GetMinMetric().
+void Clust::ChooseJoinNeighborJoining(unsigned *ptruLeftIndex,
+  unsigned *ptruRightIndex, float *ptrdLeftLength, float *ptrdRightLength)
+	{
+	unsigned uMinLeftNodeIndex;
+	unsigned uMinRightNodeIndex;
+	GetMinMetric(&uMinLeftNodeIndex, &uMinRightNodeIndex);
+
+	const float dDistLR = GetDist(uMinLeftNodeIndex, uMinRightNodeIndex);
+	const float rL = Calc_r(uMinLeftNodeIndex);
+	const float rR = Calc_r(uMinRightNodeIndex);
+
+	const float dLeftLength = (dDistLR + rL - rR)/2;
+	const float dRightLength = (dDistLR - rL + rR)/2;
+
+	*ptruLeftIndex = uMinLeftNodeIndex;
+	*ptruRightIndex = uMinRightNodeIndex;
+	*ptrdLeftLength = dLeftLength;
+	*ptrdRightLength = dRightLength;
+	}
+
+// Makes node uNodeIndex the parent of uLeftIndex and uRightIndex.
+//
+// Stores the two branch lengths on the children, links parent and children
+// in both directions, gives the parent a leaf list made of the left child's
+// leaves followed by the right child's, and replaces the two children with
+// the parent in the list of live clusters.
+void Clust::JoinNodes(unsigned uLeftIndex, unsigned uRightIndex, float dLeftLength,
+  float dRightLength, unsigned uNodeIndex)
+	{
+	ClustNode &Left = m_Nodes[uLeftIndex];
+	ClustNode &Right = m_Nodes[uRightIndex];
+	ClustNode &Parent = m_Nodes[uNodeIndex];
+
+	Parent.m_ptrLeft = &Left;
+	Parent.m_ptrRight = &Right;
+
+	Left.m_ptrParent = &Parent;
+	Left.m_dLength = dLeftLength;
+
+	Right.m_ptrParent = &Parent;
+	Right.m_dLength = dRightLength;
+
+	const unsigned uLeftCount = Left.m_uSize;
+	const unsigned uRightCount = Right.m_uSize;
+	const unsigned uTotalCount = uLeftCount + uRightCount;
+	Parent.m_uSize = uTotalCount;
+
+	// The parent must not already own a leaf list.
+	assert(0 == Parent.m_uLeafIndexes);
+	Parent.m_uLeafIndexes = new unsigned[uTotalCount];
+
+	const unsigned uLeftByteCount = uLeftCount*sizeof(unsigned);
+	const unsigned uRightByteCount = uRightCount*sizeof(unsigned);
+	unsigned *ptrMerged = Parent.m_uLeafIndexes;
+	memcpy(ptrMerged, Left.m_uLeafIndexes, uLeftByteCount);
+	memcpy(ptrMerged + uLeftCount, Right.m_uLeafIndexes, uRightByteCount);
+
+	DeleteFromClusterList(uLeftIndex);
+	DeleteFromClusterList(uRightIndex);
+	AddToClusterList(uNodeIndex);
+	}
+
+// Returns the sum of the distances from uNodeIndex to every other live
+// cluster, divided by (cluster count - 2). Returns 0 when exactly two
+// clusters remain, which avoids the division by zero.
+float Clust::Calc_r(unsigned uNodeIndex) const
+	{
+	const unsigned uLiveCount = GetClusterCount();
+	if (uLiveCount == 2)
+		return 0;
+
+	float dTotal = 0;
+	unsigned uOther = GetFirstCluster();
+	while (uOther != uInsane)
+		{
+		if (uOther != uNodeIndex)
+			dTotal += GetDist(uNodeIndex, uOther);
+		uOther = GetNextCluster(uOther);
+		}
+	return dTotal/(uLiveCount - 2);
+	}
+
+// Returns the distance between the freshly joined node uNewNodeIndex and
+// another cluster uNodeIndex, using the rule selected by m_CentroidStyle.
+// Calls Quit() and returns g_dNAN for an unrecognised style.
+float Clust::ComputeDist(unsigned uNewNodeIndex, unsigned uNodeIndex)
+	{
+	if (LINKAGE_Avg == m_CentroidStyle)
+		return ComputeDistAverageLinkage(uNewNodeIndex, uNodeIndex);
+
+	if (LINKAGE_Min == m_CentroidStyle)
+		return ComputeDistMinLinkage(uNewNodeIndex, uNodeIndex);
+
+	if (LINKAGE_Max == m_CentroidStyle)
+		return ComputeDistMaxLinkage(uNewNodeIndex, uNodeIndex);
+
+	if (LINKAGE_Biased == m_CentroidStyle)
+		return ComputeDistMAFFT(uNewNodeIndex, uNodeIndex);
+
+	if (LINKAGE_NeighborJoining == m_CentroidStyle)
+		return ComputeDistNeighborJoining(uNewNodeIndex, uNodeIndex);
+
+	Quit("Clust::ComputeDist, invalid centroid style %u", m_CentroidStyle);
+	return (float) g_dNAN;
+	}
+
+// Minimum linkage: the smaller of the distances from uNodeIndex to the two
+// children of uNewNodeIndex.
+float Clust::ComputeDistMinLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
+	{
+	const unsigned uLeft = GetLeftIndex(uNewNodeIndex);
+	const unsigned uRight = GetRightIndex(uNewNodeIndex);
+	const float dToLeft = GetDist(uLeft, uNodeIndex);
+	const float dToRight = GetDist(uRight, uNodeIndex);
+	if (dToLeft < dToRight)
+		return dToLeft;
+	return dToRight;
+	}
+
+// Maximum linkage: the larger of the distances from uNodeIndex to the two
+// children of uNewNodeIndex.
+float Clust::ComputeDistMaxLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
+	{
+	const unsigned uLeft = GetLeftIndex(uNewNodeIndex);
+	const unsigned uRight = GetRightIndex(uNewNodeIndex);
+	const float dToLeft = GetDist(uLeft, uNodeIndex);
+	const float dToRight = GetDist(uRight, uNodeIndex);
+	if (dToLeft > dToRight)
+		return dToLeft;
+	return dToRight;
+	}
+
+// Average linkage: the unweighted mean of the distances from uNodeIndex to
+// the two children of uNewNodeIndex (cluster sizes are not taken into
+// account).
+float Clust::ComputeDistAverageLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
+	{
+	const unsigned uLeft = GetLeftIndex(uNewNodeIndex);
+	const unsigned uRight = GetRightIndex(uNewNodeIndex);
+	const float dToLeft = GetDist(uLeft, uNodeIndex);
+	const float dToRight = GetDist(uRight, uNodeIndex);
+	return (dToLeft + dToRight)/2;
+	}
+
+// Neighbor-joining update: with L and R the children of uNewNodeIndex,
+// returns (d(L, node) + d(R, node) - d(L, R))/2.
+float Clust::ComputeDistNeighborJoining(unsigned uNewNodeIndex, unsigned uNodeIndex)
+	{
+	const unsigned uLeft = GetLeftIndex(uNewNodeIndex);
+	const unsigned uRight = GetRightIndex(uNewNodeIndex);
+	const float dBetweenChildren = GetDist(uLeft, uRight);
+	const float dToLeft = GetDist(uLeft, uNodeIndex);
+	const float dToRight = GetDist(uRight, uNodeIndex);
+	const float dResult = (dToLeft + dToRight - dBetweenChildren)/2;
+	return dResult;
+	}
+
+// This is a mysterious variant of UPGMA reverse-engineered from MAFFT source.
+//
+// With dL and dR the distances from uNodeIndex to the two children of
+// uNewNodeIndex, the result blends their minimum and their mean:
+//   min(dL, dR)*(1 - g_dSUEFF) + (dL + dR)*g_dSUEFF/2
+// The distance between the two children themselves plays no part.
+float Clust::ComputeDistMAFFT(unsigned uNewNodeIndex, unsigned uNodeIndex)
+	{
+	const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
+	const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
+
+	const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
+	const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
+	const float dMinDistLR = (dDistL < dDistR ? dDistL : dDistR);
+	const float dSumDistLR = dDistL + dDistR;
+	const float dDist = dMinDistLR*(1 - g_dSUEFF) + dSumDistLR*g_dSUEFF/2;
+	return dDist;
+	}
+
+// Returns the current value of m_uClusterCount.
+unsigned Clust::GetClusterCount() const
+	{
+	const unsigned uCount = m_uClusterCount;
+	return uCount;
+	}
+
+// Writes a description of the tree to the log: the counts, the lower
+// triangle of the distance matrix for every node except the last, and one
+// line per node giving size, parent, children, branch length and, for
+// leaves, the name.
+void Clust::LogMe() const
+	{
+	Log("Clust %u leaves, %u nodes, %u clusters.\n",
+	  m_uLeafCount, m_uNodeCount, m_uClusterCount);
+
+	Log("Distance matrix\n");
+	const unsigned uNodeCount = GetNodeCount();
+	unsigned uCol;
+	unsigned uRow;
+
+	// Column header and underline.
+	Log("       ");
+	for (uCol = 0; uCol < uNodeCount - 1; ++uCol)
+		Log(" %7u", uCol);
+	Log("\n");
+
+	Log("       ");
+	for (uCol = 0; uCol < uNodeCount - 1; ++uCol)
+		Log("  ------");
+	Log("\n");
+
+	// Lower triangle, one row per node.
+	for (uRow = 0; uRow < uNodeCount - 1; ++uRow)
+		{
+		Log("%4u:  ", uRow);
+		for (uCol = 0; uCol < uRow; ++uCol)
+			Log(" %7.2g", GetDist(uRow, uCol));
+		Log("\n");
+		}
+
+	Log("\n");
+	Log("Node  Size  Prnt  Left  Rght   Length  Name\n");
+	Log("----  ----  ----  ----  ----   ------  ----\n");
+	for (unsigned uNode = 0; uNode < m_uNodeCount; ++uNode)
+		{
+		const ClustNode &Node = m_Nodes[uNode];
+		Log("%4u  %4u", uNode, Node.m_uSize);
+
+		// Parent, left and right columns share one layout: the index of
+		// the linked node, or blanks when there is no link.
+		const ClustNode *Links[3] =
+			{
+			Node.m_ptrParent, Node.m_ptrLeft, Node.m_ptrRight
+			};
+		for (unsigned k = 0; k < 3; ++k)
+			{
+			if (0 == Links[k])
+				Log("      ");
+			else
+				Log("  %4u", Links[k]->m_uIndex);
+			}
+
+		// No length is printed for the last node (the root).
+		if (uNode != m_uNodeCount - 1)
+			Log("  %7.3g", Node.m_dLength);
+		if (IsLeaf(uNode))
+			{
+			const char *ptrName = GetNodeName(uNode);
+			if (0 != ptrName)
+				Log("  %s", ptrName);
+			}
+		if (GetRootNodeIndex() == uNode)
+			Log("    [ROOT]");
+		Log("\n");
+		}
+	}
+
+// Returns the node at uNodeIndex; calls Quit() when the index is not below
+// m_uNodeCount.
+const ClustNode &Clust::GetNode(unsigned uNodeIndex) const
+	{
+	const bool bInRange = (uNodeIndex < m_uNodeCount);
+	if (!bInRange)
+		Quit("ClustNode::GetNode(%u) %u", uNodeIndex, m_uNodeCount);
+	return m_Nodes[uNodeIndex];
+	}
+
+// True when uNodeIndex falls in the leaf range 0 .. m_uLeafCount-1.
+bool Clust::IsLeaf(unsigned uNodeIndex) const
+	{
+	return m_uLeafCount > uNodeIndex;
+	}
+
+// Returns the m_uSize field of the given node (index checked by GetNode).
+unsigned Clust::GetClusterSize(unsigned uNodeIndex) const
+	{
+	return GetNode(uNodeIndex).m_uSize;
+	}
+
+// Returns the index of the left child of uNodeIndex; calls Quit() when the
+// node has no left child.
+unsigned Clust::GetLeftIndex(unsigned uNodeIndex) const
+	{
+	const ClustNode *ptrChild = GetNode(uNodeIndex).m_ptrLeft;
+	if (ptrChild == 0)
+		Quit("Clust::GetLeftIndex: leaf");
+	return ptrChild->m_uIndex;
+	}
+
+// Returns the index of the right child of uNodeIndex; calls Quit() when the
+// node has no right child.
+unsigned Clust::GetRightIndex(unsigned uNodeIndex) const
+	{
+	const ClustNode *ptrChild = GetNode(uNodeIndex).m_ptrRight;
+	if (ptrChild == 0)
+		Quit("Clust::GetRightIndex: leaf");
+	return ptrChild->m_uIndex;
+	}
+
+// Returns the m_dLength field of the given node (index checked by GetNode).
+float Clust::GetLength(unsigned uNodeIndex) const
+	{
+	return GetNode(uNodeIndex).m_dLength;
+	}
+
+// Records the number of leaves and allocates the distance matrix sized for
+// all 2*uLeafCount - 1 nodes. Calls Quit() when uLeafCount is 0 or 1.
+//
+// A matrix left over from an earlier call is released first, so calling
+// this more than once does not leak. The new matrix is not initialised.
+void Clust::SetLeafCount(unsigned uLeafCount)
+	{
+	if (uLeafCount <= 1)
+		Quit("Clust::SetLeafCount(%u)", uLeafCount);
+
+	m_uLeafCount = uLeafCount;
+	const unsigned uNodeCount = GetNodeCount();
+
+// Triangular matrix size excluding diagonal (all zeros in our case).
+	m_uTriangularMatrixSize = (uNodeCount*(uNodeCount - 1))/2;
+
+	// Null the pointer between delete[] and new so that it never dangles
+	// if the allocation throws.
+	delete[] m_dDist;
+	m_dDist = 0;
+	m_dDist = new float[m_uTriangularMatrixSize];
+	}
+
+// Returns the current value of m_uLeafCount.
+unsigned Clust::GetLeafCount() const
+	{
+	const unsigned uCount = m_uLeafCount;
+	return uCount;
+	}
+
+// Maps an unordered pair of node indexes to its slot in the triangular
+// distance vector: lo + hi*(hi - 1)/2, where hi is the larger index.
+// Calls Quit() when either index is not below the node count.
+//
+// The diagonal is not stored: two equal indexes i give the slot i*(i + 1)/2,
+// which is the slot of the pair (i + 1, 0), so callers are expected to pass
+// distinct indexes.
+unsigned Clust::VectorIndex(unsigned uIndex1, unsigned uIndex2) const
+	{
+	const unsigned uNodeCount = GetNodeCount();
+	if (uIndex1 >= uNodeCount || uIndex2 >= uNodeCount)
+		Quit("DistVectorIndex(%u,%u) %u", uIndex1, uIndex2, uNodeCount);
+
+	const bool bFirstIsHigh = (uIndex1 >= uIndex2);
+	const unsigned uHigh = bFirstIsHigh ? uIndex1 : uIndex2;
+	const unsigned uLow = bFirstIsHigh ? uIndex2 : uIndex1;
+
+	const unsigned v = uLow + (uHigh*(uHigh - 1))/2;
+	assert(v < m_uTriangularMatrixSize);
+	return v;
+	}
+
+// Reads the stored distance for the pair (uIndex1, uIndex2); the order of
+// the two indexes does not matter.
+float Clust::GetDist(unsigned uIndex1, unsigned uIndex2) const
+	{
+	return m_dDist[VectorIndex(uIndex1, uIndex2)];
+	}
+
+// Stores dDist as the distance for the pair (uIndex1, uIndex2); the order
+// of the two indexes does not matter.
+void Clust::SetDist(unsigned uIndex1, unsigned uIndex2, float dDist)
+	{
+	m_dDist[VectorIndex(uIndex1, uIndex2)] = dDist;
+	}
+
+// Returns the height of a node, computed recursively: 0 for a leaf,
+// otherwise the mean of (branch length + height) over the two children.
+float Clust::GetHeight(unsigned uNodeIndex) const
+	{
+	if (IsLeaf(uNodeIndex))
+		return 0;
+
+	const unsigned uLeft = GetLeftIndex(uNodeIndex);
+	const unsigned uRight = GetRightIndex(uNodeIndex);
+
+	const float dViaLeft = GetLength(uLeft) + GetHeight(uLeft);
+	const float dViaRight = GetLength(uRight) + GetHeight(uRight);
+	return (dViaLeft + dViaRight)/2;
+	}
+
+// Returns the name the ClustSet reports for a leaf; calls Quit() when
+// uNodeIndex is not a leaf.
+const char *Clust::GetNodeName(unsigned uNodeIndex) const
+	{
+	const bool bLeaf = IsLeaf(uNodeIndex);
+	if (!bLeaf)
+		Quit("Clust::GetNodeName, is not leaf");
+	return m_ptrSet->GetLeafName(uNodeIndex);
+	}
+
+// Returns the id the ClustSet reports for a leaf, or 0 for any index that
+// is not in the leaf range.
+unsigned Clust::GetNodeId(unsigned uNodeIndex) const
+	{
+	if (uNodeIndex < GetLeafCount())
+		return m_ptrSet->GetLeafId(uNodeIndex);
+	return 0;
+	}
+
+// Returns entry uLeafIndex of the leaf list of node uNodeIndex. Calls
+// Quit() when uLeafIndex is not below the node's size or when the stored
+// value is not below m_uNodeCount.
+unsigned Clust::GetLeaf(unsigned uNodeIndex, unsigned uLeafIndex) const
+	{
+	const ClustNode &Cluster = GetNode(uNodeIndex);
+	if (Cluster.m_uSize <= uLeafIndex)
+		Quit("Clust::GetLeaf, invalid index");
+
+	const unsigned uLeaf = Cluster.m_uLeafIndexes[uLeafIndex];
+	if (m_uNodeCount <= uLeaf)
+		Quit("Clust::GetLeaf, index out of range");
+	return uLeaf;
+	}
+
+// Returns the node index at the head of the live-cluster list, or uInsane
+// when the list is empty.
+unsigned Clust::GetFirstCluster() const
+	{
+	return (0 == m_ptrClusterList) ? uInsane : m_ptrClusterList->m_uIndex;
+	}
+
+// Returns the node index that follows uIndex in the live-cluster list, or
+// uInsane when uIndex is the last entry. uIndex is not range-checked.
+unsigned Clust::GetNextCluster(unsigned uIndex) const
+	{
+	const ClustNode *ptrFollowing = m_Nodes[uIndex].m_ptrNextCluster;
+	return (0 == ptrFollowing) ? uInsane : ptrFollowing->m_uIndex;
+	}
+
+// Unlinks a node from the doubly linked list of live clusters and clears
+// its own list pointers. A node without a predecessor must be the head.
+void Clust::DeleteFromClusterList(unsigned uNodeIndex)
+	{
+	assert(uNodeIndex < m_uNodeCount);
+	ClustNode &Node = m_Nodes[uNodeIndex];
+	ClustNode *const ptrBefore = Node.m_ptrPrevCluster;
+	ClustNode *const ptrAfter = Node.m_ptrNextCluster;
+
+	// Forward link: either the head pointer or the predecessor skips Node.
+	if (0 == ptrBefore)
+		{
+		assert(m_ptrClusterList == &Node);
+		m_ptrClusterList = ptrAfter;
+		}
+	else
+		ptrBefore->m_ptrNextCluster = ptrAfter;
+
+	// Backward link.
+	if (0 != ptrAfter)
+		ptrAfter->m_ptrPrevCluster = ptrBefore;
+
+	Node.m_ptrPrevCluster = 0;
+	Node.m_ptrNextCluster = 0;
+	}
+
+// Inserts a node at the head of the doubly linked list of live clusters.
+void Clust::AddToClusterList(unsigned uNodeIndex)
+	{
+	assert(uNodeIndex < m_uNodeCount);
+	ClustNode &Node = m_Nodes[uNodeIndex];
+	ClustNode *const ptrOldHead = m_ptrClusterList;
+
+	Node.m_ptrPrevCluster = 0;
+	Node.m_ptrNextCluster = ptrOldHead;
+	if (0 != ptrOldHead)
+		ptrOldHead->m_ptrPrevCluster = &Node;
+
+	m_ptrClusterList = &Node;
+	}
+
+// Returns the value minimised when choosing which pair to join, computed by
+// the routine that matches m_JoinStyle. Calls Quit() and returns 0 for any
+// other join style.
+float Clust::ComputeMetric(unsigned uIndex1, unsigned uIndex2) const
+	{
+	if (JOIN_NearestNeighbor == m_JoinStyle)
+		return ComputeMetricNearestNeighbor(uIndex1, uIndex2);
+
+	if (JOIN_NeighborJoining == m_JoinStyle)
+		return ComputeMetricNeighborJoining(uIndex1, uIndex2);
+
+	Quit("Clust::ComputeMetric");
+	return 0;
+	}
+
+// Neighbor-joining metric for a pair: d(i, j) - (r(i) + r(j)), with r as
+// computed by Calc_r().
+float Clust::ComputeMetricNeighborJoining(unsigned i, unsigned j) const
+	{
+	const float dRateI = Calc_r(i);
+	const float dRateJ = Calc_r(j);
+	const float dPairDist = GetDist(i, j);
+	const float dResult = dPairDist - (dRateI + dRateJ);
+	return dResult;
+	}
+
+// Nearest-neighbor metric for a pair: simply the stored distance.
+float Clust::ComputeMetricNearestNeighbor(unsigned i, unsigned j) const
+	{
+	const float dPairDist = GetDist(i, j);
+	return dPairDist;
+	}
+
+// Examines every unordered pair of live clusters and returns the smallest
+// metric found, storing the pair through ptruIndex1 / ptruIndex2.
+//
+// The comparison is strict, so among equal metrics the first pair met in
+// list order is kept. If no pair exists, both indexes are set to uInsane
+// and PLUS_INFINITY is returned.
+float Clust::GetMinMetricBruteForce(unsigned *ptruIndex1, unsigned *ptruIndex2) const
+	{
+	unsigned uBestFirst = uInsane;
+	unsigned uBestSecond = uInsane;
+	float dBest = PLUS_INFINITY;
+
+	unsigned uFirst = GetFirstCluster();
+	while (uFirst != uInsane)
+		{
+		// Pair uFirst only with the entries that follow it in the list.
+		unsigned uSecond = GetNextCluster(uFirst);
+		while (uSecond != uInsane)
+			{
+			const float dCandidate = ComputeMetric(uFirst, uSecond);
+			if (dCandidate < dBest)
+				{
+				dBest = dCandidate;
+				uBestFirst = uFirst;
+				uBestSecond = uSecond;
+				}
+			uSecond = GetNextCluster(uSecond);
+			}
+		uFirst = GetNextCluster(uFirst);
+		}
+
+	*ptruIndex1 = uBestFirst;
+	*ptruIndex2 = uBestSecond;
+	return dBest;
+	}
+
+// Finds the pair of live clusters with the smallest metric; currently a
+// plain forward to the brute-force search.
+float Clust::GetMinMetric(unsigned *ptruIndex1, unsigned *ptruIndex2) const
+	{
+	const float dMin = GetMinMetricBruteForce(ptruIndex1, ptruIndex2);
+	return dMin;
+	}

Added: trunk/packages/muscle/branches/upstream/current/clust.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/clust.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/clust.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,148 @@
+#ifndef Clust_h
+#define Clust_h
+
+class Clust;
+class ClustNode;
+class ClustSet;
+class Phylip;
+class SortedNode;
+
+// NOTE(review): presumably the "no node" sentinel for the RB* (red-black
+// tree) members declared in Clust below -- confirm against their definitions.
+const unsigned RB_NIL = ((unsigned) 0xfff0);
+
+// One node of the cluster tree built by Clust.
+//
+// A node owns its m_uLeafIndexes array and frees it on destruction. No copy
+// constructor or assignment operator is declared, so a copied node would
+// share that array with the original and both would free it.
+class ClustNode
+	{
+public:
+	// Index, size and length start at the "insane" marker values; every
+	// pointer starts null.
+	ClustNode() :
+	  m_uIndex(uInsane),
+	  m_uSize(uInsane),
+	  m_dLength((float) dInsane),
+	  m_ptrLeft(0),
+	  m_ptrRight(0),
+	  m_ptrParent(0),
+	  m_ptrNextCluster(0),
+	  m_ptrPrevCluster(0),
+	  m_uLeafIndexes(0)
+		{
+		}
+	~ClustNode()
+		{
+		delete[] m_uLeafIndexes;
+		}
+	unsigned m_uIndex;				// position of this node in the node array
+	unsigned m_uSize;				// number of entries in m_uLeafIndexes
+	float m_dLength;				// branch length to the parent
+	ClustNode *m_ptrLeft;			// left child, null if none
+	ClustNode *m_ptrRight;			// right child, null if none
+	ClustNode *m_ptrParent;			// parent, null if none
+	ClustNode *m_ptrNextCluster;	// next entry in the live-cluster list
+	ClustNode *m_ptrPrevCluster;	// previous entry in the live-cluster list
+	unsigned *m_uLeafIndexes;		// owned array of leaf indexes under this node
+	};
+
+// Agglomerative clustering of the leaves of a ClustSet into a binary tree.
+//
+// Create() does all the work. Nodes 0 .. leaf count - 1 are the leaves and
+// the remaining leaf count - 1 nodes are internal; the last node is the
+// root. Pairwise distances between nodes are kept in a triangular vector
+// without a diagonal (see VectorIndex).
+//
+// The object owns m_Nodes, m_dDist and m_ClusterIndexToNodeIndex. It does
+// not own the ClustSet passed to Create(), which must outlive any call that
+// asks for leaf names or ids.
+class Clust
+	{
+public:
+	Clust();
+	virtual ~Clust();
+
+	// Builds the tree for Set using the given clustering method.
+	void Create(ClustSet &Set, CLUSTER Method);
+
+	unsigned GetLeafCount() const;
+
+	unsigned GetClusterCount() const;
+	unsigned GetClusterSize(unsigned uNodeIndex) const;
+	unsigned GetLeaf(unsigned uNodeIndex, unsigned uLeafIndex) const;
+
+	// Derived from the leaf count, so valid as soon as SetLeafCount() ran.
+	unsigned GetNodeCount() const { return 2*m_uLeafCount - 1; }
+	const ClustNode &GetRoot() const { return m_Nodes[GetRootNodeIndex()]; }
+	unsigned GetRootNodeIndex() const { return m_uNodeCount - 1; }
+
+	const ClustNode &GetNode(unsigned uNodeIndex) const;
+	bool IsLeaf(unsigned uNodeIndex) const;
+	unsigned GetLeftIndex(unsigned uNodeIndex) const;
+	unsigned GetRightIndex(unsigned uNodeIndex) const;
+	float GetLength(unsigned uNodeIndex) const;
+	float GetHeight(unsigned uNodeIndex) const;
+	const char *GetNodeName(unsigned uNodeIndex) const;
+	unsigned GetNodeId(unsigned uNodeIndex) const;
+
+	JOIN GetJoinStyle() const { return m_JoinStyle; }
+	LINKAGE GetCentroidStyle() const { return m_CentroidStyle; }
+
+	// Distance between two distinct nodes; index order does not matter.
+	void SetDist(unsigned uIndex1, unsigned uIndex2, float dDist);
+	float GetDist(unsigned uIndex1, unsigned uIndex2) const;
+
+	void ToPhylip(Phylip &tree);
+
+	void LogMe() const;
+
+//private:
+	// Records the leaf count and allocates the distance matrix.
+	void SetLeafCount(unsigned uLeafCount);
+
+	// One merge step: choose a pair, join it, update distances.
+	void CreateCluster();
+	void JoinNodes(unsigned uLeftNodeIndex, unsigned uRightNodeIndex, 
+	  float dLeftLength, float dRightLength, unsigned uNewNodeIndex);
+
+	// Pair selection; results are returned through the pointers.
+	void ChooseJoin(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
+	  float *ptrdLeftLength, float *ptrdRightLength);
+	void ChooseJoinNeighborJoining(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
+	  float *ptrdLeftLength, float *ptrdRightLength);
+	void ChooseJoinNearestNeighbor(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
+	  float *ptrdLeftLength, float *ptrdRightLength);
+
+	// Distance from a freshly joined node to another cluster, one routine
+	// per linkage style.
+	float ComputeDist(unsigned uNewNodeIndex, unsigned uNodeIndex);
+	float ComputeDistAverageLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex);
+	float ComputeDistMinLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex);
+	float ComputeDistMaxLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex);
+	float ComputeDistNeighborJoining(unsigned uNewNodeIndex, unsigned uNodeIndex);
+	float ComputeDistMAFFT(unsigned uNewNodeIndex, unsigned uNodeIndex);
+
+	float Calc_r(unsigned uNodeIndex) const;
+
+	// Slot of a node pair in the triangular distance vector.
+	unsigned VectorIndex(unsigned uIndex1, unsigned uIndex2) const;
+
+	// Iteration over the list of live clusters; uInsane marks the end.
+	unsigned GetFirstCluster() const;
+	unsigned GetNextCluster(unsigned uNodeIndex) const;
+
+	// Value minimised when choosing the next pair to join.
+	float ComputeMetric(unsigned uIndex1, unsigned uIndex2) const;
+	float ComputeMetricNearestNeighbor(unsigned i, unsigned j) const;
+	float ComputeMetricNeighborJoining(unsigned i, unsigned j) const;
+
+	void InitMetric(unsigned uMaxNodeIndex);
+	void InsertMetric(unsigned uIndex1, unsigned uIndex2, float dMetric);
+	float GetMinMetric(unsigned *ptruIndex1, unsigned *ptruIndex2) const;
+	float GetMinMetricBruteForce(unsigned *ptruIndex1, unsigned *ptruIndex2) const;
+	void DeleteMetric(unsigned uIndex);
+	void DeleteMetric(unsigned uIndex1, unsigned uIndex2);
+	void ListMetric() const;
+
+	void DeleteFromClusterList(unsigned uNodeIndex);
+	void AddToClusterList(unsigned uNodeIndex);
+
+	void RBDelete(unsigned RBNode);
+	unsigned RBInsert(unsigned i, unsigned j, float fMetric);
+
+	unsigned RBNext(unsigned RBNode) const;
+	unsigned RBPrev(unsigned RBNode) const;
+	unsigned RBMin(unsigned RBNode) const;
+	unsigned RBMax(unsigned RBNode) const;
+
+	void ValidateRB(const char szMsg[] = 0) const;
+	void ValidateRBNode(unsigned Node, const char szMsg[]) const;
+
+//private:
+	JOIN m_JoinStyle;						// how the next pair is chosen
+	LINKAGE m_CentroidStyle;				// how distances to a new cluster are derived
+	ClustNode *m_Nodes;						// owned array of m_uNodeCount nodes
+	unsigned *m_ClusterIndexToNodeIndex;	// owned array, allocated by Create()
+	unsigned *m_NodeIndexToClusterIndex;
+	unsigned m_uLeafCount;
+	unsigned m_uNodeCount;
+	unsigned m_uClusterCount;				// clusters still waiting to be joined
+	unsigned m_uTriangularMatrixSize;		// number of floats in m_dDist
+	float *m_dDist;							// owned triangular distance vector
+	ClustSet *m_ptrSet;						// leaf source, not owned
+	ClustNode *m_ptrClusterList;			// head of the live-cluster list
+	};
+
+#endif // Clust_h

Added: trunk/packages/muscle/branches/upstream/current/cluster.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/cluster.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/cluster.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,339 @@
+#include "muscle.h"
+#include "cluster.h"
+#include "distfunc.h"
+
+// Returns d1 when it compares strictly less than d2, otherwise d2.
+static inline float Min(float d1, float d2)
+	{
+	if (d1 < d2)
+		return d1;
+	return d2;
+	}
+
+// Returns d1 when it compares strictly greater than d2, otherwise d2.
+static inline float Max(float d1, float d2)
+	{
+	if (d1 > d2)
+		return d1;
+	return d2;
+	}
+
+// Arithmetic mean of two floats. The sum is halved in double precision and
+// the result is narrowed back to float.
+static inline float Mean(float d1, float d2)
+	{
+	const float fSum = d1 + d2;
+	const double dHalf = fSum/2.0;
+	return (float) dHalf;
+	}
+
+#if	_DEBUG
+// Debug-build consistency check of the disjoint list.
+// Walks the list from m_ptrDisjoints and verifies that the prev/next links
+// of neighbouring nodes agree and that only the head lacks a predecessor.
+// Then checks that the list length equals the number of parentless nodes
+// among the first uNodeCount nodes. Calls Quit() on any inconsistency.
+void ClusterTree::Validate(unsigned uNodeCount)
+	{
+	unsigned uListLength = 0;
+	ClusterNode *pCur = m_ptrDisjoints;
+	while (0 != pCur)
+		{
+		ClusterNode *pBefore = pCur->GetPrevDisjoint();
+		ClusterNode *pAfter = pCur->GetNextDisjoint();
+
+		if (0 == pBefore)
+			{
+		// Only the head of the list may have no predecessor.
+			if (m_ptrDisjoints != pCur)
+				{
+				Log("[%u]->prev = 0 but != m_ptrDisjoints=%d\n",
+				  pCur->GetIndex(),
+				  m_ptrDisjoints ? m_ptrDisjoints->GetIndex() : 0xffffffff);
+				pCur->LogMe();
+				Quit("ClusterTree::Validate()");
+				}
+			}
+		else if (pBefore->GetNextDisjoint() != pCur)
+			{
+			Log("Prev->This mismatch, prev=\n");
+			pBefore->LogMe();
+			Log("This=\n");
+			pCur->LogMe();
+			Quit("ClusterTree::Validate()");
+			}
+
+		if (0 != pAfter && pAfter->GetPrevDisjoint() != pCur)
+			{
+			Log("Next->This mismatch, next=\n");
+			pAfter->LogMe();
+			Log("This=\n");
+			pCur->LogMe();
+			Quit("ClusterTree::Validate()");
+			}
+
+	// More entries than nodes means the list loops back on itself.
+		++uListLength;
+		if (uListLength > m_uNodeCount)
+			Quit("Loop in disjoint list");
+		pCur = pAfter;
+		}
+
+	unsigned uOrphans = 0;
+	for (unsigned uNode = 0; uNode != uNodeCount; ++uNode)
+		{
+		if (0 == m_Nodes[uNode].GetParent())
+			++uOrphans;
+		}
+
+	if (uOrphans != uListLength)
+		Quit("Disjoints = %u Parentless = %u\n", uListLength, uOrphans);
+	}
+#else	// !_DEBUG
+#define	Validate(uNodeCount)	// empty
+#endif
+
+// Logs one line describing this node: its index, the various weights, the
+// indexes of the linked nodes (0xffffffff is passed where a link is null),
+// the cluster size and the indexes of all leaves under the node.
+void ClusterNode::LogMe() const
+	{
+	const unsigned uNone = 0xffffffff;
+	const unsigned uLeft = m_ptrLeft ? m_ptrLeft->GetIndex() : uNone;
+	const unsigned uRight = m_ptrRight ? m_ptrRight->GetIndex() : uNone;
+	const unsigned uParent = m_ptrParent ? m_ptrParent->GetIndex() : uNone;
+	const unsigned uNext = m_ptrNextDisjoint ? m_ptrNextDisjoint->GetIndex() : uNone;
+	const unsigned uPrev = m_ptrPrevDisjoint ? m_ptrPrevDisjoint->GetIndex() : uNone;
+	const unsigned uSize = GetClusterSize();
+
+	Log("[%02u] w=%5.3f  CW=%5.3f  LBW=%5.3f  RBW=%5.3f  LWT=%5.3f  RWT=%5.3f  L=%02d  R=%02d  P=%02d  NxDj=%02d  PvDj=%02d  Sz=%02d  {",
+		m_uIndex,
+		m_dWeight,
+		GetClusterWeight(),
+		GetLeftBranchWeight(),
+		GetRightBranchWeight(),
+		GetLeftWeight(),
+		GetRightWeight(),
+		uLeft,
+		uRight,
+		uParent,
+		uNext,
+		uPrev,
+		uSize);
+
+	for (unsigned n = 0; n != uSize; ++n)
+		Log(" %u", GetClusterLeaf(n)->GetIndex());
+	Log(" }\n");
+	}
+
+// Number of leaves in the sub-tree rooted at this node. A node without
+// children is itself a leaf and counts as one.
+unsigned ClusterNode::GetClusterSize() const
+	{
+	const bool bHasLeft = (0 != m_ptrLeft);
+	const bool bHasRight = (0 != m_ptrRight);
+	if (!bHasLeft && !bHasRight)
+		return 1;
+
+	const unsigned uLeftLeaves = bHasLeft ? m_ptrLeft->GetClusterSize() : 0;
+	const unsigned uRightLeaves = bHasRight ? m_ptrRight->GetClusterSize() : 0;
+	const unsigned uTotal = uLeftLeaves + uRightLeaves;
+	assert(uTotal > 0);
+	return uTotal;
+	}
+
+// Sum of the weights of every node in the sub-tree rooted at this node,
+// including this node's own weight.
+double ClusterNode::GetClusterWeight() const
+	{
+	const ClusterNode *Children[2] = { m_ptrLeft, m_ptrRight };
+	double dSum = 0.0;
+	for (unsigned n = 0; n < 2; ++n)
+		{
+		if (0 != Children[n])
+			dSum += Children[n]->GetClusterWeight();
+		}
+	return dSum + GetWeight();
+	}
+
+// Difference between this node's weight and its left child's weight;
+// zero when there is no left child.
+double ClusterNode::GetLeftBranchWeight() const
+	{
+	return (0 == m_ptrLeft) ? 0.0 : m_dWeight - m_ptrLeft->m_dWeight;
+	}
+
+// Difference between this node's weight and its right child's weight;
+// zero when there is no right child.
+double ClusterNode::GetRightBranchWeight() const
+	{
+	return (0 == m_ptrRight) ? 0.0 : m_dWeight - m_ptrRight->m_dWeight;
+	}
+
+// Cluster weight of the right sub-tree plus this node's own weight;
+// zero when there is no right child.
+double ClusterNode::GetRightWeight() const
+	{
+	if (0 != m_ptrRight)
+		return m_ptrRight->GetClusterWeight() + m_dWeight;
+	return 0.0;
+	}
+
+// Cluster weight of the left sub-tree plus this node's own weight;
+// zero when there is no left child.
+double ClusterNode::GetLeftWeight() const
+	{
+	if (0 != m_ptrLeft)
+		return m_ptrLeft->GetClusterWeight() + m_dWeight;
+	return 0.0;
+	}
+
+// Return n'th leaf (0-based, left sub-tree first) in the sub-tree under
+// this node. The index is not range-checked: once the recursion reaches a
+// childless node that node is returned whatever index remains.
+const ClusterNode *ClusterNode::GetClusterLeaf(unsigned uLeafIndex) const
+	{
+// A node with no children is itself the leaf.
+	if (0 == m_ptrLeft && 0 == m_ptrRight)
+		return this;
+
+// With a single child every leaf is under that child. Both sides are
+// handled the same way, matching how GetClusterSize() counts leaves
+// (previously a left-only node returned itself, which is not a leaf).
+	if (0 == m_ptrRight)
+		return m_ptrLeft->GetClusterLeaf(uLeafIndex);
+	if (0 == m_ptrLeft)
+		return m_ptrRight->GetClusterLeaf(uLeafIndex);
+
+// Two children: the first uLeftLeafCount leaves belong to the left child,
+// the remainder to the right child.
+	const unsigned uLeftLeafCount = m_ptrLeft->GetClusterSize();
+	if (uLeafIndex < uLeftLeafCount)
+		return m_ptrLeft->GetClusterLeaf(uLeafIndex);
+	return m_ptrRight->GetClusterLeaf(uLeafIndex - uLeftLeafCount);
+	}
+
+// Unlinks ptrNode from the doubly-linked list of disjoint clusters,
+// moving the list head forward when ptrNode was the head.
+void ClusterTree::DeleteFromDisjoints(ClusterNode *ptrNode)
+	{
+	ClusterNode *ptrBefore = ptrNode->GetPrevDisjoint();
+	ClusterNode *ptrAfter = ptrNode->GetNextDisjoint();
+
+	if (0 == ptrBefore)
+		m_ptrDisjoints = ptrAfter;
+	else
+		ptrBefore->SetNextDisjoint(ptrAfter);
+
+	if (0 != ptrAfter)
+		ptrAfter->SetPrevDisjoint(ptrBefore);
+
+#if	_DEBUG
+// Clearing the removed node's own links is not needed by the algorithm;
+// it keeps the state readable and is what Validate() expects.
+	ptrNode->SetNextDisjoint(0);
+	ptrNode->SetPrevDisjoint(0);
+#endif
+	}
+
+// Pushes ptrNode onto the front of the disjoint list and makes it the head.
+void ClusterTree::AddToDisjoints(ClusterNode *ptrNode)
+	{
+	ClusterNode *ptrOldHead = m_ptrDisjoints;
+
+	ptrNode->SetPrevDisjoint(0);
+	ptrNode->SetNextDisjoint(ptrOldHead);
+	if (0 != ptrOldHead)
+		ptrOldHead->SetPrevDisjoint(ptrNode);
+
+	m_ptrDisjoints = ptrNode;
+	}
+
+// Constructs an empty tree: no nodes allocated and an empty disjoint list.
+ClusterTree::ClusterTree()
+	{
+	m_ptrDisjoints = 0;
+	m_Nodes = 0;
+	m_uNodeCount = 0;
+// Previously left uninitialized until Create() was called.
+	m_uLeafCount = 0;
+	}
+
+ClusterTree::~ClusterTree()
+	{
+	delete[] m_Nodes;	// array allocated with new[] in Create(); still null if Create() never ran
+	}
+
+// Logs the index of the disjoint-list head (0xffffffff is passed when the
+// list is empty) followed by one line per node.
+void ClusterTree::LogMe() const
+	{
+	const unsigned uHead = m_ptrDisjoints ? m_ptrDisjoints->GetIndex() : 0xffffffff;
+	Log("Disjoints=%d\n", uHead);
+
+	const ClusterNode *ptrNode = m_Nodes;
+	for (unsigned uLeft = m_uNodeCount; uLeft > 0; --uLeft)
+		{
+		ptrNode->LogMe();
+		++ptrNode;
+		}
+	}
+
+// Returns the root of the tree, which Create() stores as the last node of
+// the node array, or 0 if the tree has no nodes.
+ClusterNode *ClusterTree::GetRoot() const
+	{
+// With zero nodes, m_uNodeCount - 1 wraps around and would index far
+// outside the (null) array.
+	if (0 == m_uNodeCount || 0 == m_Nodes)
+		return 0;
+	return &m_Nodes[m_uNodeCount - 1];
+	}
+
+// Builds a binary cluster tree over the items of Dist by repeatedly joining
+// the closest pair of disjoint clusters (N-1 joins for N leaves).
+// Leaves occupy node indexes 0..N-1; joins are appended after them, so the
+// last node is the root. The weight of a join is the distance between the
+// two clusters it joins.
+//
+// The original header described this as the UPGMA algorithm of Durbin et
+// al. p166. Note that the distance from a new cluster to another cluster is
+// taken as the minimum (Min) of the distances to its two children, not an
+// average.
+//
+// Any previously built tree is discarded. If Dist has no items the tree is
+// left empty.
+void ClusterTree::Create(const DistFunc &Dist)
+	{
+	unsigned i;
+
+	delete[] m_Nodes;
+	m_Nodes = 0;
+	m_ptrDisjoints = 0;
+	m_uNodeCount = 0;
+
+	m_uLeafCount = Dist.GetCount();
+
+// With zero leaves 2*N - 1 would wrap around to a huge unsigned value.
+	if (0 == m_uLeafCount)
+		return;
+
+	m_uNodeCount = 2*m_uLeafCount - 1;
+	m_Nodes = new ClusterNode[m_uNodeCount];
+
+	for (i = 0; i < m_uNodeCount; ++i)
+		m_Nodes[i].SetIndex(i);
+
+// Initially every leaf is its own disjoint cluster; chain them together.
+	for (i = 0; i + 1 < m_uLeafCount; ++i)
+		m_Nodes[i].SetNextDisjoint(&m_Nodes[i+1]);
+
+	for (i = 1; i < m_uLeafCount; ++i)
+		m_Nodes[i].SetPrevDisjoint(&m_Nodes[i-1]);
+
+	m_ptrDisjoints = &m_Nodes[0];
+
+// Working distance table, large enough for leaves and joins. Leaf-to-leaf
+// entries are copied from Dist; join entries are filled in as joins happen.
+	DistFunc ClusterDist;
+	ClusterDist.SetCount(m_uNodeCount);
+	for (i = 0; i < m_uLeafCount; ++i)
+		for (unsigned j = 0; j < m_uLeafCount; ++j)
+			{
+			float dDist = Dist.GetDist(i, j);
+			ClusterDist.SetDist(i, j, dDist);
+			}
+
+	Validate(m_uLeafCount);
+
+// Iteration. N-1 joins needed to create a binary tree from N leaves.
+	for (unsigned uJoinIndex = m_uLeafCount; uJoinIndex < m_uNodeCount;
+	  ++uJoinIndex)
+		{
+	// Find closest pair of clusters
+		unsigned uIndexClosest1 = 0;
+		unsigned uIndexClosest2 = 0;
+		bool bFound = false;
+		double dDistClosest = 9e99;
+		for (ClusterNode *ptrNode1 = m_ptrDisjoints; ptrNode1;
+		  ptrNode1 = ptrNode1->GetNextDisjoint())
+			{
+			for (ClusterNode *ptrNode2 = ptrNode1->GetNextDisjoint(); ptrNode2;
+			  ptrNode2 = ptrNode2->GetNextDisjoint())
+				{
+				unsigned i1 = ptrNode1->GetIndex();
+				unsigned i2 = ptrNode2->GetIndex();
+				double dDist = ClusterDist.GetDist(i1, i2);
+				if (dDist < dDistClosest)
+					{
+					bFound = true;
+					dDistClosest = dDist;
+					uIndexClosest1 = i1;
+					uIndexClosest2 = i2;
+					}
+				}
+			}
+
+	// No pair compared below the initial bound (e.g. distances that are not
+	// numbers). Previously only an assert guarded this, so a release build
+	// went on to use uninitialized indexes.
+		if (!bFound)
+			Quit("ClusterTree::Create, no closest pair of clusters found");
+
+		ClusterNode &Join = m_Nodes[uJoinIndex];
+		ClusterNode &Child1 = m_Nodes[uIndexClosest1];
+		ClusterNode &Child2 = m_Nodes[uIndexClosest2];
+
+		Join.SetLeft(&Child1);
+		Join.SetRight(&Child2);
+		Join.SetWeight(dDistClosest);
+
+		Child1.SetParent(&Join);
+		Child2.SetParent(&Join);
+
+		DeleteFromDisjoints(&Child1);
+		DeleteFromDisjoints(&Child2);
+		AddToDisjoints(&Join);
+
+	// Calculate distance of every remaining disjoint cluster to the
+	// new cluster created by the join
+		for (ClusterNode *ptrNode = m_ptrDisjoints; ptrNode;
+		  ptrNode = ptrNode->GetNextDisjoint())
+			{
+			unsigned uNodeIndex = ptrNode->GetIndex();
+		// The join itself is on the disjoint list. Its distances to its
+		// own children were never set, and a self-distance is never read.
+			if (uNodeIndex == uJoinIndex)
+				continue;
+			float dDist1 = ClusterDist.GetDist(uNodeIndex, uIndexClosest1);
+			float dDist2 = ClusterDist.GetDist(uNodeIndex, uIndexClosest2);
+			float dDist = Min(dDist1, dDist2);
+			ClusterDist.SetDist(uJoinIndex, uNodeIndex, dDist);
+			}
+		Validate(uJoinIndex+1);
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/cluster.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/cluster.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/cluster.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,86 @@
+class DistFunc;
+
+// One node of a ClusterTree. A node carries two weights, an index, links to
+// its left/right children and parent, and prev/next links for the tree's
+// doubly-linked list of disjoint clusters. All links start out null.
+// Only ClusterTree (a friend) may modify the structure.
+class ClusterNode
+	{
+	friend class ClusterTree;
+public:
+	ClusterNode() :
+		m_dWeight(0.0),
+		m_dWeight2(0.0),
+		m_uIndex(0),
+		m_ptrLeft(0),
+		m_ptrRight(0),
+		m_ptrParent(0),
+		m_ptrNextDisjoint(0),
+		m_ptrPrevDisjoint(0)
+		{
+		}
+	~ClusterNode() {}
+
+public:
+	unsigned GetIndex() const { return m_uIndex; }
+	ClusterNode *GetLeft() const { return m_ptrLeft; }
+	ClusterNode *GetRight() const { return m_ptrRight; }
+	ClusterNode *GetParent() const { return m_ptrParent; }
+	double GetWeight() const { return m_dWeight; }
+
+// Queries over the sub-tree rooted at this node.
+	const ClusterNode *GetClusterLeaf(unsigned uLeafIndex) const;
+	unsigned GetClusterSize() const;
+	double GetClusterWeight() const;
+	double GetLeftBranchWeight() const;
+	double GetRightBranchWeight() const;
+	double GetLeftWeight() const;
+	double GetRightWeight() const;
+
+	void LogMe() const;
+
+// Second weight; it is only stored and returned by this class.
+	double GetWeight2() const { return m_dWeight2; }
+	void SetWeight2(double dWeight2) { m_dWeight2 = dWeight2; }
+
+protected:
+	void SetIndex(unsigned uIndex) { m_uIndex = uIndex; }
+	void SetWeight(double dWeight) { m_dWeight = dWeight; }
+	void SetLeft(ClusterNode *ptrLeft) { m_ptrLeft = ptrLeft; }
+	void SetRight(ClusterNode *ptrRight) { m_ptrRight = ptrRight; }
+	void SetParent(ClusterNode *ptrParent) { m_ptrParent = ptrParent; }
+	void SetNextDisjoint(ClusterNode *ptrNode) { m_ptrNextDisjoint = ptrNode; }
+	void SetPrevDisjoint(ClusterNode *ptrNode) { m_ptrPrevDisjoint = ptrNode; }
+
+// const like the other getters, so they can be used on const nodes too.
+	ClusterNode *GetNextDisjoint() const { return m_ptrNextDisjoint; }
+	ClusterNode *GetPrevDisjoint() const { return m_ptrPrevDisjoint; }
+
+private:
+	double m_dWeight;
+	double m_dWeight2;
+	unsigned m_uIndex;
+	ClusterNode *m_ptrLeft;
+	ClusterNode *m_ptrRight;
+	ClusterNode *m_ptrParent;
+	ClusterNode *m_ptrNextDisjoint;
+	ClusterNode *m_ptrPrevDisjoint;
+	};
+
+// Binary cluster tree built from a distance function by Create().
+// The tree owns its node array (m_Nodes), which the destructor releases
+// with delete[]; m_ptrDisjoints is the head of the list of clusters that
+// have not yet been joined.
+class ClusterTree
+	{
+public:
+	ClusterTree();
+	virtual ~ClusterTree();
+
+	void Create(const DistFunc &DF);
+
+	ClusterNode *GetRoot() const;
+	void LogMe() const;
+
+protected:
+// NOTE(review): Join is declared here but cluster.cpp contains no
+// definition of it.
+	void Join(ClusterNode *ptrNode1, ClusterNode *ptrNode2,
+	  ClusterNode *ptrJoin);
+	void AddToDisjoints(ClusterNode *ptrNode);
+	void DeleteFromDisjoints(ClusterNode *ptrNode);
+	void Validate(unsigned uNodeCount);
+
+private:
+// Copying is disabled (declared, never defined): a member-wise copy would
+// leave two trees that both delete[] the same m_Nodes array.
+	ClusterTree(const ClusterTree &);
+	ClusterTree &operator=(const ClusterTree &);
+
+private:
+	ClusterNode *m_ptrDisjoints;
+	ClusterNode *m_Nodes;
+	unsigned m_uNodeCount;
+	unsigned m_uLeafCount;
+	};

Added: trunk/packages/muscle/branches/upstream/current/clustset.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/clustset.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/clustset.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,21 @@
+#ifndef ClustSet_h
+#define ClustSet_h
+
+enum JOIN;
+enum LINKAGE;
+class Clust;
+
+// Abstract interface through which a Clust object asks about the items
+// being clustered: how many leaves there are, their names and ids, the
+// distance between two nodes, and what to do when two nodes are joined.
+class ClustSet
+	{
+public:
+// Virtual destructor so that an implementation can be destroyed safely
+// through a ClustSet pointer.
+	virtual ~ClustSet() {}
+
+	virtual unsigned GetLeafCount() = 0;
+	virtual double ComputeDist(const Clust &C, unsigned uNodeIndex1,
+	  unsigned uNodeIndex2) = 0;
+	virtual void JoinNodes(const Clust &C, unsigned uLeftNodeIndex,
+	  unsigned uRightNodeIndex, unsigned uJoinedNodeIndex,
+	  double *ptrdLeftLength, double *ptrdRightLength) = 0;
+	virtual const char *GetLeafName(unsigned uNodeIndex) = 0;
+	virtual unsigned GetLeafId(unsigned uNodeIndex) = 0;
+	};
+
+#endif	// ClustSet_h

Added: trunk/packages/muscle/branches/upstream/current/clustsetdf.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/clustsetdf.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/clustsetdf.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,48 @@
+#ifndef ClustSetDF_h
+#define ClustSetDF_h
+
+class MSA;
+class Clust;
+
+#include "clustset.h"
+#include "distfunc.h"
+#include "msa.h"
+
+// ClustSet implementation backed by a DistFunc: leaf count, names, ids and
+// distances are all forwarded to the DistFunc given at construction. Only
+// a pointer to that DistFunc is kept. JoinNodes is not supported and
+// calls Quit().
+class ClustSetDF : public ClustSet
+	{
+public:
+	ClustSetDF(const DistFunc &DF) : m_ptrDF(&DF) {}
+
+	virtual unsigned GetLeafCount()
+		{
+		const unsigned uCount = m_ptrDF->GetCount();
+		return uCount;
+		}
+
+	virtual const char *GetLeafName(unsigned uNodeIndex)
+		{
+		const char *ptrName = m_ptrDF->GetName(uNodeIndex);
+		return ptrName;
+		}
+
+	virtual unsigned GetLeafId(unsigned uNodeIndex)
+		{
+		const unsigned uId = m_ptrDF->GetId(uNodeIndex);
+		return uId;
+		}
+
+	virtual void JoinNodes(const Clust &C, unsigned uLeftNodeIndex,
+	  unsigned uRightNodeIndex, unsigned uJoinedNodeIndex,
+	  double *ptrdLeftLength, double *ptrdRightLength)
+		{
+		Quit("ClustSetDF::JoinNodes, should never be called");
+		}
+
+	virtual double ComputeDist(const Clust &C, unsigned uNodeIndex1,
+	  unsigned uNodeIndex2)
+		{
+		const double dDist = m_ptrDF->GetDist(uNodeIndex1, uNodeIndex2);
+		return dDist;
+		}
+
+private:
+	const DistFunc *m_ptrDF;
+	};
+
+#endif	// ClustSetDF_h

Added: trunk/packages/muscle/branches/upstream/current/clustsetmsa.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/clustsetmsa.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/clustsetmsa.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,55 @@
+#ifndef ClustSetMSA_h
+#define ClustSetMSA_h
+
+class MSA;
+class Clust;
+
+#include "clustset.h"
+#include "msadist.h"
+
+// ClustSet implementation backed by an MSA and an MSADist.
+// Leaf count, names and ids come from the MSA; the distance between two
+// leaves is computed by the MSADist object. Distances are only computed
+// between leaves, never between joined clusters (that is left to the
+// distance matrix method), so JoinNodes is not supported and calls Quit().
+// Only pointers to the MSA and MSADist are kept.
+class ClustSetMSA : public ClustSet
+	{
+public:
+	ClustSetMSA(const MSA &msa, MSADist &MD) : m_ptrMSA(&msa), m_ptrMSADist(&MD) {}
+
+	virtual unsigned GetLeafCount()
+		{
+		const unsigned uCount = m_ptrMSA->GetSeqCount();
+		return uCount;
+		}
+
+	virtual const char *GetLeafName(unsigned uNodeIndex)
+		{
+		const char *ptrName = m_ptrMSA->GetSeqName(uNodeIndex);
+		return ptrName;
+		}
+
+	virtual unsigned GetLeafId(unsigned uNodeIndex)
+		{
+		const unsigned uId = m_ptrMSA->GetSeqId(uNodeIndex);
+		return uId;
+		}
+
+	virtual void JoinNodes(const Clust &C, unsigned uLeftNodeIndex,
+	  unsigned uRightNodeIndex, unsigned uJoinedNodeIndex,
+	  double *ptrdLeftLength, double *ptrdRightLength)
+		{
+		Quit("ClustSetMSA::JoinNodes, should never be called");
+		}
+
+	virtual double ComputeDist(const Clust &C, unsigned uNodeIndex1,
+	  unsigned uNodeIndex2)
+		{
+		const MSA &msa = *m_ptrMSA;
+		return m_ptrMSADist->ComputeDist(msa, uNodeIndex1, uNodeIndex2);
+		}
+
+public:
+	const MSA &GetMSA();
+
+private:
+	const MSA *m_ptrMSA;
+	MSADist *m_ptrMSADist;
+	};
+
+#endif	// ClustSetMSA_h

Added: trunk/packages/muscle/branches/upstream/current/clwwt.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/clwwt.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/clwwt.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,190 @@
+#include "muscle.h"
+#include "tree.h"
+#include "msa.h"
+
+/***
+Compute weights by the CLUSTALW method.
+Thompson, Higgins and Gibson (1994), CABIOS (10) 19-29;
+see also CLUSTALW paper.
+
+Weights are computed from the edge lengths of a rooted tree.
+
+Define the strength of an edge to be its length divided by the number
+of leaves under that edge. The weight of a sequence is then the sum
+of edge strengths on the path from the root to the leaf.
+
+Example.
+
+        0.2
+       -----A     0.1
+	 -x         ------- B     0.7
+	   --------y           ----------- C
+	    0.3     ----------z
+                    0.4    -------------- D
+                                 0.8
+
+Edge	Length	Leaves	Strength
+----	-----	------	--------
+xy		0.3		3		0.1
+xA		0.2		1		0.2
+yz		0.4		2		0.2
+yB		0.1		1		0.1
+zC		0.7		1		0.7
+zD		0.8		1		0.8
+
+Leaf	Path		Strengths			Weight
+----	----		---------			------
+A		xA			0.2					0.2
+B		xy-yB		0.1 + 0.1			0.2
+C		xy-yz-zC	0.1 + 0.2 + 0.7		1.0
+D		xy-yz-zD	0.1 + 0.2 + 0.8		1.1
+
+***/
+
+#define TRACE 0
+
+// Recursively counts the leaves under uNodeIndex. The count for every
+// visited node is stored in LeavesUnderNode[node index] (1 for a leaf) and
+// the count for uNodeIndex itself is returned.
+static unsigned CountLeaves(const Tree &tree, unsigned uNodeIndex,
+  unsigned LeavesUnderNode[])
+	{
+	unsigned uTotal = 1;
+	if (!tree.IsLeaf(uNodeIndex))
+		{
+		const unsigned uLeftChild = tree.GetLeft(uNodeIndex);
+		const unsigned uRightChild = tree.GetRight(uNodeIndex);
+	// The right sub-tree is visited before the left one.
+		uTotal = CountLeaves(tree, uRightChild, LeavesUnderNode);
+		uTotal += CountLeaves(tree, uLeftChild, LeavesUnderNode);
+		}
+	LeavesUnderNode[uNodeIndex] = uTotal;
+	return uTotal;
+	}
+
+// Computes CLUSTALW-style sequence weights from a rooted tree.
+// Weights[n] receives the weight of the leaf with leaf index n, so Weights
+// must have room for tree.GetLeafCount() entries.
+//
+// Trees with 0, 1 or 2 leaves are handled directly (nothing written, 1.0,
+// or 0.5 each). Otherwise the tree must be rooted (Quit() if not); each
+// edge gets strength = length / leaves under the edge, a leaf's weight is
+// the sum of strengths on its path to the root, a weight below 0.0001 is
+// replaced by 1.0, and finally Normalize() is applied to all weights.
+void CalcClustalWWeights(const Tree &tree, WEIGHT Weights[])
+	{
+#if	TRACE
+	Log("CalcClustalWWeights\n");
+	tree.LogMe();
+#endif
+
+	const unsigned uLeafCount = tree.GetLeafCount();
+	if (0 == uLeafCount)
+		return;
+	else if (1 == uLeafCount)
+		{
+		Weights[0] = (WEIGHT) 1.0;
+		return;
+		}
+	else if (2 == uLeafCount)
+		{
+		Weights[0] = (WEIGHT) 0.5;
+		Weights[1] = (WEIGHT) 0.5;
+		return;
+		}
+
+	if (!tree.IsRooted())
+		Quit("CalcClustalWWeights requires rooted tree");
+
+// Number of leaves under every node, indexed by node index.
+	const unsigned uNodeCount = tree.GetNodeCount();
+	unsigned *LeavesUnderNode = new unsigned[uNodeCount];
+	memset(LeavesUnderNode, 0, uNodeCount*sizeof(unsigned));
+
+	const unsigned uRootNodeIndex = tree.GetRootNodeIndex();
+	unsigned uLeavesUnderRoot = CountLeaves(tree, uRootNodeIndex, LeavesUnderNode);
+// The message used to name a function ("WeightsFromTreee") that does not
+// exist here; it now names this function.
+	if (uLeavesUnderRoot != uLeafCount)
+		Quit("CalcClustalWWeights: Internal error, root count %u %u",
+		  uLeavesUnderRoot, uLeafCount);
+
+#if	TRACE
+	Log("Node  Leaves    Length  Strength\n");
+	Log("----  ------  --------  --------\n");
+	//    1234  123456  12345678  12345678
+#endif
+
+// Strength of the edge from each node up to its parent; the root has no
+// such edge and gets zero.
+	double *Strengths = new double[uNodeCount];
+	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
+		{
+		if (tree.IsRoot(uNodeIndex))
+			{
+			Strengths[uNodeIndex] = 0.0;
+			continue;
+			}
+		const unsigned uParent = tree.GetParent(uNodeIndex);
+		const double dLength = tree.GetEdgeLength(uNodeIndex, uParent);
+		const unsigned uLeaves = LeavesUnderNode[uNodeIndex];
+		const double dStrength = dLength / (double) uLeaves;
+		Strengths[uNodeIndex] = dStrength;
+#if	TRACE
+		Log("%4u  %6u  %8g  %8g\n", uNodeIndex, uLeaves, dLength, dStrength);
+#endif
+		}
+
+#if	TRACE
+	Log("\n");
+	Log("                 Seq  Path..Weight\n");
+	Log("--------------------  ------------\n");
+#endif
+	for (unsigned n = 0; n < uLeafCount; ++n)
+		{
+		const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
+#if	TRACE
+		Log("%20.20s  %4u ", tree.GetLeafName(uLeafNodeIndex), uLeafNodeIndex);
+#endif
+		if (!tree.IsLeaf(uLeafNodeIndex))
+			Quit("CalcClustalWWeights: leaf");
+
+	// Sum edge strengths while climbing from the leaf to the root.
+		double dWeight = 0;
+		unsigned uNode = uLeafNodeIndex;
+		while (!tree.IsRoot(uNode))
+			{
+			dWeight += Strengths[uNode];
+			uNode = tree.GetParent(uNode);
+#if	TRACE
+			Log("->%u(%g)", uNode, Strengths[uNode]);
+#endif
+			}
+	// A (near-)zero weight is replaced by one.
+		if (dWeight < 0.0001)
+			{
+#if	TRACE
+			Log("zero->one");
+#endif
+			dWeight = 1.0;
+			}
+		Weights[n] = (WEIGHT) dWeight;
+#if	TRACE
+		Log(" = %g\n", dWeight);
+#endif
+		}
+
+	delete[] Strengths;
+	delete[] LeavesUnderNode;
+
+	Normalize(Weights, uLeafCount);
+	}
+
+// Computes CLUSTALW weights for the leaves of tree and assigns each one to
+// the sequence of this MSA whose id equals the leaf's id, then calls
+// NormalizeWeights((WEIGHT) 1.0).
+void MSA::SetClustalWWeights(const Tree &tree)
+	{
+	const unsigned uLeafCount = tree.GetLeafCount();
+
+// CalcClustalWWeights writes one weight per leaf and the loop below reads
+// one per leaf, so the buffer is sized by the leaf count. It used to be
+// sized by the sequence count, which overflows if the tree has more leaves
+// than the MSA has sequences.
+	WEIGHT *Weights = new WEIGHT[uLeafCount];
+
+	CalcClustalWWeights(tree, Weights);
+
+	for (unsigned n = 0; n < uLeafCount; ++n)
+		{
+		const WEIGHT w = Weights[n];
+		const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
+		const unsigned uId = tree.GetLeafId(uLeafNodeIndex);
+		const unsigned uSeqIndex = GetSeqIndex(uId);
+#if	DEBUG
+	// Compare the names as strings; comparing the pointers only tests
+	// whether both names live at the same address.
+		if (strcmp(GetSeqName(uSeqIndex), tree.GetLeafName(uLeafNodeIndex)) != 0)
+			Quit("MSA::SetClustalWWeights: names don't match");
+#endif
+		SetSeqWeight(uSeqIndex, w);
+		}
+	NormalizeWeights((WEIGHT) 1.0);
+
+	delete[] Weights;
+	}

Added: trunk/packages/muscle/branches/upstream/current/color.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/color.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/color.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,189 @@
+#include "muscle.h"
+#include "msa.h"
+
+static int Blosum62[23][23] =	// substitution scores indexed [toi(row letter)][toi(column letter)]; letter order as in the header row
+	{
+//   A   B   C   D   E    F   G   H   I   K    L   M   N   P   Q    R   S   T   V   W    X   Y   Z 
+	+4, -2, +0, -2, -1,  -2, +0, -2, -1, -1,  -1, -1, -2, -1, -1,  -1, +1, +0, +0, -3,  -1, -2, -1,  // A
+	-2, +6, -3, +6, +2,  -3, -1, -1, -3, -1,  -4, -3, +1, -1, +0,  -2, +0, -1, -3, -4,  -1, -3, +2,  // B
+	+0, -3, +9, -3, -4,  -2, -3, -3, -1, -3,  -1, -1, -3, -3, -3,  -3, -1, -1, -1, -2,  -1, -2, -4,  // C
+	-2, +6, -3, +6, +2,  -3, -1, -1, -3, -1,  -4, -3, +1, -1, +0,  -2, +0, -1, -3, -4,  -1, -3, +2,  // D
+	-1, +2, -4, +2, +5,  -3, -2, +0, -3, +1,  -3, -2, +0, -1, +2,  +0, +0, -1, -2, -3,  -1, -2, +5,  // E
+	
+	-2, -3, -2, -3, -3,  +6, -3, -1, +0, -3,  +0, +0, -3, -4, -3,  -3, -2, -2, -1, +1,  -1, +3, -3,  // F
+	+0, -1, -3, -1, -2,  -3, +6, -2, -4, -2,  -4, -3, +0, -2, -2,  -2, +0, -2, -3, -2,  -1, -3, -2,  // G
+	-2, -1, -3, -1, +0,  -1, -2, +8, -3, -1,  -3, -2, +1, -2, +0,  +0, -1, -2, -3, -2,  -1, +2, +0,  // H
+	-1, -3, -1, -3, -3,  +0, -4, -3, +4, -3,  +2, +1, -3, -3, -3,  -3, -2, -1, +3, -3,  -1, -1, -3,  // I
+	-1, -1, -3, -1, +1,  -3, -2, -1, -3, +5,  -2, -1, +0, -1, +1,  +2, +0, -1, -2, -3,  -1, -2, +1,  // K
+	
+	-1, -4, -1, -4, -3,  +0, -4, -3, +2, -2,  +4, +2, -3, -3, -2,  -2, -2, -1, +1, -2,  -1, -1, -3,  // L
+	-1, -3, -1, -3, -2,  +0, -3, -2, +1, -1,  +2, +5, -2, -2, +0,  -1, -1, -1, +1, -1,  -1, -1, -2,  // M
+	-2, +1, -3, +1, +0,  -3, +0, +1, -3, +0,  -3, -2, +6, -2, +0,  +0, +1, +0, -3, -4,  -1, -2, +0,  // N
+	-1, -1, -3, -1, -1,  -4, -2, -2, -3, -1,  -3, -2, -2, +7, -1,  -2, -1, -1, -2, -4,  -1, -3, -1,  // P
+	-1, +0, -3, +0, +2,  -3, -2, +0, -3, +1,  -2, +0, +0, -1, +5,  +1, +0, -1, -2, -2,  -1, -1, +2,  // Q
+	
+	-1, -2, -3, -2, +0,  -3, -2, +0, -3, +2,  -2, -1, +0, -2, +1,  +5, -1, -1, -3, -3,  -1, -2, +0,  // R
+	+1, +0, -1, +0, +0,  -2, +0, -1, -2, +0,  -2, -1, +1, -1, +0,  -1, +4, +1, -2, -3,  -1, -2, +0,  // S
+	+0, -1, -1, -1, -1,  -2, -2, -2, -1, -1,  -1, -1, +0, -1, -1,  -1, +1, +5, +0, -2,  -1, -2, -1,  // T
+	+0, -3, -1, -3, -2,  -1, -3, -3, +3, -2,  +1, +1, -3, -2, -2,  -3, -2, +0, +4, -3,  -1, -1, -2,  // V
+	-3, -4, -2, -4, -3,  +1, -2, -2, -3, -3,  -2, -1, -4, -4, -2,  -3, -3, -2, -3,+11,  -1, +2, -3,  // W
+	
+	-1, -1, -1, -1, -1,  -1, -1, -1, -1, -1,  -1, -1, -1, -1, -1,  -1, -1, -1, -1, -1,  -1, -1, -1,  // X
+	-2, -3, -2, -3, -2,  +3, -3, +2, -1, -2,  -1, -1, -2, -3, -1,  -2, -2, -2, -1, +2,  -1, +7, -2,  // Y
+	-1, +2, -4, +2, +5,  -3, -2, +0, -3, +1,  -3, -2, +0, -1, +2,  +0, +0, -1, -2, -3,  -1, -2, +5,  // Z
+	};
+
+static int toi_tab[26] =	// indexed by (upper-case letter - 'A'); value is that letter's Blosum62 row/column, or -1 if it has none
+	{
+	0,	// A
+	1,	// B
+	2,	// C
+	3,	// D
+	4,	// E
+	5,	// F
+	6,	// G
+	7,	// H
+	8,	// I
+	-1,	// J
+	9,	// K
+	10,	// L
+	11,	// M
+	12,	// N
+	-1,	// O
+	13,	// P
+	14,	// Q
+	15,	// R
+	16,	// S
+	17,	// T
+	-1,	// U
+	18,	// V
+	19,	// W
+	20,	// X
+	21,	// Y
+	22,	// Z
+	};
+
+// Maps a residue letter (either case) to its row/column index in Blosum62.
+// Returns -1 for a character with no entry: J, O, U, or anything that is
+// not a letter A-Z (which previously indexed outside toi_tab).
+static int toi(char c)
+	{
+// toupper() needs a value representable as unsigned char; plain char may
+// be negative.
+	const int iUpper = toupper((unsigned char) c);
+	if (iUpper < 'A' || iUpper > 'Z')
+		return -1;
+	return toi_tab[iUpper - 'A'];
+	}
+
+// Blosum62 score for a pair of residue letters. A letter that has no table
+// entry (toi() gives no valid index) is scored as the residue 'X' instead
+// of being used as an out-of-range table index.
+static int BlosumScore(char c1, char c2)
+	{
+	const int iIndexX = 20;	// row/column of X in Blosum62
+	int i1 = toi(c1);
+	int i2 = toi(c2);
+	if (i1 < 0 || i1 >= 23)
+		i1 = iIndexX;
+	if (i2 < 0 || i2 >= 23)
+		i2 = iIndexX;
+	return Blosum62[i1][i2];
+	}
+
+/***
+Average Blosum62 score over all ordered pairs of letters in a column.
+
+Consider a column with 5 As and 3 Bs.
+There are:
+	5x4 pairs of As.
+	3x2 pairs of Bs.
+	5x3x2 AB pairs
+	8x7 = 5x4 + 3x2 + 5x3x2 pairs of letters
+
+Gaps, and letters that have no Blosum62 entry, are not counted.
+Returns -9 if fewer than two letters are counted.
+***/
+static double BlosumScoreCol(const MSA &a, unsigned uColIndex)
+	{
+	int iCounts[23];
+	memset(iCounts, 0, sizeof(iCounts));
+	const unsigned uSeqCount = a.GetSeqCount();
+	unsigned uCharCount = 0;
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		char c = a.GetChar(uSeqIndex, uColIndex);
+		if (IsGapChar(c))
+			continue;
+		int iChar = toi(c);
+	// toi() yields -1 for letters without a table entry; counting those
+	// would write outside iCounts.
+		if (iChar < 0 || iChar >= 23)
+			continue;
+		++iCounts[iChar];
+		++uCharCount;
+		}
+	if (uCharCount < 2)
+		return -9;
+
+	int iTotalScore = 0;
+	for (int i1 = 0; i1 < 23; ++i1)
+		{
+		int iCounts1 = iCounts[i1];
+	// Ordered pairs of the same letter.
+		iTotalScore += iCounts1*(iCounts1 - 1)*Blosum62[i1][i1];
+	// Ordered pairs of two different letters (hence the factor 2).
+		for (int i2 = i1 + 1; i2 < 23; ++i2)
+			iTotalScore += iCounts[i2]*iCounts1*2*Blosum62[i1][i2];
+		}
+	int iPairCount = uCharCount*(uCharCount - 1);
+	return (double) iTotalScore / (double) iPairCount;
+	}
+
+/***
+Assign a color (0, 1 or 3) to every cell of one column.
+
+The column color is 3 if the column's BlosumScoreCol is >= 3.0, 1 if it
+is >= 0.2, and 0 otherwise. A letter gets the column color if its Blosum62
+score against the most common letter of the column is at least the
+column score; otherwise it gets 0. Gaps and letters without a Blosum62
+entry always get 0.
+***/
+static void AssignColorsCol(const MSA &a, unsigned uColIndex, int **Colors)
+	{
+	int iCounts[23];
+	memset(iCounts, 0, sizeof(iCounts));
+	const unsigned uSeqCount = a.GetSeqCount();
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		char c = a.GetChar(uSeqIndex, uColIndex);
+		if (IsGapChar(c))
+			continue;
+		int iChar = toi(c);
+	// toi() yields -1 for letters without a table entry; counting those
+	// would write outside iCounts.
+		if (iChar < 0 || iChar >= 23)
+			continue;
+		++iCounts[iChar];
+		}
+
+// Most common letter; the first one wins ties. Counts are never negative,
+// so a type is always selected.
+	int iMostConservedType = -1;
+	int iMostConservedCount = -1;
+	for (unsigned i = 0; i < 23; ++i)
+		{
+		if (iCounts[i] > iMostConservedCount)
+			{
+			iMostConservedType = i;
+			iMostConservedCount = iCounts[i];
+			}
+		}
+
+	double dColScore = BlosumScoreCol(a, uColIndex);
+	int iColumnColor;
+	if (dColScore >= 3.0)
+		iColumnColor = 3;
+	else if (dColScore >= 0.2)
+		iColumnColor = 1;
+	else
+		iColumnColor = 0;
+
+// Color each letter would receive in this column.
+	int Color[23];
+	for (unsigned uLetter = 0; uLetter < 23; ++uLetter)
+		{
+		double dScore = Blosum62[uLetter][iMostConservedType];
+		if (dScore >= dColScore)
+			Color[uLetter] = iColumnColor;
+		else
+			Color[uLetter] = 0;
+		}
+
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		char c = a.GetChar(uSeqIndex, uColIndex);
+		if (IsGapChar(c))
+			{
+			Colors[uSeqIndex][uColIndex] = 0;
+			continue;
+			}
+		int iLetter = toi(c);
+		if (iLetter >= 0 && iLetter < 23)
+			Colors[uSeqIndex][uColIndex] = Color[iLetter];
+		else
+			Colors[uSeqIndex][uColIndex] = 0;
+		}
+	}
+
+// Fills Colors[seq][col] for every column of the alignment, one column at
+// a time, left to right.
+void AssignColors(const MSA &a, int **Colors)
+	{
+	const unsigned uColumns = a.GetColCount();
+	unsigned uCol = 0;
+	while (uCol < uColumns)
+		{
+		AssignColorsCol(a, uCol, Colors);
+		++uCol;
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/cons.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/cons.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/cons.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,118 @@
+/***
+Conservation value for a column in an MSA is defined as the number
+of times the most common letter appears divided by the number of
+sequences.
+***/
+
+#include "muscle.h"
+#include "msa.h"
+#include <math.h>
+
+// Mean of GetCons() over all columns that are not gap columns.
+// Asserts that there is at least one sequence, at least one such column,
+// and that the mean lies in (0, 1].
+double MSA::GetAvgCons() const
+	{
+	assert(GetSeqCount() > 0);
+
+	const unsigned uColCount = GetColCount();
+	unsigned uCounted = 0;
+	double dTotal = 0;
+	for (unsigned uCol = 0; uCol != uColCount; ++uCol)
+		{
+		if (IsGapColumn(uCol))
+			continue;
+		dTotal += GetCons(uCol);
+		++uCounted;
+		}
+
+	assert(uCounted > 0);
+	const double dMean = dTotal / uCounted;
+	assert(dMean > 0 && dMean <= 1);
+	return dMean;
+	}
+
+// Conservation of one column: the count of the most common letter divided
+// by the number of sequences. Gaps and the letters X, B and Z (either
+// case) are not counted. Returns 1 when no letter was counted at all.
+double MSA::GetCons(unsigned uColIndex) const
+	{
+	unsigned Counts[MAX_ALPHA];
+	for (unsigned n = 0; n < g_AlphaSize; ++n)
+		Counts[n] = 0;
+
+	const unsigned uSeqCount = GetSeqCount();
+	unsigned uBest = 0;
+	for (unsigned uSeq = 0; uSeq < uSeqCount; ++uSeq)
+		{
+		if (IsGap(uSeq, uColIndex))
+			continue;
+
+		char cUpper = GetChar(uSeq, uColIndex);
+		cUpper = toupper(cUpper);
+		const bool bSkipped = ('X' == cUpper || 'B' == cUpper || 'Z' == cUpper);
+		if (bSkipped)
+			continue;
+
+		const unsigned uLetter = GetLetter(uSeq, uColIndex);
+		const unsigned uNewCount = ++Counts[uLetter];
+		if (uNewCount > uBest)
+			uBest = uNewCount;
+		}
+
+// Cons is undefined when nothing was counted (e.g. all-gap column).
+	if (0 == uBest)
+		return 1;
+
+	const double dCons = (double) uBest / (double) uSeqCount;
+	assert(dCons > 0 && dCons <= 1);
+	return dCons;
+	}
+
+// Identity of a pair of sequences, returned as a fraction: the number of
+// columns where both have the same character divided by the number of
+// columns compared. Columns where one or both are gapped are ignored.
+// Returns 0 if no column can be compared.
+double MSA::GetPctIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const
+	{
+	unsigned uCompared = 0;
+	unsigned uIdentical = 0;
+	for (unsigned uCol = 0, uEnd = GetColCount(); uCol < uEnd; ++uCol)
+		{
+		const char cA = GetChar(uSeqIndex1, uCol);
+		const char cB = GetChar(uSeqIndex2, uCol);
+		if (!IsGapChar(cA) && !IsGapChar(cB))
+			{
+			++uCompared;
+			if (cA == cB)
+				++uIdentical;
+			}
+		}
+
+	if (0 == uCompared)
+		return 0;
+	return (double) uIdentical / (double) uCompared;
+	}
+
+// Group identity of a pair of sequences, returned as a fraction: the
+// number of columns where both letters map to the same ResidueGroup entry
+// divided by the number of columns compared. Columns where either sequence
+// has a gap or a wildcard are ignored. Returns 0 if no column can be
+// compared.
+double MSA::GetPctGroupIdentityPair(unsigned uSeqIndex1,
+  unsigned uSeqIndex2) const
+	{
+	extern unsigned ResidueGroup[];
+
+	unsigned uCompared = 0;
+	unsigned uSameGroup = 0;
+	for (unsigned uCol = 0, uEnd = GetColCount(); uCol < uEnd; ++uCol)
+		{
+		const bool bIgnored =
+		  IsGap(uSeqIndex1, uCol) ||
+		  IsGap(uSeqIndex2, uCol) ||
+		  IsWildcard(uSeqIndex1, uCol) ||
+		  IsWildcard(uSeqIndex2, uCol);
+		if (bIgnored)
+			continue;
+
+		const unsigned uGroupA = ResidueGroup[GetLetter(uSeqIndex1, uCol)];
+		const unsigned uGroupB = ResidueGroup[GetLetter(uSeqIndex2, uCol)];
+		++uCompared;
+		if (uGroupA == uGroupB)
+			++uSameGroup;
+		}
+
+	if (0 == uCompared)
+		return 0;
+	return (double) uSameGroup / (double) uCompared;
+	}

Added: trunk/packages/muscle/branches/upstream/current/diaglist.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/diaglist.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/diaglist.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,378 @@
+#include "muscle.h"
+#include "diaglist.h"
+#include "pwpath.h"
+
+#define MAX(x, y)	((x) > (y) ? (x) : (y))
+#define MIN(x, y)	((x) < (y) ? (x) : (y))
+
+void DiagList::Add(const Diag &d)
+	{
+	if (m_uCount == MAX_DIAGS)
+		Quit("DiagList::Add, overflow %u", m_uCount);
+	m_Diags[m_uCount] = d;
+	++m_uCount;
+	}
+
+void DiagList::Add(unsigned uStartPosA, unsigned uStartPosB, unsigned uLength)
+	{
+	Diag d;
+	d.m_uStartPosA = uStartPosA;
+	d.m_uStartPosB = uStartPosB;
+	d.m_uLength = uLength;
+	Add(d);
+	}
+
+const Diag &DiagList::Get(unsigned uIndex) const
+	{
+	if (uIndex >= m_uCount)
+		Quit("DiagList::Get(%u), count=%u", uIndex, m_uCount);
+	return m_Diags[uIndex];
+	}
+
+void DiagList::LogMe() const
+	{
+	Log("DiagList::LogMe, count=%u\n", m_uCount);
+	Log("  n  StartA  StartB  Length\n");
+	Log("---  ------  ------  ------\n");
+	for (unsigned n = 0; n < m_uCount; ++n)
+		{
+		const Diag &d = m_Diags[n];
+		Log("%3u  %6u  %6u  %6u\n",
+		  n, d.m_uStartPosA, d.m_uStartPosB, d.m_uLength);
+		}
+	}
+
+// Rebuilds the list from Path: each run of consecutive 'M' edges of at least
+// g_uMinDiagLength edges becomes one diagonal; any other edge type ends a run.
+void DiagList::FromPath(const PWPath &Path)
+	{
+	Clear();
+	const unsigned uEdgeCount = Path.GetEdgeCount();
+	unsigned uLength = 0;
+	unsigned uStartPosA = 0;	// set when a run starts; initialized so that
+	unsigned uStartPosB = 0;	// Add() never reads an indeterminate value
+	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
+		{
+		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
+	// A match edge starts or extends the current run; anything else closes it.
+		if (Edge.cType == 'M')
+			{
+			if (0 == uLength)
+				{
+				uStartPosA = Edge.uPrefixLengthA - 1;
+				uStartPosB = Edge.uPrefixLengthB - 1;
+				}
+			++uLength;
+			}
+		else
+			{
+			if (uLength >= g_uMinDiagLength)
+				Add(uStartPosA, uStartPosB, uLength);
+			uLength = 0;
+			}
+		}
+
+// A run that reaches the last edge is not closed inside the loop; flush it here.
+	if (uLength >= g_uMinDiagLength)
+		Add(uStartPosA, uStartPosB, uLength);
+	}
+
+bool DiagList::NonZeroIntersection(const Diag &d) const
+	{
+	for (unsigned n = 0; n < m_uCount; ++n)
+		{
+		const Diag &d2 = m_Diags[n];
+		if (DiagOverlap(d, d2) > 0)
+			return true;
+		}
+	return false;
+	}
+
+// DiagOverlap returns the length of the overlapping
+// section of the two diagonals along the diagonals
+// themselves; in other words, the length of
+// the intersection of the two sets of cells in
+// the matrix.
+unsigned DiagOverlap(const Diag &d1, const Diag &d2)
+	{
+// Determine where the diagonals intersect the A
+// axis (extending them if required). If they
+// intersect at different points, they do not
+// overlap. Coordinates on a diagonal are
+// given by B = A + c where c is the value of
+// A at the intersection with the A axis.
+// Hence, c = B - A for any point on the diagonal.
+	int c1 = (int) d1.m_uStartPosB - (int) d1.m_uStartPosA;
+	int c2 = (int) d2.m_uStartPosB - (int) d2.m_uStartPosA;
+	if (c1 != c2)
+		return 0;
+
+	assert(DiagOverlapA(d1, d2) == DiagOverlapB(d1, d2));
+	return DiagOverlapA(d1, d2);
+	}
+
+// DiagOverlapA returns the length of the overlapping
+// section of the projection of the two diagonals onto
+// the A axis.
+unsigned DiagOverlapA(const Diag &d1, const Diag &d2)
+	{
+	unsigned uMaxStart = MAX(d1.m_uStartPosA, d2.m_uStartPosA);
+	unsigned uMinEnd = MIN(d1.m_uStartPosA + d1.m_uLength - 1,
+	  d2.m_uStartPosA + d2.m_uLength - 1);
+
+	int iLength = (int) uMinEnd - (int) uMaxStart + 1;
+	if (iLength < 0)
+		return 0;
+	return (unsigned) iLength;
+	}
+
+// DiagOverlapB returns the length of the overlapping
+// section of the projection of the two diagonals onto
+// the B axis.
+unsigned DiagOverlapB(const Diag &d1, const Diag &d2)
+	{
+	unsigned uMaxStart = MAX(d1.m_uStartPosB, d2.m_uStartPosB);
+	unsigned uMinEnd = MIN(d1.m_uStartPosB + d1.m_uLength - 1,
+	  d2.m_uStartPosB + d2.m_uLength - 1);
+
+	int iLength = (int) uMinEnd - (int) uMaxStart + 1;
+	if (iLength < 0)
+		return 0;
+	return (unsigned) iLength;
+	}
+
+// Returns true if the two diagonals can be on the
+// same path through the DP matrix. If DiagCompatible
+// returns false, they cannot be in the same path
+// and hence "contradict" each other.
+bool DiagCompatible(const Diag &d1, const Diag &d2)
+	{
+	if (DiagOverlap(d1, d2) > 0)
+		return true;
+	return 0 == DiagOverlapA(d1, d2) && 0 == DiagOverlapB(d1, d2);
+	}
+
+// Returns the gap length between two segments of one diagonal (0 if they differ, touch or overlap).
+unsigned DiagBreak(const Diag &d1, const Diag &d2)
+	{
+	int c1 = (int) d1.m_uStartPosB - (int) d1.m_uStartPosA;
+	int c2 = (int) d2.m_uStartPosB - (int) d2.m_uStartPosA;
+	if (c1 != c2)
+		return 0;
+
+	int iMaxStart = MAX(d1.m_uStartPosA, d2.m_uStartPosA);
+	int iMinEnd = MIN(d1.m_uStartPosA + d1.m_uLength - 1,
+	  d2.m_uStartPosA + d2.m_uLength - 1);	// end of d2 uses d2's own length
+	int iBreak = iMaxStart - iMinEnd - 1;
+	if (iBreak < 0)
+		return 0;
+	return (unsigned) iBreak;
+	}
+
+// Merge diagonals that are continuations of each other with
+// short breaks of up to length g_uMaxDiagBreak.
+// In a sorted list of diagonals, we only have to check
+// consecutive entries.
+void MergeDiags(DiagList &DL)
+	{
+	return;
+#if	DEBUG
+	if (!DL.IsSorted())
+		Quit("MergeDiags: !IsSorted");
+#endif
+
+// TODO: Fix this!
+// Breaks must be with no offset (no gaps)
+	const unsigned uCount = DL.GetCount();
+	if (uCount <= 1)
+		return;
+
+	DiagList NewList;
+
+	Diag MergedDiag;
+	const Diag *ptrPrev = &DL.Get(0);
+	for (unsigned i = 1; i < uCount; ++i)
+		{
+		const Diag *ptrDiag = &DL.Get(i);
+		unsigned uBreakLength = DiagBreak(*ptrPrev, *ptrDiag);
+		if (uBreakLength <= g_uMaxDiagBreak)
+			{
+			MergedDiag.m_uStartPosA = ptrPrev->m_uStartPosA;
+			MergedDiag.m_uStartPosB = ptrPrev->m_uStartPosB;
+			MergedDiag.m_uLength = ptrPrev->m_uLength + ptrDiag->m_uLength
+			  + uBreakLength;
+			ptrPrev = &MergedDiag;
+			}
+		else
+			{
+			NewList.Add(*ptrPrev);
+			ptrPrev = ptrDiag;
+			}
+		}
+	NewList.Add(*ptrPrev);
+	DL.Copy(NewList);
+	}
+
+// Removes diagonals that contradict each other. Pass 1: for each incompatible
+// pair keep only a diagonal more than 4x longer than the other, else drop both.
+// Pass 2: drop surviving pairs out of order in B or still incompatible.
+void DiagList::DeleteIncompatible()
+	{
+	assert(IsSorted());
+
+	if (m_uCount < 2)
+		return;
+
+	bool *bFlagForDeletion = new bool[m_uCount];
+	for (unsigned i = 0; i < m_uCount; ++i)
+		bFlagForDeletion[i] = false;
+
+	for (unsigned i = 0; i < m_uCount; ++i)
+		{
+		const Diag &di = m_Diags[i];
+		for (unsigned j = i + 1; j < m_uCount; ++j)
+			{
+			const Diag &dj = m_Diags[j];
+
+		// Verify sorted correctly
+			assert(di.m_uStartPosA <= dj.m_uStartPosA);
+
+		// If two diagonals are incompatible and
+		// one is much longer than the other,
+		// keep the longer one.
+			if (!DiagCompatible(di, dj))
+				{
+				if (di.m_uLength > dj.m_uLength*4)
+					bFlagForDeletion[j] = true;
+				else if (dj.m_uLength > di.m_uLength*4)
+					bFlagForDeletion[i] = true;
+				else
+					{
+					bFlagForDeletion[i] = true;
+					bFlagForDeletion[j] = true;
+					}
+				}
+			}
+		}
+
+	for (unsigned i = 0; i < m_uCount; ++i)
+		{
+		const Diag &di = m_Diags[i];
+		if (bFlagForDeletion[i])
+			continue;
+
+		for (unsigned j = i + 1; j < m_uCount; ++j)
+			{
+			const Diag &dj = m_Diags[j];
+			if (bFlagForDeletion[j])
+				continue;
+
+		// Verify sorted correctly
+			assert(di.m_uStartPosA <= dj.m_uStartPosA);
+
+		// If sort order in B different from sorted order in A,
+		// either diags are incompatible or we detected a repeat
+		// or permutation.
+			if (di.m_uStartPosB >= dj.m_uStartPosB || !DiagCompatible(di, dj))
+				{
+				bFlagForDeletion[i] = true;
+				bFlagForDeletion[j] = true;
+				}
+			}
+		}
+
+// Compact survivors in place (uNewCount <= i, so nothing unread is overwritten).
+	unsigned uNewCount = 0;
+	for (unsigned i = 0; i < m_uCount; ++i)
+		{
+		if (bFlagForDeletion[i])
+			continue;
+		m_Diags[uNewCount] = m_Diags[i];
+		++uNewCount;
+		}
+	m_uCount = uNewCount;
+	delete[] bFlagForDeletion;
+	}
+
+void DiagList::Copy(const DiagList &DL)
+	{
+	Clear();
+	unsigned uCount = DL.GetCount();
+	for (unsigned i = 0; i < uCount; ++i)
+		Add(DL.Get(i));
+	}
+
+// Check if sorted in increasing order of m_uStartPosA
+bool DiagList::IsSorted() const
+	{
+	return true;
+	unsigned uCount = GetCount();
+	for (unsigned i = 1; i < uCount; ++i)
+		if (m_Diags[i-1].m_uStartPosA > m_Diags[i].m_uStartPosA)
+			return false;
+	return true;
+	}
+
+// Sort in increasing order of m_uStartPosA
+// Dumb bubble sort, but don't care about speed
+// because don't get long lists.
+void DiagList::Sort()
+	{
+	if (m_uCount < 2)
+		return;
+
+	bool bContinue = true;
+	while (bContinue)
+		{
+		bContinue = false;
+		for (unsigned i = 0; i < m_uCount - 1; ++i)
+			{
+			if (m_Diags[i].m_uStartPosA > m_Diags[i+1].m_uStartPosA)
+				{
+				Diag Tmp = m_Diags[i];
+				m_Diags[i] = m_Diags[i+1];
+				m_Diags[i+1] = Tmp;
+				bContinue = true;
+				}
+			}
+		}
+	}
+
+//void TestDiag()
+//	{
+//	Diag d1;
+//	Diag d2;
+//	Diag d3;
+//
+//	d1.m_uStartPosA = 0;
+//	d1.m_uStartPosB = 1;
+//	d1.m_uLength = 32;
+//
+//	d2.m_uStartPosA = 55;
+//	d2.m_uStartPosB = 70;
+//	d2.m_uLength = 36;
+//
+//	d3.m_uStartPosA = 102;
+//	d3.m_uStartPosB = 122;
+//	d3.m_uLength = 50;
+//
+//	DiagList DL;
+//	DL.Add(d1);
+//	DL.Add(d2);
+//	DL.Add(d3);
+//
+//	Log("Before DeleteIncompatible:\n");
+//	DL.LogMe();
+//	DL.DeleteIncompatible();
+//
+//	Log("After DeleteIncompatible:\n");
+//	DL.LogMe();
+//
+//	MergeDiags(DL);
+//	Log("After Merge:\n");
+//	DL.LogMe();
+//
+//	DPRegionList RL;
+//	DiagListToDPRegionList(DL, RL, 200, 200);
+//	RL.LogMe();
+//	}

Added: trunk/packages/muscle/branches/upstream/current/diaglist.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/diaglist.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/diaglist.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,89 @@
+#ifndef diaglist_h
+#define diaglist_h
+
+const unsigned EMPTY = (unsigned) ~0;
+const unsigned MAX_DIAGS = 1024;
+
+struct Diag
+	{
+	unsigned m_uStartPosA;
+	unsigned m_uStartPosB;
+	unsigned m_uLength;
+	};
+
+struct Rect
+	{
+	unsigned m_uStartPosA;
+	unsigned m_uStartPosB;
+	unsigned m_uLengthA;
+	unsigned m_uLengthB;
+	};
+
+class DiagList
+	{
+public:
+	DiagList()
+		{
+		m_uCount = 0;
+		}
+	~DiagList()
+		{
+		Free();
+		}
+
+public:
+// Creation
+	void Clear()
+		{
+		Free();
+		}
+	void FromPath(const PWPath &Path);
+	void Add(const Diag &d);
+	void Add(unsigned uStartPosA, unsigned uStartPosB, unsigned uLength);
+	void DeleteIncompatible();
+
+// Accessors
+	unsigned GetCount() const
+		{
+		return m_uCount;
+		}
+	const Diag &Get(unsigned uIndex) const;
+
+// Operations
+	void Sort();
+	void Copy(const DiagList &DL);
+
+// Query
+	// returns true iff given diagonal is included in the list
+	// in whole or in part.
+	bool NonZeroIntersection(const Diag &d) const;
+	bool IsSorted() const;
+
+// Diagnostics
+	void LogMe() const;
+
+private:
+	void Free()
+		{
+		m_uCount = 0;
+		}
+
+private:
+	unsigned m_uCount;
+	Diag m_Diags[MAX_DIAGS];
+	};
+
+unsigned DiagOverlap(const Diag &d1, const Diag &d2);
+unsigned DiagOverlapA(const Diag &d1, const Diag &d2);
+unsigned DiagOverlapB(const Diag &d1, const Diag &d2);
+unsigned DiagBreak(const Diag &d1, const Diag &d2);
+bool DiagCompatible(const Diag &d1, const Diag &d2);
+void CheckDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, const MSA &msaA, const MSA &msaB, const PWPath &Path);
+void FindDiags(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
+  unsigned uLengthY, DiagList &DL);
+void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
+  unsigned uLengthY, DiagList &DL);
+void MergeDiags(DiagList &DL);
+
+#endif // diaglist_h

Added: trunk/packages/muscle/branches/upstream/current/diffobjscore.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/diffobjscore.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/diffobjscore.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,162 @@
+#include "muscle.h"
+#include "msa.h"
+#include "objscore.h"
+#include "profile.h"
+
+#define TRACE				0
+#define COMPARE_3_52		0
+#define BRUTE_LETTERS		0
+
+static SCORE ScoreColLetters(const MSA &msa, unsigned uColIndex)
+	{
+	SCOREMATRIX &Mx = *g_ptrScoreMatrix;
+	const unsigned uSeqCount = msa.GetSeqCount();
+
+#if	BRUTE_LETTERS
+	SCORE BruteScore = 0;
+	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
+		{
+		unsigned uLetter1 = msa.GetLetterEx(uSeqIndex1, uColIndex);
+		if (uLetter1 >= g_AlphaSize)
+			continue;
+		WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
+		for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
+			{
+			unsigned uLetter2 = msa.GetLetterEx(uSeqIndex2, uColIndex);
+			if (uLetter2 >= g_AlphaSize)
+				continue;
+			WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
+			BruteScore += w1*w2*Mx[uLetter1][uLetter2];
+			}
+		}
+#endif
+	
+	double N = 0;
+	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
+		{
+		WEIGHT w = msa.GetSeqWeight(uSeqIndex1);
+		N += w;
+		}
+	if (N <= 0)
+		return 0;
+
+	FCOUNT Freqs[20];
+	memset(Freqs, 0, sizeof(Freqs));
+	SCORE Score = 0;
+	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
+		{
+		unsigned uLetter = msa.GetLetterEx(uSeqIndex1, uColIndex);
+		if (uLetter >= g_AlphaSize)
+			continue;
+		WEIGHT w = msa.GetSeqWeight(uSeqIndex1);
+		Freqs[uLetter] += w;
+		Score -= w*w*Mx[uLetter][uLetter];
+		}
+
+	for (unsigned uLetter1 = 0; uLetter1 < g_AlphaSize; ++uLetter1)
+		{
+		const FCOUNT f1 = Freqs[uLetter1];
+		Score += f1*f1*Mx[uLetter1][uLetter1];
+		for (unsigned uLetter2 = uLetter1 + 1; uLetter2 < g_AlphaSize; ++uLetter2)
+			{
+			const FCOUNT f2 = Freqs[uLetter2];
+			Score += 2*f1*f2*Mx[uLetter1][uLetter2];
+			}
+		}
+	Score /= 2;
+#if	BRUTE_LETTERS
+	assert(BTEq(BruteScore, Score));
+#endif
+	return Score;
+	}
+
+static SCORE ScoreLetters(const MSA &msa, const unsigned Edges[],
+  unsigned uEdgeCount)
+	{
+	const unsigned uSeqCount = msa.GetSeqCount();
+	const unsigned uColCount = msa.GetColCount();
+
+// Letters
+	SCORE Score = 0;
+	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
+		{
+		const unsigned uColIndex = Edges[uEdgeIndex];
+		assert(uColIndex < uColCount);
+		Score += ScoreColLetters(msa, uColIndex);
+		}
+	return Score;
+	}
+
+void GetLetterScores(const MSA &msa, SCORE Scores[])
+	{
+	const unsigned uColCount = msa.GetColCount();
+	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
+		Scores[uColIndex] = ScoreColLetters(msa, uColIndex);
+	}
+
+SCORE DiffObjScore(
+  const MSA &msa1, const PWPath &Path1, const unsigned Edges1[], unsigned uEdgeCount1, 
+  const MSA &msa2, const PWPath &Path2, const unsigned Edges2[], unsigned uEdgeCount2)
+	{
+#if	TRACE
+	{
+	Log("============DiffObjScore===========\n");
+	Log("msa1:\n");
+	msa1.LogMe();
+	Log("\n");
+	Log("Cols1: ");
+	for (unsigned i = 0; i < uEdgeCount1; ++i)
+		Log(" %u", Edges1[i]);
+	Log("\n\n");
+	Log("msa2:\n");
+	msa2.LogMe();
+	Log("Cols2: ");
+	for (unsigned i = 0; i < uEdgeCount2; ++i)
+		Log(" %u", Edges2[i]);
+	Log("\n\n");
+	}
+#endif
+
+#if	COMPARE_3_52
+	extern SCORE g_SPScoreLetters;
+	extern SCORE g_SPScoreGaps;
+	SCORE SP1 = ObjScoreSP(msa1);
+	SCORE SPLetters1 = g_SPScoreLetters;
+	SCORE SPGaps1 = g_SPScoreGaps;
+
+	SCORE SP2 = ObjScoreSP(msa2);
+	SCORE SPLetters2 = g_SPScoreLetters;
+	SCORE SPGaps2 = g_SPScoreGaps;
+	SCORE SPDiffLetters = SPLetters2 - SPLetters1;
+	SCORE SPDiffGaps = SPGaps2 - SPGaps1;
+	SCORE SPDiff = SPDiffLetters + SPDiffGaps;
+#endif
+
+	SCORE Letters1 = ScoreLetters(msa1, Edges1, uEdgeCount1);
+	SCORE Letters2 = ScoreLetters(msa2, Edges2, uEdgeCount2);
+
+	SCORE Gaps1 = ScoreGaps(msa1, Edges1, uEdgeCount1);
+	SCORE Gaps2 = ScoreGaps(msa2, Edges2, uEdgeCount2);
+
+	SCORE DiffLetters = Letters2 - Letters1;
+	SCORE DiffGaps = Gaps2 - Gaps1;
+	SCORE Diff = DiffLetters + DiffGaps;
+
+#if	COMPARE_3_52
+	Log("ObjScoreSP    Letters1=%.4g  Letters2=%.4g  DiffLetters=%.4g\n",
+	  SPLetters1, SPLetters2, SPDiffLetters);
+
+	Log("DiffObjScore  Letters1=%.4g  Letters2=%.4g  DiffLetters=%.4g\n",
+	  Letters1, Letters2, DiffLetters);
+
+	Log("ObjScoreSP    Gaps1=%.4g  Gaps2=%.4g  DiffGaps=%.4g\n",
+	  SPGaps1, SPGaps2, SPDiffGaps);
+
+	Log("DiffObjScore  Gaps1=%.4g  Gaps2=%.4g  DiffGaps=%.4g\n",
+	  Gaps1, Gaps2, DiffGaps);
+
+	Log("SP diff=%.4g DiffObjScore Diff=%.4g\n", SPDiff, Diff);
+#endif
+
+	return Diff;
+	}

Added: trunk/packages/muscle/branches/upstream/current/diffpaths.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/diffpaths.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/diffpaths.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,114 @@
+#include "muscle.h"
+#include "pwpath.h"
+
+#define TRACE	0
+
+void DiffPaths(const PWPath &p1, const PWPath &p2, unsigned Edges1[],
+  unsigned *ptruDiffCount1, unsigned Edges2[], unsigned *ptruDiffCount2)
+	{
+#if	TRACE
+	Log("DiffPaths\n");
+	Log("p1=");
+	p1.LogMe();
+	Log("p2=");
+	p2.LogMe();
+#endif
+	const unsigned uEdgeCount1 = p1.GetEdgeCount();
+	const unsigned uEdgeCount2 = p2.GetEdgeCount();
+
+	unsigned uDiffCount1 = 0;
+	unsigned uDiffCount2 = 0;
+	unsigned uEdgeIndex1 = 0;
+	unsigned uEdgeIndex2 = 0;
+	const PWEdge *Edge1 = &p1.GetEdge(uEdgeIndex1);
+	const PWEdge *Edge2 = &p2.GetEdge(uEdgeIndex2);
+	for (;;)
+		{
+		unsigned uEdgeIndexTop1 = uEdgeIndex1;
+		unsigned uEdgeIndexTop2 = uEdgeIndex2;
+		Edge1 = &p1.GetEdge(uEdgeIndex1);
+		Edge2 = &p2.GetEdge(uEdgeIndex2);
+#if	TRACE
+		Log("e1[%u] PLA%u PLB%u %c, e2[%u] PLA%u PLB %u %c  DC1=%u DC2=%u\n",
+		  uEdgeIndex1, Edge1->uPrefixLengthA, Edge1->uPrefixLengthB, Edge1->cType,
+		  uEdgeIndex2, Edge2->uPrefixLengthA, Edge2->uPrefixLengthB, Edge2->cType,
+		  uDiffCount1, uDiffCount2);
+#endif
+		if (Edge1->uPrefixLengthA == Edge2->uPrefixLengthA &&
+		  Edge1->uPrefixLengthB == Edge2->uPrefixLengthB)
+			{
+			if (!Edge1->Equal(*Edge2))
+				{
+				Edges1[uDiffCount1++] = uEdgeIndex1;
+				Edges2[uDiffCount2++] = uEdgeIndex2;
+				}
+			++uEdgeIndex1;
+			++uEdgeIndex2;
+			}
+
+		else if (Edge2->uPrefixLengthA < Edge1->uPrefixLengthA ||
+		  Edge2->uPrefixLengthB < Edge1->uPrefixLengthB)
+			Edges2[uDiffCount2++] = uEdgeIndex2++;
+
+		else if (Edge1->uPrefixLengthA < Edge2->uPrefixLengthA ||
+		  Edge1->uPrefixLengthB < Edge2->uPrefixLengthB)
+			Edges1[uDiffCount1++] = uEdgeIndex1++;
+
+		if (uEdgeCount1 == uEdgeIndex1)
+			{
+			while (uEdgeIndex2 < uEdgeCount2)
+				Edges2[uDiffCount2++] = uEdgeIndex2++;
+			goto Done;
+			}
+		if (uEdgeCount2 == uEdgeIndex2)
+			{
+			while (uEdgeIndex1 < uEdgeCount1)
+				Edges1[uDiffCount1++] = uEdgeIndex1++;
+			goto Done;
+			}
+		if (uEdgeIndex1 == uEdgeIndexTop1 && uEdgeIndex2 == uEdgeIndexTop2)
+			Quit("DiffPaths stuck");
+		}
+Done:;
+#if	TRACE
+	Log("DiffCount1=%u (%u %u)\n", uDiffCount1, uEdgeCount1, uEdgeCount2);
+	Log("Diffs1=");
+	for (unsigned i = 0; i < uDiffCount1; ++i)
+		{
+		const PWEdge e = p1.GetEdge(Edges1[i]);
+		Log(" %u=%c%u.%u", Edges1[i], e.cType, e.uPrefixLengthA, e.uPrefixLengthB); 
+		}
+	Log("\n");
+	Log("DiffCount2=%u\n", uDiffCount2);
+	Log("Diffs2=");
+	for (unsigned i = 0; i < uDiffCount2; ++i)
+		{
+		const PWEdge e = p2.GetEdge(Edges2[i]);
+		Log(" %u=%c%u.%u", Edges2[i], e.cType, e.uPrefixLengthA, e.uPrefixLengthB); 
+		}
+	Log("\n");
+#endif
+	*ptruDiffCount1 = uDiffCount1;
+	*ptruDiffCount2 = uDiffCount2;
+	}
+
+void TestDiffPaths()
+	{
+	PWPath p1;
+	PWPath p2;
+
+	p1.AppendEdge('M', 1, 1);
+	p1.AppendEdge('M', 2, 2);
+	p1.AppendEdge('M', 3, 3);
+
+	p2.AppendEdge('M', 1, 1);
+	p2.AppendEdge('D', 2, 1);
+	p2.AppendEdge('I', 2, 2);
+	p2.AppendEdge('M', 3, 3);
+
+	unsigned Edges1[64];
+	unsigned Edges2[64];
+	unsigned uDiffCount1;
+	unsigned uDiffCount2;
+	DiffPaths(p1, p2, Edges1, &uDiffCount1, Edges2, &uDiffCount2);
+	}

Added: trunk/packages/muscle/branches/upstream/current/difftrees.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/difftrees.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/difftrees.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,381 @@
+#include "muscle.h"
+#include "tree.h"
+
+#define TRACE	0
+
+/***
+Algorithm to compare two trees, X and Y.
+
+A node x in X and node y in Y are defined to be
+similar iff the set of leaves in the subtree under
+x is identical to the set of leaves under y.
+
+A node is defined to be dissimilar iff it is not
+similar to any node in the other tree.
+
+Nodes x and y are defined to be married iff every
+node in the subtree under x is similar to a node
+in the subtree under y. Married nodes are considered
+to be equal. The subtrees under two married nodes can
+at most differ by exchanges of left and right branches,
+which we do not consider to be significant here.
+
+A node is defined to be a bachelor iff it is not
+married. If a node is a bachelor, then it has a
+dissimilar node in its subtree, and it follows
+immediately from the definition of marriage that its
+parent is also a bachelor. Hence all nodes on the path
+from a bachelor node to the root are bachelors.
+
+We assume the trees have the same set of leaves, so
+every leaf is trivially both similar and married to
+the same leaf in the opposite tree. Bachelor nodes
+are therefore always internal (i.e., non-leaf) nodes.
+
+A node is defined to be a diff iff (a) it is married
+and (b) its parent is a bachelor. The subtree under
+a diff is maximally similar to the other tree. (In
+other words, you cannot extend the subtree without
+adding a bachelor). 
+
+The set of diffs is the subset of the two trees that
+we consider to be identical.
+
+Example:
+
+              -----A
+        -----k
+   ----j      -----B
+--i     -----C
+   ------D
+
+
+              -----A
+        -----p
+   ----n      -----B
+--m     -----D
+   ------C
+
+
+The following pairs of internal nodes are similar.
+
+	Nodes	Set of leaves
+	-----	-------------
+	k,p		A,B
+	i,m		A,B,C,D
+
+Bachelors in the first tree are i and j, bachelors
+in the second tree are m and n.
+
+Node k and p are married, but i and m are not (because j
+and n are bachelors). The diffs are C, D and k.
+
+The set of bachelor nodes can be viewed as the internal
+nodes of a tree, the leaves of which are diffs. (To see
+that there can't be disjoint subtrees, note that the path
+from a diff to a root is all bachelor nodes, so there is
+always a path between two diffs that goes through the root).
+We call this tree the "diffs tree".
+
+There is a simple O(N) algorithm to build the diffs tree.
+To achieve O(N) we avoid traversing a given subtree multiple
+times and also avoid comparing lists of leaves. 
+
+We visit nodes in depth-first order (i.e., a node is visited
+before its parent).
+
+If either child of a node is a bachelor, we flag it as
+a bachelor.
+
+If both children of the node we are visiting are married,
+we check whether the spouses of those children have the
+same parent in the other tree. If the parents are different,
+the current node is a bachelor. If they have the same parent,
+then the node we are visiting is the spouse of that parent.
+We assign this newly identified married couple a unique integer
+id. The id of a node is in one-to-one correspondence with the
+set of leaves in its subtree. Two nodes have the same set of
+leaves iff they have the same id. Bachelor nodes do not get
+an id.
+***/
+
+static void BuildDiffs(const Tree &tree, unsigned uTreeNodeIndex,
+  const bool bIsDiff[], Tree &Diffs, unsigned uDiffsNodeIndex,
+  unsigned IdToDiffsLeafNodeIndex[])
+	{
+#if	TRACE
+	Log("BuildDiffs(TreeNode=%u IsDiff=%d IsLeaf=%d)\n",
+	  uTreeNodeIndex, bIsDiff[uTreeNodeIndex], tree.IsLeaf(uTreeNodeIndex));
+#endif
+	if (bIsDiff[uTreeNodeIndex])
+		{
+		unsigned uLeafCount = tree.GetLeafCount();
+		unsigned *Leaves = new unsigned[uLeafCount];
+		GetLeaves(tree, uTreeNodeIndex, Leaves, &uLeafCount);
+		for (unsigned n = 0; n < uLeafCount; ++n)
+			{
+			const unsigned uLeafNodeIndex = Leaves[n];
+			const unsigned uId = tree.GetLeafId(uLeafNodeIndex);
+			if (uId >= tree.GetLeafCount())
+				Quit("BuildDiffs, id out of range");
+			IdToDiffsLeafNodeIndex[uId] = uDiffsNodeIndex;
+#if	TRACE
+			Log("  Leaf id=%u DiffsNode=%u\n", uId, uDiffsNodeIndex);
+#endif
+			}
+		delete[] Leaves;
+		return;
+		}
+
+	if (tree.IsLeaf(uTreeNodeIndex))
+		Quit("BuildDiffs: should never reach leaf");
+
+	const unsigned uTreeLeft = tree.GetLeft(uTreeNodeIndex);
+	const unsigned uTreeRight = tree.GetRight(uTreeNodeIndex);
+
+	const unsigned uDiffsLeft = Diffs.AppendBranch(uDiffsNodeIndex);
+	const unsigned uDiffsRight = uDiffsLeft + 1;
+
+	BuildDiffs(tree, uTreeLeft, bIsDiff, Diffs, uDiffsLeft, IdToDiffsLeafNodeIndex);
+	BuildDiffs(tree, uTreeRight, bIsDiff, Diffs, uDiffsRight, IdToDiffsLeafNodeIndex);
+	}
+
+void DiffTrees(const Tree &Tree1, const Tree &Tree2, Tree &Diffs,
+  unsigned IdToDiffsLeafNodeIndex[])
+	{
+#if	TRACE
+	Log("Tree1:\n");
+	Tree1.LogMe();
+	Log("\n");
+	Log("Tree2:\n");
+	Tree2.LogMe();
+#endif
+
+	if (!Tree1.IsRooted() || !Tree2.IsRooted())
+		Quit("DiffTrees: requires rooted trees");
+
+	const unsigned uNodeCount = Tree1.GetNodeCount();
+	const unsigned uNodeCount2 = Tree2.GetNodeCount();
+	
+	const unsigned uLeafCount = Tree1.GetLeafCount();
+	const unsigned uLeafCount2 = Tree2.GetLeafCount();
+	assert(uLeafCount == uLeafCount2);
+
+	if (uNodeCount != uNodeCount2)
+		Quit("DiffTrees: different node counts");
+
+// Allocate tables so we can convert tree node index to
+// and from the unique id with a O(1) lookup.
+	unsigned *NodeIndexToId1 = new unsigned[uNodeCount];
+	unsigned *IdToNodeIndex2 = new unsigned[uNodeCount];
+
+	bool *bIsBachelor1 = new bool[uNodeCount];
+	bool *bIsDiff1 = new bool[uNodeCount];
+
+	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
+		{
+		NodeIndexToId1[uNodeIndex] = uNodeCount;
+		bIsBachelor1[uNodeIndex] = false;
+		bIsDiff1[uNodeIndex] = false;
+
+	// Use uNodeCount as value meaning "not set".
+		IdToNodeIndex2[uNodeIndex] = uNodeCount;
+		}
+
+// Initialize node index <-> id lookup tables
+	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
+		{
+		if (Tree1.IsLeaf(uNodeIndex))
+			{
+			const unsigned uId = Tree1.GetLeafId(uNodeIndex);
+			if (uId >= uNodeCount)
+				Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");
+			NodeIndexToId1[uNodeIndex] = uId;
+			}
+
+		if (Tree2.IsLeaf(uNodeIndex))
+			{
+			const unsigned uId = Tree2.GetLeafId(uNodeIndex);
+			if (uId >= uNodeCount)
+				Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");
+			IdToNodeIndex2[uId] = uNodeIndex;
+			}
+		}
+
+// Validity check. This verifies that the ids
+// pre-assigned to the leaves in Tree2 are unique
+// (note that the id<N check above does not rule
+// out two leaves having duplicate ids).
+	for (unsigned uId = 0; uId < uLeafCount; ++uId)
+		{
+		unsigned uNodeIndex2 = IdToNodeIndex2[uId];
+		if (uNodeCount == uNodeIndex2)
+			Quit("DiffTrees, check 2");
+		}
+
+// Ids assigned to internal nodes are N, N+1 ...
+// An internal node id uniquely identifies a set
+// of two or more leaves.
+	unsigned uInternalNodeId = uLeafCount;
+
+// Depth-first traversal of tree.
+// The order guarantees that a node is visited before
+// its parent is visited.
+	for (unsigned uNodeIndex1 = Tree1.FirstDepthFirstNode();
+	  NULL_NEIGHBOR != uNodeIndex1;
+	  uNodeIndex1 = Tree1.NextDepthFirstNode(uNodeIndex1))
+		{
+#if	TRACE
+		Log("Main loop: Node1=%u IsLeaf=%d IsBachelor=%d\n",
+		  uNodeIndex1,
+		  Tree1.IsLeaf(uNodeIndex1),
+		  bIsBachelor1[uNodeIndex1]);
+#endif
+
+	// Leaves are trivial; nothing to do.
+		if (Tree1.IsLeaf(uNodeIndex1) || bIsBachelor1[uNodeIndex1])
+			continue;
+
+	// If either child is a bachelor, flag
+	// this node as a bachelor and continue.
+		unsigned uLeft1 = Tree1.GetLeft(uNodeIndex1);
+		if (bIsBachelor1[uLeft1])
+			{
+			bIsBachelor1[uNodeIndex1] = true;
+			continue;
+			}
+
+		unsigned uRight1 = Tree1.GetRight(uNodeIndex1);
+		if (bIsBachelor1[uRight1])
+			{
+			bIsBachelor1[uNodeIndex1] = true;
+			continue;
+			}
+
+	// Both children are married.
+	// Married nodes are guaranteed to have an id.
+		unsigned uIdLeft = NodeIndexToId1[uLeft1];
+		unsigned uIdRight = NodeIndexToId1[uRight1];
+
+		if (uIdLeft == uNodeCount || uIdRight == uNodeCount)
+			Quit("DiffTrees, check 5");
+
+	// uLeft2 is the spouse of uLeft1, and similarly for uRight2.
+		unsigned uLeft2 = IdToNodeIndex2[uIdLeft];
+		unsigned uRight2 = IdToNodeIndex2[uIdRight];
+
+		if (uLeft2 == uNodeCount || uRight2 == uNodeCount)
+			Quit("DiffTrees, check 6");
+
+	// If the spouses of uLeft1 and uRight1 have the same
+	// parent, then this parent is the spouse of uNodeIndex1.
+	// Otherwise, uNodeIndex1 is a bachelor.
+		unsigned uParentLeft2 = Tree2.GetParent(uLeft2);
+		unsigned uParentRight2 = Tree2.GetParent(uRight2);
+
+#if	TRACE
+		Log("L1=%u R1=%u L2=%u R2=%u PL2=%u PR2=%u\n",
+		  uLeft1,
+		  uRight1,
+		  uLeft2,
+		  uRight2,
+		  uParentLeft2,
+		  uParentRight2);
+#endif
+
+		if (uParentLeft2 == uParentRight2)
+			{
+			NodeIndexToId1[uNodeIndex1] = uInternalNodeId;
+			IdToNodeIndex2[uInternalNodeId] = uParentLeft2;
+			++uInternalNodeId;
+			}
+		else
+			bIsBachelor1[uNodeIndex1] = true;
+		}
+
+	unsigned uDiffCount = 0;
+	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
+		{
+		if (bIsBachelor1[uNodeIndex])
+			continue;
+		if (Tree1.IsRoot(uNodeIndex))
+			{
+		// Special case: if no bachelors, consider the
+		// root a diff.
+			if (!bIsBachelor1[uNodeIndex])
+				bIsDiff1[uNodeIndex] = true;
+			continue;
+			}
+		const unsigned uParent = Tree1.GetParent(uNodeIndex);
+		if (bIsBachelor1[uParent])
+			{
+			bIsDiff1[uNodeIndex] = true;
+			++uDiffCount;
+			}
+		}
+
+#if	TRACE
+	Log("Tree1:\n");
+	Log("Node    Id  Bach  Diff  Name\n");
+	Log("----  ----  ----  ----  ----\n");
+	for (unsigned n = 0; n < uNodeCount; ++n)
+		{
+		Log("%4u  %4u     %d     %d",
+		  n,
+		  NodeIndexToId1[n],
+		  bIsBachelor1[n],
+		  bIsDiff1[n]);
+		if (Tree1.IsLeaf(n))
+			Log("  %s", Tree1.GetLeafName(n));
+		Log("\n");
+		}
+	Log("\n");
+	Log("Tree2:\n");
+	Log("Node    Id              Name\n");
+	Log("----  ----              ----\n");
+	for (unsigned n = 0; n < uNodeCount; ++n)
+		{
+		Log("%4u                  ", n);
+		if (Tree2.IsLeaf(n))
+			Log("  %s", Tree2.GetLeafName(n));
+		Log("\n");
+		}
+#endif
+
+	Diffs.CreateRooted();
+	const unsigned uDiffsRootIndex = Diffs.GetRootNodeIndex();
+	const unsigned uRootIndex1 = Tree1.GetRootNodeIndex();
+
+	for (unsigned n = 0; n < uLeafCount; ++n)
+		IdToDiffsLeafNodeIndex[n] = uNodeCount;
+
+	BuildDiffs(Tree1, uRootIndex1, bIsDiff1, Diffs, uDiffsRootIndex,
+	  IdToDiffsLeafNodeIndex);
+
+#if TRACE
+	Log("\n");
+	Log("Diffs:\n");
+	Diffs.LogMe();
+	Log("\n");
+	Log("IdToDiffsLeafNodeIndex:");
+	for (unsigned n = 0; n < uLeafCount; ++n)
+		{
+		if (n%16 == 0)
+			Log("\n");
+		else
+			Log(" ");
+		Log("%u=%u", n, IdToDiffsLeafNodeIndex[n]);
+		}
+	Log("\n");
+#endif
+
+	for (unsigned n = 0; n < uLeafCount; ++n)
+		if (IdToDiffsLeafNodeIndex[n] == uNodeCount)
+			Quit("TreeDiffs check 7");
+
+	delete[] NodeIndexToId1;
+	delete[] IdToNodeIndex2;
+
+	delete[] bIsBachelor1;
+	delete[] bIsDiff1;
+	}

Added: trunk/packages/muscle/branches/upstream/current/difftreese.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/difftreese.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/difftreese.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,235 @@
+#include "muscle.h"
+#include "tree.h"
+
+#define TRACE	0
+
+/***
+Algorithm to compare two trees, X and Y.
+
+A node x in X and node y in Y are defined to be
+similar iff the set of leaves in the subtree under
+x is identical to the set of leaves under y.
+
+A node is defined to be dissimilar iff it is not
+similar to any node in the other tree.
+
+Nodes x and y are defined to be married iff every
+node in the subtree under x is similar to a node
+in the subtree under y. Married nodes are considered
+to be equal. The subtrees under two married nodes can
+at most differ by exchanges of left and right branches,
+which we do not consider to be significant here.
+
+A node is changed iff it is not married. If a node is
+changed, then it has a dissimilar node in its subtree,
+and it follows immediately from the definition of marriage
+that its parent is also changed. Hence all nodes on the
+path from a changed node to the root are changed.
+
+We assume the trees have the same set of leaves, so
+every leaf is trivially both similar and married to
+the same leaf in the opposite tree. Changed nodes
+are therefore always internal (i.e., non-leaf) nodes.
+
+Example:
+
+              -----A
+        -----k
+   ----j      -----B
+--i     -----C
+   ------D
+
+
+              -----A
+        -----p
+   ----n      -----B
+--m     -----D
+   ------C
+
+
+The following pairs of internal nodes are similar.
+
+	Nodes	Set of leaves
+	-----	-------------
+	k,p		A,B
+	i,m		A,B,C,D
+
+Changed nodes in the first tree are i and j, changed nodes
+in the second tree are m and n.
+
+Node k and p are married, but i and m are not (because j
+and n are changed). The diffs are C, D and k.
+
+To achieve O(N) we avoid traversing a given subtree multiple
+times and also avoid comparing lists of leaves. 
+
+We visit nodes in depth-first order (i.e., a node is visited
+before its parent).
+
+If either child of a node is changed, we flag it as changed.
+
+If both children of the node we are visiting are married,
+we check whether the spouses of those children have the
+same parent in the other tree. If the parents are different,
+the current node is changed. If they have the same parent,
+then the node we are visiting is the spouse of that parent.
+We assign this newly identified married couple a unique integer
+id. The id of a node is in one-to-one correspondence with the
+set of leaves in its subtree. Two nodes have the same set of
+leaves iff they have the same id. Changed nodes do not get
+an id.
+***/
+
+// Map every node of NewTree to its married partner in OldTree.
+//
+// On return NewNodeIndexToOldNodeIndex[n] holds, for each node index n of
+// NewTree, either the index of the matching OldTree node or NODE_CHANGED.
+// Leaves are matched through their leaf ids; an internal node is matched
+// only when both of its children are matched and their partners share a
+// parent in OldTree. The caller supplies an array with room for
+// NewTree.GetNodeCount() entries. Quit() is called if either tree is not
+// rooted or if the node/leaf counts differ.
+void DiffTreesE(const Tree &NewTree, const Tree &OldTree,
+  unsigned NewNodeIndexToOldNodeIndex[])
+	{
+#if	TRACE
+	Log("DiffTreesE NewTree:\n");
+	NewTree.LogMe();
+	Log("\n");
+	Log("OldTree:\n");
+	OldTree.LogMe();
+#endif
+
+	if (!NewTree.IsRooted() || !OldTree.IsRooted())
+		Quit("DiffTrees: requires rooted trees");
+
+	const unsigned uNodeCount = NewTree.GetNodeCount();
+	const unsigned uOldNodeCount = OldTree.GetNodeCount();
+	const unsigned uLeafCount = NewTree.GetLeafCount();
+	const unsigned uOldLeafCount = OldTree.GetLeafCount();
+	if (uNodeCount != uOldNodeCount || uLeafCount != uOldLeafCount)
+		Quit("DiffTreesE: different node counts");
+
+	{
+	unsigned *IdToOldNodeIndex = new unsigned[uNodeCount];
+
+// Pre-fill with an out-of-range sentinel so that a leaf id that is missing
+// from OldTree trips the range assert below instead of yielding garbage.
+	for (unsigned uId = 0; uId < uNodeCount; ++uId)
+		IdToOldNodeIndex[uId] = uNodeCount;
+
+	for (unsigned uOldNodeIndex = 0; uOldNodeIndex < uNodeCount; ++uOldNodeIndex)
+		{
+		if (OldTree.IsLeaf(uOldNodeIndex))
+			{
+			unsigned Id = OldTree.GetLeafId(uOldNodeIndex);
+		// Guard the table write; an id this large would overrun the array.
+			if (Id >= uNodeCount)
+				Quit("DiffTreesE: leaf id %u out of range", Id);
+			IdToOldNodeIndex[Id] = uOldNodeIndex;
+			}
+		}
+
+// Initialize NewNodeIndexToOldNodeIndex[]
+// All internal nodes are marked as changed, but may be updated later.
+	for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
+		{
+		if (NewTree.IsLeaf(uNewNodeIndex))
+			{
+			unsigned uId = NewTree.GetLeafId(uNewNodeIndex);
+			assert(uId < uLeafCount);
+
+			unsigned uOldNodeIndex = IdToOldNodeIndex[uId];
+			assert(uOldNodeIndex < uNodeCount);
+
+			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldNodeIndex;
+			}
+		else
+			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
+		}
+	delete[] IdToOldNodeIndex;
+	}
+
+// Depth-first traversal of tree.
+// The order guarantees that a node is visited before
+// its parent is visited.
+	for (unsigned uNewNodeIndex = NewTree.FirstDepthFirstNode();
+	  NULL_NEIGHBOR != uNewNodeIndex;
+	  uNewNodeIndex = NewTree.NextDepthFirstNode(uNewNodeIndex))
+		{
+		if (NewTree.IsLeaf(uNewNodeIndex))
+			continue;
+
+	// If either child is changed, flag this node as changed and continue.
+	// (The flag is written to the current node, not to the child.)
+		unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);
+		unsigned uOldLeft = NewNodeIndexToOldNodeIndex[uNewLeft];
+		if (NODE_CHANGED == uOldLeft)
+			{
+			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
+			continue;
+			}
+
+		unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);
+		unsigned uOldRight = NewNodeIndexToOldNodeIndex[uNewRight];
+		if (NODE_CHANGED == uOldRight)
+			{
+			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
+			continue;
+			}
+
+	// Both children are married: this node is married iff their partners
+	// hang off the same parent in the old tree.
+		unsigned uOldParentLeft = OldTree.GetParent(uOldLeft);
+		unsigned uOldParentRight = OldTree.GetParent(uOldRight);
+		if (uOldParentLeft == uOldParentRight)
+			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldParentLeft;
+		else
+			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
+		}
+
+#if TRACE
+	{
+	Log("NewToOld ");
+	for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
+		{
+		Log(" [%3u]=", uNewNodeIndex);
+		if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewNodeIndex])
+			Log("  X");
+		else
+			Log("%3u", NewNodeIndexToOldNodeIndex[uNewNodeIndex]);
+		if ((uNewNodeIndex+1)%8 == 0)
+			Log("\n         ");
+		}
+	Log("\n");
+	}
+#endif
+
+#if	DEBUG
+// Self-check: every leaf maps to the old leaf with the same id, and every
+// married internal node has children married to the children of its partner
+// (in either left/right order).
+	{
+	for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
+		{
+		unsigned uOld = NewNodeIndexToOldNodeIndex[uNewNodeIndex];
+		if (NewTree.IsLeaf(uNewNodeIndex))
+			{
+			if (uOld >= uNodeCount)
+				{
+				Log("NewNode=%u uOld=%u > uNodeCount=%u\n",
+				  uNewNodeIndex, uOld, uNodeCount);
+				Quit("Diff check failed");
+				}
+			unsigned uIdNew = NewTree.GetLeafId(uNewNodeIndex);
+			unsigned uIdOld = OldTree.GetLeafId(uOld);
+			if (uIdNew != uIdOld)
+				{
+				Log("NewNode=%u uOld=%u IdNew=%u IdOld=%u\n",
+				  uNewNodeIndex, uOld, uIdNew, uIdOld);
+				Quit("Diff check failed");
+				}
+			continue;
+			}
+
+		if (NODE_CHANGED == uOld)
+			continue;
+
+		unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);
+		unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);
+
+		unsigned uOldLeft = OldTree.GetLeft(uOld);
+		unsigned uOldRight = OldTree.GetRight(uOld);
+
+		unsigned uNewLeftPartner = NewNodeIndexToOldNodeIndex[uNewLeft];
+		unsigned uNewRightPartner = NewNodeIndexToOldNodeIndex[uNewRight];
+
+		bool bSameNotRotated = (uNewLeftPartner == uOldLeft && uNewRightPartner == uOldRight);
+		bool bSameRotated = (uNewLeftPartner == uOldRight && uNewRightPartner == uOldLeft);
+		if (!bSameNotRotated && !bSameRotated)
+			{
+			Log("NewNode=%u NewL=%u NewR=%u\n", uNewNodeIndex, uNewLeft, uNewRight);
+			Log("OldNode=%u OldL=%u OldR=%u\n", uOld, uOldLeft, uOldRight);
+			Log("NewLPartner=%u NewRPartner=%u\n", uNewLeftPartner, uNewRightPartner);
+			Quit("Diff check failed");
+			}
+		}
+	}
+#endif
+	}

Added: trunk/packages/muscle/branches/upstream/current/distcalc.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/distcalc.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/distcalc.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,72 @@
+#include "muscle.h"
+#include "distfunc.h"
+#include "distcalc.h"
+#include "msa.h"
+
+// Bind this calculator to DF. Only the address is stored, so DF has to
+// remain alive for as long as this object is used.
+void DistCalcDF::Init(const DistFunc &DF)
+	{
+	m_ptrDF = &DF;
+	}
+
+// Fill Dist[0..i-1] with the stored distances between item i and every
+// lower-numbered item. Dist must have room for at least i entries.
+void DistCalcDF::CalcDistRange(unsigned i, dist_t Dist[]) const
+	{
+	unsigned uOther = 0;
+	while (uOther < i)
+		{
+		Dist[uOther] = m_ptrDF->GetDist(i, uOther);
+		++uOther;
+		}
+	}
+
+// Number of items, forwarded from the bound DistFunc.
+unsigned DistCalcDF::GetCount() const
+	{
+	return m_ptrDF->GetCount();
+	}
+
+// Id of item i, forwarded from the bound DistFunc.
+unsigned DistCalcDF::GetId(unsigned i) const
+	{
+	return m_ptrDF->GetId(i);
+	}
+
+// Name of item i, forwarded from the bound DistFunc; the string is owned
+// by the DistFunc, not by the caller.
+const char *DistCalcDF::GetName(unsigned i) const
+	{
+	return m_ptrDF->GetName(i);
+	}
+
+// Bind this calculator to an alignment and select the distance measure.
+// Only the address of msa is stored, so msa has to outlive this object.
+void DistCalcMSA::Init(const MSA &msa, DISTANCE Distance)
+	{
+	m_ptrMSA = &msa;
+	m_Distance = Distance;
+	}
+
+// Fill Dist[0..i-1] with distances between sequence i and every
+// lower-numbered sequence, derived from pairwise percent identity using
+// the measure chosen in Init(). Any other DISTANCE value ends in Quit().
+void DistCalcMSA::CalcDistRange(unsigned i, dist_t Dist[]) const
+	{
+	unsigned uOther = 0;
+	while (uOther < i)
+		{
+		const float fPctId = (float) m_ptrMSA->GetPctIdentityPair(i, uOther);
+		if (DISTANCE_PctIdKimura == m_Distance)
+			Dist[uOther] = (float) KimuraDist(fPctId);
+		else if (DISTANCE_PctIdLog == m_Distance)
+			Dist[uOther] = (float) PctIdToMAFFTDist(fPctId);
+		else
+			Quit("DistCalcMSA: Invalid DISTANCE_%u", m_Distance);
+		++uOther;
+		}
+	}
+
+// Number of items = number of sequences in the bound alignment.
+unsigned DistCalcMSA::GetCount() const
+	{
+	return m_ptrMSA->GetSeqCount();
+	}
+
+// Id of sequence i, forwarded from the bound alignment.
+unsigned DistCalcMSA::GetId(unsigned i) const
+	{
+	return m_ptrMSA->GetSeqId(i);
+	}
+
+// Name of sequence i, forwarded from the bound alignment.
+const char *DistCalcMSA::GetName(unsigned i) const
+	{
+	return m_ptrMSA->GetSeqName(i);
+	}

Added: trunk/packages/muscle/branches/upstream/current/distcalc.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/distcalc.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/distcalc.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,45 @@
+#ifndef DistCalc_h
+#define DistCalc_h
+
+// Scalar type used for all pairwise distances.
+typedef float dist_t;
+// Very large distance value.
+// NOTE(review): presumably a "greater than any real distance" sentinel;
+// confirm at the call sites.
+const dist_t BIG_DIST = (dist_t) 1e29;
+
+class DistFunc;
+
+// Abstract source of pairwise distances over a numbered set of items.
+// Implementations provide one row of the lower triangle at a time.
+class DistCalc
+	{
+public:
+// Virtual so that deleting a derived calculator through a DistCalc
+// pointer is well defined.
+	virtual ~DistCalc() {}
+
+// Fill Dist[0..i-1] with the distances from item i to items 0..i-1.
+	virtual void CalcDistRange(unsigned i, dist_t Dist[]) const = 0;
+// Number of items.
+	virtual unsigned GetCount() const = 0;
+// Id of item i.
+	virtual unsigned GetId(unsigned i) const = 0;
+// Name of item i.
+	virtual const char *GetName(unsigned i) const = 0;
+	};
+
+// DistCalc that reads distances from an existing DistFunc.
+// Init() must be called before any other member; the constructor leaves
+// m_ptrDF unset.
+class DistCalcDF : public DistCalc
+	{
+public:
+	void Init(const DistFunc &DF);
+	virtual void CalcDistRange(unsigned i, dist_t Dist[]) const;
+	virtual unsigned GetCount() const;
+	virtual unsigned GetId(unsigned i) const;
+	virtual const char *GetName(unsigned i) const;
+
+private:
+// Not owned; set by Init().
+	const DistFunc *m_ptrDF;
+	};
+
+// DistCalc that derives distances from pairwise identity in an alignment.
+// Init() must be called before any other member; the constructor leaves
+// both members unset.
+// NOTE(review): MSA and DISTANCE are not declared in this header, so it
+// relies on the includer having declared them first.
+class DistCalcMSA : public DistCalc
+	{
+public:
+	void Init(const MSA &msa, DISTANCE Distance);
+	virtual void CalcDistRange(unsigned i, dist_t Dist[]) const;
+	virtual unsigned GetCount() const;
+	virtual unsigned GetId(unsigned i) const;
+	virtual const char *GetName(unsigned i) const;
+
+private:
+// Not owned; set by Init().
+	const MSA *m_ptrMSA;
+// Distance measure selected in Init().
+	DISTANCE m_Distance;
+	};
+
+#endif	// DistCalc_h

Added: trunk/packages/muscle/branches/upstream/current/distfunc.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/distfunc.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/distfunc.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,113 @@
+#include "muscle.h"
+#include "distfunc.h"
+#include <assert.h>
+
+// Construct an empty distance function: no items and no storage.
+DistFunc::DistFunc() :
+  m_uCount(0),
+  m_uCacheCount(0),
+  m_Dists(0),
+  m_Names(0),
+  m_Ids(0)
+	{
+	}
+
+// Release the distance matrix, the id table and every stored name.
+DistFunc::~DistFunc()
+	{
+	if (0 != m_Names)
+		{
+	// The name table holds m_uCacheCount slots (its allocated size), which
+	// can exceed m_uCount after SetCount() shrinks the logical size; walk
+	// all of them so names set before a shrink are not leaked. Unused
+	// slots are null, and free(0) is a no-op.
+		for (unsigned i = 0; i < m_uCacheCount; ++i)
+			free(m_Names[i]);
+		}
+	delete[] m_Dists;
+	delete[] m_Names;
+	delete[] m_Ids;
+	}
+
+// Distance stored for the ordered pair (uIndex1, uIndex2).
+float DistFunc::GetDist(unsigned uIndex1, unsigned uIndex2) const
+	{
+	return m_Dists[VectorIndex(uIndex1, uIndex2)];
+	}
+
+// Current number of items (the value last passed to SetCount()).
+unsigned DistFunc::GetCount() const
+	{
+	return m_uCount;
+	}
+
+// Set the number of items.
+//
+// If uCount fits in the storage already allocated, only the logical count
+// changes: existing names, ids and distances are kept, and distances are
+// addressed with the new row stride from then on. Otherwise all storage is
+// released and reallocated for uCount items, with names cleared to null,
+// ids set to all-ones and distances set to zero.
+void DistFunc::SetCount(unsigned uCount)
+	{
+	m_uCount = uCount;
+	if (uCount <= m_uCacheCount)
+		return;
+
+// Growing: release the previous tables, including the name strings they
+// own. m_uCacheCount is still the size of the old allocation here.
+	if (0 != m_Names)
+		{
+		for (unsigned i = 0; i < m_uCacheCount; ++i)
+			free(m_Names[i]);
+		}
+	delete[] m_Dists;
+	delete[] m_Names;
+	delete[] m_Ids;
+
+	m_Dists = new float[VectorLength()];
+	m_Names = new char *[m_uCount];
+	m_Ids = new unsigned[m_uCount];
+	m_uCacheCount = uCount;
+
+	memset(m_Names, 0, m_uCount*sizeof(char *));
+	memset(m_Ids, 0xff, m_uCount*sizeof(unsigned));
+	memset(m_Dists, 0, VectorLength()*sizeof(float));
+	}
+
+// Store dDist symmetrically, for both (uIndex1, uIndex2) and
+// (uIndex2, uIndex1).
+void DistFunc::SetDist(unsigned uIndex1, unsigned uIndex2, float dDist)
+	{
+	const unsigned uForward = VectorIndex(uIndex1, uIndex2);
+	m_Dists[uForward] = dDist;
+
+	const unsigned uReverse = VectorIndex(uIndex2, uIndex1);
+	m_Dists[uReverse] = dDist;
+	}
+
+// Offset of pair (uIndex1, uIndex2) in the flat row-major matrix whose row
+// length is the current item count.
+unsigned DistFunc::VectorIndex(unsigned uIndex1, unsigned uIndex2) const
+	{
+	assert(uIndex1 < m_uCount && uIndex2 < m_uCount);
+	return uIndex1*m_uCount + uIndex2;
+	}
+
+// Number of floats needed for a full count x count matrix.
+// NOTE(review): the product is computed in unsigned and is not checked
+// for overflow.
+unsigned DistFunc::VectorLength() const
+	{
+	return m_uCount*m_uCount;
+	}
+
+// Store a private copy of szName as the name of item uIndex, releasing any
+// name previously stored for that item.
+void DistFunc::SetName(unsigned uIndex, const char szName[])
+	{
+	assert(uIndex < m_uCount);
+// Copy first, then free: szName may be the pointer currently stored.
+	char *szCopy = strsave(szName);
+	free(m_Names[uIndex]);
+	m_Names[uIndex] = szCopy;
+	}
+
+// Set the id of item uIndex.
+void DistFunc::SetId(unsigned uIndex, unsigned uId)
+	{
+	assert(uIndex < m_uCount);
+	m_Ids[uIndex] = uId;
+	}
+
+// Name of item uIndex, or null if none has been set since the storage was
+// last allocated. The string remains owned by this object.
+const char *DistFunc::GetName(unsigned uIndex) const
+	{
+	assert(uIndex < m_uCount);
+	return m_Names[uIndex];
+	}
+
+// Id of item uIndex (all-ones if never set since the last allocation).
+unsigned DistFunc::GetId(unsigned uIndex) const
+	{
+	assert(uIndex < m_uCount);
+	return m_Ids[uIndex];
+	}
+
+// Write the lower triangle (diagonal included) of the matrix to the log,
+// preceded by header rows of column indexes and truncated names.
+void DistFunc::LogMe() const
+	{
+	Log("DistFunc::LogMe count=%u\n", m_uCount);
+
+// Header row 1: column indexes.
+	Log("                     ");
+	for (unsigned uCol = 0; uCol < m_uCount; ++uCol)
+		Log(" %7u", uCol);
+	Log("\n");
+
+// Header row 2: column names; unset names print as empty.
+	Log("                     ");
+	for (unsigned uCol = 0; uCol < m_uCount; ++uCol)
+		{
+		const char *szColName = m_Names[uCol] ? m_Names[uCol] : "";
+		Log(" %7.7s", szColName);
+		}
+	Log("\n");
+
+// One row per item.
+	for (unsigned uRow = 0; uRow < m_uCount; ++uRow)
+		{
+		const char *szRowName = m_Names[uRow] ? m_Names[uRow] : "";
+		Log("%4u  %10.10s  :  ", uRow, szRowName);
+		for (unsigned uCol = 0; uCol <= uRow; ++uCol)
+			Log(" %7.4g", GetDist(uRow, uCol));
+		Log("\n");
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/distfunc.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/distfunc.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/distfunc.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,36 @@
+#ifndef DistFunc_h
+#define DistFunc_h
+
+// Square table of pairwise distances between numbered items, plus a name
+// and an id per item. The object owns its arrays and the name strings.
+// NOTE(review): copy construction and assignment are not disabled even
+// though the class owns raw pointers; copying an instance would lead to a
+// double delete.
+class DistFunc
+	{
+public:
+	DistFunc();
+	virtual ~DistFunc();
+
+public:
+// Set the number of items; (re)allocates storage when it has to grow.
+	virtual void SetCount(unsigned uCount);
+// Store a distance symmetrically for the pair.
+	virtual void SetDist(unsigned uIndex1, unsigned uIndex2, float dDist);
+
+	void SetName(unsigned uIndex, const char szName[]);
+	void SetId(unsigned uIndex, unsigned uId);
+	const char *GetName(unsigned uIndex) const;
+	unsigned GetId(unsigned uIndex) const;
+
+	virtual float GetDist(unsigned uIndex1, unsigned uIndex2) const;
+	virtual unsigned GetCount() const;
+
+	void LogMe() const;
+
+protected:
+// Offset of a pair in the flat matrix / total length of the flat matrix.
+	unsigned VectorIndex(unsigned uIndex, unsigned uIndex2) const;
+	unsigned VectorLength() const;
+
+private:
+// Logical number of items.
+	unsigned m_uCount;
+// Number of items the arrays were allocated for.
+	unsigned m_uCacheCount;
+// Flat row-major distance matrix.
+	float *m_Dists;
+// Per-item names; entries are null until SetName() is called.
+	char **m_Names;
+// Per-item ids.
+	unsigned *m_Ids;
+	};
+
+#endif	// DistFunc_h

Added: trunk/packages/muscle/branches/upstream/current/distpwkimura.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/distpwkimura.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/distpwkimura.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,45 @@
+#include "muscle.h"
+#include "distfunc.h"
+#include "msa.h"
+#include "seqvect.h"
+#include "pwpath.h"
+
+// Fill DF with Kimura distances computed from pairwise alignments of all
+// sequences in v.
+//
+// Each unordered pair of distinct sequences is aligned with AlignTwoMSAs
+// and the percent identity of the result is converted with KimuraDist.
+// The sequence weighting method is switched to Henikoff for the duration
+// of the call and restored afterwards. Diagonal entries are not written
+// here.
+void DistPWKimura(const SeqVect &v, DistFunc &DF)
+	{
+	SEQWEIGHT SeqWeightSave = GetSeqWeightMethod();
+	SetSeqWeightMethod(SEQWEIGHT_Henikoff);
+
+	const unsigned uSeqCount = v.Length();
+	DF.SetCount(uSeqCount);
+
+// The loops below visit pairs with uSeqIndex2 < uSeqIndex1 only, i.e.
+// n*(n-1)/2 pairs; using n*(n+1)/2 kept progress from ever reaching 100%.
+	const unsigned uPairCount = (uSeqCount*(uSeqCount - 1))/2;
+	unsigned uCount = 0;
+	SetProgressDesc("PWKimura distance");
+	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
+		{
+		const Seq &s1 = v.GetSeq(uSeqIndex1);
+		MSA msa1;
+		msa1.FromSeq(s1);
+		for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqIndex1; ++uSeqIndex2)
+			{
+		// Report progress every 20 pairs.
+			if (0 == uCount%20)
+				Progress(uCount, uPairCount);
+			++uCount;
+			const Seq &s2 = v.GetSeq(uSeqIndex2);
+			MSA msa2;
+			msa2.FromSeq(s2);
+
+			PWPath Path;
+			MSA msaOut;
+			AlignTwoMSAs(msa1, msa2, msaOut, Path, false, false);
+
+			double dPctId = msaOut.GetPctIdentityPair(0, 1);
+			float f = (float) KimuraDist(dPctId);
+
+			DF.SetDist(uSeqIndex1, uSeqIndex2, f);
+			}
+		}
+	ProgressStepsDone();
+
+	SetSeqWeightMethod(SeqWeightSave);
+	}

Added: trunk/packages/muscle/branches/upstream/current/domuscle.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/domuscle.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/domuscle.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,299 @@
+#include "muscle.h"
+#include "textfile.h"
+#include "seqvect.h"
+#include "distfunc.h"
+#include "msa.h"
+#include "tree.h"
+#include "profile.h"
+#include "timing.h"
+
+// Text printed to stderr by DoMuscle() when -usetree is given without the
+// "nowarn" variant. It is plain text, not a printf format.
+static char g_strUseTreeWarning[] =
+"\n******** WARNING ****************\n"
+"\nYou specified the -usetree option.\n"
+"Note that a good evolutionary tree may NOT be a good\n"
+"guide tree for multiple alignment. For more details,\n"
+"please refer to the user guide. To disable this\n"
+"warning, use -usetree_nowarn <treefilename>.\n\n";
+
+// Default command: read the input FASTA file, build (or load) a guide
+// tree, run progressive alignment, optionally refine the tree and the
+// alignment, and write the result.
+//
+// All options are taken from the g_* globals. The function returns early
+// after writing output when there is a single sequence, when only the
+// first tree or the weights were requested, or when no refinement
+// iterations apply.
+void DoMuscle()
+	{
+	SetOutputFileName(g_pstrOutFileName);
+	SetInputFileName(g_pstrInFileName);
+
+	SetMaxIters(g_uMaxIters);
+	SetSeqWeightMethod(g_SeqWeight1);
+
+	TextFile fileIn(g_pstrInFileName);
+	SeqVect v;
+	v.FromFASTAFile(fileIn);
+	const unsigned uSeqCount = v.Length();
+
+	if (0 == uSeqCount)
+		Quit("No sequences in input file");
+
+	ALPHA Alpha = ALPHA_Undefined;
+	switch (g_SeqType)
+		{
+	case SEQTYPE_Auto:
+		Alpha = v.GuessAlpha();
+		break;
+
+	case SEQTYPE_Protein:
+		Alpha = ALPHA_Amino;
+		break;
+
+	case SEQTYPE_DNA:
+		Alpha = ALPHA_DNA;
+		break;
+
+	case SEQTYPE_RNA:
+		Alpha = ALPHA_RNA;
+		break;
+
+	default:
+		Quit("Invalid seq type");
+		}
+	SetAlpha(Alpha);
+	v.FixAlpha();
+
+// Optional user-supplied score matrix, looked up under $MUSCLE_MXPATH
+// when that variable is set.
+	PTR_SCOREMATRIX UserMatrix = 0;
+	if (0 != g_pstrMatrixFileName)
+		{
+		const char *FileName = g_pstrMatrixFileName;
+		char *NewFileName = 0;
+		const char *Path = getenv("MUSCLE_MXPATH");
+		if (Path != 0)
+			{
+		// Room for "<Path>/<FileName>" plus the terminating NUL.
+			size_t n = strlen(Path) + 1 + strlen(FileName) + 1;
+			NewFileName = new char[n];
+			sprintf(NewFileName, "%s/%s", Path, FileName);
+			FileName = NewFileName;
+			}
+			{
+		// Inner scope so that File is destroyed before its name buffer
+		// is released below.
+			TextFile File(FileName);
+			UserMatrix = ReadMx(File);
+			}
+		delete[] NewFileName;
+		g_Alpha = ALPHA_Amino;
+		g_PPScore = PPSCORE_SP;
+		}
+
+	SetPPScore();
+
+	if (0 != UserMatrix)
+		g_ptrScoreMatrix = UserMatrix;
+
+	unsigned uMaxL = 0;
+	unsigned uTotL = 0;
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		unsigned L = v.GetSeq(uSeqIndex).Length();
+		uTotL += L;
+		if (L > uMaxL)
+			uMaxL = L;
+		}
+
+	SetIter(1);
+	g_bDiags = g_bDiags1;
+	SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);
+
+	SetMuscleSeqVect(v);
+
+	MSA::SetIdCount(uSeqCount);
+
+// Initialize sequence ids.
+// From this point on, ids must somehow propagate from here.
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		v.SetSeqId(uSeqIndex, uSeqIndex);
+
+	if (0 == uSeqCount)
+		Quit("Input file '%s' has no sequences", g_pstrInFileName);
+	if (1 == uSeqCount)
+		{
+	// Nothing to align: copy the single sequence to the output.
+		TextFile fileOut(g_pstrOutFileName, true);
+		v.ToFile(fileOut);
+		return;
+		}
+
+	if (uSeqCount > 1)
+		MHackStart(v);
+
+// First iteration
+	Tree GuideTree;
+	if (0 != g_pstrUseTreeFileName)
+		{
+	// Discourage users...
+	// The warning is data, not a format string, so print it through "%s".
+		if (!g_bUseTreeNoWarn)
+			fprintf(stderr, "%s", g_strUseTreeWarning);
+
+	// Read tree from file
+		TextFile TreeFile(g_pstrUseTreeFileName);
+		GuideTree.FromFile(TreeFile);
+
+	// Make sure tree is rooted
+		if (!GuideTree.IsRooted())
+			Quit("User tree must be rooted");
+
+		if (GuideTree.GetLeafCount() != uSeqCount)
+			Quit("User tree does not match input sequences");
+
+	// Every leaf label must name an input sequence.
+		const unsigned uNodeCount = GuideTree.GetNodeCount();
+		for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
+			{
+			if (!GuideTree.IsLeaf(uNodeIndex))
+				continue;
+			const char *LeafName = GuideTree.GetLeafName(uNodeIndex);
+			unsigned uSeqIndex;
+			bool SeqFound = v.FindName(LeafName, &uSeqIndex);
+			if (!SeqFound)
+				Quit("Label %s in tree does not match sequences", LeafName);
+			}
+
+	// Set ids
+		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+			{
+			const char *SeqName = v.GetSeqName(uSeqIndex);
+			unsigned uLeafIndex = GuideTree.GetLeafNodeIndex(SeqName);
+			GuideTree.SetLeafId(uLeafIndex, uSeqIndex);
+			}
+		}
+	else
+		TreeFromSeqVect(v, GuideTree, g_Cluster1, g_Distance1, g_Root1);
+
+	const char *Tree1 = ValueOpt("Tree1");
+	if (0 != Tree1)
+		{
+		TextFile f(Tree1, true);
+		GuideTree.ToFile(f);
+		if (g_bCluster)
+			return;
+		}
+
+	SetMuscleTree(GuideTree);
+	ValidateMuscleIds(GuideTree);
+
+	MSA msa;
+	ProgNode *ProgNodes = 0;
+	if (g_bLow)
+		ProgNodes = ProgressiveAlignE(v, GuideTree, msa);
+	else
+		ProgressiveAlign(v, GuideTree, msa);
+	SetCurrentAlignment(msa);
+
+	if (0 != g_pstrComputeWeightsFileName)
+		{
+		extern void OutWeights(const char *FileName, const MSA &msa);
+		SetMSAWeightsMuscle(msa);
+		OutWeights(g_pstrComputeWeightsFileName, msa);
+		return;
+		}
+
+	ValidateMuscleIds(msa);
+
+	if (1 == g_uMaxIters || 2 == uSeqCount)
+		{
+		//TextFile fileOut(g_pstrOutFileName, true);
+		//MHackEnd(msa);
+		//msa.ToFile(fileOut);
+		MuscleOutput(msa);
+		return;
+		}
+
+// Second iteration: tree refinement, skipped when the user supplied the
+// guide tree.
+	if (0 == g_pstrUseTreeFileName)
+		{
+		g_bDiags = g_bDiags2;
+		SetIter(2);
+
+		if (g_bLow)
+			{
+			if (0 != g_uMaxTreeRefineIters)
+				RefineTreeE(msa, v, GuideTree, ProgNodes);
+			}
+		else
+			RefineTree(msa, GuideTree);
+
+		const char *Tree2 = ValueOpt("Tree2");
+		if (0 != Tree2)
+			{
+			TextFile f(Tree2, true);
+			GuideTree.ToFile(f);
+			}
+		}
+
+	SetSeqWeightMethod(g_SeqWeight2);
+	SetMuscleTree(GuideTree);
+
+// Remaining iterations refine the alignment.
+// NOTE(review): g_uMaxIters - 2 is unsigned arithmetic; it wraps if
+// g_uMaxIters is 0 -- confirm that option parsing rules that out.
+	if (g_bAnchors)
+		RefineVert(msa, GuideTree, g_uMaxIters - 2);
+	else
+		RefineHoriz(msa, GuideTree, g_uMaxIters - 2, false, false);
+
+#if	0
+// Refining by subfamilies is disabled as it didn't give better
+// results. I tried doing this before and after RefineHoriz.
+// Should get back to this as it seems like this should work.
+	RefineSubfams(msa, GuideTree, g_uMaxIters - 2);
+#endif
+
+	ValidateMuscleIds(msa);
+	ValidateMuscleIds(GuideTree);
+
+	//TextFile fileOut(g_pstrOutFileName, true);
+	//MHackEnd(msa);
+	//msa.ToFile(fileOut);
+	MuscleOutput(msa);
+	}
+
+// Invoke the one top-level command selected by the global option flags.
+// Flags are tested in a fixed priority order; the first one set wins and
+// DoMuscle() is the fallback.
+static void RunSelectedCommand()
+	{
+	if (g_bRefine)
+		{
+		Refine();
+		return;
+		}
+	if (g_bRefineW)
+		{
+		extern void DoRefineW();
+		DoRefineW();
+		return;
+		}
+	if (g_bProfDB)
+		{
+		ProfDB();
+		return;
+		}
+	if (g_bSW)
+		{
+		Local();
+		return;
+		}
+	if (0 != g_pstrSPFileName)
+		{
+		DoSP();
+		return;
+		}
+	if (g_bProfile)
+		{
+		Profile();
+		return;
+		}
+	if (g_bPPScore)
+		{
+		PPScore();
+		return;
+		}
+	if (g_bPAS)
+		{
+		ProgAlignSubFams();
+		return;
+		}
+	DoMuscle();
+	}
+
+// Program driver: log the start time and command line, run the selected
+// command, then log timing (when built with TIMING), diagonal savings and
+// the finish time.
+void Run()
+	{
+	SetStartTime();
+	Log("Started %s\n", GetTimeAsStr());
+	int iArg = 0;
+	while (iArg < g_argc)
+		{
+		Log("%s ", g_argv[iArg]);
+		++iArg;
+		}
+	Log("\n");
+
+#if	TIMING
+	TICKS t1 = GetClockTicks();
+#endif
+
+	RunSelectedCommand();
+
+#if	TIMING
+	extern TICKS g_ticksDP;
+	extern TICKS g_ticksObjScore;
+	TICKS t2 = GetClockTicks();
+	TICKS TotalTicks = t2 - t1;
+	TICKS ticksOther = TotalTicks - g_ticksDP - g_ticksObjScore;
+	double dSecs = TicksToSecs(TotalTicks);
+	double PctDP = (double) g_ticksDP*100.0/(double) TotalTicks;
+	double PctOS = (double) g_ticksObjScore*100.0/(double) TotalTicks;
+	double PctOther = (double) ticksOther*100.0/(double) TotalTicks;
+	Log("                 Ticks     Secs    Pct\n");
+	Log("          ============  =======  =====\n");
+	Log("DP        %12ld  %7.2f  %5.1f%%\n",
+	  (long) g_ticksDP, TicksToSecs(g_ticksDP), PctDP);
+	Log("OS        %12ld  %7.2f  %5.1f%%\n",
+	  (long) g_ticksObjScore, TicksToSecs(g_ticksObjScore), PctOS);
+	Log("Other     %12ld  %7.2f  %5.1f%%\n",
+	  (long) ticksOther, TicksToSecs(ticksOther), PctOther);
+	Log("Total     %12ld  %7.2f  100.0%%\n", (long) TotalTicks, dSecs);
+#endif
+
+	ListDiagSavings();
+	Log("Finished %s\n", GetTimeAsStr());
+	}

Added: trunk/packages/muscle/branches/upstream/current/dosp.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/dosp.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/dosp.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,60 @@
+#include "muscle.h"
+#include "textfile.h"
+#include "msa.h"
+#include "objscore.h"
+#include "tree.h"
+#include "profile.h"
+
+// Read the alignment named by g_pstrSPFileName, build a tree and sequence
+// weights for it, and report the value returned by ObjScoreSP() to both
+// the log and stderr.
+void DoSP()
+	{
+	TextFile fileAln(g_pstrSPFileName);
+
+	MSA msaIn;
+	msaIn.FromFile(fileAln);
+
+// Pick the alphabet from the requested sequence type.
+	ALPHA Alpha = ALPHA_Undefined;
+	if (SEQTYPE_Auto == g_SeqType)
+		Alpha = msaIn.GuessAlpha();
+	else if (SEQTYPE_Protein == g_SeqType)
+		Alpha = ALPHA_Amino;
+	else if (SEQTYPE_DNA == g_SeqType)
+		Alpha = ALPHA_DNA;
+	else if (SEQTYPE_RNA == g_SeqType)
+		Alpha = ALPHA_RNA;
+	else
+		Quit("Invalid SeqType");
+	SetAlpha(Alpha);
+	msaIn.FixAlpha();
+
+	SetPPScore();
+
+	const unsigned uSeqCount = msaIn.GetSeqCount();
+	if (0 == uSeqCount)
+		Quit("No sequences in input file %s", g_pstrSPFileName);
+
+// Ids are simply the sequence indexes.
+	MSA::SetIdCount(uSeqCount);
+	unsigned uSeqIndex = 0;
+	while (uSeqIndex < uSeqCount)
+		{
+		msaIn.SetSeqId(uSeqIndex, uSeqIndex);
+		++uSeqIndex;
+		}
+
+	SetSeqWeightMethod(g_SeqWeight1);
+	Tree GuideTree;
+	TreeFromMSA(msaIn, GuideTree, g_Cluster2, g_Distance2, g_Root2);
+	SetMuscleTree(GuideTree);
+	SetMSAWeightsMuscle(msaIn);
+
+	SCORE scoreSP = ObjScoreSP(msaIn);
+
+	Log("File=%s;SP=%.4g\n", g_pstrSPFileName, scoreSP);
+	fprintf(stderr, "File=%s;SP=%.4g\n", g_pstrSPFileName, scoreSP);
+	}

Added: trunk/packages/muscle/branches/upstream/current/dpregionlist.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/dpregionlist.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/dpregionlist.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,73 @@
+#ifndef DPRegionList_h
+#define DPRegionList_h
+
+#include "diaglist.h"
+
+// Discriminator for DPRegion: which member of the union is valid.
+enum DPREGIONTYPE
+	{
+	DPREGIONTYPE_Unknown,
+	DPREGIONTYPE_Diag,
+	DPREGIONTYPE_Rect
+	};
+
+// One region of the dynamic-programming matrix: either a diagonal segment
+// or a rectangle, as indicated by m_Type.
+// NOTE(review): Diag and Rect are presumably declared in diaglist.h.
+struct DPRegion
+	{
+	DPREGIONTYPE m_Type;
+	union
+		{
+		Diag m_Diag;
+		Rect m_Rect;
+		};
+	};
+
+// Fixed capacity of a DPRegionList.
+const unsigned MAX_DPREGIONS = 1024;
+
+// Fixed-capacity, append-only list of DPRegion values held inline in the
+// object (no heap allocation).
+class DPRegionList
+	{
+public:
+	DPRegionList() : m_uCount(0)
+		{
+		}
+	~DPRegionList()
+		{
+		Free();
+		}
+
+public:
+// Creation
+	// Discard all regions.
+	void Clear()
+		{
+		Free();
+		}
+	// Append a copy of r.
+	void Add(const DPRegion &r);
+
+// Accessors
+	// Number of regions currently stored.
+	unsigned GetCount() const
+		{
+		return m_uCount;
+		}
+	// Region at position uIndex, which must be less than GetCount().
+	const DPRegion &Get(unsigned uIndex) const
+		{
+		assert(uIndex < m_uCount);
+		const DPRegion &Region = m_DPRegions[uIndex];
+		return Region;
+		}
+
+// Diagnostics
+	void LogMe() const;
+
+private:
+	// Storage is inline, so "freeing" just resets the count.
+	void Free()
+		{
+		m_uCount = 0;
+		}
+
+private:
+	unsigned m_uCount;
+	DPRegion m_DPRegions[MAX_DPREGIONS];
+	};
+
+void DiagListToDPRegionList(const DiagList &DL, DPRegionList &RL,
+  unsigned uLengthA, unsigned uLengthB);
+
+#endif	// DPRegionList_h

Added: trunk/packages/muscle/branches/upstream/current/dpreglist.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/dpreglist.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/dpreglist.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,108 @@
+#include "muscle.h"
+#include "dpreglist.h"
+
+// Sum of LengthA*LengthB over all rectangular regions; diagonal regions
+// contribute nothing.
+unsigned DPRegionList::GetDPArea() const
+	{
+	unsigned uTotalArea = 0;
+	for (unsigned uRegion = 0; uRegion < m_uCount; ++uRegion)
+		{
+		const DPRegion &Region = m_DPRegions[uRegion];
+		if (DPREGIONTYPE_Rect != Region.m_Type)
+			continue;
+		const Rect &rc = Region.m_Rect;
+		uTotalArea += rc.m_uLengthA*rc.m_uLengthB;
+		}
+	return uTotalArea;
+	}
+
+// Append a copy of r. Calls Quit() when the fixed capacity
+// (MAX_DPREGIONS) is already used up.
+void DPRegionList::Add(const DPRegion &r)
+	{
+// ">=" rather than "==" so that a corrupted count can never index past
+// the array; m_uCount is unsigned, hence %u.
+	if (m_uCount >= MAX_DPREGIONS)
+		Quit("DPRegionList::Add, overflow %u", m_uCount);
+	m_DPRegions[m_uCount] = r;
+	++m_uCount;
+	}
+
+// Write one line per region to the log: index, kind, and the inclusive
+// start/end positions in A and B.
+void DPRegionList::LogMe() const
+	{
+	Log("DPRegionList::LogMe, count=%u\n", m_uCount);
+	Log("Region  Type  StartA  StartB    EndA    EndB\n");
+	Log("------  ----  ------  ------    ----    ----\n");
+	for (unsigned uRegion = 0; uRegion < m_uCount; ++uRegion)
+		{
+		const DPRegion &Region = m_DPRegions[uRegion];
+		Log("%6u  ", uRegion);
+		switch (Region.m_Type)
+			{
+		case DPREGIONTYPE_Diag:
+			{
+			const Diag &d = Region.m_Diag;
+			Log("Diag  %6u  %6u  %6u  %6u\n",
+			  d.m_uStartPosA,
+			  d.m_uStartPosB,
+			  d.m_uStartPosA + d.m_uLength - 1,
+			  d.m_uStartPosB + d.m_uLength - 1);
+			break;
+			}
+		case DPREGIONTYPE_Rect:
+			{
+			const Rect &rc = Region.m_Rect;
+			Log("Rect  %6u  %6u  %6u  %6u\n",
+			  rc.m_uStartPosA,
+			  rc.m_uStartPosB,
+			  rc.m_uStartPosA + rc.m_uLengthA - 1,
+			  rc.m_uStartPosB + rc.m_uLengthB - 1);
+			break;
+			}
+		default:
+			Log(" *** ERROR *** Type=%u\n", Region.m_Type);
+			break;
+			}
+		}
+	}
+
+// Convert a list of diagonals into an alternating list of DP regions
+// covering sequences of length uLengthA and uLengthB.
+//
+// For each diagonal, a rectangle is emitted from the end of the previous
+// diagonal to the start of this one, followed (when anything remains after
+// trimming g_uDiagMargin from both ends) by the trimmed diagonal itself.
+// A final rectangle covers the remainder up to (uLengthA, uLengthB).
+// Quit() is called if g_uDiagMargin exceeds half of g_uMinDiagLength.
+void DiagListToDPRegionList(const DiagList &DL, DPRegionList &RL,
+  unsigned uLengthA, unsigned uLengthB)
+	{
+// The message now states the bound that is actually tested.
+	if (g_uDiagMargin > g_uMinDiagLength/2)
+		Quit("Invalid parameters, diagmargin=%u must be <= diaglength/2 (diaglength=%u)",
+		  g_uDiagMargin, g_uMinDiagLength);
+
+	unsigned uStartPosA = 0;
+	unsigned uStartPosB = 0;
+	const unsigned uDiagCount = DL.GetCount();
+	DPRegion r;
+	for (unsigned uDiagIndex = 0; uDiagIndex < uDiagCount; ++uDiagIndex)
+		{
+		const Diag &d = DL.Get(uDiagIndex);
+		assert(d.m_uLength >= g_uMinDiagLength);
+	// Vertices bounding the trimmed diagonal. With a zero margin and a
+	// zero start position the start vertex wraps to UINT_MAX; every use
+	// below adds 1 first, which brings it back to 0.
+		const unsigned uStartVertexA = d.m_uStartPosA + g_uDiagMargin - 1;
+		const unsigned uStartVertexB = d.m_uStartPosB + g_uDiagMargin - 1;
+		const unsigned uEndVertexA = d.m_uStartPosA + d.m_uLength - g_uDiagMargin;
+		const unsigned uEndVertexB = d.m_uStartPosB + d.m_uLength - g_uDiagMargin;
+
+	// Rectangle between the previous diagonal and this one.
+		r.m_Type = DPREGIONTYPE_Rect;
+		r.m_Rect.m_uStartPosA = uStartPosA;
+		r.m_Rect.m_uStartPosB = uStartPosB;
+
+		assert(uStartVertexA + 1 >= uStartPosA);
+		assert(uStartVertexB + 1 >= uStartPosB);
+		r.m_Rect.m_uLengthA = uStartVertexA + 1 - uStartPosA;
+		r.m_Rect.m_uLengthB = uStartVertexB + 1 - uStartPosB;
+		RL.Add(r);
+
+	// The trimmed diagonal, if it is not empty.
+		if (uEndVertexA > uStartVertexA + 1)
+			{
+			const unsigned uDiagLengthMinusCaps = uEndVertexA - uStartVertexA - 1;
+
+			r.m_Type = DPREGIONTYPE_Diag;
+			r.m_Diag.m_uStartPosA = uStartVertexA + 1;
+			r.m_Diag.m_uStartPosB = uStartVertexB + 1;
+			assert(uEndVertexA - uStartVertexA == uEndVertexB - uStartVertexB);
+			r.m_Diag.m_uLength = uDiagLengthMinusCaps;
+			RL.Add(r);
+			}
+
+		uStartPosA = uEndVertexA;
+		uStartPosB = uEndVertexB;
+		}
+
+	assert((int) uLengthA - (int) uStartPosA >= (int) g_uDiagMargin);
+	assert((int) uLengthB - (int) uStartPosB >= (int) g_uDiagMargin);
+
+// Final rectangle up to the end of both sequences.
+	r.m_Type = DPREGIONTYPE_Rect;
+	r.m_Rect.m_uStartPosA = uStartPosA;
+	r.m_Rect.m_uStartPosB = uStartPosB;
+
+	assert(uLengthA >= uStartPosA);
+	assert(uLengthB >= uStartPosB);
+	r.m_Rect.m_uLengthA = uLengthA - uStartPosA;
+	r.m_Rect.m_uLengthB = uLengthB - uStartPosB;
+	RL.Add(r);
+	}

Added: trunk/packages/muscle/branches/upstream/current/dpreglist.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/dpreglist.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/dpreglist.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,76 @@
+#ifndef dpreglist_h
+#define dpreglist_h
+
+#include "diaglist.h"
+
+// Discriminator for DPRegion: which member of the union is valid.
+enum DPREGIONTYPE
+	{
+	DPREGIONTYPE_Unknown,
+	DPREGIONTYPE_Diag,
+	DPREGIONTYPE_Rect
+	};
+
+// One region of the dynamic-programming matrix: either a diagonal segment
+// or a rectangle, as indicated by m_Type.
+// NOTE(review): Diag and Rect are presumably declared in diaglist.h.
+struct DPRegion
+	{
+	DPREGIONTYPE m_Type;
+	union
+		{
+		Diag m_Diag;
+		Rect m_Rect;
+		};
+	};
+
+// Fixed capacity of a DPRegionList.
+const unsigned MAX_DPREGIONS = 1024;
+
+// Fixed-capacity, append-only list of DPRegion values held inline in the
+// object (no heap allocation).
+class DPRegionList
+	{
+public:
+	DPRegionList() : m_uCount(0)
+		{
+		}
+	~DPRegionList()
+		{
+		Free();
+		}
+
+public:
+// Creation
+	// Discard all regions.
+	void Clear()
+		{
+		Free();
+		}
+	// Append a copy of r.
+	void Add(const DPRegion &r);
+
+// Accessors
+	// Number of regions currently stored.
+	unsigned GetCount() const
+		{
+		return m_uCount;
+		}
+
+	// Region at position uIndex, which must be less than GetCount().
+	const DPRegion &Get(unsigned uIndex) const
+		{
+		assert(uIndex < m_uCount);
+		const DPRegion &Region = m_DPRegions[uIndex];
+		return Region;
+		}
+
+	// Total area of all rectangular regions.
+	unsigned GetDPArea() const;
+
+// Diagnostics
+	void LogMe() const;
+
+private:
+	// Storage is inline, so "freeing" just resets the count.
+	void Free()
+		{
+		m_uCount = 0;
+		}
+
+private:
+	unsigned m_uCount;
+	DPRegion m_DPRegions[MAX_DPREGIONS];
+	};
+
+void DiagListToDPRegionList(const DiagList &DL, DPRegionList &RL,
+  unsigned uLengthA, unsigned uLengthB);
+
+#endif	// dpreglist_h

Added: trunk/packages/muscle/branches/upstream/current/drawtree.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/drawtree.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/drawtree.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,41 @@
+#include "muscle.h"
+#include "tree.h"
+
+/***
+Simple tree drawing algorithm.
+
+y coordinate of node is index in depth-first traversal.
+x coordinate is distance from root.
+***/
+
+static unsigned DistFromRoot(const Tree &tree, unsigned uNodeIndex)
+	{
+// Number of parent links followed from uNodeIndex until the root node
+// is reached (0 when uNodeIndex is the root itself).
+	const unsigned uRootIndex = tree.GetRootNodeIndex();
+	unsigned uSteps = 0;
+	for (unsigned uNode = uNodeIndex; uNode != uRootIndex;
+	  uNode = tree.GetParent(uNode))
+		++uSteps;
+	return uSteps;
+	}
+
+static void DrawNode(const Tree &tree, unsigned uNodeIndex)
+	{
+// In-order walk: left subtree, then this node's own line, then right subtree.
+	const bool bInternal = !tree.IsLeaf(uNodeIndex);
+	if (bInternal)
+		DrawNode(tree, tree.GetLeft(uNodeIndex));
+
+// Indent by five spaces per level below the root, then log the node index.
+	for (unsigned uPad = 5*DistFromRoot(tree, uNodeIndex); uPad > 0; --uPad)
+		Log(" ");
+	Log("%d\n", uNodeIndex);
+
+	if (bInternal)
+		DrawNode(tree, tree.GetRight(uNodeIndex));
+	}
+
+void DrawTree(const Tree &tree)
+	{
+// Log the whole tree, starting the in-order walk at the root node.
+	DrawNode(tree, tree.GetRootNodeIndex());
+	}

Added: trunk/packages/muscle/branches/upstream/current/edgelist.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/edgelist.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/edgelist.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,88 @@
+#include "muscle.h"
+#include "edgelist.h"
+
+EdgeList::EdgeList()
+  : m_uCount(0),
+    m_uCacheSize(0),
+    m_uNode1(0),
+    m_uNode2(0)
+	{	// starts empty: no arrays allocated, zero edges, zero capacity
+	}
+
+EdgeList::~EdgeList()
+	{
+	Clear();	// releases both node arrays
+	}
+
+void EdgeList::Clear()
+	{
+// Release both parallel arrays and return to the freshly-constructed state.
+	delete[] m_uNode1;
+	m_uNode1 = 0;
+	delete[] m_uNode2;
+	m_uNode2 = 0;
+	m_uCount = m_uCacheSize = 0;
+	}
+
+void EdgeList::Add(unsigned uNode1, unsigned uNode2)
+	{
+// Append the edge uNode1->uNode2, growing the arrays only when they are full.
+// (The previous "<=" test was always true, so every call reallocated and
+// copied both arrays and added another 512 slots of capacity.)
+	if (m_uCount >= m_uCacheSize)
+		Expand();
+	m_uNode1[m_uCount] = uNode1;
+	m_uNode2[m_uCount] = uNode2;
+	++m_uCount;
+	}
+
+unsigned EdgeList::GetCount() const	// number of edges added since construction or the last Clear()
+	{
+	return m_uCount;
+	}
+
+void EdgeList::GetEdge(unsigned uIndex, unsigned *ptruNode1, unsigned *ptruNode2) const
+	{
+// Fetch both end nodes of edge uIndex. Valid indexes are 0..m_uCount-1;
+// the previous ">" test let uIndex == m_uCount through and read a slot
+// that was never written (or lies past the end of the arrays).
+	if (uIndex >= m_uCount)
+		Quit("EdgeList::GetEdge(%u) count=%u", uIndex, m_uCount);
+	*ptruNode1 = m_uNode1[uIndex];
+	*ptruNode2 = m_uNode2[uIndex];
+	}
+
+void EdgeList::Copy(const EdgeList &rhs)
+	{
+// Replace this list's contents with rhs's edges, appended in rhs order.
+	Clear();
+	unsigned uFrom, uTo;
+	const unsigned uEdgeCount = rhs.GetCount();
+	for (unsigned uEdge = 0; uEdge != uEdgeCount; ++uEdge)
+		{
+		rhs.GetEdge(uEdge, &uFrom, &uTo);
+		Add(uFrom, uTo);
+		}
+	}
+
+void EdgeList::Expand()
+	{
+// Grow both parallel arrays by 512 slots, keeping the m_uCount stored edges.
+	const unsigned uGrownSize = m_uCacheSize + 512;
+	unsigned *GrownNode1 = new unsigned[uGrownSize];
+	unsigned *GrownNode2 = new unsigned[uGrownSize];
+	if (0 != m_uCount)
+		{
+		memcpy(GrownNode1, m_uNode1, m_uCount*sizeof(unsigned));
+		memcpy(GrownNode2, m_uNode2, m_uCount*sizeof(unsigned));
+		}
+	delete[] m_uNode1;
+	m_uNode1 = GrownNode1;
+	delete[] m_uNode2;
+	m_uNode2 = GrownNode2;
+	m_uCacheSize = uGrownSize;
+	}
+
+void EdgeList::LogMe() const
+	{
+// Log every edge as "from->to", separated by single spaces, then a newline.
+	for (unsigned uEdge = 0; uEdge != m_uCount; ++uEdge)
+		{
+		if (0 != uEdge)
+			Log(" ");
+		Log("%u->%u", m_uNode1[uEdge], m_uNode2[uEdge]);
+		}
+	Log("\n");
+	}

Added: trunk/packages/muscle/branches/upstream/current/edgelist.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/edgelist.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/edgelist.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,28 @@
+#ifndef EdgeList_h
+#define EdgeList_h
+
+class EdgeList	// growable list of (node1, node2) pairs kept in two parallel arrays
+	{
+public:
+	EdgeList();
+	virtual ~EdgeList();
+
+public:
+	void Clear();
+	void Add(unsigned uNode1, unsigned uNode2);
+	unsigned GetCount() const;
+	void GetEdge(unsigned uIndex, unsigned *ptruNode1, unsigned *ptruNode2) const;
+	void Copy(const EdgeList &rhs);
+	void LogMe() const;
+
+private:
+	void Expand();
+
+private:
+	unsigned m_uCount;	// number of edges stored
+	unsigned m_uCacheSize;	// allocated length of each of the two arrays
+	unsigned *m_uNode1;	// m_uNode1[i] and m_uNode2[i] together form edge i
+	unsigned *m_uNode2;
+	};
+
+#endif	// EdgeList_h

Added: trunk/packages/muscle/branches/upstream/current/enumopts.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/enumopts.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/enumopts.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,8 @@
+#include "muscle.h"
+#include "enumopts.h"
+
+#define	s(t)		EnumOpt t##_Opts[] = {	// opens the definition of the table TYPE_Opts[]
+#define c(t, x)		#x, t##_##x,	// one entry: the text "x" and the enum value TYPE_x
+#define e(t)		0, 0 };	// an all-zero entry closes the table
+
+#include "enums.h"	// expands the three macros above once per enum type

Added: trunk/packages/muscle/branches/upstream/current/enumopts.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/enumopts.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/enumopts.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,16 @@
+#ifndef enumopts_h
+#define enumopts_h
+
+struct EnumOpt	// one (text, value) entry of a TYPE_Opts[] table
+	{
+	const char *pstrOpt;	// option text; 0 in the closing entry written by enumopts.cpp
+	int iValue;	// the matching enum value, stored as int
+	};
+
+#define	s(t)		extern EnumOpt t##_Opts[];	// declares one table per enum type
+#define c(t, x)		/* empty */
+#define e(t)		/* empty */
+#include "enums.h"	
+
+
+#endif // enumopts_h

Added: trunk/packages/muscle/branches/upstream/current/enums.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/enums.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/enums.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,95 @@
+// enums.h
+// Define enum types.
+// Exploit macro hacks to avoid lots of repetitive typing.
+// Generally I am opposed to macro hacks because of the
+// highly obscure code that results, but in this case it
+// makes maintenance much easier and less error-prone.
+// The idea is that this file can be included in different
+// places with different definitions of s (Start), c (Case)
+// and e (End). See types.h.
+
+s(ALPHA)
+c(ALPHA, Amino)
+c(ALPHA, DNA)
+c(ALPHA, RNA)
+e(ALPHA)
+
+s(SEQTYPE)
+c(SEQTYPE, Protein)
+c(SEQTYPE, DNA)
+c(SEQTYPE, RNA)
+c(SEQTYPE, Auto)
+e(SEQTYPE)
+
+s(ROOT)
+c(ROOT, Pseudo)
+c(ROOT, MidLongestSpan)
+c(ROOT, MinAvgLeafDist)
+e(ROOT)
+
+s(CLUSTER)
+c(CLUSTER, UPGMA)
+c(CLUSTER, UPGMAMax)
+c(CLUSTER, UPGMAMin)
+c(CLUSTER, UPGMB)
+c(CLUSTER, NeighborJoining)
+e(CLUSTER)
+
+s(JOIN)
+c(JOIN, NearestNeighbor)
+c(JOIN, NeighborJoining)
+e(JOIN)
+
+s(LINKAGE)
+c(LINKAGE, Min)
+c(LINKAGE, Avg)
+c(LINKAGE, Max)
+c(LINKAGE, NeighborJoining)
+c(LINKAGE, Biased)
+e(LINKAGE)
+
+s(DISTANCE)
+c(DISTANCE, Kmer6_6)
+c(DISTANCE, Kmer20_3)
+c(DISTANCE, Kmer20_4)
+c(DISTANCE, Kbit20_3)
+c(DISTANCE, Kmer4_6)
+c(DISTANCE, PctIdKimura)
+c(DISTANCE, PctIdLog)
+c(DISTANCE, PWKimura)
+e(DISTANCE)
+
+s(PPSCORE)
+c(PPSCORE, LE)
+c(PPSCORE, SP)
+c(PPSCORE, SV)
+c(PPSCORE, SPN)
+e(PPSCORE)
+
+s(SEQWEIGHT)
+c(SEQWEIGHT, None)
+c(SEQWEIGHT, Henikoff)
+c(SEQWEIGHT, HenikoffPB)
+c(SEQWEIGHT, GSC)
+c(SEQWEIGHT, ClustalW)
+c(SEQWEIGHT, ThreeWay)
+e(SEQWEIGHT)
+
+s(OBJSCORE)
+c(OBJSCORE, SP)				// Sum of Pairs of sequences
+c(OBJSCORE, DP)				// Dynamic Programming score
+c(OBJSCORE, XP)				// Cross Pairs = sum of pairs between two MSAs
+c(OBJSCORE, PS)				// sum of Prof-Seq score for all seqs in MSA
+c(OBJSCORE, SPF)			// sum of pairs, fast approximation
+c(OBJSCORE, SPM)			// sp if <= 100 seqs, spf otherwise
+e(OBJSCORE)
+
+s(TERMGAPS)
+c(TERMGAPS, Full)
+c(TERMGAPS, Half)
+c(TERMGAPS, Ext)
+e(TERMGAPS)
+
+#undef s
+#undef c
+#undef e

Added: trunk/packages/muscle/branches/upstream/current/enumtostr.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/enumtostr.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/enumtostr.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,16 @@
+#include "muscle.h"
+#include <stdio.h>
+
+static char szMsg[64];	// shared scratch buffer for values that match no case label
+
+// Define XXXToStr(XXX x) functions for each enum type XXX.
+#define s(t)	const char *t##ToStr(t x) { switch (x) { case t##_Undefined: return "Undefined";
+#define c(t, x)	case t##_##x: return #x;
+#define e(t)	} sprintf(szMsg, #t "_%d", x); return szMsg; }	// fallback "TYPE_<number>" is written to szMsg, so a later fallback call overwrites it
+#include "enums.h"
+
+// Define StrToXXX(const char *Str) functions for each enum type XXX.
+#define s(t)	t StrTo##t(const char *Str) { if (0) ;
+#define c(t, x)	else if (0 == stricmp(#x, Str)) return t##_##x;	// NOTE(review): stricmp is not ISO C/C++; presumably provided by the platform or via muscle.h -- confirm
+#define e(t)	Quit("Invalid value %s for type %s", Str, #t); return t##_Undefined; }
+#include "enums.h"

Added: trunk/packages/muscle/branches/upstream/current/estring.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/estring.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/estring.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,689 @@
+#include "muscle.h"
+#include "pwpath.h"
+#include "estring.h"
+#include "seq.h"
+#include "msa.h"
+
+/***
+An "estring" is an edit string that operates on a sequence.
+An estring is represented as a vector of integers.
+It is interpreted in order of increasing index.
+A positive value n means copy n letters.
+A negative value -n means insert n indels.
+Zero marks the end of the vector.
+Consecutive entries must have opposite sign, i.e. the
+shortest possible representation must be used.
+
+A "tpair" is a traceback path for a pairwise alignment
+represented as two estrings, one for each sequence.
+***/
+
+// Pack two edge-type characters into one integer so that a pair of types
+// can be used as a single switch label. Arguments are parenthesized so an
+// expression argument is cast as a whole.
+#define c2(c,d)	((((unsigned char) (c)) << 8) | (unsigned char) (d))
+
+unsigned LengthEstring(const short es[])
+	{
+// Count the entries that precede the terminating zero.
+	unsigned uLength = 0;
+	while (0 != es[uLength])
+		++uLength;
+	return uLength;
+	}
+
+short *EstringNewCopy(const short es[])
+	{
+// Duplicate es, terminating zero included, into memory obtained with new[].
+	const unsigned uEntries = LengthEstring(es) + 1;
+	short *esCopy = new short[uEntries];
+	return (short *) memcpy(esCopy, es, uEntries*sizeof(short));
+	}
+
+void LogEstring(const short es[])
+	{
+// Log the entries as "<a b c>": space-separated, no trailing newline.
+	Log("<");
+	for (const short *pEntry = es; 0 != *pEntry; ++pEntry)
+		{
+		if (pEntry != es)
+			Log(" ");
+		Log("%d", *pEntry);
+		}
+	Log(">");
+	}
+
+static bool EstringsEq(const short es1[], const short es2[])
+	{
+// True when both estrings hold the same entries up to and including the
+// terminating zero.
+	unsigned i = 0;
+	while (es1[i] == es2[i])
+		{
+		if (0 == es1[i])
+			return true;
+		++i;
+		}
+	return false;
+	}
+
+static void EstringCounts(const short es[], unsigned *ptruSymbols,
+  unsigned *ptruIndels)
+	{
+// Sum the positive entries (letters copied) into *ptruSymbols and the
+// magnitudes of the negative entries (indels inserted) into *ptruIndels.
+	unsigned uCopied = 0;
+	unsigned uInserted = 0;
+	for (const short *pEntry = es; 0 != *pEntry; ++pEntry)
+		{
+	// The loop stops at zero, so an entry here is either > 0 or < 0.
+		if (*pEntry > 0)
+			uCopied += *pEntry;
+		else
+			uInserted += -*pEntry;
+		}
+	*ptruSymbols = uCopied;
+	*ptruIndels = uInserted;
+	}
+
+static char *EstringOp(const short es[], const char s[])
+	{
+// Apply es to the C string s: each positive entry copies that many
+// letters of s, each negative entry emits that many '-' characters.
+// The result is a nul-terminated string allocated with new[].
+	unsigned uSymbols;
+	unsigned uIndels;
+	EstringCounts(es, &uSymbols, &uIndels);
+	assert((unsigned) strlen(s) == uSymbols);
+	char *sout = new char[uSymbols + uIndels + 1];
+	unsigned uOut = 0;
+	unsigned uIn = 0;
+	for (unsigned k = 0; 0 != es[k]; ++k)
+		{
+		const int n = es[k];
+	// At most one of the two loops runs, depending on the sign of n.
+		for (int i = 0; i < n; ++i)
+			sout[uOut++] = s[uIn++];
+		for (int i = 0; i < -n; ++i)
+			sout[uOut++] = '-';
+		}
+	assert(0 == s[uIn]);
+	sout[uOut] = 0;
+	return sout;
+	}
+
+void EstringOp(const short es[], const Seq &sIn, Seq &sOut)
+	{
+// Rebuild sOut from sIn under es: sOut gets sIn's name, one letter of sIn
+// per unit of each positive entry and one '-' per unit of each negative one.
+#if	DEBUG
+	unsigned uSymbols;
+	unsigned uIndels;
+	EstringCounts(es, &uSymbols, &uIndels);
+	assert(sIn.Length() == uSymbols);
+#endif
+	sOut.Clear();
+	sOut.SetName(sIn.GetName());
+	int iInPos = 0;
+	for (unsigned k = 0; 0 != es[k]; ++k)
+		{
+		const int n = es[k];
+	// At most one of the two loops runs, depending on the sign of n.
+		for (int i = 0; i < n; ++i)
+			{
+			const char c = sIn[iInPos++];
+			sOut.push_back(c);
+			}
+		for (int i = 0; i < -n; ++i)
+			sOut.push_back('-');
+		}
+	}
+
+unsigned EstringOp(const short es[], const Seq &sIn, MSA &a)
+	{
+// Apply es to sIn and store the result as the single row of a, which is
+// cleared and resized first. The row takes sIn's name and id.
+// Returns the number of columns (symbols + indels).
+	unsigned uSymbols;
+	unsigned uIndels;
+	EstringCounts(es, &uSymbols, &uIndels);
+	assert(sIn.Length() == uSymbols);
+
+	const unsigned uColCount = uSymbols + uIndels;
+
+	a.Clear();
+	a.SetSize(1, uColCount);
+
+	a.SetSeqName(0, sIn.GetName());
+	a.SetSeqId(0, sIn.GetId());
+
+	unsigned uInPos = 0;
+	unsigned uCol = 0;
+	for (unsigned k = 0; 0 != es[k]; ++k)
+		{
+		const int n = es[k];
+	// At most one of the two loops runs, depending on the sign of n.
+		for (int i = 0; i < n; ++i)
+			{
+			const char c = sIn[uInPos++];
+			a.SetChar(0, uCol++, c);
+			}
+		for (int i = 0; i < -n; ++i)
+			a.SetChar(0, uCol++, '-');
+		}
+	assert(uCol == uColCount);
+	return uColCount;
+	}
+
+void PathToEstrings(const PWPath &Path, short **ptresA, short **ptresB)	// builds one estring per sequence from Path; both are allocated with new[] and returned through ptresA / ptresB
+	{
+// First pass to determine size of estrings esA and esB
+	const unsigned uEdgeCount = Path.GetEdgeCount();
+	if (0 == uEdgeCount)	// empty path: both estrings consist of the terminator only
+		{
+		short *esA = new short[1];
+		short *esB = new short[1];
+		esA[0] = 0;
+		esB[0] = 0;
+		*ptresA = esA;
+		*ptresB = esB;
+		return;
+		}
+
+	unsigned iLengthA = 1;	// entry counts, not including the zero terminator
+	unsigned iLengthB = 1;
+	const char cFirstEdgeType = Path.GetEdge(0).cType;
+	char cPrevEdgeType = cFirstEdgeType;
+	for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
+		{
+		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
+		char cEdgeType = Edge.cType;
+
+		switch (c2(cPrevEdgeType, cEdgeType))
+			{
+		case c2('M', 'M'):	// same type as before: no new entry in either estring
+		case c2('D', 'D'):
+		case c2('I', 'I'):
+			break;
+
+		case c2('D', 'M'):	// A keeps copying (M and D are both positive for A); only B changes sign
+		case c2('M', 'D'):
+			++iLengthB;
+			break;
+
+		case c2('I', 'M'):	// B keeps copying (M and I are both positive for B); only A changes sign
+		case c2('M', 'I'):
+			++iLengthA;
+			break;
+
+		case c2('I', 'D'):	// both estrings change sign
+		case c2('D', 'I'):
+			++iLengthB;
+			++iLengthA;
+			break;
+
+		default:
+			assert(false);
+			}
+		cPrevEdgeType = cEdgeType;
+		}
+
+// Pass2 for seq A
+	{
+	short *esA = new short[iLengthA+1];	// +1 for the zero terminator
+	unsigned iA = 0;
+	switch (Path.GetEdge(0).cType)
+		{
+	case 'M':
+	case 'D':
+		esA[0] = 1;	// A contributes a letter
+		break;
+
+	case 'I':
+		esA[0] = -1;	// A receives an indel
+		break;
+
+	default:
+		assert(false);
+		}
+
+	char cPrevEdgeType = cFirstEdgeType;	// new variable in this scope; hides the pass-1 variable of the same name
+	for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
+		{
+		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
+		char cEdgeType = Edge.cType;
+
+		switch (c2(cPrevEdgeType, cEdgeType))
+			{
+		case c2('M', 'M'):
+		case c2('D', 'D'):
+		case c2('D', 'M'):
+		case c2('M', 'D'):
+			++(esA[iA]);	// extend the current run of letters
+			break;
+
+		case c2('I', 'D'):
+		case c2('I', 'M'):
+			++iA;
+			esA[iA] = 1;	// start a new run of letters
+			break;
+
+		case c2('M', 'I'):
+		case c2('D', 'I'):
+			++iA;
+			esA[iA] = -1;	// start a new run of indels
+			break;
+
+		case c2('I', 'I'):
+			--(esA[iA]);	// extend the current run of indels
+			break;
+
+		default:
+			assert(false);
+			}
+
+		cPrevEdgeType = cEdgeType;
+		}
+	assert(iA == iLengthA - 1);
+	esA[iLengthA] = 0;
+	*ptresA = esA;
+	}
+
+	{
+// Pass2 for seq B
+	short *esB = new short[iLengthB+1];	// +1 for the zero terminator
+	unsigned iB = 0;
+	switch (Path.GetEdge(0).cType)
+		{
+	case 'M':
+	case 'I':
+		esB[0] = 1;	// B contributes a letter
+		break;
+
+	case 'D':
+		esB[0] = -1;	// B receives an indel
+		break;
+
+	default:
+		assert(false);
+		}
+
+	char cPrevEdgeType = cFirstEdgeType;
+	for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
+		{
+		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
+		char cEdgeType = Edge.cType;
+
+		switch (c2(cPrevEdgeType, cEdgeType))
+			{
+		case c2('M', 'M'):
+		case c2('I', 'I'):
+		case c2('I', 'M'):
+		case c2('M', 'I'):
+			++(esB[iB]);	// extend the current run of letters
+			break;
+
+		case c2('D', 'I'):
+		case c2('D', 'M'):
+			++iB;
+			esB[iB] = 1;	// start a new run of letters
+			break;
+
+		case c2('M', 'D'):
+		case c2('I', 'D'):
+			++iB;
+			esB[iB] = -1;	// start a new run of indels
+			break;
+
+		case c2('D', 'D'):
+			--(esB[iB]);	// extend the current run of indels
+			break;
+
+		default:
+			assert(false);
+			}
+
+		cPrevEdgeType = cEdgeType;
+		}
+	assert(iB == iLengthB - 1);
+	esB[iLengthB] = 0;
+	*ptresB = esB;
+	}
+
+#if	DEBUG
+	{	// debug-only consistency checks, including a round trip through EstringsToPath
+	const PWEdge &LastEdge = Path.GetEdge(uEdgeCount - 1);
+	unsigned uSymbols;
+	unsigned uIndels;
+	EstringCounts(*ptresA, &uSymbols, &uIndels);
+	assert(uSymbols == LastEdge.uPrefixLengthA);
+	assert(uSymbols + uIndels == uEdgeCount);
+
+	EstringCounts(*ptresB, &uSymbols, &uIndels);
+	assert(uSymbols == LastEdge.uPrefixLengthB);
+	assert(uSymbols + uIndels == uEdgeCount);
+
+	PWPath TmpPath;
+	EstringsToPath(*ptresA, *ptresB, TmpPath);
+	TmpPath.AssertEqual(Path);
+	}
+#endif
+	}
+
+void EstringsToPath(const short esA[], const short esB[], PWPath &Path)	// rebuilds Path edge by edge from the two estrings; Path is cleared first
+	{
+	Path.Clear();
+	unsigned iA = 0;
+	unsigned iB = 0;
+	int nA = esA[iA++];	// nA / nB: what is left of the current entry of esA / esB
+	int nB = esB[iB++];
+	unsigned uPrefixLengthA = 0;
+	unsigned uPrefixLengthB = 0;
+	for (;;)
+		{
+		char cType;	// assigned only in the valid branches below; the others are assert(false)
+		if (nA > 0)
+			{
+			if (nB > 0)
+				{
+				cType = 'M';	// letter in A and letter in B
+				--nA;
+				--nB;
+				}
+			else if (nB < 0)
+				{
+				cType = 'D';	// letter in A, indel in B
+				--nA;
+				++nB;
+				}
+			else
+				assert(false);
+			}
+		else if (nA < 0)
+			{
+			if (nB > 0)
+				{
+				cType = 'I';	// indel in A, letter in B
+				++nA;
+				--nB;
+				}
+			else
+				assert(false);	// indel in both estrings at once is not a valid edge
+			}
+		else
+			assert(false);
+
+		switch (cType)
+			{
+		case 'M':
+			++uPrefixLengthA;
+			++uPrefixLengthB;
+			break;
+		case 'D':
+			++uPrefixLengthA;
+			break;
+		case 'I':
+			++uPrefixLengthB;
+			break;
+			}
+
+		PWEdge Edge;
+		Edge.cType = cType;
+		Edge.uPrefixLengthA = uPrefixLengthA;
+		Edge.uPrefixLengthB = uPrefixLengthB;
+		Path.AppendEdge(Edge);
+
+		if (nA == 0)	// current entry of esA used up
+			{
+			if (0 == esA[iA])	// esA finished: esB is expected to finish at the same edge
+				{
+				assert(0 == esB[iB]);
+				break;
+				}
+			nA = esA[iA++];
+			}
+		if (nB == 0)
+			nB = esB[iB++];
+		}
+	}
+
+/***
+Multiply two estrings to make a third estring.
+The product of two estrings e1*e2 is defined to be
+the estring that produces the same result as applying
+e1 then e2. Multiplication is not commutative. In fact,
+the reversed order is undefined unless both estrings
+consist of a single, identical, positive entry.
+A primary motivation for using estrings is that
+multiplication is very fast, reducing the time
+needed to construct the root alignment.
+
+Example
+
+	<-1,3>(XXX)	= -XXX
+	<2,-1,2>(-XXX) = -X-XX
+
+Therefore,
+
+	<-1,3>*<2,-1,2> = <-1,1,-1,2>
+***/
+
+static bool CanMultiplyEstrings(const short es1[], const short es2[])
+	{
+// es1*es2 is accepted when the total length produced by es1 (symbols plus
+// indels) equals the number of symbols that es2 copies.
+	unsigned uSymbols1, uIndels1;
+	unsigned uSymbols2, uIndels2;
+	EstringCounts(es1, &uSymbols1, &uIndels1);
+	EstringCounts(es2, &uSymbols2, &uIndels2);
+	return uSymbols2 == uSymbols1 + uIndels1;
+	}
+
+static inline void AppendGaps(short esp[], int &ip, int n)
+	{
+// n is a (negative) gap count and ip the index of the last entry written,
+// -1 when none. Merge n into that entry when it is already a gap run;
+// otherwise start a new entry.
+	if (-1 != ip && esp[ip] < 0)
+		esp[ip] += n;
+	else
+		esp[++ip] = n;
+	}
+
+static inline void AppendSymbols(short esp[], int &ip, int n)
+	{
+// n is a (positive) symbol count and ip the index of the last entry
+// written, -1 when none. Merge n into that entry when it is already a
+// symbol run; otherwise start a new entry.
+	if (-1 != ip && esp[ip] > 0)
+		esp[ip] += n;
+	else
+		esp[++ip] = n;
+	}
+
+void MulEstrings(const short es1[], const short es2[], short esp[])	// writes the zero-terminated product es1*es2 into esp; the caller provides the storage
+	{
+	assert(CanMultiplyEstrings(es1, es2));
+
+	unsigned i1 = 0;
+	int ip = -1;	// index of the last entry written to esp, -1 while esp is empty
+	int n1 = es1[i1++];	// what is left of the current es1 entry
+	for (unsigned i2 = 0; ; ++i2)
+		{
+		int n2 = es2[i2];
+		if (0 == n2)
+			break;
+		if (n2 > 0)	// es2 copies n2 positions of es1's output, which may span several es1 entries
+			{
+			for (;;)
+				{
+				if (n1 < 0)	// current es1 entry is a gap run
+					{
+					if (n2 > -n1)	// gap run used up, n2 not yet satisfied
+						{
+						AppendGaps(esp, ip, n1);
+						n2 += n1;
+						n1 = es1[i1++];
+						}
+					else if (n2 == -n1)	// both used up together
+						{
+						AppendGaps(esp, ip, n1);
+						n1 = es1[i1++];
+						break;
+						}
+					else	// n2 satisfied, part of the gap run remains in n1
+						{
+						assert(n2 < -n1);
+						AppendGaps(esp, ip, -n2);
+						n1 += n2;
+						break;
+						}
+					}
+				else	// current es1 entry is a symbol run; same three cases as above
+					{
+					assert(n1 > 0);
+					if (n2 > n1)
+						{
+						AppendSymbols(esp, ip, n1);
+						n2 -= n1;
+						n1 = es1[i1++];
+						}
+					else if (n2 == n1)
+						{
+						AppendSymbols(esp, ip, n1);
+						n1 = es1[i1++];
+						break;
+						}
+					else
+						{
+						assert(n2 < n1);
+						AppendSymbols(esp, ip, n2);
+						n1 -= n2;
+						break;
+						}
+					}
+				}
+			}
+		else	// gaps inserted by es2 go straight to the product without consuming es1
+			{
+			assert(n2 < 0);
+			AppendGaps(esp, ip, n2);
+			}
+		}
+	esp[++ip] = 0;	// terminator; ip is now the number of entries in esp
+
+#if	DEBUG
+	{
+	int MaxLen = (int) (LengthEstring(es1) + LengthEstring(es2) + 1);
+	assert(ip < MaxLen);	// checked after esp has already been written
+	if (ip >= 2)
+		for (int i = 0; i < ip - 2; ++i)	// NOTE(review): stops at i == ip - 3, so the last pair (ip-2, ip-1) is not checked -- confirm whether intended
+			{
+			if (!(esp[i] > 0 && esp[i+1] < 0 || esp[i] < 0 && esp[i+1] > 0))
+				{
+				Log("Bad result of MulEstring: ");
+				LogEstring(esp);
+				Quit("Assert failed (alternating signs)");
+				}
+			}
+	unsigned uSymbols1;
+	unsigned uSymbols2;
+	unsigned uSymbolsp;
+	unsigned uIndels1;
+	unsigned uIndels2;
+	unsigned uIndelsp;
+	EstringCounts(es1, &uSymbols1, &uIndels1);
+	EstringCounts(es2, &uSymbols2, &uIndels2);
+	EstringCounts(esp, &uSymbolsp, &uIndelsp);	// the esp counts are computed but not compared below
+	if (uSymbols1 + uIndels1 != uSymbols2)
+		{
+		Log("Bad result of MulEstring: ");
+		LogEstring(esp);
+		Quit("Assert failed (counts1 %u %u %u)",
+		  uSymbols1, uIndels1, uSymbols2);
+		}
+	}
+#endif
+	}
+
+static void test(const short es1[], const short es2[], const short esa[])
+	{
+// Log es1, es2 and the expected product esa next to the product computed
+// by MulEstrings, together with the effect of each estring on a string of
+// 'X' characters. " *ERROR* " is logged when the computed product differs
+// from esa. All strings obtained from EstringOp are released before return.
+	unsigned uSymbols1;
+	unsigned uSymbols2;
+	unsigned uIndels1;
+	unsigned uIndels2;
+	EstringCounts(es1, &uSymbols1, &uIndels1);
+	EstringCounts(es2, &uSymbols2, &uIndels2);
+
+	char s[4096];
+	memset(s, 'X', sizeof(s));
+	s[uSymbols1] = 0;
+
+	char *s1 = EstringOp(es1, s);
+	char *s12 = EstringOp(es2, s1);
+
+	memset(s, 'X', sizeof(s));
+	s[uSymbols2] = 0;
+	char *s2 = EstringOp(es2, s);
+
+	Log("%s * %s = %s\n", s1, s2, s12);
+
+	LogEstring(es1);
+	Log(" * ");
+	LogEstring(es2);
+	Log(" = ");
+	LogEstring(esa);
+	Log("\n");
+
+	short esp[4096];
+	MulEstrings(es1, es2, esp);
+	LogEstring(esp);
+	if (!EstringsEq(esp, esa))
+		Log(" *ERROR* ");
+	Log("\n");
+
+	memset(s, 'X', sizeof(s));
+	s[uSymbols1] = 0;
+	char *sp = EstringOp(esp, s);
+	Log("%s\n", sp);
+	Log("\n==========\n\n");
+
+// EstringOp allocates its result with new char[]; these were never freed.
+	delete[] s1;
+	delete[] s12;
+	delete[] s2;
+	delete[] sp;
+	}
+
+void TestEstrings()	// ad-hoc driver: runs test() on one hard-coded case, then terminates the process
+	{
+	SetListFileName("c:\\tmp\\muscle.log", false);	// NOTE(review): hard-coded Windows path
+	//{
+	//short es1[] = { -1, 1, -1, 0 };
+	//short es2[] = { 1, -1, 2, 0 };
+	//short esa[] = { -2, 1, -1, 0 };
+	//test(es1, es2, esa);
+	//}
+	//{
+	//short es1[] = { 2, -1, 2, 0 };
+	//short es2[] = { 1, -1, 3, -1, 1, 0 };
+	//short esa[] = { 1, -1, 1, -1, 1, -1, 1, 0 };
+	//test(es1, es2, esa);
+	//}
+	//{
+	//short es1[] = { -1, 3, 0 };
+	//short es2[] = { 2, -1, 2, 0 };
+	//short esa[] = { -1, 1, -1, 2, 0 };
+	//test(es1, es2, esa);
+	//}
+	//{
+	//short es1[] = { -1, 1, -1, 1, 0};
+	//short es2[] = { 4, 0 };
+	//short esa[] = { -1, 1, -1, 1, 0};
+	//test(es1, es2, esa);
+	//}
+	//{
+	//short es1[] = { 1, -1, 1, -1, 0};
+	//short es2[] = { 4, 0 };
+	//short esa[] = { 1, -1, 1, -1, 0};
+	//test(es1, es2, esa);
+	//}
+	//{
+	//short es1[] = { 1, -1, 1, -1, 0};
+	//short es2[] = { -1, 4, -1, 0 };
+	//short esa[] = { -1, 1, -1, 1, -2, 0};
+	//test(es1, es2, esa);
+	//}
+	{
+	short es1[] = { 106, -77, 56, -2, 155, -3, 123, -2, 0};
+	short es2[] = { 50, -36, 34, -3, 12, -6, 1, -6, 18, -17, 60, -5, 349, -56, 0 };
+	short esa[] = { 0 };	// no expected product given, so test() is expected to log " *ERROR* " for this case
+	test(es1, es2, esa);
+	}
+	exit(0);
+	}

Added: trunk/packages/muscle/branches/upstream/current/estring.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/estring.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/estring.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,13 @@
+#ifndef pathsum_h	// NOTE(review): guard name does not match this file (estring.h); would clash with any header that also uses pathsum_h
+#define pathsum_h
+
+void PathToEstrings(const PWPath &Path, short **ptresA, short **ptresB);	// results are allocated with new[]
+void EstringsToPath(const short esA[], const short esB[], PWPath &Path);
+void MulEstrings(const short es1[], const short es2[], short esp[]);	// esp is caller-provided storage
+void EstringOp(const short es[], const Seq &sIn, Seq &sOut);
+unsigned EstringOp(const short es[], const Seq &sIn, MSA &a);	// returns the column count written to a
+void LogEstring(const short es[]);
+unsigned LengthEstring(const short es[]);	// entry count, not including the zero terminator
+short *EstringNewCopy(const short es[]);	// copy is allocated with new[]
+
+#endif	// pathsum_h

Added: trunk/packages/muscle/branches/upstream/current/fasta.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/fasta.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/fasta.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,56 @@
+#include "muscle.h"
+#include <stdio.h>
+#include <ctype.h>
+#include "msa.h"
+#include "textfile.h"
+
+const unsigned FASTA_BLOCK = 60;
+
+void MSA::FromFASTAFile(TextFile &File)
+	{
+// Replace the contents of this MSA with the records read from File.
+// Records are read with GetFastaSeq until it returns 0; gap characters are
+// kept (DeleteGaps = false). The unused locals of the previous version
+// (uSeqCount, uColCount) are gone.
+	Clear();
+
+	FILE *f = File.GetStdioFile();
+
+	for (;;)
+		{
+		char *Label;
+		unsigned uSeqLength;
+		char *SeqData = GetFastaSeq(f, &uSeqLength, &Label, false);
+		if (0 == SeqData)
+			break;
+	// NOTE(review): whether AppendSeq copies or takes ownership of SeqData
+	// and Label is not visible here -- confirm before adding delete[].
+		AppendSeq(SeqData, uSeqLength, Label);
+		}
+	}
+
+void MSA::ToFASTAFile(TextFile &File) const
+	{
+// Write every sequence as a ">name" line followed by its columns in lines
+// of at most FASTA_BLOCK characters.
+	const unsigned uColCount = GetColCount();
+	assert(uColCount > 0);
+	const unsigned uSeqCount = GetSeqCount();
+
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		File.PutString(">");
+		File.PutString(GetSeqName(uSeqIndex));
+		File.PutString("\n");
+
+	// Walk the row block by block. The previous version derived a line
+	// count from (uColCount - 1)/FASTA_BLOCK, which wraps around when
+	// uColCount is 0 and asserts are compiled out; this loop simply
+	// writes no data lines in that case.
+		for (unsigned uCol = 0; uCol < uColCount; )
+			{
+			unsigned uLetters = uColCount - uCol;
+			if (uLetters > FASTA_BLOCK)
+				uLetters = FASTA_BLOCK;
+			for (unsigned i = 0; i < uLetters; ++i)
+				{
+				char c = GetChar(uSeqIndex, uCol);
+				File.PutChar(c);
+				++uCol;
+				}
+			File.PutChar('\n');
+			}
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/fasta2.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/fasta2.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/fasta2.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,117 @@
+#include "muscle.h"
+#include <stdio.h>
+#include <errno.h>
+
+const int BUFFER_BYTES = 16*1024;
+const int CR = '\r';
+const int NL = '\n';
+
+// Append c to Buffer at Pos, growing Buffer by BUFFER_BYTES when it is full.
+// Expects Buffer, BufferLength and Pos to be declared at the point of use.
+// Used without a trailing semicolon, so the expansion stays a plain block.
+#define ADD(c)															\
+		{																\
+		if (Pos >= BufferLength)										\
+			{															\
+			const int NewBufferLength = BufferLength + BUFFER_BYTES;	\
+			char *NewBuffer	= new char[NewBufferLength];				\
+			if (BufferLength > 0)	/* Buffer is null on first growth */	\
+				memcpy(NewBuffer, Buffer, BufferLength);				\
+			delete[] Buffer;											\
+			Buffer = NewBuffer;											\
+			BufferLength = NewBufferLength;								\
+			}															\
+		Buffer[Pos++] = (c);											\
+		}
+
+// Get next sequence from file.
+// Returns the sequence characters in a buffer allocated with new[] (not
+// nul-terminated; the length is stored in *ptrSeqLength) and stores the
+// nul-terminated label, also allocated with new[], in *ptrLabel.
+// Returns 0 at end of file. Letters are upper-cased, white space is dropped,
+// gap characters are kept unless DeleteGaps is true, and records with no
+// sequence data are skipped.
+char *GetFastaSeq(FILE *f, unsigned *ptrSeqLength, char **ptrLabel, bool DeleteGaps)
+	{
+	unsigned BufferLength = 0;
+	unsigned Pos = 0;
+	char *Buffer = 0;
+
+	int c = fgetc(f);
+	if (EOF == c)
+		return 0;
+	if ('>' != c)
+		Quit("Invalid file format, expected '>' to start FASTA label");
+
+	for (;;)
+		{
+		int c = fgetc(f);
+		if (EOF == c)
+			Quit("End-of-file or input error in FASTA label");
+
+	// Ignore CR (discard, do not include in label)
+		if (CR == c)
+			continue;
+
+	// NL terminates label
+		if (NL == c)
+			break;
+
+	// All other characters added to label
+		ADD(c)
+		}
+
+// Nul-terminate label
+	ADD(0)
+	*ptrLabel = Buffer;
+
+// Start a fresh buffer for the sequence data
+	BufferLength = 0;
+	Pos = 0;
+	Buffer = 0;
+	int PreviousChar = NL;
+	for (;;)
+		{
+		int c = fgetc(f);
+		if (EOF == c)
+			{
+			if (feof(f))
+				break;
+			else if (ferror(f))
+				Quit("Error reading FASTA file, ferror=TRUE feof=FALSE errno=%d %s",
+				  errno, strerror(errno));
+			else
+				Quit("Error reading FASTA file, fgetc=EOF feof=FALSE ferror=FALSE errno=%d %s",
+				  errno, strerror(errno));
+			}
+
+	// '>' at the start of a line begins the next record; push it back
+		if ('>' == c)
+			{
+			if (NL == PreviousChar)
+				{
+				ungetc(c, f);
+				break;
+				}
+			else
+				Quit("Unexpected '>' in FASTA sequence data");
+			}
+		else if (isspace(c))
+			;
+		else if (IsGapChar(c))
+			{
+			if (!DeleteGaps)
+				ADD(c)
+			}
+		else if (isalpha(c))
+			{
+			c = toupper(c);
+			ADD(c)
+			}
+		else if (isprint(c))
+			{
+			Warning("Invalid character '%c' in FASTA sequence data, ignored", c);
+			continue;
+			}
+		else
+			{
+			Warning("Invalid byte hex %02x in FASTA sequence data, ignored", (unsigned char) c);
+			continue;
+			}
+		PreviousChar = c;
+		}
+
+// A record without sequence data is skipped. Its label is released first:
+// the recursive call either overwrites *ptrLabel or returns 0 at end of
+// file, so the buffer was previously leaked. (Buffer is still null here
+// because nothing was added to it.)
+	if (0 == Pos)
+		{
+		delete[] *ptrLabel;
+		*ptrLabel = 0;
+		return GetFastaSeq(f, ptrSeqLength, ptrLabel, DeleteGaps);
+		}
+
+	*ptrSeqLength = Pos;
+	return Buffer;
+	}

Added: trunk/packages/muscle/branches/upstream/current/fastclust.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/fastclust.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/fastclust.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,57 @@
+#include "muscle.h"
+#include "seqvect.h"
+#include "distfunc.h"
+#include "clust.h"
+#include "clustsetdf.h"
+#include "tree.h"
+#include "clust.h"
+#include "distcalc.h"
+#include <math.h>
+
+static void TreeFromSeqVect_NJ(const DistFunc &DF, CLUSTER Cluster, Tree &tree)
+	{
+// Wrap DF in a ClustSetDF, run Clust on it, then convert the result to a tree.
+	ClustSetDF ClustSet(DF);
+
+	Clust Clusterer;
+	Clusterer.Create(ClustSet, Cluster);
+
+	tree.FromClust(Clusterer);
+	}
+
+static void TreeFromSeqVect_UPGMA(const DistFunc &DF, CLUSTER Cluster, Tree &tree)
+	{
+// Translate the clustering method into the linkage rule handed to UPGMA2.
+// Any method other than the four UPGM* ones is reported through Quit.
+	LINKAGE Linkage = LINKAGE_Undefined;
+	if (CLUSTER_UPGMA == Cluster)
+		Linkage = LINKAGE_Avg;
+	else if (CLUSTER_UPGMAMin == Cluster)
+		Linkage = LINKAGE_Min;
+	else if (CLUSTER_UPGMAMax == Cluster)
+		Linkage = LINKAGE_Max;
+	else if (CLUSTER_UPGMB == Cluster)
+		Linkage = LINKAGE_Biased;
+	else
+		Quit("TreeFromSeqVect_UPGMA, CLUSTER_%u not supported", Cluster);
+
+	DistCalcDF DistCalc;
+	DistCalc.Init(DF);
+	UPGMA2(DistCalc, tree, Linkage);
+	}
+
+void TreeFromSeqVect(const SeqVect &v, Tree &tree, CLUSTER Cluster,
+  DISTANCE Distance, ROOT Root)
+	{
+// Distances between the unaligned sequences, then a tree built by the
+// requested clustering method, then FixRoot with the requested rooting.
+	DistFunc DF;
+	DistUnaligned(v, Distance, DF);
+	if (CLUSTER_NeighborJoining != Cluster)
+		TreeFromSeqVect_UPGMA(DF, Cluster, tree);
+	else
+		TreeFromSeqVect_NJ(DF, Cluster, tree);
+	FixRoot(tree, Root);
+	}

Added: trunk/packages/muscle/branches/upstream/current/fastdist.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/fastdist.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/fastdist.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,50 @@
+#include "muscle.h"
+#include "distfunc.h"
+#include "seqvect.h"
+
+void DistUnaligned(const SeqVect &v, DISTANCE DistMethod, DistFunc &DF)
+	{
+// Fill DF from the sequences in v with the routine selected by DistMethod,
+// then copy each sequence's name and id into DF. An unsupported method is
+// reported through Quit.
+	const unsigned uSeqCount = v.Length();
+
+	if (DISTANCE_Kmer6_6 == DistMethod)
+		DistKmer6_6(v, DF);
+	else if (DISTANCE_Kmer20_3 == DistMethod)
+		DistKmer20_3(v, DF);
+	else if (DISTANCE_Kmer20_4 == DistMethod)
+		FastDistKmer(v, DF);
+	else if (DISTANCE_Kbit20_3 == DistMethod)
+		DistKbit20_3(v, DF);
+	else if (DISTANCE_Kmer4_6 == DistMethod)
+		DistKmer4_6(v, DF);
+	else if (DISTANCE_PWKimura == DistMethod)
+		DistPWKimura(v, DF);
+	else
+		Quit("DistUnaligned, unsupported distance method %d", DistMethod);
+
+	for (unsigned i = 0; i < uSeqCount; ++i)
+		{
+		const Seq &seq = *(v[i]);
+
+		const char *ptrSeqName = seq.GetName();
+		const unsigned uSeqId = seq.GetId();
+
+		DF.SetName(i, ptrSeqName);
+		DF.SetId(i, uSeqId);
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/fastdistjones.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/fastdistjones.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/fastdistjones.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,206 @@
+#include "muscle.h"
+#include "distfunc.h"
+#include "seqvect.h"
+#include <math.h>
+
+// Number of distinct 3-letter words over a 20-letter alphabet (20^3 = 8,000).
+const unsigned TRIPLE_COUNT = 20*20*20;
+
+// Occurrence record for one 3-letter word; DistKmer20_3 keeps one record
+// for each of the TRIPLE_COUNT possible words.
+struct TripleCount
+	{
+	unsigned m_uSeqCount;			// How many sequences have this triple?
+	unsigned short *m_Counts;		// m_Counts[s] = nr of times triple found in seq s
+	};
+// Table of TRIPLE_COUNT records; allocated and released inside DistKmer20_3.
+static TripleCount *TripleCounts;
+
+// WARNING: Sequences MUST be stripped of gaps and upper case!
+//
+// Score every pair of sequences in v by the 3-letter words (triples) they
+// share, and store the result in DF.
+//
+// For each pair, the shared-triple count is the sum, over all triples made
+// of letters 0..19, of the smaller of the two per-sequence occurrence
+// counts. The value stored in DF is that count divided by
+// (length of the shorter sequence - 2). Pairs in which either sequence is
+// shorter than 3 letters, or which share no triple, are stored as 1.0.
+// Positions whose triple contains a letter >= 20 are skipped.
+void DistKmer20_3(const SeqVect &v, DistFunc &DF)
+	{
+	const unsigned uSeqCount = v.Length();
+
+	DF.SetCount(uSeqCount);
+	if (0 == uSeqCount)
+		return;
+
+// DF doubles as the accumulator for shared-triple counts, so start at zero.
+	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
+		{
+		DF.SetDist(uSeq1, uSeq1, 0);
+		for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
+			DF.SetDist(uSeq1, uSeq2, 0);
+		}
+
+	const unsigned uTripleArrayBytes = TRIPLE_COUNT*sizeof(TripleCount);
+	TripleCounts = (TripleCount *) malloc(uTripleArrayBytes);
+	if (0 == TripleCounts)
+		Quit("Not enough memory (TripleCounts)");
+	memset(TripleCounts, 0, uTripleArrayBytes);
+
+	for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)
+		{
+		TripleCount &tc = *(TripleCounts + uWord);
+		const unsigned uBytes = uSeqCount*sizeof(short);
+		tc.m_Counts = (unsigned short *) malloc(uBytes);
+		if (0 == tc.m_Counts)
+			Quit("Not enough memory (TripleCounts)");
+		memset(tc.m_Counts, 0, uBytes);
+		}
+
+// Count the occurrences of every triple in every sequence.
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		Seq &s = *(v[uSeqIndex]);
+		const unsigned uSeqLength = s.Length();
+	// "uPos + 2 < uSeqLength" rather than "uPos < uSeqLength - 2": the
+	// subtraction wraps around when the sequence has fewer than 2 letters.
+		for (unsigned uPos = 0; uPos + 2 < uSeqLength; ++uPos)
+			{
+			const unsigned uLetter1 = CharToLetterEx(s[uPos]);
+			if (uLetter1 >= 20)
+				continue;
+			const unsigned uLetter2 = CharToLetterEx(s[uPos+1]);
+			if (uLetter2 >= 20)
+				continue;
+			const unsigned uLetter3 = CharToLetterEx(s[uPos+2]);
+			if (uLetter3 >= 20)
+				continue;
+
+			const unsigned uWord = uLetter1 + uLetter2*20 + uLetter3*20*20;
+			assert(uWord < TRIPLE_COUNT);
+
+			TripleCount &tc = *(TripleCounts + uWord);
+			const unsigned uOldCount = tc.m_Counts[uSeqIndex];
+			if (0 == uOldCount)
+				++(tc.m_uSeqCount);
+
+			++(tc.m_Counts[uSeqIndex]);
+			}
+		}
+
+#if TRACE
+	{
+	Log("TripleCounts\n");
+	unsigned uGrandTotal = 0;
+	for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)
+		{
+		const TripleCount &tc = *(TripleCounts + uWord);
+		if (0 == tc.m_uSeqCount)
+			continue;
+
+		const unsigned uLetter3 = uWord/(20*20);
+		const unsigned uLetter2 = (uWord - uLetter3*20*20)/20;
+		const unsigned uLetter1 = uWord%20;
+		Log("Word %6u %c%c%c   %6u",
+		  uWord,
+		  LetterToCharAmino(uLetter1),
+		  LetterToCharAmino(uLetter2),
+		  LetterToCharAmino(uLetter3),
+		  tc.m_uSeqCount);
+
+		unsigned uSeqCountWithThisWord = 0;
+		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+			{
+			const unsigned uCount = tc.m_Counts[uSeqIndex];
+			if (uCount > 0)
+				{
+				++uSeqCountWithThisWord;
+				Log(" %u=%u", uSeqIndex, uCount);
+				uGrandTotal += uCount;
+				}
+			}
+		if (uSeqCountWithThisWord != tc.m_uSeqCount)
+			Log(" *** SQ ERROR *** %u %u", tc.m_uSeqCount, uSeqCountWithThisWord);
+		Log("\n");
+		}
+	
+	unsigned uTotalBySeqLength = 0;
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		Seq &s = *(v[uSeqIndex]);
+		const unsigned uSeqLength = s.Length();
+	// Sequences shorter than 3 contribute no triples (and must not wrap).
+		if (uSeqLength > 2)
+			uTotalBySeqLength += uSeqLength - 2;
+		}
+	if (uGrandTotal != uTotalBySeqLength)
+		Log("*** TOTALS DISAGREE *** %u %u\n", uGrandTotal, uTotalBySeqLength);
+	}
+#endif
+
+// SeqList holds the indexes of the sequences containing the current word.
+// Its element type is unsigned so that indexes above 65535 are not truncated.
+	const unsigned uSeqListBytes = uSeqCount*sizeof(unsigned);
+	unsigned *SeqList = (unsigned *) malloc(uSeqListBytes);
+	if (0 == SeqList)
+		Quit("Not enough memory (SeqList)");
+
+	for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)
+		{
+		const TripleCount &tc = *(TripleCounts + uWord);
+		if (0 == tc.m_uSeqCount)
+			continue;
+
+	// Only entries [0, uSeqCountFound) are read below, and each of them is
+	// written first, so the list does not need clearing between words.
+		unsigned uSeqCountFound = 0;
+		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+			{
+			if (tc.m_Counts[uSeqIndex] > 0)
+				{
+				SeqList[uSeqCountFound] = uSeqIndex;
+				++uSeqCountFound;
+				if (uSeqCountFound == tc.m_uSeqCount)
+					break;
+				}
+			}
+		assert(uSeqCountFound == tc.m_uSeqCount);
+
+	// Add min(count1, count2) to the running total of every pair of
+	// sequences that both contain this word.
+		for (unsigned uSeq1 = 0; uSeq1 < uSeqCountFound; ++uSeq1)
+			{
+			const unsigned uSeqIndex1 = SeqList[uSeq1];
+			const unsigned uCount1 = tc.m_Counts[uSeqIndex1];
+			for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
+				{
+				const unsigned uSeqIndex2 = SeqList[uSeq2];
+				const unsigned uCount2 = tc.m_Counts[uSeqIndex2];
+				const unsigned uMinCount = uCount1 < uCount2 ? uCount1 : uCount2;
+				const double d = DF.GetDist(uSeqIndex1, uSeqIndex2);
+				DF.SetDist(uSeqIndex1, uSeqIndex2, (float) (d + uMinCount));
+				}
+			}
+		}
+
+// Everything here came from malloc, so release it with free; the per-word
+// count arrays must be released before the table that points to them.
+	free(SeqList);
+	for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)
+		free(TripleCounts[uWord].m_Counts);
+	free(TripleCounts);
+	TripleCounts = 0;
+
+// Convert the accumulated counts to the final per-pair value.
+	unsigned uDone = 0;
+	const unsigned uTotal = (uSeqCount*(uSeqCount - 1))/2;
+	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
+		{
+		DF.SetDist(uSeq1, uSeq1, 0.0);
+
+		const Seq &s1 = *(v[uSeq1]);
+		const unsigned uLength1 = s1.Length();
+
+		for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
+			{
+		// Report and count progress first so that the early exits below
+		// are counted as well.
+			if (uDone%1000 == 0)
+				Progress(uDone, uTotal);
+			++uDone;
+
+			const Seq &s2 = *(v[uSeq2]);
+			const unsigned uLength2 = s2.Length();
+			unsigned uMinLength = uLength1 < uLength2 ? uLength1 : uLength2;
+			if (uMinLength < 3)
+				{
+				DF.SetDist(uSeq1, uSeq2, 1.0);
+				continue;
+				}
+
+			const double dTripleCount = DF.GetDist(uSeq1, uSeq2);
+			if (dTripleCount == 0)
+				{
+				DF.SetDist(uSeq1, uSeq2, 1.0);
+				continue;
+				}
+			double dNormalizedTripletScore = dTripleCount/(uMinLength - 2);
+			DF.SetDist(uSeq1, uSeq2, (float) dNormalizedTripletScore);
+
+#if	TRACE
+			{
+		// Logged for reference only; the stored value is the normalized
+		// triplet score above.
+			double dEstimatedPairwiseIdentity = exp(0.3912*log(dNormalizedTripletScore));
+			if (dEstimatedPairwiseIdentity > 1)
+				dEstimatedPairwiseIdentity = 1;
+			Log("%s - %s  Triplet count = %g  Lengths %u, %u Estimated pwid = %g\n",
+			  s1.GetName(), s2.GetName(), dTripleCount, uLength1, uLength2,
+			  dEstimatedPairwiseIdentity);
+			}
+#endif
+			}
+		}
+	ProgressStepsDone();
+	}

Added: trunk/packages/muscle/branches/upstream/current/fastdistkbit.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/fastdistkbit.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/fastdistkbit.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,109 @@
+#include "muscle.h"
+#include "distfunc.h"
+#include "seqvect.h"
+#include <math.h>
+
+#define	MIN(x, y)	((x) < (y) ? (x) : (y))
+
+// Set, in Bits, one bit for every distinct 3-mer that occurs in s.
+//
+// Bits is a bit vector indexed by k-mer code: the code of letters
+// x0 x1 x2 is (x0*20 + x1)*20 + x2, stored at bit (code%8) of byte (code/8).
+// Bits is only OR-ed into, so the caller must clear it first.
+// A letter above AX_Y breaks the current run, so no k-mer containing such
+// a letter is recorded. Sequences shorter than 3 set no bits.
+static void SetKmerBitVector(const Seq &s, byte Bits[])
+	{
+	const unsigned uLength = s.Length();
+	const unsigned k = 3;	// kmer length
+
+	unsigned c = 0;		// rolling code of the most recent letters
+	unsigned uRun = 0;	// consecutive letters <= AX_Y ending at position i
+	for (unsigned i = 0; i < uLength; ++i)
+		{
+		const unsigned x = CharToLetterEx(s[i]);
+		if (x > AX_Y)
+			{
+			c = 0;
+			uRun = 0;
+			continue;
+			}
+
+	// The modulus drops the letter that has just left the 3-letter window.
+		c = (c*20 + x)%8000;
+		++uRun;
+		if (uRun < k)
+			continue;
+
+		const unsigned ByteOffset = c/8;
+		const unsigned BitOffset = c%8;
+		Bits[ByteOffset] |= (1 << BitOffset);
+		}
+	}
+
+// Return the number of bit positions that are set in both Bits1 and Bits2.
+// Both arguments are 1,000-byte (8,000-bit) vectors.
+static unsigned CommonBitCount(const byte Bits1[], const byte Bits2[])
+	{
+	const unsigned uByteCount = 1000;
+
+	unsigned uCount = 0;
+	for (unsigned n = 0; n < uByteCount; ++n)
+		{
+	// A bit is common only if it is set in both bytes, hence the AND.
+		unsigned b = Bits1[n] & Bits2[n];
+		while (b != 0)
+			{
+		// Clear the lowest set bit; one iteration per set bit.
+			b &= b - 1;
+			++uCount;
+			}
+		}
+	return uCount;
+	}
+
+// Fill DF with a k-bit score for every pair of sequences in v:
+// (number of bits reported by CommonBitCount) / (length of the shorter
+// sequence). Only entries (i, j) with j < i are written.
+void DistKbit20_3(const SeqVect &v, DistFunc &DF)
+	{
+// There are 20^3 = 8,000 distinct kmers in the 20-letter alphabet, so a
+// vector with one bit per kmer takes 1,000 bytes. One such vector is
+// built per sequence.
+	const unsigned uBytesPerSeq = 1000;
+
+	const unsigned uSeqCount = v.Length();
+	DF.SetCount(uSeqCount);
+
+	const unsigned uBytes = uSeqCount*uBytesPerSeq;
+	byte *BitVector = new byte[uBytes];
+	memset(BitVector, 0, uBytes);
+
+	SetProgressDesc("K-bit distance matrix");
+	for (unsigned n = 0; n < uSeqCount; ++n)
+		SetKmerBitVector(*v[n], BitVector + n*uBytesPerSeq);
+
+	const unsigned uTotal = (uSeqCount*(uSeqCount - 1))/2;
+	unsigned uDone = 0;
+	for (unsigned uRow = 0; uRow < uSeqCount; ++uRow)
+		{
+		const unsigned uRowLength = v[uRow]->Length();
+		const byte *RowBits = BitVector + uRow*uBytesPerSeq;
+		for (unsigned uCol = 0; uCol < uRow; ++uCol)
+			{
+			const unsigned uColLength = v[uCol]->Length();
+			const byte *ColBits = BitVector + uCol*uBytesPerSeq;
+
+		// Value stored = K / min(L1, L2), where K is the count returned
+		// by CommonBitCount for the two bit vectors.
+			const float fCount = (float) CommonBitCount(RowBits, ColBits);
+			const unsigned uMinLength = MIN(uRowLength, uColLength);
+			const float fDist = fCount / uMinLength;
+			DF.SetDist(uRow, uCol, fDist);
+
+			if (uDone%10000 == 0)
+				Progress(uDone, uTotal);
+			++uDone;
+			}
+		}
+	ProgressStepsDone();
+
+	delete[] BitVector;
+	}

Added: trunk/packages/muscle/branches/upstream/current/fastdistkmer.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/fastdistkmer.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/fastdistkmer.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,247 @@
+#include "muscle.h"
+#include "msa.h"
+#include "seqvect.h"
+#include "seq.h"
+#include "distfunc.h"
+#include <math.h>
+
+#define TRACE	0
+
+/***
+Some candidate alphabets considered because they
+have high correlations and small table sizes.
+Correlation coefficient is between k-mer distance
+and %id D measured from a CLUSTALW alignment.
+Table size is N^k where N is size of alphabet.
+A is standard (uncompressed) amino alphabet.
+
+                           Correlation
+Alpha   N  k  Table Size   all   25-50%
+-----  --  -  ----------   ----  ------
+A      20  3       8,000  0.943   0.575
+A      20  4     160,000  0.962   0.685 <<
+LiA    14  4      38,416  0.966   0.645
+SEB    14  4      38,416  0.964   0.634
+LiA    13  4      28,561  0.965   0.640
+LiA    12  4      20,736  0.963   0.620
+LiA    10  5     100,000  0.964   0.652
+
+We select A with k=4 because it has the best
+correlations. The only drawback is a large table
+size, but space is readily available and the only 
+additional time cost is in resetting the table to
+zero, which can be done quickly with memset or by
+keeping a list of the k-mers that were found (should
+test to see which is faster, and may vary by compiler
+and processor type). It also has the minor advantage
+that we don't need to convert the alphabet.
+
+Fractional identity d is estimated as follows.
+
+	F = fractional k-mer count
+	if F is 0: F = 0.01
+	Y = log(0.02 + F)
+	d = (Y + 4.1)/4.12	(i.e. Y = -4.1 + 4.12*d)
+
+The constant 0.02 was chosen to make the relationship
+between Y and D linear. The constants -4.1 and 4.12
+were chosen to fit a straight line to the scatterplot
+of Y vs D.
+***/
+
+#define MIN(x, y)	(((x) < (y)) ? (x) : (y))
+
+const unsigned K = 4;
+const unsigned N = 20;
+const unsigned N_2 = 20*20;
+const unsigned N_3 = 20*20*20;
+const unsigned N_4 = 20*20*20*20;
+
+const unsigned TABLE_SIZE = N_4;
+
+// For debug output: spell out the four letters encoded in Kmer, most
+// significant base-N digit first. The result lives in a static buffer that
+// the next call overwrites.
+const char *KmerToStr(unsigned Kmer)
+	{
+	static char s[5];	// 4 letters; s[4] is never written and stays 0
+
+	// Peel base-N digits off the low end, filling the string right to left.
+	unsigned uRest = Kmer;
+	for (int i = 3; i >= 0; --i)
+		{
+		s[i] = LetterToChar(uRest%N);
+		uRest /= N;
+		}
+	return s;
+	}
+
+// Count the occurrences of every K-letter word (K = 4) in the letter
+// sequence s[0..uSeqLength-1].
+//
+// KmerCounts must have room for TABLE_SIZE entries; it is cleared first and
+// then KmerCounts[code] is incremented once per occurrence, where code is
+// the base-N value of the four letters, first letter most significant.
+// Sequences shorter than K contain no k-mer and leave the table all zero.
+// NOTE: the counters are bytes, so a count wraps around after 255.
+void CountKmers(const byte s[], unsigned uSeqLength, byte KmerCounts[])
+	{
+#if	TRACE
+	Log("CountKmers\n");
+#endif
+	memset(KmerCounts, 0, TABLE_SIZE*sizeof(byte));
+
+	// Without this guard the code below would read s[0..3] past the end of
+	// a short sequence and the loop's end test would never be met.
+	if (uSeqLength < K)
+		return;
+
+	const byte *ptrKmerStart = s;
+	const byte *ptrKmerEnd = s + K;
+	const byte *ptrSeqEnd = s + uSeqLength;
+
+	unsigned c3 = s[0]*N_3;
+	unsigned c2 = s[1]*N_2;
+	unsigned c1 = s[2]*N;
+	unsigned c0 = s[3];
+
+	unsigned Kmer = c3 + c2 + c1 + c0;
+
+	for (;;)
+		{
+		assert(Kmer < TABLE_SIZE);
+
+#if	TRACE
+		Log("Kmer=%d=%s\n", Kmer, KmerToStr(Kmer));
+#endif
+		++(KmerCounts[Kmer]);
+
+		if (ptrKmerEnd == ptrSeqEnd)
+			break;
+
+	// Compute k-mer as function of previous k-mer:
+	// 1. Subtract first letter from previous k-mer.
+	// 2. Multiply by N.
+	// 3. Add next letter.
+		c3 = (*ptrKmerStart++) * N_3;
+		Kmer = (Kmer - c3)*N;
+		Kmer += *ptrKmerEnd++;
+		}
+	}
+
+// Return the number of k-mers (K = 4) shared by two letter sequences.
+//
+// Seq / uSeqLength	first sequence and its length
+// KmerCounts1		k-mer counts of the first sequence, as produced by
+//					CountKmers
+// Seq2 / uSeqLength2	second sequence and its length
+//
+// The result is the sum, over the distinct k-mers of the first sequence, of
+// min(count in first, count in second). If either sequence is shorter than
+// K there is no k-mer to share and the result is 0.
+unsigned CommonKmerCount(const byte Seq[], unsigned uSeqLength,
+  const byte KmerCounts1[], const byte Seq2[], unsigned uSeqLength2)
+	{
+	// Guard before touching either sequence: the k-mer walk below (and the
+	// one in CountKmers) assumes at least K letters.
+	if (uSeqLength < K || uSeqLength2 < K)
+		return 0;
+
+	byte KmerCounts2[TABLE_SIZE];
+	CountKmers(Seq2, uSeqLength2, KmerCounts2);
+
+	const byte *ptrKmerStart = Seq;
+	const byte *ptrKmerEnd = Seq + K;
+	const byte *ptrSeqEnd = Seq + uSeqLength;
+
+	unsigned c3 = Seq[0]*N_3;
+	unsigned c2 = Seq[1]*N_2;
+	unsigned c1 = Seq[2]*N;
+	unsigned c0 = Seq[3];
+
+	unsigned Kmer = c3 + c2 + c1 + c0;
+
+	unsigned uCommonCount = 0;
+	for (;;)
+		{
+		assert(Kmer < TABLE_SIZE);
+
+		const byte Count1 = KmerCounts1[Kmer];
+		const byte Count2 = KmerCounts2[Kmer];
+
+		uCommonCount += MIN(Count1, Count2);
+
+	// Hack so we don't double-count: once a k-mer has been scored, zero
+	// its entry so later occurrences in Seq add nothing.
+		KmerCounts2[Kmer] = 0;
+
+		if (ptrKmerEnd == ptrSeqEnd)
+			break;
+
+	// Compute k-mer as function of previous k-mer:
+	// 1. Subtract first letter from previous k-mer.
+	// 2. Multiply by N.
+	// 3. Add next letter.
+		c3 = (*ptrKmerStart++) * N_3;
+		Kmer = (Kmer - c3)*N;
+		Kmer += *ptrKmerEnd++;
+		}
+	return uCommonCount;
+	}
+
+// Write the letter code of every character of s into Letters, which must
+// have room for s.Length() entries.
+static void SeqToLetters(const Seq &s, byte Letters[])
+	{
+	const unsigned uLength = s.Length();
+	for (unsigned n = 0; n != uLength; ++n)
+		{
+		const char cRaw = s.GetChar(n);
+	// Ugly hack. The k-mer counting code isn't wild-card aware, so
+	// wildcards are arbitrarily replaced by a specific amino acid.
+		const char cUsed = IsWildcardChar(cRaw) ? 'A' : cRaw;
+		Letters[n] = CharToLetter(cUsed);
+		}
+	}
+
+// Fill DF with a k-mer distance (K = 4) for every pair of sequences in v.
+//
+// For each pair, F = (shared k-mer count) / (shorter length - K + 1), with
+// F = 0 replaced by 0.01, and the distance stored is 1 - F. A pair in which
+// either sequence is shorter than K has no k-mers and is treated as
+// sharing none (F = 0.01).
+// Only entries (i, j) with i < j are written after the initial zero fill.
+void FastDistKmer(const SeqVect &v, DistFunc &DF)
+	{
+	byte KmerCounts[TABLE_SIZE];
+
+	const unsigned uSeqCount = v.GetSeqCount();
+
+	DF.SetCount(uSeqCount);
+	if (0 == uSeqCount)
+		return;
+
+// Initialize distance matrix to zero
+	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
+		{
+		DF.SetDist(uSeq1, uSeq1, 0);
+		for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
+			DF.SetDist(uSeq1, uSeq2, 0);
+		}
+
+// The letter buffers are sized for the longest sequence and reused.
+	unsigned uMaxLength = 0;
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		const Seq &s = v.GetSeq(uSeqIndex);
+		unsigned uSeqLength = s.Length();
+		if (uSeqLength > uMaxLength)
+			uMaxLength = uSeqLength;
+		}
+	if (0 == uMaxLength)
+		return;
+
+	byte *Seq1Letters = new byte[uMaxLength];
+	byte *Seq2Letters = new byte[uMaxLength];
+
+	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount - 1; ++uSeqIndex1)
+		{
+		const Seq &s1 = v.GetSeq(uSeqIndex1);
+		const unsigned uSeqLength1 = s1.Length();
+
+	// The k-mer walkers read the first K letters unconditionally, so they
+	// must only be given sequences of at least K letters.
+		const bool bHasKmers1 = (uSeqLength1 >= K);
+
+		SeqToLetters(s1, Seq1Letters);
+		if (bHasKmers1)
+			CountKmers(Seq1Letters, uSeqLength1, KmerCounts);
+
+		for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount;
+		  ++uSeqIndex2)
+			{
+			const Seq &s2 = v.GetSeq(uSeqIndex2);
+			const unsigned uSeqLength2 = s2.Length();
+
+			const unsigned uMinLength = MIN(uSeqLength1, uSeqLength2);
+			unsigned uCommonKmerCount = 0;
+			double F = 0.0;
+			if (bHasKmers1 && uSeqLength2 >= K)
+				{
+				SeqToLetters(s2, Seq2Letters);
+				uCommonKmerCount = CommonKmerCount(Seq1Letters, uSeqLength1,
+				  KmerCounts, Seq2Letters, uSeqLength2);
+
+			// uMinLength >= K here, so the divisor is at least 1.
+				F = (double) uCommonKmerCount / (uMinLength - K + 1);
+				}
+			if (0.0 == F)
+				F = 0.01;
+
+		// Y, EstimatedPctId and KD are only reported in the TRACE log;
+		// the distance stored is 1 - F.
+			double Y = log(0.02 + F);
+			double EstimatedPctId = Y/4.12 + 0.995;
+			double KD = KimuraDist(EstimatedPctId);
+//			DF.SetDist(uSeqIndex1, uSeqIndex2, (float) KD);
+			DF.SetDist(uSeqIndex1, uSeqIndex2, (float) (1 - F));
+#if	TRACE
+			Log("CommonCount=%u, MinLength=%u, F=%6.4f Y=%6.4f, %%id=%6.4f, KimuraDist=%8.4f\n",
+			  uCommonKmerCount, uMinLength, F, Y, EstimatedPctId, KD);
+#endif
+			}
+		}
+
+	delete[] Seq1Letters;
+	delete[] Seq2Letters;
+	}

Added: trunk/packages/muscle/branches/upstream/current/fastdistmafft.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/fastdistmafft.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/fastdistmafft.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,290 @@
+#include "muscle.h"
+#include "distfunc.h"
+#include "seqvect.h"
+#include <math.h>
+
+#define TRACE 0
+
+#define MIN(x, y)	(((x) < (y)) ? (x) : (y))
+#define MAX(x, y)	(((x) > (y)) ? (x) : (y))
+
+const unsigned TUPLE_COUNT = 6*6*6*6*6*6;
+static unsigned char Count1[TUPLE_COUNT];
+static unsigned char Count2[TUPLE_COUNT];
+
+// Amino acid groups according to MAFFT (sextet5)
+// 0 =  A G P S T
+// 1 =  I L M V
+// 2 =  N D Q E B Z
+// 3 =  R H K
+// 4 =  F W Y
+// 5 =  C
+// 6 =  X . - U
+//
+// ResidueGroup[letter] is the group number of a letter code; GetTuple
+// indexes it with the values DistKmer6_6 obtains from CharToLetterEx.
+// Group 6 from the legend above does not appear in the table: the X and
+// gap entries are mapped to group 0 instead (see the TODO notes below).
+unsigned ResidueGroup[] =
+	{
+	0,		// AX_A,
+	5,		// AX_C,
+	2,		// AX_D,
+	2,		// AX_E,
+	4,		// AX_F,
+	0,		// AX_G,
+	3,		// AX_H,
+	1,		// AX_I,
+	3,		// AX_K,
+	1,		// AX_L,
+	1,		// AX_M,
+	2,		// AX_N,
+	0,		// AX_P,
+	2,		// AX_Q,
+	3,		// AX_R,
+	0,		// AX_S,
+	0,		// AX_T,
+	1,		// AX_V,
+	4,		// AX_W,
+	4,		// AX_Y,
+
+	2,		// AX_B,	// D or N
+	2,		// AX_Z,	// E or Q
+	0,		// AX_X,	// Unknown		// ******** TODO *************
+										// This isn't the correct way of avoiding group 6
+	0		// AX_GAP,					// ******** TODO ******************
+	};
+// Number of entries in ResidueGroup; used to range-check letter codes.
+unsigned uResidueGroupCount = sizeof(ResidueGroup)/sizeof(ResidueGroup[0]);
+
+// Format tuple code t as its six base-6 digits, most significant first.
+// The result lives in a static buffer that the next call overwrites.
+static char *TupleToStr(int t)
+	{
+	static char s[7];	// 6 digits; s[6] is never written and stays 0
+
+	// Peel digits off the low end, filling the string right to left.
+	int iRest = t;
+	for (int i = 5; i >= 0; --i)
+		{
+		s[i] = '0' + iRest%6;
+		iRest /= 6;
+		}
+	return s;
+	}
+
+// Return the tuple code of the six letters uLetters[n..n+5]: each letter is
+// mapped to its group through ResidueGroup and the six group numbers are
+// read as a base-6 number with the first letter most significant.
+static unsigned GetTuple(const unsigned uLetters[], unsigned n)
+	{
+	unsigned uTuple = 0;
+	for (unsigned i = 0; i < 6; ++i)
+		{
+		const unsigned uLetter = uLetters[n + i];
+		assert(uLetter < uResidueGroupCount);
+	// Horner's rule: shift the digits so far up one place, append the next.
+		uTuple = uTuple*6 + ResidueGroup[uLetter];
+		}
+	return uTuple;
+	}
+
+// Clear Count (TUPLE_COUNT entries), then add one to Count[tuple] for the
+// tuple starting at each of the positions 0..uTupleCount-1 of L.
+// The counters are unsigned char, so a count wraps around after 255.
+static void CountTuples(const unsigned L[], unsigned uTupleCount, unsigned char Count[])
+	{
+	memset(Count, 0, TUPLE_COUNT*sizeof(unsigned char));
+
+	unsigned uStart = 0;
+	while (uStart != uTupleCount)
+		{
+		Count[GetTuple(L, uStart)] += 1;
+		++uStart;
+		}
+	}
+
+// Log every tuple whose count is non-zero, one "tuple  count" line each.
+static void ListCount(const unsigned char Count[])
+	{
+	for (unsigned uTuple = 0; uTuple < TUPLE_COUNT; ++uTuple)
+		{
+		const unsigned uTimes = Count[uTuple];
+		if (uTimes != 0)
+			Log("%s  %u\n", TupleToStr(uTuple), uTimes);
+		}
+	}
+
+// Fill DF with a 6-mer distance for every pair of sequences in v, using the
+// compressed alphabet given by ResidueGroup.
+//
+// Pass 1 computes, for every pair and for each sequence against itself, the
+// sum over the distinct tuples of the second sequence of
+// min(count in first, count in second).
+// Pass 2 turns those counts into distances: 3*(self - common)/self is
+// evaluated with each sequence's self count (a zero self count is replaced
+// by 1) and the smaller of the two values is stored.
+// Sequences shorter than 5 letters take no part in pass 1.
+void DistKmer6_6(const SeqVect &v, DistFunc &DF)
+	{
+	const unsigned uSeqCount = v.Length();
+
+	DF.SetCount(uSeqCount);
+	if (0 == uSeqCount)
+		return;
+
+// Initialize distance matrix to zero
+	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
+		{
+		DF.SetDist(uSeq1, uSeq1, 0);
+		for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
+			DF.SetDist(uSeq1, uSeq2, 0);
+		}
+
+// Convert to letters
+	unsigned **Letters = new unsigned *[uSeqCount];
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		Seq &s = *(v[uSeqIndex]);
+		const unsigned uSeqLength = s.Length();
+		unsigned *L = new unsigned[uSeqLength];
+		Letters[uSeqIndex] = L;
+		for (unsigned n = 0; n < uSeqLength; ++n)
+			{
+			char c = s[n];
+			L[n] = CharToLetterEx(c);
+			assert(L[n] < uResidueGroupCount);
+			}
+		}
+
+	unsigned **uCommonTupleCount = new unsigned *[uSeqCount];
+	for (unsigned n = 0; n < uSeqCount; ++n)
+		{
+		uCommonTupleCount[n] = new unsigned[uSeqCount];
+		memset(uCommonTupleCount[n], 0, uSeqCount*sizeof(unsigned));
+		}
+
+// Pass 1 visits every pair plus each sequence against itself.
+	const unsigned uPairCount = (uSeqCount*(uSeqCount + 1))/2;
+	unsigned uCount = 0;
+	SetProgressDesc("K-mer dist pass 1");
+	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
+		{
+		Seq &seq1 = *(v[uSeq1]);
+		const unsigned uSeqLength1 = seq1.Length();
+		if (uSeqLength1 < 5)
+			continue;
+
+		const unsigned uTupleCount1 = uSeqLength1 - 5;
+		const unsigned *L1 = Letters[uSeq1];
+		CountTuples(L1, uTupleCount1, Count1);
+#if	TRACE
+		{
+		Log("Seq1=%d\n", uSeq1);
+		Log("Groups:\n");
+		for (unsigned n = 0; n < uSeqLength1; ++n)
+			Log("%u", ResidueGroup[L1[n]]);
+		Log("\n");
+
+		Log("Tuples:\n");
+		ListCount(Count1);
+		}
+#endif
+
+		for (unsigned uSeq2 = 0; uSeq2 <= uSeq1; ++uSeq2)
+			{
+			if (0 == uCount%500)
+				Progress(uCount, uPairCount);
+			++uCount;
+			Seq &seq2 = *(v[uSeq2]);
+			const unsigned uSeqLength2 = seq2.Length();
+			if (uSeqLength2 < 5)
+				{
+				if (uSeq1 == uSeq2)
+					DF.SetDist(uSeq1, uSeq2, 0);
+				else
+					DF.SetDist(uSeq1, uSeq2, 1);
+				continue;
+				}
+
+		// First pass through seq 2 to count tuples
+			const unsigned uTupleCount2 = uSeqLength2 - 5;
+			const unsigned *L2 = Letters[uSeq2];
+			CountTuples(L2, uTupleCount2, Count2);
+#if	TRACE
+			Log("Seq2=%d Counts=\n", uSeq2);
+			ListCount(Count2);
+#endif
+
+		// Second pass to accumulate sum of shared tuples
+		// MAFFT defines this as the sum over unique tuples
+		// in seq2 of the minimum of the number of tuples found
+		// in the two sequences.
+			unsigned uSum = 0;
+			for (unsigned n = 0; n < uTupleCount2; ++n)
+				{
+				const unsigned uTuple = GetTuple(L2, n);
+				uSum += MIN(Count1[uTuple], Count2[uTuple]);
+
+			// This is a hack to make sure each unique tuple counted only once.
+				Count2[uTuple] = 0;
+				}
+#if	TRACE
+			{
+			Seq &s1 = *(v[uSeq1]);
+			Seq &s2 = *(v[uSeq2]);
+			const char *pName1 = s1.GetName();
+			const char *pName2 = s2.GetName();
+			Log("Common count %s(%d) - %s(%d) =%u\n",
+			  pName1, uSeq1, pName2, uSeq2, uSum);
+			}
+#endif
+			uCommonTupleCount[uSeq1][uSeq2] = uSum;
+			uCommonTupleCount[uSeq2][uSeq1] = uSum;
+			}
+		}
+	ProgressStepsDone();
+
+// Pass 2 visits only the pairs below the diagonal, so its progress total
+// is n(n-1)/2 rather than the pass 1 total.
+	const unsigned uPairCount2 = (uSeqCount*(uSeqCount - 1))/2;
+	uCount = 0;
+	SetProgressDesc("K-mer dist pass 2");
+	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
+		{
+		double dCommonTupleCount11 = uCommonTupleCount[uSeq1][uSeq1];
+		if (0 == dCommonTupleCount11)
+			dCommonTupleCount11 = 1;
+
+		DF.SetDist(uSeq1, uSeq1, 0);
+		for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
+			{
+			if (0 == uCount%500)
+				Progress(uCount, uPairCount2);
+			++uCount;
+
+			double dCommonTupleCount22 = uCommonTupleCount[uSeq2][uSeq2];
+			if (0 == dCommonTupleCount22)
+				dCommonTupleCount22 = 1;
+
+			const double dDist1 = 3.0*(dCommonTupleCount11 - uCommonTupleCount[uSeq1][uSeq2])
+			  /dCommonTupleCount11;
+			const double dDist2 = 3.0*(dCommonTupleCount22 - uCommonTupleCount[uSeq1][uSeq2])
+			  /dCommonTupleCount22;
+
+		// dMinDist is the value used for tree-building in MAFFT
+			const double dMinDist = MIN(dDist1, dDist2);
+			DF.SetDist(uSeq1, uSeq2, (float) dMinDist);
+
+			//const double dEstimatedPctId = TupleDistToEstimatedPctId(dMinDist);
+			//g_dfPwId.SetDist(uSeq1, uSeq2, dEstimatedPctId);
+		// **** TODO **** why does this make score slightly worse??
+			//const double dKimuraDist = KimuraDist(dEstimatedPctId);
+			//DF.SetDist(uSeq1, uSeq2, dKimuraDist);
+			}
+		}
+	ProgressStepsDone();
+
+// Release the per-sequence arrays before the arrays that point to them.
+	for (unsigned n = 0; n < uSeqCount; ++n)
+		{
+		delete[] uCommonTupleCount[n];
+		delete[] Letters[n];
+		}
+	delete[] uCommonTupleCount;
+	delete[] Letters;
+	}
+
+// Return -log(dPctId), with dPctId raised to 0.05 when it is below that,
+// so the result never exceeds -log(0.05).
+double PctIdToMAFFTDist(double dPctId)
+	{
+	const double dFloor = 0.05;
+	const double dClamped = (dPctId < dFloor) ? dFloor : dPctId;
+	return -log(dClamped);
+	}
+
+// Height corresponding to dPctId; forwards to PctIdToMAFFTDist and returns
+// its result unchanged.
+double PctIdToHeightMAFFT(double dPctId)
+	{
+	return PctIdToMAFFTDist(dPctId);
+	}

Added: trunk/packages/muscle/branches/upstream/current/fastdistnuc.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/fastdistnuc.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/fastdistnuc.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,265 @@
+#include "muscle.h"
+#include "distfunc.h"
+#include "seqvect.h"
+#include <math.h>
+
+#define TRACE 0
+
+#define MIN(x, y)	(((x) < (y)) ? (x) : (y))
+#define MAX(x, y)	(((x) > (y)) ? (x) : (y))
+
+const unsigned TUPLE_COUNT = 6*6*6*6*6*6;
+static unsigned char Count1[TUPLE_COUNT];
+static unsigned char Count2[TUPLE_COUNT];
+
+// Nucleotide groups according to MAFFT (sextet5)
+// 0 =  A
+// 1 =  C
+// 2 =  G
+// 3 =  T
+// 4 =  other
+
+// ResidueGroup[letter] is the group number of a letter code; GetTuple
+// indexes it with the letters prepared by DistKmer4_6. DistKmer4_6 replaces
+// every letter code >= 4 by 4 before the lookup, so entries 5..7 are not
+// reached from there.
+static unsigned ResidueGroup[] =
+	{
+	0,		// NX_A,
+	1,		// NX_C,
+	2,		// NX_G,
+	3,		// NX_T/U
+	4,		// NX_N,
+	4,		// NX_R,
+	4,		// NX_Y,
+	4,		// NX_GAP
+	};
+// Number of entries in ResidueGroup; used to range-check letter codes.
+static unsigned uResidueGroupCount = sizeof(ResidueGroup)/sizeof(ResidueGroup[0]);
+
+// Format tuple code t as its six base-6 digits, most significant first.
+// The result lives in a static buffer that the next call overwrites.
+static char *TupleToStr(int t)
+	{
+	static char s[7];	// 6 digits; s[6] is never written and stays 0
+
+	// Peel digits off the low end, filling the string right to left.
+	int iRest = t;
+	for (int i = 5; i >= 0; --i)
+		{
+		s[i] = '0' + iRest%6;
+		iRest /= 6;
+		}
+	return s;
+	}
+
+// Return the tuple code of the six letters uLetters[n..n+5]: each letter is
+// mapped to its group through ResidueGroup and the six group numbers are
+// read as a base-6 number with the first letter most significant.
+static unsigned GetTuple(const unsigned uLetters[], unsigned n)
+	{
+	unsigned uTuple = 0;
+	for (unsigned i = 0; i < 6; ++i)
+		{
+		const unsigned uLetter = uLetters[n + i];
+		assert(uLetter < uResidueGroupCount);
+	// Horner's rule: shift the digits so far up one place, append the next.
+		uTuple = uTuple*6 + ResidueGroup[uLetter];
+		}
+	return uTuple;
+	}
+
+// Clear Count (TUPLE_COUNT entries), then add one to Count[tuple] for the
+// tuple starting at each of the positions 0..uTupleCount-1 of L.
+// The counters are unsigned char, so a count wraps around after 255.
+static void CountTuples(const unsigned L[], unsigned uTupleCount, unsigned char Count[])
+	{
+	memset(Count, 0, TUPLE_COUNT*sizeof(unsigned char));
+
+	unsigned uStart = 0;
+	while (uStart != uTupleCount)
+		{
+		Count[GetTuple(L, uStart)] += 1;
+		++uStart;
+		}
+	}
+
+// Log every tuple whose count is non-zero, one "tuple  count" line each.
+static void ListCount(const unsigned char Count[])
+	{
+	for (unsigned uTuple = 0; uTuple < TUPLE_COUNT; ++uTuple)
+		{
+		const unsigned uTimes = Count[uTuple];
+		if (uTimes != 0)
+			Log("%s  %u\n", TupleToStr(uTuple), uTimes);
+		}
+	}
+
+// Fill DF with a 6-mer distance for every pair of nucleotide sequences in
+// v. Quits unless g_Alpha is ALPHA_DNA or ALPHA_RNA.
+//
+// Letter codes >= 4 are all treated as letter 4 before grouping.
+// Pass 1 computes, for every pair and for each sequence against itself, the
+// sum over the distinct tuples of the second sequence of
+// min(count in first, count in second).
+// Pass 2 turns those counts into distances: 3*(self - common)/self is
+// evaluated with each sequence's self count (a zero self count is replaced
+// by 1) and the smaller of the two values is stored.
+// Sequences shorter than 5 letters take no part in pass 1.
+void DistKmer4_6(const SeqVect &v, DistFunc &DF)
+	{
+	if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)
+		Quit("DistKmer4_6 requires nucleo alphabet");
+
+	const unsigned uSeqCount = v.Length();
+
+	DF.SetCount(uSeqCount);
+	if (0 == uSeqCount)
+		return;
+
+// Initialize distance matrix to zero
+	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
+		{
+		DF.SetDist(uSeq1, uSeq1, 0);
+		for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
+			DF.SetDist(uSeq1, uSeq2, 0);
+		}
+
+// Convert to letters
+	unsigned **Letters = new unsigned *[uSeqCount];
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		Seq &s = *(v[uSeqIndex]);
+		const unsigned uSeqLength = s.Length();
+		unsigned *L = new unsigned[uSeqLength];
+		Letters[uSeqIndex] = L;
+		for (unsigned n = 0; n < uSeqLength; ++n)
+			{
+			char c = s[n];
+			L[n] = CharToLetterEx(c);
+			if (L[n] >= 4)
+				L[n] = 4;
+			}
+		}
+
+	unsigned **uCommonTupleCount = new unsigned *[uSeqCount];
+	for (unsigned n = 0; n < uSeqCount; ++n)
+		{
+		uCommonTupleCount[n] = new unsigned[uSeqCount];
+		memset(uCommonTupleCount[n], 0, uSeqCount*sizeof(unsigned));
+		}
+
+// Pass 1 visits every pair plus each sequence against itself.
+	const unsigned uPairCount = (uSeqCount*(uSeqCount + 1))/2;
+	unsigned uCount = 0;
+	SetProgressDesc("K-mer dist pass 1");
+	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
+		{
+		Seq &seq1 = *(v[uSeq1]);
+		const unsigned uSeqLength1 = seq1.Length();
+		if (uSeqLength1 < 5)
+			continue;
+
+		const unsigned uTupleCount1 = uSeqLength1 - 5;
+		const unsigned *L1 = Letters[uSeq1];
+		CountTuples(L1, uTupleCount1, Count1);
+#if	TRACE
+		{
+		Log("Seq1=%d\n", uSeq1);
+		Log("Groups:\n");
+		for (unsigned n = 0; n < uSeqLength1; ++n)
+			Log("%u", ResidueGroup[L1[n]]);
+		Log("\n");
+
+		Log("Tuples:\n");
+		ListCount(Count1);
+		}
+#endif
+
+		for (unsigned uSeq2 = 0; uSeq2 <= uSeq1; ++uSeq2)
+			{
+			if (0 == uCount%500)
+				Progress(uCount, uPairCount);
+			++uCount;
+			Seq &seq2 = *(v[uSeq2]);
+			const unsigned uSeqLength2 = seq2.Length();
+			if (uSeqLength2 < 5)
+				{
+				if (uSeq1 == uSeq2)
+					DF.SetDist(uSeq1, uSeq2, 0);
+				else
+					DF.SetDist(uSeq1, uSeq2, 1);
+				continue;
+				}
+
+		// First pass through seq 2 to count tuples
+			const unsigned uTupleCount2 = uSeqLength2 - 5;
+			const unsigned *L2 = Letters[uSeq2];
+			CountTuples(L2, uTupleCount2, Count2);
+#if	TRACE
+			Log("Seq2=%d Counts=\n", uSeq2);
+			ListCount(Count2);
+#endif
+
+		// Second pass to accumulate sum of shared tuples
+		// MAFFT defines this as the sum over unique tuples
+		// in seq2 of the minimum of the number of tuples found
+		// in the two sequences.
+			unsigned uSum = 0;
+			for (unsigned n = 0; n < uTupleCount2; ++n)
+				{
+				const unsigned uTuple = GetTuple(L2, n);
+				uSum += MIN(Count1[uTuple], Count2[uTuple]);
+
+			// This is a hack to make sure each unique tuple counted only once.
+				Count2[uTuple] = 0;
+				}
+#if	TRACE
+			{
+			Seq &s1 = *(v[uSeq1]);
+			Seq &s2 = *(v[uSeq2]);
+			const char *pName1 = s1.GetName();
+			const char *pName2 = s2.GetName();
+			Log("Common count %s(%d) - %s(%d) =%u\n",
+			  pName1, uSeq1, pName2, uSeq2, uSum);
+			}
+#endif
+			uCommonTupleCount[uSeq1][uSeq2] = uSum;
+			uCommonTupleCount[uSeq2][uSeq1] = uSum;
+			}
+		}
+	ProgressStepsDone();
+
+// Pass 2 visits only the pairs below the diagonal, so its progress total
+// is n(n-1)/2 rather than the pass 1 total.
+	const unsigned uPairCount2 = (uSeqCount*(uSeqCount - 1))/2;
+	uCount = 0;
+	SetProgressDesc("K-mer dist pass 2");
+	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
+		{
+		double dCommonTupleCount11 = uCommonTupleCount[uSeq1][uSeq1];
+		if (0 == dCommonTupleCount11)
+			dCommonTupleCount11 = 1;
+
+		DF.SetDist(uSeq1, uSeq1, 0);
+		for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
+			{
+			if (0 == uCount%500)
+				Progress(uCount, uPairCount2);
+			++uCount;
+
+			double dCommonTupleCount22 = uCommonTupleCount[uSeq2][uSeq2];
+			if (0 == dCommonTupleCount22)
+				dCommonTupleCount22 = 1;
+
+			const double dDist1 = 3.0*(dCommonTupleCount11 - uCommonTupleCount[uSeq1][uSeq2])
+			  /dCommonTupleCount11;
+			const double dDist2 = 3.0*(dCommonTupleCount22 - uCommonTupleCount[uSeq1][uSeq2])
+			  /dCommonTupleCount22;
+
+		// dMinDist is the value used for tree-building in MAFFT
+			const double dMinDist = MIN(dDist1, dDist2);
+			DF.SetDist(uSeq1, uSeq2, (float) dMinDist);
+
+			//const double dEstimatedPctId = TupleDistToEstimatedPctId(dMinDist);
+			//g_dfPwId.SetDist(uSeq1, uSeq2, dEstimatedPctId);
+		// **** TODO **** why does this make score slightly worse??
+			//const double dKimuraDist = KimuraDist(dEstimatedPctId);
+			//DF.SetDist(uSeq1, uSeq2, dKimuraDist);
+			}
+		}
+	ProgressStepsDone();
+
+// Release the per-sequence arrays before the arrays that point to them.
+	for (unsigned n = 0; n < uSeqCount; ++n)
+		{
+		delete[] uCommonTupleCount[n];
+		delete[] Letters[n];
+		}
+	delete[] uCommonTupleCount;
+	delete[] Letters;
+	}

Added: trunk/packages/muscle/branches/upstream/current/fastscorepath2.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/fastscorepath2.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/fastscorepath2.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,165 @@
+#include "muscle.h"
+#include "profile.h"
+#include "pwpath.h"
+
+// FastScorePath2: re-score an existing pair-wise path one edge at a time and
+// write a per-edge table (edge index, previous/current edge type, prefix
+// lengths, match score, gap score, edge score, running total) to the log.
+//   PA, uLengthA   first profile and its length
+//   PB, uLengthB   second profile and its length
+//   Path           path to score; edge types 'M', 'D' and 'I' are scored,
+//                  'U' and any other type are reported through Quit()
+// Returns the sum of all edge scores plus the gap-close score that is added
+// after the loop when the final edge is 'D' or 'I'.
+// Continuing an existing gap (D after D, I after I) contributes 0 here; the
+// g_scoreGapExtend alternative is commented out.
+SCORE FastScorePath2(const ProfPos *PA, unsigned uLengthA,
+  const ProfPos *PB, unsigned uLengthB, const PWPath &Path)
+	{
+	const unsigned uEdgeCount = Path.GetEdgeCount();
+	Log("Edge  SS     PLA   PLB   Match     Gap    Total\n");
+	Log("----  --     ---   ---   -----     ---    -----\n");
+// cType starts as 'S'; that value is what the first edge sees as its
+// previous type, and what the final switch sees if the path has no edges.
+	char cType = 'S';
+	SCORE scoreTotal = 0;
+	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
+		{
+		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
+		const char cPrevType = cType;
+		cType = Edge.cType;
+		const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
+		const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
+	// bGap/bMatch only control which columns of the log line are filled in.
+		bool bGap = false;
+		bool bMatch = false;
+		SCORE scoreGap = 0;
+		SCORE scoreMatch = 0;
+
+		switch (cType)
+			{
+		case 'M':
+			{
+			if (0 == uPrefixLengthA || 0 == uPrefixLengthB)
+				Quit("FastScorePath2, M zero length");
+
+		// Prefix lengths are 1-based; the profile arrays are 0-based.
+			const ProfPos &PPA = PA[uPrefixLengthA - 1];
+			const ProfPos &PPB = PB[uPrefixLengthB - 1];
+
+			bMatch = true;
+			scoreMatch = ScoreProfPos2(PPA, PPB);
+
+		// A match directly after a gap pays the gap-close score taken from
+		// the position before this one in the profile that the gap consumed.
+			if ('D' == cPrevType)
+				{
+				bGap = true;
+				assert(uPrefixLengthA > 1);
+				scoreGap = PA[uPrefixLengthA-2].m_scoreGapClose;
+				}
+			else if ('I' == cPrevType)
+				{
+				bGap = true;
+				assert(uPrefixLengthB > 1);
+				scoreGap = PB[uPrefixLengthB-2].m_scoreGapClose;
+				}
+			break;
+			}
+
+		case 'D':
+			{
+			if (0 == uPrefixLengthA)
+				Quit("FastScorePath2, D zero length");
+
+			const ProfPos &PPA = PA[uPrefixLengthA - 1];
+			bGap = true;
+		// Gap-open is charged when the delete follows 'S' or 'M'; a delete
+		// following a delete adds nothing; a delete following an insert is
+		// rejected.
+			switch (cPrevType)
+				{
+			case 'S':
+				scoreGap = PPA.m_scoreGapOpen;
+				break;
+			case 'M':
+				scoreGap = PPA.m_scoreGapOpen;
+				break;
+			case 'D':
+//				scoreGap = g_scoreGapExtend;
+				scoreGap = 0;
+				break;
+			case 'I':
+				Quit("FastScorePath2 DI");
+				}
+			break;
+			}
+
+		case 'I':
+			{
+			if (0 == uPrefixLengthB)
+				Quit("FastScorePath2, I zero length");
+
+			const ProfPos &PPB = PB[uPrefixLengthB - 1];
+			bGap = true;
+		// Mirror image of the 'D' case, using profile B. Note that the Quit
+		// message below is the same "DI" text as in the 'D' case.
+			switch (cPrevType)
+				{
+			case 'S':
+				scoreGap = PPB.m_scoreGapOpen;
+				break;
+			case 'M':
+				scoreGap = PPB.m_scoreGapOpen;
+				break;
+			case 'I':
+				scoreGap = 0;
+//				scoreGap = g_scoreGapExtend;
+				break;
+			case 'D':
+				Quit("FastScorePath2 DI");
+				}
+			break;
+			}
+
+	// NOTE(review): there is no break after this case, so if Quit() were to
+	// return, control would continue into default -- presumably Quit() never
+	// returns; confirm.
+		case 'U':
+			{
+			Quit("FastScorePath2 U");
+			}
+
+		default:
+			Quit("FastScorePath2: invalid type %c", cType);
+			}
+
+	// One log row per edge; columns that do not apply are left blank.
+		Log("%4u  %c%c  %4u  %4u  ", uEdgeIndex, cPrevType, cType,
+		  uPrefixLengthA, uPrefixLengthB);
+		if (bMatch)
+			Log("%7.1f  ", scoreMatch);
+		else
+			Log("         ");
+		if (bGap)
+			Log("%7.1f  ", scoreGap);
+		else
+			Log("         ");
+		SCORE scoreEdge = scoreMatch + scoreGap;
+		scoreTotal += scoreEdge;
+		Log("%7.1f  %7.1f", scoreEdge, scoreTotal);
+		Log("\n");
+		}
+
+// Closing score, decided by the type of the last edge (cType): a path that
+// ends in a gap pays the gap-close score of the last position of the
+// corresponding profile.
+	SCORE scoreGap = 0;
+//	if (!g_bTermGapsHalf)
+		switch (cType)
+			{
+		case 'M':
+			scoreGap = 0;
+			break;
+
+		case 'D':
+			{
+			const ProfPos &LastPPA = PA[uLengthA - 1];
+			scoreGap = LastPPA.m_scoreGapClose;
+			break;
+			}
+
+		case 'I':
+			{
+			const ProfPos &LastPPB = PB[uLengthB - 1];
+			scoreGap = LastPPB.m_scoreGapClose;
+			break;
+			}
+
+		case 'U':
+			Quit("Unaligned regions not supported");
+
+	// Reached only when the path had no edges (cType never left 'S').
+		case 'S':
+			break;
+
+		default:
+			Quit("Invalid type %c", cType);
+			}
+
+	Log("      %cE  %4u  %4u           %7.1f\n", cType, uLengthA, uLengthB, scoreGap);
+	scoreTotal += scoreGap;
+
+	Log("Total = %g\n", scoreTotal);
+	return scoreTotal;
+	}

Added: trunk/packages/muscle/branches/upstream/current/finddiags.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/finddiags.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/finddiags.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,161 @@
+#include "muscle.h"
+#include "profile.h"
+#include "diaglist.h"
+
+#define TRACE	0
+
+// Length, in profile positions, of the tuples used to seed diagonals.
+const unsigned KTUP = 5;
+// Number of distinct tuple codes: GetTuple packs five base-6 digits (6^5).
+const unsigned KTUPS = 6*6*6*6*6;
+// TuplePos[code] holds the position in the longer profile at which FindDiags
+// last recorded that tuple code, or EMPTY when the code was not recorded.
+static unsigned TuplePos[KTUPS];
+
+// TupleToStr: render a tuple code as five base-6 digit characters, most
+// significant digit first. The result lives in a static buffer that is
+// overwritten by the next call; the unused tail of the buffer stays zero, so
+// the string is terminated.
+static char *TupleToStr(int t)
+	{
+	static char s[7];
+
+// Peel digits off from the least significant end, filling right to left.
+	int iRest = t;
+	for (int iSlot = 4; iSlot >= 0; --iSlot)
+		{
+		s[iSlot] = '0' + iRest%6;
+		iRest = iRest/6;
+		}
+	return s;
+	}
+
+// GetTuple: encode the residue groups of the five profile positions starting
+// at uPos as one base-6 number; the group at uPos is the least significant
+// digit. Returns EMPTY as soon as a position whose group is
+// RESIDUE_GROUP_MULTIPLE is met (later positions are then not read).
+// The caller must guarantee that positions uPos..uPos+4 exist.
+static unsigned GetTuple(const ProfPos *PP, unsigned uPos)
+	{
+	const unsigned uTupleLength = 5;
+
+	unsigned uCode = 0;
+	unsigned uPlaceValue = 1;
+	for (unsigned uOffset = 0; uOffset < uTupleLength; ++uOffset)
+		{
+		const unsigned uGroup = PP[uPos + uOffset].m_uResidueGroup;
+		if (uGroup == RESIDUE_GROUP_MULTIPLE)
+			return EMPTY;
+
+		uCode += uGroup*uPlaceValue;
+		uPlaceValue *= 6;
+		}
+
+	return uCode;
+	}
+
+// FindDiags: find diagonals between two amino-acid profiles. A diagonal is
+// seeded by a KTUP-long tuple whose code occurs in both profiles and is then
+// extended forwards while the residue groups of the next positions agree.
+//   PX, uLengthX   first profile and its length
+//   PY, uLengthY   second profile and its length
+//   DL             cleared first, then receives every diagonal whose length is
+//                  at least g_uMinDiagLength; start positions are always
+//                  added in (X, Y) order regardless of the internal swap
+// Calls Quit() unless g_Alpha is ALPHA_Amino. Leaves DL empty if either
+// profile has fewer than 12 positions.
+void FindDiags(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
+  unsigned uLengthY, DiagList &DL)
+	{
+	if (ALPHA_Amino != g_Alpha)
+		Quit("FindDiags: requires amino acid alphabet");
+
+	DL.Clear();
+
+	if (uLengthX < 12 || uLengthY < 12)
+		return;
+
+// Set A to shorter profile, B to longer
+// (when the lengths are equal the swapped assignment is used).
+	const ProfPos *PA;
+	const ProfPos *PB;
+	unsigned uLengthA;
+	unsigned uLengthB;
+	bool bSwap;
+	if (uLengthX < uLengthY)
+		{
+		bSwap = false;
+		PA = PX;
+		PB = PY;
+		uLengthA = uLengthX;
+		uLengthB = uLengthY;
+		}
+	else
+		{
+		bSwap = true;
+		PA = PY;
+		PB = PX;
+		uLengthA = uLengthY;
+		uLengthB = uLengthX;
+		}
+
+// Build tuple map for the longer profile, B
+// (this check cannot fire: both lengths are already known to be >= 12).
+	if (uLengthB < KTUP)
+		Quit("FindDiags: profile too short");
+
+// NOTE(review): memset stores only the low byte of EMPTY in every byte of the
+// table, so this relies on EMPTY having all bytes equal (presumably all-ones)
+// -- confirm against its definition.
+	memset(TuplePos, EMPTY, sizeof(TuplePos));
+
+// When a tuple code occurs more than once in B, the highest position wins.
+// NOTE(review): the bound stops one short of uLengthB - KTUP, a start position
+// whose five reads would still be inside B -- confirm whether intentional.
+	for (unsigned uPos = 0; uPos < uLengthB - KTUP; ++uPos)
+		{
+		const unsigned uTuple = GetTuple(PB, uPos);
+		if (EMPTY == uTuple)
+			continue;
+		TuplePos[uTuple] = uPos;
+		}
+
+// Find matches
+	for (unsigned uPosA = 0; uPosA < uLengthA - KTUP; ++uPosA)
+		{
+		const unsigned uTuple = GetTuple(PA, uPosA);
+		if (EMPTY == uTuple)
+			continue;
+		const unsigned uPosB = TuplePos[uTuple];
+		if (EMPTY == uPosB)
+			continue;
+
+	// This tuple is found in both profiles
+		unsigned uStartPosA = uPosA;
+		unsigned uStartPosB = uPosB;
+
+	// Try to extend the match forwards
+	// (end positions are inclusive; stop at the end of either profile, at a
+	// RESIDUE_GROUP_MULTIPLE position, or at the first disagreement).
+		unsigned uEndPosA = uPosA + KTUP - 1;
+		unsigned uEndPosB = uPosB + KTUP - 1;
+		for (;;)
+			{
+			if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB)
+				break;
+			const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup;
+			if (RESIDUE_GROUP_MULTIPLE == uAAGroupA)
+				break;
+			const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup;
+			if (RESIDUE_GROUP_MULTIPLE == uAAGroupB)
+				break;
+			if (uAAGroupA != uAAGroupB)
+				break;
+			++uEndPosA;
+			++uEndPosB;
+			}
+	// Resume the scan just past this match (the loop's ++uPosA adds the 1).
+		uPosA = uEndPosA;
+
+#if	TRACE
+		{
+		Log("Match: A %4u-%4u   ", uStartPosA, uEndPosA);
+		for (unsigned n = uStartPosA; n <= uEndPosA; ++n)
+			Log("%c", 'A' + PA[n].m_uResidueGroup);
+		Log("\n");
+		Log("       B %4u-%4u   ", uStartPosB, uEndPosB);
+		for (unsigned n = uStartPosB; n <= uEndPosB; ++n)
+			Log("%c", 'A' + PB[n].m_uResidueGroup);
+		Log("\n");
+		}
+#endif
+
+		const unsigned uLength = uEndPosA - uStartPosA + 1;
+		assert(uEndPosB - uStartPosB + 1 == uLength);
+
+		if (uLength >= g_uMinDiagLength)
+			{
+		// Undo the A/B swap so the diagonal is reported in (X, Y) order.
+			if (bSwap)
+				DL.Add(uStartPosB, uStartPosA, uLength);
+			else
+				DL.Add(uStartPosA, uStartPosB, uLength);
+			}
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/finddiagsn.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/finddiagsn.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/finddiagsn.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,152 @@
+#include "muscle.h"
+#include "profile.h"
+#include "diaglist.h"
+
+#define TRACE	0
+
+// pow4(i) = 4^i, computed as a shift. The argument is parenthesized so that
+// an expression argument such as pow4(a+b) expands to 1 << (2*(a+b)).
+#define pow4(i)	(1 << (2*(i)))	// 4^i = 2^(2*i)
+// Length, in profile positions, of the tuples used to seed diagonals.
+const unsigned K = 7;
+// Number of distinct tuple codes: K base-4 digits.
+const unsigned KTUPS = pow4(K);
+// TuplePos[code] holds the position in the longer profile at which
+// FindDiagsNuc last recorded that tuple code, or EMPTY when not recorded.
+static unsigned TuplePos[KTUPS];
+
+// TupleToStr: render a tuple code as K letters, most significant base-4 digit
+// first, using LetterToChar for each digit. The result lives in a static
+// buffer that is overwritten by the next call.
+static char *TupleToStr(int t)
+	{
+// K letters plus the terminating NUL, so callers get a valid C string.
+	static char s[K+1];
+
+	for (unsigned i = 0; i < K; ++i)
+		{
+		unsigned Letter = (t/(pow4(i)))%4;
+		assert(Letter < 4);
+		s[K-i-1] = LetterToChar(Letter);
+		}
+	s[K] = 0;
+
+	return s;
+	}
+
+// GetTuple: encode the residue groups of the K profile positions starting at
+// uPos as one number, one base-4 digit per position; the group at uPos ends
+// up as the most significant digit. Returns EMPTY as soon as a position whose
+// group is RESIDUE_GROUP_MULTIPLE is met (later positions are then not read).
+// The caller must guarantee that positions uPos..uPos+K-1 exist.
+static unsigned GetTuple(const ProfPos *PP, unsigned uPos)
+	{
+	unsigned uCode = 0;
+	unsigned uOffset = 0;
+
+	while (uOffset < K)
+		{
+		const unsigned uGroup = PP[uPos + uOffset].m_uResidueGroup;
+		if (uGroup == RESIDUE_GROUP_MULTIPLE)
+			return EMPTY;
+
+	// Shift the digits collected so far up by one base-4 place.
+		uCode = (uCode << 2) + uGroup;
+		++uOffset;
+		}
+
+	return uCode;
+	}
+
+// FindDiagsNuc: find diagonals between two nucleotide profiles. A diagonal is
+// seeded by a K-long tuple whose code occurs in both profiles and is then
+// extended forwards while the residue groups of the next positions agree.
+//   PX, uLengthX   first profile and its length
+//   PY, uLengthY   second profile and its length
+//   DL             cleared first, then receives every diagonal whose length is
+//                  at least g_uMinDiagLength; start positions are always
+//                  added in (X, Y) order regardless of the internal swap
+// Calls Quit() unless g_Alpha is ALPHA_DNA or ALPHA_RNA. Leaves DL empty if
+// either profile has fewer than K + 16 positions.
+void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
+  unsigned uLengthY, DiagList &DL)
+	{
+	if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)
+		Quit("FindDiagsNuc: requires nucleo alphabet");
+
+	DL.Clear();
+
+// 16 is arbitrary slop, no principled reason for this.
+	if (uLengthX < K + 16 || uLengthY < K + 16)
+		return;
+
+// Set A to shorter profile, B to longer
+// (when the lengths are equal the swapped assignment is used).
+	const ProfPos *PA;
+	const ProfPos *PB;
+	unsigned uLengthA;
+	unsigned uLengthB;
+	bool bSwap;
+	if (uLengthX < uLengthY)
+		{
+		bSwap = false;
+		PA = PX;
+		PB = PY;
+		uLengthA = uLengthX;
+		uLengthB = uLengthY;
+		}
+	else
+		{
+		bSwap = true;
+		PA = PY;
+		PB = PX;
+		uLengthA = uLengthY;
+		uLengthB = uLengthX;
+		}
+
+#if	TRACE
+	Log("FindDiagsNuc(LengthA=%d LengthB=%d\n", uLengthA, uLengthB);
+#endif
+
+// Build tuple map for the longer profile, B
+// (this check cannot fire: both lengths are already known to be >= K + 16).
+	if (uLengthB < K)
+		Quit("FindDiags: profile too short");
+
+// NOTE(review): memset stores only the low byte of EMPTY in every byte of the
+// table, so this relies on EMPTY having all bytes equal (presumably all-ones)
+// -- confirm against its definition.
+	memset(TuplePos, EMPTY, sizeof(TuplePos));
+
+// When a tuple code occurs more than once in B, the highest position wins.
+// NOTE(review): the bound stops one short of uLengthB - K, a start position
+// whose K reads would still be inside B -- confirm whether intentional.
+	for (unsigned uPos = 0; uPos < uLengthB - K; ++uPos)
+		{
+		const unsigned uTuple = GetTuple(PB, uPos);
+		if (EMPTY == uTuple)
+			continue;
+		TuplePos[uTuple] = uPos;
+		}
+
+// Find matches
+	for (unsigned uPosA = 0; uPosA < uLengthA - K; ++uPosA)
+		{
+		const unsigned uTuple = GetTuple(PA, uPosA);
+		if (EMPTY == uTuple)
+			continue;
+		const unsigned uPosB = TuplePos[uTuple];
+		if (EMPTY == uPosB)
+			continue;
+
+	// This tuple is found in both profiles
+		unsigned uStartPosA = uPosA;
+		unsigned uStartPosB = uPosB;
+
+	// Try to extend the match forwards
+	// (end positions are inclusive; stop at the end of either profile, at a
+	// RESIDUE_GROUP_MULTIPLE position, or at the first disagreement).
+		unsigned uEndPosA = uPosA + K - 1;
+		unsigned uEndPosB = uPosB + K - 1;
+		for (;;)
+			{
+			if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB)
+				break;
+			const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup;
+			if (RESIDUE_GROUP_MULTIPLE == uAAGroupA)
+				break;
+			const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup;
+			if (RESIDUE_GROUP_MULTIPLE == uAAGroupB)
+				break;
+			if (uAAGroupA != uAAGroupB)
+				break;
+			++uEndPosA;
+			++uEndPosB;
+			}
+	// Resume the scan just past this match (the loop's ++uPosA adds the 1).
+		uPosA = uEndPosA;
+
+#if	TRACE
+		{
+		Log("Match: A %4u-%4u   ", uStartPosA, uEndPosA);
+		for (unsigned n = uStartPosA; n <= uEndPosA; ++n)
+			Log("%c", LetterToChar(PA[n].m_uResidueGroup));
+		Log("\n");
+		Log("       B %4u-%4u   ", uStartPosB, uEndPosB);
+		for (unsigned n = uStartPosB; n <= uEndPosB; ++n)
+			Log("%c", LetterToChar(PB[n].m_uResidueGroup));
+		Log("\n");
+		}
+#endif
+
+		const unsigned uLength = uEndPosA - uStartPosA + 1;
+		assert(uEndPosB - uStartPosB + 1 == uLength);
+
+		if (uLength >= g_uMinDiagLength)
+			{
+		// Undo the A/B swap so the diagonal is reported in (X, Y) order.
+			if (bSwap)
+				DL.Add(uStartPosB, uStartPosA, uLength);
+			else
+				DL.Add(uStartPosA, uStartPosB, uLength);
+			}
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/gapscoredimer.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/gapscoredimer.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/gapscoredimer.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,69 @@
+// source code generated by dimer.py
+
+// GapScoreMM: gap score for a pair of profile positions. Returns
+// g_scoreGapOpen, g_scoreGapExtend and g_scoreGapAmbig, each multiplied by a
+// sum of products of the m_LL/m_LG/m_GL/m_GG fields of PPA and PPB.
+// NOTE(review): presumably "MM" names the edge-type pair this table applies
+// to and L/G stand for letter/gap -- confirm against dimer.py.
+static SCORE GapScoreMM(const ProfPos &PPA, const ProfPos &PPB)
+	{
+	return
+	  g_scoreGapOpen*(PPA.m_LL*PPB.m_LG + PPA.m_LG*PPB.m_LL + PPA.m_LG*PPB.m_GL + PPA.m_GL*PPB.m_LG) +
+	  g_scoreGapExtend*(PPA.m_LL*PPB.m_GG + PPA.m_GG*PPB.m_LL) +
+	  g_scoreGapAmbig*(PPA.m_GL*PPB.m_GG + PPA.m_GG*PPB.m_GL);
+	}
+
+// GapScoreMD: gap score for a pair of profile positions. Returns
+// g_scoreGapOpen, g_scoreGapExtend and g_scoreGapAmbig, each multiplied by a
+// sum of products of the m_LL/m_LG/m_GL/m_GG fields of PPA and PPB.
+// NOTE(review): presumably "MD" names the edge-type pair this table applies
+// to -- confirm against dimer.py.
+static SCORE GapScoreMD(const ProfPos &PPA, const ProfPos &PPB)
+	{
+	return
+	  g_scoreGapOpen*(PPA.m_LL*PPB.m_LL + PPA.m_LL*PPB.m_GL + PPA.m_GL*PPB.m_LL + PPA.m_GL*PPB.m_GL) +
+	  g_scoreGapExtend*(PPA.m_LL*PPB.m_LG + PPA.m_LL*PPB.m_GG) +
+	  g_scoreGapAmbig*(PPA.m_GL*PPB.m_LG + PPA.m_GL*PPB.m_GG);
+	}
+
+// GapScoreMI: gap score for a pair of profile positions. Returns
+// g_scoreGapOpen, g_scoreGapExtend and g_scoreGapAmbig, each multiplied by a
+// sum of products of the m_LL/m_LG/m_GL/m_GG fields of PPA and PPB.
+// NOTE(review): presumably "MI" names the edge-type pair this table applies
+// to -- confirm against dimer.py.
+static SCORE GapScoreMI(const ProfPos &PPA, const ProfPos &PPB)
+	{
+	return
+	  g_scoreGapOpen*(PPA.m_LL*PPB.m_LL + PPA.m_LL*PPB.m_GL + PPA.m_GL*PPB.m_LL + PPA.m_GL*PPB.m_GL) +
+	  g_scoreGapExtend*(PPA.m_LG*PPB.m_LL + PPA.m_GG*PPB.m_LL) +
+	  g_scoreGapAmbig*(PPA.m_LG*PPB.m_GL + PPA.m_GG*PPB.m_GL);
+	}
+
+// GapScoreDM: gap score for a pair of profile positions. Returns
+// g_scoreGapOpen, g_scoreGapExtend and g_scoreGapAmbig, each multiplied by a
+// sum of products of the m_LL/m_LG/m_GL/m_GG fields of PPA and PPB.
+// NOTE(review): presumably "DM" names the edge-type pair this table applies
+// to -- confirm against dimer.py.
+static SCORE GapScoreDM(const ProfPos &PPA, const ProfPos &PPB)
+	{
+	return
+	  g_scoreGapOpen*(PPA.m_LG*PPB.m_LL + PPA.m_LG*PPB.m_GL) +
+	  g_scoreGapExtend*(PPA.m_LL*PPB.m_LG + PPA.m_LL*PPB.m_GG) +
+	  g_scoreGapAmbig*(PPA.m_GL*PPB.m_LG + PPA.m_GL*PPB.m_GG + PPA.m_GG*PPB.m_LL + PPA.m_GG*PPB.m_GL);
+	}
+
+// GapScoreDD: gap score for a pair of profile positions. Returns
+// g_scoreGapExtend and g_scoreGapAmbig, each multiplied by a sum of products
+// of PPA.m_LL / PPA.m_GL with the four dimer fields of PPB; there is no
+// g_scoreGapOpen term.
+// NOTE(review): presumably "DD" names the edge-type pair this table applies
+// to -- confirm against dimer.py.
+static SCORE GapScoreDD(const ProfPos &PPA, const ProfPos &PPB)
+	{
+	return
+	  g_scoreGapExtend*(PPA.m_LL*PPB.m_LL + PPA.m_LL*PPB.m_LG + PPA.m_LL*PPB.m_GL + PPA.m_LL*PPB.m_GG) +
+	  g_scoreGapAmbig*(PPA.m_GL*PPB.m_LL + PPA.m_GL*PPB.m_LG + PPA.m_GL*PPB.m_GL + PPA.m_GL*PPB.m_GG);
+	}
+
+// GapScoreDI: gap score for a pair of profile positions. Returns
+// g_scoreGapOpen and g_scoreGapAmbig, each multiplied by a sum of products of
+// dimer fields of PPA and PPB; there is no g_scoreGapExtend term.
+// NOTE(review): presumably "DI" names the edge-type pair this table applies
+// to -- confirm against dimer.py.
+static SCORE GapScoreDI(const ProfPos &PPA, const ProfPos &PPB)
+	{
+	return
+	  g_scoreGapOpen*(PPA.m_LL*PPB.m_LL + PPA.m_LL*PPB.m_GL + PPA.m_GL*PPB.m_LL + PPA.m_GL*PPB.m_GL) +
+	  g_scoreGapAmbig*(PPA.m_LG*PPB.m_LL + PPA.m_LG*PPB.m_GL + PPA.m_GG*PPB.m_LL + PPA.m_GG*PPB.m_GL);
+	}
+
+// GapScoreIM: gap score for a pair of profile positions. Returns
+// g_scoreGapOpen, g_scoreGapExtend and g_scoreGapAmbig, each multiplied by a
+// sum of products of the m_LL/m_LG/m_GL/m_GG fields of PPA and PPB.
+// NOTE(review): presumably "IM" names the edge-type pair this table applies
+// to -- confirm against dimer.py.
+static SCORE GapScoreIM(const ProfPos &PPA, const ProfPos &PPB)
+	{
+	return
+	  g_scoreGapOpen*(PPA.m_LL*PPB.m_LG + PPA.m_GL*PPB.m_LG) +
+	  g_scoreGapExtend*(PPA.m_LG*PPB.m_LL + PPA.m_GG*PPB.m_LL) +
+	  g_scoreGapAmbig*(PPA.m_LL*PPB.m_GG + PPA.m_LG*PPB.m_GL + PPA.m_GL*PPB.m_GG + PPA.m_GG*PPB.m_GL);
+	}
+
+// GapScoreID: gap score for a pair of profile positions. Returns
+// g_scoreGapOpen and g_scoreGapAmbig, each multiplied by a sum of products of
+// dimer fields of PPA and PPB; there is no g_scoreGapExtend term.
+// NOTE(review): presumably "ID" names the edge-type pair this table applies
+// to -- confirm against dimer.py.
+static SCORE GapScoreID(const ProfPos &PPA, const ProfPos &PPB)
+	{
+	return
+	  g_scoreGapOpen*(PPA.m_LL*PPB.m_LL + PPA.m_LL*PPB.m_GL + PPA.m_GL*PPB.m_LL + PPA.m_GL*PPB.m_GL) +
+	  g_scoreGapAmbig*(PPA.m_LL*PPB.m_LG + PPA.m_LL*PPB.m_GG + PPA.m_GL*PPB.m_LG + PPA.m_GL*PPB.m_GG);
+	}
+
+// GapScoreII: gap score for a pair of profile positions. Returns
+// g_scoreGapExtend and g_scoreGapAmbig, each multiplied by a sum of products
+// of the four dimer fields of PPA with PPB.m_LL / PPB.m_GL; there is no
+// g_scoreGapOpen term.
+// NOTE(review): presumably "II" names the edge-type pair this table applies
+// to -- confirm against dimer.py.
+static SCORE GapScoreII(const ProfPos &PPA, const ProfPos &PPB)
+	{
+	return
+	  g_scoreGapExtend*(PPA.m_LL*PPB.m_LL + PPA.m_LG*PPB.m_LL + PPA.m_GL*PPB.m_LL + PPA.m_GG*PPB.m_LL) +
+	  g_scoreGapAmbig*(PPA.m_LL*PPB.m_GL + PPA.m_LG*PPB.m_GL + PPA.m_GL*PPB.m_GL + PPA.m_GG*PPB.m_GL);
+	}

Added: trunk/packages/muscle/branches/upstream/current/glbalign.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/glbalign.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/glbalign.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,165 @@
+#include "muscle.h"
+#include "pwpath.h"
+#include "timing.h"
+#include "textfile.h"
+#include "msa.h"
+#include "profile.h"
+
+#if	!VER_3_52
+
+#define COMPARE_SIMPLE	0
+
+#if	TIMING
+TICKS g_ticksDP = 0;
+#endif
+
+#if	1
+// Declarations for a flag and alignment routines that are not defined in this
+// file. All routines share one signature: two profiles with their lengths,
+// and a PWPath output parameter; each returns a SCORE.
+// Set to true by the COMPARE_SIMPLE variant of GlobalAlign below.
+extern bool g_bKeepSimpleDP;
+SCORE NWSmall(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path);
+SCORE NWDASmall(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path);
+SCORE NWDASimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path);
+SCORE NWDASimple2(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path);
+SCORE GlobalAlignSimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path);
+
+// GlobalAlignNoDiags: in this build configuration simply forwards to
+// GlobalAlign with the same arguments and returns its score.
+SCORE GlobalAlignNoDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path)
+	{
+	const SCORE scoreAlign = GlobalAlign(PA, uLengthA, PB, uLengthB, Path);
+	return scoreAlign;
+	}
+
+#if	COMPARE_SIMPLE
+
+// GlobalAlign (self-checking variant): aligns with both GlobalAlignSimple and
+// NWSmall, and quits after logging both paths if they are not equal.
+// Path receives the NWSmall path; the NWSmall score is returned.
+// With TIMING enabled, the elapsed clock ticks are added to g_ticksDP.
+SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path)
+	{
+#if	TIMING
+	const TICKS ticksStart = GetClockTicks();
+#endif
+	g_bKeepSimpleDP = true;
+
+// Reference path from the simple implementation.
+	PWPath ReferencePath;
+	GlobalAlignSimple(PA, uLengthA, PB, uLengthB, ReferencePath);
+
+	const SCORE scoreSmall = NWSmall(PA, uLengthA, PB, uLengthB, Path);
+
+	const bool bPathsAgree = Path.Equal(ReferencePath);
+	if (!bPathsAgree)
+		{
+		Log("Simple:\n");
+		ReferencePath.LogMe();
+		Log("Small:\n");
+		Path.LogMe();
+		Quit("Paths differ");
+		}
+
+#if	TIMING
+	const TICKS ticksEnd = GetClockTicks();
+	g_ticksDP += (ticksEnd - ticksStart);
+#endif
+	return scoreSmall;
+	}
+
+#else // COMPARE_SIMPLE
+
+// GlobalAlign: aligns the two profiles with NWSmall, storing the path in Path
+// and returning the NWSmall score.
+// With TIMING enabled, the elapsed clock ticks are added to g_ticksDP.
+SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path)
+	{
+#if	TIMING
+	const TICKS ticksStart = GetClockTicks();
+#endif
+	const SCORE scoreSmall = NWSmall(PA, uLengthA, PB, uLengthB, Path);
+#if	TIMING
+	const TICKS ticksEnd = GetClockTicks();
+	g_ticksDP += (ticksEnd - ticksStart);
+#endif
+	return scoreSmall;
+	}
+
+#endif
+
+#else // 1
+
+// AllInserts: replace Path with uLengthB insert ('I') edges. Every edge has
+// A-prefix length 0; the B-prefix lengths run from 1 up to uLengthB.
+static void AllInserts(PWPath &Path, unsigned uLengthB)
+	{
+	Path.Clear();
+
+	PWEdge InsertEdge;
+	InsertEdge.cType = 'I';
+	InsertEdge.uPrefixLengthA = 0;
+
+	unsigned uPrefixB = 1;
+	while (uPrefixB <= uLengthB)
+		{
+		InsertEdge.uPrefixLengthB = uPrefixB;
+		Path.AppendEdge(InsertEdge);
+		++uPrefixB;
+		}
+	}
+
+// AllDeletes: replace Path with uLengthA delete ('D') edges. Every edge has
+// B-prefix length 0; the A-prefix lengths run from 1 up to uLengthA.
+static void AllDeletes(PWPath &Path, unsigned uLengthA)
+	{
+	Path.Clear();
+
+	PWEdge DeleteEdge;
+	DeleteEdge.cType = 'D';
+	DeleteEdge.uPrefixLengthB = 0;
+
+	unsigned uPrefixA = 1;
+	while (uPrefixA <= uLengthA)
+		{
+		DeleteEdge.uPrefixLengthA = uPrefixA;
+		Path.AppendEdge(DeleteEdge);
+		++uPrefixA;
+		}
+	}
+
+// GlobalAlign: global alignment entry point.
+// An empty profile A yields an all-insert path and an empty profile B an
+// all-delete path, both with score 0 (A is tested first). Otherwise the work
+// is handed to GlobalAlignDiags when g_bDiags is set, else to
+// GlobalAlignNoDiags, and that routine's score is returned.
+// With TIMING enabled, ticks are added to g_ticksDP only on the non-empty
+// path; the two early returns skip the accounting.
+SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path)
+	{
+#if	TIMING
+	const TICKS ticksStart = GetClockTicks();
+#endif
+	if (0 == uLengthA)
+		{
+		AllInserts(Path, uLengthB);
+		return 0;
+		}
+
+	if (0 == uLengthB)
+		{
+		AllDeletes(Path, uLengthA);
+		return 0;
+		}
+
+	const SCORE scoreAlign = g_bDiags ?
+	  GlobalAlignDiags(PA, uLengthA, PB, uLengthB, Path) :
+	  GlobalAlignNoDiags(PA, uLengthA, PB, uLengthB, Path);
+#if	TIMING
+	const TICKS ticksEnd = GetClockTicks();
+	g_ticksDP += (ticksEnd - ticksStart);
+#endif
+	return scoreAlign;
+	}
+
+// GlobalAlignNoDiags: pick the alignment routine for the current settings.
+// g_bDimer takes precedence and selects GlobalAlignDimer; otherwise g_PPScore
+// selects GlobalAlignLE (PPSCORE_LE), GlobalAlignSP (PPSCORE_SP or
+// PPSCORE_SV) or GlobalAlignSPN (PPSCORE_SPN). Any other value is reported
+// through Quit(); 0 is returned after that call.
+SCORE GlobalAlignNoDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path)
+	{
+	if (g_bDimer)
+		return GlobalAlignDimer(PA, uLengthA, PB, uLengthB, Path);
+
+	if (PPSCORE_LE == g_PPScore)
+		return GlobalAlignLE(PA, uLengthA, PB, uLengthB, Path);
+
+	if (PPSCORE_SP == g_PPScore || PPSCORE_SV == g_PPScore)
+		return GlobalAlignSP(PA, uLengthA, PB, uLengthB, Path);
+
+	if (PPSCORE_SPN == g_PPScore)
+		return GlobalAlignSPN(PA, uLengthA, PB, uLengthB, Path);
+
+	Quit("Invalid PP score (GlobalAlignNoDiags)");
+	return 0;
+	}
+
+#endif
+
+#endif	// !VER_3_52

Added: trunk/packages/muscle/branches/upstream/current/glbalign352.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/glbalign352.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/glbalign352.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,55 @@
+#include "muscle.h"
+#include "pwpath.h"
+#include "timing.h"
+#include "textfile.h"
+#include "msa.h"
+#include "profile.h"
+
+#if	VER_3_52
+
+#if	TIMING
+TICKS g_ticksDP = 0;
+#endif
+
+// GlobalAlign: global alignment entry point for the VER_3_52 build.
+// Hands the work to GlobalAlignDiags when g_bDiags is set, else to
+// GlobalAlignNoDiags, and returns that routine's score.
+// With TIMING enabled, the elapsed clock ticks are added to g_ticksDP.
+SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path)
+	{
+#if	TIMING
+	const TICKS ticksStart = GetClockTicks();
+#endif
+	const SCORE scoreAlign = g_bDiags ?
+	  GlobalAlignDiags(PA, uLengthA, PB, uLengthB, Path) :
+	  GlobalAlignNoDiags(PA, uLengthA, PB, uLengthB, Path);
+#if	TIMING
+	const TICKS ticksEnd = GetClockTicks();
+	g_ticksDP += (ticksEnd - ticksStart);
+#endif
+	return scoreAlign;
+	}
+
+// GlobalAlignNoDiags: pick the alignment routine for the current settings.
+// g_bDimer takes precedence and selects GlobalAlignDimer; otherwise g_PPScore
+// selects GlobalAlignLE (PPSCORE_LE), GlobalAlignSP (PPSCORE_SP or
+// PPSCORE_SV) or GlobalAlignSPN (PPSCORE_SPN). Any other value is reported
+// through Quit(); 0 is returned after that call.
+SCORE GlobalAlignNoDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path)
+	{
+	if (g_bDimer)
+		return GlobalAlignDimer(PA, uLengthA, PB, uLengthB, Path);
+
+	if (PPSCORE_LE == g_PPScore)
+		return GlobalAlignLE(PA, uLengthA, PB, uLengthB, Path);
+
+	if (PPSCORE_SP == g_PPScore || PPSCORE_SV == g_PPScore)
+		return GlobalAlignSP(PA, uLengthA, PB, uLengthB, Path);
+
+	if (PPSCORE_SPN == g_PPScore)
+		return GlobalAlignSPN(PA, uLengthA, PB, uLengthB, Path);
+
+	Quit("Invalid PP score (GlobalAlignNoDiags)");
+	return 0;
+	}
+
+#endif	// VER_3_52

Added: trunk/packages/muscle/branches/upstream/current/glbaligndiag.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/glbaligndiag.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/glbaligndiag.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,172 @@
+#include "muscle.h"
+#include "dpreglist.h"
+#include "diaglist.h"
+#include "pwpath.h"
+#include "profile.h"
+#include "timing.h"
+
+#define TRACE		0
+#define TRACE_PATH	0
+#define LIST_DIAGS	0
+
+// Running totals over all GlobalAlignDiags calls, reported by
+// ListDiagSavings: the full uLengthA*uLengthB area of each call ...
+static double g_dDPAreaWithoutDiags = 0.0;
+// ... and the summed area of the Rect regions that were actually aligned.
+static double g_dDPAreaWithDiags = 0.0;
+
+// OffsetPath: add uOffsetA / uOffsetB to the A / B prefix lengths of every
+// edge in Path, in place.
+static void OffsetPath(PWPath &Path, unsigned uOffsetA, unsigned uOffsetB)
+	{
+	const unsigned uEdgeTotal = Path.GetEdgeCount();
+	unsigned uEdge = 0;
+	while (uEdge < uEdgeTotal)
+		{
+		const PWEdge &ReadOnlyEdge = Path.GetEdge(uEdge);
+
+	// Nasty hack -- the edge is only reachable as const, so constness is
+	// cast away to write the shifted values back into the path.
+		PWEdge &WritableEdge = const_cast<PWEdge &>(ReadOnlyEdge);
+		WritableEdge.uPrefixLengthA += uOffsetA;
+		WritableEdge.uPrefixLengthB += uOffsetB;
+
+		++uEdge;
+		}
+	}
+
+// DiagToPath: replace Path with d.m_uLength match ('M') edges. The n-th edge
+// (counting from 1) has prefix lengths d.m_uStartPosA + n and
+// d.m_uStartPosB + n.
+static void DiagToPath(const Diag &d, PWPath &Path)
+	{
+	Path.Clear();
+
+	unsigned uPrefixA = d.m_uStartPosA;
+	unsigned uPrefixB = d.m_uStartPosB;
+	for (unsigned uRemaining = d.m_uLength; uRemaining > 0; --uRemaining)
+		{
+	// Step one position along the diagonal in both profiles.
+		++uPrefixA;
+		++uPrefixB;
+
+		PWEdge MatchEdge;
+		MatchEdge.cType = 'M';
+		MatchEdge.uPrefixLengthA = uPrefixA;
+		MatchEdge.uPrefixLengthB = uPrefixB;
+		Path.AppendEdge(MatchEdge);
+		}
+	}
+
+// AppendRegPath: append every edge of RegPath, in order, to the end of Path.
+static void AppendRegPath(PWPath &Path, const PWPath &RegPath)
+	{
+	const unsigned uEdgeTotal = RegPath.GetEdgeCount();
+	unsigned uEdge = 0;
+	while (uEdge < uEdgeTotal)
+		{
+		Path.AppendEdge(RegPath.GetEdge(uEdge));
+		++uEdge;
+		}
+	}
+
+// GlobalAlignDiags: global alignment of two profiles that avoids dynamic
+// programming wherever a diagonal can be used.
+// Diagonals are found (FindDiags or FindDiagsNuc, chosen by g_Alpha), sorted,
+// filtered with DeleteIncompatible, merged, and converted to a region list.
+// A Diag region becomes a run of 'M' edges directly; a Rect region is aligned
+// with GlobalAlignNoDiags and its path shifted into whole-profile
+// coordinates. Each region's edges are appended to Path in region order.
+//   PA, uLengthA   first profile and its length
+//   PB, uLengthB   second profile and its length
+//   Path           receives the edges; it is appended to, not cleared, here
+// Always returns 0: no alignment score is computed on this code path.
+// Also accumulates the DP areas reported by ListDiagSavings().
+// NOTE(review): since Path is never cleared in this function, callers
+// presumably pass an empty path -- confirm.
+SCORE GlobalAlignDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path)
+	{
+#if	LIST_DIAGS
+	TICKS t1 = GetClockTicks();
+#endif
+
+	DiagList DL;
+
+	if (ALPHA_Amino == g_Alpha)
+		FindDiags(PA, uLengthA, PB, uLengthB, DL);
+	else if (ALPHA_DNA == g_Alpha || ALPHA_RNA == g_Alpha)
+		FindDiagsNuc(PA, uLengthA, PB, uLengthB, DL);
+	else
+		Quit("GlobalAlignDiags: bad alpha");
+
+#if	TRACE
+	Log("GlobalAlignDiags, diag list:\n");
+	DL.LogMe();
+#endif
+
+	DL.Sort();
+	DL.DeleteIncompatible();
+
+#if	TRACE
+	Log("After DeleteIncompatible:\n");
+	DL.LogMe();
+#endif
+
+	MergeDiags(DL);
+
+#if	TRACE
+	Log("After MergeDiags:\n");
+	DL.LogMe();
+#endif
+
+	DPRegionList RL;
+	DiagListToDPRegionList(DL, RL, uLengthA, uLengthB);
+
+#if	TRACE
+	Log("RegionList:\n");
+	RL.LogMe();
+#endif
+
+#if	LIST_DIAGS
+	{
+	TICKS t2 = GetClockTicks();
+	unsigned uArea = RL.GetDPArea();
+	Log("ticks=%ld\n", (long) (t2 - t1));
+	Log("area=%u\n", uArea);
+	}
+#endif
+
+// Convert before multiplying: the product of two unsigned lengths can exceed
+// the range of unsigned and would wrap before being widened to double.
+	g_dDPAreaWithoutDiags += (double) uLengthA*uLengthB;
+
+	double dDPAreaWithDiags = 0.0;
+	const unsigned uRegionCount = RL.GetCount();
+	for (unsigned uRegionIndex = 0; uRegionIndex < uRegionCount; ++uRegionIndex)
+		{
+		const DPRegion &r = RL.Get(uRegionIndex);
+
+		PWPath RegPath;
+		if (DPREGIONTYPE_Diag == r.m_Type)
+			{
+		// No DP needed: the diagonal is a run of matches.
+			DiagToPath(r.m_Diag, RegPath);
+#if	TRACE_PATH
+			Log("DiagToPath, path=\n");
+			RegPath.LogMe();
+#endif
+			}
+		else if (DPREGIONTYPE_Rect == r.m_Type)
+			{
+			const unsigned uRegStartPosA = r.m_Rect.m_uStartPosA;
+			const unsigned uRegStartPosB = r.m_Rect.m_uStartPosB;
+			const unsigned uRegLengthA = r.m_Rect.m_uLengthA;
+			const unsigned uRegLengthB = r.m_Rect.m_uLengthB;
+			const ProfPos *RegPA = PA + uRegStartPosA;
+			const ProfPos *RegPB = PB + uRegStartPosB;
+
+		// Same overflow consideration as for the whole-profile area above.
+			dDPAreaWithDiags += (double) uRegLengthA*uRegLengthB;
+			GlobalAlignNoDiags(RegPA, uRegLengthA, RegPB, uRegLengthB, RegPath);
+#if	TRACE_PATH
+			Log("GlobalAlignNoDiags RegPath=\n");
+			RegPath.LogMe();
+#endif
+		// The sub-alignment used region-relative prefix lengths; shift them
+		// so they refer to the whole profiles.
+			OffsetPath(RegPath, uRegStartPosA, uRegStartPosB);
+#if	TRACE_PATH
+			Log("After offset path, RegPath=\n");
+			RegPath.LogMe();
+#endif
+			}
+		else
+			Quit("GlobalAlignDiags, Invalid region type %u", r.m_Type);
+
+		AppendRegPath(Path, RegPath);
+#if	TRACE_PATH
+		Log("After AppendPath, path=");
+		Path.LogMe();
+#endif
+		}
+
+#if	TRACE
+	{
+	double dDPAreaWithoutDiags = (double) uLengthA*uLengthB;
+	Log("DP area with diags %.3g without %.3g pct saved %.3g %%\n",
+	  dDPAreaWithDiags, dDPAreaWithoutDiags, (1.0 - dDPAreaWithDiags/dDPAreaWithoutDiags)*100.0);
+	}
+#endif
+	g_dDPAreaWithDiags += dDPAreaWithDiags;
+	return 0;
+	}
+
+// ListDiagSavings: log the percentage of DP area saved by diagonals, summed
+// over all GlobalAlignDiags calls so far. Does nothing unless both g_bVerbose
+// and g_bDiags are set. If no area has been accumulated yet, 0.0% is
+// reported instead of dividing by zero.
+void ListDiagSavings()
+	{
+	if (!g_bVerbose || !g_bDiags)
+		return;
+
+	double dPct = 0.0;
+	if (g_dDPAreaWithoutDiags > 0.0)
+		{
+		const double dAreaSaved = g_dDPAreaWithoutDiags - g_dDPAreaWithDiags;
+		dPct = dAreaSaved*100.0/g_dDPAreaWithoutDiags;
+		}
+	Log("DP area saved by diagonals %-4.1f%%\n", dPct);
+	}

Added: trunk/packages/muscle/branches/upstream/current/glbalignle.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/glbalignle.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/glbalignle.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,435 @@
+#include "muscle.h"
+#include "profile.h"
+#include "pwpath.h"
+
+#define	OCC	1
+
+// Work buffers for GlobalAlignLE, allocated (and grown) by AllocDPMem and
+// kept between calls in the single static instance DPM below.
+struct DP_MEMORY
+	{
+// Number of elements allocated for each per-position array; 0 until the
+// first allocation.
+	unsigned uLength;
+// Per-position copies of m_scoreGapOpen / m_scoreGapClose of profiles A and B.
+	SCORE *GapOpenA;
+	SCORE *GapOpenB;
+	SCORE *GapCloseA;
+	SCORE *GapCloseB;
+// Score rows of uLength elements each.
+	SCORE *MPrev;
+	SCORE *MCurr;
+	SCORE *MWork;
+	SCORE *DPrev;
+	SCORE *DCurr;
+	SCORE *DWork;
+// 20 rows of uLength scores: ScoreMxB[letter][j] is a copy of
+// PB[j].m_AAScores[letter].
+	SCORE **ScoreMxB;
+#if	OCC
+// Per-position copies of m_fOcc of profiles A and B.
+	FCOUNT *OccA;
+	FCOUNT *OccB;
+#endif
+// uLength rows of 20 entries: copy of PA[i].m_uSortOrder.
+	unsigned **SortOrderA;
+	unsigned *uDeletePos;
+// uLength rows of 20 entries: copy of PA[i].m_fcCounts.
+	FCOUNT **FreqsA;
+// uLength x uLength matrix of traceback values.
+	int **TraceBack;
+	};
+
+// Static storage, so every field (including uLength) starts out zero.
+static struct DP_MEMORY DPM;
+
+// AllocDPMem: make sure the shared DPM work buffers can hold
+// max(uLengthA, uLengthB) + 1 entries per array (one per prefix length).
+// If the current capacity already suffices nothing happens; otherwise every
+// buffer is freed and reallocated with head-room. Buffer contents are not
+// preserved across a reallocation and new buffers are not initialized.
+static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
+	{
+// Number of rows in ScoreMxB and of columns in SortOrderA / FreqsA.
+	const unsigned uLetterCount = 20;
+
+// Max prefix length
+	unsigned uLength = (uLengthA > uLengthB ? uLengthA : uLengthB) + 1;
+
+// The existing buffers are sufficient whenever capacity >= required length,
+// including the case where the two are exactly equal.
+	if (uLength <= DPM.uLength)
+		return;
+
+// Add 256 to allow for future expansion and
+// round up to next multiple of 32.
+	uLength += 256;
+	uLength += 32 - uLength%32;
+
+// Release the previous generation of buffers, if there was one. The row
+// arrays must be freed before the arrays of row pointers that hold them.
+	const unsigned uOldLength = DPM.uLength;
+	if (uOldLength > 0)
+		{
+		for (unsigned i = 0; i < uOldLength; ++i)
+			{
+			delete[] DPM.TraceBack[i];
+			delete[] DPM.FreqsA[i];
+			delete[] DPM.SortOrderA[i];
+			}
+		for (unsigned n = 0; n < uLetterCount; ++n)
+			delete[] DPM.ScoreMxB[n];
+
+		delete[] DPM.MPrev;
+		delete[] DPM.MCurr;
+		delete[] DPM.MWork;
+		delete[] DPM.DPrev;
+		delete[] DPM.DCurr;
+		delete[] DPM.DWork;
+		delete[] DPM.uDeletePos;
+		delete[] DPM.GapOpenA;
+		delete[] DPM.GapOpenB;
+		delete[] DPM.GapCloseA;
+		delete[] DPM.GapCloseB;
+		delete[] DPM.SortOrderA;
+		delete[] DPM.FreqsA;
+		delete[] DPM.ScoreMxB;
+		delete[] DPM.TraceBack;
+#if	OCC
+		delete[] DPM.OccA;
+		delete[] DPM.OccB;
+#endif
+		}
+
+	DPM.uLength = uLength;
+
+	DPM.GapOpenA = new SCORE[uLength];
+	DPM.GapOpenB = new SCORE[uLength];
+	DPM.GapCloseA = new SCORE[uLength];
+	DPM.GapCloseB = new SCORE[uLength];
+#if	OCC
+	DPM.OccA = new FCOUNT[uLength];
+	DPM.OccB = new FCOUNT[uLength];
+#endif
+
+	DPM.SortOrderA = new unsigned*[uLength];
+	DPM.FreqsA = new FCOUNT*[uLength];
+	DPM.ScoreMxB = new SCORE*[uLetterCount];
+	DPM.MPrev = new SCORE[uLength];
+	DPM.MCurr = new SCORE[uLength];
+	DPM.MWork = new SCORE[uLength];
+
+	DPM.DPrev = new SCORE[uLength];
+	DPM.DCurr = new SCORE[uLength];
+	DPM.DWork = new SCORE[uLength];
+	DPM.uDeletePos = new unsigned[uLength];
+
+	DPM.TraceBack = new int*[uLength];
+
+// One row of per-position scores for each letter.
+	for (unsigned uLetter = 0; uLetter < uLetterCount; ++uLetter)
+		DPM.ScoreMxB[uLetter] = new SCORE[uLength];
+
+// Per-position rows; TraceBack is a full uLength x uLength matrix.
+	for (unsigned i = 0; i < uLength; ++i)
+		{
+		DPM.SortOrderA[i] = new unsigned[uLetterCount];
+		DPM.FreqsA[i] = new FCOUNT[uLetterCount];
+		DPM.TraceBack[i] = new int[uLength];
+		}
+	}
+
+SCORE GlobalAlignLE(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path)
+	{
+	SetTermGaps(PA, uLengthA);
+	SetTermGaps(PB, uLengthB);
+
+	const unsigned uPrefixCountA = uLengthA + 1;
+	const unsigned uPrefixCountB = uLengthB + 1;
+
+	AllocDPMem(uLengthA, uLengthB);
+
+	SCORE *GapOpenA = DPM.GapOpenA;
+	SCORE *GapOpenB = DPM.GapOpenB;
+	SCORE *GapCloseA = DPM.GapCloseA;
+	SCORE *GapCloseB = DPM.GapCloseB;
+
+	unsigned **SortOrderA = DPM.SortOrderA;
+	FCOUNT **FreqsA = DPM.FreqsA;
+	SCORE **ScoreMxB = DPM.ScoreMxB;
+	SCORE *MPrev = DPM.MPrev;
+	SCORE *MCurr = DPM.MCurr;
+	SCORE *MWork = DPM.MWork;
+
+	SCORE *DPrev = DPM.DPrev;
+	SCORE *DCurr = DPM.DCurr;
+	SCORE *DWork = DPM.DWork;
+
+#if	OCC
+	FCOUNT *OccA = DPM.OccA;
+	FCOUNT *OccB = DPM.OccB;
+#endif
+
+	unsigned *uDeletePos = DPM.uDeletePos;
+
+	int **TraceBack = DPM.TraceBack;
+
+	for (unsigned i = 0; i < uLengthA; ++i)
+		{
+		GapOpenA[i] = PA[i].m_scoreGapOpen;
+		GapCloseA[i] = PA[i].m_scoreGapClose;
+#if	OCC
+		OccA[i] = PA[i].m_fOcc;
+#endif
+
+		for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
+			{
+			SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];
+			FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];
+			}
+		}
+
+	for (unsigned j = 0; j < uLengthB; ++j)
+		{
+		GapOpenB[j] = PB[j].m_scoreGapOpen;
+		GapCloseB[j] = PB[j].m_scoreGapClose;
+#if	OCC
+		OccB[j] = PB[j].m_fOcc;
+#endif
+		}
+
+	for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
+		{
+		for (unsigned j = 0; j < uLengthB; ++j)
+			ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];
+		}
+
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));
+
+// Special case for i=0
+	unsigned **ptrSortOrderA = SortOrderA;
+	FCOUNT **ptrFreqsA = FreqsA;
+	assert(ptrSortOrderA == &(SortOrderA[0]));
+	assert(ptrFreqsA == &(FreqsA[0]));
+	TraceBack[0][0] = 0;
+
+	SCORE scoreSum = 0;
+	unsigned *ptrSortOrderAi = SortOrderA[0];
+	const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
+	FCOUNT *ptrFreqsAi = FreqsA[0];
+	for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
+		{
+		const unsigned uLetter = *ptrSortOrderAi;
+		const FCOUNT fcLetter = ptrFreqsAi[uLetter];
+		if (0 == fcLetter)
+			break;
+		scoreSum += fcLetter*ScoreMxB[uLetter][0];
+		}
+	if (0 == scoreSum)
+		MPrev[0] = -2.5;
+	else
+		{
+#if	OCC
+		MPrev[0] = (logf(scoreSum) - g_scoreCenter)*OccA[0]*OccB[0];
+#else
+		MPrev[0] = (logf(scoreSum) - g_scoreCenter);
+#endif
+		}
+
+// D(0,0) is -infinity (requires I->D).
+	DPrev[0] = MINUS_INFINITY;
+
+	for (unsigned j = 1; j < uLengthB; ++j)
+		{
+	// Only way to get M(0, j) looks like this:
+	//		A	----X
+	//		B	XXXXX
+	//			0   j
+	// So gap-open at j=0, gap-close at j-1.
+		SCORE scoreSum = 0;
+		unsigned *ptrSortOrderAi = SortOrderA[0];
+		const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
+		FCOUNT *ptrFreqsAi = FreqsA[0];
+		for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
+			{
+			const unsigned uLetter = *ptrSortOrderAi;
+			const FCOUNT fcLetter = ptrFreqsAi[uLetter];
+			if (0 == fcLetter)
+				break;
+			scoreSum += fcLetter*ScoreMxB[uLetter][j];
+			}
+		if (0 == scoreSum)
+			MPrev[j] = -2.5;
+		else
+			{
+#if	OCC
+			MPrev[j] = (logf(scoreSum) - g_scoreCenter)*OccA[0]*OccB[j] +
+			  GapOpenB[0] + GapCloseB[j-1];
+#else
+			MPrev[j] = (logf(scoreSum) - g_scoreCenter) +
+			  GapOpenB[0] + GapCloseB[j-1];
+#endif
+			}
+		TraceBack[0][j] = -(int) j;
+
+	// Assume no D->I transitions, then can't be a delete if only
+	// one letter from A.
+		DPrev[j] = MINUS_INFINITY;
+		}
+
+	SCORE IPrev_j_1;
+	for (unsigned i = 1; i < uLengthA; ++i)
+		{
+		++ptrSortOrderA;
+		++ptrFreqsA;
+		assert(ptrSortOrderA == &(SortOrderA[i]));
+		assert(ptrFreqsA == &(FreqsA[i]));
+
+		SCORE *ptrMCurr_j = MCurr;
+		memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));
+		const FCOUNT *FreqsAi = *ptrFreqsA;
+
+		const unsigned *SortOrderAi = *ptrSortOrderA;
+		const unsigned *ptrSortOrderAiEnd = SortOrderAi + 20;
+		const SCORE *ptrMCurrMax = MCurr + uLengthB;
+		for (const unsigned *ptrSortOrderAi = SortOrderAi;
+		  ptrSortOrderAi != ptrSortOrderAiEnd;
+		  ++ptrSortOrderAi)
+			{
+			const unsigned uLetter = *ptrSortOrderAi;
+			SCORE *NSBR_Letter = ScoreMxB[uLetter];
+			const FCOUNT fcLetter = FreqsAi[uLetter];
+			if (0 == fcLetter)
+				break;
+			SCORE *ptrNSBR = NSBR_Letter;
+			for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)
+				*ptrMCurr += fcLetter*(*ptrNSBR++);
+			}
+
+#if	OCC
+		const FCOUNT OccAi = OccA[i];
+#endif
+		for (unsigned j = 0; j < uLengthB; ++j)
+			{
+			if (MCurr[j] == 0)
+				MCurr[j] = -2.5;
+			else
+#if	OCC
+				MCurr[j] = (logf(MCurr[j]) - g_scoreCenter)*OccAi*OccB[j];
+#else
+				MCurr[j] = (logf(MCurr[j]) - g_scoreCenter);
+#endif
+			}
+
+		ptrMCurr_j = MCurr;
+		unsigned *ptrDeletePos = uDeletePos;
+
+	// Special case for j=0
+	// Only way to get M(i, 0) looks like this:
+	//			0   i
+	//		A	XXXXX
+	//		B	----X
+	// So gap-open at i=0, gap-close at i-1.
+		assert(ptrMCurr_j == &(MCurr[0]));
+		*ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];
+
+		++ptrMCurr_j;
+
+		int *ptrTraceBack_ij = TraceBack[i];
+		*ptrTraceBack_ij++ = (int) i;
+
+		SCORE *ptrMPrev_j = MPrev;
+		SCORE *ptrDPrev = DPrev;
+		SCORE d = *ptrDPrev;
+		SCORE DNew = *ptrMPrev_j + GapOpenA[i];
+		if (DNew > d)
+			{
+			d = DNew;
+			*ptrDeletePos = i;
+			}
+
+		SCORE *ptrDCurr = DCurr;
+
+		assert(ptrDCurr == &(DCurr[0]));
+		*ptrDCurr = d;
+
+	// Can't have an insert if no letters from B
+		IPrev_j_1 = MINUS_INFINITY;
+
+		unsigned uInsertPos = 0;
+		const SCORE scoreGapOpenAi = GapOpenA[i];
+		const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];
+
+		for (unsigned j = 1; j < uLengthB; ++j)
+			{
+		// Here, MPrev_j is preserved from previous
+		// iteration so with current i,j is M[i-1][j-1]
+			SCORE MPrev_j = *ptrMPrev_j;
+			SCORE INew = MPrev_j + GapOpenB[j];
+			if (INew > IPrev_j_1)
+				{
+				IPrev_j_1 = INew;
+				uInsertPos = j;
+				}
+
+			SCORE scoreMax = MPrev_j;
+
+			assert(ptrDPrev == &(DPrev[j-1]));
+			SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;
+			if (scoreD > scoreMax)
+				{
+				scoreMax = scoreD;
+				assert(ptrDeletePos == &(uDeletePos[j-1]));
+				*ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
+				assert(*ptrTraceBack_ij > 0);
+				}
+			++ptrDeletePos;
+
+			SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];
+			if (scoreI > scoreMax)
+				{
+				scoreMax = scoreI;
+				*ptrTraceBack_ij = (int) uInsertPos - (int) j;
+				assert(*ptrTraceBack_ij < 0);
+				}
+
+			assert(ptrSortOrderA == &(SortOrderA[i]));
+			assert(ptrFreqsA == &(FreqsA[i]));
+
+			*ptrMCurr_j += scoreMax;
+			assert(ptrMCurr_j == &(MCurr[j]));
+			++ptrMCurr_j;
+
+			MPrev_j = *(++ptrMPrev_j);
+			assert(ptrDPrev == &(DPrev[j]));
+			SCORE d = *ptrDPrev;
+			SCORE DNew = MPrev_j + scoreGapOpenAi;
+			if (DNew > d)
+				{
+				d = DNew;
+				assert(ptrDeletePos == &uDeletePos[j]);
+				*ptrDeletePos = i;
+				}
+			assert(ptrDCurr + 1 == &(DCurr[j]));
+			*(++ptrDCurr) = d;
+
+			++ptrTraceBack_ij;
+			}
+
+		Rotate(MPrev, MCurr, MWork);
+		Rotate(DPrev, DCurr, DWork);
+		}
+
+// Special case for i=uLengthA
+	SCORE IPrev = MINUS_INFINITY;
+
+	unsigned uInsertPos;
+
+	for (unsigned j = 1; j < uLengthB; ++j)
+		{
+		SCORE INew = MPrev[j-1] + GapOpenB[j];
+		if (INew > IPrev)
+			{
+			uInsertPos = j;
+			IPrev = INew;
+			}
+		}
+
+// Special case for i=uLengthA, j=uLengthB
+	SCORE scoreMax = MPrev[uLengthB-1];
+	int iTraceBack = 0;
+
+	SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];
+	if (scoreD > scoreMax)
+		{
+		scoreMax = scoreD;
+		iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
+		}
+
+	SCORE scoreI = IPrev + GapCloseB[uLengthB-1];
+	if (scoreI > scoreMax)
+		{
+		scoreMax = scoreI;
+		iTraceBack = (int) uInsertPos - (int) uLengthB;
+		}
+
+	TraceBack[uLengthA][uLengthB] = iTraceBack;
+
+	TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);
+
+	return scoreMax;
+	}

Added: trunk/packages/muscle/branches/upstream/current/glbalignsimple.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/glbalignsimple.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/glbalignsimple.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,368 @@
+#include "muscle.h"
+#include <math.h>
+#include "pwpath.h"
+#include "profile.h"
+#include <stdio.h>
+
+#define	TRACE	0
+
+#if	1 // SINGLE_AFFINE
+
+extern bool g_bKeepSimpleDP;
+extern SCORE *g_DPM;
+extern SCORE *g_DPD;
+extern SCORE *g_DPI;
+extern char *g_TBM;
+extern char *g_TBD;
+extern char *g_TBI;
+
+// Format a score for the DP matrix dumps: right-aligned, at least six
+// characters wide, one decimal place. Scores below -100000 are shown
+// as "*" instead of a number.
+// The result points at a string literal or at a static buffer that is
+// overwritten by the next call; the caller must not free it.
+static const char *LocalScoreToStr(SCORE s)
+	{
+	static char str[16];
+	if (s < -100000)
+		return "     *";
+// "%6.1f" is only a minimum width, so bound the write to the buffer;
+// an unusually large score is truncated instead of overrunning str.
+	snprintf(str, sizeof(str), "%6.1f", s);
+	return str;
+	}
+
+// Log a trace-back matrix as a table. Columns are prefix lengths of B,
+// rows are prefix lengths of A; every heading shows the prefix length and
+// the consensus character of the profile position that ends the prefix
+// (blank for the empty prefix). Cells are read through the TBM() macro.
+static void ListTB(const char *TBM_, const ProfPos *PA, const ProfPos *PB,
+  unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{
+// Heading line
+	Log("        ");
+	unsigned uCol = 0;
+	while (uCol != uPrefixCountB)
+		{
+		const char cHead = (uCol > 0) ? ConsensusChar(PB[uCol - 1]) : ' ';
+		Log(" %4u:%c", uCol, cHead);
+		++uCol;
+		}
+	Log("\n");
+
+// One line per prefix of A
+	for (unsigned uRow = 0; uRow != uPrefixCountA; ++uRow)
+		{
+		const char cHead = (uRow > 0) ? ConsensusChar(PA[uRow - 1]) : ' ';
+		Log("%4u:%c  ", uRow, cHead);
+
+		for (unsigned uCell = 0; uCell != uPrefixCountB; ++uCell)
+			Log(" %6c", TBM(uRow, uCell));
+
+		Log("\n");
+		}
+	}
+
+// Log a DP score matrix as a table. Columns are prefix lengths of B,
+// rows are prefix lengths of A; every heading shows the prefix length and
+// the consensus character of the profile position that ends the prefix
+// (blank for the empty prefix). Cells are read through the DPM() macro
+// and formatted by LocalScoreToStr.
+static void ListDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,
+  unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{
+// Heading line
+	Log("        ");
+	unsigned uCol = 0;
+	while (uCol != uPrefixCountB)
+		{
+		const char cHead = (uCol > 0) ? ConsensusChar(PB[uCol - 1]) : ' ';
+		Log(" %4u:%c", uCol, cHead);
+		++uCol;
+		}
+	Log("\n");
+
+// One line per prefix of A
+	for (unsigned uRow = 0; uRow != uPrefixCountA; ++uRow)
+		{
+		const char cHead = (uRow > 0) ? ConsensusChar(PA[uRow - 1]) : ' ';
+		Log("%4u:%c  ", uRow, cHead);
+
+		for (unsigned uCell = 0; uCell != uPrefixCountB; ++uCell)
+			Log(" %s", LocalScoreToStr(DPM(uRow, uCell)));
+
+		Log("\n");
+		}
+	}
+
+// Straightforward global alignment of two profiles.
+//
+// Fills complete match (M), delete (D) and insert (I) score matrices and
+// the matching trace-back matrices over every (prefix of A, prefix of B)
+// pair, then follows the trace-back from the full-length corner to build
+// the path.
+//
+//	PA, uLengthA	profile A and its number of positions (asserted > 0)
+//	PB, uLengthB	profile B and its number of positions (asserted > 0)
+//	Path		cleared, then receives the alignment path
+//
+// Returns the best of M, D and I at (uLengthA, uLengthB), where D and I
+// include the gap-close score of the last position of A or B respectively.
+//
+// Memory: when g_bKeepSimpleDP is set, the M/D/I score and trace-back
+// matrices are handed to the g_DP*/g_TB* globals and are not freed here;
+// otherwise they are freed before returning. The letter-pair score matrix
+// DPL_ is not exported through any global and is always freed here.
+//
+// NOTE(review): DPL/DPM/DPD/DPI and TBM/TBD/TBI are accessor macros defined
+// outside this file; presumably they index the local *_ arrays -- confirm.
+SCORE GlobalAlignSimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path)
+	{
+	assert(uLengthB > 0 && uLengthA > 0);
+
+	SetTermGaps(PA, uLengthA);
+	SetTermGaps(PB, uLengthB);
+
+	const unsigned uPrefixCountA = uLengthA + 1;
+	const unsigned uPrefixCountB = uLengthB + 1;
+
+// Allocate DP matrices
+	const size_t LM = uPrefixCountA*uPrefixCountB;
+	SCORE *DPL_ = new SCORE[LM];
+	SCORE *DPM_ = new SCORE[LM];
+	SCORE *DPD_ = new SCORE[LM];
+	SCORE *DPI_ = new SCORE[LM];
+
+	char *TBM_ = new char[LM];
+	char *TBD_ = new char[LM];
+	char *TBI_ = new char[LM];
+
+// '?' marks cells that were never assigned; the trace-back loop below
+// quits if it ever reads one.
+	memset(TBM_, '?', LM);
+	memset(TBD_, '?', LM);
+	memset(TBI_, '?', LM);
+
+	DPM(0, 0) = 0;
+	DPD(0, 0) = MINUS_INFINITY;
+	DPI(0, 0) = MINUS_INFINITY;
+
+	DPM(1, 0) = MINUS_INFINITY;
+	DPD(1, 0) = PA[0].m_scoreGapOpen;
+	TBD(1, 0) = 'D';
+	DPI(1, 0) = MINUS_INFINITY;
+
+	DPM(0, 1) = MINUS_INFINITY;
+	DPD(0, 1) = MINUS_INFINITY;
+	DPI(0, 1) = PB[0].m_scoreGapOpen;
+	TBI(0, 1) = 'I';
+
+// Empty prefix of B is special case
+	for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
+		{
+	// M=LetterA+LetterB, impossible with empty prefix
+		DPM(uPrefixLengthA, 0) = MINUS_INFINITY;
+
+	// D=LetterA+GapB
+		DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) + g_scoreGapExtend;
+		TBD(uPrefixLengthA, 0) = 'D';
+
+	// I=GapA+LetterB, impossible with empty prefix
+		DPI(uPrefixLengthA, 0) = MINUS_INFINITY;
+		}
+
+// Empty prefix of A is special case
+	for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+		{
+	// M=LetterA+LetterB, impossible with empty prefix
+		DPM(0, uPrefixLengthB) = MINUS_INFINITY;
+
+	// D=LetterA+GapB, impossible with empty prefix
+		DPD(0, uPrefixLengthB) = MINUS_INFINITY;
+
+	// I=GapA+LetterB
+		DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) + g_scoreGapExtend;
+		TBI(0, uPrefixLengthB) = 'I';
+		}
+
+// Special case to agree with NWFast, no D-I transitions so...
+	DPD(uLengthA, 0) = MINUS_INFINITY;
+//	DPI(0, uLengthB) = MINUS_INFINITY;
+
+// ============
+// Main DP loop
+// ============
+// scoreGapCloseA / scoreGapCloseB hold the gap-close score of the previous
+// position of A / B; they are MINUS_INFINITY while that position does not
+// exist (first row / first column).
+	SCORE scoreGapCloseB = MINUS_INFINITY;
+	for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+		{
+		const ProfPos &PPB = PB[uPrefixLengthB - 1];
+
+		SCORE scoreGapCloseA = MINUS_INFINITY;
+		for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
+			{
+			const ProfPos &PPA = PA[uPrefixLengthA - 1];
+
+			{
+		// Match M=LetterA+LetterB
+			SCORE scoreLL = ScoreProfPos2(PPA, PPB);
+			DPL(uPrefixLengthA, uPrefixLengthB) = scoreLL;
+
+			SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1);
+			SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseA;
+			SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseB;
+
+		// Ties are resolved in the order M, D, I
+			SCORE scoreBest;
+			if (scoreMM >= scoreDM && scoreMM >= scoreIM)
+				{
+				scoreBest = scoreMM;
+				TBM(uPrefixLengthA, uPrefixLengthB) = 'M';
+				}
+			else if (scoreDM >= scoreMM && scoreDM >= scoreIM)
+				{
+				scoreBest = scoreDM;
+				TBM(uPrefixLengthA, uPrefixLengthB) = 'D';
+				}
+			else
+				{
+				assert(scoreIM >= scoreMM && scoreIM >= scoreDM);
+				scoreBest = scoreIM;
+				TBM(uPrefixLengthA, uPrefixLengthB) = 'I';
+				}
+			DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL;
+			}
+
+			{
+		// Delete D=LetterA+GapB
+			SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) +
+			  PA[uPrefixLengthA-1].m_scoreGapOpen;
+			SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) + g_scoreGapExtend;
+
+			SCORE scoreBest;
+			if (scoreMD >= scoreDD)
+				{
+				scoreBest = scoreMD;
+				TBD(uPrefixLengthA, uPrefixLengthB) = 'M';
+				}
+			else
+				{
+				assert(scoreDD >= scoreMD);
+				scoreBest = scoreDD;
+				TBD(uPrefixLengthA, uPrefixLengthB) = 'D';
+				}
+			DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;
+			}
+
+		// Insert I=GapA+LetterB
+			{
+			SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) +
+			  PB[uPrefixLengthB - 1].m_scoreGapOpen;
+			SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) + g_scoreGapExtend;
+
+			SCORE scoreBest;
+			if (scoreMI >= scoreII)
+				{
+				scoreBest = scoreMI;
+				TBI(uPrefixLengthA, uPrefixLengthB) = 'M';
+				}
+			else
+				{
+				assert(scoreII > scoreMI);
+				scoreBest = scoreII;
+				TBI(uPrefixLengthA, uPrefixLengthB) = 'I';
+				}
+			DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;
+			}
+
+			scoreGapCloseA = PPA.m_scoreGapClose;
+			}
+		scoreGapCloseB = PPB.m_scoreGapClose;
+		}
+
+#if TRACE
+	Log("\n");
+	Log("Simple DPL:\n");
+	ListDP(DPL_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("\n");
+	Log("Simple DPM:\n");
+	ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("\n");
+	Log("Simple DPD:\n");
+	ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("\n");
+	Log("Simple DPI:\n");
+	ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("\n");
+	Log("Simple TBM:\n");
+	ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("\n");
+	Log("Simple TBD:\n");
+	ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("\n");
+	Log("Simple TBI:\n");
+	ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB);
+#endif
+
+// Trace-back
+// ==========
+	Path.Clear();
+
+// Find last edge
+	SCORE M = DPM(uLengthA, uLengthB);
+	SCORE D = DPD(uLengthA, uLengthB) + PA[uLengthA-1].m_scoreGapClose;
+	SCORE I = DPI(uLengthA, uLengthB) + PB[uLengthB-1].m_scoreGapClose;
+	char cEdgeType = '?';
+
+	SCORE BestScore = MINUS_INFINITY;
+	if (M >= D && M >= I)
+		{
+		cEdgeType = 'M';
+		BestScore = M;
+		}
+	else if (D >= M && D >= I)
+		{
+		cEdgeType = 'D';
+		BestScore = D;
+		}
+	else
+		{
+		assert(I >= M && I >= D);
+		cEdgeType = 'I';
+		BestScore = I;
+		}
+
+#if	TRACE
+	Log("Simple: MAB=%.4g DAB=%.4g IAB=%.4g best=%c\n", M, D, I, cEdgeType);
+#endif
+
+// Walk back from (uLengthA, uLengthB) to (0, 0). Each step records the
+// current edge, looks up the type of the preceding edge in the trace-back
+// matrix for the current state, then steps back by one letter of A (D),
+// one letter of B (I) or one of each (M).
+	unsigned PLA = uLengthA;
+	unsigned PLB = uLengthB;
+	for (;;)
+		{
+		PWEdge Edge;
+		Edge.cType = cEdgeType;
+		Edge.uPrefixLengthA = PLA;
+		Edge.uPrefixLengthB = PLB;
+#if	TRACE
+		Log("Prepend %c%d.%d\n", Edge.cType, PLA, PLB);
+#endif
+		Path.PrependEdge(Edge);
+
+		switch (cEdgeType)
+			{
+		case 'M':
+			assert(PLA > 0);
+			assert(PLB > 0);
+			cEdgeType = TBM(PLA, PLB);
+			--PLA;
+			--PLB;
+			break;
+
+		case 'D':
+			assert(PLA > 0);
+			cEdgeType = TBD(PLA, PLB);
+			--PLA;
+			break;
+
+		case 'I':
+			assert(PLB > 0);
+			cEdgeType = TBI(PLA, PLB);
+			--PLB;
+			break;
+
+		default:
+			Quit("Invalid edge %c", cEdgeType);
+			}
+		if (0 == PLA && 0 == PLB)
+			break;
+		}
+	Path.Validate();
+
+//	SCORE Score = TraceBack(PA, uLengthA, PB, uLengthB, DPM_, DPD_, DPI_, Path);
+
+#if	TRACE
+	SCORE scorePath = FastScorePath2(PA, uLengthA, PB, uLengthB, Path);
+	Path.LogMe();
+	Log("Score = %s Path = %s\n", LocalScoreToStr(BestScore), LocalScoreToStr(scorePath));
+#endif
+
+// DPL_ is never handed to a global, so it has no owner after this function
+// returns; release it on both paths to avoid leaking it on every call.
+	delete[] DPL_;
+
+	if (g_bKeepSimpleDP)
+		{
+	// Ownership of the remaining matrices passes to the globals
+		g_DPM = DPM_;
+		g_DPD = DPD_;
+		g_DPI = DPI_;
+
+		g_TBM = TBM_;
+		g_TBD = TBD_;
+		g_TBI = TBI_;
+		}
+	else
+		{
+		delete[] DPM_;
+		delete[] DPD_;
+		delete[] DPI_;
+
+		delete[] TBM_;
+		delete[] TBD_;
+		delete[] TBI_;
+		}
+
+	return BestScore;
+	}
+
+#endif // SINGLE_AFFINE

Added: trunk/packages/muscle/branches/upstream/current/glbalignsp.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/glbalignsp.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/glbalignsp.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,374 @@
+#include "muscle.h"
+#include "profile.h"
+#include "pwpath.h"
+
+// Scratch buffers for GlobalAlignSP. One file-scope instance (DPM, below)
+// is re-used between calls; AllocDPMem frees and re-allocates every buffer
+// when a profile too long for the current buffers arrives.
+struct DP_MEMORY
+	{
+// Allocated length of the per-position arrays; 0 while nothing is allocated.
+	unsigned uLength;
+// Gap-open / gap-close score of each position, copied from profiles A and B.
+	SCORE *GapOpenA;
+	SCORE *GapOpenB;
+	SCORE *GapCloseA;
+	SCORE *GapCloseB;
+// Previous and current row of match scores, plus a third buffer passed to
+// Rotate() after each row.
+	SCORE *MPrev;
+	SCORE *MCurr;
+	SCORE *MWork;
+// Same three-buffer arrangement for the delete scores.
+	SCORE *DPrev;
+	SCORE *DCurr;
+	SCORE *DWork;
+// ScoreMxB[letter][j] is m_AAScores[letter] of position j of B (20 rows).
+	SCORE **ScoreMxB;
+// SortOrderA[i] is m_uSortOrder of position i of A (20 entries per row).
+	unsigned **SortOrderA;
+// uDeletePos[j] is the row index at which the delete score currently held
+// for column j was opened.
+	unsigned *uDeletePos;
+// FreqsA[i] is m_fcCounts of position i of A (20 entries per row).
+	FCOUNT **FreqsA;
+// uLength x uLength trace-back matrix, consumed by TraceBackToPath:
+// 0 when the best predecessor is the match score, positive when it came
+// from a delete, negative when it came from an insert.
+	int **TraceBack;
+	};
+
+static struct DP_MEMORY DPM;
+
+// Make sure the shared DP buffers are large enough for profiles of
+// uLengthA and uLengthB positions. Nothing is done while the current
+// buffers are strictly longer than the larger prefix count (length + 1);
+// otherwise every buffer is released and re-allocated with headroom.
+// Buffer contents are not preserved and not initialized.
+static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
+	{
+// Larger of the two prefix counts
+	const unsigned uLonger = (uLengthA > uLengthB) ? uLengthA : uLengthB;
+	const unsigned uNeeded = uLonger + 1;
+	if (DPM.uLength > uNeeded)
+		return;
+
+// 256 spare entries for later growth, then pad up to a multiple of 32
+// (a whole extra 32 is added when the size already is a multiple).
+	const unsigned uPadded = uNeeded + 256;
+	const unsigned uNewLength = uPadded + (32 - uPadded%32);
+
+// Release the previous buffers, if there are any
+	const unsigned uPrevLength = DPM.uLength;
+	if (0 != uPrevLength)
+		{
+		for (unsigned uRow = 0; uRow != uPrevLength; ++uRow)
+			{
+			delete[] DPM.TraceBack[uRow];
+			delete[] DPM.FreqsA[uRow];
+			delete[] DPM.SortOrderA[uRow];
+			}
+		for (unsigned uLetter = 0; uLetter != 20; ++uLetter)
+			delete[] DPM.ScoreMxB[uLetter];
+
+		delete[] DPM.MPrev;
+		delete[] DPM.MCurr;
+		delete[] DPM.MWork;
+		delete[] DPM.DPrev;
+		delete[] DPM.DCurr;
+		delete[] DPM.DWork;
+		delete[] DPM.uDeletePos;
+		delete[] DPM.GapOpenA;
+		delete[] DPM.GapOpenB;
+		delete[] DPM.GapCloseA;
+		delete[] DPM.GapCloseB;
+		delete[] DPM.SortOrderA;
+		delete[] DPM.FreqsA;
+		delete[] DPM.ScoreMxB;
+		delete[] DPM.TraceBack;
+		}
+
+	DPM.uLength = uNewLength;
+
+// Per-position gap scores
+	DPM.GapOpenA = new SCORE[uNewLength];
+	DPM.GapOpenB = new SCORE[uNewLength];
+	DPM.GapCloseA = new SCORE[uNewLength];
+	DPM.GapCloseB = new SCORE[uNewLength];
+
+// Row-pointer tables and the three match-score rows
+	DPM.SortOrderA = new unsigned*[uNewLength];
+	DPM.FreqsA = new FCOUNT*[uNewLength];
+	DPM.ScoreMxB = new SCORE*[20];
+	DPM.MPrev = new SCORE[uNewLength];
+	DPM.MCurr = new SCORE[uNewLength];
+	DPM.MWork = new SCORE[uNewLength];
+
+// Delete-score rows and delete start positions
+	DPM.DPrev = new SCORE[uNewLength];
+	DPM.DCurr = new SCORE[uNewLength];
+	DPM.DWork = new SCORE[uNewLength];
+	DPM.uDeletePos = new unsigned[uNewLength];
+
+	DPM.TraceBack = new int*[uNewLength];
+
+// One score row per letter, uNewLength entries each
+	for (unsigned uLetter = 0; uLetter != 20; ++uLetter)
+		DPM.ScoreMxB[uLetter] = new SCORE[uNewLength];
+
+// Per-position rows: 20 letters for sort order and counts, and a
+// full-width trace-back row
+	for (unsigned uRow = 0; uRow != uNewLength; ++uRow)
+		{
+		DPM.SortOrderA[uRow] = new unsigned[20];
+		DPM.FreqsA[uRow] = new FCOUNT[20];
+		DPM.TraceBack[uRow] = new int[uNewLength];
+		}
+	}
+
+// Global alignment of two profiles using the 20-letter score tables.
+//
+// Only the previous and current rows of the match (M) and delete (D)
+// scores are kept (MPrev/MCurr, DPrev/DCurr, exchanged with Rotate after
+// each row); the insert score is a running value along the row. A full
+// integer trace-back matrix is filled and converted to Path by
+// TraceBackToPath.
+//
+// The letter-pair score of cell (i, j) is the sum over the letters of A's
+// position i, taken in m_uSortOrder order until a zero count is met, of
+// count * ScoreMxB[letter][j], minus g_scoreCenter.
+//
+// Trace-back cell values: 0 when the best predecessor is the match score,
+// positive (i - delete start) when it came from a delete, negative
+// (insert start - j) when it came from an insert.
+//
+//	PA, uLengthA	profile A and its number of positions
+//	PB, uLengthB	profile B and its number of positions
+//	Path		receives the alignment path
+//
+// Returns the best score of the final cell.
+// NOTE(review): uLengthA and uLengthB are indexed as [uLength-1] below, so
+// both are assumed to be > 0 -- confirm callers guarantee this.
+SCORE GlobalAlignSP(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path)
+	{
+	const unsigned uPrefixCountA = uLengthA + 1;
+	const unsigned uPrefixCountB = uLengthB + 1;
+
+	AllocDPMem(uLengthA, uLengthB);
+
+	SCORE *GapOpenA = DPM.GapOpenA;
+	SCORE *GapOpenB = DPM.GapOpenB;
+	SCORE *GapCloseA = DPM.GapCloseA;
+	SCORE *GapCloseB = DPM.GapCloseB;
+
+	unsigned **SortOrderA = DPM.SortOrderA;
+	FCOUNT **FreqsA = DPM.FreqsA;
+	SCORE **ScoreMxB = DPM.ScoreMxB;
+	SCORE *MPrev = DPM.MPrev;
+	SCORE *MCurr = DPM.MCurr;
+	SCORE *MWork = DPM.MWork;
+
+	SCORE *DPrev = DPM.DPrev;
+	SCORE *DCurr = DPM.DCurr;
+	SCORE *DWork = DPM.DWork;
+	unsigned *uDeletePos = DPM.uDeletePos;
+
+	int **TraceBack = DPM.TraceBack;
+
+// Copy the per-position data of both profiles into the flat work arrays
+	for (unsigned i = 0; i < uLengthA; ++i)
+		{
+		GapOpenA[i] = PA[i].m_scoreGapOpen;
+		GapCloseA[i] = PA[i].m_scoreGapClose;
+
+		for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
+			{
+			SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];
+			FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];
+			}
+		}
+
+	for (unsigned j = 0; j < uLengthB; ++j)
+		{
+		GapOpenB[j] = PB[j].m_scoreGapOpen;
+		GapCloseB[j] = PB[j].m_scoreGapClose;
+		}
+
+	for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
+		{
+		for (unsigned j = 0; j < uLengthB; ++j)
+			ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];
+		}
+
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));
+
+// Special case for i=0
+	unsigned **ptrSortOrderA = SortOrderA;
+	FCOUNT **ptrFreqsA = FreqsA;
+	assert(ptrSortOrderA == &(SortOrderA[0]));
+	assert(ptrFreqsA == &(FreqsA[0]));
+	TraceBack[0][0] = 0;
+
+	SCORE scoreSum = 0;
+	unsigned *ptrSortOrderAi = SortOrderA[0];
+	const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
+	FCOUNT *ptrFreqsAi = FreqsA[0];
+	for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
+		{
+		const unsigned uLetter = *ptrSortOrderAi;
+		const FCOUNT fcLetter = ptrFreqsAi[uLetter];
+	// Letters are visited in sort order; stop at the first zero count
+		if (0 == fcLetter)
+			break;
+		scoreSum += fcLetter*ScoreMxB[uLetter][0];
+		}
+	MPrev[0] = scoreSum - g_scoreCenter;
+
+// D(0,0) is -infinity (requires I->D).
+	DPrev[0] = MINUS_INFINITY;
+
+	for (unsigned j = 1; j < uLengthB; ++j)
+		{
+	// Only way to get M(0, j) looks like this:
+	//		A	----X
+	//		B	XXXXX
+	//			0   j
+	// So gap-open at j=0, gap-close at j-1.
+		SCORE scoreSum = 0;
+		unsigned *ptrSortOrderAi = SortOrderA[0];
+		const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
+		FCOUNT *ptrFreqsAi = FreqsA[0];
+		for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
+			{
+			const unsigned uLetter = *ptrSortOrderAi;
+			const FCOUNT fcLetter = ptrFreqsAi[uLetter];
+			if (0 == fcLetter)
+				break;
+			scoreSum += fcLetter*ScoreMxB[uLetter][j];
+			}
+		MPrev[j] = scoreSum - g_scoreCenter + GapOpenB[0] + GapCloseB[j-1];
+		TraceBack[0][j] = -(int) j;
+
+	// Assume no D->I transitions, then can't be a delete if only
+	// one letter from A.
+		DPrev[j] = MINUS_INFINITY;
+		}
+
+	SCORE IPrev_j_1;
+	for (unsigned i = 1; i < uLengthA; ++i)
+		{
+		++ptrSortOrderA;
+		++ptrFreqsA;
+		assert(ptrSortOrderA == &(SortOrderA[i]));
+		assert(ptrFreqsA == &(FreqsA[i]));
+
+	// First pass over the row: accumulate the letter-pair scores of
+	// position i of A against every position of B into MCurr.
+		SCORE *ptrMCurr_j = MCurr;
+		memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));
+		const FCOUNT *FreqsAi = *ptrFreqsA;
+
+		const unsigned *SortOrderAi = *ptrSortOrderA;
+		const unsigned *ptrSortOrderAiEnd = SortOrderAi + 20;
+		const SCORE *ptrMCurrMax = MCurr + uLengthB;
+		for (const unsigned *ptrSortOrderAi = SortOrderAi;
+		  ptrSortOrderAi != ptrSortOrderAiEnd;
+		  ++ptrSortOrderAi)
+			{
+			const unsigned uLetter = *ptrSortOrderAi;
+			SCORE *NSBR_Letter = ScoreMxB[uLetter];
+			const FCOUNT fcLetter = FreqsAi[uLetter];
+			if (0 == fcLetter)
+				break;
+			SCORE *ptrNSBR = NSBR_Letter;
+			for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)
+				*ptrMCurr += fcLetter*(*ptrNSBR++);
+			}
+
+		for (unsigned j = 0; j < uLengthB; ++j)
+			MCurr[j] -= g_scoreCenter;
+
+		ptrMCurr_j = MCurr;
+		unsigned *ptrDeletePos = uDeletePos;
+
+	// Special case for j=0
+	// Only way to get M(i, 0) looks like this:
+	//			0   i
+	//		A	XXXXX
+	//		B	----X
+	// So gap-open at i=0, gap-close at i-1.
+		assert(ptrMCurr_j == &(MCurr[0]));
+		*ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];
+
+		++ptrMCurr_j;
+
+		int *ptrTraceBack_ij = TraceBack[i];
+		*ptrTraceBack_ij++ = (int) i;
+
+	// Delete score for column 0: keep the running value or open a new
+	// delete at row i, remembering where the better one started.
+		SCORE *ptrMPrev_j = MPrev;
+		SCORE *ptrDPrev = DPrev;
+		SCORE d = *ptrDPrev;
+		SCORE DNew = *ptrMPrev_j + GapOpenA[i];
+		if (DNew > d)
+			{
+			d = DNew;
+			*ptrDeletePos = i;
+			}
+
+		SCORE *ptrDCurr = DCurr;
+
+		assert(ptrDCurr == &(DCurr[0]));
+		*ptrDCurr = d;
+
+	// Can't have an insert if no letters from B
+		IPrev_j_1 = MINUS_INFINITY;
+
+	// Initialized (as in the sibling implementation) so that it never
+	// holds an indeterminate value if no insert beats MINUS_INFINITY.
+		unsigned uInsertPos = 0;
+		const SCORE scoreGapOpenAi = GapOpenA[i];
+		const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];
+
+		for (unsigned j = 1; j < uLengthB; ++j)
+			{
+		// Here, MPrev_j is preserved from previous
+		// iteration so with current i,j is M[i-1][j-1]
+			SCORE MPrev_j = *ptrMPrev_j;
+			SCORE INew = MPrev_j + GapOpenB[j];
+			if (INew > IPrev_j_1)
+				{
+				IPrev_j_1 = INew;
+				uInsertPos = j;
+				}
+
+			SCORE scoreMax = MPrev_j;
+
+			assert(ptrDPrev == &(DPrev[j-1]));
+			SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;
+			if (scoreD > scoreMax)
+				{
+				scoreMax = scoreD;
+				assert(ptrDeletePos == &(uDeletePos[j-1]));
+				*ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
+				assert(*ptrTraceBack_ij > 0);
+				}
+			++ptrDeletePos;
+
+			SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];
+			if (scoreI > scoreMax)
+				{
+				scoreMax = scoreI;
+				*ptrTraceBack_ij = (int) uInsertPos - (int) j;
+				assert(*ptrTraceBack_ij < 0);
+				}
+
+			assert(ptrSortOrderA == &(SortOrderA[i]));
+			assert(ptrFreqsA == &(FreqsA[i]));
+
+		// MCurr[j] already holds the letter-pair score; add the best
+		// predecessor to complete M(i, j).
+			*ptrMCurr_j += scoreMax;
+			assert(ptrMCurr_j == &(MCurr[j]));
+			++ptrMCurr_j;
+
+			MPrev_j = *(++ptrMPrev_j);
+			assert(ptrDPrev == &(DPrev[j]));
+			SCORE d = *ptrDPrev;
+			SCORE DNew = MPrev_j + scoreGapOpenAi;
+			if (DNew > d)
+				{
+				d = DNew;
+				assert(ptrDeletePos == &uDeletePos[j]);
+				*ptrDeletePos = i;
+				}
+			assert(ptrDCurr + 1 == &(DCurr[j]));
+			*(++ptrDCurr) = d;
+
+			++ptrTraceBack_ij;
+			}
+
+		Rotate(MPrev, MCurr, MWork);
+		Rotate(DPrev, DCurr, DWork);
+		}
+
+// Special case for i=uLengthA
+	SCORE IPrev = MINUS_INFINITY;
+
+// Initialized so the final trace-back value is never computed from an
+// indeterminate position (the loop below does not run when uLengthB is 1).
+	unsigned uInsertPos = 0;
+
+	for (unsigned j = 1; j < uLengthB; ++j)
+		{
+		SCORE INew = MPrev[j-1] + GapOpenB[j];
+		if (INew > IPrev)
+			{
+			uInsertPos = j;
+			IPrev = INew;
+			}
+		}
+
+// Special case for i=uLengthA, j=uLengthB
+	SCORE scoreMax = MPrev[uLengthB-1];
+	int iTraceBack = 0;
+
+	SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];
+	if (scoreD > scoreMax)
+		{
+		scoreMax = scoreD;
+		iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
+		}
+
+	SCORE scoreI = IPrev + GapCloseB[uLengthB-1];
+	if (scoreI > scoreMax)
+		{
+		scoreMax = scoreI;
+		iTraceBack = (int) uInsertPos - (int) uLengthB;
+		}
+
+	TraceBack[uLengthA][uLengthB] = iTraceBack;
+
+	TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);
+
+	return scoreMax;
+	}

Added: trunk/packages/muscle/branches/upstream/current/glbalignspn.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/glbalignspn.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/glbalignspn.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,409 @@
+#include "muscle.h"
+#include "profile.h"
+#include "pwpath.h"
+
+// Scratch buffers for GlobalAlignSPN (4-letter alphabet). One file-scope
+// instance (DPM, below) is re-used between calls; AllocDPMem re-allocates
+// it when needed and FreeDPMemSPN releases it.
+struct DP_MEMORY
+	{
+// Allocated length of the per-position arrays; 0 while nothing is allocated.
+	unsigned uLength;
+// Gap-open / gap-close score of each position, copied from profiles A and B.
+	SCORE *GapOpenA;
+	SCORE *GapOpenB;
+	SCORE *GapCloseA;
+	SCORE *GapCloseB;
+// Previous and current row of match scores, plus a third buffer passed to
+// Rotate() after each row.
+	SCORE *MPrev;
+	SCORE *MCurr;
+	SCORE *MWork;
+// Same three-buffer arrangement for the delete scores.
+	SCORE *DPrev;
+	SCORE *DCurr;
+	SCORE *DWork;
+// ScoreMxB[letter][j] is m_AAScores[letter] of position j of B (4 rows).
+	SCORE **ScoreMxB;
+// SortOrderA[i] is m_uSortOrder of position i of A (4 entries per row).
+	unsigned **SortOrderA;
+// uDeletePos[j] is the row index at which the delete score currently held
+// for column j was opened.
+	unsigned *uDeletePos;
+// FreqsA[i] is m_fcCounts of position i of A (4 entries per row).
+	FCOUNT **FreqsA;
+// uLength x uLength trace-back matrix, consumed by TraceBackToPath:
+// 0 when the best predecessor is the match score, positive when it came
+// from a delete, negative when it came from an insert.
+	int **TraceBack;
+	};
+
+static struct DP_MEMORY DPM;
+
+// Release every buffer held by the file-scope DPM instance.
+// Does nothing when no buffers are allocated (DPM.uLength == 0).
+// On return DPM.uLength is 0, so calling this again is harmless and the
+// next AllocDPMem call allocates a fresh set of buffers instead of freeing
+// (or re-using) the ones released here.
+void FreeDPMemSPN()
+	{
+	const unsigned uOldLength = DPM.uLength;
+	if (0 == uOldLength)
+		return;
+
+	for (unsigned i = 0; i < uOldLength; ++i)
+		{
+		delete[] DPM.TraceBack[i];
+		delete[] DPM.FreqsA[i];
+		delete[] DPM.SortOrderA[i];
+		}
+// One score row per nucleotide letter
+	for (unsigned n = 0; n < 4; ++n)
+		delete[] DPM.ScoreMxB[n];
+
+	delete[] DPM.MPrev;
+	delete[] DPM.MCurr;
+	delete[] DPM.MWork;
+	delete[] DPM.DPrev;
+	delete[] DPM.DCurr;
+	delete[] DPM.DWork;
+	delete[] DPM.uDeletePos;
+	delete[] DPM.GapOpenA;
+	delete[] DPM.GapOpenB;
+	delete[] DPM.GapCloseA;
+	delete[] DPM.GapCloseB;
+	delete[] DPM.SortOrderA;
+	delete[] DPM.FreqsA;
+	delete[] DPM.ScoreMxB;
+	delete[] DPM.TraceBack;
+
+// Mark the buffers as gone. Without this a later AllocDPMem or
+// FreeDPMemSPN would see a non-zero length and delete them a second time.
+	DPM.uLength = 0;
+	}
+
+// Make sure the shared DP buffers are large enough for profiles of
+// uLengthA and uLengthB positions. Nothing is done while the current
+// buffers are strictly longer than the larger prefix count (length + 1);
+// otherwise every buffer is released and re-allocated with headroom.
+// Buffer contents are not preserved and not initialized.
+static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
+	{
+// Max prefix length
+	unsigned uLength = (uLengthA > uLengthB ? uLengthA : uLengthB) + 1;
+	if (uLength < DPM.uLength)
+		return;
+
+// Add 256 to allow for future expansion and
+// round up to next multiple of 32.
+	uLength += 256;
+	uLength += 32 - uLength%32;
+
+// Release any previous buffers through the one teardown routine, so the
+// list of buffers to free is maintained in a single place.
+	FreeDPMemSPN();
+
+	DPM.uLength = uLength;
+
+	DPM.GapOpenA = new SCORE[uLength];
+	DPM.GapOpenB = new SCORE[uLength];
+	DPM.GapCloseA = new SCORE[uLength];
+	DPM.GapCloseB = new SCORE[uLength];
+
+	DPM.SortOrderA = new unsigned*[uLength];
+	DPM.FreqsA = new FCOUNT*[uLength];
+	DPM.ScoreMxB = new SCORE*[4];
+	DPM.MPrev = new SCORE[uLength];
+	DPM.MCurr = new SCORE[uLength];
+	DPM.MWork = new SCORE[uLength];
+
+	DPM.DPrev = new SCORE[uLength];
+	DPM.DCurr = new SCORE[uLength];
+	DPM.DWork = new SCORE[uLength];
+	DPM.uDeletePos = new unsigned[uLength];
+
+	DPM.TraceBack = new int*[uLength];
+
+// One score row per nucleotide letter
+	for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
+		DPM.ScoreMxB[uLetter] = new SCORE[uLength];
+
+// Per-position rows: 4 letters for sort order and counts, and a
+// full-width trace-back row
+	for (unsigned i = 0; i < uLength; ++i)
+		{
+		DPM.SortOrderA[i] = new unsigned[4];
+		DPM.FreqsA[i] = new FCOUNT[4];
+		DPM.TraceBack[i] = new int[uLength];
+		}
+	}
+
+// Global alignment of two nucleotide profiles (4-letter score tables).
+//
+// Quits unless the current alphabet (g_Alpha) is DNA or RNA.
+//
+// Only the previous and current rows of the match (M) and delete (D)
+// scores are kept (MPrev/MCurr, DPrev/DCurr, exchanged with Rotate after
+// each row); the insert score is a running value along the row. A full
+// integer trace-back matrix is filled and converted to Path by
+// TraceBackToPath.
+//
+// The letter-pair score of cell (i, j) is the sum over the letters of A's
+// position i, taken in m_uSortOrder order until a zero count is met, of
+// count * ScoreMxB[letter][j], minus g_scoreCenter.
+//
+// Trace-back cell values: 0 when the best predecessor is the match score,
+// positive (i - delete start) when it came from a delete, negative
+// (insert start - j) when it came from an insert.
+//
+//	PA, uLengthA	profile A and its number of positions
+//	PB, uLengthB	profile B and its number of positions
+//	Path		receives the alignment path
+//
+// Returns the best score of the final cell.
+// NOTE(review): uLengthA and uLengthB are indexed as [uLength-1] below, so
+// both are assumed to be > 0 -- confirm callers guarantee this.
+SCORE GlobalAlignSPN(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path)
+	{
+// Reject only alphabets that are neither DNA nor RNA. (The previous test,
+// "not DNA or is RNA", also rejected RNA input.)
+	if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)
+		Quit("GlobalAlignSPN: must be nucleo");
+
+	const unsigned uPrefixCountA = uLengthA + 1;
+	const unsigned uPrefixCountB = uLengthB + 1;
+
+	AllocDPMem(uLengthA, uLengthB);
+
+	SCORE *GapOpenA = DPM.GapOpenA;
+	SCORE *GapOpenB = DPM.GapOpenB;
+	SCORE *GapCloseA = DPM.GapCloseA;
+	SCORE *GapCloseB = DPM.GapCloseB;
+
+	unsigned **SortOrderA = DPM.SortOrderA;
+	FCOUNT **FreqsA = DPM.FreqsA;
+	SCORE **ScoreMxB = DPM.ScoreMxB;
+	SCORE *MPrev = DPM.MPrev;
+	SCORE *MCurr = DPM.MCurr;
+	SCORE *MWork = DPM.MWork;
+
+	SCORE *DPrev = DPM.DPrev;
+	SCORE *DCurr = DPM.DCurr;
+	SCORE *DWork = DPM.DWork;
+	unsigned *uDeletePos = DPM.uDeletePos;
+
+	int **TraceBack = DPM.TraceBack;
+
+// Copy the per-position data of both profiles into the flat work arrays
+	for (unsigned i = 0; i < uLengthA; ++i)
+		{
+		GapOpenA[i] = PA[i].m_scoreGapOpen;
+		GapCloseA[i] = PA[i].m_scoreGapClose;
+
+		for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
+			{
+			SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];
+			FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];
+			}
+		}
+
+	for (unsigned j = 0; j < uLengthB; ++j)
+		{
+		GapOpenB[j] = PB[j].m_scoreGapOpen;
+		GapCloseB[j] = PB[j].m_scoreGapClose;
+		}
+
+	for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
+		{
+		for (unsigned j = 0; j < uLengthB; ++j)
+			ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];
+		}
+
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));
+
+// Special case for i=0
+	unsigned **ptrSortOrderA = SortOrderA;
+	FCOUNT **ptrFreqsA = FreqsA;
+	assert(ptrSortOrderA == &(SortOrderA[0]));
+	assert(ptrFreqsA == &(FreqsA[0]));
+	TraceBack[0][0] = 0;
+
+	SCORE scoreSum = 0;
+	unsigned *ptrSortOrderAi = SortOrderA[0];
+	const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 4;
+	FCOUNT *ptrFreqsAi = FreqsA[0];
+	for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
+		{
+		const unsigned uLetter = *ptrSortOrderAi;
+		const FCOUNT fcLetter = ptrFreqsAi[uLetter];
+	// Letters are visited in sort order; stop at the first zero count
+		if (0 == fcLetter)
+			break;
+		scoreSum += fcLetter*ScoreMxB[uLetter][0];
+		}
+	MPrev[0] = scoreSum - g_scoreCenter;
+
+// D(0,0) is -infinity (requires I->D).
+	DPrev[0] = MINUS_INFINITY;
+
+	for (unsigned j = 1; j < uLengthB; ++j)
+		{
+	// Only way to get M(0, j) looks like this:
+	//		A	----X
+	//		B	XXXXX
+	//			0   j
+	// So gap-open at j=0, gap-close at j-1.
+		SCORE scoreSum = 0;
+		unsigned *ptrSortOrderAi = SortOrderA[0];
+		const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 4;
+		FCOUNT *ptrFreqsAi = FreqsA[0];
+		for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
+			{
+			const unsigned uLetter = *ptrSortOrderAi;
+			const FCOUNT fcLetter = ptrFreqsAi[uLetter];
+			if (0 == fcLetter)
+				break;
+			scoreSum += fcLetter*ScoreMxB[uLetter][j];
+			}
+		MPrev[j] = scoreSum - g_scoreCenter + GapOpenB[0] + GapCloseB[j-1];
+		TraceBack[0][j] = -(int) j;
+
+	// Assume no D->I transitions, then can't be a delete if only
+	// one letter from A.
+		DPrev[j] = MINUS_INFINITY;
+		}
+
+	SCORE IPrev_j_1;
+	for (unsigned i = 1; i < uLengthA; ++i)
+		{
+		++ptrSortOrderA;
+		++ptrFreqsA;
+		assert(ptrSortOrderA == &(SortOrderA[i]));
+		assert(ptrFreqsA == &(FreqsA[i]));
+
+	// First pass over the row: accumulate the letter-pair scores of
+	// position i of A against every position of B into MCurr.
+		SCORE *ptrMCurr_j = MCurr;
+		memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));
+		const FCOUNT *FreqsAi = *ptrFreqsA;
+
+		const unsigned *SortOrderAi = *ptrSortOrderA;
+		const unsigned *ptrSortOrderAiEnd = SortOrderAi + 4;
+		const SCORE *ptrMCurrMax = MCurr + uLengthB;
+		for (const unsigned *ptrSortOrderAi = SortOrderAi;
+		  ptrSortOrderAi != ptrSortOrderAiEnd;
+		  ++ptrSortOrderAi)
+			{
+			const unsigned uLetter = *ptrSortOrderAi;
+			SCORE *NSBR_Letter = ScoreMxB[uLetter];
+			const FCOUNT fcLetter = FreqsAi[uLetter];
+			if (0 == fcLetter)
+				break;
+			SCORE *ptrNSBR = NSBR_Letter;
+			for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)
+				*ptrMCurr += fcLetter*(*ptrNSBR++);
+			}
+
+		for (unsigned j = 0; j < uLengthB; ++j)
+			MCurr[j] -= g_scoreCenter;
+
+		ptrMCurr_j = MCurr;
+		unsigned *ptrDeletePos = uDeletePos;
+
+	// Special case for j=0
+	// Only way to get M(i, 0) looks like this:
+	//			0   i
+	//		A	XXXXX
+	//		B	----X
+	// So gap-open at i=0, gap-close at i-1.
+		assert(ptrMCurr_j == &(MCurr[0]));
+		*ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];
+
+		++ptrMCurr_j;
+
+		int *ptrTraceBack_ij = TraceBack[i];
+		*ptrTraceBack_ij++ = (int) i;
+
+	// Delete score for column 0: keep the running value or open a new
+	// delete at row i, remembering where the better one started.
+		SCORE *ptrMPrev_j = MPrev;
+		SCORE *ptrDPrev = DPrev;
+		SCORE d = *ptrDPrev;
+		SCORE DNew = *ptrMPrev_j + GapOpenA[i];
+		if (DNew > d)
+			{
+			d = DNew;
+			*ptrDeletePos = i;
+			}
+
+		SCORE *ptrDCurr = DCurr;
+
+		assert(ptrDCurr == &(DCurr[0]));
+		*ptrDCurr = d;
+
+	// Can't have an insert if no letters from B
+		IPrev_j_1 = MINUS_INFINITY;
+
+	// Initialized so that it never holds an indeterminate value if no
+	// insert beats MINUS_INFINITY.
+		unsigned uInsertPos = 0;
+		const SCORE scoreGapOpenAi = GapOpenA[i];
+		const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];
+
+		for (unsigned j = 1; j < uLengthB; ++j)
+			{
+		// Here, MPrev_j is preserved from previous
+		// iteration so with current i,j is M[i-1][j-1]
+			SCORE MPrev_j = *ptrMPrev_j;
+			SCORE INew = MPrev_j + GapOpenB[j];
+			if (INew > IPrev_j_1)
+				{
+				IPrev_j_1 = INew;
+				uInsertPos = j;
+				}
+
+			SCORE scoreMax = MPrev_j;
+
+			assert(ptrDPrev == &(DPrev[j-1]));
+			SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;
+			if (scoreD > scoreMax)
+				{
+				scoreMax = scoreD;
+				assert(ptrDeletePos == &(uDeletePos[j-1]));
+				*ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
+				assert(*ptrTraceBack_ij > 0);
+				}
+			++ptrDeletePos;
+
+			SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];
+			if (scoreI > scoreMax)
+				{
+				scoreMax = scoreI;
+				*ptrTraceBack_ij = (int) uInsertPos - (int) j;
+				assert(*ptrTraceBack_ij < 0);
+				}
+
+			assert(ptrSortOrderA == &(SortOrderA[i]));
+			assert(ptrFreqsA == &(FreqsA[i]));
+
+		// MCurr[j] already holds the letter-pair score; add the best
+		// predecessor to complete M(i, j).
+			*ptrMCurr_j += scoreMax;
+			assert(ptrMCurr_j == &(MCurr[j]));
+			++ptrMCurr_j;
+
+			MPrev_j = *(++ptrMPrev_j);
+			assert(ptrDPrev == &(DPrev[j]));
+			SCORE d = *ptrDPrev;
+			SCORE DNew = MPrev_j + scoreGapOpenAi;
+			if (DNew > d)
+				{
+				d = DNew;
+				assert(ptrDeletePos == &uDeletePos[j]);
+				*ptrDeletePos = i;
+				}
+			assert(ptrDCurr + 1 == &(DCurr[j]));
+			*(++ptrDCurr) = d;
+
+			++ptrTraceBack_ij;
+			}
+
+		Rotate(MPrev, MCurr, MWork);
+		Rotate(DPrev, DCurr, DWork);
+		}
+
+// Special case for i=uLengthA
+	SCORE IPrev = MINUS_INFINITY;
+
+// Initialized so the final trace-back value is never computed from an
+// indeterminate position (the loop below does not run when uLengthB is 1).
+	unsigned uInsertPos = 0;
+
+	for (unsigned j = 1; j < uLengthB; ++j)
+		{
+		SCORE INew = MPrev[j-1] + GapOpenB[j];
+		if (INew > IPrev)
+			{
+			uInsertPos = j;
+			IPrev = INew;
+			}
+		}
+
+// Special case for i=uLengthA, j=uLengthB
+	SCORE scoreMax = MPrev[uLengthB-1];
+	int iTraceBack = 0;
+
+	SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];
+	if (scoreD > scoreMax)
+		{
+		scoreMax = scoreD;
+		iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
+		}
+
+	SCORE scoreI = IPrev + GapCloseB[uLengthB-1];
+	if (scoreI > scoreMax)
+		{
+		scoreMax = scoreI;
+		iTraceBack = (int) uInsertPos - (int) uLengthB;
+		}
+
+	TraceBack[uLengthA][uLengthB] = iTraceBack;
+
+	TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);
+
+	return scoreMax;
+	}

Added: trunk/packages/muscle/branches/upstream/current/glbalignss.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/glbalignss.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/glbalignss.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,318 @@
+#include "muscle.h"
+#include "profile.h"
+#include "pwpath.h"
+#include "seq.h"
+
+extern SCOREMATRIX VTML_SP;
+
+// #define SUBST(i, j)	Subst(seqA, seqB, i, j)
+#define SUBST(i, j)		MxRowA[i][seqB.GetLetter(j)]
+// Substitution score for letter i of seqA against letter j of seqB: the VTML_SP entry plus g_scoreCenter.
+static SCORE Subst(const Seq &seqA, const Seq &seqB, unsigned i, unsigned j)
+	{
+	assert(i < seqA.Length());	// both positions must lie inside their sequences
+	assert(j < seqB.Length());
+
+	unsigned uLetterA = seqA.GetLetter(i);
+	unsigned uLetterB = seqB.GetLetter(j);
+	return VTML_SP[uLetterA][uLetterB] + g_scoreCenter;
+	}
+// Scratch buffers used by GlobalAlignSS; the single instance DPM is (re)allocated by AllocDPMem.
+struct DP_MEMORY
+	{
+	unsigned uLength;	// element count of every array below; 0 before the first allocation (static storage)
+	SCORE *MPrev;	// M rows: previous, current and spare, rotated after each row of A
+	SCORE *MCurr;
+	SCORE *MWork;
+	SCORE *DPrev;	// D rows: previous, current and spare, rotated the same way
+	SCORE *DCurr;
+	SCORE *DWork;
+	SCORE **MxRowA;	// per position of A, a pointer to a VTML_SP row (filled by RowFromSeq)
+	unsigned *LettersB;	// per position of B, its letter code (filled by LettersFromSeq)
+	unsigned *uDeletePos;	// per column, the row index last recorded when a delete was opened
+	int **TraceBack;	// uLength rows of uLength ints
+	};
+
+static struct DP_MEMORY DPM;
+/* Make sure DPM can hold an alignment of sequences of the given lengths.
+ * Existing buffers are kept when they are already larger than needed;
+ * otherwise they are freed and replaced by larger ones (contents are not preserved). */
+static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
+	{
+// Max prefix length
+	unsigned uLength = (uLengthA > uLengthB ? uLengthA : uLengthB) + 1;
+	if (uLength < DPM.uLength)
+		return;
+
+// Add 256 to allow for future expansion and
+// round up to next multiple of 32.
+	uLength += 256;
+	uLength += 32 - uLength%32;	// adds a full 32 when uLength is already a multiple of 32
+
+	const unsigned uOldLength = DPM.uLength;
+	if (uOldLength > 0)
+		{
+		for (unsigned i = 0; i < uOldLength; ++i)	// free the old trace-back rows first
+			delete[] DPM.TraceBack[i];
+
+		delete[] DPM.MPrev;
+		delete[] DPM.MCurr;
+		delete[] DPM.MWork;
+		delete[] DPM.DPrev;
+		delete[] DPM.DCurr;
+		delete[] DPM.DWork;
+		delete[] DPM.MxRowA;
+		delete[] DPM.LettersB;
+		delete[] DPM.uDeletePos;
+		delete[] DPM.TraceBack;
+		}
+
+	DPM.uLength = uLength;
+
+	DPM.MPrev = new SCORE[uLength];
+	DPM.MCurr = new SCORE[uLength];
+	DPM.MWork = new SCORE[uLength];
+
+	DPM.DPrev = new SCORE[uLength];
+	DPM.DCurr = new SCORE[uLength];
+	DPM.DWork = new SCORE[uLength];
+	DPM.MxRowA = new SCORE *[uLength];
+	DPM.LettersB = new unsigned[uLength];
+	DPM.uDeletePos = new unsigned[uLength];
+
+	DPM.TraceBack = new int*[uLength];
+
+	for (unsigned i = 0; i < uLength; ++i)	// square matrix: uLength*uLength ints in total
+		DPM.TraceBack[i] = new int[uLength];
+	}
+
+// For every position of s, store in Row[] a pointer to the VTML_SP row of
+// the letter at that position. Letter codes of 20 or above are mapped to
+// the AX_X row.
+static void RowFromSeq(const Seq &s, SCORE *Row[])
+	{
+	const unsigned uCount = s.Length();
+	unsigned uPos = 0;
+	while (uPos < uCount)
+		{
+		const char cResidue = s.GetChar(uPos);
+		const unsigned uCode = CharToLetter(cResidue);
+		Row[uPos] = (uCode < 20) ? VTML_SP[uCode] : VTML_SP[AX_X];
+		++uPos;
+		}
+	}
+
+// For every position of s, store its letter code in Letters[]. Codes of 20
+// or above are replaced by AX_X.
+static void LettersFromSeq(const Seq &s, unsigned Letters[])
+	{
+	const unsigned uCount = s.Length();
+	unsigned uPos = 0;
+	while (uPos < uCount)
+		{
+		const char cResidue = s.GetChar(uPos);
+		const unsigned uCode = CharToLetter(cResidue);
+		if (uCode >= 20)
+			Letters[uPos] = AX_X;
+		else
+			Letters[uPos] = uCode;
+		++uPos;
+		}
+	}
+/* Global alignment of two single sequences by dynamic programming.
+ * Match scores are read from VTML_SP rows, gaps are charged with
+ * g_scoreGapOpen (terminal gaps at half that amount, per the comments below).
+ * The trace-back matrix is converted to Path by TraceBackToPath and the best score is returned.
+ * NOTE(review): both uInsertPos locals are declared without an initializer and are only
+ * assigned inside their loops; presumably the first comparison against MINUS_INFINITY always sets them - confirm. */
+SCORE GlobalAlignSS(const Seq &seqA, const Seq &seqB, PWPath &Path)
+	{
+	const unsigned uLengthA = seqA.Length();
+	const unsigned uLengthB = seqB.Length();
+	const unsigned uPrefixCountA = uLengthA + 1;	// only used by the DEBUG memset below
+	const unsigned uPrefixCountB = uLengthB + 1;
+
+	AllocDPMem(uLengthA, uLengthB);
+
+	SCORE *MPrev = DPM.MPrev;
+	SCORE *MCurr = DPM.MCurr;
+	SCORE *MWork = DPM.MWork;
+
+	SCORE *DPrev = DPM.DPrev;
+	SCORE *DCurr = DPM.DCurr;
+	SCORE *DWork = DPM.DWork;
+	SCORE **MxRowA = DPM.MxRowA;
+	unsigned *LettersB = DPM.LettersB;
+
+	RowFromSeq(seqA, MxRowA);	// MxRowA[i] = substitution row for letter i of A
+	LettersFromSeq(seqB, LettersB);	// LettersB[j] = letter code j of B
+
+	unsigned *uDeletePos = DPM.uDeletePos;
+
+	int **TraceBack = DPM.TraceBack;
+
+#if	DEBUG
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));
+#endif
+
+// Special case for i=0
+	TraceBack[0][0] = 0;
+	MPrev[0] = MxRowA[0][LettersB[0]];
+
+// D(0,0) is -infinity (requires I->D).
+	DPrev[0] = MINUS_INFINITY;
+
+	for (unsigned j = 1; j < uLengthB; ++j)
+		{
+		unsigned uLetterB = LettersB[j];
+
+	// Only way to get M(0, j) looks like this:
+	//		A	----X
+	//		B	XXXXX
+	//			0   j
+	// So gap-open at j=0, gap-close at j-1.
+		MPrev[j] = MxRowA[0][uLetterB] + g_scoreGapOpen/2; // term gaps half
+		TraceBack[0][j] = -(int) j;
+
+	// Assume no D->I transitions, then can't be a delete if only
+	// one letter from A.
+		DPrev[j] = MINUS_INFINITY;
+		}
+
+	SCORE IPrev_j_1;
+	for (unsigned i = 1; i < uLengthA; ++i)
+		{
+		SCORE *ptrMCurr_j = MCurr;
+		memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));
+
+		const SCORE *RowA = MxRowA[i];
+		const SCORE *ptrRowA = MxRowA[i];
+		const SCORE *ptrMCurrEnd = ptrMCurr_j + uLengthB;
+		unsigned *ptrLettersB = LettersB;
+		for (; ptrMCurr_j != ptrMCurrEnd; ++ptrMCurr_j)	// preload row i with the substitution scores
+			{
+			*ptrMCurr_j = RowA[*ptrLettersB];
+			++ptrLettersB;
+			}
+
+		unsigned *ptrDeletePos = uDeletePos;
+
+	// Special case for j=0
+	// Only way to get M(i, 0) looks like this:
+	//			0   i
+	//		A	XXXXX
+	//		B	----X
+	// So gap-open at i=0, gap-close at i-1.
+		ptrMCurr_j = MCurr;
+		assert(ptrMCurr_j == &(MCurr[0]));
+		*ptrMCurr_j += g_scoreGapOpen/2;	// term gaps half
+
+		++ptrMCurr_j;
+
+		int *ptrTraceBack_ij = TraceBack[i];
+		*ptrTraceBack_ij++ = (int) i;
+
+		SCORE *ptrMPrev_j = MPrev;
+		SCORE *ptrDPrev = DPrev;
+		SCORE d = *ptrDPrev;
+		SCORE DNew = *ptrMPrev_j + g_scoreGapOpen;
+		if (DNew > d)
+			{
+			d = DNew;
+			*ptrDeletePos = i;
+			}
+
+		SCORE *ptrDCurr = DCurr;
+
+		assert(ptrDCurr == &(DCurr[0]));
+		*ptrDCurr = d;
+
+	// Can't have an insert if no letters from B
+		IPrev_j_1 = MINUS_INFINITY;
+
+		unsigned uInsertPos;
+
+		for (unsigned j = 1; j < uLengthB; ++j)
+			{
+		// Here, MPrev_j is preserved from previous
+		// iteration so with current i,j is M[i-1][j-1]
+			SCORE MPrev_j = *ptrMPrev_j;
+			SCORE INew = MPrev_j + g_scoreGapOpen;
+			if (INew > IPrev_j_1)
+				{
+				IPrev_j_1 = INew;
+				uInsertPos = j;
+				}
+
+			SCORE scoreMax = MPrev_j;
+
+			assert(ptrDPrev == &(DPrev[j-1]));
+			SCORE scoreD = *ptrDPrev++;
+			if (scoreD > scoreMax)
+				{
+				scoreMax = scoreD;
+				assert(ptrDeletePos == &(uDeletePos[j-1]));
+				*ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;	// positive entry: came from a delete
+				assert(*ptrTraceBack_ij > 0);
+				}
+			++ptrDeletePos;
+
+			SCORE scoreI = IPrev_j_1;
+			if (scoreI > scoreMax)
+				{
+				scoreMax = scoreI;
+				*ptrTraceBack_ij = (int) uInsertPos - (int) j;	// negative entry: came from an insert
+				assert(*ptrTraceBack_ij < 0);
+				}
+
+			*ptrMCurr_j += scoreMax;
+			assert(ptrMCurr_j == &(MCurr[j]));
+			++ptrMCurr_j;
+
+			MPrev_j = *(++ptrMPrev_j);
+			assert(ptrDPrev == &(DPrev[j]));
+			SCORE d = *ptrDPrev;
+			SCORE DNew = MPrev_j + g_scoreGapOpen;
+			if (DNew > d)
+				{
+				d = DNew;
+				assert(ptrDeletePos == &uDeletePos[j]);
+				*ptrDeletePos = i;
+				}
+			assert(ptrDCurr + 1 == &(DCurr[j]));
+			*(++ptrDCurr) = d;
+
+			++ptrTraceBack_ij;
+			}
+
+		Rotate(MPrev, MCurr, MWork);
+		Rotate(DPrev, DCurr, DWork);
+		}
+
+// Special case for i=uLengthA
+	SCORE IPrev = MINUS_INFINITY;
+
+	unsigned uInsertPos;
+
+	for (unsigned j = 1; j < uLengthB; ++j)
+		{
+		SCORE INew = MPrev[j-1];
+		if (INew > IPrev)
+			{
+			uInsertPos = j;
+			IPrev = INew;
+			}
+		}
+
+// Special case for i=uLengthA, j=uLengthB
+	SCORE scoreMax = MPrev[uLengthB-1];
+	int iTraceBack = 0;
+
+	SCORE scoreD = DPrev[uLengthB-1] - g_scoreGapOpen/2;	// term gaps half
+	if (scoreD > scoreMax)
+		{
+		scoreMax = scoreD;
+		iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
+		}
+
+	SCORE scoreI = IPrev - g_scoreGapOpen/2;
+	if (scoreI > scoreMax)
+		{
+		scoreMax = scoreI;
+		iTraceBack = (int) uInsertPos - (int) uLengthB;
+		}
+
+	TraceBack[uLengthA][uLengthB] = iTraceBack;
+
+	TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);
+
+	return scoreMax;
+	}

Added: trunk/packages/muscle/branches/upstream/current/glbalndimer.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/glbalndimer.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/glbalndimer.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,390 @@
+#include "muscle.h"
+#include <math.h>
+#include <stdio.h>	// for sprintf
+#include "pwpath.h"
+#include "profile.h"
+#include "gapscoredimer.h"
+
+#define	TRACE	0
+
+static SCORE TraceBackDimer(  const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,
+  const char *TBM_, const char *TBD_, const char *TBI_,
+  unsigned uLengthA, unsigned uLengthB, PWPath &Path);
+// Format a score for trace output; returns a literal for MINUS_INFINITY, otherwise a static buffer reused by every call.
+static const char *LocalScoreToStr(SCORE s)
+	{
+	static char str[16];
+	if (MINUS_INFINITY == s)
+		return "     *";	// six characters, the same minimum width as %6.3g
+	sprintf(str, "%6.3g", s);
+	return str;
+	}
+
+#if	TRACE
+static void ListDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,
+  unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{	// TRACE only: log one DP matrix, columns = prefixes of B, rows = prefixes of A
+	Log("        ");
+	for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+		{
+		char c = ' ';	// prefix length 0 has no column to label
+		if (uPrefixLengthB > 0)
+			c = ConsensusChar(PB[uPrefixLengthB - 1]);
+		Log(" %4u:%c", uPrefixLengthB, c);
+		}
+	Log("\n");
+	for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
+		{
+		char c = ' ';
+		if (uPrefixLengthA > 0)
+			c = ConsensusChar(PA[uPrefixLengthA - 1]);
+		Log("%4u:%c  ", uPrefixLengthA, c);
+		for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+			Log(" %s", LocalScoreToStr(DPM(uPrefixLengthA, uPrefixLengthB)));	// DPM() macro reads from the DPM_ parameter
+		Log("\n");
+		}
+	}
+// TRACE only: log one trace-back matrix, columns = prefixes of B, rows = prefixes of A.
+static void ListTB(const char *TBM_, const ProfPos *PA, const ProfPos *PB,
+  unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{
+	Log("        ");
+	for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+		Log("%2d", uPrefixLengthB);	// NOTE(review): %d is used with an unsigned argument
+	Log("\n");
+	Log("        ");
+	for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+		{
+		char c = ' ';
+		if (uPrefixLengthB > 0)
+			c = ConsensusChar(PB[uPrefixLengthB - 1]);
+		Log(" %c", c);
+		}
+	Log("\n");
+	for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
+		{
+		char c = ' ';
+		if (uPrefixLengthA > 0)
+			c = ConsensusChar(PA[uPrefixLengthA - 1]);
+		Log("%4u:%c  ", uPrefixLengthA, c);
+		for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+			Log(" %c", TBM(uPrefixLengthA, uPrefixLengthB));	// TBM() macro reads from the TBM_ parameter
+		Log("\n");
+		}
+	}
+#endif // TRACE
+// PPTerm is the profile position passed to the GapScore* functions for the empty-prefix (terminal) cases in GlobalAlignDimer.
+static ProfPos PPTerm;
+static bool InitializePPTerm()
+	{
+	PPTerm.m_bAllGaps = false;
+	PPTerm.m_LL = 1;	// only the LL field is non-zero
+	PPTerm.m_LG = 0;
+	PPTerm.m_GL = 0;
+	PPTerm.m_GG = 0;
+	PPTerm.m_fOcc = 1;
+	return true;	// returns a value so the call can serve as a static initializer
+	}
+static bool PPTermInitialized = InitializePPTerm();	// runs InitializePPTerm during static initialization
+/* Score two profile positions as the log of the count-weighted sum of PPB.m_AAScores,
+ * scaled by the product of both m_fOcc values. Returns -2.5 when the sum is exactly zero. */
+static SCORE ScoreProfPosDimerLE(const ProfPos &PPA, const ProfPos &PPB)
+	{
+	SCORE Score = 0;
+	for (unsigned n = 0; n < 20; ++n)
+		{
+		const unsigned uLetter = PPA.m_uSortOrder[n];
+		const FCOUNT fcLetter = PPA.m_fcCounts[uLetter];
+		if (0 == fcLetter)	// stop at the first zero count; presumably m_uSortOrder lists letters by decreasing count - confirm
+			break;
+		Score += fcLetter*PPB.m_AAScores[uLetter];
+		}
+	if (0 == Score)
+		return -2.5;	// avoids taking the log of zero
+	SCORE logScore = logf(Score);
+	return (SCORE) (logScore*(PPA.m_fOcc * PPB.m_fOcc));
+	}
+// Score two profile positions as the sum over the letters of PPA of count * PPB.m_AAScores[letter].
+static SCORE ScoreProfPosDimerPSP(const ProfPos &PPA, const ProfPos &PPB)
+	{
+	SCORE Score = 0;
+	for (unsigned n = 0; n < 20; ++n)
+		{
+		const unsigned uLetter = PPA.m_uSortOrder[n];
+		const FCOUNT fcLetter = PPA.m_fcCounts[uLetter];
+		if (0 == fcLetter)	// stop at the first zero count
+			break;
+		Score += fcLetter*PPB.m_AAScores[uLetter];
+		}
+	return Score;
+	}
+// Dispatch to the scoring function selected by the global g_PPScore; any other value is a fatal error.
+static SCORE ScoreProfPosDimer(const ProfPos &PPA, const ProfPos &PPB)
+	{
+	switch (g_PPScore)
+		{
+	case PPSCORE_LE:
+		return ScoreProfPosDimerLE(PPA, PPB);
+
+	case PPSCORE_SP:
+	case PPSCORE_SV:	// SP and SV share one implementation
+		return ScoreProfPosDimerPSP(PPA, PPB);
+		}
+	Quit("Invalid g_PPScore");
+	return 0;	// only reached if Quit returns
+	}
+
+// Global alignment dynamic programming
+// This variant optimizes the profile-profile SP score under the
+// dimer approximation.
+SCORE GlobalAlignDimer(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path)
+	{
+	assert(uLengthB > 0 && uLengthA > 0);
+
+	const unsigned uPrefixCountA = uLengthA + 1;
+	const unsigned uPrefixCountB = uLengthB + 1;
+
+// Allocate DP matrices
+	const size_t LM = uPrefixCountA*uPrefixCountB;	// NOTE(review): the product is computed in unsigned before widening to size_t
+	SCORE *DPM_ = new SCORE[LM];
+	SCORE *DPD_ = new SCORE[LM];
+	SCORE *DPI_ = new SCORE[LM];
+
+	char *TBM_ = new char[LM];
+	char *TBD_ = new char[LM];
+	char *TBI_ = new char[LM];
+
+	DPM(0, 0) = 0;	// DPM/DPD/DPI and TBM/TBD/TBI are macros over the arrays above
+	DPD(0, 0) = MINUS_INFINITY;
+	DPI(0, 0) = MINUS_INFINITY;
+
+	TBM(0, 0) = 'S';	// 'S' marks a start cell, '?' a cell that cannot be reached
+	TBD(0, 0) = '?';
+	TBI(0, 0) = '?';
+
+	DPM(1, 0) = MINUS_INFINITY;
+	DPD(1, 0) = GapScoreMD(PA[0], PPTerm);
+	DPI(1, 0) = MINUS_INFINITY;
+
+	TBM(1, 0) = '?';
+	TBD(1, 0) = 'S';
+	TBI(1, 0) = '?';
+
+	DPM(0, 1) = MINUS_INFINITY;
+	DPD(0, 1) = MINUS_INFINITY;
+	DPI(0, 1) = GapScoreMI(PPTerm, PB[0]);
+
+	TBM(0, 1) = '?';
+	TBD(0, 1) = '?';
+	TBI(0, 1) = 'S';
+
+// Empty prefix of B is special case
+	for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
+		{
+	// M=LetterA+LetterB, impossible with empty prefix
+		DPM(uPrefixLengthA, 0) = MINUS_INFINITY;
+		TBM(uPrefixLengthA, 0) = '?';
+
+	// D=LetterA+GapB
+		DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) +
+		  GapScoreDD(PA[uPrefixLengthA - 1], PPTerm);
+		TBD(uPrefixLengthA, 0) = 'D';
+
+	// I=GapA+LetterB, impossible with empty prefix
+		DPI(uPrefixLengthA, 0) = MINUS_INFINITY;
+		TBI(uPrefixLengthA, 0) = '?';
+		}
+
+// Empty prefix of A is special case
+	for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+		{
+	// M=LetterA+LetterB, impossible with empty prefix
+		DPM(0, uPrefixLengthB) = MINUS_INFINITY;
+		TBM(0, uPrefixLengthB) = '?';
+
+	// D=LetterA+GapB, impossible with empty prefix
+		DPD(0, uPrefixLengthB) = MINUS_INFINITY;
+		TBD(0, uPrefixLengthB) = '?';
+
+	// I=GapA+LetterB
+		DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) +
+		  GapScoreII(PPTerm, PB[uPrefixLengthB - 1]);
+		TBI(0, uPrefixLengthB) = 'I';
+		}
+
+// ============
+// Main DP loop
+// ============
+	for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+		{
+		const ProfPos &PPB = PB[uPrefixLengthB - 1];
+		for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
+			{
+			const ProfPos &PPA = PA[uPrefixLengthA - 1];
+			{
+		// Match M=LetterA+LetterB
+			SCORE scoreLL = ScoreProfPosDimer(PPA, PPB);
+
+			SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreMM(PPA, PPB);
+			SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreDM(PPA, PPB);
+			SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreIM(PPA, PPB);
+
+			SCORE scoreBest = scoreMM;	// ties keep 'M', then 'D' (strict > comparisons)
+			char c = 'M';
+			if (scoreDM > scoreBest)
+				{
+				scoreBest = scoreDM;
+				c = 'D';
+				}
+			if (scoreIM > scoreBest)
+				{
+				scoreBest = scoreIM;
+				c = 'I';
+				}
+
+			DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL;
+			TBM(uPrefixLengthA, uPrefixLengthB) = c;
+			}
+			{
+		// Delete D=LetterA+GapB
+			SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) + GapScoreMD(PPA, PPB);
+			SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) + GapScoreDD(PPA, PPB);
+			SCORE scoreID = DPI(uPrefixLengthA-1, uPrefixLengthB) + GapScoreID(PPA, PPB);
+
+			SCORE scoreBest = scoreMD;
+			char c = 'M';
+			if (scoreDD > scoreBest)
+				{
+				scoreBest = scoreDD;
+				c = 'D';
+				}
+			if (scoreID > scoreBest)
+				{
+				scoreBest = scoreID;
+				c = 'I';
+				}
+
+			DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;
+			TBD(uPrefixLengthA, uPrefixLengthB) = c;
+			}
+			{
+		// Insert I=GapA+LetterB
+			SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) + GapScoreMI(PPA, PPB);
+			SCORE scoreDI = DPD(uPrefixLengthA, uPrefixLengthB-1) + GapScoreDI(PPA, PPB);
+			SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) + GapScoreII(PPA, PPB);
+
+			SCORE scoreBest = scoreMI;
+			char c = 'M';
+			if (scoreDI > scoreBest)
+				{
+				scoreBest = scoreDI;
+				c = 'D';
+				}
+			if (scoreII > scoreBest)
+				{
+				scoreBest = scoreII;
+				c = 'I';
+				}
+
+			DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;
+			TBI(uPrefixLengthA, uPrefixLengthB) = c;
+			}
+			}
+		}
+
+#if TRACE
+	Log("DPM:\n");
+	ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("DPD:\n");
+	ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("DPI:\n");
+	ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("TBM:\n");
+	ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("TBD:\n");
+	ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("TBI:\n");
+	ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB);
+#endif
+
+	SCORE Score = TraceBackDimer(DPM_, DPD_, DPI_, TBM_, TBD_, TBI_,
+	  uLengthA, uLengthB, Path);
+
+#if	TRACE
+	Log("GlobalAlignDimer score = %.3g\n", Score);
+#endif
+
+	delete[] DPM_;	// the six matrices are released before returning
+	delete[] DPD_;
+	delete[] DPI_;
+
+	delete[] TBM_;
+	delete[] TBD_;
+	delete[] TBI_;
+
+	return Score;
+	}
+/* Rebuild the alignment path from the three trace-back matrices.
+ * Starts at (uLengthA, uLengthB) in whichever of M, D, I scores highest, walks back to (0, 0)
+ * prepending one edge per step, and returns that highest score. */
+static SCORE TraceBackDimer(  const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,
+  const char *TBM_, const char *TBD_, const char *TBI_,
+  unsigned uLengthA, unsigned uLengthB, PWPath &Path)
+	{
+	const unsigned uPrefixCountA = uLengthA + 1;	// presumably consumed by the DPM()/TBM() indexing macros - confirm
+
+	unsigned uPrefixLengthA = uLengthA;
+	unsigned uPrefixLengthB = uLengthB;
+
+	char cEdge = 'M';
+	SCORE scoreMax = DPM(uLengthA, uLengthB);
+	if (DPD(uLengthA, uLengthB) > scoreMax)
+		{
+		scoreMax = DPD(uLengthA, uLengthB);
+		cEdge = 'D';
+		}
+	if (DPI(uLengthA, uLengthB) > scoreMax)
+		{
+		scoreMax = DPI(uLengthA, uLengthB);
+		cEdge = 'I';
+		}
+
+	for (;;)
+		{
+		if (0 == uPrefixLengthA && 0 == uPrefixLengthB)
+			break;
+
+		PWEdge Edge;
+		Edge.cType = cEdge;
+		Edge.uPrefixLengthA = uPrefixLengthA;
+		Edge.uPrefixLengthB = uPrefixLengthB;
+		Path.PrependEdge(Edge);	// path is built back to front
+
+#if TRACE
+		Log("PLA=%u PLB=%u Edge=%c\n", uPrefixLengthA, uPrefixLengthB, cEdge);
+#endif
+		switch (cEdge)
+			{
+		case 'M':
+			assert(uPrefixLengthA > 0 && uPrefixLengthB > 0);
+			cEdge = TBM(uPrefixLengthA, uPrefixLengthB);	// read the predecessor state before stepping
+			--uPrefixLengthA;
+			--uPrefixLengthB;
+			break;
+		case 'D':
+			assert(uPrefixLengthA > 0);
+			cEdge = TBD(uPrefixLengthA, uPrefixLengthB);
+			--uPrefixLengthA;
+			break;
+		case 'I':
+			assert(uPrefixLengthB > 0);
+			cEdge = TBI(uPrefixLengthA, uPrefixLengthB);
+			--uPrefixLengthB;
+			break;
+		default:
+			Quit("Invalid edge PLA=%u PLB=%u %c", uPrefixLengthA, uPrefixLengthB, cEdge);
+			}
+		}
+#if	TRACE
+	Path.LogMe();
+#endif
+	return scoreMax;
+	}

Added: trunk/packages/muscle/branches/upstream/current/globals.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/globals.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/globals.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,267 @@
+#include "muscle.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <math.h>
+#include <assert.h>
+#include <time.h>
+#include <errno.h>
+
+#if	WIN32
+#include <windows.h>
+#include <share.h>
+#endif
+
+#ifndef	MAX_PATH
+#define	MAX_PATH	260
+#endif
+
+static char g_strListFileName[MAX_PATH];
+static bool g_bListFileAppend = false;
+
+static SEQWEIGHT g_SeqWeight = SEQWEIGHT_Undefined;
+// Store the sequence weighting method in the file-level g_SeqWeight.
+void SetSeqWeightMethod(SEQWEIGHT Method)
+	{
+	g_SeqWeight = Method;
+	}
+// Return the stored sequence weighting method (SEQWEIGHT_Undefined until SetSeqWeightMethod is called).
+SEQWEIGHT GetSeqWeightMethod()
+	{
+	return g_SeqWeight;
+	}
+
+// Remember the log file name used by Log() and whether to append to it.
+// A name that does not fit in g_strListFileName is a fatal error: the
+// check is done at run time because an assert disappears in release
+// builds and the copy would then overrun the buffer.
+void SetListFileName(const char *ptrListFileName, bool bAppend)
+	{
+	if (strlen(ptrListFileName) >= sizeof(g_strListFileName))
+		Quit("Log file name too long (max %u characters)",
+		  (unsigned) (sizeof(g_strListFileName) - 1));
+	strcpy(g_strListFileName, ptrListFileName);
+	g_bListFileAppend = bAppend;
+	}
+
+// printf-style write to the log file. Does nothing until a log file name
+// has been set. The file is opened on the first call (append or truncate
+// according to g_bListFileAppend) and stays open; failure to open it ends
+// the program with EXIT_NotStarted. Output is flushed after every call.
+void Log(const char szFormat[], ...)
+	{
+	if (0 == g_strListFileName[0])
+		return;
+
+	static FILE *f = NULL;
+	if (NULL == f)
+		{
+		const char *mode = g_bListFileAppend ? "a" : "w";
+		f = _fsopen(g_strListFileName, mode, _SH_DENYNO);
+		}
+	if (NULL == f)
+		{
+		perror(g_strListFileName);
+		exit(EXIT_NotStarted);
+		}
+
+// Format directly into the stream: there is no intermediate fixed-size
+// buffer that a long message could overrun.
+	va_list ArgList;
+	va_start(ArgList, szFormat);
+	vfprintf(f, szFormat, ArgList);
+	va_end(ArgList);
+	fflush(f);
+	}
+// Current local time in asctime() format without the trailing newline; returns a static buffer overwritten by each call.
+const char *GetTimeAsStr()
+	{
+	static char szStr[32];
+	time_t t;
+	time(&t);
+	struct tm *ptmCurrentTime = localtime(&t);
+	strcpy(szStr, asctime(ptmCurrentTime));
+	assert('\n' == szStr[24]);	// asctime text is 24 characters followed by '\n'
+	szStr[24] = 0;
+	return szStr;
+	}
+
+// Exit immediately with error message, printf-style.
+// The message is written to stderr and to the log file, then the process
+// ends with EXIT_FatalError. Formatting is bounded by the size of szStr,
+// so an over-long message is truncated instead of overrunning the buffer.
+void Quit(const char szFormat[], ...)
+	{
+	va_list ArgList;
+	char szStr[4096];
+
+	va_start(ArgList, szFormat);
+	vsnprintf(szStr, sizeof(szStr), szFormat, ArgList);
+	va_end(ArgList);
+	szStr[sizeof(szStr) - 1] = 0;	// guard for runtimes that do not terminate on truncation
+
+	fprintf(stderr, "\n*** ERROR ***  %s\n", szStr);
+
+	Log("\n*** FATAL ERROR ***  ");
+	Log("%s\n", szStr);
+	Log("Stopped %s\n", GetTimeAsStr());
+
+#ifdef WIN32
+	if (IsDebuggerPresent())
+		{
+		int iBtn = MessageBox(NULL, szStr, "muscle", MB_ICONERROR | MB_OKCANCEL);
+		if (IDCANCEL == iBtn)
+			Break();
+		}
+#endif
+	exit(EXIT_FatalError);
+	}
+
+// Report a non-fatal problem, printf-style, on stderr and in the log file.
+// Formatting is bounded by the size of szStr, so an over-long message is
+// truncated instead of overrunning the buffer.
+void Warning(const char szFormat[], ...)
+	{
+	va_list ArgList;
+	char szStr[4096];
+
+	va_start(ArgList, szFormat);
+	vsnprintf(szStr, sizeof(szStr), szFormat, ArgList);
+	va_end(ArgList);
+	szStr[sizeof(szStr) - 1] = 0;	// guard for runtimes that do not terminate on truncation
+
+	fprintf(stderr, "\n*** WARNING *** %s\n", szStr);
+	Log("\n*** WARNING ***  %s\n", szStr);
+	}
+
+// Remove leading and trailing blanks from string (in place; only ' ' counts as a blank).
+void TrimBlanks(char szStr[])
+	{
+	TrimLeadingBlanks(szStr);
+	TrimTrailingBlanks(szStr);
+	}
+
+// Remove leading blanks (' ' only) from szStr in place.
+// The run of blanks is measured once and the rest of the string,
+// terminator included, is shifted with a single memmove; shifting by one
+// character per blank is quadratic in the number of blanks.
+void TrimLeadingBlanks(char szStr[])
+	{
+	size_t uBlanks = 0;
+	while (' ' == szStr[uBlanks])
+		++uBlanks;
+	if (0 == uBlanks)
+		return;
+	memmove(szStr, szStr + uBlanks, strlen(szStr + uBlanks) + 1);
+	}
+
+// Remove trailing blanks (' ' only) from szStr in place by overwriting
+// each one with a terminator, working backwards from the end.
+void TrimTrailingBlanks(char szStr[])
+	{
+	char *ptrEnd = szStr + strlen(szStr);
+	for (; ptrEnd != szStr && ' ' == ptrEnd[-1]; --ptrEnd)
+		ptrEnd[-1] = 0;
+	}
+// Always reports verbose mode as enabled.
+bool Verbose()
+	{
+	return true;
+	}
+// Convert text to a SCORE using atof; atof gives no indication of a parse failure.
+SCORE StrToScore(const char *pszStr)
+	{
+	return (SCORE) atof(pszStr);
+	}
+
+// Delete every space, tab, newline and carriage return from szStr,
+// compacting the remaining characters in place.
+void StripWhitespace(char szStr[])
+	{
+	char *ptrOut = szStr;
+	for (const char *ptrIn = szStr; 0 != *ptrIn; ++ptrIn)
+		{
+		const char ch = *ptrIn;
+		if (' ' == ch || '\t' == ch || '\n' == ch || '\r' == ch)
+			continue;
+		*ptrOut++ = ch;
+		}
+	*ptrOut = 0;
+	}
+
+// Delete every '-' from szStr, compacting the remaining characters in place.
+void StripGaps(char szStr[])
+	{
+	char *ptrOut = szStr;
+	for (const char *ptrIn = szStr; 0 != *ptrIn; ++ptrIn)
+		{
+		if ('-' == *ptrIn)
+			continue;
+		*ptrOut++ = *ptrIn;
+		}
+	*ptrOut = 0;
+	}
+
+// True if Str is an optional '+' or '-' followed by one or more decimal
+// digits and nothing else. A sign with no digits is rejected, as is the
+// empty string.
+bool IsValidSignedInteger(const char *Str)
+	{
+	if ('+' == *Str || '-' == *Str)
+		++Str;
+	if (0 == *Str)
+		return false;
+	for (; 0 != *Str; ++Str)
+		{
+	// <ctype.h> functions need an unsigned char value; a plain char may be negative.
+		if (!isdigit((unsigned char) *Str))
+			return false;
+		}
+	return true;
+	}
+
+// True if Str consists of one or more decimal digits and nothing else
+// (no sign, no blanks).
+bool IsValidInteger(const char *Str)
+	{
+	if (0 == *Str)
+		return false;
+	for (; 0 != *Str; ++Str)
+		{
+	// <ctype.h> functions need an unsigned char value; a plain char may be negative.
+		if (!isdigit((unsigned char) *Str))
+			return false;
+		}
+	return true;
+	}
+
+// Is c valid as first character in an identifier?
+// (A letter or an underscore.)
+bool isidentf(char c)
+	{
+// <ctype.h> functions need an unsigned char value; a plain char may be negative.
+	return 0 != isalpha((unsigned char) c) || '_' == c;
+	}
+
+// Is c valid character in an identifier?
+// (A letter, a decimal digit or an underscore.)
+bool isident(char c)
+	{
+// <ctype.h> functions need an unsigned char value; a plain char may be negative.
+	const unsigned char uc = (unsigned char) c;
+	return 0 != isalpha(uc) || 0 != isdigit(uc) || '_' == c;
+	}
+// True if Str starts with a character accepted by isidentf and every character is accepted by isident; false for "".
+bool IsValidIdentifier(const char *Str)
+	{
+	if (!isidentf(Str[0]))
+		return false;
+	while (char c = *Str++)	// the first character is checked again here
+		if (!isident(c))
+			return false;
+	return true;
+	}
+
+// Take the log file name from the command-line options: "loga" selects
+// append mode, otherwise "log" is used. Does nothing when neither option
+// has a value. A name that does not fit in g_strListFileName is a fatal
+// error; copying it unchecked would overrun the buffer.
+void SetLogFile()
+	{
+	const char *strFileName = ValueOpt("loga");
+	if (0 != strFileName)
+		g_bListFileAppend = true;
+	else
+		strFileName = ValueOpt("log");
+	if (0 == strFileName)
+		return;
+	if (strlen(strFileName) >= sizeof(g_strListFileName))
+		Quit("Log file name too long (max %u characters)",
+		  (unsigned) (sizeof(g_strListFileName) - 1));
+	strcpy(g_strListFileName, strFileName);
+	}
+
+// Get filename, stripping any extension and directory parts.
+// At most uBytes-1 characters are stored in szName, which is always
+// zero-terminated; nothing is written when uBytes is 0.
+void NameFromPath(const char szPath[], char szName[], unsigned uBytes)
+	{
+	if (0 == uBytes)
+		return;
+
+// The name starts after the last '/' or '\\', or at the start of the path.
+	const char *pstrBegin = szPath;
+	for (const char *p = szPath; 0 != *p; ++p)
+		if ('/' == *p || '\\' == *p)
+			pstrBegin = p + 1;
+
+// The name ends at its last dot. Only a dot inside the name counts: a dot
+// in a directory part ("dir.d/file") is not an extension, and treating it
+// as one would give a negative length.
+	const char *pstrEnd = strrchr(pstrBegin, '.');
+	if (0 == pstrEnd)
+		pstrEnd = pstrBegin + strlen(pstrBegin);
+
+	size_t uNameLength = (size_t) (pstrEnd - pstrBegin);
+	if (uNameLength > uBytes - 1)
+		uNameLength = uBytes - 1;
+	memcpy(szName, pstrBegin, uNameLength);
+	szName[uNameLength] = 0;
+	}
+// Duplicate s with strdup; running out of memory is a fatal error reported through Quit.
+char *strsave(const char *s)
+	{
+	char *ptrCopy = strdup(s);
+	if (0 == ptrCopy)
+		Quit("Out of memory");
+	return ptrCopy;
+	}
+
+// True if c may appear in the text of a floating-point number: a decimal
+// digit, '.', a sign, or an exponent letter ('e', 'E', 'd', 'D').
+bool IsValidFloatChar(char c)
+	{
+// <ctype.h> functions need an unsigned char value; a plain char may be negative.
+	if (isdigit((unsigned char) c))
+		return true;
+	switch (c)
+		{
+	case '.':
+	case '+':
+	case '-':
+	case 'e':
+	case 'E':
+	case 'd':
+	case 'D':
+		return true;
+	default:
+		return false;
+		}
+	}
+// Assertion helper: when b is false, stop through Quit with the file, line and failed expression text.
+void Call_MY_ASSERT(const char *file, int line, bool b, const char *msg)
+	{
+	if (b)
+		return;
+	Quit("%s(%d): MY_ASSERT(%s)", file, line, msg);
+	}

Added: trunk/packages/muscle/branches/upstream/current/globalslinux.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/globalslinux.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/globalslinux.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,163 @@
+#include "muscle.h"
+
+#ifndef	WIN32
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+const int ONE_MB = 1000000;
+const int MEM_WARNING_THRESHOLD = 20*ONE_MB;
+
+// Return a NaN as a double.
+// The bit pattern is built from two 32-bit words and copied with memcpy.
+// Casting an "unsigned long nan[2]" to double* read only the first eight
+// bytes, which with a 64-bit long is 0x00000000ffffffff (a denormal, not
+// a NaN), and the pointer cast also broke the strict aliasing rule.
+// Both words have every mantissa bit set, so either word order gives a NaN.
+double GetNAN()
+	{
+	static const unsigned int nan[2] = {0xffffffff, 0x7fffffff};
+	double dNAN = 0;
+	memcpy(&dNAN, nan, sizeof(dNAN) < sizeof(nan) ? sizeof(dNAN) : sizeof(nan));
+	return dNAN;
+	}
+
+double g_dNAN = GetNAN();
+// Heap check hook: does nothing in this (non-WIN32) build; szMsg is unused.
+void chkmem(const char szMsg[])
+	{
+	//assert(_CrtCheckMemory());
+	}
+// Debugger break hook: does nothing in this (non-WIN32) build.
+void Break()
+	{
+	//DebugBreak();
+	}
+
+static char szCmdLine[4096];
+
+void *ptrStartBreak = sbrk(0);
+// Return the command line text collected by SaveCmdLine (empty until SaveCmdLine has been called).
+const char *GetCmdLine()
+	{
+	return szCmdLine;
+	}
+/* Memory use of this process in MB (1e6 bytes): the first number in /proc/<pid>/statm times the page size.
+ * Returns -1 when the file cannot be opened and 0 when it cannot be read. */
+double GetMemUseMB()
+	{
+	static char statm[64];
+	static int PageSize;
+	if (0 == statm[0])	// first call: build the file name and look up the page size
+		{
+		PageSize = sysconf(_SC_PAGESIZE);
+		pid_t pid = getpid();
+		sprintf(statm, "/proc/%d/statm", (int) pid);
+		}
+
+	int fd = open(statm, O_RDONLY);
+	if (-1 == fd)
+		return -1;
+	char Buffer[64];
+	int n = read(fd, Buffer, sizeof(Buffer) - 1);	// one byte is kept for the terminator
+	close(fd);
+	fd = -1;
+
+	if (n <= 0)
+		{
+		static bool Warned = false;	// warn once only
+		if (!Warned)
+			{
+			Warned = true;
+			Warning("*Warning* Cannot read %s errno=%d %s",
+			  statm, errno, strerror(errno));	// NOTE(review): errno is read after close(), which may have changed it
+			}
+		return 0;
+		}
+	Buffer[n] = 0;
+	int Pages = atoi(Buffer);	// atoi stops at the first non-digit, so only the first field is used
+
+	return ((double) Pages * (double) PageSize)/1e6;
+	}
+
+// Append the program arguments, separated by single blanks, to the
+// command line text returned by GetCmdLine(). The text is limited to the
+// size of szCmdLine: an argument that does not fit is cut short and any
+// following arguments are dropped, instead of writing past the buffer.
+void SaveCmdLine(int argc, char *argv[])
+	{
+	size_t uUsed = strlen(szCmdLine);
+	for (int i = 0; i < argc; ++i)
+		{
+		const char *strSep = (i > 0) ? " " : "";
+		const size_t uRoom = sizeof(szCmdLine) - uUsed;	// always >= 1, the terminator
+		const int n = snprintf(szCmdLine + uUsed, uRoom, "%s%s", strSep, argv[i]);
+		if (n < 0 || (size_t) n >= uRoom)
+			break;	// truncated (snprintf has terminated the text) or failed
+		uUsed += (size_t) n;
+		}
+	}
+
+double dPeakMemUseMB = 0;
+// Highest memory use seen so far, in MB; takes a fresh sample through CheckMemUse first.
+double GetPeakMemUseMB()
+	{
+	CheckMemUse();
+	return dPeakMemUseMB;
+	}
+// CPU clock speed in GHz: the CPUGHZ environment variable if set, otherwise 2.5. The value is not range-checked here.
+double GetCPUGHz()
+	{
+	double dGHz = 2.5;
+	const char *e = getenv("CPUGHZ");
+	if (0 != e)
+		dGHz = atof(e);
+	return dGHz;
+	}
+// Sample the current memory use and raise dPeakMemUseMB if the sample is higher.
+void CheckMemUse()
+	{
+	double dMB = GetMemUseMB();
+	if (dMB > dPeakMemUseMB)
+		dPeakMemUseMB = dMB;
+	}
+
+// Size of RAM in MB (1e6 bytes), taken from the "MemTotal:" line of
+// /proc/meminfo, where the figure is given in kB. Returns DEFAULT_RAM
+// (500) after a one-time warning when the file cannot be opened or read
+// or has no such line. A successfully read value is cached, so the file
+// is parsed only once.
+double GetRAMSizeMB()
+	{
+	const double DEFAULT_RAM = 500;
+	static double RAMMB = 0;
+	if (RAMMB != 0)
+		return RAMMB;
+
+	int fd = open("/proc/meminfo", O_RDONLY);
+	if (-1 == fd)
+		{
+		static bool Warned = false;
+		if (!Warned)
+			{
+			Warned = true;
+			Warning("*Warning* Cannot open /proc/meminfo errno=%d %s",
+			  errno, strerror(errno));
+			}
+		return DEFAULT_RAM;
+		}
+	char Buffer[1024];
+	int n = read(fd, Buffer, sizeof(Buffer) - 1);
+	const int iReadErrno = errno;	// saved now: close() may change errno
+	close(fd);
+	fd = -1;
+
+	if (n <= 0)
+		{
+		static bool Warned = false;
+		if (!Warned)
+			{
+			Warned = true;
+			Warning("*Warning* Cannot read /proc/meminfo errno=%d %s",
+			  iReadErrno, strerror(iReadErrno));
+			}
+		return DEFAULT_RAM;
+		}
+	Buffer[n] = 0;
+	char *pMem = strstr(Buffer, "MemTotal: ");
+	if (0 == pMem)
+		{
+		static bool Warned = false;
+		if (!Warned)
+			{
+			Warned = true;
+			Warning("*Warning* 'MemTotal:' not found in /proc/meminfo");
+			}
+		return DEFAULT_RAM;
+		}
+
+// pMem+9 is the blank after "MemTotal:"; atof skips it. The arithmetic is
+// done in double: kB*1000 as an int overflows above about 2 GB of RAM.
+	const double dBytes = atof(pMem+9)*1000.0;
+	RAMMB = dBytes/1e6;
+	return RAMMB;
+	}
+
+#endif	// !WIN32

Added: trunk/packages/muscle/branches/upstream/current/globalswin32.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/globalswin32.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/globalswin32.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,100 @@
+#include "muscle.h"
+
+#if	WIN32
+#include <windows.h>
+#include <crtdbg.h>
+#include <psapi.h>
+#include <float.h>
+#include <stdio.h>
+
+// printf-style message sent to the debugger with OutputDebugString.
+// Formatting is bounded by the size of szStr, so an over-long message is
+// truncated instead of overrunning the buffer.
+void DebugPrintf(const char *szFormat, ...)
+	{
+	va_list ArgList;
+	char szStr[4096];
+
+	va_start(ArgList, szFormat);
+	_vsnprintf(szStr, sizeof(szStr) - 1, szFormat, ArgList);
+	va_end(ArgList);
+	szStr[sizeof(szStr) - 1] = 0;	// _vsnprintf does not terminate when it truncates
+
+	OutputDebugString(szStr);
+	}
+// Return a NaN built from a fixed bit pattern; the assert checks the result with _isnan.
+double GetNAN()
+	{
+	static unsigned long nan[2]={0xffffffff, 0x7fffffff};
+	double dNAN = *( double* )nan;	// NOTE(review): reads the integers through a double pointer; relies on unsigned long being 32 bits
+	assert(_isnan(dNAN));
+	return dNAN;
+	}
+
+double g_dNAN = GetNAN();
+// Check the debug heap with _CrtCheckMemory; a failed check is fatal and szMsg is included in the message.
+void chkmem(const char szMsg[])
+	{
+	if (!_CrtCheckMemory())
+		Quit("chkmem(%s)", szMsg);
+	}
+// Break into the debugger, but only when one is attached.
+void Break()
+	{
+	if (IsDebuggerPresent())
+		DebugBreak();
+	}
+// Return the process command line as reported by GetCommandLine.
+const char *GetCmdLine()
+	{
+	return GetCommandLine();
+	}
+
+static unsigned uPeakMemUseBytes;
+// Returns MEMORYSTATUS.dwAvailPhys in MB (1e6 bytes), i.e. the physical memory available at the time of the call.
+double GetRAMSizeMB()
+	{
+	MEMORYSTATUS MS;
+	GlobalMemoryStatus(&MS);
+	return MS.dwAvailPhys/1e6;	// NOTE(review): the available figure is used, not dwTotalPhys - confirm this is intended
+	}
+// Working set size of this process in MB, rounded to the nearest MB by adding half a MB; also updates uPeakMemUseBytes.
+double GetMemUseMB()
+	{
+	HANDLE hProc = GetCurrentProcess();
+	PROCESS_MEMORY_COUNTERS PMC;
+	BOOL bOk = GetProcessMemoryInfo(hProc, &PMC, sizeof(PMC));
+	assert(bOk);	// the result is only checked in builds where assert is active
+	//printf("GetMemUseMB()\n");
+	//printf("%12u  PageFaultCount\n", (unsigned) PMC.PageFaultCount);
+	//printf("%12u  PagefileUsage\n", (unsigned) PMC.PagefileUsage);
+	//printf("%12u  PeakPagefileUsage\n", (unsigned) PMC.PeakPagefileUsage);
+	//printf("%12u  WorkingSetSize\n", (unsigned) PMC.WorkingSetSize);
+	//printf("%12u  PeakWorkingSetSize\n", (unsigned) PMC.PeakWorkingSetSize);
+	//printf("%12u  QuotaPagedPoolUsage\n", (unsigned) PMC.QuotaPagedPoolUsage);
+	//printf("%12u  QuotaPeakPagedPoolUsage\n", (unsigned) PMC.QuotaPeakPagedPoolUsage);
+	//printf("%12u  QuotaNonPagedPoolUsage\n", (unsigned) PMC.QuotaNonPagedPoolUsage);
+	//printf("%12u  QuotaPeakNonPagedPoolUsage\n", (unsigned) PMC.QuotaPeakNonPagedPoolUsage);
+	unsigned uBytes = (unsigned) PMC.WorkingSetSize;	// NOTE(review): the cast narrows WorkingSetSize to unsigned
+	if (uBytes > uPeakMemUseBytes)
+		uPeakMemUseBytes = uBytes;
+	return (uBytes + 500000.0)/1000000.0;
+	}
+// Highest working set size recorded so far by GetMemUseMB, in MB; does not take a new sample itself.
+double GetPeakMemUseMB()
+	{
+	return (uPeakMemUseBytes + 500000.0)/1000000.0;
+	}
+// Take a memory sample so that the recorded peak is up to date; the returned value is discarded.
+void CheckMemUse()
+	{
+// Side-effect: sets peak usage in uPeakMemUseBytes
+	GetMemUseMB();
+	}
+// CPU clock speed in GHz: the CPUGHZ environment variable if set, otherwise 2.5; values outside 0.1..1000 are fatal.
+double GetCPUGHz()
+	{
+	double dGHz = 2.5;
+	const char *e = getenv("CPUGHZ");
+	if (0 != e)
+		dGHz = atof(e);
+	if (dGHz < 0.1 || dGHz > 1000.0)	// can only fail when e is set, since the default 2.5 is in range
+		Quit("Invalid value '%s' for environment variable CPUGHZ", e);
+	return dGHz;
+	}
+#endif	// WIN32

Added: trunk/packages/muscle/branches/upstream/current/gonnet.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/gonnet.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/gonnet.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,499 @@
+#include "muscle.h"
+#include "gonnet.h"
+
+#define ROW(A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y) \
+	{ A/4.0, C/4.0, D/4.0, E/4.0, F/4.0, G/4.0, H/4.0, I/4.0, K/4.0, L/4.0, M/4.0, N/4.0, P/4.0, Q/4.0, R/4.0, S/4.0, T/4.0, V/4.0, W/4.0, Y/4.0 }, /* one braced row of 20 initializers, each argument divided by 4.0, plus the comma that separates rows */
+
+static double Gonnet80[20][20] =	// returned by GetGonnetMatrix(80); stored entries are the listed values divided by 4 (see ROW)
+	{
+//        A       C       D       E       F       G       H       I       K       L
+//        M       N       P       Q       R       S       T       V       W       Y
+ROW(   1990,   1140,    930,   1070,    600,   1130,    850,    810,    940,    810,
+        980,    900,   1080,   1020,    880,   1380,   1190,   1180,    370,    590) // A
+
+ROW(   1140,   2780,    310,    300,    850,    630,    810,    700,    360,    690,
+        850,    690,    310,    480,    640,   1090,    900,   1030,    810,    920) // C
+
+ROW(    930,    310,   2200,   1550,    130,    980,   1070,    180,   1030,    150,
+        360,   1450,    820,   1150,    800,   1100,   1000,    350,      0,    550) // D
+
+ROW(   1070,    300,   1550,   2120,    220,    770,   1070,    510,   1280,    490,
+        710,   1110,    890,   1470,   1010,   1050,    970,    730,    260,    500) // E
+
+ROW(    600,    850,    130,    220,   2380,     90,    980,   1090,    350,   1310,
+       1270,    490,    310,    540,    340,    470,    620,    930,   1400,   1730) // F
+
+ROW(   1130,    630,    980,    770,     90,   2210,    710,    100,    740,    200,
+        410,   1060,    660,    800,    810,   1080,    720,    380,    430,    300) // G
+
+ROW(    850,    810,   1070,   1070,    980,    710,   2510,    600,   1120,    670,
+        860,   1330,    790,   1380,   1140,    990,   1000,    590,    810,   1450) // H
+
+ROW(    810,    700,    180,    510,   1090,    100,    600,   2100,    650,   1460,
+       1490,    530,    490,    640,    530,    620,    960,   1650,    610,    770) // I
+
+ROW(    940,    360,   1030,   1280,    350,    740,   1120,    650,   2090,    660,
+        870,   1220,    870,   1410,   1570,   1040,   1090,    700,    350,    640) // K
+
+ROW(    810,    690,    150,    490,   1310,    200,    670,   1460,    660,   2010,
+       1550,    450,    660,    850,    660,    600,    750,   1270,    800,    890) // L
+
+ROW(    980,    850,    360,    710,   1270,    410,    860,   1490,    870,   1550,
+       2410,    620,    460,   1050,    710,    830,    990,   1250,    790,    870) // M
+
+ROW(    900,    690,   1450,   1110,    490,   1060,   1330,    530,   1220,    450,
+        620,   2210,    760,   1180,   1020,   1290,   1170,    550,    380,    850) // N
+
+ROW(   1080,    310,    820,    890,    310,    660,    790,    490,    870,    660,
+        460,    760,   2380,   1000,    790,   1100,   1040,    670,    120,    480) // P
+
+ROW(   1020,    480,   1150,   1470,    540,    800,   1380,    640,   1410,    850,
+       1050,   1180,   1000,   2190,   1350,   1090,   1060,    730,    620,    710) // Q
+
+ROW(    880,    640,    800,   1010,    340,    810,   1140,    530,   1570,    660,
+        710,   1020,    790,   1350,   2210,    970,    970,    640,    830,    740) // R
+
+ROW(   1380,   1090,   1100,   1050,    470,   1080,    990,    620,   1040,    600,
+        830,   1290,   1100,   1090,    970,   2020,   1490,    810,    520,    780) // S
+
+ROW(   1190,    900,   1000,    970,    620,    720,   1000,    960,   1090,    750,
+        990,   1170,   1040,   1060,    970,   1490,   2050,   1150,    370,    660) // T
+
+ROW(   1180,   1030,    350,    730,    930,    380,    590,   1650,    700,   1270,
+       1250,    550,    670,    730,    640,    810,   1150,   2040,    440,    770) // V
+
+ROW(    370,    810,      0,    260,   1400,    430,    810,    610,    350,    800,
+        790,    380,    120,    620,    830,    520,    370,    440,   2970,   1470) // W
+
+ROW(    590,    920,    550,    500,   1730,    300,   1450,    770,    640,    890,
+        870,    850,    480,    710,    740,    780,    660,    770,   1470,   2470) // Y
+	};
+
+static double Gonnet120[20][20] =	// returned by GetGonnetMatrix(120); stored entries are the listed values divided by 4 (see ROW)
+	{
+//        A       C       D       E       F       G       H       I       K       L
+//        M       N       P       Q       R       S       T       V       W       Y
+ROW(   1550,    950,    780,    870,    480,    930,    700,    690,    770,    660,
+        790,    760,    900,    840,    730,   1120,    980,    960,    280,    480) // A
+
+ROW(    950,   2400,    270,    280,    700,    510,    650,    600,    320,    570,
+        700,    550,    280,    400,    510,    890,    750,    850,    670,    760) // C
+
+ROW(    780,    270,   1780,   1310,     90,    820,    890,    160,    880,    140,
+        320,   1220,    680,    970,    690,    910,    830,    310,      0,    430) // D
+
+ROW(    870,    280,   1310,   1680,    180,    650,    900,    410,   1070,    390,
+        560,    950,    740,   1210,    860,    870,    810,    580,    180,    400) // E
+
+ROW(    480,    700,     90,    180,   1980,     40,    820,    930,    290,   1110,
+       1070,    380,    240,    430,    280,    380,    490,    790,   1230,   1510) // F
+
+ROW(    930,    510,    820,    650,     40,   1860,    590,     90,    620,    140,
+        310,    890,    550,    660,    660,    900,    610,    310,    300,    220) // G
+
+ROW(    700,    650,    890,    900,    820,    590,   2060,    480,    940,    540,
+        680,   1100,    650,   1130,    950,    820,    820,    490,    680,   1220) // H
+
+ROW(    690,    600,    160,    410,    930,     90,    480,   1680,    520,   1240,
+       1250,    410,    400,    530,    430,    520,    790,   1380,    500,    650) // I
+
+ROW(    770,    320,    880,   1070,    290,    620,    940,    520,   1650,    520,
+        690,   1010,    720,   1160,   1320,    860,    900,    570,    280,    520) // K
+
+ROW(    660,    570,    140,    390,   1110,    140,    540,   1240,    520,   1620,
+       1300,    350,    520,    660,    520,    490,    620,   1090,    670,    760) // L
+
+ROW(    790,    700,    320,    560,   1070,    310,    680,   1250,    690,   1300,
+       1910,    500,    400,    820,    580,    670,    800,   1060,    650,    740) // M
+
+ROW(    760,    550,   1220,    950,    380,    890,   1100,    410,   1010,    350,
+        500,   1760,    640,    970,    860,   1060,    960,    460,    280,    680) // N
+
+ROW(    900,    280,    680,    740,    240,    550,    650,    400,    720,    520,
+        400,    640,   2010,    820,    660,    910,    860,    540,     70,    370) // P
+
+ROW(    840,    400,    970,   1210,    430,    660,   1130,    530,   1160,    660,
+        820,    970,    820,   1700,   1120,    890,    870,    600,    470,    580) // Q
+
+ROW(    730,    510,    690,    860,    280,    660,    950,    430,   1320,    520,
+        580,    860,    660,   1120,   1790,    810,    800,    520,    660,    590) // R
+
+ROW(   1120,    890,    910,    870,    380,    900,    820,    520,    860,    490,
+        670,   1060,    910,    890,    810,   1560,   1220,    680,    390,    610) // S
+
+ROW(    980,    750,    830,    810,    490,    610,    820,    790,    900,    620,
+        800,    960,    860,    870,    800,   1220,   1600,    930,    290,    540) // T
+
+ROW(    960,    850,    310,    580,    790,    310,    490,   1380,    570,   1090,
+       1060,    460,    540,    600,    520,    680,    930,   1610,    370,    630) // V
+
+ROW(    280,    670,      0,    180,   1230,    300,    680,    500,    280,    670,
+        650,    280,     70,    470,    660,    390,    290,    370,   2620,   1290) // W
+
+ROW(    480,    760,    430,    400,   1510,    220,   1220,    650,    520,    760,
+        740,    680,    370,    580,    590,    610,    540,    630,   1290,   2070) // Y
+	};
+
+static SCORE Gonnet160[20][20] =	// SCORE-typed, unlike its double siblings; not reachable via GetGonnetMatrix (its case 160 is commented out)
+	{
+//        A       C       D       E       F       G       H       I       K       L
+//        M       N       P       Q       R       S       T       V       W       Y
+ROW(   1240,    810,    670,    740,    400,    800,    600,    600,    660,    560,
+        660,    660,    770,    710,    620,    940,    830,    790,    230,    410) // A
+
+ROW(    810,   2130,    250,    260,    600,    440,    550,    530,    300,    490,
+        590,    470,    260,    360,    430,    760,    640,    720,    570,    650) // C
+
+ROW(    670,    250,   1480,   1120,     80,    710,    770,    160,    770,    130,
+        280,   1040,    590,    840,    620,    780,    720,    290,      0,    360) // D
+
+ROW(    740,    260,   1120,   1370,    160,    570,    770,    350,    910,    330,
+        470,    830,    640,   1010,    750,    750,    700,    480,    140,    340) // E
+
+ROW(    400,    600,     80,    160,   1690,     20,    710,    810,    250,    970,
+        920,    310,    200,    370,    250,    330,    420,    700,   1100,   1340) // F
+
+ROW(    800,    440,    710,    570,     20,   1600,    510,     80,    540,    110,
+        260,    760,    480,    570,    570,    770,    540,    260,    230,    180) // G
+
+ROW(    600,    550,    770,    770,    710,    510,   1710,    410,    800,    460,
+        570,    930,    560,    950,    810,    700,    700,    430,    590,   1050) // H
+
+ROW(    600,    530,    160,    350,    810,     80,    410,   1370,    430,   1080,
+       1070,    340,    350,    460,    370,    450,    660,   1180,    440,    580) // I
+
+ROW(    660,    300,    770,    910,    250,    540,    800,    430,   1330,    440,
+        570,    860,    620,    980,   1130,    740,    760,    480,    240,    430) // K
+
+ROW(    560,    490,    130,    330,    970,    110,    460,   1080,    440,   1350,
+       1120,    300,    430,    540,    430,    420,    540,    950,    580,    670) // L
+
+ROW(    660,    590,    280,    470,    920,    260,    570,   1070,    570,   1120,
+       1540,    420,    360,    660,    490,    550,    670,    920,    560,    650) // M
+
+ROW(    660,    470,   1040,    830,    310,    760,    930,    340,    860,    300,
+        420,   1430,    560,    830,    740,    890,    810,    400,    230,    560) // N
+
+ROW(    770,    260,    590,    640,    200,    480,    560,    350,    620,    430,
+        360,    560,   1740,    700,    570,    780,    740,    460,     40,    300) // P
+
+ROW(    710,    360,    840,   1010,    370,    570,    950,    460,    980,    540,
+        660,    830,    700,   1340,    950,    760,    740,    510,    380,    490) // Q
+
+ROW(    620,    430,    620,    750,    250,    570,    810,    370,   1130,    430,
+        490,    740,    570,    950,   1490,    690,    690,    440,    540,    490) // R
+
+ROW(    940,    760,    780,    750,    330,    770,    700,    450,    740,    420,
+        550,    890,    780,    760,    690,   1220,   1010,    580,    310,    500) // S
+
+ROW(    830,    640,    720,    700,    420,    540,    700,    660,    760,    540,
+        670,    810,    740,    740,    690,   1010,   1280,    780,    240,    460) // T
+
+ROW(    790,    720,    290,    480,    700,    260,    430,   1180,    480,    950,
+        920,    400,    460,    510,    440,    580,    780,   1310,    330,    540) // V
+
+ROW(    230,    570,      0,    140,   1100,    230,    590,    440,    240,    580,
+        560,    230,     40,    380,    540,    310,    240,    330,   2360,   1160) // W
+
+ROW(    410,    650,    360,    340,   1340,    180,   1050,    580,    430,    670,
+        650,    560,    300,    490,    490,    500,    460,    540,   1160,   1780) // Y
+	};
+
+double Gonnet16[21][21] =	// external linkage; only 20 rows of 20 initializers follow, so row/column 20 stay zero; NOTE(review): ROW still divides by 4 here and the only user in this file is commented out -- confirm intent
+	{
+//       A      C      D      E      F      G      H      I      K      L
+//       M      N      P      Q      R      S      T      V      W      Y
+ROW(   124,    81,    67,    74,    40,    80,    60,    60,    66,    56,
+        66,    66,    77,    71,    62,    94,    83,    79,    23,    41) // A
+
+ROW(    81,   213,    25,    26,    60,    44,    55,    53,    30,    49,
+        59,    47,    26,    36,    43,    76,    64,    72,    57,    65) // C
+
+ROW(    67,    25,   148,   112,     8,    71,    77,    16,    77,    13,
+        28,   104,    59,    84,    62,    78,    72,    29,     0,    36) // D
+
+ROW(    74,    26,   112,   137,    16,    57,    77,    35,    91,    33,
+        47,    83,    64,   101,    75,    75,    70,    48,    14,    34) // E
+
+ROW(    40,    60,     8,    16,   169,     2,    71,    81,    25,    97,
+        92,    31,    20,    37,    25,    33,    42,    70,   110,   134) // F
+
+ROW(    80,    44,    71,    57,     2,   160,    51,     8,    54,    11,
+        26,    76,    48,    57,    57,    77,    54,    26,    23,    18) // G
+
+ROW(    60,    55,    77,    77,    71,    51,   171,    41,    80,    46,
+        57,    93,    56,    95,    81,    70,    70,    43,    59,   105) // H
+
+ROW(    60,    53,    16,    35,    81,     8,    41,   137,    43,   108,
+       107,    34,    35,    46,    37,    45,    66,   118,    44,    58) // I
+
+ROW(    66,    30,    77,    91,    25,    54,    80,    43,   133,    44,
+        57,    86,    62,    98,   113,    74,    76,    48,    24,    43) // K
+
+ROW(    56,    49,    13,    33,    97,    11,    46,   108,    44,   135,
+       112,    30,    43,    54,    43,    42,    54,    95,    58,    67) // L
+
+ROW(    66,    59,    28,    47,    92,    26,    57,   107,    57,   112,
+       154,    42,    36,    66,    49,    55,    67,    92,    56,    65) // M
+
+ROW(    66,    47,   104,    83,    31,    76,    93,    34,    86,    30,
+        42,   143,    56,    83,    74,    89,    81,    40,    23,    56) // N
+
+ROW(    77,    26,    59,    64,    20,    48,    56,    35,    62,    43,
+        36,    56,   174,    70,    57,    78,    74,    46,     4,    30) // P
+
+ROW(    71,    36,    84,   101,    37,    57,    95,    46,    98,    54,
+        66,    83,    70,   134,    95,    76,    74,    51,    38,    49) // Q
+
+ROW(    62,    43,    62,    75,    25,    57,    81,    37,   113,    43,
+        49,    74,    57,    95,   149,    69,    69,    44,    54,    49) // R
+
+ROW(    94,    76,    78,    75,    33,    77,    70,    45,    74,    42,
+        55,    89,    78,    76,    69,   122,   101,    58,    31,    50) // S
+
+ROW(    83,    64,    72,    70,    42,    54,    70,    66,    76,    54,
+        67,    81,    74,    74,    69,   101,   128,    78,    24,    46) // T
+
+ROW(    79,    72,    29,    48,    70,    26,    43,   118,    48,    95,
+        92,    40,    46,    51,    44,    58,    78,   131,    33,    54) // V
+
+ROW(    23,    57,     0,    14,   110,    23,    59,    44,    24,    58,
+        56,    23,     4,    38,    54,    31,    24,    33,   236,   116) // W
+
+ROW(    41,    65,    36,    34,   134,    18,   105,    58,    43,    67,
+        65,    56,    30,    49,    49,    50,    46,    54,   116,   178) // Y
+	};
+
+static double Gonnet250[20][20] =	// returned by GetGonnetMatrix(250); stored entries are the listed values divided by 4 (see ROW)
+	{
+//        A       C       D       E       F       G       H       I       K       L
+//        M       N       P       Q       R       S       T       V       W       Y
+ROW(    760,    570,    490,    520,    290,    570,    440,    440,    480,    400,
+        450,    490,    550,    500,    460,    630,    580,    530,    160,    300) // A
+
+ROW(    570,   1670,    200,    220,    440,    320,    390,    410,    240,    370,
+        430,    340,    210,    280,    300,    530,    470,    520,    420,    470) // C
+
+ROW(    490,    200,    990,    790,     70,    530,    560,    140,    570,    120,
+        220,    740,    450,    610,    490,    570,    520,    230,      0,    240) // D
+
+ROW(    520,    220,    790,    880,    130,    440,    560,    250,    640,    240,
+        320,    610,    470,    690,    560,    540,    510,    330,     90,    250) // E
+
+ROW(    290,    440,     70,    130,   1220,      0,    510,    620,    190,    720,
+        680,    210,    140,    260,    200,    240,    300,    530,    880,   1030) // F
+
+ROW(    570,    320,    530,    440,      0,   1180,    380,     70,    410,     80,
+        170,    560,    360,    420,    420,    560,    410,    190,    120,    120) // G
+
+ROW(    440,    390,    560,    560,    510,    380,   1120,    300,    580,    330,
+        390,    640,    410,    640,    580,    500,    490,    320,    440,    740) // H
+
+ROW(    440,    410,    140,    250,    620,     70,    300,    920,    310,    800,
+        770,    240,    260,    330,    280,    340,    460,    830,    340,    450) // I
+
+ROW(    480,    240,    570,    640,    190,    410,    580,    310,    840,    310,
+        380,    600,    460,    670,    790,    530,    530,    350,    170,    310) // K
+
+ROW(    400,    370,    120,    240,    720,     80,    330,    800,    310,    920,
+        800,    220,    290,    360,    300,    310,    390,    700,    450,    520) // L
+
+ROW(    450,    430,    220,    320,    680,    170,    390,    770,    380,    800,
+        950,    300,    280,    420,    350,    380,    460,    680,    420,    500) // M
+
+ROW(    490,    340,    740,    610,    210,    560,    640,    240,    600,    220,
+        300,    900,    430,    590,    550,    610,    570,    300,    160,    380) // N
+
+ROW(    550,    210,    450,    470,    140,    360,    410,    260,    460,    290,
+        280,    430,   1280,    500,    430,    560,    530,    340,     20,    210) // P
+
+ROW(    500,    280,    610,    690,    260,    420,    640,    330,    670,    360,
+        420,    590,    500,    790,    670,    540,    520,    370,    250,    350) // Q
+
+ROW(    460,    300,    490,    560,    200,    420,    580,    280,    790,    300,
+        350,    550,    430,    670,    990,    500,    500,    320,    360,    340) // R
+
+ROW(    630,    530,    570,    540,    240,    560,    500,    340,    530,    310,
+        380,    610,    560,    540,    500,    740,    670,    420,    190,    330) // S
+
+ROW(    580,    470,    520,    510,    300,    410,    490,    460,    530,    390,
+        460,    570,    530,    520,    500,    670,    770,    520,    170,    330) // T
+
+ROW(    530,    520,    230,    330,    530,    190,    320,    830,    350,    700,
+        680,    300,    340,    370,    320,    420,    520,    860,    260,    410) // V
+
+ROW(    160,    420,      0,     90,    880,    120,    440,    340,    170,    450,
+        420,    160,     20,    250,    360,    190,    170,    260,   1940,    930) // W
+
+ROW(    300,    470,    240,    250,   1030,    120,    740,    450,    310,    520,
+        500,    380,    210,    350,    340,    330,    330,    410,    930,   1300) // Y
+	};
+
+static double Gonnet350[20][20] =	// returned by GetGonnetMatrix(350); stored entries are the listed values divided by 4 (see ROW)
+	{
+//        A       C       D       E       F       G       H       I       K       L
+//        M       N       P       Q       R       S       T       V       W       Y
+ROW(    450,    390,    350,    360,    210,    400,    310,    310,    340,    280,
+        310,    350,    380,    350,    330,    410,    390,    350,    110,    210) // A
+
+ROW(    390,   1280,    160,    180,    320,    230,    270,    300,    190,    280,
+        310,    240,    170,    210,    220,    360,    330,    370,    310,    340) // C
+
+ROW(    350,    160,    640,    540,     50,    390,    400,    110,    410,    100,
+        160,    500,    330,    430,    370,    400,    370,    170,      0,    170) // D
+
+ROW(    360,    180,    540,    550,    100,    330,    390,    180,    440,    170,
+        220,    440,    350,    460,    410,    380,    360,    230,     60,    180) // E
+
+ROW(    210,    320,     50,    100,    860,      0,    360,    460,    140,    530,
+        490,    150,    100,    190,    150,    170,    220,    400,    700,    770) // F
+
+ROW(    400,    230,    390,    330,      0,    860,    280,     60,    310,     50,
+        120,    400,    280,    310,    310,    400,    300,    140,     50,     80) // G
+
+ROW(    310,    270,    400,    390,    360,    280,    680,    220,    400,    240,
+        270,    430,    300,    420,    410,    350,    340,    240,    320,    500) // H
+
+ROW(    310,    300,    110,    180,    460,     60,    220,    620,    220,    570,
+        540,    170,    190,    240,    200,    240,    320,    570,    260,    340) // I
+
+ROW(    340,    190,    410,    440,    140,    310,    400,    220,    530,    210,
+        260,    420,    330,    450,    530,    370,    370,    250,    120,    210) // K
+
+ROW(    280,    280,    100,    170,    530,     50,    240,    570,    210,    630,
+        560,    160,    200,    240,    210,    220,    280,    510,    340,    400) // L
+
+ROW(    310,    310,    160,    220,    490,    120,    270,    540,    260,    560,
+        580,    210,    210,    280,    240,    260,    310,    490,    320,    370) // M
+
+ROW(    350,    240,    500,    440,    150,    400,    430,    170,    420,    160,
+        210,    550,    320,    410,    390,    410,    390,    220,    110,    250) // N
+
+ROW(    380,    170,    330,    350,    100,    280,    300,    190,    330,    200,
+        210,    320,    910,    350,    310,    390,    370,    240,     10,    150) // P
+
+ROW(    350,    210,    430,    460,    190,    310,    420,    240,    450,    240,
+        280,    410,    350,    470,    450,    370,    360,    260,    160,    240) // Q
+
+ROW(    330,    220,    370,    410,    150,    310,    410,    200,    530,    210,
+        240,    390,    310,    450,    630,    360,    350,    230,    230,    230) // R
+
+ROW(    410,    360,    400,    380,    170,    400,    350,    240,    370,    220,
+        260,    410,    390,    370,    360,    450,    430,    290,    130,    230) // S
+
+ROW(    390,    330,    370,    360,    220,    300,    340,    320,    370,    280,
+        310,    390,    370,    360,    350,    430,    460,    350,    120,    230) // T
+
+ROW(    350,    370,    170,    230,    400,    140,    240,    570,    250,    510,
+        490,    220,    240,    260,    230,    290,    350,    560,    210,    310) // V
+
+ROW(    110,    310,      0,     60,    700,     50,    320,    260,    120,    340,
+        320,    110,     10,    160,    230,    130,    120,    210,   1590,    740) // W
+
+ROW(    210,    340,    170,    180,    770,     80,    500,    340,    210,    400,
+        370,    250,    150,    240,    230,    230,    230,    310,    740,    920) // Y
+	};
+
+// Returns the 20x20 Gonnet table for N (80, 120, 250 or 350) as an array
+// of t_ROW; every other N, including 16 and 160 whose tables exist in
+// this file but are not wired up, is reported through Quit().
+const t_ROW *GetGonnetMatrix(unsigned N)
+	{
+	const t_ROW *ptrMx = 0;
+	if (80 == N)
+		ptrMx = Gonnet80;
+	else if (120 == N)
+		ptrMx = Gonnet120;
+	else if (250 == N)
+		ptrMx = Gonnet250;
+	else if (350 == N)
+		ptrMx = Gonnet350;
+	if (0 != ptrMx)
+		return ptrMx;
+// No table is wired up for this N
+	Quit("Invalid Gonnet%u", N);
+	return 0;
+	}
+
+//SCORE GetGonnetGapOpen(unsigned N)
+//	{
+//	switch (N)
+//		{
+//	case 80:
+//		return -639;
+//	case 120:
+//		return -863;
+//	case 160:
+//		return -611;
+//	case 250:
+//		return -308;
+//	case 350:
+//		return -158;
+//		}
+//	Quit("Invalid Gonnet%u", N);
+//	return 0;
+//	}
+
+// Gap-open value that goes with the GonnetN matrix (N = 80, 120, 160, 250
+// or 350); any other N is reported through Quit().
+SCORE GetGonnetGapOpen(unsigned N)
+	{
+	static const struct { unsigned uN; int iValue; } Table[] =
+		{
+		{  80, -1000 },
+		{ 120,  -800 },
+		{ 160,  -700 },
+		{ 250,  -200 },
+		{ 350,  -175 },
+		};
+	for (unsigned i = 0; i < sizeof(Table)/sizeof(Table[0]); ++i)
+		if (Table[i].uN == N)
+			return (SCORE) Table[i].iValue;
+	Quit("Invalid Gonnet%u", N);
+	return 0;
+	}
+
+// Gap-extend value for GonnetN (N = 80, 120, 160, 250 or 350), else Quit().
+// NOTE(review): positive here, gap-open values are negative -- confirm sign.
+SCORE GetGonnetGapExtend(unsigned N)
+	{
+	static const struct { unsigned uN; int iValue; } Table[] =
+		{
+		{  80, 350 },
+		{ 120, 200 },
+		{ 160, 175 },
+		{ 250,  20 },
+		{ 350,  20 },
+		};
+	for (unsigned i = 0; i < sizeof(Table)/sizeof(Table[0]); ++i)
+		if (Table[i].uN == N)
+			return (SCORE) Table[i].iValue;
+	Quit("Invalid Gonnet%u", N);
+	return 0;
+	}
+
+//double GonnetLookup[400][400];
+//
+//static bool InitGonnetLookup()
+//	{
+//	for (unsigned i = 0; i < 400; ++i)
+//		{
+//		const unsigned A1 = i/20;
+//		const unsigned A2 = i%20;
+//		for (unsigned j = 0; j <= i; ++j)
+//			{
+//			const unsigned B1 = j/20;
+//			const unsigned B2 = j%20;
+//			
+//			const double s00 = Gonnet16[A1][B1];
+//			const double s01 = Gonnet16[A1][B2];
+//			const double s10 = Gonnet16[A2][B1];
+//			const double s11 = Gonnet16[A2][B2];
+//
+//			GonnetLookup[i][j] = GonnetLookup[j][i] = (s00 + s01 + s10 + s11)/4;
+//			}
+//		}
+//	return true;
+//	}
+//
+//static bool bGonnetLookupInitialized = InitGonnetLookup();

Added: trunk/packages/muscle/branches/upstream/current/gonnet.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/gonnet.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/gonnet.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,12 @@
+#ifndef Gonnet_h
+#define Gonnet_h
+// NOTE(review): SCORE is used below but nothing is included here; gonnet.cpp includes muscle.h first -- presumably every includer must.
+typedef double t_ROW[20];	// one row of a 20-column score matrix
+// Accessors implemented in gonnet.cpp; each calls Quit() for an N it does not handle.
+const t_ROW *GetGonnetMatrix(unsigned N);	// handles N = 80, 120, 250, 350
+SCORE GetGonnetGapOpen(unsigned N);	// handles N = 80, 120, 160, 250, 350
+SCORE GetGonnetGapExtend(unsigned N);	// handles N = 80, 120, 160, 250, 350
+// NOTE(review): the only definition of GonnetLookup visible in gonnet.cpp is commented out -- confirm it is defined elsewhere or unused.
+extern double GonnetLookup[400][400];
+
+#endif	// Gonnet_h

Added: trunk/packages/muscle/branches/upstream/current/gotowt.cpp
===================================================================

Added: trunk/packages/muscle/branches/upstream/current/henikoffweight.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/henikoffweight.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/henikoffweight.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,84 @@
+#include "muscle.h"
+#include "msa.h"
+
+/***
+Compute Henikoff weights.
+Steven Henikoff and Jorja G. Henikoff (1994), Position-based sequence weights.
+J. Mol. Biol., 243(4):574-578.
+
+Award each different residue an equal share of the weight, and then divide up
+that weight equally among the sequences sharing the same residue. So if, in a
+position of a multiple alignment, r different residues are represented, a residue
+represented in only one sequence contributes a score of 1/r to that sequence, whereas a
+residue represented in s sequences contributes a score of 1/(r*s) to each of the s
+sequences. For each sequence, the contributions from each position are summed to give
+a sequence weight.
+
+See also HenikoffWeightPB.
+***/
+
+// Adds this column's Henikoff contribution to m_Weights. A sequence whose
+// letter code in the column is below 20 receives 1/(r*s): r is the number
+// of distinct such letters in the column, s the number of sequences that
+// carry the same letter. Sequences with any other code receive nothing.
+void MSA::CalcHenikoffWeightsCol(unsigned uColIndex) const
+	{
+	const unsigned uSeqCount = GetSeqCount();
+
+// First pass: occurrences per letter, and how many letters occur at all
+	unsigned uTally[MAX_ALPHA];
+	memset(uTally, 0, sizeof(uTally));
+	unsigned uDistinct = 0;
+	for (unsigned i = 0; i < uSeqCount; ++i)
+		{
+		const unsigned uLetter = GetLetterEx(i, uColIndex);
+	// A letter is new to the column when its tally goes from 0 to 1
+		if (uLetter < 20 && 1 == ++uTally[uLetter])
+			++uDistinct;
+		}
+
+// Second pass: hand out the shares
+	for (unsigned i = 0; i < uSeqCount; ++i)
+		{
+		const unsigned uLetter = GetLetterEx(i, uColIndex);
+		if (uLetter >= 20)
+			continue;
+	// r*s; guarded against zero before dividing
+		const unsigned uDenom = uTally[uLetter]*uDistinct;
+		if (0 != uDenom)
+			m_Weights[i] += (WEIGHT) (1.0/uDenom);
+		}
+	}
+
+// Computes normalized Henikoff sequence weights into m_Weights. Up to two
+// sequences are handled directly (nothing / 1.0 / 0.5 each), no column pass.
+void MSA::SetHenikoffWeights() const
+	{
+	const unsigned uColCount = GetColCount();
+	const unsigned uSeqCount = GetSeqCount();
+	switch (uSeqCount)
+		{
+	case 0:
+		return;
+	case 1:
+		m_Weights[0] = (WEIGHT) 1.0;
+		return;
+	case 2:
+		m_Weights[0] = (WEIGHT) 0.5;
+		m_Weights[1] = (WEIGHT) 0.5;
+		return;
+		}
+// Start from zero and accumulate one column at a time
+	unsigned i;
+	for (i = 0; i < uSeqCount; ++i)
+		m_Weights[i] = 0.0;
+	for (unsigned uCol = 0; uCol < uColCount; ++uCol)
+		CalcHenikoffWeightsCol(uCol);
+
+// A sequence consisting only of gaps gets no weight
+	for (i = 0; i < uSeqCount; ++i)
+		if (IsGapSeq(i))
+			m_Weights[i] = 0.0;
+
+	Normalize(m_Weights, uSeqCount);
+	}

Added: trunk/packages/muscle/branches/upstream/current/henikoffweightpb.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/henikoffweightpb.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/henikoffweightpb.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,124 @@
+#include "muscle.h"
+#include "msa.h"
+
+/***
+Compute Henikoff weights.
+Steven Henikoff and Jorja G. Henikoff (1994), Position-based sequence weights.
+J. Mol. Biol., 243(4):574-578.
+
+Award each different residue an equal share of the weight, and then divide up
+that weight equally among the sequences sharing the same residue. So if, in a
+position of a multiple alignment, r different residues are represented, a residue
+represented in only one sequence contributes a score of 1/r to that sequence, whereas a
+residue represented in s sequences contributes a score of 1/(r*s) to each of the s
+sequences. For each sequence, the contributions from each position are summed to give
+a sequence weight.
+
+Here we use the variant from PSI-BLAST, which (a) treats gaps as a 21st letter,
+and (b) ignores columns that are perfectly conserved.
+
+>>> WARNING -- I SUSPECT THIS DOESN'T WORK CORRECTLY <<<
+***/
+
+// Adds one column's contribution to m_Weights (PSI-BLAST variant, see
+// the comment at the top of this file): gaps and wildcards are pooled in
+// an extra letter slot, index MAX_ALPHA, and a column in which every
+// sequence falls in the same slot contributes nothing.
+// Otherwise each sequence receives 1/(r*s), where r is the number of
+// distinct slots occupied in the column and s the number of sequences
+// sharing this sequence's slot. Dividing by r is what keeps a highly
+// variable column from outweighing a nearly conserved one; with 1/s
+// alone every column would hand out r units of weight instead of one.
+// uColIndex is the column to process; m_Weights is only added to.
+void MSA::CalcHenikoffWeightsColPB(unsigned uColIndex) const
+	{
+	const unsigned uSeqCount = GetSeqCount();
+
+// Compute letter counts in this column and the number of distinct slots
+	unsigned uLetterCount[MAX_ALPHA+1];
+	memset(uLetterCount, 0, sizeof(uLetterCount));
+	unsigned uDifferentLetterCount = 0;
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		unsigned uLetter = MAX_ALPHA;
+		if (!IsGap(uSeqIndex, uColIndex) && !IsWildcard(uSeqIndex, uColIndex))
+			uLetter = GetLetter(uSeqIndex, uColIndex);
+		if (1 == ++(uLetterCount[uLetter]))
+			++uDifferentLetterCount;
+		}
+
+// Special case of perfect conservation (one occupied slot), and of an
+// alignment with no sequences: nothing to add
+	if (uDifferentLetterCount <= 1)
+		return;
+
+// Compute weight contributions
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		unsigned uLetter = MAX_ALPHA;
+		if (!IsGap(uSeqIndex, uColIndex) && !IsWildcard(uSeqIndex, uColIndex))
+			uLetter = GetLetter(uSeqIndex, uColIndex);
+	// uLetterCount[uLetter] >= 1 here (this sequence was counted above)
+	// and uDifferentLetterCount >= 2, so the denominator is never zero
+		const unsigned uDenom = uLetterCount[uLetter]*uDifferentLetterCount;
+		m_Weights[uSeqIndex] += (WEIGHT) (1.0/uDenom);
+		}
+	}
+
+bool MSA::IsGapSeq(unsigned uSeqIndex) const	// true iff no column of the sequence is a non-gap
+	{
+	const unsigned uColCount = GetColCount();
+	unsigned uColIndex = 0;
+	while (uColIndex < uColCount && IsGap(uSeqIndex, uColIndex))
+		++uColIndex;
+	return uColIndex == uColCount;
+	}
+
+// Gives every sequence the weight 1/uSeqCount; does nothing for an empty MSA.
+void MSA::SetUniformWeights() const
+	{
+	const unsigned uSeqCount = GetSeqCount();
+	if (uSeqCount > 0)
+		{
+		const WEIGHT w = (WEIGHT) (1.0 / uSeqCount);
+		for (unsigned i = 0; i != uSeqCount; ++i) m_Weights[i] = w;
+		}
+	}
+
+// Computes normalized weights into m_Weights using the PSI-BLAST variant;
+// the per-column work is done by CalcHenikoffWeightsColPB(). Up to two
+// sequences are handled directly (nothing / 1.0 / 0.5 each).
+void MSA::SetHenikoffWeightsPB() const
+	{
+	const unsigned uColCount = GetColCount();
+	const unsigned uSeqCount = GetSeqCount();
+	switch (uSeqCount)
+		{
+	case 0:
+		return;
+	case 1:
+		m_Weights[0] = 1.0;
+		return;
+	case 2:
+		m_Weights[0] = (WEIGHT) 0.5;
+		m_Weights[1] = (WEIGHT) 0.5;
+		return;
+		}
+
+	unsigned i;
+	for (i = 0; i < uSeqCount; ++i)
+		m_Weights[i] = 0.0;
+	for (unsigned uCol = 0; uCol < uColCount; ++uCol)
+		CalcHenikoffWeightsColPB(uCol);
+
+// Sequences made up entirely of gaps get weight 0
+	for (i = 0; i < uSeqCount; ++i)
+		if (IsGapSeq(i))
+			m_Weights[i] = 0.0;
+// Identical sequences make every column perfectly conserved, so every
+// column is skipped and the weights are still all zero: use 1.0 each.
+	if (VectorIsZero(m_Weights, uSeqCount))
+		VectorSet(m_Weights, uSeqCount, 1.0);
+
+	Normalize(m_Weights, uSeqCount);
+	}

Added: trunk/packages/muscle/branches/upstream/current/html.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/html.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/html.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,136 @@
+#include "muscle.h"
+#include <stdio.h>
+#include <ctype.h>
+#include "msa.h"
+#include "textfile.h"
+
+const unsigned uCharsPerLine = 60;
+const int MIN_NAME = 10;
+const int MAX_NAME = 32;
+
+extern void AssignColors(const MSA &a, int **Colors);
+
+// Allocate a (sequence count) x (column count) table of color codes,
+// zero it, and let AssignColors fill it in. Both the row-pointer array
+// and each row are allocated with new[]; the caller receives them.
+static int **MakeColors(const MSA &a)
+	{
+	const unsigned uSeqCount = a.GetSeqCount();
+	const unsigned uColCount = a.GetColCount();
+
+	int **Colors = new int *[uSeqCount];
+	for (unsigned uSeq = 0; uSeq < uSeqCount; ++uSeq)
+		{
+		int *Row = new int[uColCount];
+		for (unsigned uCol = 0; uCol < uColCount; ++uCol)
+			Row[uCol] = 0;
+		Colors[uSeq] = Row;
+		}
+	AssignColors(a, Colors);
+	return Colors;
+	}
+
+// Emit the HTML needed to switch the background color from color code
+// From to color code To. Nothing is written when the two codes are equal
+// or when To is not one of the codes 0..3 handled by the switch below.
+static void ChangeColor(TextFile &File, int From, int To)
+	{
+	if (From == To)
+		return;
+
+// Hex RGB values, written without the leading '#'. These are preprocessor
+// macros, so they remain defined for the rest of the file even though
+// they appear inside this function. BLACK, RED and GREEN are not used
+// in this function.
+#define	COLOR_WHITE		"FFFFFF"
+#define	COLOR_GRAY		"C0C0C0"
+#define	COLOR_BLACK		"000000"
+#define COLOR_RED		"FF0000"
+#define COLOR_GREEN		"00FF00"
+#define COLOR_BLUE		"5590FF"
+#define COLOR_LIGHTBLUE	"77FFFF"
+
+// X(c): close the currently open SPAN and open a new one whose background
+// color is c (c must be a string literal; it is pasted into the tag).
+#define X(c)	File.PutString("</SPAN><SPAN STYLE=\"background-color:#" c "\">");
+	switch (To)
+		{
+	case 0:
+		X(COLOR_WHITE)
+		break;
+	case 1:
+		X(COLOR_GRAY)
+		break;
+	case 2:
+		X(COLOR_BLUE)
+		break;
+	case 3:
+		X(COLOR_LIGHTBLUE)
+		break;
+		}
+	}
+
+#define COLOR_WINDOW "FFEEE0"
+
+// Length of a sequence label: the name up to (not including) its first
+// blank, or the whole name when it contains no blank.
+static int LabelLength(const char *ptrName)
+	{
+	const char *ptrBlank = strchr(ptrName, ' ');
+	if (0 != ptrBlank)
+		return (int) (ptrBlank - ptrName);
+	return (int) strlen(ptrName);
+	}
+
+// Write the alignment to File as an HTML page.
+// The alignment is emitted in blocks of uCharsPerLine columns. Each row
+// starts with the sequence label (text before the first blank in the
+// name), padded or truncated to a common width between MIN_NAME and
+// MAX_NAME characters. Each residue is wrapped in a SPAN whose background
+// comes from the color table built by MakeColors; residues with color
+// code 0 are written in lower case, all others in upper case.
+// An alignment with zero columns produces only the page header and footer.
+void MSA::ToHTMLFile(TextFile &File) const
+	{
+	const unsigned uSeqCount = GetSeqCount();
+	const unsigned uColCount = GetColCount();
+
+	File.PutString("<HTML>\n");
+	File.PutString("<BODY BGCOLOR=\"#" COLOR_WINDOW "\">\n");
+	File.PutString("<PRE>");
+
+	int **Colors = MakeColors(*this);
+
+// Common label width: longest label, clamped to [MIN_NAME, MAX_NAME].
+	int iLongestNameLength = 0;
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		const int iLength = LabelLength(GetSeqName(uSeqIndex));
+		if (iLength > iLongestNameLength)
+			iLongestNameLength = iLength;
+		}
+	if (iLongestNameLength > MAX_NAME)
+		iLongestNameLength = MAX_NAME;
+	if (iLongestNameLength < MIN_NAME)
+		iLongestNameLength = MIN_NAME;
+
+// Guard the zero-column case: (0 - 1) would wrap around in unsigned
+// arithmetic and send the loops below far outside the color table.
+	const unsigned uLineCount =
+	  (0 == uColCount) ? 0 : (uColCount - 1)/uCharsPerLine + 1;
+	int CurrentColor = -1;
+	for (unsigned uLineIndex = 0; uLineIndex < uLineCount; ++uLineIndex)
+		{
+		File.PutString("\n");
+		unsigned uStartColIndex = uLineIndex*uCharsPerLine;
+		unsigned uEndColIndex = uStartColIndex + uCharsPerLine - 1;
+		if (uEndColIndex >= uColCount)
+			uEndColIndex = uColCount - 1;
+		char Name[MAX_NAME+1];
+		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+			{
+			const char *ptrName = GetSeqName(uSeqIndex);
+			int iLength = LabelLength(ptrName);
+			if (iLength > MAX_NAME)
+				iLength = MAX_NAME;
+		// Blank-pad, copy the label, then cut at the common width.
+			memset(Name, ' ', MAX_NAME);
+			memcpy(Name, ptrName, iLength);
+			Name[iLongestNameLength] = 0;
+
+		// Force a color change on the first residue of every row.
+			CurrentColor = -1;
+			File.PutString("<SPAN STYLE=\"background-color:#" COLOR_WINDOW "\">");
+			File.PutFormat("%s      ", Name);
+			File.PutString("<SPAN STYLE=\"background-color:#FFFFFF\">");
+			for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
+			  ++uColIndex)
+				{
+				const int Color = Colors[uSeqIndex][uColIndex];
+				ChangeColor(File, CurrentColor, Color);
+				CurrentColor = Color;
+			// <ctype.h> functions require a value representable as
+			// unsigned char; a plain (possibly negative) char is not.
+				const unsigned char c =
+				  (unsigned char) GetChar(uSeqIndex, uColIndex);
+				if (Color == 0)
+					File.PutFormat("%c", tolower(c));
+				else
+					File.PutFormat("%c", toupper(c));
+				}
+			File.PutString("\n");
+			}
+		}
+	File.PutString("</SPAN>\n");
+	File.PutString("</PRE>\n");
+	File.PutString("</BODY>\n");
+	File.PutString("</HTML>\n");
+
+// Release the color table allocated (with new[]) by MakeColors.
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		delete[] Colors[uSeqIndex];
+	delete[] Colors;
+	}

Added: trunk/packages/muscle/branches/upstream/current/hydro.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/hydro.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/hydro.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,37 @@
+#include "muscle.h"
+#include "profile.h"
+
+// Apply hydrophobicity heuristic to a profile.
+// A column counts as hydrophobic when its occupancy m_fOcc exceeds 0.999
+// and IsHydrophobic() accepts its counts. Consecutive hydrophobic columns
+// form a run; once a run reaches g_uHydrophobicRunLength columns, the
+// gap-open and gap-close scores of every column in the run are multiplied
+// by g_dHydroFactor, each column exactly once. Setting
+// g_uHydrophobicRunLength to 0 disables the heuristic.
+void Hydro(ProfPos *Prof, unsigned uLength)
+	{
+	if (0 == g_uHydrophobicRunLength)
+		return;
+
+	unsigned uRunLength = 0;
+	for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)
+		{
+		ProfPos &PP = Prof[uColIndex];
+		const bool bHydro = (PP.m_fOcc > 0.999 && IsHydrophobic(PP.m_fcCounts));
+		if (!bHydro)
+			{
+			uRunLength = 0;
+			continue;
+			}
+
+		++uRunLength;
+		if (uRunLength > g_uHydrophobicRunLength)
+			{
+		// Run already qualified; earlier columns were scaled when it did.
+			PP.m_scoreGapOpen *= (SCORE) g_dHydroFactor;
+			PP.m_scoreGapClose *= (SCORE) g_dHydroFactor;
+			}
+		else if (uRunLength == g_uHydrophobicRunLength)
+			{
+		// The run has just reached the threshold: scale all of its columns.
+		// It began at uColIndex + 1 - g_uHydrophobicRunLength, which cannot
+		// underflow because uRunLength <= uColIndex + 1. (Starting at
+		// uColIndex - g_uHydrophobicRunLength - 1 reached two columns before
+		// the run, and wrapped around for a run at the start of the profile
+		// so that no column was scaled at all.)
+			const unsigned uRunStart = uColIndex + 1 - g_uHydrophobicRunLength;
+			for (unsigned n = uRunStart; n <= uColIndex; ++n)
+				{
+				ProfPos &RunPP = Prof[n];
+				RunPP.m_scoreGapOpen *= (SCORE) g_dHydroFactor;
+				RunPP.m_scoreGapClose *= (SCORE) g_dHydroFactor;
+				}
+			}
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/intmath.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/intmath.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/intmath.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,352 @@
+#include "muscle.h"
+#include <math.h>
+
+// Convert a score to a probability: 2^(Score/INTSCALE).
+// Scores at or below MINUS_INFINITY map to probability 0.
+PROB ScoreToProb(SCORE Score)
+	{
+	if (Score <= MINUS_INFINITY)
+		return 0.0;
+	const double dExponent = (double) Score/INTSCALE;
+	return (PROB) pow(2.0, dExponent);
+	}
+
+// log2(e): multiplying a natural logarithm by this gives a base-2 logarithm.
+static const double log2e = log2(exp(1.0));
+
+// Convert a natural-logarithm value to the equivalent base-2 logarithm.
+double lnTolog2(double ln)
+	{
+	const double dLog2 = ln*log2e;
+	return dLog2;
+	}
+
+// Base-2 logarithm. An argument of exactly 0 yields MINUS_INFINITY;
+// any other argument is handed to log() unchecked.
+double log2(double x)
+	{
+// 1/ln(2), computed once on first use, so that each call multiplies
+// rather than divides (in case multiplication is faster).
+	static const double dInvLn2 = 1.0/log(2);
+
+	if (0 == x)
+		return MINUS_INFINITY;
+	return log(x)*dInvLn2;
+	}
+
+// Convert a probability to a score, log2(Prob). A probability of exactly
+// zero maps to MINUS_INFINITY. No INTSCALE factor is applied here.
+SCORE ProbToScore(PROB Prob)
+	{
+	if (0.0 != Prob)
+		return (SCORE) log2(Prob);
+	return MINUS_INFINITY;
+	}
+
+// Convert a non-negative double to a WEIGHT by scaling with INTSCALE.
+WEIGHT DoubleToWeight(double d)
+	{
+	assert(d >= 0);
+	const double dScaled = INTSCALE*d;
+	return (WEIGHT) dScaled;
+	}
+
+// Convert a WEIGHT back to a double by dividing out INTSCALE.
+double WeightToDouble(WEIGHT w)
+	{
+	const double dScale = (double) INTSCALE;
+	return (double) w / dScale;
+	}
+
+// Convert a double to a SCORE by scaling with INTSCALE.
+SCORE DoubleToScore(double d)
+	{
+	const double dScaled = d*(double) INTSCALE;
+	return (SCORE) dScaled;
+	}
+
+// Approximate equality of two scores; see BTEq for the tolerance used.
+bool ScoreEq(SCORE s1, SCORE s2)
+	{
+	const bool bSame = BTEq(s1, s2);
+	return bSame;
+	}
+
+// Approximate equality of two BASETYPE values: true when they differ by
+// less than 0.0001 in absolute terms, or when the difference is under
+// 0.5% of the sum of their magnitudes.
+static bool BTEq2(BASETYPE b1, BASETYPE b2)
+	{
+	const double dAbsDiff = fabs(b1 - b2);
+	if (dAbsDiff < 0.0001)
+		return true;
+	const double dMagnitude = fabs(b1) + fabs(b2);
+	const double dRelDiff = dAbsDiff/dMagnitude;
+	return dRelDiff < 0.005;
+	}
+
+// Approximate equality of two doubles, compared after narrowing both
+// to BASETYPE.
+bool BTEq(double b1, double b2)
+	{
+	const BASETYPE bt1 = (BASETYPE) b1;
+	const BASETYPE bt2 = (BASETYPE) b2;
+	return BTEq2(bt1, bt2);
+	}
+
+// Natural logarithm of 2, used to express 2^x as exp(x*ln 2).
+const double dLn2 = log(2);
+
+// pow2(x) = 2^x, with MINUS_INFINITY mapped to exactly 0.
+double pow2(double x)
+	{
+	if (MINUS_INFINITY != x)
+		return exp(x*dLn2);
+	return 0;
+	}
+
+// lp2(x) = log2(1 + 2^-x), intended for x >= 0.
+double lp2(double x)
+	{
+	const double dArg = 1 + pow2(-x);
+	return log2(dArg);
+	}
+
+// Exact (slow) log-space sums: each argument is a base-2 logarithm and
+// the result is the base-2 logarithm of the sum of the underlying values.
+
+// SumLog(x, y) = log2(2^x + 2^y)
+SCORE SumLog(SCORE x, SCORE y)
+	{
+	const double dTotal = pow2(x) + pow2(y);
+	return (SCORE) log2(dTotal);
+	}
+
+// SumLog(x, y, z) = log2(2^x + 2^y + 2^z)
+SCORE SumLog(SCORE x, SCORE y, SCORE z)
+	{
+	const double dTotal = pow2(x) + pow2(y) + pow2(z);
+	return (SCORE) log2(dTotal);
+	}
+
+// SumLog(w, x, y, z) = log2(2^w + 2^x + 2^y + 2^z)
+SCORE SumLog(SCORE w, SCORE x, SCORE y, SCORE z)
+	{
+	const double dTotal = pow2(w) + pow2(x) + pow2(y) + pow2(z);
+	return (SCORE) log2(dTotal);
+	}
+
+// Table-driven approximation of lp2(x) = log2(1 + 2^-x) for x >= 0.
+// The range [0, 20) is covered by a 1000-entry table that is filled on
+// the first call; x is truncated down to a table slot (no interpolation).
+// For x >= 20 the result is 0.
+SCORE lp2Fast(SCORE x)
+	{
+	assert(x >= 0);
+	const int iTableSize = 1000;
+	const double dRange = 20.0;
+	const double dScale = dRange/iTableSize;
+	static SCORE Table[iTableSize];
+	static bool bTableReady = false;
+	if (!bTableReady)
+		{
+		int iSlot = 0;
+		while (iSlot < iTableSize)
+			{
+			Table[iSlot] = (SCORE) lp2(iSlot*dScale);
+			++iSlot;
+			}
+		bTableReady = true;
+		}
+	if (x >= dRange)
+		return 0.0;
+	const int iIndex = (int) (x/dScale);
+	assert(iIndex >= 0 && iIndex < iTableSize);
+	const SCORE Approx = Table[iIndex];
+// Debug check: the table value should be close to the exact one.
+	assert(BTEq(Approx, lp2(x)));
+	return Approx;
+	}
+
+// Fast, approximate log-space sums built on the lp2Fast lookup table.
+// Because lp2Fast is a quantized approximation, the debug checks below
+// compare against the exact SumLog with the BTEq tolerance (as lp2Fast
+// itself does) instead of requiring bit-for-bit equality.
+
+// SumLogFast(x, y) ~= log2(2^x + 2^y)
+// MINUS_INFINITY operands are treated as contributing nothing.
+SCORE SumLogFast(SCORE x, SCORE y)
+	{
+	if (MINUS_INFINITY == x)
+		{
+		if (MINUS_INFINITY == y)
+			return MINUS_INFINITY;
+		return y;
+		}
+	else if (MINUS_INFINITY == y)
+		return x;
+
+// log2(2^a + 2^b) = a + log2(1 + 2^-(a-b)) with a the larger operand,
+// which keeps the lp2Fast argument non-negative.
+	SCORE dResult;
+	if (x > y)
+		dResult = x + lp2Fast(x-y);
+	else
+		dResult = y + lp2Fast(y-x);
+	assert(BTEq(SumLog(x, y), dResult));
+	return dResult;
+	}
+
+// SumLogFast(x, y, z) ~= log2(2^x + 2^y + 2^z)
+SCORE SumLogFast(SCORE x, SCORE y, SCORE z)
+	{
+	SCORE dResult = SumLogFast(x, SumLogFast(y, z));
+	assert(BTEq(SumLog(x, y, z), dResult));
+	return dResult;
+	}
+
+// SumLogFast(w, x, y, z) ~= log2(2^w + 2^x + 2^y + 2^z)
+SCORE SumLogFast(SCORE w, SCORE x, SCORE y, SCORE z)
+	{
+	SCORE dResult = SumLogFast(SumLogFast(w, x), SumLogFast(y, z));
+	assert(BTEq(SumLog(w, x, y, z), dResult));
+	return dResult;
+	}
+
+// Return the sum of v[0..n-1]; 0.0 when n is 0.
+double VecSum(const double v[], unsigned n)
+	{
+	double dTotal = 0.0;
+	const double *ptrEnd = v + n;
+	for (const double *ptr = v; ptr != ptrEnd; ++ptr)
+		dTotal += *ptr;
+	return dTotal;
+	}
+
+// Scale p[0..n-1] in place so that the entries sum to 1.
+// Calls Quit when the entries sum to exactly zero.
+void Normalize(PROB p[], unsigned n)
+	{
+	PROB dTotal = 0.0;
+	for (unsigned k = 0; k < n; ++k)
+		dTotal += p[k];
+	if (0.0 == dTotal)
+		Quit("Normalize, sum=0");
+	for (unsigned k = 0; k < n; ++k)
+		p[k] /= dTotal;
+	}
+
+// Like Normalize(p, n), but leaves p untouched (instead of calling Quit)
+// when the entries sum to exactly zero.
+void NormalizeUnlessZero(PROB p[], unsigned n)
+	{
+	PROB dTotal = 0.0;
+	for (unsigned k = 0; k < n; ++k)
+		dTotal += p[k];
+	if (0.0 != dTotal)
+		{
+		for (unsigned k = 0; k < n; ++k)
+			p[k] /= dTotal;
+		}
+	}
+
+// Scale p[0..n-1] in place so that the entries sum to dRequiredTotal.
+// The sum is accumulated in double precision. Calls Quit when the
+// entries sum to exactly zero.
+void Normalize(PROB p[], unsigned n, double dRequiredTotal)
+	{
+	double dTotal = 0.0;
+	for (unsigned k = 0; k < n; ++k)
+		dTotal += p[k];
+	if (0.0 == dTotal)
+		Quit("Normalize, sum=0");
+	const double dFactor = dRequiredTotal / dTotal;
+	for (unsigned k = 0; k < n; ++k)
+		p[k] *= (PROB) dFactor;
+	}
+
+// Return true if every one of dValues[0..n-1] equals 0.0
+// (trivially true when n is 0).
+bool VectorIsZero(const double dValues[], unsigned n)
+	{
+	unsigned i = 0;
+	while (i < n && dValues[i] == 0.0)
+		++i;
+	return i == n;
+	}
+
+// Store d into each of dValues[0..n-1].
+void VectorSet(double dValues[], unsigned n, double d)
+	{
+	double *ptrEnd = dValues + n;
+	for (double *ptr = dValues; ptr != ptrEnd; ++ptr)
+		*ptr = d;
+	}
+
+// Return true if every one of dValues[0..n-1] equals zero
+// (trivially true when n is 0). Single-precision overload.
+bool VectorIsZero(const float dValues[], unsigned n)
+	{
+	unsigned i = 0;
+	while (i < n && dValues[i] == 0.0)
+		++i;
+	return i == n;
+	}
+
+// Store d into each of dValues[0..n-1]. Single-precision overload.
+void VectorSet(float dValues[], unsigned n, float d)
+	{
+	float *ptrEnd = dValues + n;
+	for (float *ptr = dValues; ptr != ptrEnd; ++ptr)
+		*ptr = d;
+	}
+
+// Correlation coefficient of P[0..uCount-1] and Q[0..uCount-1]:
+// sum(dP*dQ) / sqrt(sum(dP^2) * sum(dQ^2)), where dP and dQ are the
+// deviations from the respective means. Returns 0 when the numerator
+// is exactly zero (which includes the case of a constant input).
+double Correl(const double P[], const double Q[], unsigned uCount)
+	{
+	double dTotalP = 0.0;
+	double dTotalQ = 0.0;
+	unsigned k;
+	for (k = 0; k < uCount; ++k)
+		{
+		dTotalP += P[k];
+		dTotalQ += Q[k];
+		}
+	const double dMeanP = dTotalP/uCount;
+	const double dMeanQ = dTotalQ/uCount;
+
+	double dSumPQ = 0.0;
+	double dSumPP = 0.0;
+	double dSumQQ = 0.0;
+	for (k = 0; k < uCount; ++k)
+		{
+		const double dDevP = P[k] - dMeanP;
+		const double dDevQ = Q[k] - dMeanQ;
+		dSumPQ += dDevP*dDevQ;
+		dSumPP += dDevP*dDevP;
+		dSumQQ += dDevQ*dDevQ;
+		}
+	if (0 == dSumPQ)
+		return 0;
+	return dSumPQ / sqrt(dSumPP*dSumQQ);
+	}
+
+// Single-precision overload of Correl: correlation coefficient of
+// P[0..uCount-1] and Q[0..uCount-1], accumulated in float.
+// Returns 0 when the sum of deviation products is exactly zero.
+float Correl(const float P[], const float Q[], unsigned uCount)
+	{
+	float dTotalP = 0.0;
+	float dTotalQ = 0.0;
+	unsigned k;
+	for (k = 0; k < uCount; ++k)
+		{
+		dTotalP += P[k];
+		dTotalQ += Q[k];
+		}
+	const float dMeanP = dTotalP/uCount;
+	const float dMeanQ = dTotalQ/uCount;
+
+	float dSumPQ = 0.0;
+	float dSumPP = 0.0;
+	float dSumQQ = 0.0;
+	for (k = 0; k < uCount; ++k)
+		{
+		const float dDevP = P[k] - dMeanP;
+		const float dDevQ = Q[k] - dMeanQ;
+		dSumPQ += dDevP*dDevQ;
+		dSumPP += dDevP*dDevP;
+		dSumQQ += dDevQ*dDevQ;
+		}
+	if (0 == dSumPQ)
+		return 0;
+	const float dDenom = (float) sqrt(dSumPP*dSumQQ);
+	return dSumPQ / dDenom;
+	}
+
+// Simple (but slow, quadratic) computation of 1-based ranks that allows
+// for ties: tied values all receive the average of the ranks they span.
+// Correctness and simplicity are priorities over speed here.
+void Rank(const float P[], float Ranks[], unsigned uCount)
+	{
+	for (unsigned uTarget = 0; uTarget < uCount; ++uTarget)
+		{
+		const double dTarget = P[uTarget];
+		unsigned uBelow = 0;
+		unsigned uTied = 0;
+		unsigned uAbove = 0;
+		for (unsigned uOther = 0; uOther < uCount; ++uOther)
+			{
+			const double dOther = P[uOther];
+			if (dOther < dTarget)
+				++uBelow;
+			else if (dOther == dTarget)
+				++uTied;
+			else
+				++uAbove;
+			}
+	// The value always ties with itself.
+		assert(uTied >= 1);
+		assert(uBelow + uTied + uAbove == uCount);
+		Ranks[uTarget] = (float) (1 + uBelow + (uTied - 1)/2.0);
+		}
+	}
+
+// Double-precision overload of Rank: 1-based ranks of P[0..uCount-1],
+// with tied values receiving the average of the ranks they span.
+void Rank(const double P[], double Ranks[], unsigned uCount)
+	{
+	for (unsigned uTarget = 0; uTarget < uCount; ++uTarget)
+		{
+		const double dTarget = P[uTarget];
+		unsigned uBelow = 0;
+		unsigned uTied = 0;
+		unsigned uAbove = 0;
+		for (unsigned uOther = 0; uOther < uCount; ++uOther)
+			{
+			const double dOther = P[uOther];
+			if (dOther < dTarget)
+				++uBelow;
+			else if (dOther == dTarget)
+				++uTied;
+			else
+				++uAbove;
+			}
+	// The value always ties with itself.
+		assert(uTied >= 1);
+		assert(uBelow + uTied + uAbove == uCount);
+		Ranks[uTarget] = (double) (1 + uBelow + (uTied - 1)/2.0);
+		}
+	}
+
+// Return the sum of the first 20 entries of Counts.
+// NOTE(review): 20 is presumably the amino-acid alphabet size -- confirm.
+FCOUNT SumCounts(const FCOUNT Counts[])
+	{
+	FCOUNT Total = 0;
+	const FCOUNT *ptrEnd = Counts + 20;
+	for (const FCOUNT *ptr = Counts; ptr != ptrEnd; ++ptr)
+		Total += *ptr;
+	return Total;
+	}

Added: trunk/packages/muscle/branches/upstream/current/intmath.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/intmath.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/intmath.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,210 @@
+// IntMath.h: Header for doing fractional math with integers for speed.
+
+#ifndef IntMath_h
+#define	IntMath_h
+
+typedef float BASETYPE;
+//typedef double BASETYPE;
+
+// Scaling factor used to store certain floating point
+// values as integers to a few significant figures.
+//const int INTSCALE = 1000;
+const int INTSCALE = 1;
+
+// Type for a probability in range 0.0 to 1.0.
+typedef BASETYPE PROB;
+
+// Type for a log-odds integer score.
+// Stored as log2(PROB)*INTSCALE.
+//typedef int	SCORE;
+typedef BASETYPE SCORE;
+
+// Type for a weight.
+// Stored as w*INTSCALE where w is in range 0.0 to 1.0.
+//typedef unsigned WEIGHT;
+typedef BASETYPE WEIGHT;
+
+// Type for a fractional weighted count stored as n*WEIGHT/N
+// where n=measured count (integer >= 0) and N is total for
+// the distribution (e.g., n=number of residues of a given
+// type in a column, N=number of residues in the column).
+// Hence values in an FCOUNT variable range from 0..INTSCALE
+// as an integer, representing "true" values 0.0 to 1.0.
+//typedef unsigned FCOUNT;
+typedef BASETYPE FCOUNT;
+
+// Representation of -infinity. Value should
+// be large and negative, but not so large
+// that adding a few of them overflows.
+// TODO: Multiplied by 10 to work around bug
+// when aligning Bali 1ckaA in ref4, which is
+// so long that B->Mmax got to -infinity, causing
+// traceback to fail.
+//const int MINUS_INFINITY = -10000000;
+const BASETYPE MINUS_INFINITY = (BASETYPE) -1e37;
+const BASETYPE PLUS_INFINITY = (BASETYPE) 1e37;
+
+// Probability relative to a null model
+typedef double RPROB;
+
+PROB ScoreToProb(SCORE Score);
+SCORE ProbToScore(PROB Prob);
+SCORE DoubleToScore(double d);
+WEIGHT DoubleToWeight(double d);
+double WeightToDouble(WEIGHT w);
+SCORE MulScoreWeight(SCORE Score, WEIGHT Weight);
+bool ScoreEq(SCORE s1, SCORE s2);
+bool BTEq(double b1, double b2);
+
+// Convert a SCORE to a double by dividing out the INTSCALE factor.
+static double ScoreToDouble(SCORE Score)
+	{
+	const double dScale = (double) INTSCALE;
+	return (double) Score / dScale;
+	}
+
+// MulDivAssign(Result, x, y, z): assign (x*y)/z to Result.
+// The first branch (disabled by "#if 0") is an inline-assembler version;
+// the branch actually compiled is the plain expression under #else.
+#if	0
+// In-line assembler for Result = (x*y)/z
+// Note that imul and idiv will do 64-bit arithmetic
+// on 32-bit operands, so this shouldn't overflow
+// Can't write this efficiently in C/C++ (would
+// often overflow 32 bits).
+#define MulDivAssign(Result, x, y, z)	\
+	{									\
+	int X = (x);						\
+	int Y = (y);						\
+	int Z = (z);						\
+	_asm mov	eax,X					\
+	_asm imul	Y						\
+	_asm mov	ecx,Z					\
+	_asm idiv	ecx						\
+	_asm mov	Result,eax				\
+	}
+#else
+#define MulDivAssign(Result, x, y, z)	Result = (((x)*(y))/(z))
+#endif
+
+// Products of two INTSCALE-scaled quantities, rescaled by dividing the
+// product by INTSCALE; the result is assigned to the first argument r.
+#define	MulScoreWeight(r, s, w)		MulDivAssign(r, s, w, INTSCALE)
+#define MulWeightWCount(r, wt, wc)	MulDivAssign(r, wt, wc, INTSCALE)
+#define MulFCountScore(r, fc, sc)	MulDivAssign(r, fc, sc, INTSCALE)
+
+#if	_DEBUG
+
+// Debug-build score addition. MINUS_INFINITY is absorbing: if either
+// operand is MINUS_INFINITY, or the sum falls below it, the result is
+// MINUS_INFINITY.
+static inline SCORE Add2(SCORE a, SCORE b)
+	{
+	if (MINUS_INFINITY == a || MINUS_INFINITY == b)
+		return MINUS_INFINITY;
+	const SCORE sum = a + b;
+	return (sum < MINUS_INFINITY) ? MINUS_INFINITY : sum;
+	}
+
+// Add3..Add7: sums of three to seven scores, built by pairing operands
+// through Add2 so that the MINUS_INFINITY handling applies at every step.
+static inline SCORE Add3(SCORE a, SCORE b, SCORE c)
+	{
+	const SCORE ab = Add2(a, b);
+	return Add2(ab, c);
+	}
+
+static inline SCORE Add4(SCORE a, SCORE b, SCORE c, SCORE d)
+	{
+	const SCORE ab = Add2(a, b);
+	const SCORE cd = Add2(c, d);
+	return Add2(ab, cd);
+	}
+
+static inline SCORE Add5(SCORE a, SCORE b, SCORE c, SCORE d, SCORE e)
+	{
+	const SCORE ab = Add2(a, b);
+	const SCORE cd = Add2(c, d);
+	return Add3(ab, cd, e);
+	}
+
+static inline SCORE Add6(SCORE a, SCORE b, SCORE c, SCORE d, SCORE e, SCORE f)
+	{
+	const SCORE ab = Add2(a, b);
+	const SCORE cd = Add2(c, d);
+	const SCORE ef = Add2(e, f);
+	return Add3(ab, cd, ef);
+	}
+
+static inline SCORE Add7(SCORE a, SCORE b, SCORE c, SCORE d, SCORE e, SCORE f, SCORE g)
+	{
+	const SCORE ab = Add2(a, b);
+	const SCORE cd = Add2(c, d);
+	const SCORE ef = Add2(e, f);
+	return Add4(ab, cd, ef, g);
+	}
+
+// Debug-build score product; MINUS_INFINITY in either operand yields
+// MINUS_INFINITY.
+static inline SCORE Mul2(SCORE a, SCORE b)
+	{
+	if (MINUS_INFINITY == a || MINUS_INFINITY == b)
+		return MINUS_INFINITY;
+	return a*b;
+	}
+
+// Debug-build score difference a - b; MINUS_INFINITY in either operand,
+// or a difference below MINUS_INFINITY, yields MINUS_INFINITY.
+static inline SCORE Sub2(SCORE a, SCORE b)
+	{
+	if (MINUS_INFINITY == a || MINUS_INFINITY == b)
+		return MINUS_INFINITY;
+	const SCORE diff = a - b;
+	return (diff < MINUS_INFINITY) ? MINUS_INFINITY : diff;
+	}
+
+// Debug-build division of a score by an integer; MINUS_INFINITY is
+// passed through unchanged.
+static inline SCORE Div2(SCORE a, int b)
+	{
+	if (MINUS_INFINITY != a)
+		return a/b;
+	return MINUS_INFINITY;
+	}
+
+//static inline SCORE MulScoreWeight(SCORE s, WEIGHT w)
+//	{
+//	SCORE Prod = s*(SCORE) w;
+//	assert(Prod < OVERFLOW_WARN);
+//	extern void Log(const char Format[], ...);
+//	if (Prod/(SCORE) w != s)
+//		Log("**WARRNING MulScoreWeight Prod=%d w=%d Prod/w=%d s=%d\n",
+//		  Prod,
+//		  w,
+//		  Prod/(SCORE) w,
+//		  s);
+//	assert(Prod/ (SCORE) w == s);
+//	return Prod/INTSCALE;
+//	}
+//
+//static inline WCOUNT MulWeightWCount(WEIGHT wt, WCOUNT wc)
+//	{
+//	return (wt*wc)/INTSCALE;
+//	}
+
+#else
+// Non-debug versions of the score arithmetic helpers, as macros.
+// Unlike the debug functions, the AddN and Mul2 macros perform plain
+// arithmetic with no MINUS_INFINITY check; Sub2 and Div2 check only
+// their first operand. Sub2 and Div2 expand (a) twice, so the argument
+// should be free of side effects.
+#define	Add2(a, b)					((a) + (b))
+#define Sub2(a, b)					((MINUS_INFINITY == (a)) ? MINUS_INFINITY : ((a) - (b)))
+#define Div2(a, b)					((MINUS_INFINITY == (a)) ? MINUS_INFINITY : ((a) / (b)))
+#define	Add3(a, b, c)				((a) + (b) + (c))
+#define	Add4(a, b, c, d)			((a) + (b) + (c) + (d))
+#define	Add5(a, b, c, d, e)			((a) + (b) + (c) + (d) + (e))
+#define	Add6(a, b, c, d, e, f)		((a) + (b) + (c) + (d) + (e) + (f))
+#define	Add7(a, b, c, d, e, f, g)	((a) + (b) + (c) + (d) + (e) + (f) + (g))
+//#define	MulScoreWeight(s, w)		(((s)*(SCORE) (w))/INTSCALE)
+#define	Mul2(a, b)					((a)*(b))
+#endif
+
+//static inline SCORE MulFCountScore(FCOUNT fc, SCORE sc)
+//	{
+//// Fast way to say "if (fc >= 2^15 || sc >= 2^15)":
+//	if ((fc | sc) & 0xffff1000)
+//		{
+//		SCORE Score = ((fc+5)/10)*sc;
+//		assert(Score < assert);
+//		OVERFLOW_WARN(Score > MINUS_INFINITY);
+//		return Score/(INTSCALE/10);
+//		}
+//	SCORE Score = fc*sc;
+//	assert(Score < OVERFLOW_WARN);
+//	assert(Score > MINUS_INFINITY);
+//	return Score/INTSCALE;
+//	}
+
+#endif	// IntMath_h

Added: trunk/packages/muscle/branches/upstream/current/local.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/local.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/local.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,100 @@
+#include "muscle.h"
+#include "textfile.h"
+#include "msa.h"
+#include "profile.h"
+#include "pwpath.h"
+#include "tree.h"
+
+#define TRACE	0
+
+// Open the named file and read an alignment from it into a.
+static void MSAFromFileName(const char *FileName, MSA &a)
+	{
+	TextFile fileIn(FileName);
+	a.FromFile(fileIn);
+	}
+
+// Prepare msa for profile construction and return its profile:
+// number the sequences 0..N-1 as ids, build a guide tree into tree
+// (using the g_Cluster1/g_Distance1/g_Root1 settings), install that
+// tree with SetMuscleTree, then call ProfileFromMSA.
+static ProfPos *ProfileFromMSALocal(MSA &msa, Tree &tree)
+	{
+	const unsigned uSeqCount = msa.GetSeqCount();
+	unsigned uSeqIndex = 0;
+	while (uSeqIndex < uSeqCount)
+		{
+		msa.SetSeqId(uSeqIndex, uSeqIndex);
+		++uSeqIndex;
+		}
+
+	TreeFromMSA(msa, tree, g_Cluster1, g_Distance1, g_Root1);
+	SetMuscleTree(tree);
+	ProfPos *Prof = ProfileFromMSA(msa);
+	return Prof;
+	}
+
+// Align two input alignments against each other with SW() and write
+// the combined alignment to g_pstrOutFileName.
+// Both g_pstrFileName1 and g_pstrFileName2 must be set; otherwise Quit
+// is called.
+void Local()
+	{
+	if (0 == g_pstrFileName1 || 0 == g_pstrFileName2)
+		Quit("Must specify both -in1 and -in2 for -sw");
+
+	SetSeqWeightMethod(g_SeqWeight1);
+
+	MSA msa1;
+	MSA msa2;
+
+	MSAFromFileName(g_pstrFileName1, msa1);
+	MSAFromFileName(g_pstrFileName2, msa2);
+
+// Choose the alphabet from g_SeqType. In auto mode the guess is made
+// from the first alignment only.
+	ALPHA Alpha = ALPHA_Undefined;
+	switch (g_SeqType)
+		{
+	case SEQTYPE_Auto:
+		Alpha = msa1.GuessAlpha();
+		break;
+
+	case SEQTYPE_Protein:
+		Alpha = ALPHA_Amino;
+		break;
+
+	case SEQTYPE_DNA:
+		Alpha = ALPHA_DNA;
+		break;
+
+	case SEQTYPE_RNA:
+		Alpha = ALPHA_RNA;
+		break;
+
+	default:
+		Quit("Invalid SeqType");
+		}
+	SetAlpha(Alpha);
+
+	msa1.FixAlpha();
+	msa2.FixAlpha();
+
+// Nucleotide input switches the profile-profile score to PPSCORE_SPN.
+	if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)
+		SetPPScore(PPSCORE_SPN);
+
+// The id count is set to the larger of the two sequence counts;
+// ProfileFromMSALocal numbers each alignment's ids from 0.
+	const unsigned uSeqCount1 = msa1.GetSeqCount();
+	const unsigned uSeqCount2 = msa2.GetSeqCount();
+	const unsigned uMaxSeqCount = (uSeqCount1 > uSeqCount2 ? uSeqCount1 : uSeqCount2);
+	MSA::SetIdCount(uMaxSeqCount);
+
+	unsigned uLength1 = msa1.GetColCount();
+	unsigned uLength2 = msa2.GetColCount();
+
+	Tree tree1;
+	Tree tree2;
+
+// NOTE(review): Prof1 and Prof2 are never released in this function;
+// confirm whether ProfileFromMSA hands ownership to the caller.
+	ProfPos *Prof1 = ProfileFromMSALocal(msa1, tree1);
+	ProfPos *Prof2 = ProfileFromMSALocal(msa2, tree2);
+
+	PWPath Path;
+	SW(Prof1, uLength1, Prof2, uLength2, Path);
+
+#if	TRACE
+	Path.LogMe();
+#endif
+
+	MSA msaOut;
+	AlignTwoMSAsGivenPathSW(Path, msa1, msa2, msaOut);
+
+#if	TRACE
+	msaOut.LogMe();
+#endif
+
+	TextFile fileOut(g_pstrOutFileName, true);
+	msaOut.ToFile(fileOut);
+	}

Added: trunk/packages/muscle/branches/upstream/current/main.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/main.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/main.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,66 @@
+#include "muscle.h"
+#include <stdio.h>
+#ifdef	WIN32
+#include <windows.h>	// for SetPriorityClass()
+#include <io.h>			// for isatty()
+#else
+#include <unistd.h>		// for isatty()
+#endif
+
+int g_argc;
+char **g_argv;
+
+// Program entry point.
+// Saves argc/argv in globals, processes the command line and parameters,
+// then dispatches to Run(). With g_bVersion set, prints the version
+// string and exits. When no command was given and stdin is a terminal,
+// prints usage and exits. If g_bCatchExceptions is set, any exception
+// escaping Run() is reported via OnException() and the process exits
+// with EXIT_Except.
+int main(int argc, char **argv)
+	{
+// Use the same test (#ifdef) as the one guarding the <windows.h> include
+// at the top of this file, so the call is compiled exactly when the
+// header that declares it is included.
+#ifdef	WIN32
+// Multi-tasking does not work well in CPU-bound
+// console apps running under Win32.
+// Reducing the process priority allows GUI apps
+// to run responsively in parallel.
+	SetPriorityClass(GetCurrentProcess(), BELOW_NORMAL_PRIORITY_CLASS);
+#endif
+	g_argc = argc;
+	g_argv = argv;
+
+	SetNewHandler();
+	SetStartTime();
+// Skip argv[0] (the program name).
+	ProcessArgVect(argc - 1, argv + 1);
+	SetParams();
+	SetLogFile();
+
+	if (g_bVersion)
+		{
+		printf(MUSCLE_LONG_VERSION "\n");
+		exit(EXIT_SUCCESS);
+		}
+
+	if (!g_bQuiet)
+		Credits();
+
+// isatty(0): only show usage when standard input is a terminal.
+	if (MissingCommand() && isatty(0))
+		{
+		Usage();
+		exit(EXIT_SUCCESS);
+		}
+
+	if (g_bCatchExceptions)
+		{
+		try
+			{
+			Run();
+			}
+		catch (...)
+			{
+			OnException();
+			exit(EXIT_Except);
+			}
+		}
+	else
+		Run();
+
+	exit(EXIT_Success);
+	}

Added: trunk/packages/muscle/branches/upstream/current/makerootmsa.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/makerootmsa.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/makerootmsa.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,230 @@
+#include "muscle.h"
+#include "tree.h"
+#include "seqvect.h"
+#include "profile.h"
+#include "msa.h"
+#include "pwpath.h"
+#include "estring.h"
+
+#define TRACE		0
+#define VALIDATE	0
+
+// Apply an alignment path to sequence s, producing the gapped sequence
+// sOut (which also receives s's name). One output character is appended
+// per path edge: 'M' and 'I' edges consume the next letter of s, 'D'
+// edges append a gap ('-'). When bRight is true the roles of 'I' and 'D'
+// are exchanged. Any other edge type is reported through Quit.
+static void PathSeq(const Seq &s, const PWPath &Path, bool bRight, Seq &sOut)
+	{
+// NOTE(review): esA/esB are filled in here but never read or released
+// in this function, and uSeqLength below is unused -- confirm whether
+// this call is needed and who owns the e-strings it produces.
+	short *esA;
+	short *esB;
+	PathToEstrings(Path, &esA, &esB);
+
+	const unsigned uSeqLength = s.Length();
+	const unsigned uEdgeCount = Path.GetEdgeCount();
+
+	sOut.Clear();
+	sOut.SetName(s.GetName());
+	unsigned uPos = 0;	// next unread position in s
+	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
+		{
+		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
+		char cType = Edge.cType;
+		if (bRight)
+			{
+			if (cType == 'I')
+				cType = 'D';
+			else if (cType == 'D')
+				cType = 'I';
+			}
+		switch (cType)
+			{
+		case 'M':
+			sOut.AppendChar(s[uPos++]);
+			break;
+		case 'D':
+			sOut.AppendChar('-');
+			break;
+		case 'I':
+			sOut.AppendChar(s[uPos++]);
+			break;
+		default:
+			Quit("PathSeq, invalid edge type %c", cType);
+			}
+		}
+	}
+
+#if	VALIDATE
+
+// Validation-only (compiled when VALIDATE is non-zero): compute the
+// root-alignment row for leaf uLeafNodeIndex by applying PathSeq with
+// the path stored at each ancestor, walking from the leaf up to the root.
+// The result is left in sRoot.
+static void MakeRootSeq(const Seq &s, const Tree &GuideTree, unsigned uLeafNodeIndex,
+  const ProgNode Nodes[], Seq &sRoot)
+	{
+	sRoot.Copy(s);
+	unsigned uNodeIndex = uLeafNodeIndex;
+	for (;;)
+		{
+	  	unsigned uParent = GuideTree.GetParent(uNodeIndex);
+		if (NULL_NEIGHBOR == uParent)
+			break;
+// Despite its name, bRight is true when the current node is the
+// parent's LEFT child (it is compared against GetLeft).
+		bool bRight = (GuideTree.GetLeft(uParent) == uNodeIndex);
+		uNodeIndex = uParent;
+		const PWPath &Path = Nodes[uNodeIndex].m_Path;
+		Seq sTmp;
+		PathSeq(sRoot, Path, bRight, sTmp);
+		sRoot.Copy(sTmp);
+		}
+	}
+
+#endif	// VALIDATE
+
+// Compute the root-alignment row for leaf uLeafNodeIndex using e-strings.
+// Starting from an e-string holding just the sequence length, the
+// e-string stored at each ancestor is multiplied in (MulEstrings) while
+// walking from the leaf to the root; the final e-string is then applied
+// to s (EstringOp) to produce sRoot.
+// Estring1 and Estring2 are caller-supplied work buffers used alternately
+// for the current and next product. The return value points into one of
+// them (whichever holds the final e-string), so it is only valid until
+// those buffers are reused or freed.
+static short *MakeRootSeqE(const Seq &s, const Tree &GuideTree, unsigned uLeafNodeIndex,
+  const ProgNode Nodes[], Seq &sRoot, short *Estring1, short *Estring2)
+	{
+	short *EstringCurr = Estring1;
+	short *EstringNext = Estring2;
+
+// Initial e-string: the sequence length followed by a 0 entry.
+	const unsigned uSeqLength = s.Length();
+	EstringCurr[0] = uSeqLength;
+	EstringCurr[1] = 0;
+
+	unsigned uNodeIndex = uLeafNodeIndex;
+	for (;;)
+		{
+	  	unsigned uParent = GuideTree.GetParent(uNodeIndex);
+		if (NULL_NEIGHBOR == uParent)
+			break;
+// Despite its name, bRight is true when the current node is the
+// parent's LEFT child; in that case the parent's m_EstringL is used.
+		bool bRight = (GuideTree.GetLeft(uParent) == uNodeIndex);
+		uNodeIndex = uParent;
+		const PWPath &Path = Nodes[uNodeIndex].m_Path;
+		const short *EstringNode = bRight ?
+		  Nodes[uNodeIndex].m_EstringL : Nodes[uNodeIndex].m_EstringR;
+
+		MulEstrings(EstringCurr, EstringNode, EstringNext);
+#if	TRACE
+		Log("\n");
+		Log("Curr=");
+		LogEstring(EstringCurr);
+		Log("\n");
+		Log("Node=");
+		LogEstring(EstringNode);
+		Log("\n");
+		Log("Prod=");
+		LogEstring(EstringNext);
+		Log("\n");
+#endif
+// Swap buffers: the product becomes the current e-string.
+		short *EstringTmp = EstringNext;
+		EstringNext = EstringCurr;
+		EstringCurr = EstringTmp;
+		}
+	EstringOp(EstringCurr, s, sRoot);
+
+#if	TRACE
+	Log("Root estring=");
+	LogEstring(EstringCurr);
+	Log("\n");
+	Log("Root seq=");
+	sRoot.LogMe();
+#endif
+	return EstringCurr;
+	}
+
+// Return the first tree node to visit when building the root alignment:
+// node 0 when g_bStable is set, otherwise the first node of the tree's
+// depth-first traversal.
+static unsigned GetFirstNodeIndex(const Tree &tree)
+	{
+	if (!g_bStable)
+		return tree.FirstDepthFirstNode();
+	return 0;
+	}
+
+// Return the next leaf to visit after uPrevNodeIndex, or NULL_NEIGHBOR
+// when there are no more. With g_bStable set, leaves are visited in
+// increasing node-index order; otherwise in depth-first order.
+static unsigned GetNextNodeIndex(const Tree &tree, unsigned uPrevNodeIndex)
+	{
+	unsigned uNodeIndex = uPrevNodeIndex;
+	if (!g_bStable)
+		{
+	// Advance through the depth-first order until a leaf or the end.
+		do
+			uNodeIndex = tree.NextDepthFirstNode(uNodeIndex);
+		while (NULL_NEIGHBOR != uNodeIndex && !tree.IsLeaf(uNodeIndex));
+		return uNodeIndex;
+		}
+
+// Stable order: scan upward through the node indexes for the next leaf.
+	const unsigned uNodeCount = tree.GetNodeCount();
+	for (++uNodeIndex; uNodeIndex < uNodeCount; ++uNodeIndex)
+		if (tree.IsLeaf(uNodeIndex))
+			return uNodeIndex;
+	return NULL_NEIGHBOR;
+	}
+
+// Build the root alignment a from the input sequences v.
+// Leaves of GuideTree are visited in node-index order when g_bStable is
+// set and in depth-first order otherwise. For each leaf, MakeRootSeqE
+// derives the leaf's root e-string and gapped sequence; a copy of the
+// e-string is stored in the leaf node's m_EstringL, and the gapped
+// sequence, name and id are written to the next row of a. The size of a
+// is set from the first gapped sequence; all later ones are expected
+// (assert) to have the same length.
+void MakeRootMSA(const SeqVect &v, const Tree &GuideTree, ProgNode Nodes[],
+  MSA &a)
+	{
+#if	TRACE
+	Log("MakeRootMSA Tree=");
+	GuideTree.LogMe();
+#endif
+	const unsigned uSeqCount = v.GetSeqCount();
+	unsigned uColCount = uInsane;
+	unsigned uSeqIndex = 0;
+	const unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
+	const PWPath &RootPath = Nodes[uRootNodeIndex].m_Path;
+	const unsigned uRootColCount = RootPath.GetEdgeCount();
+// Work buffers for MakeRootSeqE, sized from the root path's edge count.
+	const unsigned uEstringSize = uRootColCount + 1;
+	short *Estring1 = new short[uEstringSize];
+	short *Estring2 = new short[uEstringSize];
+	SetProgressDesc("Root alignment");
+
+	unsigned uTreeNodeIndex = GetFirstNodeIndex(GuideTree);
+	do
+		{
+		Progress(uSeqIndex, uSeqCount);
+
+		unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex);
+		const Seq &s = *(v[uId]);
+
+		Seq sRootE;
+		short *es = MakeRootSeqE(s, GuideTree, uTreeNodeIndex, Nodes, sRootE,
+		  Estring1, Estring2);
+	// es points into a work buffer, so keep a private copy on the node.
+		Nodes[uTreeNodeIndex].m_EstringL = EstringNewCopy(es);
+
+#if	VALIDATE
+		Seq sRoot;
+		MakeRootSeq(s, GuideTree, uTreeNodeIndex, Nodes, sRoot);
+		if (!sRoot.Eq(sRootE))
+			{
+			Log("sRoot=");
+			sRoot.LogMe();
+			Log("sRootE=");
+			sRootE.LogMe();
+			Quit("Root seqs differ");
+			}
+#endif
+
+#if	TRACE
+	// Log sRootE: sRoot exists only when VALIDATE is enabled, so
+	// referring to it here broke builds with TRACE=1 and VALIDATE=0.
+		Log("MakeRootSeq=\n");
+		sRootE.LogMe();
+#endif
+		if (uInsane == uColCount)
+			{
+			uColCount = sRootE.Length();
+			a.SetSize(uSeqCount, uColCount);
+			}
+		else
+			{
+			assert(uColCount == sRootE.Length());
+			}
+		a.SetSeqName(uSeqIndex, s.GetName());
+		a.SetSeqId(uSeqIndex, uId);
+		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
+			a.SetChar(uSeqIndex, uColIndex, sRootE[uColIndex]);
+		++uSeqIndex;
+
+		uTreeNodeIndex = GetNextNodeIndex(GuideTree, uTreeNodeIndex);
+		}
+	while (NULL_NEIGHBOR != uTreeNodeIndex);
+
+	delete[] Estring1;
+	delete[] Estring2;
+
+	ProgressStepsDone();
+	assert(uSeqIndex == uSeqCount);
+	}

Added: trunk/packages/muscle/branches/upstream/current/makerootmsab.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/makerootmsab.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/makerootmsab.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,62 @@
+#include "muscle.h"
+#include "tree.h"
+#include "profile.h"
+#include "msa.h"
+#include "seqvect.h"
+#include "pwpath.h"
+
+// Align one sequence s to the root profile and copy the resulting gapped
+// row into row uSeqIndex of msaOut (together with s's name and id).
+// The sequence is turned into a one-row profile whose gap-open and
+// gap-close scores are set to MINUS_INFINITY, aligned against RootProf
+// with AlignTwoProfs, and the path is then applied to the sequence and
+// a dummy one-row alignment of '?' characters of the root's length.
+static void DoSeq(Seq &s, unsigned uSeqIndex, const ProfPos *RootProf,
+  unsigned uRootProfLength, MSA &msaOut)
+	{
+	MSA msaSeq;
+	msaSeq.FromSeq(s);
+	const unsigned uSeqLength = s.Length();
+
+// Placeholder alignment standing in for the root in AlignTwoMSAsGivenPath.
+	MSA msaDummy;
+	msaDummy.SetSize(1, uRootProfLength);
+	msaDummy.SetSeqId(0, 0);
+	msaDummy.SetSeqName(0, "Dummy0");
+	for (unsigned uColIndex = 0; uColIndex < uRootProfLength; ++uColIndex)
+		msaDummy.SetChar(0, uColIndex, '?');
+
+// NOTE(review): SeqProf is never released in this function; confirm
+// whether ProfileFromMSA transfers ownership to the caller.
+	ProfPos *SeqProf = ProfileFromMSA(msaSeq);
+	for (unsigned uColIndex = 0; uColIndex < uSeqLength; ++uColIndex)
+		{
+		ProfPos &PP = SeqProf[uColIndex];
+		PP.m_scoreGapOpen = MINUS_INFINITY;
+		PP.m_scoreGapClose = MINUS_INFINITY;
+		}
+
+	ProfPos *ProfOut;
+	unsigned uLengthOut;
+	PWPath Path;
+	AlignTwoProfs(SeqProf, uSeqLength, 1.0, RootProf, uRootProfLength, 1.0,
+	  Path, &ProfOut, &uLengthOut);
+// The combined profile must be exactly as long as the root profile.
+// (Compare with ==; a single '=' assigned to uLengthOut and made the
+// check pass for any non-zero root length.)
+	assert(uLengthOut == uRootProfLength);
+	delete[] ProfOut;
+
+	MSA msaCombined;
+	AlignTwoMSAsGivenPath(Path, msaSeq, msaDummy, msaCombined);
+
+// NOTE(review): this logging is unconditional (not under a TRACE guard).
+	msaCombined.LogMe();
+	msaOut.SetSeqName(uSeqIndex, s.GetName());
+	msaOut.SetSeqId(uSeqIndex, s.GetId());
+// Row 0 of the combined alignment is the sequence; row 1 is the dummy.
+	for (unsigned uColIndex = 0; uColIndex < uRootProfLength; ++uColIndex)
+		msaOut.SetChar(uSeqIndex, uColIndex, msaCombined.GetChar(0, uColIndex));
+	}
+
+// Steven Brenner's O(NL^2) proposal for creating a root alignment:
+// align each sequence individually to the profile at the root.
+// Compare the e-string solution, which is O(NL log N).
+// The output alignment a is sized to (number of sequences) x (root
+// profile length) and filled one row per sequence by DoSeq.
+void MakeRootMSABrenner(SeqVect &v, const Tree &GuideTree, ProgNode Nodes[],
+  MSA &a)
+	{
+	const unsigned uSeqCount = v.Length();
+	const unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
+	const ProgNode &RootNode = Nodes[uRootNodeIndex];
+	const ProfPos *RootProfile = RootNode.m_Prof;
+	const unsigned uRootColCount = RootNode.m_uLength;
+	a.SetSize(uSeqCount, uRootColCount);
+
+	unsigned uSeqIndex = 0;
+	while (uSeqIndex < uSeqCount)
+		{
+		DoSeq(*v[uSeqIndex], uSeqIndex, RootProfile, uRootColCount, a);
+		++uSeqIndex;
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/mhack.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/mhack.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/mhack.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,64 @@
+#include "muscle.h"
+#include "seqvect.h"
+#include "msa.h"
+
+/***
+Methionine hack.
+Most proteins start with M.
+This results in odd-looking alignments with the terminal Ms aligned followed
+immediately by gaps.
+Hack this by treating terminal M like X.
+***/
+
+// Flags indexed by sequence id: M[id] is true when MHackStart replaced the
+// leading 'M' of that sequence with 'X'. Allocated by MHackStart, released
+// (and reset to null) by MHackEnd.
+static bool *M;
+
+// Replace a leading methionine ('M' or 'm') in every sequence of v by 'X'
+// and record, by sequence id, which sequences were changed so that
+// MHackEnd can restore them. Does nothing unless the alphabet is amino.
+void MHackStart(SeqVect &v)
+	{
+	if (g_Alpha != ALPHA_Amino)
+		return;
+
+	const unsigned uCount = v.Length();
+	M = new bool[uCount];
+	for (unsigned i = 0; i < uCount; ++i)
+		M[i] = false;
+
+	for (unsigned i = 0; i < uCount; ++i)
+		{
+		Seq &seq = v.GetSeq(i);
+		if (seq.Length() == 0)
+			continue;
+		// NOTE(review): M has one slot per sequence but is indexed by id;
+		// this assumes every id is < v.Length() -- confirm.
+		const unsigned uId = seq.GetId();
+		const char cFirst = seq[0];
+		if ('M' != cFirst && 'm' != cFirst)
+			continue;
+		M[uId] = true;
+		seq[0] = 'X';
+		}
+	}
+
+// Undo MHackStart: for every row of msa whose sequence id was flagged,
+// write 'M' into its first non-gap column, then release the flag array.
+// Does nothing unless the alphabet is amino and flags were recorded.
+void MHackEnd(MSA &msa)
+	{
+	if (g_Alpha != ALPHA_Amino)
+		return;
+	if (!M)
+		return;
+
+	const unsigned uRows = msa.GetSeqCount();
+	const unsigned uCols = msa.GetColCount();
+	for (unsigned uRow = 0; uRow < uRows; ++uRow)
+		{
+		if (!M[msa.GetSeqId(uRow)])
+			continue;
+
+		// Skip leading gaps to find the first residue of this row.
+		unsigned uCol = 0;
+		while (uCol < uCols && msa.IsGap(uRow, uCol))
+			++uCol;
+		if (uCol < uCols)
+			msa.SetChar(uRow, uCol, 'M');
+		}
+
+	delete[] M;
+	M = 0;
+	}

Added: trunk/packages/muscle/branches/upstream/current/mk
===================================================================
--- trunk/packages/muscle/branches/upstream/current/mk	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/mk	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,10 @@
+ofiles=`echo *.o`
+
+# find -name "*.o" -exec "rm" "{}" ";"
+
+make -f Makefile 2> make.err
+
+# rm *.o
+
+cat make.err
+ls -l muscle


Property changes on: trunk/packages/muscle/branches/upstream/current/mk
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/muscle/branches/upstream/current/mpam200.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/mpam200.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/mpam200.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,107 @@
+#include "muscle.h"
+
+// Constant added to every entry of the PAM200 table by v().
+const float PAM_200_CENTER = (float) 20.0;
+
+// v(x): one table entry converted to float and shifted by PAM_200_CENTER.
+// The argument is not parenthesized, so it should be a plain literal.
+// ROW(...): one brace-enclosed row of 20 entries, in the order
+// A C D E F G H I K L M N P Q R S T V W Y, followed by a comma so that
+// rows can be written one after another.
+#define v(x)	((float) x + PAM_200_CENTER)
+#define ROW(A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y) \
+	{ v(A), v(C), v(D), v(E), v(F), v(G), v(H), v(I), v(K), v(L), \
+	  v(M), v(N), v(P), v(Q), v(R), v(S), v(T), v(V), v(W), v(Y) },
+
+float PAM200[32][32] =
+	{
+//       A       C       D       E       F       G       H       I       K       L
+//       M       N       P       Q       R       S       T       V       W       Y
+ROW(   388,     -0,     34,     32,   -202,    159,    -88,     89,    -55,    -67, 
+        19,     86,    186,    -34,    -32,    237,    273,    171,   -326,   -239)  // A
+ROW(    -0,   1170,   -248,   -315,     74,    -14,     43,   -151,   -204,   -196, 
+      -132,    -49,   -142,   -215,     29,    165,     -7,    -69,    179,    313)  // C
+ROW(    34,   -248,    625,    496,   -419,    148,     78,   -245,     55,   -361, 
+      -255,    332,   -169,    122,    -64,     45,    -13,   -167,   -438,   -148)  // D
+ROW(    32,   -315,    496,    610,   -480,    125,     25,   -245,    175,   -327, 
+      -242,    166,   -141,    279,     34,    -30,    -56,   -150,   -386,   -305)  // E
+ROW(  -202,     74,   -419,   -480,    888,   -407,     62,     80,   -443,    320, 
+        67,   -236,   -180,   -294,   -327,    -51,   -173,     31,     -1,    584)  // F
+ROW(   159,    -14,    148,    125,   -407,    662,   -114,   -216,    -34,   -324, 
+      -246,     79,    -77,    -68,     97,    155,     21,    -93,    -58,   -349)  // G
+ROW(   -88,     43,     78,     25,     62,   -114,    766,   -205,    144,    -92, 
+      -152,    238,     66,    368,    257,     35,    -35,   -217,   -201,    468)  // H
+ROW(    89,   -151,   -245,   -245,     80,   -216,   -205,    554,   -224,    288, 
+       391,   -114,   -115,   -222,   -208,    -19,    162,    469,   -274,   -153)  // I
+ROW(   -55,   -204,     55,    175,   -443,    -34,    144,   -224,    632,   -249, 
+      -118,    186,    -86,    315,    466,      2,     19,   -227,   -216,   -264)  // K
+ROW(   -67,   -196,   -361,   -327,    320,   -324,    -92,    288,   -249,    591, 
+       369,   -223,     53,    -86,   -170,    -69,    -41,    239,    -66,    -29)  // L
+ROW(    19,   -132,   -255,   -242,     67,   -246,   -152,    391,   -118,    369, 
+       756,   -131,    -98,   -124,   -129,    -49,    129,    331,   -229,   -182)  // M
+ROW(    86,    -49,    332,    166,   -236,     79,    238,   -114,    186,   -223, 
+      -131,    516,    -21,     88,     73,    240,    168,   -118,   -379,     -8)  // N
+ROW(   186,   -142,   -169,   -141,   -180,    -77,     66,   -115,    -86,     53, 
+       -98,    -21,    736,    122,      5,    221,    139,    -75,   -373,   -226)  // P
+ROW(   -34,   -215,    122,    279,   -294,    -68,    368,   -222,    315,    -86, 
+      -124,     88,    122,    635,    301,    -13,    -35,   -195,   -243,    -73)  // Q
+ROW(   -32,     29,    -64,     34,   -327,     97,    257,   -208,    466,   -170, 
+      -129,     73,      5,    301,    606,     28,     -4,   -201,    104,   -133)  // R
+ROW(   237,    165,     45,    -30,    -51,    155,     35,    -19,      2,    -69, 
+       -49,    240,    221,    -13,     28,    353,    259,      8,   -213,    -55)  // S
+ROW(   273,     -7,    -13,    -56,   -173,     21,    -35,    162,     19,    -41, 
+       129,    168,    139,    -35,     -4,    259,    422,    143,   -343,   -190)  // T
+ROW(   171,    -69,   -167,   -150,     31,    -93,   -217,    469,   -227,    239, 
+       331,   -118,    -75,   -195,   -201,      8,    143,    505,   -245,   -197)  // V
+ROW(  -326,    179,   -438,   -386,     -1,    -58,   -201,   -274,   -216,    -66, 
+      -229,   -379,   -373,   -243,    104,   -213,   -343,   -245,   1475,     63)  // W
+ROW(  -239,    313,   -148,   -305,    584,   -349,    468,   -153,   -264,    -29, 
+      -182,     -8,   -226,    -73,   -133,    -55,   -190,   -197,     63,    979)  // Y
+	};
+
+// Redefine v() without the PAM_200_CENTER shift: entries are only
+// converted to float. RNC(...) builds one 20-entry row from these
+// unshifted values, with the same trailing comma as ROW.
+#undef v
+#define v(x)	((float) x)
+#define RNC(A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y) \
+	{ v(A), v(C), v(D), v(E), v(F), v(G), v(H), v(I), v(K), v(L), \
+	  v(M), v(N), v(P), v(Q), v(R), v(S), v(T), v(V), v(W), v(Y) },
+
+float PAM200NoCenter[32][32] =
+
+	{
+//       A       C       D       E       F       G       H       I       K       L
+//       M       N       P       Q       R       S       T       V       W       Y
+RNC(   388,     -0,     34,     32,   -202,    159,    -88,     89,    -55,    -67, 
+        19,     86,    186,    -34,    -32,    237,    273,    171,   -326,   -239)  // A
+RNC(    -0,   1170,   -248,   -315,     74,    -14,     43,   -151,   -204,   -196, 
+      -132,    -49,   -142,   -215,     29,    165,     -7,    -69,    179,    313)  // C
+RNC(    34,   -248,    625,    496,   -419,    148,     78,   -245,     55,   -361, 
+      -255,    332,   -169,    122,    -64,     45,    -13,   -167,   -438,   -148)  // D
+RNC(    32,   -315,    496,    610,   -480,    125,     25,   -245,    175,   -327, 
+      -242,    166,   -141,    279,     34,    -30,    -56,   -150,   -386,   -305)  // E
+RNC(  -202,     74,   -419,   -480,    888,   -407,     62,     80,   -443,    320, 
+        67,   -236,   -180,   -294,   -327,    -51,   -173,     31,     -1,    584)  // F
+RNC(   159,    -14,    148,    125,   -407,    662,   -114,   -216,    -34,   -324, 
+      -246,     79,    -77,    -68,     97,    155,     21,    -93,    -58,   -349)  // G
+RNC(   -88,     43,     78,     25,     62,   -114,    766,   -205,    144,    -92, 
+      -152,    238,     66,    368,    257,     35,    -35,   -217,   -201,    468)  // H
+RNC(    89,   -151,   -245,   -245,     80,   -216,   -205,    554,   -224,    288, 
+       391,   -114,   -115,   -222,   -208,    -19,    162,    469,   -274,   -153)  // I
+RNC(   -55,   -204,     55,    175,   -443,    -34,    144,   -224,    632,   -249, 
+      -118,    186,    -86,    315,    466,      2,     19,   -227,   -216,   -264)  // K
+RNC(   -67,   -196,   -361,   -327,    320,   -324,    -92,    288,   -249,    591, 
+       369,   -223,     53,    -86,   -170,    -69,    -41,    239,    -66,    -29)  // L
+RNC(    19,   -132,   -255,   -242,     67,   -246,   -152,    391,   -118,    369, 
+       756,   -131,    -98,   -124,   -129,    -49,    129,    331,   -229,   -182)  // M
+RNC(    86,    -49,    332,    166,   -236,     79,    238,   -114,    186,   -223, 
+      -131,    516,    -21,     88,     73,    240,    168,   -118,   -379,     -8)  // N
+RNC(   186,   -142,   -169,   -141,   -180,    -77,     66,   -115,    -86,     53, 
+       -98,    -21,    736,    122,      5,    221,    139,    -75,   -373,   -226)  // P
+RNC(   -34,   -215,    122,    279,   -294,    -68,    368,   -222,    315,    -86, 
+      -124,     88,    122,    635,    301,    -13,    -35,   -195,   -243,    -73)  // Q
+RNC(   -32,     29,    -64,     34,   -327,     97,    257,   -208,    466,   -170, 
+      -129,     73,      5,    301,    606,     28,     -4,   -201,    104,   -133)  // R
+RNC(   237,    165,     45,    -30,    -51,    155,     35,    -19,      2,    -69, 
+       -49,    240,    221,    -13,     28,    353,    259,      8,   -213,    -55)  // S
+RNC(   273,     -7,    -13,    -56,   -173,     21,    -35,    162,     19,    -41, 
+       129,    168,    139,    -35,     -4,    259,    422,    143,   -343,   -190)  // T
+RNC(   171,    -69,   -167,   -150,     31,    -93,   -217,    469,   -227,    239, 
+       331,   -118,    -75,   -195,   -201,      8,    143,    505,   -245,   -197)  // V
+RNC(  -326,    179,   -438,   -386,     -1,    -58,   -201,   -274,   -216,    -66, 
+      -229,   -379,   -373,   -243,    104,   -213,   -343,   -245,   1475,     63)  // W
+RNC(  -239,    313,   -148,   -305,    584,   -349,    468,   -153,   -264,    -29, 
+      -182,     -8,   -226,    -73,   -133,    -55,   -190,   -197,     63,    979)  // Y
+	};

Added: trunk/packages/muscle/branches/upstream/current/msa.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/msa.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/msa.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,851 @@
+#include "muscle.h"
+#include "msa.h"
+#include "textfile.h"
+#include "seq.h"
+#include <math.h>
+
+// Column capacity requested by MSA::SetSeqCount, and the number of columns
+// by which MSA::SetChar grows the row buffers when they are full.
+const unsigned DEFAULT_SEQ_LENGTH = 500;
+
+// Size of the sequence id space; static, so shared by all MSA objects.
+// Set through MSA::SetIdCount.
+unsigned MSA::m_uIdCount = 0;
+
+// Construct an empty alignment: no rows, no columns, nothing allocated.
+MSA::MSA()
+	{
+	// Owned storage.
+	m_szSeqs = 0;
+	m_szNames = 0;
+	m_Weights = 0;
+	m_IdToSeqIndex = 0;
+	m_SeqIndexToId = 0;
+
+	// Logical size.
+	m_uSeqCount = 0;
+	m_uColCount = 0;
+
+	// Allocated capacity.
+	m_uCacheSeqCount = 0;
+	m_uCacheSeqLength = 0;
+	}
+
+// Destructor: releases everything the alignment owns via Free().
+MSA::~MSA()
+	{
+	Free();
+	}
+
+// Release all storage owned by the alignment (row buffers, names, weights
+// and id tables) and return the object to the empty state. Safe to call
+// repeatedly.
+void MSA::Free()
+	{
+	for (unsigned n = 0; n < m_uSeqCount; ++n)
+		{
+		delete[] m_szSeqs[n];
+		delete[] m_szNames[n];
+		}
+
+	delete[] m_szSeqs;
+	delete[] m_szNames;
+	delete[] m_Weights;
+	delete[] m_IdToSeqIndex;
+	delete[] m_SeqIndexToId;
+
+	m_uSeqCount = 0;
+	m_uColCount = 0;
+
+	// The capacity counters describe the arrays just released. Leaving
+	// them non-zero would let AppendSeq believe free slots still exist and
+	// store into arrays that are gone.
+	m_uCacheSeqCount = 0;
+	m_uCacheSeqLength = 0;
+
+	m_szSeqs = 0;
+	m_szNames = 0;
+	m_Weights = 0;
+
+	m_IdToSeqIndex = 0;
+	m_SeqIndexToId = 0;
+	}
+
+// Discard the current contents and allocate uSeqCount rows with room for
+// uColCount columns each (plus a terminating NUL). The column count starts
+// at 0 and grows as SetChar stores characters. Names start out null.
+// Weights, residues and id tables are filled with sentinel values only in
+// DEBUG builds; otherwise they are left uninitialized.
+void MSA::SetSize(unsigned uSeqCount, unsigned uColCount)
+	{
+	Free();
+
+	m_uColCount = 0;
+	m_uCacheSeqLength = uColCount;
+	m_uSeqCount = uSeqCount;
+
+	if (0 == uSeqCount && 0 == uColCount)
+		return;
+
+	m_szSeqs = new char *[uSeqCount];
+	m_szNames = new char *[uSeqCount];
+	m_Weights = new WEIGHT[uSeqCount];
+
+	for (unsigned uRow = 0; uRow < uSeqCount; ++uRow)
+		{
+		m_szSeqs[uRow] = new char[uColCount+1];
+		m_szNames[uRow] = 0;
+#if	DEBUG
+		m_Weights[uRow] = BTInsane;
+		memset(m_szSeqs[uRow], '?', uColCount);
+#endif
+		m_szSeqs[uRow][uColCount] = 0;
+		}
+
+	// Id tables exist only once an id count has been declared.
+	if (0 == m_uIdCount)
+		return;
+
+	m_IdToSeqIndex = new unsigned[m_uIdCount];
+	m_SeqIndexToId = new unsigned[m_uSeqCount];
+#if	DEBUG
+	memset(m_IdToSeqIndex, 0xff, m_uIdCount*sizeof(unsigned));
+	memset(m_SeqIndexToId, 0xff, m_uSeqCount*sizeof(unsigned));
+#endif
+	}
+
+// Write the alignment to the log in blocks of at most 50 columns. Each
+// block has two ruler lines followed by one line per row showing the name
+// (12 characters), the weight unless it equals BTInsane, the characters of
+// the block and, when the index-to-id table is allocated, the row's id.
+void MSA::LogMe() const
+	{
+	if (0 == GetColCount())
+		{
+		Log("MSA empty\n");
+		return;
+		}
+
+	const unsigned uColsPerLine = 50;
+	// Number of blocks, rounding up.
+	unsigned uLinesPerSeq = (GetColCount() - 1)/uColsPerLine + 1;
+	for (unsigned n = 0; n < uLinesPerSeq; ++n)
+		{
+		unsigned i;
+		unsigned iStart = n*uColsPerLine;
+		unsigned iEnd = GetColCount();
+		// Clip the block to uColsPerLine columns.
+		if (iEnd - iStart + 1 > uColsPerLine)
+			iEnd = iStart + uColsPerLine;
+		// First ruler line: last decimal digit of every column index.
+		Log("                       ");
+		for (i = iStart; i < iEnd; ++i)
+			Log("%u", i%10);
+		Log("\n");
+		// Second ruler line: the column index printed every 10 columns; the
+		// last block also shows the total column count.
+		Log("                       ");
+		for (i = iStart; i + 9 < iEnd; i += 10)
+			Log("%-10u", i);
+		if (n == uLinesPerSeq - 1)
+			Log(" %-10u", GetColCount());
+		Log("\n");
+		for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
+			{
+			Log("%12.12s", m_szNames[uSeqIndex]);
+			// Blank padding keeps the columns aligned when no weight is shown.
+			if (m_Weights[uSeqIndex] != BTInsane)
+				Log(" (%5.3f)", m_Weights[uSeqIndex]);
+			else
+				Log("        ");
+			Log("   ");
+			for (i = iStart; i < iEnd; ++i)
+				Log("%c", GetChar(uSeqIndex, i));
+			if (0 != m_SeqIndexToId)
+				Log(" [%5u]", m_SeqIndexToId[uSeqIndex]);
+			Log("\n");
+			}
+		Log("\n\n");
+		}
+	}
+
+// Return the character stored at row uSeqIndex, column uIndex.
+// Quits with a diagnostic if either index is out of range.
+char MSA::GetChar(unsigned uSeqIndex, unsigned uIndex) const
+	{
+// TODO: Performance cost?
+	const bool bInRange = uSeqIndex < m_uSeqCount && uIndex < m_uColCount;
+	if (!bInRange)
+		Quit("MSA::GetChar(%u/%u,%u/%u)",
+		  uSeqIndex, m_uSeqCount, uIndex, m_uColCount);
+
+	return m_szSeqs[uSeqIndex][uIndex];
+	}
+
+// Return the letter code (via CharToLetter) of the character at the given
+// cell. Quits with a diagnostic if the code is 20 or more.
+unsigned MSA::GetLetter(unsigned uSeqIndex, unsigned uIndex) const
+	{
+// TODO: Performance cost?
+	const char cCell = GetChar(uSeqIndex, uIndex);
+	const unsigned uLetter = CharToLetter(cCell);
+	if (uLetter < 20)
+		return uLetter;
+
+	// Re-read the cell for the message, falling back to a blank if the
+	// indexes are out of range.
+	char cShown = ' ';
+	if (uSeqIndex < m_uSeqCount && uIndex < m_uColCount)
+		cShown = m_szSeqs[uSeqIndex][uIndex];
+	Quit("MSA::GetLetter(%u/%u, %u/%u)='%c'/%u",
+	  uSeqIndex, m_uSeqCount, uIndex, m_uColCount, cShown, uLetter);
+	return uLetter;
+	}
+
+// Return CharToLetterEx of the character at the given cell; unlike
+// GetLetter, the result is not range checked here.
+unsigned MSA::GetLetterEx(unsigned uSeqIndex, unsigned uIndex) const
+	{
+// TODO: Performance cost?
+	const char cCell = GetChar(uSeqIndex, uIndex);
+	return CharToLetterEx(cCell);
+	}
+
+// Store a private copy of szName as the name of row uSeqIndex, freeing any
+// name the row had before. Quits if uSeqIndex is out of range.
+void MSA::SetSeqName(unsigned uSeqIndex, const char szName[])
+	{
+	if (uSeqIndex >= m_uSeqCount)
+		// Each conversion in the format needs its own argument:
+		// index, name, count.
+		Quit("MSA::SetSeqName(%u, %s), count=%u", uSeqIndex, szName,
+		  m_uSeqCount);
+
+	// Copy before releasing the old name, so that passing the row's own
+	// current name (e.g. the result of GetSeqName) stays valid.
+	const size_t uBytes = strlen(szName) + 1;	// include the NUL
+	char *ptrCopy = new char[uBytes];
+	memcpy(ptrCopy, szName, uBytes);
+
+	delete[] m_szNames[uSeqIndex];
+	m_szNames[uSeqIndex] = ptrCopy;
+	}
+
+// Return the stored name of row uSeqIndex (null if none has been set).
+// Quits if uSeqIndex is out of range.
+const char *MSA::GetSeqName(unsigned uSeqIndex) const
+	{
+	const bool bValid = uSeqIndex < m_uSeqCount;
+	if (!bValid)
+		Quit("MSA::GetSeqName(%u), count=%u", uSeqIndex, m_uSeqCount);
+	return m_szNames[uSeqIndex];
+	}
+
+// True if the character at the given cell is a gap character.
+bool MSA::IsGap(unsigned uSeqIndex, unsigned uIndex) const
+	{
+	const char cCell = GetChar(uSeqIndex, uIndex);
+	const bool bGap = IsGapChar(cCell);
+	return bGap;
+	}
+
+// True if the character at the given cell is a wildcard character.
+bool MSA::IsWildcard(unsigned uSeqIndex, unsigned uIndex) const
+	{
+	const char cCell = GetChar(uSeqIndex, uIndex);
+	const bool bWildcard = IsWildcardChar(cCell);
+	return bWildcard;
+	}
+
+// Store c at row uSeqIndex, column uIndex. Writing at the column equal to
+// the current capacity grows every row buffer by DEFAULT_SEQ_LENGTH
+// columns (new cells are filled with '?'). The column count is raised so
+// that it covers uIndex. Quits if the row is out of range or uIndex is
+// beyond the capacity.
+void MSA::SetChar(unsigned uSeqIndex, unsigned uIndex, char c)
+	{
+	if (uSeqIndex >= m_uSeqCount || uIndex > m_uCacheSeqLength)
+		Quit("MSA::SetChar(%u,%u)", uSeqIndex, uIndex);
+
+	if (uIndex == m_uCacheSeqLength)
+		{
+		const unsigned uOldLength = m_uCacheSeqLength;
+		const unsigned uNewLength = uOldLength + DEFAULT_SEQ_LENGTH;
+		for (unsigned uRow = 0; uRow < m_uSeqCount; ++uRow)
+			{
+			char *ptrOld = m_szSeqs[uRow];
+			char *ptrNew = new char[uNewLength+1];
+			memcpy(ptrNew, ptrOld, uOldLength);
+			memset(ptrNew + uOldLength, '?', DEFAULT_SEQ_LENGTH);
+			ptrNew[uNewLength] = 0;
+			m_szSeqs[uRow] = ptrNew;
+			delete[] ptrOld;
+			}
+
+		m_uCacheSeqLength = uNewLength;
+		m_uColCount = uIndex;
+		}
+
+	if (m_uColCount <= uIndex)
+		m_uColCount = uIndex + 1;
+	m_szSeqs[uSeqIndex][uIndex] = c;
+	}
+
+// Extract row uSeqIndex into seq as an ungapped, upper-case sequence that
+// carries the row's name. Quits on a non-gap character that is not a
+// letter.
+void MSA::GetSeq(unsigned uSeqIndex, Seq &seq) const
+	{
+	assert(uSeqIndex < m_uSeqCount);
+
+	seq.Clear();
+
+	for (unsigned n = 0; n < m_uColCount; ++n)
+		{
+		const char c = GetChar(uSeqIndex, n);
+		if (IsGapChar(c))
+			continue;
+		// The <ctype.h> functions require a value representable as
+		// unsigned char; a plain char may be negative.
+		const unsigned char uc = (unsigned char) c;
+		if (!isalpha(uc))
+			Quit("Invalid character '%c' in sequence", c);
+		seq.push_back((char) toupper(uc));
+		}
+	const char *ptrName = GetSeqName(uSeqIndex);
+	seq.SetName(ptrName);
+	}
+
+// True if at least one cell of the alignment is a gap.
+bool MSA::HasGap() const
+	{
+	for (unsigned uRow = 0; uRow < GetSeqCount(); ++uRow)
+		{
+		unsigned uCol = 0;
+		while (uCol < GetColCount())
+			{
+			if (IsGap(uRow, uCol))
+				return true;
+			++uCol;
+			}
+		}
+	return false;
+	}
+
+// True if uLetter is a letter code below 20 (the same bound that
+// GetLetter enforces).
+bool MSA::IsLegalLetter(unsigned uLetter) const
+	{
+	return uLetter < 20;
+	}
+
+// Discard the current contents and allocate uSeqCount rows with the
+// default column capacity (DEFAULT_SEQ_LENGTH).
+void MSA::SetSeqCount(unsigned uSeqCount)
+	{
+	// SetSize also starts with Free(); the explicit call is harmless.
+	Free();
+	SetSize(uSeqCount, DEFAULT_SEQ_LENGTH);
+	}
+
+// Overwrite column uToCol with the contents of column uFromCol, row by
+// row. Copying a column onto itself is a no-op.
+void MSA::CopyCol(unsigned uFromCol, unsigned uToCol)
+	{
+	assert(uFromCol < GetColCount());
+	assert(uToCol < GetColCount());
+	if (uFromCol != uToCol)
+		{
+		unsigned uRow = 0;
+		while (uRow < GetSeqCount())
+			{
+			const char cFrom = GetChar(uRow, uFromCol);
+			SetChar(uRow, uToCol, cFrom);
+			++uRow;
+			}
+		}
+	}
+
+// Make this alignment a copy of msa: same dimensions, and for every row
+// the same name, id and characters. Weights are not copied here.
+void MSA::Copy(const MSA &msa)
+	{
+	Free();
+	const unsigned uRows = msa.GetSeqCount();
+	const unsigned uCols = msa.GetColCount();
+	SetSize(uRows, uCols);
+
+	for (unsigned uRow = 0; uRow < uRows; ++uRow)
+		{
+		SetSeqName(uRow, msa.GetSeqName(uRow));
+		SetSeqId(uRow, msa.GetSeqId(uRow));
+
+		unsigned uCol = 0;
+		while (uCol < uCols)
+			{
+			SetChar(uRow, uCol, msa.GetChar(uRow, uCol));
+			++uCol;
+			}
+		}
+	}
+
+// True if every row has a gap in column uColIndex.
+bool MSA::IsGapColumn(unsigned uColIndex) const
+	{
+	assert(GetSeqCount() > 0);
+	unsigned uRow = 0;
+	while (uRow < GetSeqCount())
+		{
+		if (!IsGap(uRow, uColIndex))
+			return false;
+		++uRow;
+		}
+	return true;
+	}
+
+// Look up a row by name, ignoring case. On success stores the index of the
+// first matching row in *ptruSeqIndex and returns true; otherwise returns
+// false and leaves *ptruSeqIndex untouched.
+bool MSA::GetSeqIndex(const char *ptrSeqName, unsigned *ptruSeqIndex) const
+	{
+	unsigned uRow = 0;
+	while (uRow < GetSeqCount())
+		{
+		if (stricmp(ptrSeqName, GetSeqName(uRow)) == 0)
+			{
+			*ptruSeqIndex = uRow;
+			return true;
+			}
+		++uRow;
+		}
+	return false;
+	}
+
+// Remove column uColIndex by shifting the rest of every row one position
+// to the left, then reduce the column count by one.
+void MSA::DeleteCol(unsigned uColIndex)
+	{
+	assert(uColIndex < m_uColCount);
+	// Bytes moved per row: everything after the deleted column plus the
+	// byte at index m_uColCount.
+	// NOTE(review): this reads index m_uColCount of each row, so buffers
+	// must hold at least m_uColCount+1 bytes; SetSize guarantees that,
+	// ExpandCache/AppendSeq buffers may not -- confirm.
+	const size_t uTail = m_uColCount - uColIndex;
+	if (0 != uTail)
+		{
+		unsigned uRow = 0;
+		while (uRow < GetSeqCount())
+			{
+			char *ptrDst = m_szSeqs[uRow] + uColIndex;
+			memmove(ptrDst, ptrDst + 1, uTail);
+			++uRow;
+			}
+		}
+	--m_uColCount;
+	}
+
+// Remove uColCount consecutive columns starting at uColIndex by deleting
+// the column at that position repeatedly.
+void MSA::DeleteColumns(unsigned uColIndex, unsigned uColCount)
+	{
+	unsigned uRemaining = uColCount;
+	while (uRemaining > 0)
+		{
+		DeleteCol(uColIndex);
+		--uRemaining;
+		}
+	}
+
+// Read the alignment from File; input is always handled by FromFASTAFile.
+void MSA::FromFile(TextFile &File)
+	{
+	FromFASTAFile(File);
+	}
+
+// Weights sum to 1, WCounts sum to NIC
+// Return the weight of row uSeqIndex; quits if the stored value is the
+// wInsane sentinel.
+WEIGHT MSA::GetSeqWeight(unsigned uSeqIndex) const
+	{
+	assert(uSeqIndex < m_uSeqCount);
+	const WEIGHT wSeq = m_Weights[uSeqIndex];
+	if (wInsane == wSeq)
+		Quit("Seq weight not set");
+	return wSeq;
+	}
+
+// Set the weight of row uSeqIndex to w. Declared const: it writes through
+// the m_Weights pointer rather than changing a member itself.
+void MSA::SetSeqWeight(unsigned uSeqIndex, WEIGHT w) const
+	{
+	assert(uSeqIndex < m_uSeqCount);
+	m_Weights[uSeqIndex] = w;
+	}
+
+// Scale all row weights by a common factor so that they add up to
+// wDesiredTotal. Weights are left unchanged when their sum is zero.
+void MSA::NormalizeWeights(WEIGHT wDesiredTotal) const
+	{
+	WEIGHT wSum = 0;
+	for (unsigned uRow = 0; uRow < m_uSeqCount; ++uRow)
+		wSum += m_Weights[uRow];
+
+	if (0 != wSum)
+		{
+		const WEIGHT wScale = wDesiredTotal/wSum;
+		for (unsigned uRow = 0; uRow < m_uSeqCount; ++uRow)
+			m_Weights[uRow] *= wScale;
+		}
+	}
+
+// Placeholder: always quits with "Calc weights not implemented".
+void MSA::CalcWeights() const
+	{
+	Quit("Calc weights not implemented");
+	}
+
+// Log character c followed by blanks so that uWidth columns are used in
+// total (just the character when uWidth is 0 or 1).
+static void FmtChar(char c, unsigned uWidth)
+	{
+	Log("%c", c);
+	// Count from 1 (the character itself) instead of testing against
+	// uWidth - 1, which wraps to a huge value when uWidth is 0.
+	for (unsigned n = 1; n < uWidth; ++n)
+		Log(" ");
+	}
+
+// Log u in decimal, or "." when u is zero, then pad with blanks up to
+// uWidth columns.
+static void FmtInt(unsigned u, unsigned uWidth)
+	{
+	static char szStr[1024];
+	assert(uWidth < sizeof(szStr));
+	if (0 == u)
+		strcpy(szStr, ".");
+	else
+		sprintf(szStr, "%u", u);
+	Log(szStr);
+
+	// Pad from the text length up to the requested width.
+	for (unsigned uLen = (unsigned) strlen(szStr); uLen < uWidth; ++uLen)
+		Log(" ");
+	}
+
+// Log u in decimal (zero is printed as "0"), then pad with blanks up to
+// uWidth columns.
+static void FmtInt0(unsigned u, unsigned uWidth)
+	{
+	static char szStr[1024];
+	assert(uWidth < sizeof(szStr));
+	sprintf(szStr, "%u", u);
+	Log(szStr);
+
+	// Pad from the text length up to the requested width.
+	for (unsigned uLen = (unsigned) strlen(szStr); uLen < uWidth; ++uLen)
+		Log(" ");
+	}
+
+// Log n blanks.
+static void FmtPad(unsigned n)
+	{
+	while (n > 0)
+		{
+		Log(" ");
+		--n;
+		}
+	}
+
+// Turn this object into a one-row alignment holding sequence s: same
+// name and characters, and the same id when id tables are allocated.
+void MSA::FromSeq(const Seq &s)
+	{
+	const unsigned uLength = s.Length();
+	SetSize(1, uLength);
+	SetSeqName(0, s.GetName());
+	if (m_SeqIndexToId != 0)
+		SetSeqId(0, s.GetId());
+
+	unsigned uPos = 0;
+	while (uPos < uLength)
+		{
+		SetChar(0, uPos, s[uPos]);
+		++uPos;
+		}
+	}
+
+// Count the non-gap characters of row uSeqIndex in columns 0 through
+// uColIndex inclusive.
+unsigned MSA::GetCharCount(unsigned uSeqIndex, unsigned uColIndex) const
+	{
+	assert(uSeqIndex < GetSeqCount());
+	assert(uColIndex < GetColCount());
+
+	unsigned uResidues = 0;
+	for (unsigned uCol = 0; uCol <= uColIndex; ++uCol)
+		{
+		if (IsGap(uSeqIndex, uCol))
+			continue;
+		++uResidues;
+		}
+	return uResidues;
+	}
+
+// Copy the characters and name of row uFromSeqIndex of msaFrom into row
+// uToSeqIndex of this alignment. If this alignment has no columns yet, it
+// adopts the column count of msaFrom.
+void MSA::CopySeq(unsigned uToSeqIndex, const MSA &msaFrom, unsigned uFromSeqIndex)
+	{
+	assert(uToSeqIndex < m_uSeqCount);
+	const unsigned uCols = msaFrom.GetColCount();
+	assert(m_uColCount == uCols ||
+	  (0 == m_uColCount && uCols <= m_uCacheSeqLength));
+
+	const char *ptrSrc = msaFrom.GetSeqBuffer(uFromSeqIndex);
+	memcpy(m_szSeqs[uToSeqIndex], ptrSrc, uCols);
+	SetSeqName(uToSeqIndex, msaFrom.GetSeqName(uFromSeqIndex));
+	if (m_uColCount == 0)
+		m_uColCount = uCols;
+	}
+
+// Return a read-only pointer to the internal character buffer of row
+// uSeqIndex (the storage itself, not a copy).
+const char *MSA::GetSeqBuffer(unsigned uSeqIndex) const
+	{
+	assert(uSeqIndex < m_uSeqCount);
+	return m_szSeqs[uSeqIndex];
+	}
+
+// Remove row uSeqIndex: free its character and name buffers, close the gap
+// in the row arrays and discard the weight array.
+// NOTE(review): the id <-> index tables are not adjusted here -- confirm
+// that callers do not rely on them afterwards.
+void MSA::DeleteSeq(unsigned uSeqIndex)
+	{
+	assert(uSeqIndex < m_uSeqCount);
+
+	// Both buffers are allocated with new[], so release them with delete[].
+	delete[] m_szSeqs[uSeqIndex];
+	delete[] m_szNames[uSeqIndex];
+
+	// Only the rows after the deleted one move down; moving one more
+	// element would read past the end of the arrays.
+	const unsigned uLastIndex = m_uSeqCount - 1;
+	const unsigned uRowsToMove = uLastIndex - uSeqIndex;
+	if (uRowsToMove > 0)
+		{
+		const size_t uBytesToMove = uRowsToMove*sizeof(char *);
+		memmove(m_szSeqs + uSeqIndex, m_szSeqs + uSeqIndex + 1, uBytesToMove);
+		memmove(m_szNames + uSeqIndex, m_szNames + uSeqIndex + 1, uBytesToMove);
+		}
+
+	// The vacated last slot now duplicates (or is) a released pointer.
+	m_szSeqs[uLastIndex] = 0;
+	m_szNames[uLastIndex] = 0;
+
+	--m_uSeqCount;
+
+	delete[] m_Weights;
+	m_Weights = 0;
+	}
+
+// True if column uColIndex holds a gap in every row.
+bool MSA::IsEmptyCol(unsigned uColIndex) const
+	{
+	const unsigned uRows = GetSeqCount();
+	unsigned uRow = 0;
+	while (uRow < uRows)
+		{
+		if (!IsGap(uRow, uColIndex))
+			return false;
+		++uRow;
+		}
+	return true;
+	}
+
+//void MSA::DeleteEmptyCols(bool bProgress)
+//	{
+//	unsigned uColCount = GetColCount();
+//	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
+//		{
+//		if (IsEmptyCol(uColIndex))
+//			{
+//			if (bProgress)
+//				{
+//				Log("Deleting col %u of %u\n", uColIndex, uColCount);
+//				printf("Deleting col %u of %u\n", uColIndex, uColCount);
+//				}
+//			DeleteCol(uColIndex);
+//			--uColCount;
+//			}
+//		}
+//	}
+
+// Placeholder: always quits with a "not implemented" message. The return
+// statement only satisfies the function signature.
+unsigned MSA::AlignedColIndexToColIndex(unsigned uAlignedColIndex) const
+	{
+	Quit("MSA::AlignedColIndexToColIndex not implemented");
+	return 0;
+	}
+
+// Return the sum of the weights of all rows.
+WEIGHT MSA::GetTotalSeqWeight() const
+	{
+	const unsigned uRows = GetSeqCount();
+	WEIGHT wSum = 0;
+	unsigned uRow = 0;
+	while (uRow < uRows)
+		{
+		wSum += m_Weights[uRow];
+		++uRow;
+		}
+	return wSum;
+	}
+
+// Compare row uSeqIndex1 of a1 with row uSeqIndex2 of a2 as ungapped
+// sequences, ignoring case.
+bool MSA::SeqsEq(const MSA &a1, unsigned uSeqIndex1, const MSA &a2,
+  unsigned uSeqIndex2)
+	{
+	Seq seqFirst;
+	Seq seqSecond;
+
+	a1.GetSeq(uSeqIndex1, seqFirst);
+	a2.GetSeq(uSeqIndex2, seqSecond);
+
+	seqFirst.StripGaps();
+	seqSecond.StripGaps();
+
+	const bool bSame = seqFirst.EqIgnoreCase(seqSecond);
+	return bSame;
+	}
+
+// Return the number of non-gap characters in row uSeqIndex.
+unsigned MSA::GetSeqLength(unsigned uSeqIndex) const
+	{
+	assert(uSeqIndex < GetSeqCount());
+
+	const unsigned uCols = GetColCount();
+	unsigned uResidues = 0;
+	for (unsigned uCol = 0; uCol < uCols; ++uCol)
+		{
+		if (IsGap(uSeqIndex, uCol))
+			continue;
+		++uResidues;
+		}
+	return uResidues;
+	}
+
+// Pairwise identity of two rows. Only columns where neither row has a gap
+// are considered; their number is stored in *ptruPosCount and the
+// percentage of them with identical characters (case-sensitive) in
+// *ptrPWID, which is 0 when there are no such columns.
+void MSA::GetPWID(unsigned uSeqIndex1, unsigned uSeqIndex2, double *ptrPWID,
+  unsigned *ptruPosCount) const
+	{
+	assert(uSeqIndex1 < GetSeqCount());
+	assert(uSeqIndex2 < GetSeqCount());
+
+	const unsigned uCols = GetColCount();
+	unsigned uCompared = 0;
+	unsigned uIdentical = 0;
+	for (unsigned uCol = 0; uCol < uCols; ++uCol)
+		{
+		const char cFirst = GetChar(uSeqIndex1, uCol);
+		if (!IsGapChar(cFirst))
+			{
+			const char cSecond = GetChar(uSeqIndex2, uCol);
+			if (!IsGapChar(cSecond))
+				{
+				++uCompared;
+				if (cFirst == cSecond)
+					++uIdentical;
+				}
+			}
+		}
+
+	*ptruPosCount = uCompared;
+	if (0 == uCompared)
+		*ptrPWID = 0;
+	else
+		*ptrPWID = 100.0 * (double) uIdentical / (double) uCompared;
+	}
+
+// Mark every row weight as unset by storing the BTInsane sentinel.
+void MSA::UnWeight()
+	{
+	unsigned uRow = 0;
+	while (uRow < GetSeqCount())
+		{
+		m_Weights[uRow] = BTInsane;
+		++uRow;
+		}
+	}
+
+// Return how many different letters occur in column uColIndex; gaps and
+// wildcards are ignored.
+unsigned MSA::UniqueResidueTypes(unsigned uColIndex) const
+	{
+	assert(uColIndex < GetColCount());
+
+	// Occurrences of each letter in this column.
+	unsigned Counts[MAX_ALPHA];
+	for (unsigned k = 0; k < MAX_ALPHA; ++k)
+		Counts[k] = 0;
+
+	const unsigned uRows = GetSeqCount();
+	for (unsigned uRow = 0; uRow < uRows; ++uRow)
+		{
+		const bool bSkip = IsGap(uRow, uColIndex) || IsWildcard(uRow, uColIndex);
+		if (!bSkip)
+			++(Counts[GetLetter(uRow, uColIndex)]);
+		}
+
+	unsigned uDistinct = 0;
+	for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
+		{
+		if (0 != Counts[uLetter])
+			++uDistinct;
+		}
+	return uDistinct;
+	}
+
+// Occupancy of column uColIndex: the fraction of rows that do not have a
+// gap there.
+double MSA::GetOcc(unsigned uColIndex) const
+	{
+	unsigned uGaps = 0;
+	unsigned uRow = 0;
+	while (uRow < GetSeqCount())
+		{
+		if (IsGap(uRow, uColIndex))
+			++uGaps;
+		++uRow;
+		}
+	const unsigned uRows = GetSeqCount();
+	const double dOccupied = (double) (uRows - uGaps);
+	return dOccupied / (double) uRows;
+	}
+
+// Write the alignment to File in the format selected by the global output
+// flags, tested in the order MSF, Aln, HTML, sequential Phylip,
+// interleaved Phylip; FASTA is used when none is set. A score file is
+// written in addition when a score file name has been given.
+void MSA::ToFile(TextFile &File) const
+	{
+	if (g_bMSF)
+		{
+		ToMSFFile(File);
+		}
+	else if (g_bAln)
+		{
+		ToAlnFile(File);
+		}
+	else if (g_bHTML)
+		{
+		ToHTMLFile(File);
+		}
+	else if (g_bPHYS)
+		{
+		ToPhySequentialFile(File);
+		}
+	else if (g_bPHYI)
+		{
+		ToPhyInterleavedFile(File);
+		}
+	else
+		{
+		ToFASTAFile(File);
+		}
+
+	if (g_pstrScoreFileName != 0)
+		WriteScoreFile(*this);
+	}
+
+// True if at least one row has a gap in column uColIndex.
+bool MSA::ColumnHasGap(unsigned uColIndex) const
+	{
+	const unsigned uRows = GetSeqCount();
+	unsigned uRow = 0;
+	while (uRow < uRows)
+		{
+		if (IsGap(uRow, uColIndex))
+			return true;
+		++uRow;
+		}
+	return false;
+	}
+
+// Declare the size of the sequence id space. The first call with a
+// non-zero value fixes the count; later calls leave it unchanged and quit
+// if they ask for a larger count.
+void MSA::SetIdCount(unsigned uIdCount)
+	{
+	//if (m_uIdCount != 0)
+	//	Quit("MSA::SetIdCount: may only be called once");
+
+	if (0 == m_uIdCount)
+		{
+		m_uIdCount = uIdCount;
+		return;
+		}
+
+	if (uIdCount > m_uIdCount)
+		Quit("MSA::SetIdCount: cannot increase count");
+	}
+
+// Associate row uSeqIndex with id uId in both lookup tables. The tables
+// are allocated on first use, with every entry set to all-ones; quits if
+// SetIdCount has not been called by then.
+void MSA::SetSeqId(unsigned uSeqIndex, unsigned uId)
+	{
+	assert(uSeqIndex < m_uSeqCount);
+	assert(uId < m_uIdCount);
+	if (!m_SeqIndexToId)
+		{
+		if (m_uIdCount == 0)
+			Quit("MSA::SetSeqId, SetIdCount has not been called");
+
+		m_IdToSeqIndex = new unsigned[m_uIdCount];
+		memset(m_IdToSeqIndex, 0xff, m_uIdCount*sizeof(unsigned));
+
+		m_SeqIndexToId = new unsigned[m_uSeqCount];
+		memset(m_SeqIndexToId, 0xff, m_uSeqCount*sizeof(unsigned));
+		}
+	m_IdToSeqIndex[uId] = uSeqIndex;
+	m_SeqIndexToId[uSeqIndex] = uId;
+	}
+
+// Return the row index recorded for id uId in the id-to-index table.
+// Range and table checks are assertions only.
+unsigned MSA::GetSeqIndex(unsigned uId) const
+	{
+	assert(uId < m_uIdCount);
+	assert(0 != m_IdToSeqIndex);
+	const unsigned uRow = m_IdToSeqIndex[uId];
+	assert(uRow < m_uSeqCount);
+	return uRow;
+	}
+
+// Search the rows for id uId. On success stores the row index in
+// *ptruIndex and returns true; returns false (leaving *ptruIndex
+// untouched) when no row has that id.
+bool MSA::GetSeqIndex(unsigned uId, unsigned *ptruIndex) const
+	{
+	// No ids have been assigned: nothing can match, and the table must
+	// not be dereferenced.
+	if (0 == m_SeqIndexToId)
+		return false;
+
+	for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
+		{
+		if (uId == m_SeqIndexToId[uSeqIndex])
+			{
+			*ptruIndex = uSeqIndex;
+			return true;
+			}
+		}
+	return false;
+	}
+
+// Return the id recorded for row uSeqIndex in the index-to-id table.
+// Range checks are assertions only.
+unsigned MSA::GetSeqId(unsigned uSeqIndex) const
+	{
+	assert(uSeqIndex < m_uSeqCount);
+	const unsigned uRowId = m_SeqIndexToId[uSeqIndex];
+	assert(uRowId < m_uIdCount);
+	return uRowId;
+	}
+
+// True if row weights have been assigned, judged by whether the first
+// row's weight differs from the BTInsane sentinel.
+bool MSA::WeightsSet() const
+	{
+	// The weight array is null for an empty alignment and after DeleteSeq;
+	// in either case there is no first weight to inspect.
+	if (0 == m_Weights || 0 == m_uSeqCount)
+		return false;
+	return BTInsane != m_Weights[0];
+	}
+
+// Build msaOut from the rows of msaIn whose ids are listed in Ids, in the
+// order given. Each output row keeps the id, name and all columns of its
+// source row.
+void MSASubsetByIds(const MSA &msaIn, const unsigned Ids[], unsigned uIdCount,
+  MSA &msaOut)
+	{
+	const unsigned uCols = msaIn.GetColCount();
+	msaOut.SetSize(uIdCount, uCols);
+
+	for (unsigned uRowOut = 0; uRowOut < uIdCount; ++uRowOut)
+		{
+		const unsigned uId = Ids[uRowOut];
+		const unsigned uRowIn = msaIn.GetSeqIndex(uId);
+		const char *ptrName = msaIn.GetSeqName(uRowIn);
+
+		msaOut.SetSeqId(uRowOut, uId);
+		msaOut.SetSeqName(uRowOut, ptrName);
+
+		unsigned uCol = 0;
+		while (uCol < uCols)
+			{
+			msaOut.SetChar(uRowOut, uCol, msaIn.GetChar(uRowIn, uCol));
+			++uCol;
+			}
+		}
+	}
+
+// Caller must allocate ptrSeq and ptrLabel as new char[n].
+// Append a row that takes over the given character and label buffers (they
+// are stored, not copied). Row capacity is enlarged by four slots when it
+// is exhausted.
+// NOTE(review): ExpandCache allocates a buffer for every new slot and the
+// pointer stored here overwrites it -- looks like a leak; confirm.
+void MSA::AppendSeq(char *ptrSeq, unsigned uSeqLength, char *ptrLabel)
+	{
+	if (m_uSeqCount > m_uCacheSeqCount)
+		Quit("Internal error MSA::AppendSeq");
+	if (m_uCacheSeqCount == m_uSeqCount)
+		ExpandCache(m_uSeqCount + 4, uSeqLength);
+
+	const unsigned uNewRow = m_uSeqCount;
+	m_szSeqs[uNewRow] = ptrSeq;
+	m_szNames[uNewRow] = ptrLabel;
+	m_uSeqCount = uNewRow + 1;
+	}
+
+// Grow the row arrays to hold uSeqCount rows of uColCount columns.
+// Existing rows keep their buffers, names and weights; each additional
+// slot gets a fresh character buffer of uColCount bytes (names and weights
+// of the new slots are left unset). Quits if id tables exist, if the
+// request would shrink the alignment, or if rows already exist and the
+// column count differs.
+void MSA::ExpandCache(unsigned uSeqCount, unsigned uColCount)
+	{
+	if (m_IdToSeqIndex != 0 || m_SeqIndexToId != 0 || uSeqCount < m_uSeqCount)
+		Quit("Internal error MSA::ExpandCache");
+
+	if (m_uSeqCount > 0 && uColCount != m_uColCount)
+		Quit("Internal error MSA::ExpandCache, ColCount changed");
+
+	char **ptrSeqs = new char *[uSeqCount];
+	char **ptrNames = new char *[uSeqCount];
+	WEIGHT *ptrWeights = new WEIGHT[uSeqCount];
+
+	// Carry the existing rows over unchanged.
+	unsigned uRow = 0;
+	while (uRow < m_uSeqCount)
+		{
+		ptrSeqs[uRow] = m_szSeqs[uRow];
+		ptrNames[uRow] = m_szNames[uRow];
+		ptrWeights[uRow] = m_Weights[uRow];
+		++uRow;
+		}
+
+	// Give each additional slot its own character buffer.
+	while (uRow < uSeqCount)
+		{
+		char *ptrBuffer = new char[uColCount];
+#if	DEBUG
+		memset(ptrBuffer, '?', uColCount);
+#endif
+		ptrSeqs[uRow] = ptrBuffer;
+		++uRow;
+		}
+
+	// Only the old arrays of pointers go away; the rows now live in the
+	// new arrays.
+	delete[] m_szSeqs;
+	delete[] m_szNames;
+	delete[] m_Weights;
+
+	m_szSeqs = ptrSeqs;
+	m_szNames = ptrNames;
+	m_Weights = ptrWeights;
+
+	m_uCacheSeqCount = uSeqCount;
+	m_uCacheSeqLength = uColCount;
+	m_uColCount = uColCount;
+	}
+
+// Replace every character that is neither a residue nor a gap by the
+// wildcard character, recording each replacement through
+// InvalidLetterWarning and reporting them at the end.
+void MSA::FixAlpha()
+	{
+	ClearInvalidLetterWarning();
+	for (unsigned uRow = 0; uRow < m_uSeqCount; ++uRow)
+		{
+		for (unsigned uCol = 0; uCol < m_uColCount; ++uCol)
+			{
+			const char cCell = GetChar(uRow, uCol);
+			if (IsResidueChar(cCell) || IsGapChar(cCell))
+				continue;
+
+			const char cWild = GetWildcardChar();
+			// Warning("Invalid letter '%c', replaced by '%c'", c, w);
+			InvalidLetterWarning(cCell, cWild);
+			SetChar(uRow, uCol, cWild);
+			}
+		}
+	ReportInvalidLetters();
+	}
+
+// Guess the alphabet of the alignment from its content. An alignment with
+// no rows or no columns is reported as amino.
+ALPHA MSA::GuessAlpha() const
+	{
+// If at least MIN_NUCLEO_PCT of the first CHAR_COUNT non-gap
+// letters belong to the nucleotide alphabet, guess nucleo.
+// Otherwise amino.
+	const unsigned CHAR_COUNT = 100;
+	const unsigned MIN_NUCLEO_PCT = 95;
+
+	const unsigned uSeqCount = GetSeqCount();
+	const unsigned uColCount = GetColCount();
+	// The scan below divides by the column count, so an alignment without
+	// columns must be handled here as well.
+	if (0 == uSeqCount || 0 == uColCount)
+		return ALPHA_Amino;
+
+	unsigned uDNACount = 0;
+	unsigned uRNACount = 0;
+	unsigned uTotal = 0;
+	// i walks the cells row by row: row = i / columns, column = i % columns.
+	unsigned i = 0;
+	for (;;)
+		{
+		unsigned uSeqIndex = i/uColCount;
+		if (uSeqIndex >= uSeqCount)
+			break;
+		unsigned uColIndex = i%uColCount;
+		++i;
+		char c = GetChar(uSeqIndex, uColIndex);
+		if (IsGapChar(c))
+			continue;
+		if (IsDNA(c))
+			++uDNACount;
+		if (IsRNA(c))
+			++uRNACount;
+		++uTotal;
+		if (uTotal >= CHAR_COUNT)
+			break;
+		}
+	// RNA is tested first, so it wins when both thresholds are reached.
+	if (uTotal != 0 && ((uRNACount*100)/uTotal) >= MIN_NUCLEO_PCT)
+		return ALPHA_RNA;
+	if (uTotal != 0 && ((uDNACount*100)/uTotal) >= MIN_NUCLEO_PCT)
+		return ALPHA_DNA;
+	return ALPHA_Amino;
+	}

Added: trunk/packages/muscle/branches/upstream/current/msa.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/msa.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/msa.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,179 @@
+#ifndef	MSA_h
+#define MSA_h
+
+const int MAX_SEQ_NAME = 63;
+struct PathEdge;
+class TextFile;
+class Seq;
+class ClusterNode;
+class NodeCounts;
+class DataBuffer;
+
+// Multiple sequence alignment: m_uSeqCount named sequences, each stored as
+// a row of m_uColCount characters, together with per-sequence ids and
+// weights.
+class MSA
+	{
+public:
+	MSA();
+	virtual ~MSA();
+
+public:
+// Ways to create an MSA
+	void FromFile(TextFile &File);
+	void FromFASTAFile(TextFile &File);
+	void FromSeq(const Seq &s);
+
+// Ways to write an MSA to a file
+	void ToFile(TextFile &File) const;
+	void ToFASTAFile(TextFile &File) const;
+	void ToMSFFile(TextFile &File, const char *ptrComment = 0) const;
+	void ToAlnFile(TextFile &File) const;
+	void ToHTMLFile(TextFile &File) const;
+	void ToPhySequentialFile(TextFile &File) const;
+	void ToPhyInterleavedFile(TextFile &File) const;
+
+// Size and element access
+	void SetSize(unsigned uSeqCount, unsigned uColCount);
+	void SetSeqCount(unsigned uSeqCount);
+	char GetChar(unsigned uSeqIndex, unsigned uIndex) const;
+	unsigned GetLetter(unsigned uSeqIndex, unsigned uIndex) const;
+	unsigned GetLetterEx(unsigned uSeqIndex, unsigned uIndex) const;
+	const char *GetSeqName(unsigned uSeqIndex) const;
+	unsigned GetSeqId(unsigned uSeqIndex) const;
+	unsigned GetSeqIndex(unsigned uId) const;
+	bool GetSeqIndex(unsigned uId, unsigned *ptruIndex) const;
+	double GetOcc(unsigned uColIndex) const;
+// Per-column weighted letter counts and gap statistics. All results are
+// returned through the pointer / array arguments; the parameter names
+// match the definition.
+	void GetFractionalWeightedCounts(unsigned uColIndex, bool bNormalize,
+	  FCOUNT fcCounts[], FCOUNT *ptrfcGapStart, FCOUNT *ptrfcGapEnd,
+	  FCOUNT *ptrfcGapExtend, FCOUNT *ptrfOcc,
+	  FCOUNT *ptrfcLL, FCOUNT *ptrfcLG, FCOUNT *ptrfcGL, FCOUNT *ptrfcGG) const;
+	bool IsGap(unsigned uSeqIndex, unsigned uColIndex) const;
+	bool IsWildcard(unsigned uSeqIndex, unsigned uColIndex) const;
+	bool IsGapColumn(unsigned uColIndex) const;
+	bool ColumnHasGap(unsigned uColIndex) const;
+	bool IsGapSeq(unsigned uSeqIndex) const;
+
+	void SetChar(unsigned uSeqIndex, unsigned uColIndex, char c);
+	void SetSeqName(unsigned uSeqIndex, const char szName[]);
+	void SetSeqId(unsigned uSeqIndex, unsigned uId);
+	bool HasGap() const;
+	bool IsLegalLetter(unsigned uLetter) const;
+	void GetSeq(unsigned uSeqIndex, Seq &seq) const;
+	void Copy(const MSA &msa);
+	double GetCons(unsigned uColIndex) const;
+	double GetAvgCons() const;
+	double GetPctIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const;
+	bool GetSeqIndex(const char *ptrSeqName, unsigned *ptruSeqIndex) const;
+	void DeleteCol(unsigned uColIndex);
+	void DeleteColumns(unsigned uColIndex, unsigned uColCount);
+	void CopySeq(unsigned uToSeqIndex, const MSA &msaFrom, unsigned uFromSeqIndex);
+	void DeleteSeq(unsigned uSeqIndex);
+//	void DeleteEmptyCols(bool bProgress = false);
+	bool IsEmptyCol(unsigned uColIndex) const;
+
+// Sequence weights. The setters are declared const although they
+// modify the values stored behind m_Weights.
+	WEIGHT GetSeqWeight(unsigned uSeqIndex) const;
+	WEIGHT GetTotalSeqWeight() const;
+	void SetSeqWeight(unsigned uSeqIndex, WEIGHT w) const;
+	void NormalizeWeights(WEIGHT wTotal) const;
+	bool WeightsSet() const;
+
+	unsigned GetGCGCheckSum(unsigned uSeqIndex) const;
+
+	ALPHA GuessAlpha() const;
+	void FixAlpha();
+
+	unsigned UniqueResidueTypes(unsigned uColIndex) const;
+
+	void UnWeight();
+
+	void GetNodeCounts(unsigned uAlignedColIndex, NodeCounts &Counts) const;
+	void ValidateBreakMatrices() const;
+	unsigned GetCharCount(unsigned uSeqIndex, unsigned uColIndex) const;
+	const char *GetSeqBuffer(unsigned uSeqIndex) const;
+	unsigned AlignedColIndexToColIndex(unsigned uAlignedColIndex) const;
+	unsigned GetSeqLength(unsigned uSeqIndex) const;
+	void GetPWID(unsigned uSeqIndex1, unsigned uSeqIndex2, double *ptrdPWID,
+	  unsigned *ptruPosCount) const;
+
+	void GetPairMap(unsigned uSeqIndex1, unsigned uSeqIndex2, int iMap1[],
+	  int iMap2[]) const;
+
+	void LogMe() const;
+	void ListWeights() const;
+
+	void GapInfoToDataBuffer(DataBuffer &Buffer) const;
+	void GapInfoFromDataBuffer(const DataBuffer &Buffer);
+	double GetPctGroupIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const;
+
+// Discard the contents of the alignment (forwards to Free()).
+	void Clear()
+		{
+		Free();
+		}
+// Number of sequences (rows).
+	unsigned GetSeqCount() const
+		{
+		return m_uSeqCount;
+		}
+// Number of columns.
+	unsigned GetColCount() const
+		{
+		return m_uColCount;
+		}
+
+	static bool SeqsEq(const MSA &a1, unsigned uSeqIndex1, const MSA &a2,
+	  unsigned uSeqIndex2);
+
+	static void SetIdCount(unsigned uIdCount);
+
+private:
+// Weighting schemes; the two friends need access to the private
+// weight setters below.
+	friend void SetMSAWeightsMuscle(MSA &msa);
+	friend void SetThreeWayWeightsMuscle(MSA &msa);
+	void SetHenikoffWeightsPB() const;
+	void SetHenikoffWeights() const;
+	void SetGSCWeights() const;
+	void SetUniformWeights() const;
+	void SetClustalWWeights(const Tree &tree);
+
+	void Free();
+	void AppendSeq(char *ptrSeq, unsigned uSeqLength, char *ptrLabel);
+	void ExpandCache(unsigned uSeqCount, unsigned uColCount);
+	void CalcWeights() const;
+	void GetNameFromFASTAAnnotationLine(const char szLine[],
+	  char szName[], unsigned uBytes);
+	void CopyCol(unsigned uFromCol, unsigned uToCol);
+	unsigned CalcBLOSUMWeights(ClusterTree &BlosumCluster) const;
+	void SetBLOSUMSubtreeWeight(const ClusterNode *ptrNode, double dWeight) const;
+	unsigned SetBLOSUMNodeWeight(const ClusterNode *ptrNode, double dMinDist) const;
+	void SetSubtreeWeight2(const ClusterNode *ptrNode) const;
+	void SetSubtreeGSCWeight(ClusterNode *ptrNode) const;
+
+	void CalcHenikoffWeightsColPB(unsigned uColIndex) const;
+	void CalcHenikoffWeightsCol(unsigned uColIndex) const;
+
+private:
+// Logical size of the alignment.
+	unsigned m_uSeqCount;
+	unsigned m_uColCount;
+// NOTE(review): presumably the allocated capacity of the arrays below
+// (columns / rows) -- confirm against ExpandCache().
+	unsigned m_uCacheSeqLength;
+	unsigned m_uCacheSeqCount;
+// One character buffer and one name per sequence.
+	char **m_szSeqs;
+	char **m_szNames;
+
+	static unsigned m_uIdCount;
+
+// Two-way mapping between sequence ids and sequence indexes.
+	unsigned *m_IdToSeqIndex;
+	unsigned *m_SeqIndexToId;
+
+// One weight per sequence.
+	WEIGHT *m_Weights;
+	};
+
+// Free functions operating on whole alignments. Functions taking an
+// "MSA &msaOut" / "MSA &msaCat" argument build their result into it;
+// the SetXxxWeightsMuscle functions assign sequence weights to msa.
+void SeqVectFromMSA(const MSA &msa, SeqVect &v);
+void DeleteGappedCols(MSA &msa);
+void MSAFromColRange(const MSA &msaIn, unsigned uFromColIndex, unsigned uColCount,
+  MSA &msaOut);
+void MSACat(const MSA &msa1, const MSA &msa2, MSA &msaCat);
+void MSAAppend(MSA &msa1, const MSA &msa2);
+void MSAFromSeqSubset(const MSA &msaIn, const unsigned uSeqIndexes[], unsigned uSeqCount,
+  MSA &msaOut);
+void AssertMSAEq(const MSA &msa1, const MSA &msa2);
+void AssertMSAEqIgnoreCaseAndGaps(const MSA &msa1, const MSA &msa2);
+void MSASubsetByIds(const MSA &msaIn, const unsigned Ids[], unsigned uIdCount,
+  MSA &msaOut);
+void SetMSAWeightsMuscle(MSA &msa);
+void SetClustalWWeightsMuscle(MSA &msa);
+void SetThreeWayWeightsMuscle(MSA &msa);
+
+#endif	// MSA_h

Added: trunk/packages/muscle/branches/upstream/current/msa2.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/msa2.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/msa2.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,531 @@
+#include "muscle.h"
+#include "msa.h"
+#include "seqvect.h"
+#include "profile.h"
+#include "tree.h"
+
+// These global variables are a hack to allow the tree
+// dependent iteration code to communicate the edge
+// used to divide the tree. The three-way weighting
+// scheme needs to know this edge in order to compute
+// sequence weights.
+// g_ptrMuscleTree is private to this file; the two split node
+// variables have external linkage so that other files can set them.
+// NULL_NEIGHBOR in either split node means "no edge selected".
+static const Tree *g_ptrMuscleTree = 0;
+unsigned g_uTreeSplitNode1 = NULL_NEIGHBOR;
+unsigned g_uTreeSplitNode2 = NULL_NEIGHBOR;
+
+// Compute weighted letter counts and gap statistics for one column.
+//
+// uColIndex       column to analyse
+// bNormalize      if true, divide fcCounts[] by the total weight of the
+//                 sequences holding a non-wildcard letter in the column
+// fcCounts        [out] weighted count per letter (g_AlphaSize entries);
+//                 a wildcard spreads its sequence weight evenly over the
+//                 letters it may stand for
+// ptrfcGapStart   [out] weight of sequences with a gap here that have a
+//                 letter in the previous column (in column 0: any gap)
+// ptrfcGapEnd     [out] weight of sequences with a gap here that have a
+//                 letter in the next column (in the last column: any gap)
+// ptrfcGapExtend  [out] weight of sequences with a gap here and in both
+//                 neighbouring columns (zero for the first/last column)
+// ptrfOcc         [out] 1 minus the total weight of gapped sequences
+// ptrfcLL..GG     [out] weight per (previous, current) state pair, where
+//                 L = letter and G = gap; the position before column 0
+//                 counts as a letter
+void MSA::GetFractionalWeightedCounts(unsigned uColIndex, bool bNormalize,
+  FCOUNT fcCounts[], FCOUNT *ptrfcGapStart, FCOUNT *ptrfcGapEnd,
+  FCOUNT *ptrfcGapExtend, FCOUNT *ptrfOcc,
+  FCOUNT *ptrfcLL, FCOUNT *ptrfcLG, FCOUNT *ptrfcGL, FCOUNT *ptrfcGG) const
+	{
+	const unsigned uSeqCount = GetSeqCount();
+	const unsigned uColCount = GetColCount();
+
+	memset(fcCounts, 0, g_AlphaSize*sizeof(FCOUNT));
+	WEIGHT wTotal = 0;
+	FCOUNT fGap = 0;
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		const WEIGHT w = GetSeqWeight(uSeqIndex);
+		if (IsGap(uSeqIndex, uColIndex))
+			{
+			fGap += w;
+			continue;
+			}
+		else if (IsWildcard(uSeqIndex, uColIndex))
+			{
+// Wildcards contribute to fcCounts but not to wTotal.
+			const unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);
+			switch (g_Alpha)
+				{
+			case ALPHA_Amino:
+				switch (uLetter)
+					{
+				case AX_B:		// D or N
+					fcCounts[AX_D] += w/2;
+					fcCounts[AX_N] += w/2;
+					break;
+				case AX_Z:		// E or Q
+					fcCounts[AX_E] += w/2;
+					fcCounts[AX_Q] += w/2;
+					break;
+				default:		// any
+					{
+					const FCOUNT f = w/20;
+					for (unsigned n = 0; n < 20; ++n)
+						fcCounts[n] += f;
+					break;
+					}
+					}
+				break;
+
+			case ALPHA_DNA:
+			case ALPHA_RNA:
+// NOTE(review): AX_R / AX_Y are amino acid letter codes; the
+// nucleotide ambiguity codes were probably intended here -- confirm
+// against alpha.h.
+				switch (uLetter)
+					{
+				case AX_R:	// G or A
+					fcCounts[NX_G] += w/2;
+					fcCounts[NX_A] += w/2;
+					break;
+				case AX_Y:	// C or T/U
+					fcCounts[NX_C] += w/2;
+					fcCounts[NX_T] += w/2;
+					break;
+				default:	// any
+					{
+// Four nucleotides share the weight, so each receives w/4
+// (the amino acid branch uses w/20 for its twenty letters).
+					const FCOUNT f = w/4;
+					for (unsigned n = 0; n < 4; ++n)
+						fcCounts[n] += f;
+					break;
+					}
+					}
+				break;
+
+			default:
+				Quit("Alphabet %d not supported", g_Alpha);
+				}
+			continue;
+			}
+		unsigned uLetter = GetLetter(uSeqIndex, uColIndex);
+		fcCounts[uLetter] += w;
+		wTotal += w;
+		}
+	*ptrfOcc = (float) (1.0 - fGap);
+
+	if (bNormalize && wTotal > 0)
+		{
+// Weights are expected to sum to at most 1 (small tolerance allowed).
+		if (wTotal > 1.001)
+			Quit("wTotal=%g\n", wTotal);
+		for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
+			fcCounts[uLetter] /= wTotal;
+//		AssertNormalized(fcCounts);
+		}
+
+// Gap opens: gap here, letter (or alignment start) before.
+	FCOUNT fcStartCount = 0;
+	if (uColIndex == 0)
+		{
+		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+			if (IsGap(uSeqIndex, uColIndex))
+				fcStartCount += GetSeqWeight(uSeqIndex);
+		}
+	else
+		{
+		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+			if (IsGap(uSeqIndex, uColIndex) && !IsGap(uSeqIndex, uColIndex - 1))
+				fcStartCount += GetSeqWeight(uSeqIndex);
+		}
+
+// Gap closes: gap here, letter (or alignment end) after.
+	FCOUNT fcEndCount = 0;
+	if (uColCount - 1 == uColIndex)
+		{
+		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+			if (IsGap(uSeqIndex, uColIndex))
+				fcEndCount += GetSeqWeight(uSeqIndex);
+		}
+	else
+		{
+		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+			if (IsGap(uSeqIndex, uColIndex) && !IsGap(uSeqIndex, uColIndex + 1))
+				fcEndCount += GetSeqWeight(uSeqIndex);
+		}
+
+// Transition weights between the previous and the current column.
+	FCOUNT LL = 0;
+	FCOUNT LG = 0;
+	FCOUNT GL = 0;
+	FCOUNT GG = 0;
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		WEIGHT w = GetSeqWeight(uSeqIndex);
+		bool bLetterHere = !IsGap(uSeqIndex, uColIndex);
+		bool bLetterPrev = (uColIndex == 0 || !IsGap(uSeqIndex, uColIndex - 1));
+		if (bLetterHere)
+			{
+			if (bLetterPrev)
+				LL += w;
+			else
+				GL += w;
+			}
+		else
+			{
+			if (bLetterPrev)
+				LG += w;
+			else
+				GG += w;
+			}
+		}
+
+// Gap extensions: only defined for interior columns.
+	FCOUNT fcExtendCount = 0;
+	if (uColIndex > 0 && uColIndex < GetColCount() - 1)
+		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+			if (IsGap(uSeqIndex, uColIndex) && IsGap(uSeqIndex, uColIndex - 1) &&
+			  IsGap(uSeqIndex, uColIndex + 1))
+				fcExtendCount += GetSeqWeight(uSeqIndex);
+
+	*ptrfcLL = LL;
+	*ptrfcLG = LG;
+	*ptrfcGL = GL;
+	*ptrfcGG = GG;
+	*ptrfcGapStart = fcStartCount;
+	*ptrfcGapEnd = fcEndCount;
+	*ptrfcGapExtend = fcExtendCount;
+	}
+
+// Return true if the given column has no gaps and all
+// its residues are in the same biochemical group, as defined by the
+// external ResidueGroup[] table. Quits on an alignment without
+// sequences.
+bool MSAColIsConservative(const MSA &msa, unsigned uColIndex)
+	{
+	extern unsigned ResidueGroup[];
+
+// The loop below walks down one column, so its bound is the number
+// of sequences (not the number of columns).
+	const unsigned uSeqCount = msa.GetSeqCount();
+	if (0 == uSeqCount)
+		Quit("MSAColIsConservative: empty alignment");
+
+	if (msa.IsGap(0, uColIndex))
+		return false;
+
+// The first sequence supplies the reference group.
+	unsigned uLetter = msa.GetLetterEx(0, uColIndex);
+	const unsigned uGroup = ResidueGroup[uLetter];
+
+	for (unsigned uSeqIndex = 1; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		if (msa.IsGap(uSeqIndex, uColIndex))
+			return false;
+		uLetter = msa.GetLetter(uSeqIndex, uColIndex);
+		if (ResidueGroup[uLetter] != uGroup)
+			return false;
+		}
+	return true;
+	}
+
+// Build msaOut from uSeqCount consecutive sequences of msaIn, starting
+// at sequence uFromSeqIndex. Names and all columns are copied; sequence
+// ids are not.
+void MSAFromSeqRange(const MSA &msaIn, unsigned uFromSeqIndex, unsigned uSeqCount,
+  MSA &msaOut)
+	{
+	const unsigned uCols = msaIn.GetColCount();
+	msaOut.SetSize(uSeqCount, uCols);
+
+	for (unsigned uOut = 0; uOut < uSeqCount; ++uOut)
+		{
+		const unsigned uIn = uFromSeqIndex + uOut;
+		msaOut.SetSeqName(uOut, msaIn.GetSeqName(uIn));
+
+		for (unsigned uCol = 0; uCol < uCols; ++uCol)
+			msaOut.SetChar(uOut, uCol, msaIn.GetChar(uIn, uCol));
+		}
+	}
+
+// Build msaOut from uColCount consecutive columns of msaIn, starting at
+// column uFromColIndex. All sequences are kept, together with their
+// names and ids. Quits if the requested range does not lie entirely
+// inside msaIn.
+void MSAFromColRange(const MSA &msaIn, unsigned uFromColIndex, unsigned uColCount,
+  MSA &msaOut)
+	{
+	const unsigned uSeqCount = msaIn.GetSeqCount();
+	const unsigned uInColCount = msaIn.GetColCount();
+
+// The last column read is uFromColIndex + uColCount - 1, which must be
+// < uInColCount. The test is written without additions or "- 1" so
+// that it cannot wrap around, e.g. for an empty range at column 0.
+	if (uFromColIndex > uInColCount || uColCount > uInColCount - uFromColIndex)
+		Quit("MSAFromColRange, out of bounds");
+
+	msaOut.SetSize(uSeqCount, uColCount);
+
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		const char *ptrName = msaIn.GetSeqName(uSeqIndex);
+		unsigned uId = msaIn.GetSeqId(uSeqIndex);
+		msaOut.SetSeqName(uSeqIndex, ptrName);
+		msaOut.SetSeqId(uSeqIndex, uId);
+
+		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
+			{
+			const char c = msaIn.GetChar(uSeqIndex, uFromColIndex + uColIndex);
+			msaOut.SetChar(uSeqIndex, uColIndex, c);
+			}
+		}
+	}
+
+// Replace the contents of v with the sequences of msa, gaps stripped.
+// Every sequence is appended under its alignment name, including
+// sequences that end up with zero length.
+void SeqVectFromMSA(const MSA &msa, SeqVect &v)
+	{
+	v.Clear();
+	const unsigned uCount = msa.GetSeqCount();
+	for (unsigned n = 0; n < uCount; ++n)
+		{
+		Seq seq;
+		msa.GetSeq(n, seq);
+		seq.StripGaps();
+		seq.SetName(msa.GetSeqName(n));
+		v.AppendSeq(seq);
+		}
+	}
+
+// Delete every column of msa for which IsGapColumn() is true.
+// After a deletion the same index is examined again, because the
+// following columns have moved down by one.
+void DeleteGappedCols(MSA &msa)
+	{
+	unsigned uCol = 0;
+	while (uCol < msa.GetColCount())
+		{
+		if (!msa.IsGapColumn(uCol))
+			{
+			++uCol;
+			continue;
+			}
+		msa.DeleteCol(uCol);
+		}
+	}
+
+// Build msaOut from the sequences of msaIn selected by uSeqIndexes[]
+// (uSeqCount entries), in the order given. Names, ids and all columns
+// are copied.
+void MSAFromSeqSubset(const MSA &msaIn, const unsigned uSeqIndexes[], unsigned uSeqCount,
+  MSA &msaOut)
+	{
+	const unsigned uCols = msaIn.GetColCount();
+	msaOut.SetSize(uSeqCount, uCols);
+	for (unsigned uOut = 0; uOut < uSeqCount; ++uOut)
+		{
+		const unsigned uIn = uSeqIndexes[uOut];
+		const char *ptrName = msaIn.GetSeqName(uIn);
+		const unsigned uId = msaIn.GetSeqId(uIn);
+		msaOut.SetSeqName(uOut, ptrName);
+		msaOut.SetSeqId(uOut, uId);
+		for (unsigned uCol = 0; uCol < uCols; ++uCol)
+			msaOut.SetChar(uOut, uCol, msaIn.GetChar(uIn, uCol));
+		}
+	}
+
+// Quit unless msa1 and msa2 hold the same number of sequences and every
+// sequence of msa1 equals, ignoring case and gaps, the sequence of msa2
+// that carries the same id. The offending pair is logged before quitting.
+void AssertMSAEqIgnoreCaseAndGaps(const MSA &msa1, const MSA &msa2)
+	{
+	const unsigned uCount1 = msa1.GetSeqCount();
+	const unsigned uCount2 = msa2.GetSeqCount();
+	if (uCount1 != uCount2)
+		Quit("Seq count differs");
+
+	for (unsigned uIndex1 = 0; uIndex1 < uCount1; ++uIndex1)
+		{
+		Seq seq1;
+		msa1.GetSeq(uIndex1, seq1);
+
+		// Sequences are matched by id, not by position.
+		const unsigned uIndex2 = msa2.GetSeqIndex(msa1.GetSeqId(uIndex1));
+
+		Seq seq2;
+		msa2.GetSeq(uIndex2, seq2);
+
+		if (seq1.EqIgnoreCaseAndGaps(seq2))
+			continue;
+
+		Log("Input:\n");
+		seq1.LogMe();
+		Log("Output:\n");
+		seq2.LogMe();
+		Quit("Seq %s differ ", msa1.GetSeqName(uIndex1));
+		}
+	}
+
+// Quit unless msa1 and msa2 hold the same number of sequences and every
+// sequence of msa1 equals (Seq::Eq) the sequence of msa2 that carries
+// the same id. The offending pair is logged before quitting.
+void AssertMSAEq(const MSA &msa1, const MSA &msa2)
+	{
+	const unsigned uCount1 = msa1.GetSeqCount();
+	const unsigned uCount2 = msa2.GetSeqCount();
+	if (uCount1 != uCount2)
+		Quit("Seq count differs");
+
+	for (unsigned uIndex1 = 0; uIndex1 < uCount1; ++uIndex1)
+		{
+		Seq seq1;
+		msa1.GetSeq(uIndex1, seq1);
+
+		// Sequences are matched by id, not by position.
+		const unsigned uIndex2 = msa2.GetSeqIndex(msa1.GetSeqId(uIndex1));
+
+		Seq seq2;
+		msa2.GetSeq(uIndex2, seq2);
+
+		if (seq1.Eq(seq2))
+			continue;
+
+		Log("Input:\n");
+		seq1.LogMe();
+		Log("Output:\n");
+		seq2.LogMe();
+		Quit("Seq %s differ ", msa1.GetSeqName(uIndex1));
+		}
+	}
+
+// Assign sequence weights to msa using the scheme currently selected by
+// GetSeqWeightMethod(). Quits on an unknown scheme.
+void SetMSAWeightsMuscle(MSA &msa)
+	{
+	const SEQWEIGHT Method = GetSeqWeightMethod();
+	switch (Method)
+		{
+	case SEQWEIGHT_None:
+		msa.SetUniformWeights();
+		break;
+
+	case SEQWEIGHT_Henikoff:
+		msa.SetHenikoffWeights();
+		break;
+
+	case SEQWEIGHT_HenikoffPB:
+		msa.SetHenikoffWeightsPB();
+		break;
+
+	case SEQWEIGHT_GSC:
+		msa.SetGSCWeights();
+		break;
+
+	case SEQWEIGHT_ClustalW:
+		SetClustalWWeightsMuscle(msa);
+		break;
+
+	case SEQWEIGHT_ThreeWay:
+		SetThreeWayWeightsMuscle(msa);
+		break;
+
+	default:
+		Quit("SetMSAWeightsMuscle, Invalid method=%d", Method);
+		}
+	}
+
+// Per-id weight table filled in by SetMuscleTree(), and its length.
+static WEIGHT *g_MuscleWeights;
+static unsigned g_uMuscleIdCount;
+
+// Return the stored weight of the sequence with id uId.
+// Quits if no weights have been computed yet or if uId is out of range.
+WEIGHT GetMuscleSeqWeightById(unsigned uId)
+	{
+	const bool bHaveWeights = (0 != g_MuscleWeights);
+	if (!bHaveWeights)
+		Quit("g_MuscleWeights = 0");
+
+	const bool bIdInRange = (uId < g_uMuscleIdCount);
+	if (!bIdInRange)
+		Quit("GetMuscleSeqWeightById(%u): count=%u",
+		  uId, g_uMuscleIdCount);
+
+	return g_MuscleWeights[uId];
+	}
+
+// Remember tree as the current guide tree (only its address is kept, so
+// the caller must keep the tree alive). When the ClustalW weighting
+// scheme is selected, the per-leaf weight table is rebuilt from the tree.
+void SetMuscleTree(const Tree &tree)
+	{
+	g_ptrMuscleTree = &tree;
+
+	if (SEQWEIGHT_ClustalW == GetSeqWeightMethod())
+		{
+		// Drop the table of any previous tree before allocating anew.
+		delete[] g_MuscleWeights;
+
+		g_uMuscleIdCount = tree.GetLeafCount();
+		g_MuscleWeights = new WEIGHT[g_uMuscleIdCount];
+		CalcClustalWWeights(tree, g_MuscleWeights);
+		}
+	}
+
+// Give each sequence of msa the weight stored for its id in the table
+// built by SetMuscleTree(), then normalize the weights to a total of 1.
+// Quits if the table has not been built or an id lies outside it.
+void SetClustalWWeightsMuscle(MSA &msa)
+	{
+	if (0 == g_MuscleWeights)
+		Quit("g_MuscleWeights = 0");
+
+	const unsigned uCount = msa.GetSeqCount();
+	for (unsigned n = 0; n < uCount; ++n)
+		{
+		const unsigned uSeqId = msa.GetSeqId(n);
+		if (uSeqId >= g_uMuscleIdCount)
+			Quit("SetClustalWWeightsMuscle: id out of range");
+		const WEIGHT w = g_MuscleWeights[uSeqId];
+		msa.SetSeqWeight(n, w);
+		}
+	msa.NormalizeWeights((WEIGHT) 1.0);
+	}
+
+#define	LOCAL_VERBOSE	0
+
+// Assign three-way weights to the sequences of msa.
+//
+// The weights are computed from the current guide tree and the edge
+// (g_uTreeSplitNode1, g_uTreeSplitNode2) that was used to split it,
+// looked up per sequence id, and finally normalized to a total of 1.
+// If no split edge is set, Henikoff PB weights are used instead.
+// Quits if a split edge is set but no tree has been registered with
+// SetMuscleTree(), or if a sequence id lies outside the tree's leaves.
+void SetThreeWayWeightsMuscle(MSA &msa)
+	{
+	if (NULL_NEIGHBOR == g_uTreeSplitNode1 || NULL_NEIGHBOR == g_uTreeSplitNode2)
+		{
+		msa.SetHenikoffWeightsPB();
+		return;
+		}
+
+// Fail with a diagnostic rather than dereferencing a null pointer.
+	if (0 == g_ptrMuscleTree)
+		Quit("SetThreeWayWeightsMuscle: tree not set");
+
+	const unsigned uMuscleSeqCount = g_ptrMuscleTree->GetLeafCount();
+	WEIGHT *Weights = new WEIGHT[uMuscleSeqCount];
+
+	CalcThreeWayWeights(*g_ptrMuscleTree, g_uTreeSplitNode1, g_uTreeSplitNode2,
+	  Weights);
+
+	const unsigned uMSASeqCount = msa.GetSeqCount();
+	for (unsigned uSeqIndex = 0; uSeqIndex < uMSASeqCount; ++uSeqIndex)
+		{
+		const unsigned uId = msa.GetSeqId(uSeqIndex);
+		if (uId >= uMuscleSeqCount)
+			Quit("SetThreeWayWeightsMuscle: id out of range");
+		msa.SetSeqWeight(uSeqIndex, Weights[uId]);
+		}
+#if	LOCAL_VERBOSE
+	{
+	Log("SetThreeWayWeightsMuscle\n");
+	for (unsigned n = 0; n < uMSASeqCount; ++n)
+		{
+		const unsigned uId = msa.GetSeqId(n);
+		Log("%20.20s %6.3f\n", msa.GetSeqName(n), Weights[uId]);
+		}
+	}
+#endif
+	msa.NormalizeWeights((WEIGHT) 1.0);
+
+	delete[] Weights;
+	}
+
+// Append msa2 at the end of msa1.
+// For each sequence of msa1, the columns of the msa2 sequence carrying
+// the same id are written after msa1's existing columns.
+// NOTE(review): this relies on MSA::SetChar accepting column indexes
+// beyond msa1's current column count -- confirm.
+void MSAAppend(MSA &msa1, const MSA &msa2)
+	{
+	const unsigned uSeqCount = msa1.GetSeqCount();
+
+	const unsigned uColCount1 = msa1.GetColCount();
+	const unsigned uColCount2 = msa2.GetColCount();
+
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+// Sequences are matched by id, not by position.
+		unsigned uId = msa1.GetSeqId(uSeqIndex);
+		unsigned uSeqIndex2 = msa2.GetSeqIndex(uId);
+		for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
+			{
+			const char c = msa2.GetChar(uSeqIndex2, uColIndex);
+			msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, c);
+			}
+		}
+	}
+
+// "Catenate" two MSAs (by bad analogy with UNIX cat command).
+// msa1 and msa2 are matched by sequence name, so the sequences may
+// appear in a different order in each.
+// msaCat is the combined alignment produced by appending the columns
+// of msa2 to those of msa1. A sequence of msa1 whose name is not
+// found in msa2 is padded with '-' instead.
+void MSACat(const MSA &msa1, const MSA &msa2, MSA &msaCat)
+	{
+	const unsigned uSeqs = msa1.GetSeqCount();
+
+	const unsigned uCols1 = msa1.GetColCount();
+	const unsigned uCols2 = msa2.GetColCount();
+	const unsigned uColsCat = uCols1 + uCols2;
+
+	msaCat.SetSize(uSeqs, uColsCat);
+
+	for (unsigned uSeq = 0; uSeq < uSeqs; ++uSeq)
+		{
+		// Left part: the columns of msa1.
+		for (unsigned uCol = 0; uCol < uCols1; ++uCol)
+			msaCat.SetChar(uSeq, uCol, msa1.GetChar(uSeq, uCol));
+
+		const char *ptrSeqName = msa1.GetSeqName(uSeq);
+		unsigned uSeq2;
+		msaCat.SetSeqName(uSeq, ptrSeqName);
+		const bool bFound = msa2.GetSeqIndex(ptrSeqName, &uSeq2);
+
+		// Right part: the matching msa2 sequence, or gaps if none.
+		for (unsigned uCol = 0; uCol < uCols2; ++uCol)
+			{
+			const char c = bFound ? msa2.GetChar(uSeq2, uCol) : '-';
+			msaCat.SetChar(uSeq, uCols1 + uCol, c);
+			}
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/msadist.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/msadist.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/msadist.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,34 @@
+#ifndef MSADist_h
+#define MSADist_h
+
+#include <math.h>
+
+// Computes the distance between two sequences of an alignment from
+// their pairwise identity, using the measure selected at construction.
+class MSADist
+	{
+public:
+	MSADist(DISTANCE Distance) : m_Distance(Distance)
+		{
+		}
+
+	// Distance between sequences uSeqIndex1 and uSeqIndex2 of msa.
+	// Quits if the selected measure is not one of the supported ones.
+	double ComputeDist(const MSA &msa, unsigned uSeqIndex1, unsigned uSeqIndex2)
+		{
+		const double dPctId = msa.GetPctIdentityPair(uSeqIndex1, uSeqIndex2);
+
+		if (DISTANCE_PctIdKimura == m_Distance)
+			return KimuraDist(dPctId);
+
+		if (DISTANCE_PctIdLog == m_Distance)
+			{
+			// Identities below the floor are raised to it, which
+			// bounds the logarithm.
+			const double dFloor = 0.05;
+			return -log(dPctId < dFloor ? dFloor : dPctId);
+			}
+
+		Quit("MSADist::ComputeDist, invalid DISTANCE_%u", m_Distance);
+		return 0;
+		}
+
+private:
+	DISTANCE m_Distance;
+	};
+
+#endif	// MSADist_h

Added: trunk/packages/muscle/branches/upstream/current/msadistkimura.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/msadistkimura.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/msadistkimura.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,88 @@
+#include "muscle.h"
+#include "msa.h"
+#include <math.h>
+
+// "Standard" NJ distance: the Kimura measure.
+// This is defined to be:
+//
+//		-log_e(1 - p - p*p/5)
+//
+// where p is the fraction of residues that differ, i.e.:
+//
+//		p = (1 - fractional_conservation)
+//
+// This measure is infinite for p = 0.8541 and is considered
+// unreliable for p >= 0.75 (according to the ClustalW docs).
+// ClustalW uses a table lookup for values > 0.75.
+// The following table was copied from the ClustalW file dayhoff.h.
+//
+// Entry i holds the estimated distance in PAMs for an observed
+// difference of 75.0% + i*0.1%, so the table runs from 75.0% (first
+// entry) to 93.0% (last entry).
+
+static int dayhoff_pams[]={
+  195,   /* 75.0% observed d; 195 PAMs estimated = 195% estimated d */
+  196,   /* 75.1% observed d; 196 PAMs estimated */
+                  197,    198,    199,    200,    200,    201,    202,  203,    
+  204,    205,    206,    207,    208,    209,    209,    210,    211,  212,    
+  213,    214,    215,    216,    217,    218,    219,    220,    221,  222,    
+  223,    224,    226,    227,    228,    229,    230,    231,    232,  233,    
+  234,    236,    237,    238,    239,    240,    241,    243,    244,  245,    
+  246,    248,    249,    250,    /* 250 PAMs = 80.3% observed d */          
+                                  252,    253,    254,    255,    257,  258,    
+  260,    261,    262,    264,    265,    267,    268,    270,    271,  273,    
+  274,    276,    277,    279,    281,    282,    284,    285,    287,  289,    
+  291,    292,    294,    296,    298,    299,    301,    303,    305,  307,    
+  309,    311,    313,    315,    317,    319,    321,    323,    325,  328,    
+  330,    332,    335,    337,    339,    342,    344,    347,    349,  352,    
+  354,    357,    360,    362,    365,    368,    371,    374,    377,  380,    
+  383,    386,    389,    393,    396,    399,    403,    407,    410,  414,    
+  418,    422,    426,    430,    434,    438,    442,    447,    451,  456,    
+  461,    466,    471,    476,    482,    487,    493,    498,    504,  511,    
+  517,    524,    531,    538,    545,    553,    560,    569,    577,  586,    
+  595,    605,    615,    626,    637,    649,    661,    675,    688,  703,    
+  719,    736,    754,    775,    796,    819,    845,    874,    907,  945,
+         /* 92.9% observed; 945 PAMs */    
+  988    /* 93.0% observed; 988 PAMs */
+};
+// Number of entries in dayhoff_pams[].
+static int iTableEntries = sizeof(dayhoff_pams)/sizeof(dayhoff_pams[0]);
+
+// Convert a fractional identity into a Kimura distance.
+//
+// With p = 1 - dPctId (the fraction of differing residues):
+//   p <  0.75   Kimura's empirical formula is used;
+//   p >  0.93   the fixed value 10.0 is returned (per ClustalW);
+//   otherwise   the value comes from the dayhoff_pams[] table, which
+//               is indexed in steps of 0.001 starting at p = 0.75.
+double KimuraDist(double dPctId)
+	{
+	const double dFormulaLimit = 0.75;
+	const double dTableLimit = 0.93;
+
+	const double p = 1 - dPctId;
+
+	if (p < dFormulaLimit)
+		return -log(1 - p - (p*p)/5);
+
+	if (p > dTableLimit)
+		return 10.0;
+
+	assert(p <= 1 && p >= 0.75);
+
+// Thanks to Michael Hoel for pointing out a bug
+// in the table index calculation in versions <= 3.52.
+// Adding 0.5 before truncation rounds to the nearest table entry.
+	const int iEntry = (int) ((p - 0.75)*1000 + 0.5);
+	const bool bEntryValid = (iEntry >= 0 && iEntry < iTableEntries);
+	if (!bEntryValid)
+		Quit("Internal error in MSADistKimura::ComputeDist");
+
+// Table values are in PAMs; the result is expressed in units of 100 PAMs.
+	return dayhoff_pams[iEntry] / 100.0;
+	}
+
+//double MSADistKimura::ComputeDist(const MSA &msa, unsigned uSeqIndex1,
+//  unsigned uSeqIndex2)
+//	{
+//	double dPctId = msa.GetPctIdentityPair(uSeqIndex1, uSeqIndex2);
+//	return KimuraDist(dPctId);
+//	}
+
+// Invert Kimura's formula: given d = -log(1 - p - p*p/5), recover p and
+// return the corresponding fractional identity 1 - p.
+double KimuraDistToPctId(double dKimuraDist)
+	{
+// Rearranged, the formula reads a*p*p + b*p - c = 0 with the
+// coefficients below; the root with the positive square root is taken.
+	const double a = 0.2;
+	const double b = 1;
+	const double c = 1.0 - exp(-dKimuraDist);
+
+	const double dDiscriminant = b*b + 4*a*c;
+	const double dFractDiff = (-b + sqrt(dDiscriminant))/(2*a);
+	return 1 - dFractDiff;
+	}
+
+// Height corresponding to a fractional identity: simply the Kimura
+// distance for that identity.
+double PctIdToHeightKimura(double dPctId)
+	{
+	return KimuraDist(dPctId);
+	}

Added: trunk/packages/muscle/branches/upstream/current/msf.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/msf.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/msf.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,121 @@
+#include "muscle.h"
+#include <stdio.h>
+#include <ctype.h>
+#include "msa.h"
+#include "textfile.h"
+
+// Longest sequence name (in characters) written to an MSF file.
+const int MAX_NAME = 63;
+
+// Layout of the sequence data: characters per output line, and
+// characters per blank-separated block within a line.
+const unsigned uCharsPerLine = 50;
+const unsigned uCharsPerBlock = 10;
+
+// Truncate at first white space or MAX_NAME, whichever comes
+// first, then pad with blanks up to PadLength.
+//
+// PadLength is limited to the range 0..MAX_NAME. The result is cut to
+// exactly PadLength characters, so a name longer than PadLength is
+// truncated further.
+// The returned pointer refers to a static buffer that is overwritten
+// by the next call.
+static const char *GetPaddedName(const char *Name, int PadLength)
+	{
+	static char PaddedName[MAX_NAME+1];
+
+// Keep the terminator inside the buffer whatever the caller asks for.
+	if (PadLength < 0)
+		PadLength = 0;
+	else if (PadLength > MAX_NAME)
+		PadLength = MAX_NAME;
+
+	memset(PaddedName, ' ', MAX_NAME);
+	size_t n = strcspn(Name, " \t");
+// Never copy more than the buffer can hold.
+	if (n > (size_t) MAX_NAME)
+		n = (size_t) MAX_NAME;
+	memcpy(PaddedName, Name, n);
+	PaddedName[PadLength] = 0;
+	return PaddedName;
+	}
+
+// Return a pointer to the first position in s that holds one of the
+// characters of t, or to the terminating zero of s if there is none.
+// Returns 0 when that position is the very start of s.
+// NOTE(review): given the name, 0 was probably meant to signal "not
+// found" (i.e. s[n] == 0) rather than "found at offset 0" -- confirm
+// the intent before using; no caller is visible in this file.
+static const char *strfind(const char *s, const char *t)
+	{
+	size_t n = strcspn(s, t);
+	if (0 == n)
+		return 0;
+	return s + n;
+	}
+
+// GCG checksum code kindly provided by Eric Martel.
+// The checksum of a sequence is the sum, reduced modulo 10000 after
+// every column, of each character code multiplied by a position weight
+// that cycles through 1..57.
+unsigned MSA::GetGCGCheckSum(unsigned uSeqIndex) const
+	{
+	unsigned uSum = 0;
+	const unsigned uCols = GetColCount();
+	for (unsigned uCol = 0; uCol < uCols; ++uCol)
+		{
+		const unsigned uChar = (unsigned) GetChar(uSeqIndex, uCol);
+		const unsigned uPosWeight = uCol%57 + 1;
+		uSum = (uSum + uChar*uPosWeight) % 10000;
+		}
+	return uSum;
+	}
+
+// Rewrite every gap position of a with '.'.
+static void MSFFixGaps(MSA &a)
+	{
+	const int nSeqs = a.GetSeqCount();
+	const int nCols = a.GetColCount();
+	for (int iSeq = 0; iSeq < nSeqs; ++iSeq)
+		for (int iCol = 0; iCol < nCols; ++iCol)
+			{
+			if (!a.IsGap(iSeq, iCol))
+				continue;
+			a.SetChar(iSeq, iCol, '.');
+			}
+	}
+
+// Write the alignment to File in MSF format.
+//
+// File        destination
+// ptrComment  optional text for the "Comment:" line; when 0 an empty
+//             line is written in its place
+//
+// Although declared const, this function modifies the alignment: the
+// sequence weights are recomputed and every gap is rewritten as '.'.
+void MSA::ToMSFFile(TextFile &File, const char *ptrComment) const
+	{
+// Cast away const, yuck
+	SetMSAWeightsMuscle((MSA &) *this);
+	MSFFixGaps((MSA &) *this);
+
+// File header
+	File.PutString("PileUp\n");
+	
+	if (0 != ptrComment)
+		File.PutFormat("Comment: %s\n", ptrComment);
+	else
+		File.PutString("\n");
+
+// 'N' for the nucleotide alphabets, 'A' otherwise
+	char seqtype = (g_Alpha == ALPHA_DNA || g_Alpha == ALPHA_RNA) ? 'N' : 'A';
+	File.PutFormat("  MSF: %u  Type: %c  Check: 0000  ..\n\n",
+	  GetColCount(), seqtype);
+
+// Find the longest name (up to its first white space, at most MAX_NAME
+// characters); all names are padded to this width below.
+	int iLongestNameLength = 0;
+	for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
+		{
+		const char *Name = GetSeqName(uSeqIndex);
+		const char *PaddedName = GetPaddedName(Name, MAX_NAME);
+		int iLength = (int) strcspn(PaddedName, " \t");
+		if (iLength > iLongestNameLength)
+			iLongestNameLength = iLength;
+		}
+		
+// One "Name:" line per sequence with length, checksum and weight
+	for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
+		{
+		const char *Name = GetSeqName(uSeqIndex);
+		const char *PaddedName = GetPaddedName(Name, iLongestNameLength);
+		File.PutFormat(" Name: %s", PaddedName);
+		File.PutFormat("  Len: %u  Check: %5u  Weight: %g\n",
+		  GetColCount(), GetGCGCheckSum(uSeqIndex), GetSeqWeight(uSeqIndex));
+		}
+	File.PutString("\n//\n");
+// Nothing more to write for an alignment without columns; this also
+// protects the "GetColCount() - 1" computations below.
+	if (0 == GetColCount())
+		return;
+
+// Sequence data: groups of uCharsPerLine columns, each group listing
+// every sequence on its own line.
+	unsigned uLineCount = (GetColCount() - 1)/uCharsPerLine + 1;
+	for (unsigned uLineIndex = 0; uLineIndex < uLineCount; ++uLineIndex)
+		{
+		File.PutString("\n");
+		unsigned uStartColIndex = uLineIndex*uCharsPerLine;
+		unsigned uEndColIndex = uStartColIndex + uCharsPerLine - 1;
+// The last group may be shorter
+		if (uEndColIndex >= GetColCount())
+			uEndColIndex = GetColCount() - 1;
+		for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
+			{
+			const char *Name = GetSeqName(uSeqIndex);
+			const char *PaddedName = GetPaddedName(Name, iLongestNameLength);
+			File.PutFormat("%s   ", PaddedName);
+			for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
+			  ++uColIndex)
+				{
+// A blank precedes every block of uCharsPerBlock characters
+				if (0 == uColIndex%uCharsPerBlock)
+					File.PutString(" ");
+				char c = GetChar(uSeqIndex, uColIndex);
+				File.PutFormat("%c", c);
+				}
+			File.PutString("\n");
+			}
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/muscle.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/muscle.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/muscle.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,130 @@
+#include "muscle.h"
+#include "msa.h"
+#include "seqvect.h"
+#include "msa.h"
+#include "tree.h"
+#include "profile.h"
+
+extern void DeleteProgNode(ProgNode &Node);
+
+// Release the array of progressive alignment nodes returned by
+// ProgressiveAlignE. A null pointer is accepted and ignored.
+static void FreeProgNodeArray(ProgNode *ProgNodes, unsigned uNodeCount)
+	{
+	if (0 == ProgNodes)
+		return;
+	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
+		DeleteProgNode(ProgNodes[uNodeIndex]);
+	delete[] ProgNodes;
+	}
+
+// Align the sequences in v and store the result in msaOut.
+//
+// Steps:
+//   1. Select the alphabet (guessed from v unless g_SeqType forces
+//      one) and replace invalid letters in v.
+//   2. Build a guide tree and run a progressive alignment.
+//   3. Unless g_uMaxIters is 1 or there are only two sequences,
+//      refine the tree and then the alignment for the remaining
+//      g_uMaxIters - 2 iterations.
+//
+// Quits if v is empty or g_SeqType is invalid. A single amino acid
+// sequence is returned unchanged as a one-row alignment.
+void MUSCLE(SeqVect &v, MSA &msaOut)
+	{
+	const unsigned uSeqCount = v.Length();
+
+	if (0 == uSeqCount)
+		Quit("No sequences in input file");
+
+	ALPHA Alpha = ALPHA_Undefined;
+	switch (g_SeqType)
+		{
+	case SEQTYPE_Auto:
+		Alpha = v.GuessAlpha();
+		break;
+
+	case SEQTYPE_Protein:
+		Alpha = ALPHA_Amino;
+		break;
+
+	case SEQTYPE_RNA:
+		Alpha = ALPHA_RNA;
+		break;
+
+	case SEQTYPE_DNA:
+		Alpha = ALPHA_DNA;
+		break;
+
+	default:
+		Quit("Invalid seq type");
+		}
+	SetAlpha(Alpha);
+	v.FixAlpha();
+
+// Nucleotide input uses its own scoring function and distance measure
+	if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)
+		{
+		SetPPScore(PPSCORE_SPN);
+		g_Distance1 = DISTANCE_Kmer4_6;
+		}
+
+// Longest and total sequence length, for the statistics below
+	unsigned uMaxL = 0;
+	unsigned uTotL = 0;
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		unsigned L = v.GetSeq(uSeqIndex).Length();
+		uTotL += L;
+		if (L > uMaxL)
+			uMaxL = L;
+		}
+
+	SetIter(1);
+	g_bDiags = g_bDiags1;
+	SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);
+
+	MSA::SetIdCount(uSeqCount);
+
+//// Initialize sequence ids.
+//// From this point on, ids must somehow propogate from here.
+//	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+//		v.SetSeqId(uSeqIndex, uSeqIndex);
+
+	if (uSeqCount > 1)
+		MHackStart(v);
+
+	if (0 == uSeqCount)
+		{
+		msaOut.Clear();
+		return;
+		}
+
+	if (1 == uSeqCount && ALPHA_Amino == Alpha)
+		{
+		const Seq &s = v.GetSeq(0);
+		msaOut.FromSeq(s);
+		return;
+		}
+
+// First iteration
+	Tree GuideTree;
+	TreeFromSeqVect(v, GuideTree, g_Cluster1, g_Distance1, g_Root1);
+
+	SetMuscleTree(GuideTree);
+
+// ProgNodes is only allocated by the g_bLow variant; it stays null
+// otherwise.
+	ProgNode *ProgNodes = 0;
+	if (g_bLow)
+		ProgNodes = ProgressiveAlignE(v, GuideTree, msaOut);
+	else
+		ProgressiveAlign(v, GuideTree, msaOut);
+	SetCurrentAlignment(msaOut);
+
+	if (1 == g_uMaxIters || 2 == uSeqCount)
+		{
+// Do not leak the node array when stopping after the first iteration
+		FreeProgNodeArray(ProgNodes, GuideTree.GetNodeCount());
+		ProgNodes = 0;
+		MHackEnd(msaOut);
+		return;
+		}
+
+// Second iteration: refine the guide tree
+	g_bDiags = g_bDiags2;
+	SetIter(2);
+
+	if (g_bLow)
+		{
+		if (0 != g_uMaxTreeRefineIters)
+			RefineTreeE(msaOut, v, GuideTree, ProgNodes);
+		}
+	else
+		RefineTree(msaOut, GuideTree);
+
+// The helper tolerates the null pointer of the !g_bLow case, which
+// must not be indexed.
+	FreeProgNodeArray(ProgNodes, GuideTree.GetNodeCount());
+	ProgNodes = 0;
+
+// Remaining iterations: refine the alignment itself
+	SetSeqWeightMethod(g_SeqWeight2);
+	SetMuscleTree(GuideTree);
+
+	if (g_bAnchors)
+		RefineVert(msaOut, GuideTree, g_uMaxIters - 2);
+	else
+		RefineHoriz(msaOut, GuideTree, g_uMaxIters - 2, false, false);
+
+	MHackEnd(msaOut);
+	}

Added: trunk/packages/muscle/branches/upstream/current/muscle.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/muscle.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/muscle.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,328 @@
+#if	DEBUG && !_DEBUG	// keep DEBUG and _DEBUG in step: defining either one...
+#define _DEBUG	1
+#endif
+
+#if	_DEBUG && !DEBUG	// ...turns the other one on as well
+#define DEBUG	1
+#endif
+
+#if	_MSC_VER	// TIMING is only given a value (0) under the Microsoft compiler
+#define TIMING	0
+#endif
+
+#define VER_3_52	0	// NOTE(review): looks like a 3.52-compatibility switch, off here -- confirm
+
+#ifdef	_MSC_VER	// Microsoft compiler
+#pragma warning(disable : 4800)	// disable int-bool conversion warning
+#endif
+
+#define MUSCLE_LONG_VERSION		"MUSCLE v3.6 by Robert C. Edgar"
+#define MUSCLE_MAJOR_VERSION	"3"	// keep in step with the "v3.6" in MUSCLE_LONG_VERSION
+#define MUSCLE_MINOR_VERSION	"6"
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+#define DOUBLE_AFFINE	0	// 0 compiles out everything guarded by "#if DOUBLE_AFFINE"
+#define SINGLE_AFFINE	1
+#define PAF				0
+#define HYDRO			1
+
+#include "types.h"
+#include "intmath.h"
+#include "alpha.h"
+#include "params.h"
+
+#ifndef _WIN32	// outside Windows, map the Microsoft-style names onto C library calls
+#define stricmp strcasecmp
+#define strnicmp strncasecmp
+#define	_snprintf snprintf
+#define _fsopen(name, mode, share)	fopen((name), (mode))	// the 'share' argument is dropped
+#endif
+
+#if	DEBUG	// DEBUG builds: assert() forwards file, line, condition and its text to Call_MY_ASSERT
+#undef	assert
+#define assert(b)	Call_MY_ASSERT(__FILE__, __LINE__, b, #b)
+void Call_MY_ASSERT(const char *file, int line, bool b, const char *msg);
+#else	// other builds: assert() expands to ((void)0), so its argument is never evaluated
+#define assert(exp)     ((void)0)
+#endif
+
+extern int g_argc;
+extern char **g_argv;
+
+#define Rotate(a, b, c)	{ SCORE *tmp = a; a = b; b = c; c = tmp; }	// cycles three SCORE* lvalues: a<-b, b<-c, c<-old a; expands to a bare { } block, arguments are evaluated more than once
+
+const double VERY_LARGE_DOUBLE = 1e20;
+
+extern unsigned g_uTreeSplitNode1;
+extern unsigned g_uTreeSplitNode2;
+
+// Number of elements in array a[] (a must be a true array, not a pointer)
+#define countof(a)	(sizeof(a)/sizeof((a)[0]))
+
+// Maximum of two of any type (each argument may be evaluated twice)
+#define	Max2(a, b)			((a) > (b) ? (a) : (b))
+
+// Maximum of three of any type (built from Max2, so arguments are re-evaluated)
+#define	Max3(a, b, c)		Max2(Max2(a, b), c)
+
+// Minimum of two of any type (each argument may be evaluated twice)
+#define Min2(a, b)		((a) < (b) ? (a) : (b))
+
+// Maximum of four of any type (built from Max2, so arguments are re-evaluated)
+#define Max4(a, b, c, d)	Max2(Max2(a, b), Max2(c, d))
+
+const double VERY_NEGATIVE_DOUBLE = -9e29;
+const float VERY_NEGATIVE_FLOAT = (float) -9e29;
+
+const double BLOSUM_DIST = 0.62;	// todo settable
+
+// "Insane" placeholder values, one per type, for variables that are not initialized yet
+const unsigned uInsane = 8888888;
+const int iInsane = 8888888;
+const SCORE scoreInsane = 8888888;
+const char cInsane = (char) 0xcd;		// 0xCD is the opcode byte of the x86 "int" instruction; used e.g. for uninitialized memory
+const double dInsane = VERY_NEGATIVE_DOUBLE;
+const float fInsane = VERY_NEGATIVE_FLOAT;
+const char INVALID_STATE = '*';
+const BASETYPE BTInsane = (BASETYPE) dInsane;	// the double sentinel narrowed to BASETYPE
+const WEIGHT wInsane = BTInsane;
+
+extern double g_dNAN;
+
+extern unsigned long g_tStart;
+
+void Quit(const char szFormat[], ...);
+void Warning(const char szFormat[], ...);
+void TrimBlanks(char szStr[]);
+void TrimLeadingBlanks(char szStr[]);
+void TrimTrailingBlanks(char szStr[]);
+void Log(const char szFormat[], ...);
+bool Verbose();
+const char *ScoreToStr(SCORE Score);
+const char *ScoreToStrL(SCORE Score);
+SCORE StrToScore(const char *pszStr);
+void Break();
+
+double VecSum(const double v[], unsigned n);
+bool IsValidInteger(const char *Str);
+bool IsValidSignedInteger(const char *Str);
+bool IsValidIdentifier(const char *Str);
+bool IsValidFloatChar(char c);
+bool isident(char c);
+bool isidentf(char c);
+
+void TreeFromSeqVect(const SeqVect &c, Tree &tree, CLUSTER Cluster,
+  DISTANCE Distance, ROOT Root);
+void TreeFromMSA(const MSA &msa, Tree &tree, CLUSTER Cluster,
+  DISTANCE Distance, ROOT Root);
+
+void StripGaps(char szStr[]);
+void StripWhitespace(char szStr[]);
+const char *GetTimeAsStr();
+unsigned CalcBLOSUMWeights(MSA &Aln, ClusterTree &BlosumCluster);
+void CalcGSCWeights(MSA &Aln, const ClusterTree &BlosumCluster);
+void AssertNormalized(const PROB p[]);
+void AssertNormalizedOrZero(const PROB p[]);
+void AssertNormalized(const double p[]);
+bool VectorIsZero(const double dValues[], unsigned n);
+void VectorSet(double dValues[], unsigned n, double d);
+bool VectorIsZero(const float dValues[], unsigned n);
+void VectorSet(float dValues[], unsigned n, float d);
+
+#if	_WIN32
+double log2(double x);	// Defined in <math.h> on Linux
+#endif
+
+double pow2(double x);
+double lnTolog2(double ln);
+
+double lp2(double x);
+SCORE SumLog(SCORE x, SCORE y);
+SCORE SumLog(SCORE x, SCORE y, SCORE z);
+SCORE SumLog(SCORE w, SCORE x, SCORE y, SCORE z);
+
+double lp2Fast(double x);
+double SumLogFast(double x, double y);
+double SumLogFast(double x, double y, double z);
+double SumLogFast(double w, double x, double y, double z);
+
+void chkmem(const char szMsg[] = "");
+
+void Normalize(PROB p[], unsigned n);
+void Normalize(PROB p[], unsigned n, double dRequiredTotal);
+void NormalizeUnlessZero(PROB p[], unsigned n);
+
+void DebugPrintf(const char szFormat[], ...);
+void SetListFileName(const char *ptrListFileName, bool bAppend);
+void ModelFromAlign(const char *strInputFileName, const char *strModelFileName,
+  double dMaxNIC);
+double GetMemUseMB();
+double GetRAMSizeMB();
+double GetPeakMemUseMB();
+void CheckMemUse();
+const char *ElapsedTimeAsString();
+char *SecsToHHMMSS(long lSecs, char szStr[]);
+double GetCPUGHz();
+SCORE GetBlosum62(unsigned uLetterA, unsigned uLetterB);
+SCORE GetBlosum62d(unsigned uLetterA, unsigned uLetterB);
+SCORE GetBlosum50(unsigned uLetterA, unsigned uLetterB);
+void AssertNormalizedDist(const PROB p[], unsigned N);
+void CmdLineError(const char *Format, ...);
+void Fatal(const char *Format, ...);
+void InitCmd();
+void ExecCommandLine(int argc, char *argv[]);
+void DoCmd();
+void SetLogFile();
+void NameFromPath(const char szPath[], char szName[], unsigned uBytes);
+char *strsave(const char *s);
+void DistKmer20_3(const SeqVect &v, DistFunc &DF);
+void DistKbit20_3(const SeqVect &v, DistFunc &DF);
+void DistKmer6_6(const SeqVect &v, DistFunc &DF);
+void DistKmer4_6(const SeqVect &v, DistFunc &DF);
+void DistPWKimura(const SeqVect &v, DistFunc &DF);
+void FastDistKmer(const SeqVect &v, DistFunc &DF);
+void DistUnaligned(const SeqVect &v, DISTANCE DistMethod, DistFunc &DF);
+double PctIdToMAFFTDist(double dPctId);
+double KimuraDist(double dPctId);
+void SetFastParams();
+void AssertProfsEq(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB);
+void ValidateMuscleIds(const MSA &msa);
+void ValidateMuscleIds(const Tree &tree);
+void TraceBackToPath(int **TraceBack, unsigned uLengthA,
+  unsigned uLengthB, PWPath &Path);
+void BitTraceBack(char **TraceBack, unsigned uLengthA, unsigned uLengthB,
+  char LastEdge, PWPath &Path);
+SCORE AlignTwoMSAs(const MSA &msa1, const MSA &msa2, MSA &msaOut, PWPath &Path,
+  bool bLockLeft = false, bool bLockRight = false);
+SCORE AlignTwoProfs(
+  const ProfPos *PA, unsigned uLengthA, WEIGHT wA,
+  const ProfPos *PB, unsigned uLengthB, WEIGHT wB,
+  PWPath &Path, ProfPos **ptrPout, unsigned *ptruLengthOut);
+void AlignTwoProfsGivenPath(const PWPath &Path,
+  const ProfPos *PA, unsigned uLengthA, WEIGHT wA,
+  const ProfPos *PB, unsigned uLengthB, WEIGHT wB,
+  ProfPos **ptrPOut, unsigned *ptruLengthOut);
+void AlignTwoMSAsGivenPathSW(const PWPath &Path, const MSA &msaA, const MSA &msaB,
+  MSA &msaCombined);
+void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB,
+  MSA &msaCombined);
+SCORE FastScorePath2(const ProfPos *PA, unsigned uLengthA,
+  const ProfPos *PB, unsigned uLengthB, const PWPath &Path);
+SCORE GlobalAlignDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path);
+SCORE GlobalAlignSimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path);
+SCORE GlobalAlignSP(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path);
+SCORE GlobalAlignSPN(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path);
+SCORE GlobalAlignLE(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path);
+void CalcThreeWayWeights(const Tree &tree, unsigned uNode1, unsigned uNode2,
+  WEIGHT *Weights);
+SCORE GlobalAlignSS(const Seq &seqA, const Seq &seqB, PWPath &Path);
+bool RefineHoriz(MSA &msaIn, const Tree &tree, unsigned uIters, bool bLockLeft, bool bLockRight);
+bool RefineVert(MSA &msaIn, const Tree &tree, unsigned uIters);
+SCORE GlobalAlignNoDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path);
+
+void SetInputFileName(const char *pstrFileName);
+void SetIter(unsigned uIter);
+void IncIter();
+void SetMaxIters(unsigned uMaxIters);
+void Progress(unsigned uStep, unsigned uTotalSteps);
+void Progress(const char *szFormat, ...);
+void SetStartTime();
+void ProgressStepsDone();
+void SetProgressDesc(const char szDesc[]);
+void SetSeqStats(unsigned uSeqCount, unsigned uMaxL, unsigned uAvgL);
+
+void SetNewHandler();
+void SaveCurrentAlignment();
+void SetCurrentAlignment(MSA &msa);
+void SetOutputFileName(const char *out);
+
+#if	DEBUG	// DEBUG builds get real functions...
+void SetMuscleSeqVect(SeqVect &v);
+void SetMuscleInputMSA(MSA &msa);
+void ValidateMuscleIds(const MSA &msa);
+void ValidateMuscleIds(const Tree &tree);
+#else	// ...other builds turn each call into nothing, so the argument is not evaluated
+#define SetMuscleSeqVect(x)		/* empty */
+#define SetMuscleInputMSA(x)	/* empty */
+#define ValidateMuscleIds(x)	/* empty */
+#endif
+
+void ProcessArgVect(int argc, char *argv[]);
+void ProcessArgStr(const char *Str);
+void Usage();
+void SetParams();
+
+void SortCounts(const FCOUNT fcCounts[], unsigned SortOrder[]);
+unsigned ResidueGroupFromFCounts(const FCOUNT fcCounts[]);
+FCOUNT SumCounts(const FCOUNT Counts[]);
+
+bool FlagOpt(const char *Name);
+const char *ValueOpt(const char *Name);
+void DoMuscle();
+void ProfDB();
+void DoSP();
+void ProgAlignSubFams();
+void Run();
+void ListParams();
+void OnException();
+void SetSeqWeightMethod(SEQWEIGHT Method);
+SEQWEIGHT GetSeqWeightMethod();
+WEIGHT GetMuscleSeqWeightById(unsigned uId);
+void ListDiagSavings();
+void CheckMaxTime();
+const char *MaxSecsToStr();
+unsigned long GetStartTime();
+
+void ProgressiveAlign(const SeqVect &v, const Tree &GuideTree, MSA &a);
+ProgNode *ProgressiveAlignE(const SeqVect &v, const Tree &GuideTree, MSA &a);
+
+void CalcDistRangeKmer6_6(const MSA &msa, unsigned uRow, float Dist[]);
+void CalcDistRangeKmer20_3(const MSA &msa, unsigned uRow, float Dist[]);
+void CalcDistRangeKmer20_4(const MSA &msa, unsigned uRow, float Dist[]);
+void CalcDistRangePctIdKimura(const MSA &msa, unsigned uRow, float Dist[]);
+void CalcDistRangePctIdLog(const MSA &msa, unsigned uRow, float Dist[]);
+
+void MakeRootMSA(const SeqVect &v, const Tree &GuideTree, ProgNode Nodes[], MSA &a);
+void MakeRootMSABrenner(SeqVect &v, const Tree &GuideTree, ProgNode Nodes[], MSA &a);
+
+void Refine();
+void Local();
+void Profile();
+void PPScore();
+void UPGMA2(const DistCalc &DC, Tree &tree, LINKAGE Linkage);
+
+char *GetFastaSeq(FILE *f, unsigned *ptrSeqLength, char **ptrLabel,
+  bool DeleteGaps = true);
+SCORE SW(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path);
+void TraceBackSW(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,
+  unsigned uPrefixLengthAMax, unsigned uPrefixLengthBMax, PWPath &Path);
+void DiffPaths(const PWPath &p1, const PWPath &p2, unsigned Edges1[],
+  unsigned *ptruDiffCount1, unsigned Edges2[], unsigned *ptruDiffCount2);
+void SetPPScore(bool bRespectFlagOpts = true);
+void SetPPScore(PPSCORE p);
+SCORE GlobalAlignDimer(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path);
+bool MissingCommand();
+void Credits();
+void ProfileProfile(MSA &msa1, MSA &msa2, MSA &msaOut);
+void MHackStart(SeqVect &v);
+void MHackEnd(MSA &msa);
+void WriteScoreFile(const MSA &msa);
+char ConsensusChar(const ProfPos &PP);
+void Stabilize(const MSA &msa, MSA &msaStable);
+void MuscleOutput(MSA &msa);
+PTR_SCOREMATRIX ReadMx(TextFile &File);

Added: trunk/packages/muscle/branches/upstream/current/muscleout.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/muscleout.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/muscleout.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,109 @@
+#include "muscle.h"
+#include "msa.h"
+#include "params.h"
+#include "textfile.h"
+
+// Writes msa to the -out file (format picked by flag; FASTA when no flag is
+// set and -out is not "-"), then to each named per-format file and score file.
+static void DoOutput(MSA &msa)
+	{
+	bool AnyOutput = false;	// set once a format flag has written to fileOut
+// Flag options, at most one used (because only one -out filename)
+	TextFile fileOut(g_pstrOutFileName, true);
+	if (g_bFASTA)
+		{
+		msa.ToFASTAFile(fileOut);
+		AnyOutput = true;
+		}
+	else if (g_bMSF)
+		{
+		msa.ToMSFFile(fileOut);
+		AnyOutput = true;
+		}
+	else if (g_bAln)
+		{
+		msa.ToAlnFile(fileOut);
+		AnyOutput = true;
+		}
+	else if (g_bHTML)
+		{
+		msa.ToHTMLFile(fileOut);
+		AnyOutput = true;
+		}
+	else if (g_bPHYI)
+		{
+		msa.ToPhyInterleavedFile(fileOut);
+		AnyOutput = true;
+		}
+	else if (g_bPHYS)
+		{
+		msa.ToPhySequentialFile(fileOut);
+		AnyOutput = true;
+		}
+
+// If -out option was given but no flags, output as FASTA
+	if (!AnyOutput && strcmp(g_pstrOutFileName, "-") != 0)
+		msa.ToFASTAFile(fileOut);
+	fileOut.Close();
+
+// Value options: each named file gets its own copy, independent of the flags above
+	if (g_pstrFASTAOutFileName)
+		{
+		TextFile File(g_pstrFASTAOutFileName, true);
+		msa.ToFASTAFile(File);
+		}
+
+	if (g_pstrMSFOutFileName)
+		{
+		TextFile File(g_pstrMSFOutFileName, true);
+		msa.ToMSFFile(File);
+		}
+
+	if (g_pstrClwOutFileName)
+		{
+		TextFile File(g_pstrClwOutFileName, true);
+		msa.ToAlnFile(File);
+		}
+
+	if (g_pstrClwStrictOutFileName)
+		{
+		g_bClwStrict = true;	// global switch; it is not reset afterwards
+		TextFile File(g_pstrClwStrictOutFileName, true);
+		msa.ToAlnFile(File);
+		}
+
+	if (g_pstrHTMLOutFileName)
+		{
+		TextFile File(g_pstrHTMLOutFileName, true);
+		msa.ToHTMLFile(File);
+		}
+
+	if (g_pstrPHYIOutFileName)
+		{
+		TextFile File(g_pstrPHYIOutFileName, true);
+		msa.ToPhyInterleavedFile(File);	// interleaved writer, same as the g_bPHYI flag branch
+		}
+
+	if (g_pstrPHYSOutFileName)
+		{
+		TextFile File(g_pstrPHYSOutFileName, true);
+		msa.ToPhySequentialFile(File);
+		}
+
+	if (0 != g_pstrScoreFileName)
+		WriteScoreFile(msa);
+	}
+
+void MuscleOutput(MSA &msa)
+	{
+// Calls MHackEnd on msa, then hands DoOutput either msa itself or, when
+// g_bStable is set, the copy produced by Stabilize (msa is cleared first).
+	MHackEnd(msa);
+	if (!g_bStable)
+		return DoOutput(msa);
+
+	MSA msaStabilized;
+	Stabilize(msa, msaStabilized);
+	msa.Clear();	// only the copy is needed from here on
+	DoOutput(msaStabilized);
+	}

Added: trunk/packages/muscle/branches/upstream/current/nucmx.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/nucmx.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/nucmx.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,23 @@
+#include "muscle.h"
+
+// BLASTZ default parameters
+// open 400, extend 30, matrix as below
+
+const float NUC_EXTEND = 30;
+const float NUC_SP_CENTER = 2*NUC_EXTEND;	// offset that v() adds to every listed score
+
+#define v(x)	((float) (x) + NUC_SP_CENTER)
+#define ROW(A, C, G, T) \
+	{ v(A), v(C), v(G), v(T) },
+
+float NUC_SP[32][32] =	// only the 4x4 A/C/G/T corner is listed; all other entries are zero-initialized
+	{
+//         A        C        G        T
+ROW(      91,    -114,     -31,    -123) // A
+
+ROW(    -114,     100,    -125,     -31) // C
+
+ROW(     -31,    -125,     100,    -114) // G
+
+ROW(    -123,     -31,    -114,      91) // T
+	};

Added: trunk/packages/muscle/branches/upstream/current/nwdasimple.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/nwdasimple.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/nwdasimple.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,494 @@
+#include "muscle.h"
+#include <math.h>
+#include "pwpath.h"
+#include "profile.h"
+#include <stdio.h>
+
+#define	TRACE	0
+
+bool g_bKeepSimpleDP;	// when true, NWDASimple stores its matrices in the pointers below instead of deleting them
+SCORE *g_DPM;	// g_DP*: score matrices of the M, D, E, I and J states from the last kept run
+SCORE *g_DPD;
+SCORE *g_DPE;
+SCORE *g_DPI;
+SCORE *g_DPJ;
+char *g_TBM;	// g_TB*: the matching traceback matrices; nothing in this file frees any of them
+char *g_TBD;
+char *g_TBE;
+char *g_TBI;
+char *g_TBJ;
+
+#if	DOUBLE_AFFINE
+
+static char XlatEdgeType(char c)
+	{
+// Translate a DP state letter into the edge letter stored in the path:
+// 'E' is reported as 'D', 'J' is reported as 'I', and every other
+// character is returned unchanged.
+	const char cXlat = ('E' == c) ? 'D' : (('J' == c) ? 'I' : c);
+	return cXlat;
+	}
+
+static const char *LocalScoreToStr(SCORE s)
+	{	// Formats a score as "%6.1f" for logging; scores below -100000 print as '*'.
+	static char str[16];	// shared static buffer: the result is overwritten by the next call
+	if (s < -100000)
+		return "     *";
+	_snprintf(str, sizeof(str) - 1, "%6.1f", s);	// bounded write; the last byte is never touched and stays '\0'
+	return str;
+	}
+
+static void ListTB(const char *TBM_, const ProfPos *PA, const ProfPos *PB,
+  unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{	// Logs a traceback matrix as a table: one row per prefix length of A, one column per prefix length of B.
+	Log("        ");	// blank corner above the row labels
+	for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+		{
+		char c = ' ';	// prefix length 0 has no profile position, hence no consensus letter
+		if (uPrefixLengthB > 0)
+			c = ConsensusChar(PB[uPrefixLengthB - 1]);
+		Log(" %4u:%c", uPrefixLengthB, c);
+		}
+	Log("\n");
+	for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
+		{
+		char c = ' ';
+		if (uPrefixLengthA > 0)
+			c = ConsensusChar(PA[uPrefixLengthA - 1]);
+		Log("%4u:%c  ", uPrefixLengthA, c);
+		for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+			Log(" %6c", TBM(uPrefixLengthA, uPrefixLengthB));	// TBM() is defined outside this file; presumably it indexes the TBM_ parameter -- confirm before renaming it
+		Log("\n");
+		}
+	}
+
+static void ListDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,
+  unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{	// Logs a score matrix as a table: one row per prefix length of A, one column per prefix length of B.
+	Log("        ");	// blank corner above the row labels
+	for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+		{
+		char c = ' ';	// prefix length 0 has no profile position, hence no consensus letter
+		if (uPrefixLengthB > 0)
+			c = ConsensusChar(PB[uPrefixLengthB - 1]);
+		Log(" %4u:%c", uPrefixLengthB, c);
+		}
+	Log("\n");
+	for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
+		{
+		char c = ' ';
+		if (uPrefixLengthA > 0)
+			c = ConsensusChar(PA[uPrefixLengthA - 1]);
+		Log("%4u:%c  ", uPrefixLengthA, c);
+		for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+			Log(" %s", LocalScoreToStr(DPM(uPrefixLengthA, uPrefixLengthB)));	// DPM() is defined outside this file; presumably it indexes the DPM_ parameter -- confirm before renaming it
+		Log("\n");
+		}
+	}
+
+// Aligns profiles PA and PB end to end by full-matrix DP over states M, D/E, I/J; fills Path, returns the best score.
+SCORE NWDASimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path)
+	{
+	assert(uLengthB > 0 && uLengthA > 0);
+
+	const unsigned uPrefixCountA = uLengthA + 1;
+	const unsigned uPrefixCountB = uLengthB + 1;
+
+// Allocate DP matrices
+	const size_t LM = uPrefixCountA*uPrefixCountB;
+	SCORE *DPL_ = new SCORE[LM];	// letter-letter scores; only read back by the TRACE logging
+	SCORE *DPM_ = new SCORE[LM];
+	SCORE *DPD_ = new SCORE[LM];
+	SCORE *DPE_ = new SCORE[LM];
+	SCORE *DPI_ = new SCORE[LM];
+	SCORE *DPJ_ = new SCORE[LM];
+
+	char *TBM_ = new char[LM];
+	char *TBD_ = new char[LM];
+	char *TBE_ = new char[LM];
+	char *TBI_ = new char[LM];
+	char *TBJ_ = new char[LM];
+
+	memset(TBM_, '?', LM);	// '?' marks traceback cells that were never set
+	memset(TBD_, '?', LM);
+	memset(TBE_, '?', LM);
+	memset(TBI_, '?', LM);
+	memset(TBJ_, '?', LM);
+
+	DPM(0, 0) = 0;
+	DPD(0, 0) = MINUS_INFINITY;
+	DPE(0, 0) = MINUS_INFINITY;
+	DPI(0, 0) = MINUS_INFINITY;
+	DPJ(0, 0) = MINUS_INFINITY;
+
+	DPM(1, 0) = MINUS_INFINITY;
+	DPD(1, 0) = PA[0].m_scoreGapOpen;
+	DPE(1, 0) = PA[0].m_scoreGapOpen2;
+	TBD(1, 0) = 'D';
+	TBE(1, 0) = 'E';
+	DPI(1, 0) = MINUS_INFINITY;
+	DPJ(1, 0) = MINUS_INFINITY;
+
+	DPM(0, 1) = MINUS_INFINITY;
+	DPD(0, 1) = MINUS_INFINITY;
+	DPE(0, 1) = MINUS_INFINITY;
+	DPI(0, 1) = PB[0].m_scoreGapOpen;
+	DPJ(0, 1) = PB[0].m_scoreGapOpen2;
+	TBI(0, 1) = 'I';
+	TBJ(0, 1) = 'J';
+
+// Empty prefix of B is special case
+	for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
+		{
+		DPM(uPrefixLengthA, 0) = MINUS_INFINITY;
+
+		DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) + g_scoreGapExtend;
+		DPE(uPrefixLengthA, 0) = DPE(uPrefixLengthA - 1, 0) + g_scoreGapExtend2;
+
+		TBD(uPrefixLengthA, 0) = 'D';
+		TBE(uPrefixLengthA, 0) = 'E';
+
+		DPI(uPrefixLengthA, 0) = MINUS_INFINITY;
+		DPJ(uPrefixLengthA, 0) = MINUS_INFINITY;
+		}
+
+// Empty prefix of A is special case
+	for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+		{
+		DPM(0, uPrefixLengthB) = MINUS_INFINITY;
+
+		DPD(0, uPrefixLengthB) = MINUS_INFINITY;
+		DPE(0, uPrefixLengthB) = MINUS_INFINITY;
+
+		DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) + g_scoreGapExtend;
+		DPJ(0, uPrefixLengthB) = DPJ(0, uPrefixLengthB - 1) + g_scoreGapExtend2;
+
+		TBI(0, uPrefixLengthB) = 'I';
+		TBJ(0, uPrefixLengthB) = 'J';
+		}
+
+// Special case to agree with NWFast, no D-I transitions so...
+	DPD(uLengthA, 0) = MINUS_INFINITY;
+	DPE(uLengthA, 0) = MINUS_INFINITY;
+//	DPI(0, uLengthB) = MINUS_INFINITY;
+//	DPJ(0, uLengthB) = MINUS_INFINITY;
+
+// ============
+// Main DP loop
+// ============
+	SCORE scoreGapCloseB = MINUS_INFINITY;	// gap-close scores of the previous B position; none before the first
+	SCORE scoreGapClose2B = MINUS_INFINITY;
+	for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+		{
+		const ProfPos &PPB = PB[uPrefixLengthB - 1];
+
+		SCORE scoreGapCloseA = MINUS_INFINITY;	// gap-close scores of the previous A position; reset for each B
+		SCORE scoreGapClose2A = MINUS_INFINITY;
+		for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
+			{
+			const ProfPos &PPA = PA[uPrefixLengthA - 1];
+
+			{
+		// Match M=LetterA+LetterB
+			SCORE scoreLL = ScoreProfPos2(PPA, PPB);
+			DPL(uPrefixLengthA, uPrefixLengthB) = scoreLL;
+
+			SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1);
+			SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseA;
+			SCORE scoreEM = DPE(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapClose2A;
+			SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseB;
+			SCORE scoreJM = DPJ(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapClose2B;
+
+			SCORE scoreBest;	// ties resolve in the order M, D, E, I, J
+			if (scoreMM >= scoreDM && scoreMM >= scoreEM && scoreMM >= scoreIM && scoreMM >= scoreJM)
+				{
+				scoreBest = scoreMM;
+				TBM(uPrefixLengthA, uPrefixLengthB) = 'M';
+				}
+			else if (scoreDM >= scoreMM && scoreDM >= scoreEM && scoreDM >= scoreIM && scoreDM >= scoreJM)
+				{
+				scoreBest = scoreDM;
+				TBM(uPrefixLengthA, uPrefixLengthB) = 'D';
+				}
+			else if (scoreEM >= scoreMM && scoreEM >= scoreDM && scoreEM >= scoreIM && scoreEM >= scoreJM)
+				{
+				scoreBest = scoreEM;
+				TBM(uPrefixLengthA, uPrefixLengthB) = 'E';
+				}
+			else if (scoreIM >= scoreMM && scoreIM >= scoreDM && scoreIM >= scoreEM && scoreIM >= scoreJM)
+				{
+				scoreBest = scoreIM;
+				TBM(uPrefixLengthA, uPrefixLengthB) = 'I';
+				}
+			else
+				{
+				assert(scoreJM >= scoreMM && scoreJM >= scoreDM && scoreJM >= scoreEM && scoreJM >= scoreIM);
+				scoreBest = scoreJM;
+				TBM(uPrefixLengthA, uPrefixLengthB) = 'J';
+				}
+			DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL;
+			}
+
+			{
+		// Delete D=LetterA+GapB
+			SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) +
+			  PA[uPrefixLengthA-1].m_scoreGapOpen;
+			SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) + g_scoreGapExtend;
+
+			SCORE scoreBest;
+			if (scoreMD >= scoreDD)
+				{
+				scoreBest = scoreMD;
+				TBD(uPrefixLengthA, uPrefixLengthB) = 'M';
+				}
+			else
+				{
+				assert(scoreDD >= scoreMD);
+				scoreBest = scoreDD;
+				TBD(uPrefixLengthA, uPrefixLengthB) = 'D';
+				}
+			DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;
+			}
+
+			{
+		// Delete E=LetterA+GapB
+			SCORE scoreME = DPM(uPrefixLengthA-1, uPrefixLengthB) +
+			  PA[uPrefixLengthA-1].m_scoreGapOpen2;
+			SCORE scoreEE = DPE(uPrefixLengthA-1, uPrefixLengthB) + g_scoreGapExtend2;
+
+			SCORE scoreBest;
+			if (scoreME >= scoreEE)
+				{
+				scoreBest = scoreME;
+				TBE(uPrefixLengthA, uPrefixLengthB) = 'M';
+				}
+			else
+				{
+				assert(scoreEE >= scoreME);
+				scoreBest = scoreEE;
+				TBE(uPrefixLengthA, uPrefixLengthB) = 'E';
+				}
+			DPE(uPrefixLengthA, uPrefixLengthB) = scoreBest;
+			}
+
+		// Insert I=GapA+LetterB
+			{
+			SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) +
+			  PB[uPrefixLengthB - 1].m_scoreGapOpen;
+			SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) + g_scoreGapExtend;
+
+			SCORE scoreBest;
+			if (scoreMI >= scoreII)
+				{
+				scoreBest = scoreMI;
+				TBI(uPrefixLengthA, uPrefixLengthB) = 'M';
+				}
+			else
+				{
+				assert(scoreII > scoreMI);
+				scoreBest = scoreII;
+				TBI(uPrefixLengthA, uPrefixLengthB) = 'I';
+				}
+			DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;
+			}
+
+		// Insert J=GapA+LetterB
+			{
+			SCORE scoreMJ = DPM(uPrefixLengthA, uPrefixLengthB-1) +
+			  PB[uPrefixLengthB - 1].m_scoreGapOpen2;
+			SCORE scoreJJ = DPJ(uPrefixLengthA, uPrefixLengthB-1) + g_scoreGapExtend2;
+
+			SCORE scoreBest;
+			if (scoreMJ >= scoreJJ)
+				{
+				scoreBest = scoreMJ;
+				TBJ(uPrefixLengthA, uPrefixLengthB) = 'M';
+				}
+			else
+				{
+				assert(scoreJJ > scoreMJ);
+				scoreBest = scoreJJ;
+				TBJ(uPrefixLengthA, uPrefixLengthB) = 'J';
+				}
+			DPJ(uPrefixLengthA, uPrefixLengthB) = scoreBest;
+			}
+
+			scoreGapCloseA = PPA.m_scoreGapClose;
+			scoreGapClose2A = PPA.m_scoreGapClose2;
+			}
+		scoreGapCloseB = PPB.m_scoreGapClose;
+		scoreGapClose2B = PPB.m_scoreGapClose2;
+		}
+
+#if TRACE
+	Log("\n");
+	Log("DA Simple DPL:\n");
+	ListDP(DPL_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("\n");
+	Log("DA Simple DPM:\n");
+	ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("\n");
+	Log("DA Simple DPD:\n");
+	ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("\n");
+	Log("DA Simple DPE:\n");
+	ListDP(DPE_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("\n");
+	Log("DA Simple DPI:\n");
+	ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("\n");
+	Log("DA Simple DPJ:\n");
+	ListDP(DPJ_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("\n");
+	Log("DA Simple TBM:\n");
+	ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("\n");
+	Log("DA Simple TBD:\n");
+	ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("\n");
+	Log("DA Simple TBE:\n");
+	ListTB(TBE_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("\n");
+	Log("DA Simple TBI:\n");
+	ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("\n");
+	Log("DA Simple TBJ:\n");
+	ListTB(TBJ_, PA, PB, uPrefixCountA, uPrefixCountB);
+#endif
+
+// Trace-back
+// ==========
+	Path.Clear();
+
+// Find last edge
+	SCORE M = DPM(uLengthA, uLengthB);
+	SCORE D = DPD(uLengthA, uLengthB) + PA[uLengthA-1].m_scoreGapClose;
+	SCORE E = DPE(uLengthA, uLengthB) + PA[uLengthA-1].m_scoreGapClose2;
+	SCORE I = DPI(uLengthA, uLengthB) + PB[uLengthB-1].m_scoreGapClose;
+	SCORE J = DPJ(uLengthA, uLengthB) + PB[uLengthB-1].m_scoreGapClose2;
+	char cEdgeType = '?';
+
+	SCORE BestScore = M;	// strict '>' below: on a tie the earlier state (M first) is kept
+	cEdgeType = 'M';
+	if (D > BestScore)
+		{
+		cEdgeType = 'D';
+		BestScore = D;
+		}
+	if (E > BestScore)
+		{
+		cEdgeType = 'E';
+		BestScore = E;
+		}
+	if (I > BestScore)
+		{
+		cEdgeType = 'I';
+		BestScore = I;
+		}
+	if (J > BestScore)
+		{
+		cEdgeType = 'J';
+		BestScore = J;
+		}
+
+#if	TRACE
+	Log("DA Simple: MAB=%.4g DAB=%.4g EAB=%.4g IAB=%.4g JAB=%.4g best=%c\n",
+	  M, D, E, I, J, cEdgeType);
+#endif
+
+	unsigned PLA = uLengthA;
+	unsigned PLB = uLengthB;
+	for (;;)	// walk back from (uLengthA, uLengthB) until both prefix lengths reach 0
+		{
+		PWEdge Edge;
+		Edge.cType = XlatEdgeType(cEdgeType);	// the path stores E as D and J as I
+		Edge.uPrefixLengthA = PLA;
+		Edge.uPrefixLengthB = PLB;
+#if	TRACE
+		Log("Prepend %c%d.%d\n", Edge.cType, PLA, PLB);
+#endif
+		Path.PrependEdge(Edge);
+
+		switch (cEdgeType)
+			{
+		case 'M':
+			assert(PLA > 0);
+			assert(PLB > 0);
+			cEdgeType = TBM(PLA, PLB);
+			--PLA;
+			--PLB;
+			break;
+
+		case 'D':
+			assert(PLA > 0);
+			cEdgeType = TBD(PLA, PLB);
+			--PLA;
+			break;
+
+		case 'E':
+			assert(PLA > 0);
+			cEdgeType = TBE(PLA, PLB);
+			--PLA;
+			break;
+
+		case 'I':
+			assert(PLB > 0);
+			cEdgeType = TBI(PLA, PLB);
+			--PLB;
+			break;
+		
+		case 'J':
+			assert(PLB > 0);
+			cEdgeType = TBJ(PLA, PLB);
+			--PLB;
+			break;
+
+		default:
+			Quit("Invalid edge %c", cEdgeType);
+			}
+		if (0 == PLA && 0 == PLB)
+			break;
+		}
+	Path.Validate();
+
+	delete[] DPL_;	// not used past this point and never handed to the g_DP* globals, so release it on every path
+#if	TRACE
+	SCORE scorePath = FastScorePath2(PA, uLengthA, PB, uLengthB, Path);
+	Path.LogMe();
+	Log("Score = %s Path = %s\n", LocalScoreToStr(BestScore), LocalScoreToStr(scorePath));
+#endif
+
+	if (g_bKeepSimpleDP)	// caller asked to keep the matrices: publish them instead of freeing
+		{
+		g_DPM = DPM_;
+		g_DPD = DPD_;
+		g_DPE = DPE_;
+		g_DPI = DPI_;
+		g_DPJ = DPJ_;
+
+		g_TBM = TBM_;
+		g_TBD = TBD_;
+		g_TBE = TBE_;
+		g_TBI = TBI_;
+		g_TBJ = TBJ_;
+		}
+	else
+		{
+		delete[] DPM_;
+		delete[] DPD_;
+		delete[] DPE_;
+		delete[] DPI_;
+		delete[] DPJ_;
+
+		delete[] TBM_;
+		delete[] TBD_;
+		delete[] TBE_;
+		delete[] TBI_;
+		delete[] TBJ_;
+		}
+
+	return BestScore;
+	}
+
+#endif // DOUBLE_AFFINE

Added: trunk/packages/muscle/branches/upstream/current/nwdasimple2.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/nwdasimple2.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/nwdasimple2.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,549 @@
+#include "muscle.h"
+#include "pwpath.h"
+#include "profile.h"
+
+#if	DOUBLE_AFFINE
+
+#define	TRACE	0
+
+extern bool g_bKeepSimpleDP;
+extern SCORE *g_DPM;
+extern SCORE *g_DPD;
+extern SCORE *g_DPE;
+extern SCORE *g_DPI;
+extern SCORE *g_DPJ;
+extern char *g_TBM;
+extern char *g_TBD;
+extern char *g_TBE;
+extern char *g_TBI;
+extern char *g_TBJ;
+
+// Collapse edge letters before they are stored in a PWEdge: 'E' becomes 'D'
+// and 'J' becomes 'I'; every other character is returned unchanged.
+static char XlatEdgeType(char c)
+	{
+	switch (c)
+		{
+	case 'E':
+		return 'D';
+	case 'J':
+		return 'I';
+	default:
+		return c;
+		}
+	}
+
+// Format a score for the trace tables.
+// Scores below -100000 are shown as "     *"; anything else is printed
+// with "%6.1f". The result points either at a string literal or at a static
+// buffer that is overwritten by the next call.
+static const char *LocalScoreToStr(SCORE s)
+	{
+	static char str[16];
+	if (s < -100000)
+		return "     *";
+// "%6.1f" only sets a minimum width, so bound the write to the buffer size
+// in case the score is very large.
+	snprintf(str, sizeof(str), "%6.1f", s);
+	return str;
+	}
+
+// Write one DP score matrix to the log.
+// The first line labels every prefix length of B with its consensus
+// character; each following line is one prefix length of A. Cells are read
+// through the DPM() accessor and formatted by LocalScoreToStr().
+static void ListDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,
+  unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{
+// Column header; prefix length 0 has no letter, so a blank is printed.
+	Log("        ");
+	for (unsigned j = 0; j < uPrefixCountB; ++j)
+		{
+		const char cB = (j > 0) ? ConsensusChar(PB[j - 1]) : ' ';
+		Log(" %4u:%c", j, cB);
+		}
+	Log("\n");
+
+// One row per prefix length of A.
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		{
+		const char cA = (i > 0) ? ConsensusChar(PA[i - 1]) : ' ';
+		Log("%4u:%c  ", i, cA);
+		for (unsigned j = 0; j < uPrefixCountB; ++j)
+			Log(" %s", LocalScoreToStr(DPM(i, j)));
+		Log("\n");
+		}
+	}
+
+// Write one traceback matrix to the log, in the same layout as the score
+// dump: a header line for the prefixes of B, then one line per prefix of A
+// with the traceback letter of every cell (read through TBM()).
+static void ListTB(const char *TBM_, const ProfPos *PA, const ProfPos *PB,
+  unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{
+// Column header; prefix length 0 has no letter, so a blank is printed.
+	Log("        ");
+	for (unsigned j = 0; j < uPrefixCountB; ++j)
+		{
+		const char cB = (j > 0) ? ConsensusChar(PB[j - 1]) : ' ';
+		Log(" %4u:%c", j, cB);
+		}
+	Log("\n");
+
+// One row per prefix length of A.
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		{
+		const char cA = (i > 0) ? ConsensusChar(PA[i - 1]) : ' ';
+		Log("%4u:%c  ", i, cA);
+		for (unsigned j = 0; j < uPrefixCountB; ++j)
+			Log(" %6c", TBM(i, j));
+		Log("\n");
+		}
+	}
+
+// Write a DP score matrix to the log with (i + j)*g_scoreGapExtend
+// subtracted from every cell, where i and j are the prefix lengths of A
+// and B. Layout is a header line for B followed by one line per prefix of A.
+static void ListDPM(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,
+  unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{
+// Column header; prefix length 0 has no letter, so a blank is printed.
+	Log("        ");
+	for (unsigned j = 0; j < uPrefixCountB; ++j)
+		{
+		const char cB = (j > 0) ? ConsensusChar(PB[j - 1]) : ' ';
+		Log(" %4u:%c", j, cB);
+		}
+	Log("\n");
+
+// One row per prefix length of A.
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		{
+		const char cA = (i > 0) ? ConsensusChar(PA[i - 1]) : ' ';
+		Log("%4u:%c  ", i, cA);
+		for (unsigned j = 0; j < uPrefixCountB; ++j)
+			{
+			const SCORE scoreExt = (i + j)*g_scoreGapExtend;
+			const SCORE scoreCell = DPM(i, j) - scoreExt;
+			Log(" %s", LocalScoreToStr(scoreCell));
+			}
+		Log("\n");
+		}
+	}
+
+extern SCORE ScoreProfPos2(const ProfPos &PP, const ProfPos &PPB);
+
+// Global alignment of two profiles with two sets of affine gap penalties,
+// using full-size DP and traceback matrices.
+//
+// Five states are kept per cell: M (letter in A against letter in B), D and
+// E (letter in A against a gap; D uses m_scoreGapOpen/g_scoreGapExtend, E
+// uses m_scoreGapOpen2/g_scoreGapExtend2), and I and J (the same pair for a
+// letter in B against a gap).
+//
+//	PA, uLengthA	Profile A and its length; must be > 0.
+//	PB, uLengthB	Profile B and its length; must be > 0.
+//	Path			Cleared, then filled by the traceback. E edges are
+//					stored as 'D' and J edges as 'I'.
+//
+// Returns the best of the five state scores at (uLengthA, uLengthB).
+//
+// When g_bKeepSimpleDP is set, the five DP and five traceback matrices are
+// handed over to the g_DP*/g_TB* globals instead of being freed here.
+SCORE NWDASimple2(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path)
+	{
+	assert(uLengthB > 0 && uLengthA > 0);
+
+	const unsigned uPrefixCountA = uLengthA + 1;
+	const unsigned uPrefixCountB = uLengthB + 1;
+
+// Allocate DP matrices
+	const size_t LM = uPrefixCountA*uPrefixCountB;
+	SCORE *DPM_ = new SCORE[LM];
+	SCORE *DPD_ = new SCORE[LM];
+	SCORE *DPE_ = new SCORE[LM];
+	SCORE *DPI_ = new SCORE[LM];
+	SCORE *DPJ_ = new SCORE[LM];
+	SCORE *DPL_ = new SCORE[LM];
+
+	char *TBM_ = new char[LM];
+	char *TBD_ = new char[LM];
+	char *TBE_ = new char[LM];
+	char *TBI_ = new char[LM];
+	char *TBJ_ = new char[LM];
+
+	memset(DPM_, 0, LM*sizeof(SCORE));
+	memset(DPD_, 0, LM*sizeof(SCORE));
+	memset(DPE_, 0, LM*sizeof(SCORE));
+	memset(DPI_, 0, LM*sizeof(SCORE));
+	memset(DPJ_, 0, LM*sizeof(SCORE));
+
+#if	TRACE
+// DPL is only written for non-empty prefixes; clear it so that the trace
+// dump below does not print uninitialised cells.
+	memset(DPL_, 0, LM*sizeof(SCORE));
+#endif
+
+	memset(TBM_, '?', LM);
+	memset(TBD_, '?', LM);
+	memset(TBE_, '?', LM);
+	memset(TBI_, '?', LM);
+	memset(TBJ_, '?', LM);
+
+// Boundary cells (0,0), (1,0) and (0,1)
+	DPM(0, 0) = 0;
+	DPD(0, 0) = MINUS_INFINITY;
+	DPE(0, 0) = MINUS_INFINITY;
+	DPI(0, 0) = MINUS_INFINITY;
+	DPJ(0, 0) = MINUS_INFINITY;
+
+	DPM(1, 0) = MINUS_INFINITY;
+	DPD(1, 0) = PA[0].m_scoreGapOpen;
+	DPE(1, 0) = PA[0].m_scoreGapOpen2;
+	DPI(1, 0) = MINUS_INFINITY;
+	DPJ(1, 0) = MINUS_INFINITY;
+
+	DPM(0, 1) = MINUS_INFINITY;
+	DPD(0, 1) = MINUS_INFINITY;
+	DPE(0, 1) = MINUS_INFINITY;
+	DPI(0, 1) = PB[0].m_scoreGapOpen;
+	DPJ(0, 1) = PB[0].m_scoreGapOpen2;
+
+// Empty prefix of B is special case
+	for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
+		{
+	// M=LetterA+LetterB, impossible with empty prefix
+		DPM(uPrefixLengthA, 0) = MINUS_INFINITY;
+
+	// D=LetterA+GapB
+		DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) + g_scoreGapExtend;
+		TBD(uPrefixLengthA, 0) = 'D';
+
+		DPE(uPrefixLengthA, 0) = DPE(uPrefixLengthA - 1, 0) + g_scoreGapExtend2;
+		TBE(uPrefixLengthA, 0) = 'E';
+
+	// I=GapA+LetterB, impossible with empty prefix
+		DPI(uPrefixLengthA, 0) = MINUS_INFINITY;
+		DPJ(uPrefixLengthA, 0) = MINUS_INFINITY;
+		}
+
+// Empty prefix of A is special case
+	for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+		{
+	// M=LetterA+LetterB, impossible with empty prefix
+		DPM(0, uPrefixLengthB) = MINUS_INFINITY;
+
+	// D=LetterA+GapB, impossible with empty prefix
+		DPD(0, uPrefixLengthB) = MINUS_INFINITY;
+		DPE(0, uPrefixLengthB) = MINUS_INFINITY;
+
+	// I=GapA+LetterB
+		DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) + g_scoreGapExtend;
+		TBI(0, uPrefixLengthB) = 'I';
+
+		DPJ(0, uPrefixLengthB) = DPJ(0, uPrefixLengthB - 1) + g_scoreGapExtend2;
+		TBJ(0, uPrefixLengthB) = 'J';
+		}
+
+// ============
+// Main DP loop
+// ============
+	for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+		{
+		const ProfPos &PPB = PB[uPrefixLengthB - 1];
+
+	// Gap-close scores come from the previous position of B; there is
+	// none for the first letter, so closing is impossible there.
+		SCORE scoreGapCloseB;
+		if (uPrefixLengthB == 1)
+			scoreGapCloseB = MINUS_INFINITY;
+		else
+			scoreGapCloseB = PB[uPrefixLengthB-2].m_scoreGapClose;
+
+		SCORE scoreGapClose2B;
+		if (uPrefixLengthB == 1)
+			scoreGapClose2B = MINUS_INFINITY;
+		else
+			scoreGapClose2B = PB[uPrefixLengthB-2].m_scoreGapClose2;
+
+		for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
+			{
+			const ProfPos &PPA = PA[uPrefixLengthA - 1];
+
+			{
+		// Match M=LetterA+LetterB
+			SCORE scoreLL = ScoreProfPos2(PPA, PPB);
+			DPL(uPrefixLengthA, uPrefixLengthB) = scoreLL;
+
+			SCORE scoreGapCloseA;
+			if (uPrefixLengthA == 1)
+				scoreGapCloseA = MINUS_INFINITY;
+			else
+				scoreGapCloseA = PA[uPrefixLengthA-2].m_scoreGapClose;
+
+			SCORE scoreGapClose2A;
+			if (uPrefixLengthA == 1)
+				scoreGapClose2A = MINUS_INFINITY;
+			else
+				scoreGapClose2A = PA[uPrefixLengthA-2].m_scoreGapClose2;
+
+			SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1);
+			SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseA;
+			SCORE scoreEM = DPE(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapClose2A;
+			SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseB;
+			SCORE scoreJM = DPJ(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapClose2B;
+
+		// Pick the maximum; on ties the first of M, D, E, I, J wins.
+			SCORE scoreBest;
+			if (scoreMM >= scoreDM && scoreMM >= scoreIM && scoreMM >= scoreEM && scoreMM >= scoreJM)
+				{
+				scoreBest = scoreMM;
+				TBM(uPrefixLengthA, uPrefixLengthB) = 'M';
+				}
+			else if (scoreDM >= scoreMM && scoreDM >= scoreIM && scoreDM >= scoreEM && scoreDM >= scoreJM)
+				{
+				scoreBest = scoreDM;
+				TBM(uPrefixLengthA, uPrefixLengthB) = 'D';
+				}
+			else if (scoreEM >= scoreMM && scoreEM >= scoreIM && scoreEM >= scoreDM && scoreEM >= scoreJM)
+				{
+				scoreBest = scoreEM;
+				TBM(uPrefixLengthA, uPrefixLengthB) = 'E';
+				}
+			else if (scoreIM >= scoreMM && scoreIM >= scoreDM && scoreIM >= scoreEM && scoreIM >= scoreJM)
+				{
+				scoreBest = scoreIM;
+				TBM(uPrefixLengthA, uPrefixLengthB) = 'I';
+				}
+			else if (scoreJM >= scoreMM && scoreJM >= scoreDM && scoreJM >= scoreEM && scoreJM >= scoreIM)
+				{
+				scoreBest = scoreJM;
+				TBM(uPrefixLengthA, uPrefixLengthB) = 'J';
+				}
+			else
+				Quit("Max failed (M)");
+
+			DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL;
+			}
+
+			{
+		// Delete D=LetterA+GapB
+			SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) +
+			  PA[uPrefixLengthA-1].m_scoreGapOpen;
+			SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) +
+			  g_scoreGapExtend;
+
+			SCORE scoreBest;
+			if (scoreMD >= scoreDD)
+				{
+				scoreBest = scoreMD;
+				TBD(uPrefixLengthA, uPrefixLengthB) = 'M';
+				}
+			else
+				{
+				assert(scoreDD >= scoreMD);
+				scoreBest = scoreDD;
+				TBD(uPrefixLengthA, uPrefixLengthB) = 'D';
+				}
+			DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;
+			}
+
+			{
+		// Delete E=LetterA+GapB
+			SCORE scoreME = DPM(uPrefixLengthA-1, uPrefixLengthB) +
+			  PA[uPrefixLengthA-1].m_scoreGapOpen2;
+			SCORE scoreEE = DPE(uPrefixLengthA-1, uPrefixLengthB) +
+			  g_scoreGapExtend2;
+
+			SCORE scoreBest;
+			if (scoreME >= scoreEE)
+				{
+				scoreBest = scoreME;
+				TBE(uPrefixLengthA, uPrefixLengthB) = 'M';
+				}
+			else
+				{
+				assert(scoreEE >= scoreME);
+				scoreBest = scoreEE;
+				TBE(uPrefixLengthA, uPrefixLengthB) = 'E';
+				}
+			DPE(uPrefixLengthA, uPrefixLengthB) = scoreBest;
+			}
+
+		// Insert I=GapA+LetterB
+			{
+			SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) +
+			  PB[uPrefixLengthB-1].m_scoreGapOpen;
+			SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) +
+			  g_scoreGapExtend;
+
+			SCORE scoreBest;
+			if (scoreMI >= scoreII)
+				{
+				scoreBest = scoreMI;
+				TBI(uPrefixLengthA, uPrefixLengthB) = 'M';
+				}
+			else
+				{
+				assert(scoreII > scoreMI);
+				scoreBest = scoreII;
+				TBI(uPrefixLengthA, uPrefixLengthB) = 'I';
+				}
+			DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;
+			}
+
+		// Insert J=GapA+LetterB
+			{
+			SCORE scoreMJ = DPM(uPrefixLengthA, uPrefixLengthB-1) +
+			  PB[uPrefixLengthB-1].m_scoreGapOpen2;
+			SCORE scoreJJ = DPJ(uPrefixLengthA, uPrefixLengthB-1) +
+			  g_scoreGapExtend2;
+
+		// NOTE(review): ties go to 'J' here (strict '>'), whereas the D, E
+		// and I recurrences above send ties to 'M' ('>='). Left unchanged
+		// because it decides which of two equal-scoring paths is traced.
+			SCORE scoreBest;
+			if (scoreMJ > scoreJJ)
+				{
+				scoreBest = scoreMJ;
+				TBJ(uPrefixLengthA, uPrefixLengthB) = 'M';
+				}
+			else
+				{
+				assert(scoreJJ >= scoreMJ);
+				scoreBest = scoreJJ;
+				TBJ(uPrefixLengthA, uPrefixLengthB) = 'J';
+				}
+			DPJ(uPrefixLengthA, uPrefixLengthB) = scoreBest;
+			}
+			}
+		}
+
+// Special case: close gaps at end of alignment
+	DPD(uLengthA, uLengthB) += PA[uLengthA-1].m_scoreGapClose;
+	DPE(uLengthA, uLengthB) += PA[uLengthA-1].m_scoreGapClose2;
+
+	DPI(uLengthA, uLengthB) += PB[uLengthB-1].m_scoreGapClose;
+	DPJ(uLengthA, uLengthB) += PB[uLengthB-1].m_scoreGapClose2;
+
+#if TRACE
+	Log("DPL:\n");
+	ListDP(DPL_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("DPM:\n");
+	ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("DPD:\n");
+	ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("DPE:\n");
+	ListDP(DPE_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("DPI:\n");
+	ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("DPJ:\n");
+	ListDP(DPJ_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("TBM:\n");
+	ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("TBD:\n");
+	ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("TBE:\n");
+	ListTB(TBE_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("TBI:\n");
+	ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("TBJ:\n");
+	ListTB(TBJ_, PA, PB, uPrefixCountA, uPrefixCountB);
+#endif
+
+// ==========
+// Trace-back
+// ==========
+
+	Path.Clear();
+
+// Find last edge
+	char cEdgeType = '?';
+	SCORE BestScore = MINUS_INFINITY;
+	SCORE M = DPM(uLengthA, uLengthB);
+	SCORE D = DPD(uLengthA, uLengthB);
+	SCORE E = DPE(uLengthA, uLengthB);
+	SCORE I = DPI(uLengthA, uLengthB);
+	SCORE J = DPJ(uLengthA, uLengthB);
+
+	if (M >= D && M >= E && M >= I && M >= J)
+		{
+		cEdgeType = 'M';
+		BestScore = M;
+		}
+	else if (D >= M && D >= E && D >= I && D >= J)
+		{
+		cEdgeType = 'D';
+		BestScore = D;
+		}
+	else if (E >= M && E >= D && E >= I && E >= J)
+		{
+		cEdgeType = 'E';
+		BestScore = E;
+		}
+	else if (I >= M && I >= D && I >= E && I >= J)
+		{
+		cEdgeType = 'I';
+		BestScore = I;
+		}
+	else if (J >= M && J >= D && J >= E && J >= I)
+		{
+		cEdgeType = 'J';
+		BestScore = J;
+		}
+	else
+		Quit("Bad max");
+
+// Walk back from (uLengthA, uLengthB) to (0, 0), prepending one edge per
+// step. The traceback matrix of the current state gives the next state.
+	unsigned PLA = uLengthA;
+	unsigned PLB = uLengthB;
+	unsigned ECount = 0;
+	unsigned JCount = 0;
+	for (;;)
+		{
+#if	TRACE
+		Log("TraceBack: %c%u.%u\n", cEdgeType, PLA, PLB);
+#endif
+		PWEdge Edge;
+		Edge.cType = XlatEdgeType(cEdgeType);
+		Edge.uPrefixLengthA = PLA;
+		Edge.uPrefixLengthB = PLB;
+		Path.PrependEdge(Edge);
+
+		switch (cEdgeType)
+			{
+		case 'M':
+			assert(PLA > 0);
+			assert(PLB > 0);
+			cEdgeType = TBM(PLA, PLB);
+			--PLA;
+			--PLB;
+			break;
+
+		case 'D':
+			assert(PLA > 0);
+			cEdgeType = TBD(PLA, PLB);
+			--PLA;
+			break;
+
+		case 'E':
+			++ECount;
+			assert(PLA > 0);
+			cEdgeType = TBE(PLA, PLB);
+			--PLA;
+			break;
+
+		case 'I':
+			assert(PLB > 0);
+			cEdgeType = TBI(PLA, PLB);
+			--PLB;
+			break;
+
+		case 'J':
+			++JCount;
+			assert(PLB > 0);
+			cEdgeType = TBJ(PLA, PLB);
+			--PLB;
+			break;
+
+		default:
+			Quit("Invalid edge %c", cEdgeType);
+			}
+		if (0 == PLA && 0 == PLB)
+			break;
+		}
+	//if (ECount > 0 || JCount > 0)
+	//	fprintf(stderr, "E=%d J=%d\n", ECount, JCount);
+	Path.Validate();
+	if (Path.GetMatchCount() + Path.GetDeleteCount() != uLengthA)
+		Quit("Path count A");
+	if (Path.GetMatchCount() + Path.GetInsertCount() != uLengthB)
+		Quit("Path count B");
+
+// DPL is never handed to a global, so it is always released here;
+// previously it was leaked on every call.
+	delete[] DPL_;
+
+	if (g_bKeepSimpleDP)
+		{
+		g_DPM = DPM_;
+		g_DPD = DPD_;
+		g_DPE = DPE_;
+		g_DPI = DPI_;
+		g_DPJ = DPJ_;
+
+		g_TBM = TBM_;
+		g_TBD = TBD_;
+		g_TBE = TBE_;
+		g_TBI = TBI_;
+		g_TBJ = TBJ_;
+		}
+	else
+		{
+		delete[] DPM_;
+		delete[] DPD_;
+		delete[] DPE_;
+		delete[] DPI_;
+		delete[] DPJ_;
+
+		delete[] TBM_;
+		delete[] TBD_;
+		delete[] TBE_;
+		delete[] TBI_;
+		delete[] TBJ_;
+		}
+
+#if	TRACE
+	Log("BestScore=%.6g\n", BestScore);
+#endif
+	return BestScore;
+	}
+
+#endif	// DOUBLE_AFFINE

Added: trunk/packages/muscle/branches/upstream/current/nwdasmall.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/nwdasmall.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/nwdasmall.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,947 @@
+#include "muscle.h"
+#include <math.h>
+#include "pwpath.h"
+#include "profile.h"
+#include <stdio.h>
+
+#if	DOUBLE_AFFINE
+
+// NW double affine small memory, term gaps fully penalized
+// (so up to caller to adjust in profile if desired).
+
+#define	TRACE	0
+
+#define MIN(x, y)	((x) < (y) ? (x) : (y))
+
+#if	TRACE
+extern bool g_bKeepSimpleDP;
+extern SCORE *g_DPM;
+extern SCORE *g_DPD;
+extern SCORE *g_DPE;
+extern SCORE *g_DPI;
+extern SCORE *g_DPJ;
+extern char *g_TBM;
+extern char *g_TBD;
+extern char *g_TBE;
+extern char *g_TBI;
+extern char *g_TBJ;
+#endif
+
+// ALLOC_TRACE(): with TRACE enabled, declares and allocates full-size DP
+// matrices (DPM_, DPD_, DPE_, DPI_, DPJ_) and traceback matrices (TBM_,
+// TBD_, TBE_, TBI_, TBJ_) in the scope where it is expanded. Traceback cells
+// are filled with '?' and DP cells with MINUS_INFINITY.
+// The expansion site must provide uPrefixCountA, uPrefixCountB, uLengthA
+// and uLengthB. With TRACE disabled the macro expands to nothing.
+#if	TRACE
+#define ALLOC_TRACE()								\
+	const SCORE UNINIT = MINUS_INFINITY;			\
+	const size_t LM = uPrefixCountA*uPrefixCountB;	\
+													\
+	SCORE *DPM_ = new SCORE[LM];					\
+	SCORE *DPD_ = new SCORE[LM];					\
+	SCORE *DPE_ = new SCORE[LM];					\
+	SCORE *DPI_ = new SCORE[LM];					\
+	SCORE *DPJ_ = new SCORE[LM];					\
+													\
+	char *TBM_ = new char[LM];						\
+	char *TBD_ = new char[LM];						\
+	char *TBE_ = new char[LM];						\
+	char *TBI_ = new char[LM];						\
+	char *TBJ_ = new char[LM];						\
+													\
+	memset(TBM_, '?', LM);							\
+	memset(TBD_, '?', LM);							\
+	memset(TBE_, '?', LM);							\
+	memset(TBI_, '?', LM);							\
+	memset(TBJ_, '?', LM);							\
+													\
+	for (unsigned i = 0; i <= uLengthA; ++i)		\
+		for (unsigned j = 0; j <= uLengthB; ++j)	\
+			{										\
+			DPM(i, j) = UNINIT;						\
+			DPD(i, j) = UNINIT;						\
+			DPE(i, j) = UNINIT;						\
+			DPI(i, j) = UNINIT;						\
+			DPJ(i, j) = UNINIT;						\
+			}
+#else
+#define ALLOC_TRACE()
+#endif
+
+// SetDPx(i, j, x) / SetTBx(i, j, x): with TRACE enabled, store x into cell
+// (i, j) of the corresponding full-size trace matrix through the DPx()/TBx()
+// accessors. With TRACE disabled they expand to nothing, so their arguments
+// are never evaluated.
+#if	TRACE
+#define SetDPM(i, j, x)		DPM(i, j) = x
+#define SetDPD(i, j, x)		DPD(i, j) = x
+#define SetDPE(i, j, x)		DPE(i, j) = x
+#define SetDPI(i, j, x)		DPI(i, j) = x
+#define SetDPJ(i, j, x)		DPJ(i, j) = x
+#define SetTBM(i, j, x)		TBM(i, j) = x
+#define SetTBD(i, j, x)		TBD(i, j) = x
+#define SetTBE(i, j, x)		TBE(i, j) = x
+#define SetTBI(i, j, x)		TBI(i, j) = x
+#define SetTBJ(i, j, x)		TBJ(i, j) = x
+#else
+#define SetDPM(i, j, x)		/* empty  */
+#define SetDPD(i, j, x)		/* empty  */
+#define SetDPE(i, j, x)		/* empty  */
+#define SetDPI(i, j, x)		/* empty  */
+#define SetDPJ(i, j, x)		/* empty  */
+#define SetTBM(i, j, x)		/* empty  */
+#define SetTBD(i, j, x)		/* empty  */
+#define SetTBE(i, j, x)		/* empty  */
+#define SetTBI(i, j, x)		/* empty  */
+#define SetTBJ(i, j, x)		/* empty  */
+#endif
+
+// RECURSE_D(i, j): update DRow[j] in place for cell (i, j) of the D state.
+// Candidates are extending the gap (DRow[j] + e) and opening one from the
+// M state (MPrev[j] + PA[i-1].m_scoreGapOpen); a tie goes to M.
+// Only the M outcome writes to the packed traceback TB (via SetBitTBD); on
+// the D outcome the TB cell is left as it was.
+// Uses DRow, MPrev, PA, e and TB from the expansion site.
+#define RECURSE_D(i, j)				\
+	{								\
+	SCORE DD = DRow[j] + e;			\
+	SCORE MD = MPrev[j] + PA[i-1].m_scoreGapOpen;\
+	if (DD > MD)					\
+		{							\
+		DRow[j] = DD;				\
+		SetTBD(i, j, 'D');			\
+		}							\
+	else							\
+		{							\
+		DRow[j] = MD;				\
+		SetBitTBD(TB, i, j, 'M');	\
+		SetTBD(i, j, 'M');			\
+		}							\
+	SetDPD(i, j, DRow[j]);			\
+	}
+
+// RECURSE_E(i, j): update ERow[j] in place for cell (i, j) of the E state,
+// i.e. the D recurrence with the second penalty set. Candidates are
+// ERow[j] + e2 and MPrev[j] + PA[i-1].m_scoreGapOpen2; a tie goes to M.
+// Only the M outcome writes to the packed traceback TB (via SetBitTBE).
+// Uses ERow, MPrev, PA, e2 and TB from the expansion site.
+#define RECURSE_E(i, j)				\
+	{								\
+	SCORE EE = ERow[j] + e2;		\
+	SCORE ME = MPrev[j] + PA[i-1].m_scoreGapOpen2;\
+	if (EE > ME)					\
+		{							\
+		ERow[j] = EE;				\
+		SetTBE(i, j, 'E');			\
+		}							\
+	else							\
+		{							\
+		ERow[j] = ME;				\
+		SetBitTBE(TB, i, j, 'M');	\
+		SetTBE(i, j, 'M');			\
+		}							\
+	SetDPE(i, j, ERow[j]);			\
+	}
+
+// Fixed-row and fixed-column forms of RECURSE_D and RECURSE_E.
+// The ATerm forms fix i = uLengthA. The BTerm forms fix j = uLengthB; their
+// macro parameter j is not used, and i is taken from the scope in which the
+// macro is expanded.
+#define RECURSE_D_ATerm(j)	RECURSE_D(uLengthA, j)
+#define RECURSE_E_ATerm(j)	RECURSE_E(uLengthA, j)
+
+#define RECURSE_D_BTerm(j)	RECURSE_D(i, uLengthB)
+#define RECURSE_E_BTerm(j)	RECURSE_E(i, uLengthB)
+
+// RECURSE_I(i, j): update the running I-state score Iij for cell (i, j).
+// Iij is first extended by e, then compared with opening a gap from the M
+// state (MCurr[j-1] + PB[j-1].m_scoreGapOpen); a tie goes to M.
+// Only the M outcome writes to the packed traceback TB (via SetBitTBI).
+// Uses Iij, MCurr, PB, e and TB from the expansion site.
+#define RECURSE_I(i, j)				\
+	{								\
+	Iij += e;						\
+	SCORE MI = MCurr[j-1] + PB[j-1].m_scoreGapOpen;\
+	if (MI >= Iij)					\
+		{							\
+		Iij = MI;					\
+		SetBitTBI(TB, i, j, 'M');	\
+		SetTBI(i, j, 'M');			\
+		}							\
+	else							\
+		SetTBI(i, j, 'I');			\
+	SetDPI(i, j, Iij);				\
+	}
+
+// RECURSE_J(i, j): update the running J-state score Jij for cell (i, j),
+// i.e. the I recurrence with the second penalty set. Jij is first extended
+// by e2, then compared with opening a gap from the M state
+// (MCurr[j-1] + PB[j-1].m_scoreGapOpen2); a tie goes to M.
+// Only the M outcome writes to the packed traceback TB (via SetBitTBJ).
+// Uses Jij, MCurr, PB, e2 and TB from the expansion site.
+// The extend branch records 'J' in the trace matrix; it used to record 'I',
+// which names the wrong state for a J-state cell.
+#define RECURSE_J(i, j)				\
+	{								\
+	Jij += e2;						\
+	SCORE MJ = MCurr[j-1] + PB[j-1].m_scoreGapOpen2;\
+	if (MJ >= Jij)					\
+		{							\
+		Jij = MJ;					\
+		SetBitTBJ(TB, i, j, 'M');	\
+		SetTBJ(i, j, 'M');			\
+		}							\
+	else							\
+		SetTBJ(i, j, 'J');			\
+	SetDPJ(i, j, Jij);				\
+	}
+
+// Fixed-row and fixed-column forms of RECURSE_I and RECURSE_J.
+// The ATerm forms fix i = uLengthA. The BTerm forms fix j = uLengthB; their
+// macro parameter j is not used, and i is taken from the scope in which the
+// macro is expanded.
+#define RECURSE_I_ATerm(j)	RECURSE_I(uLengthA, j)
+#define RECURSE_J_ATerm(j)	RECURSE_J(uLengthA, j)
+
+#define RECURSE_I_BTerm(j)	RECURSE_I(i, uLengthB)
+#define RECURSE_J_BTerm(j)	RECURSE_J(i, uLengthB)
+
+// RECURSE_M(i, j): finish the M-state score of cell (i+1, j+1).
+// The best predecessor is chosen among
+//	M: MCurr[j]
+//	D: DRow[j] + PA[i-1].m_scoreGapClose
+//	E: ERow[j] + PA[i-1].m_scoreGapClose2
+//	I: Iij + PB[j-1].m_scoreGapClose
+//	J: Jij + PB[j-1].m_scoreGapClose2
+// using strict '>', so on ties the earliest candidate in that order wins.
+// The winner is recorded in TB (SetBitTBM) for cell (i+1, j+1) and its score
+// is ADDED to MNext[j+1].
+// NOTE(review): this assumes MNext[j+1] already holds the letter-pair score
+// for that cell when the macro runs -- confirm at the call sites.
+#define RECURSE_M(i, j)									\
+	{													\
+	SCORE Best = MCurr[j];  /*  MM  */					\
+	SetTBM(i+1, j+1, 'M');								\
+	SetBitTBM(TB, i+1, j+1, 'M');						\
+														\
+	SCORE DM = DRow[j] + PA[i-1].m_scoreGapClose;		\
+	if (DM > Best)										\
+		{												\
+		Best = DM;										\
+		SetTBM(i+1, j+1, 'D');							\
+		SetBitTBM(TB, i+1, j+1, 'D');					\
+		}												\
+														\
+	SCORE EM = ERow[j] + PA[i-1].m_scoreGapClose2;		\
+	if (EM > Best)										\
+		{												\
+		Best = EM;										\
+		SetTBM(i+1, j+1, 'E');							\
+		SetBitTBM(TB, i+1, j+1, 'E');					\
+		}												\
+														\
+	SCORE IM = Iij + PB[j-1].m_scoreGapClose;			\
+	if (IM > Best)										\
+		{												\
+		Best = IM;										\
+		SetTBM(i+1, j+1, 'I');							\
+		SetBitTBM(TB, i+1, j+1, 'I');					\
+		}												\
+														\
+	SCORE JM = Jij + PB[j-1].m_scoreGapClose2;			\
+	if (JM > Best)										\
+		{												\
+		Best = JM;										\
+		SetTBM(i+1, j+1, 'J');							\
+		SetBitTBM(TB, i+1, j+1, 'J');					\
+		}												\
+	MNext[j+1] += Best;									\
+	SetDPM(i+1, j+1, MNext[j+1]);						\
+	}
+
+#if	TRACE
+// Approximate equality used when comparing DP matrices.
+// Two values match if both are below -100000, if they differ by less than
+// 0.0001, or if their difference is under 0.5% of |b1| + |b2|.
+static bool LocalEq(BASETYPE b1, BASETYPE b2)
+	{
+	const bool bBothVeryNegative = (b1 < -100000 && b2 < -100000);
+	if (bBothVeryNegative)
+		return true;
+
+	const double dAbsDiff = fabs(b1 - b2);
+	if (dAbsDiff < 0.0001)
+		return true;
+
+	const double dMagnitude = fabs(b1) + fabs(b2);
+	const double dRelDiff = dAbsDiff/dMagnitude;
+	return dRelDiff < 0.005;
+	}
+
+// Decode the M-state predecessor letter from one packed traceback cell.
+// The BIT_xM field selects 'M', 'D', 'E', 'I' or 'J'; any other value calls
+// Quit("Huh?") and, should that return, yields '?'.
+static char Get_M_Char(char Bits)
+	{
+	char cState = '?';
+	switch (Bits & BIT_xM)
+		{
+	case BIT_MM:
+		cState = 'M';
+		break;
+	case BIT_DM:
+		cState = 'D';
+		break;
+	case BIT_EM:
+		cState = 'E';
+		break;
+	case BIT_IM:
+		cState = 'I';
+		break;
+	case BIT_JM:
+		cState = 'J';
+		break;
+	default:
+		Quit("Huh?");
+		break;
+		}
+	return cState;
+	}
+
+// Decode the D-state predecessor from a packed traceback cell:
+// 'M' when the BIT_xD field is non-zero, otherwise 'D'.
+static char Get_D_Char(char Bits)
+	{
+	if (Bits & BIT_xD)
+		return 'M';
+	return 'D';
+	}
+
+// Decode the E-state predecessor from a packed traceback cell:
+// 'M' when the BIT_xE field is non-zero, otherwise 'E'.
+static char Get_E_Char(char Bits)
+	{
+	if (Bits & BIT_xE)
+		return 'M';
+	return 'E';
+	}
+
+// Decode the I-state predecessor from a packed traceback cell:
+// 'M' when the BIT_xI field is non-zero, otherwise 'I'.
+static char Get_I_Char(char Bits)
+	{
+	if (Bits & BIT_xI)
+		return 'M';
+	return 'I';
+	}
+
+// Decode the J-state predecessor from a packed traceback cell:
+// 'M' when the BIT_xJ field is non-zero, otherwise 'J'.
+static char Get_J_Char(char Bits)
+	{
+	if (Bits & BIT_xJ)
+		return 'M';
+	return 'J';
+	}
+
+// Compare a DP matrix kept from the simple implementation (g_DP) with the
+// one built here (DPD_), cell by cell, using LocalEq().
+// c is the state letter used in log messages. Logs the first differing cell
+// and returns false; returns true if all cells match. If g_DP is null, a
+// message is logged and true is returned.
+static bool DPEq(char c, SCORE *g_DP, SCORE *DPD_,
+  unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{
+	if (!g_DP)
+		{
+		Log("***DPDIFF*** DP%c=NULL\n", c);
+		return true;
+		}
+
+// The DPM() accessor reads from a variable named DPM_, so alias g_DP to it.
+	SCORE *DPM_ = g_DP;
+	for (unsigned uA = 0; uA < uPrefixCountA; ++uA)
+		{
+		for (unsigned uB = 0; uB < uPrefixCountB; ++uB)
+			{
+			if (LocalEq(DPM(uA, uB), DPD(uA, uB)))
+				continue;
+			Log("***DPDIFF*** DP%c(%d, %d) Simple = %.2g, Small = %.2g\n",
+			  c, uA, uB, DPM(uA, uB), DPD(uA, uB));
+			return false;
+			}
+		}
+	return true;
+	}
+
+// Compare the traceback letters kept from the simple implementation
+// (TBM_..TBJ_, read through TBM()..TBJ()) with the letters decoded from the
+// packed traceback matrix TB.
+//
+// Returns true at once when g_bKeepSimpleDP is not set. Otherwise each of
+// the five state matrices is scanned in turn; a cell counts as a mismatch
+// only if the simple letter is not '?', differs from the decoded letter,
+// and the matching simple DP score (g_DPM..g_DPJ) is above -100000. On the
+// first mismatch in a matrix the cell is logged, Eq is cleared and the scan
+// jumps to the next matrix. Returns true if no mismatch was found.
+static bool CompareTB(char **TB, char *TBM_, char *TBD_, char *TBE_, char *TBI_, char *TBJ_,
+  unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{
+	if (!g_bKeepSimpleDP)
+		return true;
+// Local aliases named DPx_ so the DPx() accessors read the kept matrices.
+	SCORE *DPM_ = g_DPM;
+	bool Eq = true;
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		for (unsigned j = 0; j < uPrefixCountB; ++j)
+			{
+			char c1 = TBM(i, j);
+			char c2 = Get_M_Char(TB[i][j]);
+			if (c1 != '?' && c1 != c2 && DPM(i, j) > -100000)
+				{
+				Log("TBM(%d, %d) Simple = %c, NW = %c\n", i, j, c1, c2);
+				Eq = false;
+				goto D;
+				}
+			}
+
+D:
+	SCORE *DPD_ = g_DPD;
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		for (unsigned j = 0; j < uPrefixCountB; ++j)
+			{
+			char c1 = TBD(i, j);
+			char c2 = Get_D_Char(TB[i][j]);
+			if (c1 != '?' && c1 != c2 && DPD(i, j) > -100000)
+				{
+				Log("TBD(%d, %d) Simple = %c, NW = %c\n", i, j, c1, c2);
+				Eq = false;
+				goto E;
+				}
+			}
+E:
+	SCORE *DPE_ = g_DPE;
+// The E comparison is skipped when no E traceback matrix was supplied.
+	if (0 == TBE_)
+		goto I;
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		for (unsigned j = 0; j < uPrefixCountB; ++j)
+			{
+			char c1 = TBE(i, j);
+			char c2 = Get_E_Char(TB[i][j]);
+			if (c1 != '?' && c1 != c2 && DPE(i, j) > -100000)
+				{
+				Log("TBE(%d, %d) Simple = %c, NW = %c\n", i, j, c1, c2);
+				Eq = false;
+				goto I;
+				}
+			}
+I:
+	SCORE *DPI_ = g_DPI;
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		for (unsigned j = 0; j < uPrefixCountB; ++j)
+			{
+			char c1 = TBI(i, j);
+			char c2 = Get_I_Char(TB[i][j]);
+			if (c1 != '?' && c1 != c2 && DPI(i, j) > -100000)
+				{
+				Log("TBI(%d, %d) Simple = %c, NW = %c\n", i, j, c1, c2);
+				Eq = false;
+				goto J;
+				}
+			}
+J:
+	SCORE *DPJ_ = g_DPJ;
+// The J comparison is skipped when no J score matrix was kept. Note that
+// this tests the DP pointer, whereas the E section tests the TB pointer.
+	if (0 == DPJ_)
+		goto Done;
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		for (unsigned j = 0; j < uPrefixCountB; ++j)
+			{
+			char c1 = TBJ(i, j);
+			char c2 = Get_J_Char(TB[i][j]);
+			if (c1 != '?' && c1 != c2 && DPJ(i, j) > -100000)
+				{
+				Log("TBJ(%d, %d) Simple = %c, NW = %c\n", i, j, c1, c2);
+				Eq = false;
+				goto Done;
+				}
+			}
+Done:
+	if (Eq)
+		Log("TB success\n");
+	return Eq;
+	}
+
+// Format a score for the trace tables.
+// Scores below -100000 are shown as "     *"; anything else is printed
+// with "%6.1f". The result points either at a string literal or at a static
+// buffer that is overwritten by the next call.
+static const char *LocalScoreToStr(SCORE s)
+	{
+	static char str[16];
+	if (s < -100000)
+		return "     *";
+// "%6.1f" only sets a minimum width, so bound the write to the buffer size
+// in case the score is very large.
+	snprintf(str, sizeof(str), "%6.1f", s);
+	return str;
+	}
+
+// Write one DP score matrix to the log.
+// The first line labels every prefix length of B with its consensus
+// character; each following line is one prefix length of A. Cells are read
+// through the DPM() accessor and formatted by LocalScoreToStr().
+static void LogDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,
+  unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{
+// Column header; prefix length 0 has no letter, so a blank is printed.
+	Log("        ");
+	for (unsigned j = 0; j < uPrefixCountB; ++j)
+		{
+		const char cB = (j > 0) ? ConsensusChar(PB[j - 1]) : ' ';
+		Log(" %4u:%c", j, cB);
+		}
+	Log("\n");
+
+// One row per prefix length of A.
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		{
+		const char cA = (i > 0) ? ConsensusChar(PA[i - 1]) : ' ';
+		Log("%4u:%c  ", i, cA);
+		for (unsigned j = 0; j < uPrefixCountB; ++j)
+			Log(" %s", LocalScoreToStr(DPM(i, j)));
+		Log("\n");
+		}
+	}
+
+// Log the traceback matrix of one state, decoded from the packed TB cells.
+// Writes a blank-line separator, the title "Bit TB<cState>:", then one row
+// per prefix length of A. GetChar decodes the letter for this state from a
+// single TB cell.
+static void LogBitTBState(char cState, char (*GetChar)(char), char **TB,
+  const ProfPos *PA, unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{
+	Log("\n");
+	Log("Bit TB%c:\n", cState);
+	for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
+		{
+		char cA = ' ';
+		if (uPrefixLengthA > 0)
+			cA = ConsensusChar(PA[uPrefixLengthA - 1]);
+		Log("%4u:%c  ", uPrefixLengthA, cA);
+		for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+			Log(" %6c", GetChar(TB[uPrefixLengthA][uPrefixLengthB]));
+		Log("\n");
+		}
+	}
+
+// Log the packed traceback matrix TB as five letter matrices, one per state
+// (M, D, E, I, J), after a single header line that labels each prefix
+// length of B with its consensus character.
+// The five per-state dumps used to be five copies of the same loop; they
+// now share LogBitTBState(). The logged text is unchanged.
+static void LogBitTB(char **TB, const ProfPos *PA, const ProfPos *PB,
+  unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{
+	Log("        ");
+	for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+		{
+		char c = ' ';
+		if (uPrefixLengthB > 0)
+			c = ConsensusChar(PB[uPrefixLengthB - 1]);
+		Log(" %4u:%c", uPrefixLengthB, c);
+		}
+
+// Each call starts with "\n", which also terminates the header line above.
+	LogBitTBState('M', Get_M_Char, TB, PA, uPrefixCountA, uPrefixCountB);
+	LogBitTBState('D', Get_D_Char, TB, PA, uPrefixCountA, uPrefixCountB);
+	LogBitTBState('E', Get_E_Char, TB, PA, uPrefixCountA, uPrefixCountB);
+	LogBitTBState('I', Get_I_Char, TB, PA, uPrefixCountA, uPrefixCountB);
+	LogBitTBState('J', Get_J_Char, TB, PA, uPrefixCountA, uPrefixCountB);
+	}
+
+// Write one full-size traceback matrix to the log: a header line for the
+// prefixes of B, then one line per prefix of A with the traceback letter of
+// every cell (read through TBM()).
+static void ListTB(char *TBM_, const ProfPos *PA, const ProfPos *PB,
+  unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{
+// Column header; prefix length 0 has no letter, so a blank is printed.
+	Log("        ");
+	for (unsigned j = 0; j < uPrefixCountB; ++j)
+		{
+		const char cB = (j > 0) ? ConsensusChar(PB[j - 1]) : ' ';
+		Log(" %4u:%c", j, cB);
+		}
+	Log("\n");
+
+// One row per prefix length of A.
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		{
+		const char cA = (i > 0) ? ConsensusChar(PA[i - 1]) : ' ';
+		Log("%4u:%c  ", i, cA);
+		for (unsigned j = 0; j < uPrefixCountB; ++j)
+			{
+			const char cTB = TBM(i, j);
+			Log(" %6c", cTB);
+			}
+		Log("\n");
+		}
+	}
+
+// Render one packed traceback cell as text of the form "?M ?D ?E ?I ?J",
+// where each '?' is the predecessor letter decoded for that state.
+// Returns a pointer to a static buffer that is overwritten by the next call.
+static const char *BitsToStr(char Bits)
+	{
+	static char Str[32];
+
+	sprintf(Str, "%cM %cD %cE %cI %cJ",
+	  Get_M_Char(Bits),
+	  Get_D_Char(Bits),
+	  Get_E_Char(Bits),
+	  Get_I_Char(Bits),
+	  Get_J_Char(Bits));
+// The function previously fell off the end without returning a value.
+	return Str;
+	}
+#endif	// TRACE
+
+// Record the predecessor of the M state for cell (i, j) in the packed
+// traceback matrix: the BIT_xM field of TB[i][j] is cleared and replaced by
+// the code for c. Accepted letters are 'M' and 'D', plus 'E', 'I' and 'J'
+// when DOUBLE_AFFINE is non-zero; anything else calls Quit().
+static inline void SetBitTBM(char **TB, unsigned i, unsigned j, char c)
+	{
+	char Bit;
+	switch (c)
+		{
+	case 'M': Bit = BIT_MM; break;
+	case 'D': Bit = BIT_DM; break;
+#if	DOUBLE_AFFINE
+	case 'E': Bit = BIT_EM; break;
+	case 'I': Bit = BIT_IM; break;
+	case 'J': Bit = BIT_JM; break;
+#endif
+	default:
+		Quit("Huh?!");
+		}
+
+	char &Cell = TB[i][j];
+	Cell &= ~BIT_xM;
+	Cell |= Bit;
+	}
+
+// Record the predecessor of the D state for cell (i, j) in the packed
+// traceback matrix: the BIT_xD field of TB[i][j] is cleared and replaced by
+// BIT_MD for 'M' or BIT_DD for 'D'. Any other letter calls Quit().
+static inline void SetBitTBD(char **TB, unsigned i, unsigned j, char c)
+	{
+	char Bit;
+	if ('M' == c)
+		Bit = BIT_MD;
+	else if ('D' == c)
+		Bit = BIT_DD;
+	else
+		Quit("Huh?!");
+
+	char &Cell = TB[i][j];
+	Cell &= ~BIT_xD;
+	Cell |= Bit;
+	}
+
+// Record the predecessor of the I state for cell (i, j) in the packed
+// traceback matrix: the BIT_xI field of TB[i][j] is cleared and replaced by
+// BIT_MI for 'M' or BIT_II for 'I'. Any other letter calls Quit().
+static inline void SetBitTBI(char **TB, unsigned i, unsigned j, char c)
+	{
+	char Bit;
+	if ('M' == c)
+		Bit = BIT_MI;
+	else if ('I' == c)
+		Bit = BIT_II;
+	else
+		Quit("Huh?!");
+
+	char &Cell = TB[i][j];
+	Cell &= ~BIT_xI;
+	Cell |= Bit;
+	}
+
+#if	DOUBLE_AFFINE
+// Record the predecessor of the E state for cell (i, j) in the packed
+// traceback matrix: the BIT_xE field of TB[i][j] is cleared and replaced by
+// BIT_ME for 'M' or BIT_EE for 'E'. Any other letter calls Quit().
+static inline void SetBitTBE(char **TB, unsigned i, unsigned j, char c)
+	{
+	char Bit;
+	if ('M' == c)
+		Bit = BIT_ME;
+	else if ('E' == c)
+		Bit = BIT_EE;
+	else
+		Quit("Huh?!");
+
+	char &Cell = TB[i][j];
+	Cell &= ~BIT_xE;
+	Cell |= Bit;
+	}
+
+// Record the predecessor of the J state for cell (i, j) in the packed
+// traceback matrix: the BIT_xJ field of TB[i][j] is cleared and replaced by
+// BIT_MJ for 'M' or BIT_JJ for 'J'. Any other letter calls Quit().
+static inline void SetBitTBJ(char **TB, unsigned i, unsigned j, char c)
+	{
+	char Bit;
+	if ('M' == c)
+		Bit = BIT_MJ;
+	else if ('J' == c)
+		Bit = BIT_JJ;
+	else
+		Quit("Huh?!");
+
+	char &Cell = TB[i][j];
+	Cell &= ~BIT_xJ;
+	Cell |= Bit;
+	}
+#endif
+
+// LogMatrices(): with TRACE enabled, logs the five trace DP matrices and the
+// decoded packed traceback, then compares each DP matrix with the one kept
+// in g_DPM..g_DPJ (DPEq) and the traceback with g_TBM..g_TBJ (CompareTB),
+// logging "DPx success" for every DP matrix that matches.
+// Uses DPM_..DPJ_, TB, PA, PB, uPrefixCountA and uPrefixCountB from the
+// expansion site. With TRACE disabled the macro expands to nothing.
+#if	TRACE
+#define LogMatrices()											\
+	{															\
+	Log("Bit DPM:\n");											\
+	LogDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);			\
+	Log("Bit DPD:\n");											\
+	LogDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);			\
+	Log("Bit DPE:\n");											\
+	LogDP(DPE_, PA, PB, uPrefixCountA, uPrefixCountB);			\
+	Log("Bit DPI:\n");											\
+	LogDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);			\
+	Log("Bit DPJ:\n");											\
+	LogDP(DPJ_, PA, PB, uPrefixCountA, uPrefixCountB);			\
+	Log("Bit TB:\n");											\
+	LogBitTB(TB, PA, PB, uPrefixCountA, uPrefixCountB);			\
+	bool Same;													\
+	Same = DPEq('M', g_DPM, DPM_, uPrefixCountA, uPrefixCountB);\
+	if (Same)													\
+		Log("DPM success\n");									\
+	Same = DPEq('D', g_DPD, DPD_, uPrefixCountA, uPrefixCountB);\
+	if (Same)													\
+		Log("DPD success\n");									\
+	Same = DPEq('E', g_DPE, DPE_, uPrefixCountA, uPrefixCountB);\
+	if (Same)													\
+		Log("DPE success\n");									\
+	Same = DPEq('I', g_DPI, DPI_, uPrefixCountA, uPrefixCountB);\
+	if (Same)													\
+		Log("DPI success\n");									\
+	Same = DPEq('J', g_DPJ, DPJ_, uPrefixCountA, uPrefixCountB);\
+	if (Same)													\
+		Log("DPJ success\n");									\
+	CompareTB(TB, g_TBM, g_TBD, g_TBE, g_TBI, g_TBJ, uPrefixCountA, uPrefixCountB);\
+	}
+#else
+#define LogMatrices()	/* empty */
+#endif
+
+// Global alignment of two profiles using five DP states: M (match), D and E
+// (gap states scored with m_scoreGapOpen/Close + g_scoreGapExtend and with
+// m_scoreGapOpen2/Close2 + g_scoreGapExtend2 respectively, stepping through A)
+// and I and J (the same two penalty sets, stepping through B).
+//
+//	PA, uLengthA	first profile and its length (must be > 0)
+//	PB, uLengthB	second profile and its length (must be > 0)
+//	Path			receives the alignment path found by BitTraceBack
+//
+// Side effects: the gap-open scores of the first position and the gap-close
+// scores of the last position of BOTH input profiles are negated in place
+// (through a const cast) and are not restored before returning.
+//
+// Returns 0; the best score is computed but only logged when TRACE is on.
+//
+// NOTE(review): when uLengthA == 1 the "i=uLengthA" special case evaluates
+// (uLengthA - 2) and PA[uLengthA-2] with an unsigned wrap-around -- confirm
+// that callers never pass a length-1 profile A.
+SCORE NWDASmall(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path)
+	{
+	assert(uLengthB > 0 && uLengthA > 0);
+
+// Writable views of the first and last positions of each profile.
+	ProfPos *pa0 = (ProfPos *) PA;
+	ProfPos *pb0 = (ProfPos *) PB;
+	ProfPos *paa = (ProfPos *) (PA + uLengthA - 1);
+	ProfPos *pbb = (ProfPos *) (PB + uLengthB - 1);
+
+// Flip the sign of the terminal open/close penalties (both penalty sets).
+	pa0->m_scoreGapOpen *= -1;
+	pb0->m_scoreGapOpen *= -1;
+
+	paa->m_scoreGapClose *= -1;
+	pbb->m_scoreGapClose *= -1;
+
+	pa0->m_scoreGapOpen2 *= -1;
+	pb0->m_scoreGapOpen2 *= -1;
+	paa->m_scoreGapClose2 *= -1;
+	pbb->m_scoreGapClose2 *= -1;
+
+	const unsigned uPrefixCountA = uLengthA + 1;
+	const unsigned uPrefixCountB = uLengthB + 1;
+	const SCORE e = g_scoreGapExtend;
+
+	const SCORE e2 = g_scoreGapExtend2;
+	const SCORE min_e = MIN(g_scoreGapExtend, g_scoreGapExtend2);
+
+	ALLOC_TRACE()
+
+// Three rolling rows for M, one row each for D and E; I and J need only a
+// scalar (Iij, Jij) because they are carried along the current row.
+	SCORE *MCurr = new SCORE[uPrefixCountB];
+	SCORE *MNext = new SCORE[uPrefixCountB];
+	SCORE *MPrev = new SCORE[uPrefixCountB];
+	SCORE *DRow = new SCORE[uPrefixCountB];
+	SCORE *ERow = new SCORE[uPrefixCountB];
+
+// Full-size traceback matrix, one byte per cell, zero-initialized.
+	char **TB = new char *[uPrefixCountA];
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		{
+		TB[i] = new char [uPrefixCountB];
+		memset(TB[i], 0, uPrefixCountB);
+		}
+
+// Row i=0: I and J.
+	SCORE Iij = MINUS_INFINITY;
+	SetDPI(0, 0, Iij);
+
+	SCORE Jij = MINUS_INFINITY;
+	SetDPJ(0, 0, Jij);
+
+	Iij = PB[0].m_scoreGapOpen;
+	SetDPI(0, 1, Iij);
+
+	Jij = PB[0].m_scoreGapOpen2;
+	SetDPJ(0, 1, Jij);
+
+	for (unsigned j = 2; j <= uLengthB; ++j)
+		{
+		Iij += e;
+		Jij += e2;
+
+		SetDPI(0, j, Iij);
+		SetDPJ(0, j, Jij);
+
+		SetTBI(0, j, 'I');
+		SetTBJ(0, j, 'J');
+		}
+
+// Row i=0: D and E are impossible.
+	for (unsigned j = 0; j <= uLengthB; ++j)
+		{
+		DRow[j] = MINUS_INFINITY;
+		ERow[j] = MINUS_INFINITY;
+
+		SetDPD(0, j, DRow[j]);
+		SetDPE(0, j, ERow[j]);
+
+		SetTBD(0, j, 'D');
+		SetTBE(0, j, 'E');
+		}
+
+// Row i=0: M is 0 at the origin and impossible elsewhere.
+	MPrev[0] = 0;
+	SetDPM(0, 0, MPrev[0]);
+	for (unsigned j = 1; j <= uLengthB; ++j)
+		{
+		MPrev[j] = MINUS_INFINITY;
+		SetDPM(0, j, MPrev[j]);
+		}
+
+// Row i=1: M(1,1) is a plain match; M(1,j) for j>1 follows a left-terminal
+// gap in A, scored with whichever penalty set gives the higher score.
+	MCurr[0] = MINUS_INFINITY;
+	SetDPM(1, 0, MCurr[0]);
+
+	MCurr[1] = ScoreProfPos2(PA[0], PB[0]);
+	SetDPM(1, 1, MCurr[1]);
+	SetBitTBM(TB, 1, 1, 'M');
+	SetTBM(1, 1, 'M');
+
+	for (unsigned j = 2; j <= uLengthB; ++j)
+		{
+		SCORE M = ScoreProfPos2(PA[0], PB[j-1]) + PB[0].m_scoreGapOpen +
+		  (j - 2)*e + PB[j-2].m_scoreGapClose;
+		SCORE M2 = ScoreProfPos2(PA[0], PB[j-1]) + PB[0].m_scoreGapOpen2 +
+		  (j - 2)*e2 + PB[j-2].m_scoreGapClose2;
+
+		if (M >= M2)
+			{
+			MCurr[j] = M;
+			SetBitTBM(TB, 1, j, 'I');
+			SetTBM(1, j, 'I');
+			}
+		else
+			{
+			MCurr[j] = M2;
+			SetBitTBM(TB, 1, j, 'J');
+			SetTBM(1, j, 'J');
+			}
+		SetDPM(1, j, MCurr[j]);
+		}
+
+// Main DP loop
+	for (unsigned i = 1; i < uLengthA; ++i)
+		{
+		Iij = MINUS_INFINITY;
+		Jij = MINUS_INFINITY;
+		SetDPI(i, 0, Iij);
+		SetDPJ(i, 0, Jij);
+
+		DRow[0] = PA[0].m_scoreGapOpen + (i - 1)*e;
+		ERow[0] = PA[0].m_scoreGapOpen2 + (i - 1)*e2;
+		SetDPD(i, 0, DRow[0]);
+		SetDPE(i, 0, ERow[0]);
+
+	// Column j=1: match preceded by a left-terminal gap in B (or nothing
+	// when i == 1).
+		MCurr[0] = MINUS_INFINITY;
+		if (i == 1)
+			{
+			MCurr[1] = ScoreProfPos2(PA[0], PB[0]);
+			SetBitTBM(TB, i, 1, 'M');
+			SetTBM(i, 1, 'M');
+			}
+		else
+			{
+			SCORE M = ScoreProfPos2(PA[i-1], PB[0]) + PA[0].m_scoreGapOpen +
+			  (i - 2)*e + PA[i-2].m_scoreGapClose;
+			SCORE M2 = ScoreProfPos2(PA[i-1], PB[0]) + PA[0].m_scoreGapOpen2 +
+			  (i - 2)*e2 + PA[i-2].m_scoreGapClose2;
+			if (M >= M2)
+				{
+				MCurr[1] = M;
+				SetBitTBM(TB, i, 1, 'D');
+				SetTBM(i, 1, 'D');
+				}
+			else
+				{
+				MCurr[1] = M2;
+				SetBitTBM(TB, i, 1, 'E');
+				SetTBM(i, 1, 'E');
+				}
+			}
+		SetDPM(i, 0, MCurr[0]);
+		SetDPM(i, 1, MCurr[1]);
+
+	// Pre-load the next M row with the substitution scores.
+		for (unsigned j = 1; j < uLengthB; ++j)
+			MNext[j+1] = ScoreProfPos2(PA[i], PB[j]);
+
+		for (unsigned j = 1; j < uLengthB; ++j)
+			{
+			RECURSE_D(i, j)
+			RECURSE_E(i, j)
+			RECURSE_I(i, j)
+			RECURSE_J(i, j)
+			RECURSE_M(i, j)
+			}
+	// Special case for j=uLengthB
+		RECURSE_D_BTerm(i)
+		RECURSE_E_BTerm(i)
+		RECURSE_I_BTerm(i)
+		RECURSE_J_BTerm(i)
+
+	// Prev := Curr, Curr := Next, Next := Prev
+		Rotate(MPrev, MCurr, MNext);
+		}
+
+// Special case for i=uLengthA
+	MCurr[0] = MINUS_INFINITY;
+	SCORE M = ScoreProfPos2(PA[uLengthA-1], PB[0]) + (uLengthA - 2)*e +
+	  PA[0].m_scoreGapOpen + PA[uLengthA-2].m_scoreGapClose;
+// Second penalty set, mirroring the j=1 computation in the main loop.
+	SCORE M2 = ScoreProfPos2(PA[uLengthA-1], PB[0]) + (uLengthA - 2)*e2 +
+	  PA[0].m_scoreGapOpen2 + PA[uLengthA-2].m_scoreGapClose2;
+	if (M >= M2)
+		{
+		MCurr[1] = M;
+		SetBitTBM(TB, uLengthA, 1, 'D');
+		SetTBM(uLengthA, 1, 'D');
+		}
+	else
+		{
+		MCurr[1] = M2;
+		SetBitTBM(TB, uLengthA, 1, 'E');
+		SetTBM(uLengthA, 1, 'E');
+		}
+	SetDPM(uLengthA, 0, MCurr[0]);
+	SetDPM(uLengthA, 1, MCurr[1]);
+
+	DRow[0] = MINUS_INFINITY;
+	ERow[0] = MINUS_INFINITY;
+
+	SetDPD(uLengthA, 0, DRow[0]);
+	SetDPE(uLengthA, 0, ERow[0]);
+
+	for (unsigned j = 1; j <= uLengthB; ++j)
+		{
+		RECURSE_D_ATerm(j);
+		RECURSE_E_ATerm(j);
+		}
+
+	Iij = MINUS_INFINITY;
+	Jij = MINUS_INFINITY;
+
+	for (unsigned j = 1; j <= uLengthB; ++j)
+		{
+		RECURSE_I_ATerm(j)
+		RECURSE_J_ATerm(j)
+		}
+
+	LogMatrices();
+
+// Final cell: add the closing penalty of whichever gap state ends the path.
+	SCORE MAB = MCurr[uLengthB];
+	SCORE DAB = DRow[uLengthB] + PA[uLengthA-1].m_scoreGapClose;
+	SCORE EAB = ERow[uLengthB] + PA[uLengthA-1].m_scoreGapClose2;
+	SCORE IAB = Iij + PB[uLengthB-1].m_scoreGapClose;
+	SCORE JAB = Jij + PB[uLengthB-1].m_scoreGapClose2;
+
+// Pick the best final state; ties keep the earlier candidate (M, D, E, I, J).
+	SCORE Score = MAB;
+	char cEdgeType = 'M';
+	if (DAB > Score)
+		{
+		Score = DAB;
+		cEdgeType = 'D';
+		}
+	if (EAB > Score)
+		{
+		Score = EAB;
+		cEdgeType = 'E';
+		}
+	if (IAB > Score)
+		{
+		Score = IAB;
+		cEdgeType = 'I';
+		}
+	if (JAB > Score)
+		{
+		Score = JAB;
+		cEdgeType = 'J';
+		}
+
+#if TRACE
+	Log("    Small: MAB=%.4g DAB=%.4g EAB=%.4g IAB=%.4g JAB=%.4g best=%c\n",
+	  MAB, DAB, EAB, IAB, JAB, cEdgeType);
+#endif
+
+	BitTraceBack(TB, uLengthA, uLengthB, cEdgeType, Path);
+
+#if	DEBUG
+	Path.Validate();
+#endif
+
+	delete[] MCurr;
+	delete[] MNext;
+	delete[] MPrev;
+	delete[] DRow;
+	delete[] ERow;
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		delete[] TB[i];
+	delete[] TB;
+
+	return 0;
+	}
+#endif	// DOUBLE_AFFINE

Added: trunk/packages/muscle/branches/upstream/current/nwrec.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/nwrec.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/nwrec.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,137 @@
+/***
+Needleman-Wunsch recursions
+
+Notation: i,j are prefix lengths so are in
+ranges i = [0,|A|] and j = [0,|B|].
+
+Profile positions are in ranges [0,|A|-1]
+and [0,|B|-1] so prefix length i corresponds
+to position (i-1) in the profile, and similarly
+for j.
+
+Terminal gap scoring
+--------------------
+Terminal gaps are scored as with open [close]
+penalties only at the left [right] terminal,
+as follows:
+
+      0  i
+	  |  |
+	A XXXXX...
+	B ---XX...
+
+          i |A|-1
+          |  |
+	A ...XXXXX
+	B ...XX---
+
+In these examples, open / close penalty at position
+i is  included, but close / open penalty at |A|-1 /
+0 is not included.
+
+This is implemented by setting the open [close] 
+penalty to zero in the first [last] position of
+each profile.
+
+Consider adding a column to a sub-alignment. After the
+column is added, there are i letters from A and j letters
+from B.
+
+The column starts a left-terminal gap if:
+	Delete with i=1, j=0 or
+	Insert with i=0, j=1.
+
+The column ends a left-terminal gap if:
+	Match following Delete with j=1, or
+	Match following Insert with i=1.
+
+The column starts a right-terminal gap if:
+	Delete following a Match and i=|A|, or
+	Insert following a Match and j=|B|.
+
+The column ends a right-terminal gap if:
+	Match with i=|A|, j=|B| following Delete or Insert.
+	
+RECURSION RELATIONS
+===================
+
+         i-1
+          |
+DD	A ..X X
+	B ..- -
+
+MD	A ..X X
+	B ..X -
+
+D(i,j) = max
+			D(i-1,j) + e
+			M(i-1,j) + goA(i-1)
+Valid for:
+	i = [1,|A|-1]
+	j = [1,|B|]
+
+I(i,j) By symmetry with D(i,j).
+
+       i-2
+        | i-1
+		| |
+MM	A ..X X
+	B ..X X
+
+DM	A ..X X
+	B ..- X
+
+IM  A ..- X
+	B ..X X
+	    | |
+		| j-1
+	   j-2
+
+M(i,j) = L(i-1,j-1) + max
+			M(i-1,j-1)
+			D(i-1,j-1) + gcA(i-2)
+			I(i-1,j-1) + gcB(j-2)
+Valid for:
+	i = [2,|A|]
+	j = [2,|B|]
+
+Equivalently:
+
+M(i+1,j+1) = L(i,j) + max
+			M(i,j)
+			D(i,j) + gcA(i-1)
+			I(i,j) + gcB(j-1)
+
+Valid for:
+	i = [1,|A|-1]
+	j = [1,|B|-1]
+
+Boundary conditions
+===================
+
+A XXXX
+B ----
+	D(0,0) = -infinity
+
+	D(i,0) = ie
+		i = [1,|A|]
+
+	D(0,j) = -infinity
+		j = [0,|B|]
+
+I(0,0), I(0,j) and I(i,0) by symmetry with D.
+
+	M(0,0) = 0
+	M(i,0) = -infinity, i > 0
+	M(0,j) = -infinity, j > 0
+
+A X
+B -
+	D(1,0) = e
+	D(1,j) = -infinity, j = [1,|B|]
+		(assuming no I-D allowed).
+
+	D(0,1) = -infinity
+	D(1,1) = -infinity
+	D(i,1) = max.
+***/

Added: trunk/packages/muscle/branches/upstream/current/nwsmall.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/nwsmall.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/nwsmall.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,660 @@
+#include "muscle.h"
+#include <math.h>
+#include "pwpath.h"
+#include "profile.h"
+#include <stdio.h>
+
+// NW small memory
+
+#define	TRACE	0
+
+// Reference matrices defined in another translation unit. When TRACE is on,
+// LogMatrices() compares them against the matrices computed here (the log
+// messages call the reference values "Simple").
+// g_bKeepSimpleDP is declared but not referenced anywhere else in this file.
+#if	TRACE
+extern bool g_bKeepSimpleDP;
+extern SCORE *g_DPM;
+extern SCORE *g_DPD;
+extern SCORE *g_DPI;
+extern char *g_TBM;
+extern char *g_TBD;
+extern char *g_TBI;
+#endif
+
+// ALLOC_TRACE(): TRACE-only bookkeeping for NWSmall.
+// Allocates full-size (uPrefixCountA x uPrefixCountB) score matrices DPM_,
+// DPD_, DPI_ and character traceback matrices TBM_, TBD_, TBI_, fills the
+// traceback matrices with '?' and every score cell with MINUS_INFINITY.
+// Expects uPrefixCountA, uPrefixCountB, uLengthA and uLengthB to be in scope
+// at the point of expansion. The DPM()/DPD()/DPI() accessors are defined
+// elsewhere (presumably they index the *_ arrays declared here -- confirm).
+// Nothing in this file frees these buffers.
+// Expands to nothing when TRACE is off.
+#if	TRACE
+#define ALLOC_TRACE()								\
+	const SCORE UNINIT = MINUS_INFINITY;			\
+	const size_t LM = uPrefixCountA*uPrefixCountB;	\
+													\
+	SCORE *DPM_ = new SCORE[LM];					\
+	SCORE *DPD_ = new SCORE[LM];					\
+	SCORE *DPI_ = new SCORE[LM];					\
+													\
+	char *TBM_ = new char[LM];						\
+	char *TBD_ = new char[LM];						\
+	char *TBI_ = new char[LM];						\
+													\
+	memset(TBM_, '?', LM);							\
+	memset(TBD_, '?', LM);							\
+	memset(TBI_, '?', LM);							\
+													\
+	for (unsigned i = 0; i <= uLengthA; ++i)		\
+		for (unsigned j = 0; j <= uLengthB; ++j)	\
+			{										\
+			DPM(i, j) = UNINIT;						\
+			DPD(i, j) = UNINIT;						\
+			DPI(i, j) = UNINIT;						\
+			}
+#else
+#define ALLOC_TRACE()
+#endif
+
+// Recording hooks used throughout the DP code below.
+// With TRACE on, each one stores x into cell (i, j) of the corresponding
+// trace matrix via the DPM()/DPD()/DPI()/TBM()/TBD()/TBI() accessors.
+// With TRACE off they expand to nothing, so the argument expressions are
+// not evaluated at all.
+#if	TRACE
+#define SetDPM(i, j, x)		DPM(i, j) = x
+#define SetDPD(i, j, x)		DPD(i, j) = x
+#define SetDPI(i, j, x)		DPI(i, j) = x
+#define SetTBM(i, j, x)		TBM(i, j) = x
+#define SetTBD(i, j, x)		TBD(i, j) = x
+#define SetTBI(i, j, x)		TBI(i, j) = x
+#else
+#define SetDPM(i, j, x)		/* empty  */
+#define SetDPD(i, j, x)		/* empty  */
+#define SetDPI(i, j, x)		/* empty  */
+#define SetTBM(i, j, x)		/* empty  */
+#define SetTBD(i, j, x)		/* empty  */
+#define SetTBI(i, j, x)		/* empty  */
+#endif
+
+// RECURSE_D(i, j): update the delete state for cell (i, j) in place.
+//	DRow[j] = max(DRow[j] + e, MPrev[j] + PA[i-1].m_scoreGapOpen)
+// A tie goes to the M predecessor. When M wins, the D bits of TBRow[j] are
+// set to BIT_MD; when D wins, the bits are left untouched (NWSmall zeroes the
+// TB rows beforehand; presumably BIT_DD is 0 -- confirm).
+// Uses these names from the expansion site: DRow, MPrev, PA, e, TBRow.
+#define RECURSE_D(i, j)				\
+	{								\
+	SCORE DD = DRow[j] + e;			\
+	SCORE MD = MPrev[j] + PA[i-1].m_scoreGapOpen;\
+	if (DD > MD)					\
+		{							\
+		DRow[j] = DD;				\
+		SetTBD(i, j, 'D');			\
+		}							\
+	else							\
+		{							\
+		DRow[j] = MD;				\
+		/* SetBitTBD(TB, i, j, 'M'); */	\
+		TBRow[j] &= ~BIT_xD;		\
+		TBRow[j] |= BIT_MD;			\
+		SetTBD(i, j, 'M');			\
+		}							\
+	SetDPD(i, j, DRow[j]);			\
+	}
+
+// Terminal-row / terminal-column variants. Note that RECURSE_D_BTerm ignores
+// its own parameter and uses the identifier i from the expansion site.
+#define RECURSE_D_ATerm(j)	RECURSE_D(uLengthA, j)
+#define RECURSE_D_BTerm(j)	RECURSE_D(i, uLengthB)
+
+// RECURSE_I(i, j): update the running insert score Iij for cell (i, j).
+//	Iij = max(Iij + e, MCurr[j-1] + PB[j-1].m_scoreGapOpen)
+// A tie goes to the M predecessor. When M wins, the I bits of TBRow[j] are
+// set to BIT_MI; otherwise the bits are left untouched.
+// Uses these names from the expansion site: Iij, MCurr, PB, e, TBRow.
+#define RECURSE_I(i, j)				\
+	{								\
+	Iij += e;						\
+	SCORE MI = MCurr[j-1] + PB[j-1].m_scoreGapOpen;\
+	if (MI >= Iij)					\
+		{							\
+		Iij = MI;					\
+		/* SetBitTBI(TB, i, j, 'M'); */	\
+		TBRow[j] &= ~BIT_xI;		\
+		TBRow[j] |= BIT_MI;			\
+		SetTBI(i, j, 'M');			\
+		}							\
+	else							\
+		SetTBI(i, j, 'I');			\
+	SetDPI(i, j, Iij);				\
+	}
+
+// Terminal-row / terminal-column variants. Note that RECURSE_I_BTerm ignores
+// its own parameter and uses the identifier i from the expansion site.
+#define RECURSE_I_ATerm(j)	RECURSE_I(uLengthA, j)
+#define RECURSE_I_BTerm(j)	RECURSE_I(i, uLengthB)
+
+// RECURSE_M(i, j): complete the match score of cell (i+1, j+1).
+// MNext[j+1] must already hold the substitution score for that cell (NWSmall
+// pre-loads it with ScoreProfPos2(PA[i], PB[j])); this macro adds the best
+// predecessor score:
+//	max(MCurr[j], DRow[j] + PA[i-1].m_scoreGapClose,
+//	    Iij + PB[j-1].m_scoreGapClose)
+// Ties are resolved in the order M, D, I. The M bits of TB[i+1][j+1] are
+// cleared and then set to BIT_MM, BIT_DM or BIT_IM accordingly.
+// Uses these names from the expansion site: DRow, Iij, MCurr, MNext, PA, PB,
+// TB.
+#define RECURSE_M(i, j)								\
+	{												\
+	SCORE DM = DRow[j] + PA[i-1].m_scoreGapClose;	\
+	SCORE IM = Iij +     PB[j-1].m_scoreGapClose;	\
+	SCORE MM = MCurr[j];							\
+	TB[i+1][j+1] &= ~BIT_xM;							\
+	if (MM >= DM && MM >= IM)						\
+		{											\
+		MNext[j+1] += MM;							\
+		SetDPM(i+1, j+1, MNext[j+1]);				\
+		SetTBM(i+1, j+1, 'M');						\
+		/* SetBitTBM(TB, i+1, j+1, 'M');	*/		\
+		TB[i+1][j+1] |= BIT_MM;						\
+		}											\
+	else if (DM >= MM && DM >= IM)					\
+		{											\
+		MNext[j+1] += DM;							\
+		SetDPM(i+1, j+1, MNext[j+1]);				\
+		SetTBM(i+1, j+1, 'D');						\
+		/* SetBitTBM(TB, i+1, j+1, 'D'); */			\
+		TB[i+1][j+1] |= BIT_DM;						\
+		}											\
+	else											\
+		{											\
+		assert(IM >= MM && IM >= DM);				\
+		MNext[j+1] += IM;							\
+		SetDPM(i+1, j+1, MNext[j+1]);				\
+		SetTBM(i+1, j+1, 'I');						\
+		/* SetBitTBM(TB, i+1, j+1, 'I'); */			\
+		TB[i+1][j+1] |= BIT_IM;						\
+		}											\
+	}
+
+#if	TRACE
+// Approximate equality test for two DP scores.
+// Two values are considered equal when both are below -100000 (treated as
+// "minus infinity"), when they differ by less than 0.0001 in absolute terms,
+// or when the difference is less than 0.5% of the sum of their magnitudes.
+static bool LocalEq(BASETYPE b1, BASETYPE b2)
+	{
+	const bool bBothMinusInf = (b1 < -100000 && b2 < -100000);
+	if (bBothMinusInf)
+		return true;
+
+	const double dAbsDiff = fabs(b1 - b2);
+	if (dAbsDiff < 0.0001)
+		return true;
+
+// dAbsDiff >= 0.0001 here, so the denominator cannot be zero.
+	const double dMagnitude = fabs(b1) + fabs(b2);
+	return dAbsDiff/dMagnitude < 0.005;
+	}
+
+// Decodes the M-state traceback bits of a packed TB byte into the letter of
+// the predecessor state: 'M', 'D' or 'I'. Any other bit pattern is a fatal
+// error reported through Quit().
+static char Get_M_Char(char Bits)
+	{
+	if ((Bits & BIT_xM) == BIT_MM)
+		return 'M';
+	if ((Bits & BIT_xM) == BIT_DM)
+		return 'D';
+	if ((Bits & BIT_xM) == BIT_IM)
+		return 'I';
+
+	Quit("Huh?");
+	return '?';
+	}
+
+// Decodes the D-state traceback bit of a packed TB byte: 'M' when any bit of
+// BIT_xD is set (gap was opened from a match), otherwise 'D' (gap extended).
+static char Get_D_Char(char Bits)
+	{
+	if (Bits & BIT_xD)
+		return 'M';
+	return 'D';
+	}
+
+// Decodes the I-state traceback bit of a packed TB byte: 'M' when any bit of
+// BIT_xI is set (gap was opened from a match), otherwise 'I' (gap extended).
+static char Get_I_Char(char Bits)
+	{
+	if (Bits & BIT_xI)
+		return 'M';
+	return 'I';
+	}
+
+// Compares two score matrices cell by cell with LocalEq().
+//	c		state letter used only in the log message
+//	g_DP	first matrix (logged as "Simple")
+//	DPD_	second matrix (logged as "Fast")
+// Logs the first differing cell and returns false; returns true when every
+// cell matches.
+// The parameter name DPD_ and the local alias DPM_ are deliberate: the DPM()
+// and DPD() accessor macros (defined elsewhere) presumably refer to
+// identifiers with exactly these names, so renaming them would break the
+// lookups -- confirm against the macro definitions.
+static bool DPEq(char c, SCORE *g_DP, SCORE *DPD_,
+  unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{
+	SCORE *DPM_ = g_DP;
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		for (unsigned j = 0; j < uPrefixCountB; ++j)
+			if (!LocalEq(DPM(i, j), DPD(i, j)))
+				{
+				Log("***DPDIFF*** DP%c(%d, %d) Simple = %.2g, Fast = %.2g\n",
+				  c, i, j, DPM(i, j), DPD(i, j));
+				return false;
+				}
+	return true;
+	}
+
+// Compares the packed bit traceback matrix TB against three character
+// traceback matrices (TBM_, TBD_, TBI_), one pass per state.
+// A cell counts as a mismatch only if the character matrix entry is not '?',
+// it differs from the letter decoded from TB, and the corresponding global
+// score (g_DPM / g_DPD / g_DPI) at that cell is above -100000.
+// Within each pass only the first mismatch is logged; the goto then skips to
+// the next pass. Logs "TB success" and returns true when no pass found a
+// mismatch, otherwise returns false.
+// The parameter and local names ending in '_' are presumably the identifiers
+// that the TBM()/TBD()/TBI()/DPM()/DPD()/DPI() accessor macros expand to --
+// confirm before renaming.
+static bool CompareTB(char **TB, char *TBM_, char *TBD_, char *TBI_, 
+  unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{
+	SCORE *DPM_ = g_DPM;
+	bool Eq = true;
+// Pass 1: M state.
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		for (unsigned j = 0; j < uPrefixCountB; ++j)
+			{
+			char c1 = TBM(i, j);
+			char c2 = Get_M_Char(TB[i][j]);
+			if (c1 != '?' && c1 != c2 && DPM(i, j) > -100000)
+				{
+				Log("TBM(%d, %d) Simple = %c, NW = %c\n", i, j, c1, c2);
+				Eq = false;
+				goto D;
+				}
+			}
+
+// Pass 2: D state.
+D:
+	SCORE *DPD_ = g_DPD;
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		for (unsigned j = 0; j < uPrefixCountB; ++j)
+			{
+			char c1 = TBD(i, j);
+			char c2 = Get_D_Char(TB[i][j]);
+			if (c1 != '?' && c1 != c2 && DPD(i, j) > -100000)
+				{
+				Log("TBD(%d, %d) Simple = %c, NW = %c\n", i, j, c1, c2);
+				Eq = false;
+				goto I;
+				}
+			}
+// Pass 3: I state.
+I:
+	SCORE *DPI_ = g_DPI;
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		for (unsigned j = 0; j < uPrefixCountB; ++j)
+			{
+			char c1 = TBI(i, j);
+			char c2 = Get_I_Char(TB[i][j]);
+			if (c1 != '?' && c1 != c2 && DPI(i, j) > -100000)
+				{
+				Log("TBI(%d, %d) Simple = %c, NW = %c\n", i, j, c1, c2);
+				Eq = false;
+				goto Done;
+				}
+			}
+Done:
+	if (Eq)
+		Log("TB success\n");
+	return Eq;
+	}
+
+// Formats a score for the matrix dumps: "     *" for anything below -100000,
+// otherwise the value printed with "%6.1f".
+// The formatted text lives in a function-local static buffer, so the result
+// is overwritten by the next call.
+static const char *LocalScoreToStr(SCORE s)
+	{
+	static char szBuffer[16];
+	const char *pszResult = "     *";
+	if (!(s < -100000))
+		{
+		sprintf(szBuffer, "%6.1f", s);
+		pszResult = szBuffer;
+		}
+	return pszResult;
+	}
+
+// Writes one score matrix to the log as a table: a header row with the
+// prefix length and consensus letter for each column of B, then one row per
+// prefix length of A, each cell formatted by LocalScoreToStr().
+// The matrix parameter is named DPM_ whichever state it holds, presumably
+// because the DPM() accessor macro (defined elsewhere) refers to that
+// identifier -- confirm before renaming.
+static void LogDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,
+  unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{
+	Log("        ");
+	for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+		{
+		char c = ' ';
+		// Prefix length 0 has no profile position, so it gets a blank.
+		if (uPrefixLengthB > 0)
+			c = ConsensusChar(PB[uPrefixLengthB - 1]);
+		Log(" %4u:%c", uPrefixLengthB, c);
+		}
+	Log("\n");
+	for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
+		{
+		char c = ' ';
+		if (uPrefixLengthA > 0)
+			c = ConsensusChar(PA[uPrefixLengthA - 1]);
+		Log("%4u:%c  ", uPrefixLengthA, c);
+		for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+			Log(" %s", LocalScoreToStr(DPM(uPrefixLengthA, uPrefixLengthB)));
+		Log("\n");
+		}
+	}
+
+// Dumps the packed traceback matrix TB to the log as three tables, one per
+// state (M, D, I). A single column header (prefix length and consensus
+// letter of B) is printed first; every table row is labelled with the prefix
+// length and consensus letter of A.
+static void LogBitTB(char **TB, const ProfPos *PA, const ProfPos *PB,
+  unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{
+// Column header, shared by the three tables.
+	Log("        ");
+	for (unsigned j = 0; j < uPrefixCountB; ++j)
+		{
+		const char cB = (j > 0) ? ConsensusChar(PB[j - 1]) : ' ';
+		Log(" %4u:%c", j, cB);
+		}
+	Log("\n");
+
+// M state.
+	Log("Bit TBM:\n");
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		{
+		const char cA = (i > 0) ? ConsensusChar(PA[i - 1]) : ' ';
+		Log("%4u:%c  ", i, cA);
+		for (unsigned j = 0; j < uPrefixCountB; ++j)
+			Log(" %6c", Get_M_Char(TB[i][j]));
+		Log("\n");
+		}
+
+// D state.
+	Log("\n");
+	Log("Bit TBD:\n");
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		{
+		const char cA = (i > 0) ? ConsensusChar(PA[i - 1]) : ' ';
+		Log("%4u:%c  ", i, cA);
+		for (unsigned j = 0; j < uPrefixCountB; ++j)
+			Log(" %6c", Get_D_Char(TB[i][j]));
+		Log("\n");
+		}
+
+// I state.
+	Log("\n");
+	Log("Bit TBI:\n");
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		{
+		const char cA = (i > 0) ? ConsensusChar(PA[i - 1]) : ' ';
+		Log("%4u:%c  ", i, cA);
+		for (unsigned j = 0; j < uPrefixCountB; ++j)
+			Log(" %6c", Get_I_Char(TB[i][j]));
+		Log("\n");
+		}
+	}
+
+// Writes one character traceback matrix to the log as a table, using the same
+// layout as LogDP(): a header row for B, then one labelled row per prefix
+// length of A.
+// The matrix parameter is named TBM_ whichever state it holds, presumably
+// because the TBM() accessor macro (defined elsewhere) refers to that
+// identifier -- confirm before renaming.
+// Not called from anywhere else in this file.
+static void ListTB(char *TBM_, const ProfPos *PA, const ProfPos *PB,
+  unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{
+	Log("        ");
+	for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+		{
+		char c = ' ';
+		if (uPrefixLengthB > 0)
+			c = ConsensusChar(PB[uPrefixLengthB - 1]);
+		Log(" %4u:%c", uPrefixLengthB, c);
+		}
+	Log("\n");
+	for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
+		{
+		char c = ' ';
+		if (uPrefixLengthA > 0)
+			c = ConsensusChar(PA[uPrefixLengthA - 1]);
+		Log("%4u:%c  ", uPrefixLengthA, c);
+		for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+			{
+			// Inner c deliberately shadows the row-label c above.
+			char c = TBM(uPrefixLengthA, uPrefixLengthB);
+			Log(" %6c", c);
+			}
+		Log("\n");
+		}
+	}
+
+// Renders a packed TB byte as text of the form "xM yD zI", where x, y and z
+// are the predecessor letters decoded by Get_M_Char, Get_D_Char and
+// Get_I_Char.
+// Returns a pointer to a function-local static buffer, so the text is
+// overwritten by the next call. The buffer holds exactly the 8 formatted
+// characters plus the terminating NUL.
+static const char *BitsToStr(char Bits)
+	{
+	static char Str[9];
+
+	sprintf(Str, "%cM %cD %cI",
+	  Get_M_Char(Bits),
+	  Get_D_Char(Bits),
+	  Get_I_Char(Bits));
+	return Str;
+	}
+#endif	// TRACE
+
+// Stores the predecessor of the M state for cell (i, j) in the packed
+// traceback matrix. c must be 'M', 'D' or 'I'; anything else is a fatal
+// error reported through Quit(). Only the BIT_xM bits of the cell change.
+static inline void SetBitTBM(char **TB, unsigned i, unsigned j, char c)
+	{
+	char Bit;
+	if ('M' == c)
+		Bit = BIT_MM;
+	else if ('D' == c)
+		Bit = BIT_DM;
+	else if ('I' == c)
+		Bit = BIT_IM;
+	else
+		Quit("Huh?!");
+
+	char &Cell = TB[i][j];
+	Cell &= ~BIT_xM;
+	Cell |= Bit;
+	}
+
+// Stores the predecessor of the D state for cell (i, j) in the packed
+// traceback matrix. c must be 'M' or 'D'; anything else is a fatal error
+// reported through Quit(). Only the BIT_xD bits of the cell change.
+static inline void SetBitTBD(char **TB, unsigned i, unsigned j, char c)
+	{
+	char Bit;
+	if ('M' == c)
+		Bit = BIT_MD;
+	else if ('D' == c)
+		Bit = BIT_DD;
+	else
+		Quit("Huh?!");
+
+	char &Cell = TB[i][j];
+	Cell &= ~BIT_xD;
+	Cell |= Bit;
+	}
+
+// Stores the predecessor of the I state for cell (i, j) in the packed
+// traceback matrix. c must be 'M' or 'I'; anything else is a fatal error
+// reported through Quit(). Only the BIT_xI bits of the cell change.
+static inline void SetBitTBI(char **TB, unsigned i, unsigned j, char c)
+	{
+	char Bit;
+	if ('M' == c)
+		Bit = BIT_MI;
+	else if ('I' == c)
+		Bit = BIT_II;
+	else
+		Quit("Huh?!");
+
+	char &Cell = TB[i][j];
+	Cell &= ~BIT_xI;
+	Cell |= Bit;
+	}
+
+// LogMatrices(): TRACE-only self check, expanded at the end of NWSmall.
+// Dumps the three score matrices recorded by the SetDP* hooks and the packed
+// traceback matrix, then compares them against the global reference matrices
+// (g_DPM, g_DPD, g_DPI, g_TBM, g_TBD, g_TBI) with DPEq() and CompareTB().
+// Expects DPM_, DPD_, DPI_, TB, PA, PB, uPrefixCountA and uPrefixCountB to be
+// in scope at the point of expansion.
+// Expands to nothing when TRACE is off.
+#if	TRACE
+#define LogMatrices()											\
+	{															\
+	Log("Bit DPM:\n");											\
+	LogDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);			\
+	Log("Bit DPD:\n");											\
+	LogDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);			\
+	Log("Bit DPI:\n");											\
+	LogDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);			\
+	Log("Bit TB:\n");											\
+	LogBitTB(TB, PA, PB, uPrefixCountA, uPrefixCountB);			\
+	bool Same;													\
+	Same = DPEq('M', g_DPM, DPM_, uPrefixCountA, uPrefixCountB);\
+	if (Same)													\
+		Log("DPM success\n");									\
+	Same = DPEq('D', g_DPD, DPD_, uPrefixCountA, uPrefixCountB);\
+	if (Same)													\
+		Log("DPD success\n");									\
+	Same = DPEq('I', g_DPI, DPI_, uPrefixCountA, uPrefixCountB);\
+	if (Same)													\
+		Log("DPI success\n");									\
+	CompareTB(TB, g_TBM, g_TBD, g_TBI, uPrefixCountA, uPrefixCountB);\
+	}
+#else
+#define LogMatrices()	/* empty */
+#endif
+
+// Work buffers reused across calls to NWSmall; AllocCache() is the only code
+// in this file that allocates or frees them.
+// uCachePrefixCountA/B record the capacity of the current buffers (number of
+// TB rows, and length of each row / score array). All start out zero / null.
+// NOTE(review): file-scope mutable state -- NWSmall is presumably not safe to
+// call from several threads at once; confirm if that matters.
+static unsigned uCachePrefixCountB;
+static unsigned uCachePrefixCountA;
+static SCORE *CacheMCurr;
+static SCORE *CacheMNext;
+static SCORE *CacheMPrev;
+static SCORE *CacheDRow;
+static char **CacheTB;
+
+// Makes sure the cached work buffers can hold a DP problem with
+// uPrefixCountA rows and uPrefixCountB columns.
+// If the current buffers are already large enough in both dimensions nothing
+// happens. Otherwise every buffer is released and re-allocated with 1024
+// entries of headroom in each dimension. Existing contents are not kept, and
+// the new buffers are not initialized.
+static void AllocCache(unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{
+	const bool bRowsFit = (uPrefixCountA <= uCachePrefixCountA);
+	const bool bColsFit = (uPrefixCountB <= uCachePrefixCountB);
+	if (bRowsFit && bColsFit)
+		return;
+
+// Release the old buffers (all null / zero-sized on the first call).
+	for (unsigned uRow = 0; uRow < uCachePrefixCountA; ++uRow)
+		delete[] CacheTB[uRow];
+	delete[] CacheTB;
+	delete[] CacheDRow;
+	delete[] CacheMPrev;
+	delete[] CacheMNext;
+	delete[] CacheMCurr;
+
+// Grow with headroom so that slightly larger requests do not re-allocate.
+	uCachePrefixCountA = uPrefixCountA + 1024;
+	uCachePrefixCountB = uPrefixCountB + 1024;
+
+	CacheMCurr = new SCORE[uCachePrefixCountB];
+	CacheMNext = new SCORE[uCachePrefixCountB];
+	CacheMPrev = new SCORE[uCachePrefixCountB];
+	CacheDRow = new SCORE[uCachePrefixCountB];
+
+	CacheTB = new char *[uCachePrefixCountA];
+	for (unsigned uRow = 0; uRow < uCachePrefixCountA; ++uRow)
+		CacheTB[uRow] = new char [uCachePrefixCountB];
+	}
+
+// Global alignment of two profiles with affine gap penalties, keeping only
+// a few score rows in memory (three M rows, one D row, a scalar for I) plus
+// a one-byte-per-cell packed traceback matrix. All buffers come from the
+// file-level cache managed by AllocCache().
+//
+//	PA, uLengthA	first profile and its length (must be > 0)
+//	PB, uLengthB	second profile and its length (must be > 0)
+//	Path			receives the alignment path found by BitTraceBack
+//
+// Quits with an internal error if either length is 0. Calls SetTermGaps() on
+// both profiles before aligning.
+//
+// Returns 0; the best score is computed but only logged when TRACE is on.
+//
+// NOTE(review): when uLengthA == 1 the "i=uLengthA" special case evaluates
+// (uLengthA - 2) and PA[uLengthA-2] with an unsigned wrap-around -- confirm
+// that callers never pass a length-1 profile A.
+SCORE NWSmall(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path)
+	{
+	if (0 == uLengthB || 0 == uLengthA )
+		Quit("Internal error, NWSmall: length=0");
+
+	SetTermGaps(PA, uLengthA);
+	SetTermGaps(PB, uLengthB);
+
+	const unsigned uPrefixCountA = uLengthA + 1;
+	const unsigned uPrefixCountB = uLengthB + 1;
+	const SCORE e = g_scoreGapExtend;
+
+	ALLOC_TRACE()
+
+	AllocCache(uPrefixCountA, uPrefixCountB);
+
+	SCORE *MCurr = CacheMCurr;
+	SCORE *MNext = CacheMNext;
+	SCORE *MPrev = CacheMPrev;
+	SCORE *DRow = CacheDRow;
+
+// The cached TB may hold data from an earlier call; clear the part we use.
+	char **TB = CacheTB;
+	for (unsigned i = 0; i < uPrefixCountA; ++i)
+		memset(TB[i], 0, uPrefixCountB);
+
+// Row i=0: I.
+	SCORE Iij = MINUS_INFINITY;
+	SetDPI(0, 0, Iij);
+
+	Iij = PB[0].m_scoreGapOpen;
+	SetDPI(0, 1, Iij);
+
+	for (unsigned j = 2; j <= uLengthB; ++j)
+		{
+		Iij += e;
+		SetDPI(0, j, Iij);
+		SetTBI(0, j, 'I');
+		}
+
+// Row i=0: D is impossible.
+	for (unsigned j = 0; j <= uLengthB; ++j)
+		{
+		DRow[j] = MINUS_INFINITY;
+		SetDPD(0, j, DRow[j]);
+		SetTBD(0, j, 'D');
+		}
+
+// Row i=0: M is 0 at the origin and impossible elsewhere.
+	MPrev[0] = 0;
+	SetDPM(0, 0, MPrev[0]);
+	for (unsigned j = 1; j <= uLengthB; ++j)
+		{
+		MPrev[j] = MINUS_INFINITY;
+		SetDPM(0, j, MPrev[j]);
+		}
+
+// Row i=1: M(1,1) is a plain match; M(1,j) for j>1 follows a left-terminal
+// gap in A.
+	MCurr[0] = MINUS_INFINITY;
+	SetDPM(1, 0, MCurr[0]);
+
+	MCurr[1] = ScoreProfPos2(PA[0], PB[0]);
+	SetDPM(1, 1, MCurr[1]);
+	SetBitTBM(TB, 1, 1, 'M');
+	SetTBM(1, 1, 'M');
+
+	for (unsigned j = 2; j <= uLengthB; ++j)
+		{
+		MCurr[j] = ScoreProfPos2(PA[0], PB[j-1]) + PB[0].m_scoreGapOpen +
+		  (j - 2)*e + PB[j-2].m_scoreGapClose;
+		SetDPM(1, j, MCurr[j]);
+		SetBitTBM(TB, 1, j, 'I');
+		SetTBM(1, j, 'I');
+		}
+
+// Main DP loop
+	for (unsigned i = 1; i < uLengthA; ++i)
+		{
+	// TBRow is referenced by name inside RECURSE_D and RECURSE_I.
+		char *TBRow = TB[i];
+
+		Iij = MINUS_INFINITY;
+		SetDPI(i, 0, Iij);
+
+		DRow[0] = PA[0].m_scoreGapOpen + (i - 1)*e;
+		SetDPD(i, 0, DRow[0]);
+
+	// Column j=1: match preceded by a left-terminal gap in B (or nothing
+	// when i == 1).
+		MCurr[0] = MINUS_INFINITY;
+		if (i == 1)
+			{
+			MCurr[1] = ScoreProfPos2(PA[0], PB[0]);
+			SetBitTBM(TB, i, 1, 'M');
+			SetTBM(i, 1, 'M');
+			}
+		else
+			{
+			MCurr[1] = ScoreProfPos2(PA[i-1], PB[0]) + PA[0].m_scoreGapOpen +
+			  (i - 2)*e + PA[i-2].m_scoreGapClose;
+			SetBitTBM(TB, i, 1, 'D');
+			SetTBM(i, 1, 'D');
+			}
+		SetDPM(i, 0, MCurr[0]);
+		SetDPM(i, 1, MCurr[1]);
+
+	// Pre-load the next M row with the substitution scores; RECURSE_M
+	// adds the best predecessor score on top.
+		for (unsigned j = 1; j < uLengthB; ++j)
+			MNext[j+1] = ScoreProfPos2(PA[i], PB[j]);
+
+		for (unsigned j = 1; j < uLengthB; ++j)
+			{
+			RECURSE_D(i, j)
+			RECURSE_I(i, j)
+			RECURSE_M(i, j)
+			}
+	// Special case for j=uLengthB
+		RECURSE_D_BTerm(i)
+		RECURSE_I_BTerm(i)
+
+	// Prev := Curr, Curr := Next, Next := Prev
+		Rotate(MPrev, MCurr, MNext);
+		}
+
+// Special case for i=uLengthA
+	char *TBRow = TB[uLengthA];
+	MCurr[0] = MINUS_INFINITY;
+	MCurr[1] = ScoreProfPos2(PA[uLengthA-1], PB[0]) + (uLengthA - 2)*e +
+	  PA[0].m_scoreGapOpen + PA[uLengthA-2].m_scoreGapClose;
+	SetBitTBM(TB, uLengthA, 1, 'D');
+	SetTBM(uLengthA, 1, 'D');
+	SetDPM(uLengthA, 0, MCurr[0]);
+	SetDPM(uLengthA, 1, MCurr[1]);
+
+	DRow[0] = MINUS_INFINITY;
+	SetDPD(uLengthA, 0, DRow[0]);
+	for (unsigned j = 1; j <= uLengthB; ++j)
+		RECURSE_D_ATerm(j);
+
+	Iij = MINUS_INFINITY;
+	for (unsigned j = 1; j <= uLengthB; ++j)
+		RECURSE_I_ATerm(j)
+
+	LogMatrices();
+
+// Pick the best final state; ties keep the earlier candidate (M, D, I).
+	SCORE MAB = MCurr[uLengthB];
+	SCORE DAB = DRow[uLengthB];
+	SCORE IAB = Iij;
+
+	SCORE Score = MAB;
+	char cEdgeType = 'M';
+	if (DAB > Score)
+		{
+		Score = DAB;
+		cEdgeType = 'D';
+		}
+	if (IAB > Score)
+		{
+		Score = IAB;
+		cEdgeType = 'I';
+		}
+
+#if TRACE
+	Log("    Fast: MAB=%.4g DAB=%.4g IAB=%.4g best=%c\n",
+	  MAB, DAB, IAB, cEdgeType);
+#endif
+
+	BitTraceBack(TB, uLengthA, uLengthB, cEdgeType, Path);
+
+#if	DEBUG
+	Path.Validate();
+#endif
+
+	return 0;
+	}

Added: trunk/packages/muscle/branches/upstream/current/objscore.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/objscore.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/objscore.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,113 @@
+#include "muscle.h"
+#include "msa.h"
+#include "objscore.h"
+#include "profile.h"
+#include "timing.h"
+
+// Clock ticks accumulated by ObjScore() and ObjScoreIds() (TIMING builds
+// only). Both functions add their own elapsed ticks to this counter.
+#if	TIMING
+TICKS g_ticksObjScore = 0;
+#endif
+
+// Computes the objective score selected by g_ObjScore.
+//
+//	msa						the alignment to score
+//	SeqIndexes1, uSeqCount1	sequence indexes forming the first subset
+//	SeqIndexes2, uSeqCount2	sequence indexes forming the second subset
+//
+// OBJSCORE_SPM is resolved first: XP when msa has at most 100 sequences,
+// SPF otherwise. The DP and XP scores are computed between two sub-MSAs
+// built from the two subsets; SP, SPF and PS are computed on msa as a whole
+// and ignore the subsets. Sequence weights are (re)set with
+// SetMSAWeightsMuscle before scoring, which for SP/SPF/PS means msa itself is
+// modified despite being passed as const.
+// Quits on an unrecognized objective score setting.
+SCORE ObjScore(const MSA &msa, const unsigned SeqIndexes1[],
+  unsigned uSeqCount1, const unsigned SeqIndexes2[], unsigned uSeqCount2)
+	{
+#if	TIMING
+	TICKS ticksStart = GetClockTicks();
+#endif
+	const unsigned uSeqCount = msa.GetSeqCount();
+
+	OBJSCORE OS = g_ObjScore;
+	if (OBJSCORE_SPM == g_ObjScore)
+		OS = (uSeqCount <= 100) ? OBJSCORE_XP : OBJSCORE_SPF;
+
+	MSA msa1;
+	MSA msa2;
+
+// Preparation step: build sub-alignments and/or set sequence weights.
+	const bool bTwoProfiles = (OBJSCORE_DP == OS || OBJSCORE_XP == OS);
+	const bool bWholeMSA =
+	  (OBJSCORE_SP == OS || OBJSCORE_SPF == OS || OBJSCORE_PS == OS);
+	if (bTwoProfiles)
+		{
+		MSAFromSeqSubset(msa, SeqIndexes1, uSeqCount1, msa1);
+		MSAFromSeqSubset(msa, SeqIndexes2, uSeqCount2, msa2);
+
+		SetMSAWeightsMuscle(msa1);
+		SetMSAWeightsMuscle(msa2);
+		}
+	else if (bWholeMSA)
+		{
+	// Yuck -- casting away const (design flaw)
+		SetMSAWeightsMuscle((MSA &) msa);
+		}
+
+// Scoring step.
+	SCORE Score = 0;
+	switch (OS)
+		{
+	case OBJSCORE_DP:
+		Score = ObjScoreDP(msa1, msa2);
+		break;
+
+	case OBJSCORE_XP:
+		Score = ObjScoreXP(msa1, msa2);
+		break;
+
+	case OBJSCORE_SP:
+		Score = ObjScoreSP(msa);
+		break;
+
+	case OBJSCORE_SPF:
+		Score = ObjScoreSPDimer(msa);
+		break;
+
+	case OBJSCORE_PS:
+		Score = ObjScorePS(msa);
+		break;
+
+	default:
+		Quit("Invalid g_ObjScore=%d", g_ObjScore);
+		}
+#if	TIMING
+	TICKS ticksEnd = GetClockTicks();
+	g_ticksObjScore += (ticksEnd - ticksStart);
+#endif
+	return Score;
+	}
+
+// Same as ObjScore(), but the two sequence subsets are given as sequence ids
+// instead of sequence indexes. Each id is translated with msa.GetSeqIndex().
+//
+//	msa				the alignment to score
+//	Ids1, uCount1	ids of the sequences in the first subset
+//	Ids2, uCount2	ids of the sequences in the second subset
+//
+// In DOUBLE_AFFINE builds the subsets are ignored and the score comes from
+// ObjScoreDA(msa, ...) instead.
+SCORE ObjScoreIds(const MSA &msa, const unsigned Ids1[],
+  unsigned uCount1, const unsigned Ids2[], unsigned uCount2)
+	{
+#if	TIMING
+	TICKS t1 = GetClockTicks();
+#endif
+	unsigned *SeqIndexes1 = new unsigned[uCount1];
+	unsigned *SeqIndexes2 = new unsigned[uCount2];
+
+	for (unsigned n = 0; n < uCount1; ++n)
+		SeqIndexes1[n] = msa.GetSeqIndex(Ids1[n]);
+
+	for (unsigned n = 0; n < uCount2; ++n)
+		SeqIndexes2[n] = msa.GetSeqIndex(Ids2[n]);
+
+#if DOUBLE_AFFINE
+	extern SCORE ObjScoreDA(const MSA &msa, SCORE *ptrLetters, SCORE *ptrGaps);
+	SCORE Letters, Gaps;
+	SCORE dObjScore = ObjScoreDA(msa, &Letters, &Gaps);
+#else
+	SCORE dObjScore = ObjScore(msa, SeqIndexes1, uCount1, SeqIndexes2, uCount2);
+#endif
+
+// Release the index arrays in every build configuration, not only under
+// DOUBLE_AFFINE.
+	delete[] SeqIndexes1;
+	delete[] SeqIndexes2;
+
+#if	TIMING
+	TICKS t2 = GetClockTicks();
+	g_ticksObjScore += (t2 - t1);
+#endif
+	return dObjScore;
+	}

Added: trunk/packages/muscle/branches/upstream/current/objscore.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/objscore.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/objscore.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,30 @@
+// Declarations of the objective-score functions.
+// This header uses SCORE, MSA, PWPath and ProfPos without declaring them, so
+// it must be included after the headers that define those types.
+#ifndef ObjScore_h
+#define ObjScore_h
+
+// Pairwise scores between sequence uSeqIndex1 of msa1 and sequence
+// uSeqIndex2 of msa2.
+SCORE ScoreSeqPairGaps(const MSA &msa1, unsigned uSeqIndex1,
+  const MSA &msa2, unsigned uSeqIndex2);
+SCORE ScoreSeqPairLetters(const MSA &msa1, unsigned uSeqIndex1,
+  const MSA &msa2, unsigned uSeqIndex2);
+SCORE ScoreGaps(const MSA &msa, const unsigned Cols[], unsigned ColCount);
+
+// Objective score of msa for two subsets of its sequences, given as sequence
+// indexes; the scoring function is selected by the global g_ObjScore.
+SCORE ObjScore(const MSA &msa, const unsigned SeqIndexes1[],
+  unsigned uSeqCount1, const unsigned SeqIndexes2[], unsigned uSeqCount2);
+
+// As ObjScore(), but the subsets are given as sequence ids.
+SCORE ObjScoreIds(const MSA &msa, const unsigned Ids1[],
+  unsigned uCount1, const unsigned Ids2[], unsigned uCount2);
+
+void GetLetterScores(const MSA &msa, SCORE LetterScores[]);
+
+// The individual scoring functions that ObjScore() dispatches to.
+// MatchScore defaults to a null pointer where it is accepted.
+SCORE ObjScoreDP(const MSA &msa1, const MSA &msa2, SCORE MatchScore[] = 0);
+SCORE ObjScorePS(const MSA &msa, SCORE MatchScore[] = 0);
+SCORE ObjScoreSP(const MSA &msa, SCORE MatchScore[] = 0);
+SCORE ObjScoreXP(const MSA &msa, const MSA &msa2);
+SCORE ObjScoreSPDimer(const MSA &msa);
+SCORE ObjScoreDP_Profs(const ProfPos *PA, const ProfPos *PB, unsigned uColCount,
+  SCORE MatchScore[] = 0);
+
+SCORE DiffObjScore(
+  const MSA &msa1, const PWPath &Path1, const unsigned Edges1[], unsigned uEdgeCount1, 
+  const MSA &msa2, const PWPath &Path2, const unsigned Edges2[], unsigned uEdgeCount2);
+
+#endif // ObjScore_h

Added: trunk/packages/muscle/branches/upstream/current/objscore2.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/objscore2.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/objscore2.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,522 @@
+#include "muscle.h"
+#include "msa.h"
+#include "profile.h"
+#include "objscore.h"
+
+#define TRACE			0
+#define TRACE_SEQPAIR	0
+#define TEST_SPFAST		0
+
+extern SCOREMATRIX VTML_LA;
+extern SCOREMATRIX PAM200;
+extern SCOREMATRIX PAM200NoCenter;
+extern SCOREMATRIX VTML_SP;
+extern SCOREMATRIX VTML_SPNoCenter;
+extern SCOREMATRIX NUC_SP;
+
+SCORE g_SPScoreLetters;	// weighted letter-score total; reset and re-accumulated by every ObjScoreSP call
+SCORE g_SPScoreGaps;	// weighted gap-score total; reset and re-accumulated by every ObjScoreSP call
+
+// Score charged for opening a terminal gap under the current
+// g_TermGaps policy. Gap == false always gives 0, and so does the
+// TERMGAPS_Full policy. An unrecognised policy value is fatal.
+static SCORE TermGapScore(bool Gap)
+	{
+	if (TERMGAPS_Full == g_TermGaps)
+		return 0;
+
+	if (TERMGAPS_Half == g_TermGaps)
+		return Gap ? g_scoreGapOpen/2 : 0;
+
+	if (TERMGAPS_Ext == g_TermGaps)
+		return Gap ? g_scoreGapExtend : 0;
+
+	Quit("TermGapScore?!");
+	return 0;
+	}
+
+// Sum of substitution-matrix scores for one pair of aligned rows.
+//
+// msa1/uSeqIndex1 and msa2/uSeqIndex2 select the two rows; both
+// alignments must have the same number of columns (fatal otherwise).
+// Only the span from the first to the last column in which at least
+// one of the two rows is not a gap is visited, and a column contributes
+// only when both GetLetterEx values are below g_AlphaSize.
+// Returns 0 for an alignment with no columns.
+SCORE ScoreSeqPairLetters(const MSA &msa1, unsigned uSeqIndex1,
+  const MSA &msa2, unsigned uSeqIndex2)
+	{
+	const unsigned uColCount = msa1.GetColCount();
+	const unsigned uColCount2 = msa2.GetColCount();
+	if (uColCount != uColCount2)
+		Quit("ScoreSeqPairLetters, different lengths");
+
+// Nothing to score. Without this early return uColCount - 1 below
+// wraps around and the scoring loop indexes far past the last column.
+	if (0 == uColCount)
+		return 0;
+
+#if	TRACE_SEQPAIR
+	{
+	Log("\n");
+	Log("ScoreSeqPairLetters\n");
+	MSA msaTmp;
+	msaTmp.SetSize(2, uColCount);
+	msaTmp.CopySeq(0, msa1, uSeqIndex1);
+	msaTmp.CopySeq(1, msa2, uSeqIndex2);
+	msaTmp.LogMe();
+	}
+#endif
+
+	SCORE scoreLetters = 0;
+
+// First column in which the two rows are not both gaps.
+	unsigned uColStart = 0;
+	bool bLeftTermGap = false;
+	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
+		{
+		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
+		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
+		if (!bGap1 || !bGap2)
+			{
+			if (bGap1 || bGap2)
+				bLeftTermGap = true;
+			uColStart = uColIndex;
+			break;
+			}
+		}
+
+// Last column in which the two rows are not both gaps. A signed index
+// is used so the loop can terminate after visiting column 0.
+	unsigned uColEnd = uColCount - 1;
+	bool bRightTermGap = false;
+	for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
+		{
+		bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
+		bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
+		if (!bGap1 || !bGap2)
+			{
+			if (bGap1 || bGap2)
+				bRightTermGap = true;
+			uColEnd = (unsigned) iColIndex;
+			break;
+			}
+		}
+
+#if	TRACE_SEQPAIR
+	Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
+#endif
+
+	for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
+		{
+	// Values >= g_AlphaSize are not indexes into the score matrix.
+		unsigned uLetter1 = msa1.GetLetterEx(uSeqIndex1, uColIndex);
+		if (uLetter1 >= g_AlphaSize)
+			continue;
+		unsigned uLetter2 = msa2.GetLetterEx(uSeqIndex2, uColIndex);
+		if (uLetter2 >= g_AlphaSize)
+			continue;
+
+		SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2];
+		scoreLetters += scoreMatch;
+		}
+	return scoreLetters;
+	}
+
+// Sum of gap penalties for one pair of aligned rows.
+//
+// msa1/uSeqIndex1 and msa2/uSeqIndex2 select the two rows; both
+// alignments must have the same number of columns (fatal otherwise).
+// Columns where both rows are gaps are skipped. A gap that opens in
+// the first scored column, or is still open after the last one, is
+// charged TermGapScore(true) instead of g_scoreGapOpen; every further
+// gapped column is charged g_scoreGapExtend.
+// Returns 0 for an alignment with no columns.
+SCORE ScoreSeqPairGaps(const MSA &msa1, unsigned uSeqIndex1,
+  const MSA &msa2, unsigned uSeqIndex2)
+	{
+	const unsigned uColCount = msa1.GetColCount();
+	const unsigned uColCount2 = msa2.GetColCount();
+	if (uColCount != uColCount2)
+		Quit("ScoreSeqPairGaps, different lengths");
+
+// Nothing to score. Without this early return uColCount - 1 below
+// wraps around and the scoring loop indexes far past the last column.
+	if (0 == uColCount)
+		return 0;
+
+#if	TRACE_SEQPAIR
+	{
+	Log("\n");
+	Log("ScoreSeqPairGaps\n");
+	MSA msaTmp;
+	msaTmp.SetSize(2, uColCount);
+	msaTmp.CopySeq(0, msa1, uSeqIndex1);
+	msaTmp.CopySeq(1, msa2, uSeqIndex2);
+	msaTmp.LogMe();
+	}
+#endif
+
+	SCORE scoreGaps = 0;
+	bool bGapping1 = false;
+	bool bGapping2 = false;
+
+// First column in which the two rows are not both gaps.
+	unsigned uColStart = 0;
+	bool bLeftTermGap = false;
+	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
+		{
+		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
+		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
+		if (!bGap1 || !bGap2)
+			{
+			if (bGap1 || bGap2)
+				bLeftTermGap = true;
+			uColStart = uColIndex;
+			break;
+			}
+		}
+
+// Last column in which the two rows are not both gaps. A signed index
+// is used so the loop can terminate after visiting column 0.
+	unsigned uColEnd = uColCount - 1;
+	bool bRightTermGap = false;
+	for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
+		{
+		bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
+		bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
+		if (!bGap1 || !bGap2)
+			{
+			if (bGap1 || bGap2)
+				bRightTermGap = true;
+			uColEnd = (unsigned) iColIndex;
+			break;
+			}
+		}
+
+#if	TRACE_SEQPAIR
+	Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
+#endif
+
+// NOTE(review): a gap state is only cleared by a column in which
+// neither row is gapped, so when a gap switches directly from one row
+// to the other the earlier flag stays set. Left as is -- confirm
+// whether that is intended before changing it.
+	for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
+		{
+		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
+		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
+
+		if (bGap1 && bGap2)
+			continue;
+
+		if (bGap1)
+			{
+			if (!bGapping1)
+				{
+#if	TRACE_SEQPAIR
+				Log("Gap open seq 1 col %d\n", uColIndex);
+#endif
+				if (uColIndex == uColStart)
+					scoreGaps += TermGapScore(true);
+				else
+					scoreGaps += g_scoreGapOpen;
+				bGapping1 = true;
+				}
+			else
+				scoreGaps += g_scoreGapExtend;
+			continue;
+			}
+
+		else if (bGap2)
+			{
+			if (!bGapping2)
+				{
+#if	TRACE_SEQPAIR
+				Log("Gap open seq 2 col %d\n", uColIndex);
+#endif
+				if (uColIndex == uColStart)
+					scoreGaps += TermGapScore(true);
+				else
+					scoreGaps += g_scoreGapOpen;
+				bGapping2 = true;
+				}
+			else
+				scoreGaps += g_scoreGapExtend;
+			continue;
+			}
+
+		bGapping1 = false;
+		bGapping2 = false;
+		}
+
+// A gap still open at the right end is terminal: swap the internal
+// open penalty already charged for the terminal one.
+	if (bGapping1 || bGapping2)
+		{
+		scoreGaps -= g_scoreGapOpen;
+		scoreGaps += TermGapScore(true);
+		}
+	return scoreGaps;
+	}
+
+// The usual sum-of-pairs objective score: sum the score
+// of the alignment of each pair of sequences.
+// Sum-of-pairs objective score: for every unordered pair of rows the
+// letter score plus the gap score, multiplied by the product of the
+// two row weights, summed over all pairs. g_SPScoreLetters and
+// g_SPScoreGaps receive the weighted letter and gap sub-totals.
+// MatchScore, when non-null, is only cleared (one entry per column).
+SCORE ObjScoreSP(const MSA &msa, SCORE MatchScore[])
+	{
+#if	TRACE
+	Log("==================ObjScoreSP==============\n");
+	Log("msa=\n");
+	msa.LogMe();
+#endif
+	g_SPScoreLetters = 0;
+	g_SPScoreGaps = 0;
+
+	if (0 != MatchScore)
+		{
+		const unsigned uCols = msa.GetColCount();
+		for (unsigned uCol = 0; uCol < uCols; ++uCol)
+			MatchScore[uCol] = 0;
+		}
+
+	const unsigned uRows = msa.GetSeqCount();
+	SCORE scoreSum = 0;
+	unsigned uPairCount = 0;
+#if	TRACE
+	Log("Seq1  Seq2     wt1     wt2    Letters         Gaps  Unwt.Score    Wt.Score       Total\n");
+	Log("----  ----  ------  ------  ----------  ----------  ----------  ----------  ----------\n");
+#endif
+	for (unsigned uRowA = 0; uRowA < uRows; ++uRowA)
+		{
+		const WEIGHT wA = msa.GetSeqWeight(uRowA);
+		for (unsigned uRowB = uRowA + 1; uRowB < uRows; ++uRowB)
+			{
+			const WEIGHT wB = msa.GetSeqWeight(uRowB);
+			const WEIGHT wPair = wA*wB;
+
+			const SCORE scoreLetters = ScoreSeqPairLetters(msa, uRowA, msa, uRowB);
+			const SCORE scoreGaps = ScoreSeqPairGaps(msa, uRowA, msa, uRowB);
+			const SCORE scorePair = scoreLetters + scoreGaps;
+			++uPairCount;
+
+			scoreSum += wPair*scorePair;
+
+			g_SPScoreLetters += wPair*scoreLetters;
+			g_SPScoreGaps += wPair*scoreGaps;
+#if	TRACE
+			Log("%4d  %4d  %6.3f  %6.3f  %10.2f  %10.2f  %10.2f  %10.2f  %10.2f >%s >%s\n",
+			  uRowA,
+			  uRowB,
+			  wA,
+			  wB,
+			  scoreLetters,
+			  scoreGaps,
+			  scorePair,
+			  scorePair*wA*wB,
+			  scoreSum,
+			  msa.GetSeqName(uRowA),
+			  msa.GetSeqName(uRowB));
+#endif
+			}
+		}
+#if	TEST_SPFAST
+	{
+	SCORE f = ObjScoreSPFast(msa);
+	Log("Fast  = %.6g\n", f);
+	Log("Brute = %.6g\n", scoreSum);
+	if (BTEq(f, scoreSum))
+		Log("Agree\n");
+	else
+		Log("** DISAGREE **\n");
+	}
+#endif
+// The total is deliberately not normalised by uPairCount.
+	return scoreSum;
+	}
+
+// Objective score defined as the dynamic programming score.
+// Input is two alignments, which must be of the same length.
+// Result is the same profile-profile score that is optimized
+// by dynamic programming.
+// Profile-profile objective score of two alignments of equal length
+// (fatal if the column counts differ). A profile is built from each
+// alignment and both are handed to ObjScoreDP_Profs, which also
+// releases them. MatchScore is passed through unchanged.
+SCORE ObjScoreDP(const MSA &msa1, const MSA &msa2, SCORE MatchScore[])
+	{
+	const unsigned uCols = msa1.GetColCount();
+	if (uCols != msa2.GetColCount())
+		Quit("ObjScoreDP, must be same length");
+
+	const ProfPos *Prof1 = ProfileFromMSA(msa1);
+	const ProfPos *Prof2 = ProfileFromMSA(msa2);
+
+	return ObjScoreDP_Profs(Prof1, Prof2, uCols, MatchScore);
+	}
+
+// Column-by-column score of two profiles of uColCount positions each.
+//
+// A column where exactly one profile is all gaps contributes the other
+// profile's gap-close score when the run of all-gap columns ends there,
+// plus its gap-open score when the run starts there. A column where
+// neither is all gaps contributes ScoreProfPos2. Columns where both are
+// all gaps contribute nothing. MatchScore, when non-null, receives the
+// match part of every column.
+//
+// Ownership: PA and PB are released with delete[] before returning.
+SCORE ObjScoreDP_Profs(const ProfPos *PA, const ProfPos *PB, unsigned uColCount,
+  SCORE MatchScore[])
+	{
+	extern bool g_bTracePPScore;
+	extern MSA *g_ptrPPScoreMSA1;
+	extern MSA *g_ptrPPScoreMSA2;
+
+	SCORE scoreSum = 0;
+
+	for (unsigned uCol = 0; uCol < uColCount; ++uCol)
+		{
+		const ProfPos &ColA = PA[uCol];
+		const ProfPos &ColB = PB[uCol];
+		const bool bFirstCol = (0 == uCol);
+		const bool bLastCol = (uColCount - 1 == uCol);
+
+		SCORE scoreGap = 0;
+		SCORE scoreMatch = 0;
+		if (ColA.m_bAllGaps)
+			{
+			if (!ColB.m_bAllGaps)
+				{
+				if (bLastCol || !PA[uCol+1].m_bAllGaps)
+					scoreGap = ColB.m_scoreGapClose;
+				if (bFirstCol || !PA[uCol-1].m_bAllGaps)
+					scoreGap += ColB.m_scoreGapOpen;
+				}
+			}
+		else if (ColB.m_bAllGaps)
+			{
+			if (bLastCol || !PB[uCol+1].m_bAllGaps)
+				scoreGap = ColA.m_scoreGapClose;
+			if (bFirstCol || !PB[uCol-1].m_bAllGaps)
+				scoreGap += ColA.m_scoreGapOpen;
+			}
+		else
+			scoreMatch = ScoreProfPos2(ColA, ColB);
+
+		if (0 != MatchScore)
+			MatchScore[uCol] = scoreMatch;
+
+		scoreSum += scoreMatch + scoreGap;
+
+		if (!g_bTracePPScore)
+			continue;
+
+	// Trace: the column of each alignment followed by its scores.
+		const MSA &msaA = *g_ptrPPScoreMSA1;
+		const MSA &msaB = *g_ptrPPScoreMSA2;
+		const unsigned uRowsA = msaA.GetSeqCount();
+		const unsigned uRowsB = msaB.GetSeqCount();
+
+		for (unsigned uRow = 0; uRow < uRowsA; ++uRow)
+			Log("%c", msaA.GetChar(uRow, uCol));
+		Log("  ");
+		for (unsigned uRow = 0; uRow < uRowsB; ++uRow)
+			Log("%c", msaB.GetChar(uRow, uCol));
+		Log("  %10.3f", scoreMatch);
+		if (scoreGap != 0)
+			Log("  %10.3f", scoreGap);
+		Log("\n");
+		}
+
+	delete[] PA;
+	delete[] PB;
+
+	return scoreSum;
+	}
+
+// Objective score defined as the sum of profile-sequence
+// scores for each sequence in the alignment. The profile
+// is computed from the entire alignment, so this includes
+// the score of each sequence against itself. This is to
+// avoid recomputing the profile each time, so we reduce
+// complexity but introduce a questionable approximation.
+// The goal is to see if we can exploit the apparent
+// improvement in performance of log-expectation score
+// over the usual sum-of-pairs by optimizing this
+// objective score in the iterative refinement stage.
+// Profile-sequence objective score: every row of the alignment is
+// scored against the profile built from the whole alignment, and the
+// per-row scores are summed weighted by the row weights.
+//
+// Only valid when g_PPScore is PPSCORE_LE (fatal otherwise).
+// MatchScore, when non-null, is cleared and then receives, per column,
+// the weighted sum of the letter scores; gap scores are not added to it.
+// Wildcard positions contribute nothing.
+SCORE ObjScorePS(const MSA &msa, SCORE MatchScore[])
+	{
+// The message used to name a function that does not exist here.
+	if (g_PPScore != PPSCORE_LE)
+		Quit("ObjScorePS: requires PPSCORE_LE");
+
+	const unsigned uSeqCount = msa.GetSeqCount();
+	const unsigned uColCount = msa.GetColCount();
+
+	const ProfPos *Prof = ProfileFromMSA(msa);
+
+	if (0 != MatchScore)
+		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
+			MatchScore[uColIndex] = 0;
+
+	SCORE scoreTotal = 0;
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		const WEIGHT weightSeq = msa.GetSeqWeight(uSeqIndex);
+		SCORE scoreSeq = 0;
+		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
+			{
+			const ProfPos &PP = Prof[uColIndex];
+			if (msa.IsGap(uSeqIndex, uColIndex))
+				{
+			// A gap run opens where the previous column is not a gap
+			// (or at column 0) and closes where the next one is not
+			// (or at the last column); interior columns are free.
+				bool bOpen = (0 == uColIndex ||
+				  !msa.IsGap(uSeqIndex, uColIndex - 1));
+				bool bClose = (uColCount - 1 == uColIndex ||
+				  !msa.IsGap(uSeqIndex, uColIndex + 1));
+
+				if (bOpen)
+					scoreSeq += PP.m_scoreGapOpen;
+				if (bClose)
+					scoreSeq += PP.m_scoreGapClose;
+				}
+			else if (msa.IsWildcard(uSeqIndex, uColIndex))
+				continue;
+			else
+				{
+				unsigned uLetter = msa.GetLetter(uSeqIndex, uColIndex);
+				const SCORE scoreMatch = PP.m_AAScores[uLetter];
+				if (0 != MatchScore)
+					MatchScore[uColIndex] += weightSeq*scoreMatch;
+				scoreSeq += scoreMatch;
+				}
+			}
+		scoreTotal += weightSeq*scoreSeq;
+		}
+
+	delete[] Prof;
+	return scoreTotal;
+	}
+
+// The XP score is the sum of the score of each pair of
+// sequences between two profiles which are aligned to
+// each other. Notice that for two given profiles aligned
+// in different ways, the difference in XP score must be
+// the same as the difference in SP score because the
+// score of a pair of sequences in one profile doesn't
+// depend on the alignment.
+// Cross-profile objective score: every row of msa1 is paired with
+// every row of msa2, and the pair's letter score plus gap score is
+// added, multiplied by the product of the two row weights. The two
+// alignments must have the same number of columns, and there must be
+// at least one pair (both conditions are fatal otherwise).
+SCORE ObjScoreXP(const MSA &msa1, const MSA &msa2)
+	{
+	const unsigned uCols1 = msa1.GetColCount();
+	const unsigned uCols2 = msa2.GetColCount();
+	if (uCols1 != uCols2)
+		Quit("ObjScoreXP, alignment lengths differ %u %u", uCols1, uCols2);
+
+	const unsigned uRows1 = msa1.GetSeqCount();
+	const unsigned uRows2 = msa2.GetSeqCount();
+
+#if	TRACE
+	Log("     Score  Weight  Weight       Total\n");
+	Log("----------  ------  ------  ----------\n");
+#endif
+
+	SCORE scoreSum = 0;
+	unsigned uPairs = 0;
+	for (unsigned uRow1 = 0; uRow1 < uRows1; ++uRow1)
+		{
+		const WEIGHT wRow1 = msa1.GetSeqWeight(uRow1);
+		for (unsigned uRow2 = 0; uRow2 < uRows2; ++uRow2)
+			{
+			const WEIGHT wRow2 = msa2.GetSeqWeight(uRow2);
+			const SCORE scoreLetters = ScoreSeqPairLetters(msa1, uRow1, msa2, uRow2);
+			const SCORE scoreGaps = ScoreSeqPairGaps(msa1, uRow1, msa2, uRow2);
+			const SCORE scorePair = scoreLetters + scoreGaps;
+			scoreSum += wRow1*wRow2*scorePair;
+			++uPairs;
+#if	TRACE
+			Log("%10.2f  %6.3f  %6.3f  %10.2f  >%s >%s\n",
+			  scorePair,
+			  wRow1,
+			  wRow2,
+			  scorePair*wRow1*wRow2,
+			  msa1.GetSeqName(uRow1),
+			  msa2.GetSeqName(uRow2));
+#endif
+			}
+		}
+	if (0 == uPairs)
+		Quit("0 == uPairCount");
+
+#if	TRACE
+	Log("msa1=\n");
+	msa1.LogMe();
+	Log("msa2=\n");
+	msa2.LogMe();
+	Log("XP=%g\n", scoreSum);
+#endif
+// The total is deliberately not normalised by the pair count.
+	return scoreSum;
+	}

Added: trunk/packages/muscle/branches/upstream/current/objscoreda.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/objscoreda.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/objscoreda.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,289 @@
+#include "muscle.h"
+#include "msa.h"
+#include "profile.h"
+#include "objscore.h"
+
+#if	DOUBLE_AFFINE
+
+#define TRACE			0
+#define TEST_SPFAST		0
+
+// Affine penalty of a gap of uLength columns: open score g for the
+// first column plus extend score e for each further one. Term is
+// accepted but currently has no effect: terminal gaps are scored like
+// internal ones (an earlier per-policy variant was disabled).
+static SCORE GapPenalty(unsigned uLength, bool Term, SCORE g, SCORE e)
+	{
+	return g + (uLength - 1)*e;
+	}
+
+// Gap penalty under the global parameters. With DOUBLE_AFFINE the
+// larger of the two affine penalties (g_scoreGapOpen/g_scoreGapExtend
+// and g_scoreGapOpen2/g_scoreGapExtend2) is returned; otherwise only
+// the first pair is used.
+static SCORE GapPenalty(unsigned uLength, bool Term)
+	{
+	const SCORE scoreFirst = GapPenalty(uLength, Term, g_scoreGapOpen, g_scoreGapExtend);
+#if	DOUBLE_AFFINE
+	const SCORE scoreSecond = GapPenalty(uLength, Term, g_scoreGapOpen2, g_scoreGapExtend2);
+	return (scoreFirst > scoreSecond) ? scoreFirst : scoreSecond;
+#else
+	return scoreFirst;
+#endif
+	}
+
+static const MSA *g_ptrMSA1;	// first alignment of the pair being scored; set by ScoreSeqPair, read by LogGap
+static const MSA *g_ptrMSA2;	// second alignment of the pair being scored; set by ScoreSeqPair, read by LogGap
+static unsigned g_uSeqIndex1;	// row of g_ptrMSA1 being scored
+static unsigned g_uSeqIndex2;	// row of g_ptrMSA2 being scored
+
+// Trace helper: logs one line that marks, under the two rows printed
+// by ScoreSeqPair, the columns uStart..uEnd of a gap ('-' where one
+// row is gapped, '.' where both are), followed by the gap length, the
+// terminal flags and the penalty. Fatal if a column in the range has
+// no gap at all or if the counted length differs from uGapLength.
+static void LogGap(unsigned uStart, unsigned uEnd, unsigned uGapLength,
+  bool bNTerm, bool bCTerm)
+	{
+// Same width as the sequence-name field of the rows above.
+	Log("%16.16s  ", "");
+	for (unsigned uPad = 0; uPad < uStart; ++uPad)
+		Log(" ");
+
+	unsigned uCounted = 0;
+	for (unsigned uCol = uStart; uCol <= uEnd; ++uCol)
+		{
+		const bool bRow1Gap = g_ptrMSA1->IsGap(g_uSeqIndex1, uCol);
+		const bool bRow2Gap = g_ptrMSA2->IsGap(g_uSeqIndex2, uCol);
+		if (!(bRow1Gap || bRow2Gap))
+			Quit("Error -- neither gapping");
+		if (bRow1Gap && bRow2Gap)
+			{
+			Log(".");
+			continue;
+			}
+		++uCounted;
+		Log("-");
+		}
+
+	const SCORE scorePenalty = GapPenalty(uGapLength, bNTerm || bCTerm);
+	Log(" L=%d N%d C%d s=%.3g", uGapLength, bNTerm, bCTerm, scorePenalty);
+	Log("\n");
+	if (uCounted != uGapLength)
+		Quit("Lengths differ");
+	}
+
+// Score of one pair of aligned rows: substitution scores plus gap
+// penalties from GapPenalty.
+//
+// msa1/uSeqIndex1 and msa2/uSeqIndex2 select the rows; the alignments
+// must have the same number of columns (fatal otherwise).
+// *ptrLetters receives the substitution part, *ptrGaps the gap part;
+// the return value is their sum. The file-static trace context used
+// by LogGap is updated as a side effect.
+static SCORE ScoreSeqPair(const MSA &msa1, unsigned uSeqIndex1,
+  const MSA &msa2, unsigned uSeqIndex2, SCORE *ptrLetters, SCORE *ptrGaps)
+	{
+	g_ptrMSA1 = &msa1;
+	g_ptrMSA2 = &msa2;
+	g_uSeqIndex1 = uSeqIndex1;
+	g_uSeqIndex2 = uSeqIndex2;
+
+	const unsigned uColCount = msa1.GetColCount();
+	const unsigned uColCount2 = msa2.GetColCount();
+	if (uColCount != uColCount2)
+		Quit("ScoreSeqPair, different lengths");
+
+#if	TRACE
+	Log("ScoreSeqPair\n");
+	Log("%16.16s  ", msa1.GetSeqName(uSeqIndex1));
+	for (unsigned i = 0; i < uColCount; ++i)
+		Log("%c", msa1.GetChar(uSeqIndex1, i));
+	Log("\n");
+	Log("%16.16s  ", msa2.GetSeqName(uSeqIndex2));
+// The second row belongs to msa2; it used to be read from msa1.
+	for (unsigned i = 0; i < uColCount; ++i)
+		Log("%c", msa2.GetChar(uSeqIndex2, i));
+	Log("\n");
+#endif
+
+	SCORE scoreTotal = 0;
+
+// Substitution scores. The first and last non-gap column of each row
+// are recorded on the way for the terminal-gap tests below.
+	unsigned uFirstLetter1 = uInsane;
+	unsigned uFirstLetter2 = uInsane;
+	unsigned uLastLetter1 = uInsane;
+	unsigned uLastLetter2 = uInsane;
+	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
+		{
+		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
+		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
+		bool bWildcard1 = msa1.IsWildcard(uSeqIndex1, uColIndex);
+		bool bWildcard2 = msa2.IsWildcard(uSeqIndex2, uColIndex);
+
+		if (!bGap1)
+			{
+			if (uInsane == uFirstLetter1)
+				uFirstLetter1 = uColIndex;
+			uLastLetter1 = uColIndex;
+			}
+		if (!bGap2)
+			{
+			if (uInsane == uFirstLetter2)
+				uFirstLetter2 = uColIndex;
+			uLastLetter2 = uColIndex;
+			}
+
+		if (bGap1 || bGap2 || bWildcard1 || bWildcard2)
+			continue;
+
+		unsigned uLetter1 = msa1.GetLetter(uSeqIndex1, uColIndex);
+		unsigned uLetter2 = msa2.GetLetter(uSeqIndex2, uColIndex);
+
+		SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2];
+		scoreTotal += scoreMatch;
+#if	TRACE
+		Log("%c <-> %c = %7.1f  %10.1f\n",
+		  msa1.GetChar(uSeqIndex1, uColIndex),
+		  msa2.GetChar(uSeqIndex2, uColIndex),
+		  scoreMatch,
+		  scoreTotal);
+#endif
+		}
+	
+	*ptrLetters = scoreTotal;
+
+// Gap penalties. Columns where both rows are gaps are skipped; a gap
+// is charged when the first non-gap column after it is reached.
+// NOTE(review): uGapLength and uGapStartCol are shared by both rows,
+// and the column that closes a gap in row 1 is not examined for row 2.
+// Left unchanged -- confirm the intended behaviour before touching it.
+	unsigned uGapLength = uInsane;
+	unsigned uGapStartCol = uInsane;
+	bool bGapping1 = false;
+	bool bGapping2 = false;
+
+	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
+		{
+		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
+		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
+
+		if (bGap1 && bGap2)
+			continue;
+
+		if (bGapping1)
+			{
+			if (bGap1)
+				++uGapLength;
+			else
+				{
+				bGapping1 = false;
+				bool bNTerm = (uFirstLetter2 == uGapStartCol);
+				bool bCTerm = (uLastLetter2 + 1 == uColIndex);
+				SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm);
+				scoreTotal += scoreGap;
+#if	TRACE
+				LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm);
+				Log("GAP         %7.1f  %10.1f\n",
+				  scoreGap,
+				  scoreTotal);
+#endif
+				}
+			continue;
+			}
+		else
+			{
+			if (bGap1)
+				{
+				uGapStartCol = uColIndex;
+				bGapping1 = true;
+				uGapLength = 1;
+				continue;
+				}
+			}
+
+		if (bGapping2)
+			{
+			if (bGap2)
+				++uGapLength;
+			else
+				{
+				bGapping2 = false;
+				bool bNTerm = (uFirstLetter1 == uGapStartCol);
+				bool bCTerm = (uLastLetter1 + 1 == uColIndex);
+				SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm);
+				scoreTotal += scoreGap;
+#if	TRACE
+				LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm);
+				Log("GAP         %7.1f  %10.1f\n",
+				  scoreGap,
+				  scoreTotal);
+#endif
+				}
+			}
+		else
+			{
+			if (bGap2)
+				{
+				uGapStartCol = uColIndex;
+				bGapping2 = true;
+				uGapLength = 1;
+				}
+			}
+		}
+
+// A gap still open after the last column is charged as terminal.
+	if (bGapping1 || bGapping2)
+		{
+		SCORE scoreGap = GapPenalty(uGapLength, true);
+		scoreTotal += scoreGap;
+#if	TRACE
+		LogGap(uGapStartCol, uColCount - 1, uGapLength, false, true);
+		Log("GAP         %7.1f  %10.1f\n",
+		  scoreGap,
+		  scoreTotal);
+#endif
+		}
+	*ptrGaps = scoreTotal - *ptrLetters;
+	return scoreTotal;
+	}
+
+// The usual sum-of-pairs objective score: sum the score
+// of the alignment of each pair of sequences.
+// Sum-of-pairs score using ScoreSeqPair: the score of every unordered
+// pair of rows, multiplied by the product of the two row weights, is
+// summed. *ptrLetters and *ptrGaps receive the equally weighted
+// letter and gap sub-totals.
+SCORE ObjScoreDA(const MSA &msa, SCORE *ptrLetters, SCORE *ptrGaps)
+	{
+	const unsigned uRows = msa.GetSeqCount();
+	SCORE scoreSum = 0;
+	SCORE scoreLettersSum = 0;
+	SCORE scoreGapsSum = 0;
+	unsigned uPairs = 0;
+#if	TRACE
+	msa.LogMe();
+	Log("     Score  Weight  Weight       Total\n");
+	Log("----------  ------  ------  ----------\n");
+#endif
+	for (unsigned uRowA = 0; uRowA < uRows; ++uRowA)
+		{
+		const WEIGHT wA = msa.GetSeqWeight(uRowA);
+		for (unsigned uRowB = uRowA + 1; uRowB < uRows; ++uRowB)
+			{
+			const WEIGHT wB = msa.GetSeqWeight(uRowB);
+			SCORE scorePairLetters;
+			SCORE scorePairGaps;
+			const SCORE scorePair = ScoreSeqPair(msa, uRowA, msa, uRowB,
+			  &scorePairLetters, &scorePairGaps);
+			scoreSum += wA*wB*scorePair;
+			scoreLettersSum += wA*wB*scorePairLetters;
+			scoreGapsSum += wA*wB*scorePairGaps;
+			++uPairs;
+#if	TRACE
+			Log("%10.2f  %6.3f  %6.3f  %10.2f  %d=%s %d=%s\n",
+			  scorePair,
+			  wA,
+			  wB,
+			  scorePair*wA*wB,
+			  uRowA,
+			  msa.GetSeqName(uRowA),
+			  uRowB,
+			  msa.GetSeqName(uRowB));
+#endif
+			}
+		}
+	*ptrLetters = scoreLettersSum;
+	*ptrGaps = scoreGapsSum;
+	return scoreSum;
+	}
+
+#endif	// DOUBLE_AFFINE

Added: trunk/packages/muscle/branches/upstream/current/onexception.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/onexception.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/onexception.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,15 @@
+#include "muscle.h"
+#include <stdio.h>
+
+// Text reported when an exception reaches OnException.
+static const char szOnExceptionMessage[] =
+	"\nFatal error, exception caught.\n";
+
+// Last-resort handler: reports the fatal error on stderr and in the
+// log, logs the finish time and terminates with EXIT_Except.
+// The message is always passed as data, never as a format string, so
+// it stays safe even if the text ever gains a '%'.
+void OnException()
+	{
+	fputs(szOnExceptionMessage, stderr);
+	Log("%s", szOnExceptionMessage);
+	Log("Finished %s\n", GetTimeAsStr());
+	exit(EXIT_Except);
+	}

Added: trunk/packages/muscle/branches/upstream/current/options.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/options.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/options.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,233 @@
+#include "muscle.h"
+#include <stdio.h>
+
+struct VALUE_OPT	// one command-line option that takes a value
+	{
+	const char *m_pstrName;	// option name without the leading '-', compared with stricmp
+	const char *m_pstrValue;	// 0 until the option is seen, then a strsave'd copy of its value
+	};
+
+struct FLAG_OPT	// one command-line option that takes no value
+	{
+	const char *m_pstrName;	// option name without the leading '-', compared with stricmp
+	bool m_bSet;	// false until the option is seen on a command line
+	};
+
+// Every option that takes a value, one { name, value } entry each.
+// All values start out as 0 (option not given).
+static VALUE_OPT ValueOpts[] =
+	{
+	{ "in",					0 },
+	{ "in1",				0 },
+	{ "in2",				0 },
+	{ "out",				0 },
+	{ "MaxIters",			0 },
+	{ "MaxHours",			0 },
+	{ "GapOpen",			0 },
+	{ "GapOpen2",			0 },
+	{ "GapExtend",			0 },
+	{ "GapExtend2",			0 },
+	{ "GapAmbig",			0 },
+	{ "Center",				0 },
+	{ "SmoothScoreCeil",	0 },
+	{ "MinBestColScore",	0 },
+	{ "MinSmoothScore",		0 },
+	{ "ObjScore",			0 },
+	{ "SmoothWindow",		0 },
+	{ "RefineWindow",		0 },
+	{ "FromWindow",			0 },
+	{ "ToWindow",			0 },
+	{ "SaveWindow",			0 },
+	{ "WindowOffset",		0 },
+	{ "FirstWindow",		0 },
+	{ "AnchorSpacing",		0 },
+	{ "Log",				0 },
+	{ "LogA",				0 },
+	{ "MaxTrees",			0 },
+	{ "SUEFF",				0 },
+	{ "Distance1",			0 },
+	{ "Distance2",			0 },
+	{ "Weight1",			0 },
+	{ "Weight2",			0 },
+	{ "Cluster1",			0 },
+	{ "Cluster2",			0 },
+	{ "Root1",				0 },
+	{ "Root2",				0 },
+	{ "Tree1",				0 },
+	{ "Tree2",				0 },
+	{ "UseTree",			0 },
+	{ "UseTree_NoWarn",		0 },
+	{ "DiagLength",			0 },
+	{ "DiagMargin",			0 },
+	{ "DiagBreak",			0 },
+	{ "Hydro",				0 },
+	{ "HydroFactor",		0 },
+	{ "SPScore",			0 },
+	{ "SeqType",			0 },
+	{ "MaxMB",				0 },
+	{ "ComputeWeights",		0 },
+	{ "MaxSubFam",			0 },
+	{ "ScoreFile",			0 },
+	{ "TermGaps",			0 },
+	{ "FASTAOut",			0 },
+	{ "CLWOut",				0 },
+	{ "CLWStrictOut",		0 },
+	{ "HTMLOut",			0 },
+	{ "MSFOut",				0 },
+	{ "PHYIOut",			0 },
+	{ "PHYSOut",			0 },
+	{ "Matrix",				0 },
+	};
+static int ValueOptCount = sizeof ValueOpts / sizeof ValueOpts[0];
+
+// Every option that takes no value, one { name, set } entry each.
+// All flags start out cleared.
+static FLAG_OPT FlagOpts[] =
+	{
+	{ "LE",			false },
+	{ "SP",			false },
+	{ "SV",			false },
+	{ "SPN",		false },
+	{ "Core",		false },
+	{ "NoCore",		false },
+	{ "Diags1",		false },
+	{ "Diags2",		false },
+	{ "Diags",		false },
+	{ "Quiet",		false },
+	{ "MSF",		false },
+	{ "Verbose",	false },
+	{ "Anchors",	false },
+	{ "NoAnchors",	false },
+	{ "Refine",		false },
+	{ "RefineW",	false },
+	{ "SW",			false },
+	{ "Profile",	false },
+	{ "PPScore",	false },
+	{ "Cluster",	false },
+	{ "Brenner",	false },
+	{ "Dimer",		false },
+	{ "clw",		false },
+	{ "clwstrict",	false },
+	{ "HTML",		false },
+	{ "Version",	false },
+	{ "Stable",		false },
+	{ "Group",		false },
+	{ "FASTA",		false },
+	{ "ProfDB",		false },
+	{ "PAS",		false },
+	{ "PHYI",		false },
+	{ "PHYS",		false },
+	};
+static int FlagOptCount = sizeof FlagOpts / sizeof FlagOpts[0];
+
+// Marks the flag option named Arg as set. Returns true when Arg names
+// a known flag, false otherwise (nothing is changed in that case).
+static bool TestSetFlagOpt(const char *Arg)
+	{
+	FLAG_OPT *const End = FlagOpts + FlagOptCount;
+	for (FLAG_OPT *Opt = FlagOpts; Opt != End; ++Opt)
+		{
+		if (0 != stricmp(Arg, Opt->m_pstrName))
+			continue;
+		Opt->m_bSet = true;
+		return true;
+		}
+	return false;
+	}
+
+// Stores a copy of Value for the value option named Arg. Returns true
+// when Arg names a known value option, false otherwise. A known option
+// given without a value (Value == 0) is reported on stderr and ends
+// the program with EXIT_NotStarted.
+static bool TestSetValueOpt(const char *Arg, const char *Value)
+	{
+	VALUE_OPT *const End = ValueOpts + ValueOptCount;
+	for (VALUE_OPT *Opt = ValueOpts; Opt != End; ++Opt)
+		{
+		if (0 != stricmp(Arg, Opt->m_pstrName))
+			continue;
+		if (0 == Value)
+			{
+			fprintf(stderr, "Option -%s must have value\n", Arg);
+			exit(EXIT_NotStarted);
+			}
+		Opt->m_pstrValue = strsave(Value);
+		return true;
+		}
+	return false;
+	}
+
+// Returns whether the flag option Name has been set. Asking for a
+// name that is not in FlagOpts is fatal.
+bool FlagOpt(const char *Name)
+	{
+	const FLAG_OPT *const End = FlagOpts + FlagOptCount;
+	for (const FLAG_OPT *Opt = FlagOpts; Opt != End; ++Opt)
+		{
+		if (0 == stricmp(Name, Opt->m_pstrName))
+			return Opt->m_bSet;
+		}
+	Quit("FlagOpt(%s) invalid", Name);
+	return false;
+	}
+
+// Returns the value stored for the value option Name, or 0 when the
+// option was never given. Asking for a name that is not in ValueOpts
+// is fatal.
+const char *ValueOpt(const char *Name)
+	{
+	const VALUE_OPT *const End = ValueOpts + ValueOptCount;
+	for (const VALUE_OPT *Opt = ValueOpts; Opt != End; ++Opt)
+		{
+		if (0 == stricmp(Name, Opt->m_pstrName))
+			return Opt->m_pstrValue;
+		}
+	Quit("ValueOpt(%s) invalid", Name);
+	return 0;
+	}
+
+// Applies a vector of command-line arguments to the option tables.
+// Every argument must start with '-'. A flag option consumes one
+// argument, a value option consumes two (the option and its value).
+// An argument without '-' or an unknown option is reported on stderr
+// and ends the program with EXIT_NotStarted.
+// Scanning starts at argv[0], so the caller must not pass the program
+// name.
+void ProcessArgVect(int argc, char *argv[])
+	{
+	int iNext = 0;
+	while (iNext < argc)
+		{
+		const char *Arg = argv[iNext];
+		if ('-' != Arg[0])
+			{
+			fprintf(stderr, "Command-line option \"%s\" must start with '-'\n", Arg);
+			exit(EXIT_NotStarted);
+			}
+
+		const char *OptName = Arg + 1;
+		if (TestSetFlagOpt(OptName))
+			{
+			iNext += 1;
+			continue;
+			}
+
+	// The following argument, if there is one, is the candidate value.
+		char *Value = (iNext < argc - 1) ? argv[iNext+1] : 0;
+		if (!TestSetValueOpt(OptName, Value))
+			{
+			fprintf(stderr, "Invalid command line option \"%s\"\n", OptName);
+			Usage();
+			exit(EXIT_NotStarted);
+			}
+		iNext += 2;
+		}
+	}
+
+// Splits ArgStr at whitespace into at most MAX_ARGS arguments and
+// applies them with ProcessArgVect. A null ArgStr is ignored; more
+// than MAX_ARGS arguments is fatal. ArgStr itself is not modified: the
+// split is done in a private copy that is released before returning.
+void ProcessArgStr(const char *ArgStr)
+	{
+	const int MAX_ARGS = 64;
+	char *argv[MAX_ARGS];
+
+	if (0 == ArgStr)
+		return;
+
+// Modifiable copy
+	char *StrCopy = strsave(ArgStr);
+
+	int argc = 0;
+	bool bInArg = false;
+	char *Str = StrCopy;
+	while (char c = *Str)
+		{
+	// isspace is only defined for unsigned char values and EOF, so a
+	// plain (possibly negative) char must be converted first.
+		if (isspace((unsigned char) c))
+			{
+		// Whitespace terminates the current argument in place.
+			*Str = 0;
+			bInArg = false;
+			}
+		else if (!bInArg)
+			{
+			bInArg = true;
+			if (argc >= MAX_ARGS)
+				Quit("Too many args in MUSCLE_CMDLINE");
+			argv[argc++] = Str;
+			}
+		Str++;
+		}
+	ProcessArgVect(argc, argv);
+	free(StrCopy);
+	}
+
+// Writes every flag option and its current state (0 or 1) to the log,
+// one per line.
+void ListFlagOpts()
+	{
+	const FLAG_OPT *const End = FlagOpts + FlagOptCount;
+	for (const FLAG_OPT *Opt = FlagOpts; Opt != End; ++Opt)
+		Log("%s %d\n", Opt->m_pstrName, Opt->m_bSet);
+	}

Added: trunk/packages/muscle/branches/upstream/current/outweights.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/outweights.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/outweights.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,17 @@
+#include "muscle.h"
+#include "msa.h"
+
+// Writes the weight of every sequence of msa to the text file
+// FileName, one "name<TAB>weight" line per sequence. The file is
+// created or truncated. Failure to open the file, or any error while
+// writing or closing it, is fatal, so a truncated weights file is
+// never left behind silently.
+void OutWeights(const char *FileName, const MSA &msa)
+	{
+	FILE *f = fopen(FileName, "w");
+	if (0 == f)
+		Quit("Cannot open '%s'", FileName);
+	const unsigned uSeqCount = msa.GetSeqCount();
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		const char *Id = msa.GetSeqName(uSeqIndex);
+		const WEIGHT w = msa.GetSeqWeight(uSeqIndex);
+		fprintf(f, "%s\t%.3g\n", Id, w);
+		}
+
+// The error indicator is sticky, so one check covers every fprintf
+// above; fclose reports errors from flushing the remaining buffer.
+	const bool bWriteError = (0 != ferror(f));
+	const bool bCloseError = (0 != fclose(f));
+	if (bWriteError || bCloseError)
+		Quit("Error writing '%s'", FileName);
+	}

Added: trunk/packages/muscle/branches/upstream/current/pam200mafft.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/pam200mafft.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/pam200mafft.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,32 @@
+#include "muscle.h"
+
+// Adjusted PAM200 scoring matrix as used by default in MAFFT.
+//	Katoh, Misawa, Kuma and Miyata (2002), NAR 30(14), 3059-3066.
+
+static float PAM200[23][23] =	// file-local (static) 23x23 table; rows and columns follow the letter order of the header comment below
+	{
+//   A      C      D      E      F      G      H      I      K      L      M      N      P      Q      R      S      T      V      W      Y      B      Z      X      
+   408,    20,    54,    52,  -182,   179,   -68,   109,   -35,   -47,    39,   106,   206,   -14,   -12,   257,   293,   191,  -306,  -219,     0,     0,     0, // A
+    20,  1190,  -228,  -295,    94,     6,    63,  -131,  -184,  -176,  -112,   -29,  -122,  -195,    49,   185,    13,   -49,   199,   333,     0,     0,     0, // C
+    54,  -228,   645,   516,  -399,   168,    98,  -225,    75,  -341,  -235,   352,  -149,   142,   -44,    65,     7,  -147,  -418,  -128,     0,     0,     0, // D
+    52,  -295,   516,   630,  -460,   145,    45,  -225,   195,  -307,  -222,   186,  -121,   299,    54,   -10,   -36,  -130,  -366,  -285,     0,     0,     0, // E
+  -182,    94,  -399,  -460,   908,  -387,    82,   100,  -423,   340,    87,  -216,  -160,  -274,  -307,   -31,  -153,    51,    19,   604,     0,     0,     0, // F
+   179,     6,   168,   145,  -387,   682,   -94,  -196,   -14,  -304,  -226,    99,   -57,   -48,   117,   175,    41,   -73,   -38,  -329,     0,     0,     0, // G
+   -68,    63,    98,    45,    82,   -94,   786,  -185,   164,   -72,  -132,   258,    86,   388,   277,    55,   -15,  -197,  -181,   488,     0,     0,     0, // H
+   109,  -131,  -225,  -225,   100,  -196,  -185,   574,  -204,   308,   411,   -94,   -95,  -202,  -188,     1,   182,   489,  -254,  -133,     0,     0,     0, // I
+   -35,  -184,    75,   195,  -423,   -14,   164,  -204,   652,  -229,   -98,   206,   -66,   335,   486,    22,    39,  -207,  -196,  -244,     0,     0,     0, // K
+   -47,  -176,  -341,  -307,   340,  -304,   -72,   308,  -229,   611,   389,  -203,    73,   -66,  -150,   -49,   -21,   259,   -46,    -9,     0,     0,     0, // L
+    39,  -112,  -235,  -222,    87,  -226,  -132,   411,   -98,   389,   776,  -111,   -78,  -104,  -109,   -29,   149,   351,  -209,  -162,     0,     0,     0, // M
+   106,   -29,   352,   186,  -216,    99,   258,   -94,   206,  -203,  -111,   536,    -1,   108,    93,   260,   188,   -98,  -359,    12,     0,     0,     0, // N
+   206,  -122,  -149,  -121,  -160,   -57,    86,   -95,   -66,    73,   -78,    -1,   756,   142,    25,   241,   159,   -55,  -353,  -206,     0,     0,     0, // P
+   -14,  -195,   142,   299,  -274,   -48,   388,  -202,   335,   -66,  -104,   108,   142,   655,   321,     7,   -15,  -175,  -223,   -53,     0,     0,     0, // Q
+   -12,    49,   -44,    54,  -307,   117,   277,  -188,   486,  -150,  -109,    93,    25,   321,   626,    48,    16,  -181,   124,  -113,     0,     0,     0, // R
+   257,   185,    65,   -10,   -31,   175,    55,     1,    22,   -49,   -29,   260,   241,     7,    48,   373,   279,    28,  -193,   -35,     0,     0,     0, // S
+   293,    13,     7,   -36,  -153,    41,   -15,   182,    39,   -21,   149,   188,   159,   -15,    16,   279,   442,   163,  -323,  -170,     0,     0,     0, // T
+   191,   -49,  -147,  -130,    51,   -73,  -197,   489,  -207,   259,   351,   -98,   -55,  -175,  -181,    28,   163,   525,  -225,  -177,     0,     0,     0, // V
+  -306,   199,  -418,  -366,    19,   -38,  -181,  -254,  -196,   -46,  -209,  -359,  -353,  -223,   124,  -193,  -323,  -225,  1495,    83,     0,     0,     0, // W
+  -219,   333,  -128,  -285,   604,  -329,   488,  -133,  -244,    -9,  -162,    12,  -206,   -53,  -113,   -35,  -170,  -177,    83,   999,     0,     0,     0, // Y
+     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // B
+     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // Z
+     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // X
+	};	// the B, Z and X rows and columns are all zero

Added: trunk/packages/muscle/branches/upstream/current/params.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/params.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/params.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,608 @@
+#include "muscle.h"
+#include "objscore.h"
+#include "profile.h"
+#include "enumopts.h"
+
+// Fraction of the value returned by GetRAMSizeMB() that SetParams() uses as
+// the memory cap when no -MaxMB option is given.
+const double DEFAULT_MAX_MB_FRACT = 0.8;
+
+// Scoring parameters. g_scoreCenter, g_scoreGapExtend and g_scoreGapOpen
+// receive per-score-type values from the SetDefaultsXX functions below.
+SCORE g_scoreCenter = 0;
+SCORE g_scoreGapExtend = 0;
+SCORE g_scoreGapOpen2 = MINUS_INFINITY;
+SCORE g_scoreGapExtend2 = MINUS_INFINITY;
+SCORE g_scoreGapAmbig = 0;
+SCORE g_scoreAmbigFactor = 0;
+
+// Substitution matrices defined in other translation units.
+extern SCOREMATRIX VTML_LA;
+extern SCOREMATRIX PAM200;
+extern SCOREMATRIX PAM200NoCenter;
+extern SCOREMATRIX VTML_SP;
+extern SCOREMATRIX VTML_SPNoCenter;
+extern SCOREMATRIX NUC_SP;
+
+// Matrix currently in use; assigned by the SetDefaultsXX functions.
+PTR_SCOREMATRIX g_ptrScoreMatrix;
+
+// Input / output file names. SetParams() overwrites these from the -in,
+// -out, -FASTAOut, ... options; "-" is the default for -in and -out.
+const char *g_pstrInFileName = "-";
+const char *g_pstrOutFileName = "-";
+const char *g_pstrFASTAOutFileName = 0;
+const char *g_pstrMSFOutFileName = 0;
+const char *g_pstrClwOutFileName = 0;
+const char *g_pstrClwStrictOutFileName = 0;
+const char *g_pstrHTMLOutFileName = 0;
+const char *g_pstrPHYIOutFileName = 0;
+const char *g_pstrPHYSOutFileName = 0;
+
+// Set from -in1 / -in2.
+const char *g_pstrFileName1 = 0;
+const char *g_pstrFileName2 = 0;
+
+// Set from -SPScore / -Matrix.
+const char *g_pstrSPFileName = 0;
+const char *g_pstrMatrixFileName = 0;
+
+// Set from -UseTree or -UseTree_NoWarn; the flag records that the NoWarn
+// variant supplied the name.
+const char *g_pstrUseTreeFileName = 0;
+bool g_bUseTreeNoWarn = false;
+
+// No explicit initializer: as namespace-scope objects these start out null.
+const char *g_pstrComputeWeightsFileName;
+const char *g_pstrScoreFileName;
+
+const char *g_pstrProf1FileName = 0;
+const char *g_pstrProf2FileName = 0;
+
+// Unsigned tuning parameters, each overridable by a command-line option in
+// SetParams().
+unsigned g_uSmoothWindowLength = 7;
+unsigned g_uAnchorSpacing = 32;
+unsigned g_uMaxTreeRefineIters = 1;
+
+unsigned g_uRefineWindow = 200;
+unsigned g_uWindowFrom = 0;
+unsigned g_uWindowTo = 0;
+unsigned g_uSaveWindow = uInsane;
+unsigned g_uWindowOffset = 0;
+
+unsigned g_uMaxSubFamCount = 5;
+
+unsigned g_uHydrophobicRunLength = 5;
+float g_dHydroFactor = (float) 1.2;
+
+unsigned g_uMinDiagLength = 24;	// TODO alpha -- should depend on alphabet?
+unsigned g_uMaxDiagBreak = 1;
+unsigned g_uDiagMargin = 5;
+
+float g_dSUEFF = (float) 0.1;
+
+// Boolean switches; most are set by same-named flag options in SetParams().
+bool g_bPrecompiledCenter = true;
+bool g_bNormalizeCounts = false;
+bool g_bDiags1 = false;
+bool g_bDiags2 = false;
+bool g_bAnchors = true;
+bool g_bQuiet = false;
+bool g_bVerbose = false;
+bool g_bRefine = false;
+bool g_bRefineW = false;
+bool g_bProfDB = false;
+bool g_bLow = false;
+bool g_bSW = false;
+bool g_bCluster = false;
+bool g_bProfile = false;
+bool g_bPPScore = false;
+bool g_bBrenner = false;
+bool g_bDimer = false;
+bool g_bVersion = false;
+bool g_bStable = false;
+bool g_bFASTA = false;
+bool g_bPAS = false;
+
+// Exceptions are caught by default only in non-DEBUG builds; the -Core and
+// -NoCore flags override this in SetParams().
+#if	DEBUG
+bool g_bCatchExceptions = false;
+#else
+bool g_bCatchExceptions = true;
+#endif
+
+// Output format switches.
+bool g_bMSF = false;
+bool g_bAln = false;
+bool g_bClwStrict = false;
+bool g_bHTML = false;
+bool g_bPHYI = false;
+bool g_bPHYS = false;
+
+// Resource limits. g_ulMaxSecs == 0 is reported as "(No limit)" by
+// MaxSecsToStr(). g_uMaxMB is replaced in SetParams() when -MaxMB is absent.
+unsigned g_uMaxIters = 8;
+unsigned long g_ulMaxSecs = 0;
+unsigned g_uMaxMB = 500;
+
+PPSCORE g_PPScore = PPSCORE_LE;
+OBJSCORE g_ObjScore = OBJSCORE_SPM;
+
+// Settings suffixed 1 and 2 are independent copies of the same option
+// (-Weight1/-Weight2, -Distance1/-Distance2, ...).
+SEQWEIGHT g_SeqWeight1 = SEQWEIGHT_ClustalW;
+SEQWEIGHT g_SeqWeight2 = SEQWEIGHT_ClustalW;
+
+DISTANCE g_Distance1 = DISTANCE_Kmer6_6;
+DISTANCE g_Distance2 = DISTANCE_PctIdKimura;
+
+CLUSTER g_Cluster1 = CLUSTER_UPGMB;
+CLUSTER g_Cluster2 = CLUSTER_UPGMB;
+
+ROOT g_Root1 = ROOT_Pseudo;
+ROOT g_Root2 = ROOT_Pseudo;
+
+// NOTE(review): never assigned in this file; SetParams() uses a local
+// "Diags" variable for the -Diags flag instead -- confirm whether this
+// global is still used elsewhere.
+bool g_bDiags;
+
+SEQTYPE g_SeqType = SEQTYPE_Auto;
+
+TERMGAPS g_TermGaps = TERMGAPS_Half;
+
+//------------------------------------------------------
+// These parameters depend on the chosen prof-prof
+// score (g_PPScore) and are initialized to "Undefined"
+// (fInsane) until a SetDefaultsXX function assigns them.
+float g_dSmoothScoreCeil = fInsane;
+float g_dMinBestColScore = fInsane;
+float g_dMinSmoothScore = fInsane;
+SCORE g_scoreGapOpen = fInsane;
+//------------------------------------------------------
+
+// Converts a decimal string with atoi() and reinterprets the int result as
+// unsigned. No validation or range checking is done on the input.
+static unsigned atou(const char *s)
+	{
+	const int iParsed = atoi(s);
+	return (unsigned) iParsed;
+	}
+
+// Returns a printable form of the time limit g_ulMaxSecs.
+// A limit of zero is shown as "(No limit)"; anything else is
+// formatted by SecsToStr().
+const char *MaxSecsToStr()
+	{
+	if (0 != g_ulMaxSecs)
+		return SecsToStr(g_ulMaxSecs);
+	return "(No limit)";
+	}
+
+// Writes the program version followed by the current value of the global
+// parameters to the log, one "label value" line per parameter.
+// Labels are padded to a fixed width so the values line up in a column.
+void ListParams()
+	{
+	Log("\n");
+	Log("%s\n", MUSCLE_LONG_VERSION);
+	Log("http://www.drive5.com/muscle\n");
+	Log("\n");
+
+	// Scoring and resource limits.
+	Log("Profile-profile score    %s\n", PPSCOREToStr(g_PPScore));
+	Log("Max iterations           %u\n", g_uMaxIters);
+	Log("Max trees                %u\n", g_uMaxTreeRefineIters);
+	Log("Max time                 %s\n", MaxSecsToStr());
+	Log("Max MB                   %u\n", g_uMaxMB);
+	Log("Gap open                 %g\n", g_scoreGapOpen);
+	Log("Gap extend (dimer)       %g\n", g_scoreGapExtend);
+	Log("Gap ambig factor         %g\n", g_scoreAmbigFactor);
+	Log("Gap ambig penalty        %g\n", g_scoreGapAmbig);
+	Log("Center (LE)              %g\n", g_scoreCenter);
+	Log("Term gaps                %s\n", TERMGAPSToStr(g_TermGaps));
+
+	// Unsigned tuning parameters.
+	Log("Smooth window length     %u\n", g_uSmoothWindowLength);
+	Log("Refine window length     %u\n", g_uRefineWindow);
+	Log("Min anchor spacing       %u\n", g_uAnchorSpacing);
+	Log("Min diag length (lambda) %u\n", g_uMinDiagLength);
+	Log("Diag margin (mu)         %u\n", g_uDiagMargin);
+	// The variable is a maximum, so the label says "Max", not "Min".
+	Log("Max diag break           %u\n", g_uMaxDiagBreak);
+	Log("Hydrophobic window       %u\n", g_uHydrophobicRunLength);
+
+	// Floating-point tuning parameters.
+	Log("Hydrophobic gap factor   %g\n", g_dHydroFactor);
+	Log("Smooth score ceiling     %g\n", g_dSmoothScoreCeil);
+	Log("Min best col score       %g\n", g_dMinBestColScore);
+	Log("Min anchor score         %g\n", g_dMinSmoothScore);
+	Log("SUEFF                    %g\n", g_dSUEFF);
+
+	// Boolean switches.
+	Log("Brenner root MSA         %s\n", BoolToStr(g_bBrenner));
+	Log("Normalize counts         %s\n", BoolToStr(g_bNormalizeCounts));
+	Log("Diagonals (1)            %s\n", BoolToStr(g_bDiags1));
+	Log("Diagonals (2)            %s\n", BoolToStr(g_bDiags2));
+	Log("Anchors                  %s\n", BoolToStr(g_bAnchors));
+	Log("MSF output format        %s\n", BoolToStr(g_bMSF));
+	Log("Phylip interleaved       %s\n", BoolToStr(g_bPHYI));
+	Log("Phylip sequential        %s\n", BoolToStr(g_bPHYS));
+	Log("ClustalW output format   %s\n", BoolToStr(g_bAln));
+	Log("Catch exceptions         %s\n", BoolToStr(g_bCatchExceptions));
+	Log("Quiet                    %s\n", BoolToStr(g_bQuiet));
+	Log("Refine                   %s\n", BoolToStr(g_bRefine));
+	Log("ProfDB                   %s\n", BoolToStr(g_bProfDB));
+	Log("Low complexity profiles  %s\n", BoolToStr(g_bLow));
+
+	Log("Objective score          %s\n", OBJSCOREToStr(g_ObjScore));
+
+	// First set of distance / clustering / rooting / weighting settings.
+	Log("Distance method (1)      %s\n", DISTANCEToStr(g_Distance1));
+	Log("Clustering method (1)    %s\n", CLUSTERToStr(g_Cluster1));
+	Log("Root method (1)          %s\n", ROOTToStr(g_Root1));
+	Log("Sequence weighting (1)   %s\n", SEQWEIGHTToStr(g_SeqWeight1));
+
+	// Second set.
+	Log("Distance method (2)      %s\n", DISTANCEToStr(g_Distance2));
+	Log("Clustering method (2)    %s\n", CLUSTERToStr(g_Cluster2));
+	Log("Root method (2)          %s\n", ROOTToStr(g_Root2));
+	Log("Sequence weighting (2)   %s\n", SEQWEIGHTToStr(g_SeqWeight2));
+
+	Log("\n");
+	}
+
+// Installs the default parameters used when g_PPScore is PPSCORE_LE:
+// the VTML_LA matrix, normalized counts, and the gap / center / anchor
+// thresholds below.
+static void SetDefaultsLE()
+	{
+	g_ptrScoreMatrix = &VTML_LA;
+	g_bNormalizeCounts = true;
+
+	// Earlier values, kept for reference: gap open -3.00, center -0.55.
+	g_scoreCenter = (SCORE) -0.52;
+	g_scoreGapOpen = (SCORE) -2.9;
+
+	// Earlier values, kept for reference: ceil 5.0, best col 4.0, smooth 2.0.
+	g_dMinSmoothScore = 1.0;
+	g_dMinBestColScore = 2.0;
+	g_dSmoothScoreCeil = 3.0;
+
+	g_Distance2 = DISTANCE_PctIdKimura;
+	g_Distance1 = DISTANCE_Kmer6_6;
+	}
+
+// Installs the default parameters used when g_PPScore is PPSCORE_SP:
+// the PAM200 matrix, un-normalized counts, and the thresholds below.
+static void SetDefaultsSP()
+	{
+	g_ptrScoreMatrix = &PAM200;
+	g_bNormalizeCounts = false;
+
+	// The center term is already folded into the score matrix.
+	g_scoreCenter = 0.0;
+	g_scoreGapOpen = -1439;
+
+	g_dMinSmoothScore = 125.0;
+	g_dMinBestColScore = 300.0;
+	g_dSmoothScoreCeil = 200.0;
+
+	g_Distance2 = DISTANCE_PctIdKimura;
+	g_Distance1 = DISTANCE_Kmer6_6;
+	}
+
+// Installs the default parameters used when g_PPScore is PPSCORE_SV:
+// the VTML_SP matrix, un-normalized counts, and the thresholds below.
+static void SetDefaultsSV()
+	{
+	g_ptrScoreMatrix = &VTML_SP;
+	g_bNormalizeCounts = false;
+
+	// The center term is already folded into the score matrix.
+	g_scoreCenter = 0.0;
+	g_scoreGapOpen = -300;
+
+	g_dMinSmoothScore = 40.0;
+	g_dMinBestColScore = 130.0;
+	g_dSmoothScoreCeil = 90.0;
+
+	g_Distance2 = DISTANCE_PctIdKimura;
+	g_Distance1 = DISTANCE_Kmer6_6;
+	}
+
+//static void SetDefaultsSPN()
+//	{
+//	g_ptrScoreMatrix = &NUC_SP;
+//
+//	g_scoreGapOpen = -400;
+//	g_scoreCenter = 0.0;	// center pre-added into score mx
+//
+//	g_bNormalizeCounts = false;
+//
+//	g_dSmoothScoreCeil = 999.0;		// disable
+//	g_dMinBestColScore = 90;
+//	g_dMinSmoothScore = 90;
+//
+//	g_Distance1 = DISTANCE_Kmer4_6;
+//	g_Distance2 = DISTANCE_PctIdKimura;
+//	}
+
+// Installs the default parameters used when g_PPScore is PPSCORE_SPN and
+// the alphabet is ALPHA_DNA: the NUC_SP matrix with the values below.
+static void SetDefaultsSPN_DNA()
+	{
+	g_ptrScoreMatrix = &NUC_SP;
+	g_bNormalizeCounts = false;
+
+	// The center term is already folded into the score matrix.
+	g_scoreCenter = 0.0;
+	g_scoreGapExtend = 0.0;
+	g_scoreGapOpen = -400;
+
+	g_dMinSmoothScore = 90;
+	g_dMinBestColScore = 90;
+	// A ceiling this high disables the smoothing cap.
+	g_dSmoothScoreCeil = 999.0;
+
+	g_Distance2 = DISTANCE_PctIdKimura;
+	g_Distance1 = DISTANCE_Kmer4_6;
+	}
+
+// Installs the default parameters used when g_PPScore is PPSCORE_SPN and
+// the alphabet is ALPHA_RNA: the NUC_SP matrix with the values below.
+static void SetDefaultsSPN_RNA()
+	{
+	g_ptrScoreMatrix = &NUC_SP;
+	g_bNormalizeCounts = false;
+
+	// Total center = NUC_EXTEND - 300.
+	g_scoreCenter = -300;
+	g_scoreGapExtend = 0.0;
+	g_scoreGapOpen = -420;
+
+	g_dMinSmoothScore = 90;
+	g_dMinBestColScore = 90;
+	// A ceiling this high disables the smoothing cap.
+	g_dSmoothScoreCeil = 999.0;
+
+	g_Distance2 = DISTANCE_PctIdKimura;
+	g_Distance1 = DISTANCE_Kmer4_6;
+	}
+
+// If the flag option OptName is present (per FlagOpt), stores
+// bValueIfFlagSet in *ptrParam; otherwise leaves *ptrParam untouched.
+static void FlagParam(const char *OptName, bool *ptrParam, bool bValueIfFlagSet)
+	{
+	if (!FlagOpt(OptName))
+		return;
+	*ptrParam = bValueIfFlagSet;
+	}
+
+// If ValueOpt returns a non-null value for OptName, stores that pointer in
+// *ptrptrParam (the string is not copied); otherwise leaves it untouched.
+static void StrParam(const char *OptName, const char **ptrptrParam)
+	{
+	const char *pstrValue = ValueOpt(OptName);
+	if (0 == pstrValue)
+		return;
+	*ptrptrParam = pstrValue;
+	}
+
+// If ValueOpt returns a non-null value for OptName, converts it with
+// atof() and stores it in *ptrParam; otherwise leaves *ptrParam untouched.
+// The text is not validated.
+static void FloatParam(const char *OptName, float *ptrParam)
+	{
+	const char *pstrValue = ValueOpt(OptName);
+	if (0 == pstrValue)
+		return;
+	*ptrParam = (float) atof(pstrValue);
+	}
+
+// If ValueOpt returns a non-null value for OptName, converts it with
+// atou() and stores it in *ptrParam; otherwise leaves *ptrParam untouched.
+static void UintParam(const char *OptName, unsigned *ptrParam)
+	{
+	const char *pstrValue = ValueOpt(OptName);
+	if (0 == pstrValue)
+		return;
+	*ptrParam = atou(pstrValue);
+	}
+
+// Looks up the value given for option OptName in the table Opts and stores
+// the matching iValue in *Param. The comparison ignores case (stricmp).
+// The table ends at the first entry whose pstrOpt is null; reaching that
+// entry without a match calls Quit(). Does nothing when ValueOpt returns
+// null for the option.
+static void EnumParam(const char *OptName, EnumOpt *Opts, int *Param)
+	{
+	const char *Value = ValueOpt(OptName);
+	if (0 == Value)
+		return;
+
+	for (EnumOpt *ptrOpt = Opts; ; ++ptrOpt)
+		{
+		const char *pstrCandidate = ptrOpt->pstrOpt;
+		if (0 == pstrCandidate)
+			Quit("Invalid parameter -%s %s", OptName, Value);
+		if (0 != stricmp(Value, pstrCandidate))
+			continue;
+		*Param = ptrOpt->iValue;
+		return;
+		}
+	}
+
+// Calls the SetDefaultsXX function that matches g_PPScore. For PPSCORE_SPN
+// the choice also depends on g_Alpha (DNA or RNA). Any other g_PPScore or
+// alphabet value calls Quit().
+static void SetPPDefaultParams()
+	{
+	if (PPSCORE_SP == g_PPScore)
+		SetDefaultsSP();
+	else if (PPSCORE_LE == g_PPScore)
+		SetDefaultsLE();
+	else if (PPSCORE_SV == g_PPScore)
+		SetDefaultsSV();
+	else if (PPSCORE_SPN == g_PPScore)
+		{
+		if (ALPHA_DNA == g_Alpha)
+			SetDefaultsSPN_DNA();
+		else if (ALPHA_RNA == g_Alpha)
+			SetDefaultsSPN_RNA();
+		else
+			Quit("Invalid alpha %d", g_Alpha);
+		}
+	else
+		Quit("Invalid g_PPScore");
+	}
+
+// Overrides the score-dependent parameters with any values given on the
+// command line. Options that are absent leave their parameter unchanged.
+// Note that the "GapAmbig" option sets g_scoreAmbigFactor.
+static void SetPPCommandLineParams()
+	{
+	struct FLOATOPT
+		{
+		const char *pstrName;
+		float *ptrValue;
+		};
+	const FLOATOPT FloatOpts[] =
+		{
+		{ "GapOpen", &g_scoreGapOpen },
+		{ "GapOpen2", &g_scoreGapOpen2 },
+		{ "GapExtend", &g_scoreGapExtend },
+		{ "GapExtend2", &g_scoreGapExtend2 },
+		{ "GapAmbig", &g_scoreAmbigFactor },
+		{ "Center", &g_scoreCenter },
+		{ "SmoothScoreCeil", &g_dSmoothScoreCeil },
+		{ "MinBestColScore", &g_dMinBestColScore },
+		{ "MinSmoothScore", &g_dMinSmoothScore },
+		};
+	const unsigned uFloatOptCount = sizeof(FloatOpts)/sizeof(FloatOpts[0]);
+
+	for (unsigned n = 0; n < uFloatOptCount; ++n)
+		FloatParam(FloatOpts[n].pstrName, FloatOpts[n].ptrValue);
+
+	EnumParam("Distance1", DISTANCE_Opts, (int *) &g_Distance1);
+	EnumParam("Distance2", DISTANCE_Opts, (int *) &g_Distance2);
+	}
+
+// Selects the profile-profile score and (re)initializes every parameter
+// that depends on it.
+//   1. When bRespectFlagOpts is true, the first of the flags -SP, -LE, -SV,
+//      -SPN that is present sets g_PPScore.
+//   2. The score is then forced to agree with the alphabet g_Alpha: LE, SP
+//      and SV become SPN for RNA/DNA, and SPN becomes LE for amino.
+//   3. Defaults for the resulting score are installed, then command-line
+//      overrides are applied.
+//   4. With g_bVerbose set, the parameters are listed to the log.
+void SetPPScore(bool bRespectFlagOpts)
+	{
+	if (bRespectFlagOpts)
+		{
+		if (FlagOpt("SP"))
+			g_PPScore = PPSCORE_SP;
+		else if (FlagOpt("LE"))
+			g_PPScore = PPSCORE_LE;
+		else if (FlagOpt("SV"))
+			g_PPScore = PPSCORE_SV;
+		else if (FlagOpt("SPN"))
+			g_PPScore = PPSCORE_SPN;
+		}
+
+	if (PPSCORE_SPN == g_PPScore)
+		{
+		if (ALPHA_Amino == g_Alpha)
+			g_PPScore = PPSCORE_LE;
+		}
+	else if (PPSCORE_LE == g_PPScore || PPSCORE_SP == g_PPScore ||
+	  PPSCORE_SV == g_PPScore)
+		{
+		const bool bNucleo = (ALPHA_RNA == g_Alpha || ALPHA_DNA == g_Alpha);
+		if (bNucleo)
+			g_PPScore = PPSCORE_SPN;
+		}
+
+	SetPPDefaultParams();
+	SetPPCommandLineParams();
+
+	if (g_bVerbose)
+		ListParams();
+	}
+
+// Requests score p, then runs the full selection above with flag options
+// respected -- so a -SP/-LE/-SV/-SPN flag on the command line still takes
+// precedence over p.
+void SetPPScore(PPSCORE p)
+	{
+	g_PPScore = p;
+	const bool bRespectFlagOpts = true;
+	SetPPScore(bRespectFlagOpts);
+	}
+
+// Reads the -MaxHours option and, when it is present and non-zero, stores
+// the equivalent number of seconds in g_ulMaxSecs.
+static void SetMaxSecs()
+	{
+	float fMaxHours = 0.0;
+	FloatParam("MaxHours", &fMaxHours);
+	if (0.0 != fMaxHours)
+		g_ulMaxSecs = (unsigned long) (fMaxHours*60*60);
+	}
+
+// True when the first weighting scheme is ClustalW and either only one
+// iteration is requested or the second weighting scheme is ClustalW too.
+static bool CanDoLowComplexity()
+	{
+	const bool bFirstIsClustalW = (SEQWEIGHT_ClustalW == g_SeqWeight1);
+	const bool bSingleIter = (1 == g_uMaxIters);
+	return bFirstIsClustalW &&
+	  (bSingleIter || SEQWEIGHT_ClustalW == g_SeqWeight2);
+	}
+
+// True when none of the inputs that select something to do were given:
+// the input file name is still "-", and neither g_pstrFileName1 nor
+// g_pstrSPFileName is set.
+bool MissingCommand()
+	{
+	const bool bDefaultInput = (0 == strcmp(g_pstrInFileName, "-"));
+	return bDefaultInput &&
+	  0 == g_pstrFileName1 &&
+	  0 == g_pstrSPFileName;
+	}
+
+// Transfers command-line options into the global parameters defined above.
+// Each XxxParam helper leaves its target unchanged when the option is
+// absent, so the initializers of the globals act as defaults. Where two
+// options write the same variable, the later call wins when both are given.
+void SetParams()
+	{
+	SetMaxSecs();
+
+	StrParam("in", &g_pstrInFileName);
+	StrParam("out", &g_pstrOutFileName);
+
+	StrParam("FASTAOut", &g_pstrFASTAOutFileName);
+	StrParam("ClwOut", &g_pstrClwOutFileName);
+	StrParam("ClwStrictOut", &g_pstrClwStrictOutFileName);
+	StrParam("HTMLOut", &g_pstrHTMLOutFileName);
+	StrParam("PHYIOut", &g_pstrPHYIOutFileName);
+	StrParam("PHYSOut", &g_pstrPHYSOutFileName);
+	StrParam("MSFOut", &g_pstrMSFOutFileName);
+
+	StrParam("in1", &g_pstrFileName1);
+	StrParam("in2", &g_pstrFileName2);
+
+	StrParam("Matrix", &g_pstrMatrixFileName);
+	StrParam("SPScore", &g_pstrSPFileName);
+
+	// -UseTree_NoWarn is read first so that a non-null name at this point
+	// can only have come from it; -UseTree may then overwrite the name.
+	StrParam("UseTree_NoWarn", &g_pstrUseTreeFileName);
+	if (0 != g_pstrUseTreeFileName)
+		g_bUseTreeNoWarn = true;
+
+	StrParam("UseTree", &g_pstrUseTreeFileName);
+	StrParam("ComputeWeights", &g_pstrComputeWeightsFileName);
+	StrParam("ScoreFile", &g_pstrScoreFileName);
+
+	// -Core turns exception catching off, -NoCore turns it on.
+	FlagParam("Core", &g_bCatchExceptions, false);
+	FlagParam("NoCore", &g_bCatchExceptions, true);
+
+	FlagParam("Diags1", &g_bDiags1, true);
+	FlagParam("Diags2", &g_bDiags2, true);
+
+	// -Diags is shorthand for -Diags1 -Diags2.
+	bool Diags = false;
+	FlagParam("Diags", &Diags, true);
+	if (Diags)
+		{
+		g_bDiags1 = true;
+		g_bDiags2 = true;
+		}
+
+	FlagParam("Anchors", &g_bAnchors, true);
+	FlagParam("NoAnchors", &g_bAnchors, false);
+
+	FlagParam("Quiet", &g_bQuiet, true);
+	FlagParam("Verbose", &g_bVerbose, true);
+	FlagParam("Version", &g_bVersion, true);
+	// -Stable and -Group are opposite settings of the same switch.
+	FlagParam("Stable", &g_bStable, true);
+	FlagParam("Group", &g_bStable, false);
+	FlagParam("Refine", &g_bRefine, true);
+	FlagParam("RefineW", &g_bRefineW, true);
+	FlagParam("ProfDB", &g_bProfDB, true);
+	FlagParam("SW", &g_bSW, true);
+	FlagParam("Cluster", &g_bCluster, true);
+	FlagParam("Profile", &g_bProfile, true);
+	FlagParam("PPScore", &g_bPPScore, true);
+	FlagParam("Brenner", &g_bBrenner, true);
+	FlagParam("Dimer", &g_bDimer, true);
+
+	FlagParam("MSF", &g_bMSF, true);
+	FlagParam("PHYI", &g_bPHYI, true);
+	FlagParam("PHYS", &g_bPHYS, true);
+	FlagParam("clw", &g_bAln, true);
+	FlagParam("HTML", &g_bHTML, true);
+	FlagParam("FASTA", &g_bFASTA, true);
+	FlagParam("PAS", &g_bPAS, true);
+
+	// -clwstrict implies -clw.
+	bool b = false;
+	FlagParam("clwstrict", &b, true);
+	if (b)
+		{
+		g_bAln = true;
+		g_bClwStrict = true;
+		}
+
+	UintParam("MaxIters", &g_uMaxIters);
+	UintParam("MaxTrees", &g_uMaxTreeRefineIters);
+	UintParam("SmoothWindow", &g_uSmoothWindowLength);
+	UintParam("RefineWindow", &g_uRefineWindow);
+	UintParam("FromWindow", &g_uWindowFrom);
+	UintParam("ToWindow", &g_uWindowTo);
+	UintParam("SaveWindow", &g_uSaveWindow);
+	UintParam("WindowOffset", &g_uWindowOffset);
+	UintParam("AnchorSpacing", &g_uAnchorSpacing);
+	UintParam("DiagLength", &g_uMinDiagLength);
+	UintParam("DiagMargin", &g_uDiagMargin);
+	UintParam("DiagBreak", &g_uMaxDiagBreak);
+	UintParam("Hydro", &g_uHydrophobicRunLength);
+	UintParam("MaxSubFam", &g_uMaxSubFamCount);
+
+	FloatParam("SUEFF", &g_dSUEFF);
+	FloatParam("HydroFactor", &g_dHydroFactor);
+
+	EnumParam("ObjScore", OBJSCORE_Opts, (int *) &g_ObjScore);
+	EnumParam("TermGaps", TERMGAPS_Opts, (int *) &g_TermGaps);
+
+	EnumParam("Weight1", SEQWEIGHT_Opts, (int *) &g_SeqWeight1);
+	EnumParam("Weight2", SEQWEIGHT_Opts, (int *) &g_SeqWeight2);
+
+	EnumParam("Cluster1", CLUSTER_Opts, (int *) &g_Cluster1);
+	EnumParam("Cluster2", CLUSTER_Opts, (int *) &g_Cluster2);
+
+	EnumParam("Root1", ROOT_Opts, (int *) &g_Root1);
+	EnumParam("Root2", ROOT_Opts, (int *) &g_Root2);
+
+	EnumParam("SeqType", SEQTYPE_Opts, (int *) &g_SeqType);
+
+	// NOTE(review): this uses whatever g_scoreGapOpen and g_scoreAmbigFactor
+	// hold right now; both are (re)assigned by SetPPScore(), so the product
+	// is only meaningful if that ran first -- confirm the call order.
+	g_scoreGapAmbig = g_scoreGapOpen*g_scoreAmbigFactor;
+	g_bLow = CanDoLowComplexity();
+
+	if (g_bDimer)
+		g_bPrecompiledCenter = false;
+
+	// Without an explicit -MaxMB, cap memory at a fixed fraction of the
+	// size reported by GetRAMSizeMB().
+	UintParam("MaxMB", &g_uMaxMB);
+	if (0 == ValueOpt("MaxMB"))
+		g_uMaxMB = (unsigned) (GetRAMSizeMB()*DEFAULT_MAX_MB_FRACT);
+	}

Added: trunk/packages/muscle/branches/upstream/current/params.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/params.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/params.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,114 @@
+// Declarations of the global program parameters defined in params.cpp.
+// The types used here (SCORE, PPSCORE, DISTANCE, ...) are not declared in
+// this header, so it must be included after the headers that declare them.
+#ifndef params_h
+#define params_h
+
+// Input / output file names.
+extern const char *g_pstrInFileName;
+extern const char *g_pstrOutFileName;
+
+extern const char *g_pstrFASTAOutFileName;
+extern const char *g_pstrMSFOutFileName;
+extern const char *g_pstrClwOutFileName;
+extern const char *g_pstrClwStrictOutFileName;
+extern const char *g_pstrHTMLOutFileName;
+extern const char *g_pstrPHYIOutFileName;
+extern const char *g_pstrPHYSOutFileName;
+
+extern const char *g_pstrFileName1;
+extern const char *g_pstrFileName2;
+
+extern const char *g_pstrSPFileName;
+extern const char *g_pstrMatrixFileName;
+
+extern const char *g_pstrUseTreeFileName;
+extern bool g_bUseTreeNoWarn;
+
+extern const char *g_pstrComputeWeightsFileName;
+extern const char *g_pstrScoreFileName;
+
+// Scoring parameters.
+extern SCORE g_scoreGapOpen;
+extern SCORE g_scoreCenter;
+extern SCORE g_scoreGapExtend;
+extern SCORE g_scoreGapAmbig;
+
+// NOTE(review): params.cpp defines these two unconditionally; only their
+// declarations are guarded by DOUBLE_AFFINE.
+#if	DOUBLE_AFFINE
+extern SCORE g_scoreGapOpen2;
+extern SCORE g_scoreGapExtend2;
+#endif
+
+// Unsigned and floating-point tuning parameters.
+extern unsigned g_uSmoothWindowLength;
+extern unsigned g_uAnchorSpacing;
+extern unsigned g_uMaxTreeRefineIters;
+
+extern unsigned g_uMinDiagLength;
+extern unsigned g_uMaxDiagBreak;
+extern unsigned g_uDiagMargin;
+
+extern unsigned g_uRefineWindow;
+extern unsigned g_uWindowFrom;
+extern unsigned g_uWindowTo;
+extern unsigned g_uSaveWindow;
+extern unsigned g_uWindowOffset;
+
+extern unsigned g_uMaxSubFamCount;
+
+extern unsigned g_uHydrophobicRunLength;
+extern float g_dHydroFactor;
+
+extern float g_dSmoothScoreCeil;
+extern float g_dMinBestColScore;
+extern float g_dMinSmoothScore;
+extern float g_dSUEFF;
+
+// Boolean switches.
+extern bool g_bPrecompiledCenter;
+extern bool g_bNormalizeCounts;
+extern bool g_bDiags1;
+extern bool g_bDiags2;
+extern bool g_bDiags;
+extern bool g_bAnchors;
+extern bool g_bCatchExceptions;
+
+// Output format switches.
+extern bool g_bMSF;
+extern bool g_bAln;
+extern bool g_bClwStrict;
+extern bool g_bHTML;
+extern bool g_bPHYI;
+extern bool g_bPHYS;
+
+extern bool g_bQuiet;
+extern bool g_bVerbose;
+extern bool g_bRefine;
+extern bool g_bRefineW;
+// NOTE(review): no definition of g_bRefineX appears among the globals in
+// params.cpp -- confirm it is defined elsewhere or remove the declaration.
+extern bool g_bRefineX;
+extern bool g_bLow;
+extern bool g_bSW;
+extern bool g_bCluster;
+extern bool g_bProfile;
+extern bool g_bProfDB;
+extern bool g_bPPScore;
+extern bool g_bBrenner;
+extern bool g_bDimer;
+extern bool g_bVersion;
+extern bool g_bStable;
+extern bool g_bFASTA;
+extern bool g_bPAS;
+
+// Enumerated settings; the 1 / 2 suffixes are two independent copies of
+// the same kind of setting.
+extern PPSCORE g_PPScore;
+extern OBJSCORE g_ObjScore;
+
+extern DISTANCE g_Distance1;
+extern CLUSTER g_Cluster1;
+extern ROOT g_Root1;
+extern SEQWEIGHT g_SeqWeight1;
+
+extern DISTANCE g_Distance2;
+extern CLUSTER g_Cluster2;
+extern ROOT g_Root2;
+extern SEQWEIGHT g_SeqWeight2;
+
+// Resource limits.
+extern unsigned g_uMaxIters;
+extern unsigned long g_ulMaxSecs;
+extern unsigned g_uMaxMB;
+
+extern SEQTYPE g_SeqType;
+extern TERMGAPS g_TermGaps;
+
+#endif // params_h

Added: trunk/packages/muscle/branches/upstream/current/phy.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/phy.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/phy.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,1069 @@
+#include "muscle.h"
+#include "tree.h"
+#include <math.h>
+
+#define TRACE 0
+
+/***
+Node has 0 to 3 neighbors:
+	0 neighbors:	singleton root
+	1 neighbor:		leaf, neighbor is parent
+	2 neighbors:		non-singleton root
+	3 neighbors:	internal node (other than root)
+
+Minimal rooted tree is single node.
+Minimal unrooted tree is single edge.
+Leaf node always has nulls in neighbors 2 and 3, neighbor 1 is parent.
+When tree is rooted, neighbor 1=parent, 2=left, 3=right.
+***/
+
+// Quits unless both indexes are in range, each node lists the other in one
+// of its three neighbor slots, and -- when node 1 records a length for the
+// edge -- the length is the same seen from either end. The tree is logged
+// before quitting on a structural failure.
+void Tree::AssertAreNeighbors(unsigned uNodeIndex1, unsigned uNodeIndex2) const
+	{
+	const bool bInRange =
+	  uNodeIndex1 < m_uNodeCount && uNodeIndex2 < m_uNodeCount;
+	if (!bInRange)
+		Quit("AssertAreNeighbors(%u,%u), are %u nodes",
+		  uNodeIndex1, uNodeIndex2, m_uNodeCount);
+
+	const bool b1Lists2 =
+	  m_uNeighbor1[uNodeIndex1] == uNodeIndex2 ||
+	  m_uNeighbor2[uNodeIndex1] == uNodeIndex2 ||
+	  m_uNeighbor3[uNodeIndex1] == uNodeIndex2;
+	if (!b1Lists2)
+		{
+		LogMe();
+		Quit("AssertAreNeighbors(%u,%u) failed", uNodeIndex1, uNodeIndex2);
+		}
+
+	const bool b2Lists1 =
+	  m_uNeighbor1[uNodeIndex2] == uNodeIndex1 ||
+	  m_uNeighbor2[uNodeIndex2] == uNodeIndex1 ||
+	  m_uNeighbor3[uNodeIndex2] == uNodeIndex1;
+	if (!b2Lists1)
+		{
+		LogMe();
+		Quit("AssertAreNeighbors(%u,%u) failed", uNodeIndex1, uNodeIndex2);
+		}
+
+	// Nothing more to compare when no length is recorded for this edge.
+	if (!HasEdgeLength(uNodeIndex1, uNodeIndex2))
+		return;
+
+	if (GetEdgeLength(uNodeIndex1, uNodeIndex2) ==
+	  GetEdgeLength(uNodeIndex2, uNodeIndex1))
+		return;
+
+	LogMe();
+	Quit("Tree::AssertAreNeighbors, Edge length disagrees %u, %u",
+	  uNodeIndex1, uNodeIndex2);
+	}
+
+// Checks the structural invariants of one node and calls Quit() on the
+// first violation, logging the tree first for structural failures:
+//   - the index is in range;
+//   - a node with exactly two neighbors must be the root of a rooted tree;
+//   - neighbor slots 2 and 3 are either both null or both set;
+//   - every neighbor lists this node back (AssertAreNeighbors);
+//   - no neighbor appears in more than one slot;
+//   - in a rooted tree, only the root may lack a parent, and every other
+//     node must be the left or right child of its parent.
+void Tree::ValidateNode(unsigned uNodeIndex) const
+	{
+	if (uNodeIndex >= m_uNodeCount)
+		Quit("ValidateNode(%u), %u nodes", uNodeIndex, m_uNodeCount);
+
+	const unsigned uNeighborCount = GetNeighborCount(uNodeIndex);
+
+	if (2 == uNeighborCount)
+		{
+		if (!m_bRooted)
+			{
+			LogMe();
+			Quit("Tree::ValidateNode: Node %u has two neighbors, tree is not rooted",
+			 uNodeIndex);
+			}
+		if (uNodeIndex != m_uRootNodeIndex)
+			{
+			LogMe();
+			Quit("Tree::ValidateNode: Node %u has two neighbors, but not root node=%u",
+			 uNodeIndex, m_uRootNodeIndex);
+			}
+		}
+
+	const unsigned n1 = m_uNeighbor1[uNodeIndex];
+	const unsigned n2 = m_uNeighbor2[uNodeIndex];
+	const unsigned n3 = m_uNeighbor3[uNodeIndex];
+
+	// Slots 2 and 3 must be filled or empty together. The node index is
+	// now part of the message; it used to be passed without a conversion.
+	if (NULL_NEIGHBOR == n2 && NULL_NEIGHBOR != n3)
+		{
+		LogMe();
+		Quit("Tree::ValidateNode(%u), n2=null, n3!=null", uNodeIndex);
+		}
+	if (NULL_NEIGHBOR == n3 && NULL_NEIGHBOR != n2)
+		{
+		LogMe();
+		Quit("Tree::ValidateNode(%u), n3=null, n2!=null", uNodeIndex);
+		}
+
+	if (n1 != NULL_NEIGHBOR)
+		AssertAreNeighbors(uNodeIndex, n1);
+	if (n2 != NULL_NEIGHBOR)
+		AssertAreNeighbors(uNodeIndex, n2);
+	if (n3 != NULL_NEIGHBOR)
+		AssertAreNeighbors(uNodeIndex, n3);
+
+	// Two non-null slots holding the same node. Testing n1 against n2/n3
+	// and n2 against n3 covers every pair, so one test is enough.
+	const bool bDupe =
+	  (n1 != NULL_NEIGHBOR && (n1 == n2 || n1 == n3)) ||
+	  (n2 != NULL_NEIGHBOR && n2 == n3);
+	if (bDupe)
+		{
+		LogMe();
+		Quit("Tree::ValidateNode, duplicate neighbors in node %u", uNodeIndex);
+		}
+
+	if (IsRooted())
+		{
+		const unsigned uParent = GetParent(uNodeIndex);
+		if (NULL_NEIGHBOR == uParent)
+			{
+			if (uNodeIndex != m_uRootNodeIndex)
+				{
+				LogMe();
+				Quit("Tree::ValidateNode(%u), no parent", uNodeIndex);
+				}
+			}
+		else if (GetLeft(uParent) != uNodeIndex &&
+		  GetRight(uParent) != uNodeIndex)
+			{
+			LogMe();
+			Quit("Tree::ValidateNode(%u), parent / child mismatch", uNodeIndex);
+			}
+		}
+	}
+
+// Runs ValidateNode on every node of the tree, in index order.
+void Tree::Validate() const
+	{
+	unsigned uNext = 0;
+	while (uNext < m_uNodeCount)
+		{
+		ValidateNode(uNext);
+		++uNext;
+		}
+	}
+
+// True when node 2 occupies one of the three neighbor slots of node 1.
+// Only node 1's slots are examined.
+bool Tree::IsEdge(unsigned uNodeIndex1, unsigned uNodeIndex2) const
+	{
+	assert(uNodeIndex1 < m_uNodeCount && uNodeIndex2 < m_uNodeCount);
+
+	if (m_uNeighbor1[uNodeIndex1] == uNodeIndex2)
+		return true;
+	if (m_uNeighbor2[uNodeIndex1] == uNodeIndex2)
+		return true;
+	return m_uNeighbor3[uNodeIndex1] == uNodeIndex2;
+	}
+
+// Returns the length stored at node 1 for the edge to node 2. The slot is
+// found by matching node 2 against node 1's neighbors; when neither slot 1
+// nor slot 2 matches, slot 3 is used (checked only by assert).
+double Tree::GetEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2) const
+	{
+	assert(uNodeIndex1 < m_uNodeCount && uNodeIndex2 < m_uNodeCount);
+	assert(HasEdgeLength(uNodeIndex1, uNodeIndex2));
+
+	const double *ptrLengths = m_dEdgeLength3;
+	if (m_uNeighbor1[uNodeIndex1] == uNodeIndex2)
+		ptrLengths = m_dEdgeLength1;
+	else if (m_uNeighbor2[uNodeIndex1] == uNodeIndex2)
+		ptrLengths = m_dEdgeLength2;
+	else
+		assert(m_uNeighbor3[uNodeIndex1] == uNodeIndex2);
+
+	return ptrLengths[uNodeIndex1];
+	}
+
+// Grows every per-node array by 100 entries. Existing entries are copied
+// into the new arrays and the old arrays are released. In the new arrays,
+// ids are pre-filled with 0xff bytes and name pointers with null; all other
+// new entries are left uninitialized for the caller to fill in.
+void Tree::ExpandCache()
+	{
+	const unsigned uNodeCount = 100;
+	unsigned uNewCacheCount = m_uCacheCount + uNodeCount;
+	unsigned *uNewNeighbor1 = new unsigned[uNewCacheCount];
+	unsigned *uNewNeighbor2 = new unsigned[uNewCacheCount];
+	unsigned *uNewNeighbor3 = new unsigned[uNewCacheCount];
+
+	unsigned *uNewIds = new unsigned[uNewCacheCount];
+	memset(uNewIds, 0xff, uNewCacheCount*sizeof(unsigned));
+
+	double *dNewEdgeLength1 = new double[uNewCacheCount];
+	double *dNewEdgeLength2 = new double[uNewCacheCount];
+	double *dNewEdgeLength3 = new double[uNewCacheCount];
+	double *dNewHeight = new double[uNewCacheCount];
+
+	bool *bNewHasEdgeLength1 = new bool[uNewCacheCount];
+	bool *bNewHasEdgeLength2 = new bool[uNewCacheCount];
+	bool *bNewHasEdgeLength3 = new bool[uNewCacheCount];
+	bool *bNewHasHeight = new bool[uNewCacheCount];
+
+	char **ptrNewName = new char *[uNewCacheCount];
+	memset(ptrNewName, 0, uNewCacheCount*sizeof(char *));
+
+	if (m_uCacheCount > 0)
+		{
+		const unsigned uUnsignedBytes = m_uCacheCount*sizeof(unsigned);
+		memcpy(uNewNeighbor1, m_uNeighbor1, uUnsignedBytes);
+		memcpy(uNewNeighbor2, m_uNeighbor2, uUnsignedBytes);
+		memcpy(uNewNeighbor3, m_uNeighbor3, uUnsignedBytes);
+
+		memcpy(uNewIds, m_Ids, uUnsignedBytes);
+
+		const unsigned uEdgeBytes = m_uCacheCount*sizeof(double);
+		memcpy(dNewEdgeLength1, m_dEdgeLength1, uEdgeBytes);
+		memcpy(dNewEdgeLength2, m_dEdgeLength2, uEdgeBytes);
+		memcpy(dNewEdgeLength3, m_dEdgeLength3, uEdgeBytes);
+		memcpy(dNewHeight, m_dHeight, uEdgeBytes);
+
+		// Each flag array is copied from its own source; slots 2 and 3
+		// must not inherit the flags of slot 1.
+		const unsigned uBoolBytes = m_uCacheCount*sizeof(bool);
+		memcpy(bNewHasEdgeLength1, m_bHasEdgeLength1, uBoolBytes);
+		memcpy(bNewHasEdgeLength2, m_bHasEdgeLength2, uBoolBytes);
+		memcpy(bNewHasEdgeLength3, m_bHasEdgeLength3, uBoolBytes);
+		memcpy(bNewHasHeight, m_bHasHeight, uBoolBytes);
+
+		const unsigned uNameBytes = m_uCacheCount*sizeof(char *);
+		memcpy(ptrNewName, m_ptrName, uNameBytes);
+
+		delete[] m_uNeighbor1;
+		delete[] m_uNeighbor2;
+		delete[] m_uNeighbor3;
+
+		delete[] m_Ids;
+
+		delete[] m_dEdgeLength1;
+		delete[] m_dEdgeLength2;
+		delete[] m_dEdgeLength3;
+		// The old height array is replaced below, so it is freed here too.
+		delete[] m_dHeight;
+
+		delete[] m_bHasEdgeLength1;
+		delete[] m_bHasEdgeLength2;
+		delete[] m_bHasEdgeLength3;
+		delete[] m_bHasHeight;
+
+		// Only the pointer table is freed; the name strings themselves are
+		// now referenced from ptrNewName.
+		delete[] m_ptrName;
+		}
+	m_uCacheCount = uNewCacheCount;
+	m_uNeighbor1 = uNewNeighbor1;
+	m_uNeighbor2 = uNewNeighbor2;
+	m_uNeighbor3 = uNewNeighbor3;
+	m_Ids = uNewIds;
+	m_dEdgeLength1 = dNewEdgeLength1;
+	m_dEdgeLength2 = dNewEdgeLength2;
+	m_dEdgeLength3 = dNewEdgeLength3;
+	m_dHeight = dNewHeight;
+	m_bHasEdgeLength1 = bNewHasEdgeLength1;
+	m_bHasEdgeLength2 = bNewHasEdgeLength2;
+	m_bHasEdgeLength3 = bNewHasEdgeLength3;
+	m_bHasHeight = bNewHasHeight;
+	m_ptrName = ptrNewName;
+	}
+
+// Resets the tree to a rooted tree that consists of a single node with no
+// edges. That node has index 0 and is the root.
+void Tree::CreateRooted()
+	{
+	Clear();
+	ExpandCache();
+
+	const unsigned uRoot = 0;
+	m_uNodeCount = 1;
+	m_uRootNodeIndex = uRoot;
+	m_bRooted = true;
+
+	// No neighbors, no edge lengths, no height.
+	m_uNeighbor1[uRoot] = m_uNeighbor2[uRoot] = m_uNeighbor3[uRoot] = NULL_NEIGHBOR;
+	m_bHasEdgeLength1[uRoot] = m_bHasEdgeLength2[uRoot] = m_bHasEdgeLength3[uRoot] = false;
+	m_bHasHeight[uRoot] = false;
+
+#if	DEBUG
+	Validate();
+#endif
+	}
+
+// Resets the tree to an unrooted tree that consists of a single edge of
+// length dEdgeLength. The two end nodes of that edge have indexes 0 and 1;
+// each has the other in neighbor slot 1 and nothing in slots 2 and 3.
+void Tree::CreateUnrooted(double dEdgeLength)
+	{
+	Clear();
+	ExpandCache();
+
+	// Register both end nodes. Without this the tree reports no nodes, so
+	// validation checks nothing and AppendBranch() refuses to extend it.
+	m_uNodeCount = 2;
+
+	m_uNeighbor1[0] = 1;
+	m_uNeighbor2[0] = NULL_NEIGHBOR;
+	m_uNeighbor3[0] = NULL_NEIGHBOR;
+
+	m_uNeighbor1[1] = 0;
+	m_uNeighbor2[1] = NULL_NEIGHBOR;
+	m_uNeighbor3[1] = NULL_NEIGHBOR;
+
+	m_dEdgeLength1[0] = dEdgeLength;
+	m_dEdgeLength1[1] = dEdgeLength;
+
+	m_bHasEdgeLength1[0] = true;
+	m_bHasEdgeLength1[1] = true;
+
+	// ExpandCache() leaves the remaining flags uninitialized; give them
+	// defined values, as CreateRooted() does for its node.
+	m_bHasEdgeLength2[0] = false;
+	m_bHasEdgeLength3[0] = false;
+	m_bHasEdgeLength2[1] = false;
+	m_bHasEdgeLength3[1] = false;
+	m_bHasHeight[0] = false;
+	m_bHasHeight[1] = false;
+
+	m_bRooted = false;
+
+#if	DEBUG
+	Validate();
+#endif
+	}
+
+// Replaces the name of a leaf: the previous string (if any) is released
+// with free() and a copy of ptrName made by strsave() is stored instead.
+void Tree::SetLeafName(unsigned uNodeIndex, const char *ptrName)
+	{
+	assert(uNodeIndex < m_uNodeCount);
+	assert(IsLeaf(uNodeIndex));
+
+	char *&ptrSlot = m_ptrName[uNodeIndex];
+	free(ptrSlot);
+	ptrSlot = strsave(ptrName);
+	}
+
+// Stores uId as the id of the given leaf node.
+void Tree::SetLeafId(unsigned uNodeIndex, unsigned uId)
+	{
+	assert(uNodeIndex < m_uNodeCount);
+	assert(IsLeaf(uNodeIndex));
+
+	unsigned &uSlot = m_Ids[uNodeIndex];
+	uSlot = uId;
+	}
+
+// Returns the stored name pointer of the given leaf node. The pointer is
+// owned by the tree and is null if no name has been stored in that slot.
+const char *Tree::GetLeafName(unsigned uNodeIndex) const
+	{
+	assert(uNodeIndex < m_uNodeCount);
+	assert(IsLeaf(uNodeIndex));
+
+	const char *ptrStored = m_ptrName[uNodeIndex];
+	return ptrStored;
+	}
+
+// Returns the id stored for the given leaf node.
+unsigned Tree::GetLeafId(unsigned uNodeIndex) const
+	{
+	assert(uNodeIndex < m_uNodeCount);
+	assert(IsLeaf(uNodeIndex));
+
+	const unsigned uStored = m_Ids[uNodeIndex];
+	return uStored;
+	}
+
+// Appends a new branch: two new leaf nodes are created and attached to an
+// existing leaf through its neighbor slots 2 and 3. The new nodes get
+// consecutive indexes k and k+1; the return value is k.
+// All edge lengths touched here are set to zero, and the new nodes are
+// marked as having no edge lengths and no height.
+// NOTE(review): the has-edge-length flags for slots 2 and 3 of the existing
+// leaf are not written here -- confirm they are set elsewhere.
+unsigned Tree::AppendBranch(unsigned uExistingLeafIndex)
+	{
+	if (0 == m_uNodeCount)
+		Quit("Tree::AppendBranch: tree has not been created");
+
+#if	DEBUG
+	assert(uExistingLeafIndex < m_uNodeCount);
+	if (!IsLeaf(uExistingLeafIndex))
+		{
+		LogMe();
+		Quit("AppendBranch(%u): not leaf", uExistingLeafIndex);
+		}
+#endif
+
+	// Make room before handing out the two new indexes.
+	if (m_uNodeCount >= m_uCacheCount - 2)
+		ExpandCache();
+
+	const unsigned uFirstNew = m_uNodeCount;
+	const unsigned uSecondNew = uFirstNew + 1;
+	m_uNodeCount += 2;
+
+	assert(m_uNeighbor2[uExistingLeafIndex] == NULL_NEIGHBOR);
+	assert(m_uNeighbor3[uExistingLeafIndex] == NULL_NEIGHBOR);
+
+	// Hook the new leaves under the existing node.
+	m_uNeighbor2[uExistingLeafIndex] = uFirstNew;
+	m_uNeighbor3[uExistingLeafIndex] = uSecondNew;
+	m_dEdgeLength2[uExistingLeafIndex] = 0;
+	m_dEdgeLength3[uExistingLeafIndex] = 0;
+
+	// Both new leaves are initialized the same way.
+	for (unsigned uNew = uFirstNew; uNew <= uSecondNew; ++uNew)
+		{
+		m_uNeighbor1[uNew] = uExistingLeafIndex;
+		m_uNeighbor2[uNew] = NULL_NEIGHBOR;
+		m_uNeighbor3[uNew] = NULL_NEIGHBOR;
+
+		m_dEdgeLength1[uNew] = 0;
+		m_dEdgeLength2[uNew] = 0;
+		m_dEdgeLength3[uNew] = 0;
+
+		m_bHasEdgeLength1[uNew] = false;
+		m_bHasEdgeLength2[uNew] = false;
+		m_bHasEdgeLength3[uNew] = false;
+
+		m_bHasHeight[uNew] = false;
+		}
+
+	return uFirstNew;
+	}
+
+// Writes a table of all nodes to the log: one row per node with its index,
+// its three neighbor slots with the stored edge lengths (blank when the
+// slot is null), a [ROOT] marker on the root of a rooted tree, and the
+// node name when one is stored. Column titles differ for rooted and
+// unrooted trees.
+void Tree::LogMe() const
+	{
+	Log("Tree::LogMe %u nodes, ", m_uNodeCount);
+
+	if (!IsRooted())
+		{
+		Log("unrooted.\n");
+		Log("\n");
+		Log("Index  Nbr_1  Length1  Nbr_2  Length2  Nbr_3  Length3  Name\n");
+		Log("-----  -----  -------  -----  -------  -----  -------  ----\n");
+		}
+	else
+		{
+		Log("rooted.\n");
+		Log("\n");
+		Log("Index  Parnt  LengthP  Left   LengthL  Right  LengthR  Name\n");
+		Log("-----  -----  -------  ----   -------  -----  -------  ----\n");
+		}
+
+	// The three neighbor slots are printed identically, so walk them
+	// through small lookup tables.
+	const unsigned *ptrNeighbors[3] =
+	  { m_uNeighbor1, m_uNeighbor2, m_uNeighbor3 };
+	const double *ptrLengths[3] =
+	  { m_dEdgeLength1, m_dEdgeLength2, m_dEdgeLength3 };
+
+	for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
+		{
+		Log("%5u  ", uNodeIndex);
+		for (unsigned uSlot = 0; uSlot < 3; ++uSlot)
+			{
+			const unsigned uNeighbor = ptrNeighbors[uSlot][uNodeIndex];
+			if (NULL_NEIGHBOR == uNeighbor)
+				Log("                ");
+			else
+				Log("%5u  %7.3g  ", uNeighbor, ptrLengths[uSlot][uNodeIndex]);
+			}
+		if (m_bRooted && uNodeIndex == m_uRootNodeIndex)
+			Log("[ROOT] ");
+		const char *ptrName = m_ptrName[uNodeIndex];
+		if (0 != ptrName)
+			Log("%s", ptrName);
+		Log("\n");
+		}
+	}
+
+// Records dLength for the edge between the two nodes, at both ends: on each
+// node the slot that holds the other node receives the length and its
+// has-length flag is set. When neither slot 1 nor slot 2 matches, slot 3 is
+// used (checked only by assert).
+void Tree::SetEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2,
+  double dLength)
+	{
+	assert(uNodeIndex1 < m_uNodeCount && uNodeIndex2 < m_uNodeCount);
+	assert(IsEdge(uNodeIndex1, uNodeIndex2));
+
+	// Same update for both directions: first 1 -> 2, then 2 -> 1.
+	const unsigned uEnds[2][2] =
+		{
+		{ uNodeIndex1, uNodeIndex2 },
+		{ uNodeIndex2, uNodeIndex1 },
+		};
+
+	for (unsigned uDir = 0; uDir < 2; ++uDir)
+		{
+		const unsigned uFrom = uEnds[uDir][0];
+		const unsigned uTo = uEnds[uDir][1];
+
+		double *ptrLengths = m_dEdgeLength3;
+		bool *ptrHasLength = m_bHasEdgeLength3;
+		if (m_uNeighbor1[uFrom] == uTo)
+			{
+			ptrLengths = m_dEdgeLength1;
+			ptrHasLength = m_bHasEdgeLength1;
+			}
+		else if (m_uNeighbor2[uFrom] == uTo)
+			{
+			ptrLengths = m_dEdgeLength2;
+			ptrHasLength = m_bHasEdgeLength2;
+			}
+		else
+			assert(m_uNeighbor3[uFrom] == uTo);
+
+		ptrLengths[uFrom] = dLength;
+		ptrHasLength[uFrom] = true;
+		}
+	}
+
+// Convert a rooted tree whose root is node zero into an unrooted tree by
+// appending one new node and linking it to the old root through the
+// root's (previously empty) first neighbor slot. The new edge gets
+// length zero. Returns the index of the appended node.
+// Quits if the tree is not rooted.
+unsigned Tree::UnrootFromFile()
+	{
+#if	TRACE
+	Log("Before unroot:\n");
+	LogMe();
+#endif
+
+	if (!m_bRooted)
+		Quit("Tree::Unroot, not rooted");
+
+// Convention: root node is always node zero
+	assert(IsRoot(0));
+	assert(NULL_NEIGHBOR == m_uNeighbor1[0]);
+
+// Make sure the per-node arrays have room for one more entry before
+// appending, as RootUnrootedTree does; otherwise the writes below could
+// land past the end of the arrays when the cache is exactly full.
+	if (m_uNodeCount == m_uCacheCount)
+		ExpandCache();
+	const unsigned uThirdNode = m_uNodeCount++;
+
+	m_uNeighbor1[0] = uThirdNode;
+	m_uNeighbor1[uThirdNode] = 0;
+
+// The appended node has a single neighbor (the old root).
+	m_uNeighbor2[uThirdNode] = NULL_NEIGHBOR;
+	m_uNeighbor3[uThirdNode] = NULL_NEIGHBOR;
+
+	m_dEdgeLength1[0] = 0;
+	m_dEdgeLength1[uThirdNode] = 0;
+	m_bHasEdgeLength1[uThirdNode] = true;
+
+	m_bRooted = false;
+
+#if	TRACE
+	Log("After unroot:\n");
+	LogMe();
+#endif
+
+	return uThirdNode;
+	}
+
+// Unrooted-tree counterpart of GetLeft/GetRight.
+// Given a node and one of its known neighbors, GetFirstNeighbor and
+// GetSecondNeighbor return the node's other neighbors. Which of them
+// is called "first" and which "second" is arbitrary: it follows the
+// order of the neighbor slots.
+// Returns NULL_NEIGHBOR when there is no other neighbor, which is
+// the case for a leaf.
+unsigned Tree::GetFirstNeighbor(unsigned uNodeIndex, unsigned uNeighborIndex) const
+	{
+	assert(uNodeIndex < m_uNodeCount);
+	assert(uNeighborIndex < m_uNodeCount);
+	assert(IsEdge(uNodeIndex, uNeighborIndex));
+
+// Scan the slots in order and report the first occupied one that is
+// not the neighbor we were told to ignore.
+	unsigned uSlot = 0;
+	while (uSlot < 3)
+		{
+		const unsigned uCandidate = GetNeighbor(uNodeIndex, uSlot);
+		++uSlot;
+		if (NULL_NEIGHBOR == uCandidate)
+			continue;
+		if (uCandidate == uNeighborIndex)
+			continue;
+		return uCandidate;
+		}
+	return NULL_NEIGHBOR;
+	}
+
+// Return the second neighbor of uNodeIndex, in slot order, that is
+// neither empty nor equal to uNeighborIndex. Returns NULL_NEIGHBOR when
+// fewer than two such neighbors exist.
+unsigned Tree::GetSecondNeighbor(unsigned uNodeIndex, unsigned uNeighborIndex) const
+	{
+	assert(uNodeIndex < m_uNodeCount);
+	assert(uNeighborIndex < m_uNodeCount);
+	assert(IsEdge(uNodeIndex, uNeighborIndex));
+
+	bool bSkippedFirst = false;
+	for (unsigned uSlot = 0; uSlot < 3; ++uSlot)
+		{
+		const unsigned uCandidate = GetNeighbor(uNodeIndex, uSlot);
+		if (NULL_NEIGHBOR == uCandidate || uCandidate == uNeighborIndex)
+			continue;
+
+// The first qualifying neighbor is skipped, the next one is the answer.
+		if (!bSkippedFirst)
+			{
+			bSkippedFirst = true;
+			continue;
+			}
+		return uCandidate;
+		}
+	return NULL_NEIGHBOR;
+	}
+
+// Count the leaves in the sub-tree obtained by cutting the edge
+// uNodeIndex1-uNodeIndex2 of an unrooted tree and treating uNodeIndex2
+// as the root of that sub-tree.
+// *ptrdTotalDistance receives, for a leaf, the length of the edge
+// leading to it, and for an internal node the sum of the values
+// reported by its two sub-trees. The length of the edge
+// uNodeIndex1-uNodeIndex2 itself is not added in the internal case.
+unsigned Tree::GetLeafCountUnrooted(unsigned uNodeIndex1, unsigned uNodeIndex2,
+  double *ptrdTotalDistance) const
+	{
+	assert(!IsRooted());
+
+	if (!IsLeaf(uNodeIndex2))
+		{
+// Internal node: its other two neighbors act as the children of the
+// sub-tree rooted at uNodeIndex2.
+		const unsigned uChildA = GetFirstNeighbor(uNodeIndex2, uNodeIndex1);
+		const unsigned uChildB = GetSecondNeighbor(uNodeIndex2, uNodeIndex1);
+
+		double dDistanceA;
+		double dDistanceB;
+		const unsigned uCountA = GetLeafCountUnrooted(uNodeIndex2, uChildA,
+		  &dDistanceA);
+		const unsigned uCountB = GetLeafCountUnrooted(uNodeIndex2, uChildB,
+		  &dDistanceB);
+
+		*ptrdTotalDistance = dDistanceA + dDistanceB;
+		return uCountA + uCountB;
+		}
+
+// Leaf: one leaf, at the distance given by the cut edge.
+	*ptrdTotalDistance = GetEdgeLength(uNodeIndex1, uNodeIndex2);
+	return 1;
+	}
+
+// Convert an unrooted tree into a rooted one by inserting a new root
+// node on the edge chosen by FindRoot for the given Method.
+// The new root is appended at the end of the node arrays; the two ends
+// of the chosen edge become its children, with edge lengths dLength1 and
+// dLength2 as reported by FindRoot. Validate() is called on the result.
+void Tree::RootUnrootedTree(ROOT Method)
+	{
+	assert(!IsRooted());
+#if TRACE
+	Log("Tree::RootUnrootedTree, before:");
+	LogMe();
+#endif
+
+// FindRoot reports the edge (uNode1, uNode2) to split and the lengths
+// of the two new edges from the root.
+	unsigned uNode1;
+	unsigned uNode2;
+	double dLength1;
+	double dLength2;
+	FindRoot(*this, &uNode1, &uNode2, &dLength1, &dLength2, Method);
+
+// Grow the per-node arrays if they are full, then append the root.
+	if (m_uNodeCount == m_uCacheCount)
+		ExpandCache();
+	m_uRootNodeIndex = m_uNodeCount++;
+
+// NOTE(review): dEdgeLength is not used below; kept because the call
+// itself may have effects not visible here.
+	double dEdgeLength = GetEdgeLength(uNode1, uNode2);
+
+// The root has no parent (slot 1 empty); slots 2 and 3 are its children.
+	m_uNeighbor1[m_uRootNodeIndex] = NULL_NEIGHBOR;
+	m_uNeighbor2[m_uRootNodeIndex] = uNode1;
+	m_uNeighbor3[m_uRootNodeIndex] = uNode2;
+
+// In uNode1, replace the reference to uNode2 by a reference to the root.
+	if (m_uNeighbor1[uNode1] == uNode2)
+		m_uNeighbor1[uNode1] = m_uRootNodeIndex;
+	else if (m_uNeighbor2[uNode1] == uNode2)
+		m_uNeighbor2[uNode1] = m_uRootNodeIndex;
+	else
+		{
+		assert(m_uNeighbor3[uNode1] == uNode2);
+		m_uNeighbor3[uNode1] = m_uRootNodeIndex;
+		}
+
+// Likewise in uNode2, replace the reference to uNode1.
+	if (m_uNeighbor1[uNode2] == uNode1)
+		m_uNeighbor1[uNode2] = m_uRootNodeIndex;
+	else if (m_uNeighbor2[uNode2] == uNode1)
+		m_uNeighbor2[uNode2] = m_uRootNodeIndex;
+	else
+		{
+		assert(m_uNeighbor3[uNode2] == uNode1);
+		m_uNeighbor3[uNode2] = m_uRootNodeIndex;
+		}
+
+// Rearrange both sub-trees so that slot 1 of every node holds its parent.
+// This must happen after the relinking above, because OrientParent looks
+// for the root index in the children's slots.
+	OrientParent(uNode1, m_uRootNodeIndex);
+	OrientParent(uNode2, m_uRootNodeIndex);
+
+	SetEdgeLength(m_uRootNodeIndex, uNode1, dLength1);
+	SetEdgeLength(m_uRootNodeIndex, uNode2, dLength2);
+
+	m_bHasHeight[m_uRootNodeIndex] = false;
+
+	m_ptrName[m_uRootNodeIndex] = 0;
+
+	m_bRooted = true;
+
+#if	TRACE
+	Log("\nPhy::RootUnrootedTree, after:");
+	LogMe();
+#endif
+
+	Validate();
+	}
+
+// Report whether a length has been recorded for the edge
+// uNodeIndex1-uNodeIndex2. The flag is read from uNodeIndex1's side of
+// the edge.
+bool Tree::HasEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2) const
+	{
+	assert(uNodeIndex1 < m_uNodeCount);
+	assert(uNodeIndex2 < m_uNodeCount);
+	assert(IsEdge(uNodeIndex1, uNodeIndex2));
+
+// Pick the flag array matching the slot that refers to uNodeIndex2.
+	const bool *ptrFlags;
+	if (uNodeIndex2 == m_uNeighbor1[uNodeIndex1])
+		ptrFlags = m_bHasEdgeLength1;
+	else if (uNodeIndex2 == m_uNeighbor2[uNodeIndex1])
+		ptrFlags = m_bHasEdgeLength2;
+	else
+		{
+		assert(m_uNeighbor3[uNodeIndex1] == uNodeIndex2);
+		ptrFlags = m_bHasEdgeLength3;
+		}
+	return ptrFlags[uNodeIndex1];
+	}
+
+// Recursively rearrange the neighbor slots of uNodeIndex and of the
+// nodes below it so that slot 1 always refers to the parent, where
+// uParentNodeIndex is the parent of uNodeIndex. The parent must already
+// be present in one of the three slots of uNodeIndex.
+// A NULL_NEIGHBOR node index ends the recursion.
+void Tree::OrientParent(unsigned uNodeIndex, unsigned uParentNodeIndex)
+	{
+	if (NULL_NEIGHBOR == uNodeIndex)
+		return;
+
+	if (m_uNeighbor1[uNodeIndex] == uParentNodeIndex)
+		;
+	else if (m_uNeighbor2[uNodeIndex] == uParentNodeIndex)
+		{
+// Exchange slots 1 and 2. The "has length" flag travels with the edge
+// it describes, otherwise it would end up attached to the wrong edge
+// whenever the two slots differ in whether a length is set.
+		const double dEdgeLength2 = m_dEdgeLength2[uNodeIndex];
+		const bool bHasEdgeLength2 = m_bHasEdgeLength2[uNodeIndex];
+
+		m_uNeighbor2[uNodeIndex] = m_uNeighbor1[uNodeIndex];
+		m_dEdgeLength2[uNodeIndex] = m_dEdgeLength1[uNodeIndex];
+		m_bHasEdgeLength2[uNodeIndex] = m_bHasEdgeLength1[uNodeIndex];
+
+		m_uNeighbor1[uNodeIndex] = uParentNodeIndex;
+		m_dEdgeLength1[uNodeIndex] = dEdgeLength2;
+		m_bHasEdgeLength1[uNodeIndex] = bHasEdgeLength2;
+		}
+	else
+		{
+// Exchange slots 1 and 3, again keeping flag and length together.
+		assert(m_uNeighbor3[uNodeIndex] == uParentNodeIndex);
+		const double dEdgeLength3 = m_dEdgeLength3[uNodeIndex];
+		const bool bHasEdgeLength3 = m_bHasEdgeLength3[uNodeIndex];
+
+		m_uNeighbor3[uNodeIndex] = m_uNeighbor1[uNodeIndex];
+		m_dEdgeLength3[uNodeIndex] = m_dEdgeLength1[uNodeIndex];
+		m_bHasEdgeLength3[uNodeIndex] = m_bHasEdgeLength1[uNodeIndex];
+
+		m_uNeighbor1[uNodeIndex] = uParentNodeIndex;
+		m_dEdgeLength1[uNodeIndex] = dEdgeLength3;
+		m_bHasEdgeLength1[uNodeIndex] = bHasEdgeLength3;
+		}
+
+// Slots 2 and 3 now hold the children (if any); orient them as well.
+	OrientParent(m_uNeighbor2[uNodeIndex], uNodeIndex);
+	OrientParent(m_uNeighbor3[uNodeIndex], uNodeIndex);
+	}
+
+// Starting node of the depth-first traversal continued by
+// NextDepthFirstNode: the leaf reached from the root by always taking
+// the left branch.
+unsigned Tree::FirstDepthFirstNode() const
+	{
+	assert(IsRooted());
+
+	unsigned uCurrent;
+	for (uCurrent = m_uRootNodeIndex; !IsLeaf(uCurrent);
+	  uCurrent = GetLeft(uCurrent))
+		;
+	return uCurrent;
+	}
+
+// Starting node of the mirrored depth-first traversal continued by
+// NextDepthFirstNodeR: the leaf reached from the root by always taking
+// the right branch.
+unsigned Tree::FirstDepthFirstNodeR() const
+	{
+	assert(IsRooted());
+
+	unsigned uCurrent;
+	for (uCurrent = m_uRootNodeIndex; !IsLeaf(uCurrent);
+	  uCurrent = GetRight(uCurrent))
+		;
+	return uCurrent;
+	}
+
+// Return the node that follows uNodeIndex in a depth-first traversal
+// that visits the left sub-tree, then the right sub-tree, then the
+// parent. Returns NULL_NEIGHBOR once the root has been visited.
+unsigned Tree::NextDepthFirstNode(unsigned uNodeIndex) const
+	{
+#if	TRACE
+	Log("NextDepthFirstNode(%3u) ", uNodeIndex);
+#endif
+
+	assert(IsRooted());
+	assert(uNodeIndex < m_uNodeCount);
+
+// The root is the last node of the traversal.
+	if (IsRoot(uNodeIndex))
+		{
+#if	TRACE
+		Log(">> Node %u is root, end of traversal\n", uNodeIndex);
+#endif
+		return NULL_NEIGHBOR;
+		}
+
+	const unsigned uParent = GetParent(uNodeIndex);
+	const unsigned uRightOfParent = GetRight(uParent);
+
+// Coming up from the right child means both sub-trees are done, so the
+// parent is next.
+	if (uRightOfParent == uNodeIndex)
+		{
+#if	TRACE
+		Log(">> Is right branch, return parent=%u\n", uParent);
+#endif
+		return uParent;
+		}
+
+// Otherwise continue with the left-most leaf of the right sibling.
+	unsigned uNext = uRightOfParent;
+#if	TRACE
+		Log(">> Descend left from right sibling=%u ... ", uNext);
+#endif
+	for (; !IsLeaf(uNext); uNext = GetLeft(uNext))
+		;
+
+#if	TRACE
+	Log("bottom out at leaf=%u\n", uNext);
+#endif
+	return uNext;
+	}
+
+// Mirror image of NextDepthFirstNode: return the node that follows
+// uNodeIndex in a depth-first traversal that visits the right sub-tree,
+// then the left sub-tree, then the parent. Returns NULL_NEIGHBOR once
+// the root has been visited.
+unsigned Tree::NextDepthFirstNodeR(unsigned uNodeIndex) const
+	{
+#if	TRACE
+// Label the trace with this function's own name so that its output can
+// be told apart from NextDepthFirstNode's.
+	Log("NextDepthFirstNodeR(%3u) ", uNodeIndex);
+#endif
+
+	assert(IsRooted());
+	assert(uNodeIndex < m_uNodeCount);
+
+// The root is the last node of the traversal.
+	if (IsRoot(uNodeIndex))
+		{
+#if	TRACE
+		Log(">> Node %u is root, end of traversal\n", uNodeIndex);
+#endif
+		return NULL_NEIGHBOR;
+		}
+
+// Coming up from the left child means both sub-trees are done, so the
+// parent is next.
+	unsigned uParent = GetParent(uNodeIndex);
+	if (GetLeft(uParent) == uNodeIndex)
+		{
+#if	TRACE
+		Log(">> Is left branch, return parent=%u\n", uParent);
+#endif
+		return uParent;
+		}
+
+// Otherwise continue with the right-most leaf of the left sibling.
+	uNodeIndex = GetLeft(uParent);
+#if	TRACE
+		Log(">> Descend right from left sibling=%u ... ", uNodeIndex);
+#endif
+	while (!IsLeaf(uNodeIndex))
+		uNodeIndex = GetRight(uNodeIndex);
+
+#if	TRACE
+	Log("bottom out at leaf=%u\n", uNodeIndex);
+#endif
+	return uNodeIndex;
+	}
+
+// Convert a rooted tree into an unrooted one by removing the root node
+// and joining its two children directly. When both root edges have a
+// length, the joining edge gets the sum of the two. The root's entry is
+// removed from the per-node arrays, so every node index above the root's
+// decreases by one. Validate() is called on the result.
+void Tree::UnrootByDeletingRoot()
+	{
+	assert(IsRooted());
+	assert(m_uNodeCount >= 3);
+
+	const unsigned uRoot = m_uRootNodeIndex;
+	const unsigned uLeft = GetLeft(uRoot);
+	const unsigned uRight = GetRight(uRoot);
+
+// Query the root edges while the tree is still consistent, i.e. before
+// the children stop referring to the root.
+	const bool bHasEdgeLength = HasEdgeLength(uRoot, uLeft) &&
+	  HasEdgeLength(uRoot, uRight);
+	double dEdgeLength = 0;
+	if (bHasEdgeLength)
+		dEdgeLength = GetEdgeLength(uRoot, uLeft) +
+		  GetEdgeLength(uRoot, uRight);
+
+// Slot 1 of each child held the root; make the children neighbors of
+// each other instead.
+	m_uNeighbor1[uLeft] = uRight;
+	m_uNeighbor1[uRight] = uLeft;
+	if (bHasEdgeLength)
+		{
+		m_dEdgeLength1[uLeft] = dEdgeLength;
+		m_dEdgeLength1[uRight] = dEdgeLength;
+		}
+
+// Remove root node entry from arrays.
+// Only the entries after the root are moved: there are
+// m_uNodeCount - uRoot - 1 of them. Moving one more would read the
+// element past the last node.
+	const unsigned uMoveCount = m_uNodeCount - uRoot - 1;
+	const unsigned uUnsBytes = uMoveCount*sizeof(unsigned);
+	memmove(m_uNeighbor1 + uRoot, m_uNeighbor1 + uRoot + 1, uUnsBytes);
+	memmove(m_uNeighbor2 + uRoot, m_uNeighbor2 + uRoot + 1, uUnsBytes);
+	memmove(m_uNeighbor3 + uRoot, m_uNeighbor3 + uRoot + 1, uUnsBytes);
+
+// Ids are per-node data too and must stay aligned with their nodes.
+	memmove(m_Ids + uRoot, m_Ids + uRoot + 1, uUnsBytes);
+
+	const unsigned uDoubleBytes = uMoveCount*sizeof(double);
+	memmove(m_dEdgeLength1 + uRoot, m_dEdgeLength1 + uRoot + 1, uDoubleBytes);
+	memmove(m_dEdgeLength2 + uRoot, m_dEdgeLength2 + uRoot + 1, uDoubleBytes);
+	memmove(m_dEdgeLength3 + uRoot, m_dEdgeLength3 + uRoot + 1, uDoubleBytes);
+	memmove(m_dHeight + uRoot, m_dHeight + uRoot + 1, uDoubleBytes);
+
+	const unsigned uBoolBytes = uMoveCount*sizeof(bool);
+	memmove(m_bHasEdgeLength1 + uRoot, m_bHasEdgeLength1 + uRoot + 1, uBoolBytes);
+	memmove(m_bHasEdgeLength2 + uRoot, m_bHasEdgeLength2 + uRoot + 1, uBoolBytes);
+	memmove(m_bHasEdgeLength3 + uRoot, m_bHasEdgeLength3 + uRoot + 1, uBoolBytes);
+	memmove(m_bHasHeight + uRoot, m_bHasHeight + uRoot + 1, uBoolBytes);
+
+	const unsigned uPtrBytes = uMoveCount*sizeof(char *);
+	memmove(m_ptrName + uRoot, m_ptrName + uRoot + 1, uPtrBytes);
+
+	--m_uNodeCount;
+	m_bRooted = false;
+
+// Fix up table entries: references to nodes that were stored after the
+// root must follow those nodes one position down.
+	for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
+		{
+#define DEC(x)	if (x != NULL_NEIGHBOR && x > m_uRootNodeIndex) --x;
+		DEC(m_uNeighbor1[uNodeIndex])
+		DEC(m_uNeighbor2[uNodeIndex])
+		DEC(m_uNeighbor3[uNodeIndex])
+#undef	DEC
+		}
+
+	Validate();
+	}
+
+// Return the node a leaf is attached to. In a rooted tree this is the
+// leaf's parent; in an unrooted tree it is the first non-empty neighbor
+// slot, falling back to slot 3 when slots 1 and 2 are empty.
+unsigned Tree::GetLeafParent(unsigned uNodeIndex) const
+	{
+	assert(IsLeaf(uNodeIndex));
+
+	if (IsRooted())
+		return GetParent(uNodeIndex);
+
+	const unsigned uSlot1 = m_uNeighbor1[uNodeIndex];
+	if (NULL_NEIGHBOR != uSlot1)
+		return uSlot1;
+
+	const unsigned uSlot2 = m_uNeighbor2[uNodeIndex];
+	return (NULL_NEIGHBOR != uSlot2) ? uSlot2 : m_uNeighbor3[uNodeIndex];
+	}
+
+// Height of a node in a rooted tree. Leaves have height zero. The height
+// of an internal node is the average, over its two children, of the
+// child's height plus the length of the edge to that child; negative
+// edge lengths count as zero. Computed heights are stored in
+// m_dHeight / m_bHasHeight and reused on later calls.
+// Quits if the tree is not rooted.
+// TODO: This is not efficient for large trees, should cache.
+double Tree::GetNodeHeight(unsigned uNodeIndex) const
+	{
+	if (!IsRooted())
+		Quit("Tree::GetNodeHeight: undefined unless rooted tree");
+
+	if (IsLeaf(uNodeIndex))
+		return 0.0;
+
+// Reuse a previously stored value if there is one.
+	if (m_bHasHeight[uNodeIndex])
+		return m_dHeight[uNodeIndex];
+
+	const unsigned uLeftChild = GetLeft(uNodeIndex);
+	const unsigned uRightChild = GetRight(uNodeIndex);
+	const double dLengthL = GetEdgeLength(uNodeIndex, uLeftChild);
+	const double dLengthR = GetEdgeLength(uNodeIndex, uRightChild);
+
+// Negative edge lengths are clamped to zero before use.
+	const double dViaLeft = (dLengthL < 0 ? 0 : dLengthL) +
+	  GetNodeHeight(uLeftChild);
+	const double dViaRight = (dLengthR < 0 ? 0 : dLengthR) +
+	  GetNodeHeight(uRightChild);
+
+	const double dResult = (dViaLeft + dViaRight)/2;
+	m_dHeight[uNodeIndex] = dResult;
+	m_bHasHeight[uNodeIndex] = true;
+	return dResult;
+	}
+
+// Return the slot number (0, 1 or 2) in which uNodeIndex stores
+// uNeighborIndex, or NULL_NEIGHBOR if none of its slots refers to it.
+unsigned Tree::GetNeighborSubscript(unsigned uNodeIndex, unsigned uNeighborIndex) const
+	{
+	assert(uNodeIndex < m_uNodeCount);
+	assert(uNeighborIndex < m_uNodeCount);
+
+	const unsigned *Slots[3] = { m_uNeighbor1, m_uNeighbor2, m_uNeighbor3 };
+	for (unsigned uSub = 0; uSub < 3; ++uSub)
+		{
+		if (Slots[uSub][uNodeIndex] == uNeighborIndex)
+			return uSub;
+		}
+	return NULL_NEIGHBOR;
+	}
+
+// Return the content of neighbor slot uNeighborSubscript (0, 1 or 2) of
+// uNodeIndex; the value may be NULL_NEIGHBOR for an empty slot.
+// Any other subscript is reported through Quit.
+unsigned Tree::GetNeighbor(unsigned uNodeIndex, unsigned uNeighborSubscript) const
+	{
+	if (0 == uNeighborSubscript)
+		return m_uNeighbor1[uNodeIndex];
+	if (1 == uNeighborSubscript)
+		return m_uNeighbor2[uNodeIndex];
+	if (2 == uNeighborSubscript)
+		return m_uNeighbor3[uNodeIndex];
+
+	Quit("Tree::GetNeighbor, sub=%u", uNeighborSubscript);
+	return NULL_NEIGHBOR;
+	}
+
+// Map a leaf index (position among the leaves, counted in node-index
+// order starting at zero) to the corresponding node index.
+// Quits if there are not that many leaves.
+// TODO: check if this is a performance issue, could cache a lookup table
+unsigned Tree::LeafIndexToNodeIndex(unsigned uLeafIndex) const
+	{
+	const unsigned uTotalNodes = GetNodeCount();
+
+// Number of leaves still to be skipped before reaching the wanted one.
+	unsigned uToSkip = uLeafIndex;
+	for (unsigned uNode = 0; uNode < uTotalNodes; ++uNode)
+		{
+		if (!IsLeaf(uNode))
+			continue;
+		if (0 == uToSkip)
+			return uNode;
+		--uToSkip;
+		}
+	Quit("LeafIndexToNodeIndex: out of range");
+	return 0;
+	}
+
+// Return the node index of the first leaf, in node-index order, whose
+// name equals ptrName. Quits if no leaf has that name.
+unsigned Tree::GetLeafNodeIndex(const char *ptrName) const
+	{
+	const unsigned uTotalNodes = GetNodeCount();
+	for (unsigned uNode = 0; uNode < uTotalNodes; ++uNode)
+		{
+// Only leaves are compared.
+		if (IsLeaf(uNode))
+			{
+			const char *ptrCandidate = GetLeafName(uNode);
+			if (strcmp(ptrName, ptrCandidate) == 0)
+				return uNode;
+			}
+		}
+	Quit("Tree::GetLeafNodeIndex, name not found");
+	return 0;
+	}
+
+// Make this tree a copy of another tree: the per-node arrays are copied
+// for all nodes, leaf names are duplicated with strsave, and names of
+// non-leaf nodes are set to null.
+void Tree::Copy(const Tree &tree)
+	{
+	const unsigned uCount = tree.GetNodeCount();
+	InitCache(uCount);
+
+	m_uNodeCount = uCount;
+	m_uRootNodeIndex = tree.m_uRootNodeIndex;
+	m_bRooted = tree.m_bRooted;
+
+// Arrays of unsigned: neighbor slots and ids.
+	const size_t uBytesUnsigned = uCount*sizeof(unsigned);
+	memcpy(m_uNeighbor1, tree.m_uNeighbor1, uBytesUnsigned);
+	memcpy(m_uNeighbor2, tree.m_uNeighbor2, uBytesUnsigned);
+	memcpy(m_uNeighbor3, tree.m_uNeighbor3, uBytesUnsigned);
+	memcpy(m_Ids, tree.m_Ids, uBytesUnsigned);
+
+// Arrays of double: edge lengths and heights.
+	const size_t uBytesDouble = uCount*sizeof(double);
+	memcpy(m_dEdgeLength1, tree.m_dEdgeLength1, uBytesDouble);
+	memcpy(m_dEdgeLength2, tree.m_dEdgeLength2, uBytesDouble);
+	memcpy(m_dEdgeLength3, tree.m_dEdgeLength3, uBytesDouble);
+	memcpy(m_dHeight, tree.m_dHeight, uBytesDouble);
+
+// Arrays of bool: "value is set" flags.
+	const size_t uBytesBool = uCount*sizeof(bool);
+	memcpy(m_bHasEdgeLength1, tree.m_bHasEdgeLength1, uBytesBool);
+	memcpy(m_bHasEdgeLength2, tree.m_bHasEdgeLength2, uBytesBool);
+	memcpy(m_bHasEdgeLength3, tree.m_bHasEdgeLength3, uBytesBool);
+	memcpy(m_bHasHeight, tree.m_bHasHeight, uBytesBool);
+
+// Names are owned per tree, so leaf names are duplicated, not shared.
+	for (unsigned uNode = 0; uNode < m_uNodeCount; ++uNode)
+		{
+		if (!tree.IsLeaf(uNode))
+			{
+			m_ptrName[uNode] = 0;
+			continue;
+			}
+		m_ptrName[uNode] = strsave(tree.GetLeafName(uNode));
+		}
+
+#if	DEBUG
+	Validate();
+#endif
+	}
+
+// Build a rooted tree from a vector description.
+// With N = uLeafCount, node indexes 0..N-1 are the leaves and
+// N..2N-2 the internal nodes. The Left/Right/LeftLength/RightLength
+// vectors have one entry per internal node, subscripted by
+// (node index - N); the values of Left and Right are node indexes in
+// 0..2N-2. Example: N=6 and Left[2]=1 says that the third internal
+// node (node index 8) has the second leaf (node index 1) as its left
+// child.
+// uRoot is the vector subscript of the root, i.e. its node index is
+// uRoot + N. LeafIds and LeafNames have one entry per leaf; names are
+// duplicated with strsave.
+void Tree::Create(unsigned uLeafCount, unsigned uRoot, const unsigned Left[],
+  const unsigned Right[], const float LeftLength[], const float RightLength[],
+  const unsigned LeafIds[], char **LeafNames)
+	{
+	Clear();
+
+	m_uNodeCount = 2*uLeafCount - 1;
+	InitCache(m_uNodeCount);
+
+// Leaves: ids and names.
+	for (unsigned uLeaf = 0; uLeaf < uLeafCount; ++uLeaf)
+		{
+		m_Ids[uLeaf] = LeafIds[uLeaf];
+		m_ptrName[uLeaf] = strsave(LeafNames[uLeaf]);
+		}
+
+// Internal nodes: v is the vector subscript, uParent the node index.
+	const unsigned uInternalCount = m_uNodeCount - uLeafCount;
+	for (unsigned v = 0; v < uInternalCount; ++v)
+		{
+		const unsigned uParent = v + uLeafCount;
+		const unsigned uChildL = Left[v];
+		const unsigned uChildR = Right[v];
+		const float fLengthL = LeftLength[v];
+		const float fLengthR = RightLength[v];
+
+// Parent side: slot 2 is the left child, slot 3 the right child.
+		m_uNeighbor2[uParent] = uChildL;
+		m_dEdgeLength2[uParent] = fLengthL;
+		m_bHasEdgeLength2[uParent] = true;
+
+		m_uNeighbor3[uParent] = uChildR;
+		m_dEdgeLength3[uParent] = fLengthR;
+		m_bHasEdgeLength3[uParent] = true;
+
+// Child side: slot 1 is the parent.
+		m_uNeighbor1[uChildL] = uParent;
+		m_uNeighbor1[uChildR] = uParent;
+
+		m_dEdgeLength1[uChildL] = fLengthL;
+		m_dEdgeLength1[uChildR] = fLengthR;
+
+		m_bHasEdgeLength1[uChildL] = true;
+		m_bHasEdgeLength1[uChildR] = true;
+		}
+
+	m_bRooted = true;
+	m_uRootNodeIndex = uRoot + uLeafCount;
+
+	Validate();
+	}

Added: trunk/packages/muscle/branches/upstream/current/phy2.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/phy2.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/phy2.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,282 @@
+#include "muscle.h"
+#include "tree.h"
+
+#define TRACE	0
+
+// Step through the edges of a tree, one per call, following the
+// depth-first node order of Tree::FirstDepthFirstNode /
+// NextDepthFirstNode. On success the edge is reported in ES as
+// (m_uNodeIndex1 = node, m_uNodeIndex2 = its parent) and true is
+// returned. ES.m_bInit tells whether the enumeration has started.
+// Return false when done
+bool PhyEnumEdges(const Tree &tree, PhyEnumEdgeState &ES)
+	{
+	unsigned uChild = uInsane;
+
+	if (ES.m_bInit)
+		{
+// Continue from the node reported by the previous call.
+		uChild = tree.NextDepthFirstNode(ES.m_uNodeIndex1);
+		if (NULL_NEIGHBOR == uChild)
+			return false;
+
+// The root has no parent edge, so step past it.
+		if (tree.IsRooted() && tree.IsRoot(uChild))
+			{
+			uChild = tree.NextDepthFirstNode(uChild);
+			if (NULL_NEIGHBOR == uChild)
+				return false;
+			}
+		}
+	else
+		{
+// First call: a tree with at most one node has no edges.
+		if (tree.GetNodeCount() <= 1)
+			{
+			ES.m_uNodeIndex1 = NULL_NEIGHBOR;
+			ES.m_uNodeIndex2 = NULL_NEIGHBOR;
+			return false;
+			}
+		uChild = tree.FirstDepthFirstNode();
+		ES.m_bInit = true;
+		}
+
+	const unsigned uParentOfChild = tree.GetParent(uChild);
+	ES.m_uNodeIndex1 = uChild;
+	ES.m_uNodeIndex2 = uParentOfChild;
+	return true;
+	}
+
+// Mirror image of PhyEnumEdges: step through the edges of a tree, one
+// per call, following the right-first depth-first node order of
+// Tree::FirstDepthFirstNodeR / NextDepthFirstNodeR. On success the edge
+// is reported in ES as (m_uNodeIndex1 = node, m_uNodeIndex2 = its
+// parent) and true is returned. Returns false when done.
+bool PhyEnumEdgesR(const Tree &tree, PhyEnumEdgeState &ES)
+	{
+	unsigned uNode1 = uInsane;
+
+	if (!ES.m_bInit)
+		{
+// First call: a tree with at most one node has no edges.
+		if (tree.GetNodeCount() <= 1)
+			{
+			ES.m_uNodeIndex1 = NULL_NEIGHBOR;
+			ES.m_uNodeIndex2 = NULL_NEIGHBOR;
+			return false;
+			}
+		uNode1 = tree.FirstDepthFirstNodeR();
+		ES.m_bInit = true;
+		}
+	else
+		{
+		uNode1 = tree.NextDepthFirstNodeR(ES.m_uNodeIndex1);
+		if (NULL_NEIGHBOR == uNode1)
+			return false;
+// The root has no parent edge, so step past it. Use the same (reversed)
+// traversal as everywhere else in this function rather than the
+// left-first variant.
+		if (tree.IsRooted() && tree.IsRoot(uNode1))
+			{
+			uNode1 = tree.NextDepthFirstNodeR(uNode1);
+			if (NULL_NEIGHBOR == uNode1)
+				return false;
+			}
+		}
+	unsigned uNode2 = tree.GetParent(uNode1);
+
+	ES.m_uNodeIndex1 = uNode1;
+	ES.m_uNodeIndex2 = uNode2;
+	return true;
+	}
+
+// Append to Leaves[] the leaves reachable from uNodeIndex1 without
+// crossing the edge back to uNodeIndex2, advancing *ptruCount for each
+// leaf stored. The caller provides an array that is large enough.
+static void GetLeavesSubtree(const Tree &tree, unsigned uNodeIndex1,
+  const unsigned uNodeIndex2, unsigned Leaves[], unsigned *ptruCount)
+	{
+	if (!tree.IsLeaf(uNodeIndex1))
+		{
+// Internal node: recurse into its neighbors other than uNodeIndex2.
+		const unsigned uFirst = tree.GetFirstNeighbor(uNodeIndex1, uNodeIndex2);
+		const unsigned uSecond = tree.GetSecondNeighbor(uNodeIndex1, uNodeIndex2);
+		if (uFirst != NULL_NEIGHBOR)
+			GetLeavesSubtree(tree, uFirst, uNodeIndex1, Leaves, ptruCount);
+		if (uSecond != NULL_NEIGHBOR)
+			GetLeavesSubtree(tree, uSecond, uNodeIndex1, Leaves, ptruCount);
+		return;
+		}
+
+	const unsigned uSlot = *ptruCount;
+	Leaves[uSlot] = uNodeIndex1;
+	*ptruCount = uSlot + 1;
+	}
+
+// Collect into Leaves[] the leaves on the uNodeIndex1 side of the edge
+// uNodeIndex1-uNodeIndex2; *ptruCount receives how many were stored.
+static void PhyGetLeaves(const Tree &tree, unsigned uNodeIndex1, unsigned uNodeIndex2,
+  unsigned Leaves[], unsigned *ptruCount)
+	{
+// Start from an empty list; the recursive helper only appends.
+	unsigned &uStored = *ptruCount;
+	uStored = 0;
+	GetLeavesSubtree(tree, uNodeIndex1, uNodeIndex2, Leaves, &uStored);
+	}
+
+// Step through the bipartitions of the leaves induced by the edges of a
+// tree, one per call, using PhyEnumEdges for the edge order.
+// On success, Leaves1/*ptruCount1 receive the leaves on the
+// ES.m_uNodeIndex1 side of the current edge, Leaves2/*ptruCount2 those
+// on the ES.m_uNodeIndex2 side, and true is returned. When the
+// enumeration is finished both counts are set to zero and false is
+// returned. Quits if the two parts do not add up to the leaf count.
+bool PhyEnumBiParts(const Tree &tree, PhyEnumEdgeState &ES,
+  unsigned Leaves1[], unsigned *ptruCount1,
+  unsigned Leaves2[], unsigned *ptruCount2)
+	{
+	bool bOk = PhyEnumEdges(tree, ES);
+	if (!bOk)
+		{
+		*ptruCount1 = 0;
+		*ptruCount2 = 0;
+		return false;
+		}
+
+// Special case: in a rooted tree, both edges from the root
+// give the same bipartition, so skip one of them.
+	if (tree.IsRooted() && tree.IsRoot(ES.m_uNodeIndex2)
+	  && tree.GetRight(ES.m_uNodeIndex2) == ES.m_uNodeIndex1)
+		{
+		bOk = PhyEnumEdges(tree, ES);
+		if (!bOk)
+			{
+// Report "done" the same way as above, so that callers never see
+// unset counts.
+			*ptruCount1 = 0;
+			*ptruCount2 = 0;
+			return false;
+			}
+		}
+
+	PhyGetLeaves(tree, ES.m_uNodeIndex1, ES.m_uNodeIndex2, Leaves1, ptruCount1);
+	PhyGetLeaves(tree, ES.m_uNodeIndex2, ES.m_uNodeIndex1, Leaves2, ptruCount2);
+
+	if (*ptruCount1 + *ptruCount2 != tree.GetLeafCount())
+		Quit("PhyEnumBiParts %u + %u != %u",
+		  *ptruCount1, *ptruCount2, tree.GetLeafCount());
+#if	DEBUG
+	{
+// Sanity check: every entry is a leaf and no leaf is in both parts.
+	for (unsigned i = 0; i < *ptruCount1; ++i)
+		{
+		if (!tree.IsLeaf(Leaves1[i]))
+			Quit("PhyEnumBiParts: not leaf");
+		for (unsigned j = 0; j < *ptruCount2; ++j)
+			{
+			if (!tree.IsLeaf(Leaves2[j]))
+				Quit("PhyEnumBiParts: not leaf");
+			if (Leaves1[i] == Leaves2[j])
+				Quit("PhyEnumBiParts: dupe");
+			}
+		}
+	}
+#endif
+
+	return true;
+	}
+
+#if	0
+// Disabled manual test: reads a tree from a fixed file, logs it, then
+// logs every bipartition produced by PhyEnumBiParts.
+void TestBiPart()
+	{
+	SetListFileName("c:\\tmp\\lobster.log", false);
+	Tree tree;
+	TextFile fileIn("c:\\tmp\\test.phy");
+	tree.FromFile(fileIn);
+	tree.LogMe();
+
+// A part can never hold more entries than there are nodes.
+	const unsigned uNodeCount = tree.GetNodeCount();
+	unsigned *Leaves1 = new unsigned[uNodeCount];
+	unsigned *Leaves2 = new unsigned[uNodeCount];
+
+	PhyEnumEdgeState ES;
+	for (;;)
+		{
+		unsigned uCount1 = uInsane;
+		unsigned uCount2 = uInsane;
+		bool bOk = PhyEnumBiParts(tree, ES, Leaves1, &uCount1, Leaves2, &uCount2);
+// Node indexes are unsigned, so print them with %u.
+		Log("PEBP=%d ES.Init=%d ES.ni1=%u ES.ni2=%u\n",
+		  bOk,
+		  ES.m_bInit,
+		  ES.m_uNodeIndex1,
+		  ES.m_uNodeIndex2);
+		if (!bOk)
+			break;
+		Log("\n");
+		Log("Part1: ");
+		for (unsigned n = 0; n < uCount1; ++n)
+			Log(" %u(%s)", Leaves1[n], tree.GetLeafName(Leaves1[n]));
+		Log("\n");
+		Log("Part2: ");
+		for (unsigned n = 0; n < uCount2; ++n)
+			Log(" %u(%s)", Leaves2[n], tree.GetLeafName(Leaves2[n]));
+		Log("\n");
+		}
+
+// Release the scratch arrays allocated above.
+	delete[] Leaves1;
+	delete[] Leaves2;
+	}
+#endif
+
+// Append to Leaves[] the leaves of the sub-tree below uNodeIndex,
+// following left/right children, but leaving out the node uExclude and
+// everything below it. *ptruCount is advanced for each leaf stored.
+static void GetLeavesSubtreeExcluding(const Tree &tree, unsigned uNodeIndex,
+  unsigned uExclude, unsigned Leaves[], unsigned *ptruCount)
+	{
+// The excluded node prunes its whole sub-tree.
+	if (uExclude == uNodeIndex)
+		return;
+
+	if (!tree.IsLeaf(uNodeIndex))
+		{
+		const unsigned uChildL = tree.GetLeft(uNodeIndex);
+		const unsigned uChildR = tree.GetRight(uNodeIndex);
+		if (uChildL != NULL_NEIGHBOR)
+			GetLeavesSubtreeExcluding(tree, uChildL, uExclude, Leaves, ptruCount);
+		if (uChildR != NULL_NEIGHBOR)
+			GetLeavesSubtreeExcluding(tree, uChildR, uExclude, Leaves, ptruCount);
+		return;
+		}
+
+	const unsigned uSlot = *ptruCount;
+	Leaves[uSlot] = uNodeIndex;
+	*ptruCount = uSlot + 1;
+	}
+
+// Collect into Leaves[] the leaves below uNodeIndex, leaving out the
+// sub-tree rooted at uExclude; *ptruCount receives how many were stored.
+void GetLeavesExcluding(const Tree &tree, unsigned uNodeIndex,
+  unsigned uExclude, unsigned Leaves[], unsigned *ptruCount)
+	{
+// Start from an empty list; the recursive helper only appends.
+	unsigned &uStored = *ptruCount;
+	uStored = 0;
+	GetLeavesSubtreeExcluding(tree, uNodeIndex, uExclude, Leaves, &uStored);
+	}
+
+// Fill NodeIndexes[] with the indexes of all non-leaf nodes of the tree,
+// ordered by increasing node height (Tree::GetNodeHeight). Nodes of
+// equal height keep their node-index order. The caller provides room
+// for (node count - 1)/2 entries. Quits if the tree has fewer than
+// three nodes or if the number of non-leaf nodes is not that value.
+void GetInternalNodesInHeightOrder(const Tree &tree, unsigned NodeIndexes[])
+	{
+	const unsigned uNodeCount = tree.GetNodeCount();
+	if (uNodeCount < 3)
+		Quit("GetInternalNodesInHeightOrder: %u nodes, none are internal",
+		  uNodeCount);
+	const unsigned uInternalNodeCount = (uNodeCount - 1)/2;
+	double *Heights = new double[uInternalNodeCount];
+
+// Gather the non-leaf nodes together with their heights.
+	unsigned uFilled = 0;
+	for (unsigned uNode = 0; uNode < uNodeCount; ++uNode)
+		{
+		if (!tree.IsLeaf(uNode))
+			{
+			NodeIndexes[uFilled] = uNode;
+			Heights[uFilled] = tree.GetNodeHeight(uNode);
+			++uFilled;
+			}
+		}
+	if (uFilled != uInternalNodeCount)
+		Quit("Internal error: GetInternalNodesInHeightOrder");
+
+// Simple but slow bubble sort (probably don't care about speed here).
+// Heights and node indexes are swapped in step so they stay paired.
+	bool bSwapped;
+	do
+		{
+		bSwapped = false;
+		for (unsigned i = 1; i < uInternalNodeCount; ++i)
+			{
+			if (!(Heights[i-1] > Heights[i]))
+				continue;
+
+			const double dHold = Heights[i-1];
+			Heights[i-1] = Heights[i];
+			Heights[i] = dHold;
+
+			const unsigned uHold = NodeIndexes[i-1];
+			NodeIndexes[i-1] = NodeIndexes[i];
+			NodeIndexes[i] = uHold;
+
+			bSwapped = true;
+			}
+		}
+	while (bSwapped);
+
+#if	TRACE
+	Log("Internal node index     Height\n");
+	Log("-------------------   --------\n");
+	//    1234567890123456789  123456789
+	for (unsigned n = 0; n < uInternalNodeCount; ++n)
+		Log("%19u  %9.3f\n", NodeIndexes[n], Heights[n]);
+#endif
+	delete[] Heights;
+	}
+
+// Raise every edge length in the tree that is below dMinEdgeLength up to
+// dMinEdgeLength. Edges without a recorded length are left alone.
+void ApplyMinEdgeLength(Tree &tree, double dMinEdgeLength)
+	{
+	const unsigned uNodeCount = tree.GetNodeCount();
+	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
+		{
+// Look at all three neighbor slots and skip the empty ones. Occupied
+// slots are not necessarily the lowest-numbered ones (the root of a
+// rooted tree has an empty first slot), so stopping after the first
+// GetNeighborCount() subscripts could both hand NULL_NEIGHBOR to the
+// edge accessors and miss a real neighbor.
+		for (unsigned n = 0; n < 3; ++n)
+			{
+			const unsigned uNeighborNodeIndex = tree.GetNeighbor(uNodeIndex, n);
+			if (NULL_NEIGHBOR == uNeighborNodeIndex)
+				continue;
+			if (!tree.HasEdgeLength(uNodeIndex, uNeighborNodeIndex))
+				continue;
+			if (tree.GetEdgeLength(uNodeIndex, uNeighborNodeIndex) < dMinEdgeLength)
+				tree.SetEdgeLength(uNodeIndex, uNeighborNodeIndex, dMinEdgeLength);
+			}
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/phy3.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/phy3.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/phy3.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,469 @@
+#include "muscle.h"
+#include "tree.h"
+#include "edgelist.h"
+
+#define TRACE	0
+
+// Bookkeeping for one directed edge Node1->Node2 of an unrooted tree.
+// The distance and leaf fields summarize the leaves that lie beyond
+// Node2 when travelling away from Node1; distances are measured from
+// Node2.
+struct EdgeInfo
+	{
+// Put every field in a defined state. Only m_bSet says whether the
+// record holds real data, but consistency checks may look at the node
+// fields of a record before testing m_bSet, so they must not be
+// indeterminate.
+	EdgeInfo()
+		{
+		m_bSet = false;
+		m_uNode1 = NULL_NEIGHBOR;
+		m_uNode2 = NULL_NEIGHBOR;
+		m_dMaxDistToLeaf = 0;
+		m_dTotalDistToLeaves = 0;
+		m_uMaxStep = NULL_NEIGHBOR;
+		m_uMostDistantLeaf = NULL_NEIGHBOR;
+		m_uLeafCount = 0;
+		}
+// Is data in this structure valid (i.e, has been set)?
+	bool m_bSet;
+
+// Node at start of this edge
+	unsigned m_uNode1;
+
+// Node at end of this edge
+	unsigned m_uNode2;
+
+// Maximum distance from Node2 to a leaf
+	double m_dMaxDistToLeaf;
+
+// Sum of distances from Node2 to all leaves under Node2
+	double m_dTotalDistToLeaves;
+
+// Next node on path from Node2 to most distant leaf
+// (NULL_NEIGHBOR when Node2 is itself a leaf)
+	unsigned m_uMaxStep;
+
+// Most distant leaf from Node2 (used for debugging only)
+	unsigned m_uMostDistantLeaf;
+
+// Number of leaves under Node2
+	unsigned m_uLeafCount;
+	};
+
+static void RootByMidLongestSpan(const Tree &tree, EdgeInfo **EIs,
+  unsigned *ptruNode1, unsigned *ptruNode2,
+  double *ptrdLength1, double *ptrdLength2);
+static void RootByMinAvgLeafDist(const Tree &tree, EdgeInfo **EIs,
+  unsigned *ptruNode1, unsigned *ptruNode2,
+  double *ptrdLength1, double *ptrdLength2);
+
+// Debug helper: log one table row for every EdgeInfo record that has
+// been set. Each node owns three record slots. The "Step" column is
+// only printed when the record has a next step.
+static void ListEIs(EdgeInfo **EIs, unsigned uNodeCount)
+	{
+	Log("Node1  Node2  MaxDist  TotDist  MostDist  LeafCount  Step\n");
+	Log("-----  -----  -------  -------  --------  ---------  ----\n");
+
+	for (unsigned uRow = 0; uRow < uNodeCount; ++uRow)
+		{
+		const EdgeInfo *Slots = EIs[uRow];
+		for (unsigned uSlot = 0; uSlot < 3; ++uSlot)
+			{
+			const EdgeInfo &Info = Slots[uSlot];
+			if (Info.m_bSet)
+				{
+				Log("%5u  %5u  %7.3g  %7.3g  %8u  %9u",
+				  Info.m_uNode1,
+				  Info.m_uNode2,
+				  Info.m_dMaxDistToLeaf,
+				  Info.m_dTotalDistToLeaves,
+				  Info.m_uMostDistantLeaf,
+				  Info.m_uLeafCount);
+				if (Info.m_uMaxStep != NULL_NEIGHBOR)
+					Log("  %4u", Info.m_uMaxStep);
+				Log("\n");
+				}
+			}
+		}
+	}
+
+// Fill in the EdgeInfo record for the directed edge uNode1->uNode2.
+//
+// The record lives in EIs[uNode1] at the subscript uNode2 has in
+// uNode1's neighbor list. If uNode2 is a leaf the record is trivial.
+// Otherwise it is assembled from the records of the edges that leave
+// uNode2 towards its other neighbors, all of which must already be set
+// (the function quits if one is not).
+static void CalcInfo(const Tree &tree, unsigned uNode1, unsigned uNode2, EdgeInfo **EIs)
+	{
+	const unsigned uNeighborIndex = tree.GetNeighborSubscript(uNode1, uNode2);
+	EdgeInfo &EI = EIs[uNode1][uNeighborIndex];
+	EI.m_uNode1 = uNode1;
+	EI.m_uNode2 = uNode2;
+
+// Leaf: exactly one leaf (uNode2 itself) at distance zero, no next step.
+	if (tree.IsLeaf(uNode2))
+		{
+		EI.m_dMaxDistToLeaf = 0;
+		EI.m_dTotalDistToLeaves = 0;
+		EI.m_uMaxStep = NULL_NEIGHBOR;
+		EI.m_uMostDistantLeaf = uNode2;
+		EI.m_uLeafCount = 1;
+		EI.m_bSet = true;
+		return;
+		}
+
+// Accumulators over the edges uNode2->uNode3 that lead away from uNode1.
+	double dMaxDistToLeaf = -1e29;
+	double dTotalDistToLeaves = 0.0;
+	unsigned uLeafCount = 0;
+	unsigned uMostDistantLeaf = NULL_NEIGHBOR;
+	unsigned uMaxStep = NULL_NEIGHBOR;
+
+	const unsigned uNeighborCount = tree.GetNeighborCount(uNode2);
+	for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)
+		{
+		const unsigned uNode3 = tree.GetNeighbor(uNode2, uSub);
+	// Skip the edge that leads back to where we came from.
+		if (uNode3 == uNode1)
+			continue;
+		const EdgeInfo &EINext = EIs[uNode2][uSub];
+		if (!EINext.m_bSet)
+			Quit("CalcInfo: internal error, dist %u->%u not known",
+				uNode2, uNode3);
+
+
+		uLeafCount += EINext.m_uLeafCount;
+
+	// Every leaf beyond uNode3 is one edge length further from uNode2
+	// than it is from uNode3.
+		const double dEdgeLength = tree.GetEdgeLength(uNode2, uNode3);
+		const double dTotalDist = EINext.m_dTotalDistToLeaves +
+		  EINext.m_uLeafCount*dEdgeLength;
+		dTotalDistToLeaves += dTotalDist;
+
+	// Track the branch holding the most distant leaf; on a tie the
+	// branch seen first is kept.
+		const double dDist = EINext.m_dMaxDistToLeaf + dEdgeLength;
+		if (dDist > dMaxDistToLeaf)
+			{
+			dMaxDistToLeaf = dDist;
+			uMostDistantLeaf = EINext.m_uMostDistantLeaf;
+			uMaxStep = uNode3;
+			}
+		}
+// A non-leaf node must have contributed at least one outgoing edge.
+	if (NULL_NEIGHBOR == uMaxStep || NULL_NEIGHBOR == uMostDistantLeaf ||
+	  0 == uLeafCount)
+		Quit("CalcInfo: internal error 2");
+
+// NOTE(review): dThisDist is not used below; the stored distances are
+// measured from uNode2 and do not include the edge uNode1-uNode2.
+	const double dThisDist = tree.GetEdgeLength(uNode1, uNode2);
+	EI.m_dMaxDistToLeaf = dMaxDistToLeaf;
+	EI.m_dTotalDistToLeaves = dTotalDistToLeaves;
+	EI.m_uMaxStep = uMaxStep;
+	EI.m_uMostDistantLeaf = uMostDistantLeaf;
+	EI.m_uLeafCount = uLeafCount;
+	EI.m_bSet = true;
+	}
+
+// True if the EdgeInfo record for the directed edge
+// uNodeFrom->uNodeTo has already been filled in.
+static bool Known(const Tree &tree, EdgeInfo **EIs, unsigned uNodeFrom,
+  unsigned uNodeTo)
+	{
+	const EdgeInfo &Info =
+	  EIs[uNodeFrom][tree.GetNeighborSubscript(uNodeFrom, uNodeTo)];
+	return Info.m_bSet;
+	}
+
+// True if every edge leaving uNodeTo, other than the one leading back
+// to uNodeFrom, already has its EdgeInfo record set.
+static bool AllKnownOut(const Tree &tree, EdgeInfo **EIs, unsigned uNodeFrom,
+  unsigned uNodeTo)
+	{
+	const EdgeInfo *Outgoing = EIs[uNodeTo];
+	const unsigned uDegree = tree.GetNeighborCount(uNodeTo);
+	for (unsigned i = 0; i < uDegree; ++i)
+		{
+		const bool bBackEdge = (tree.GetNeighbor(uNodeTo, i) == uNodeFrom);
+		if (!bBackEdge && !Outgoing[i].m_bSet)
+			return false;
+		}
+	return true;
+	}
+
+// Choose where to place the root of an unrooted tree.
+//
+// An EdgeInfo record is first computed for every directed edge (see
+// CalcInfo), starting with the edges that point at leaves and working
+// inwards as the records each further edge depends on become available.
+// The method selected by RootMethod then picks an edge and a position
+// on that edge.
+//
+// On return *ptruNode1 and *ptruNode2 are the end points of the chosen
+// edge and *ptrdLength1 / *ptrdLength2 are the two pieces the edge is
+// split into. Quits if the tree is already rooted, has fewer than two
+// nodes, or RootMethod is not recognized.
+void FindRoot(const Tree &tree, unsigned *ptruNode1, unsigned *ptruNode2,
+  double *ptrdLength1, double *ptrdLength2,
+  ROOT RootMethod)
+	{
+#if	TRACE
+	tree.LogMe();
+#endif
+	if (tree.IsRooted())
+		Quit("FindRoot: tree already rooted");
+
+	const unsigned uNodeCount = tree.GetNodeCount();
+
+	if (uNodeCount < 2)
+		Quit("Root: don't support trees with < 2 edges");
+
+// Three record slots per node, one for each possible neighbor.
+	EdgeInfo **EIs = new EdgeInfo *[uNodeCount];
+	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
+		EIs[uNodeIndex] = new EdgeInfo[3];
+
+// Seed the work list with the edges that point at a leaf; their
+// records do not depend on any other record.
+	EdgeList Edges;
+	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
+		if (tree.IsLeaf(uNodeIndex))
+			{
+			unsigned uParent = tree.GetNeighbor1(uNodeIndex);
+			Edges.Add(uParent, uNodeIndex);
+			}
+
+#if	TRACE
+	Log("Edges: ");
+	Edges.LogMe();
+#endif
+
+// Main loop: iterate until all distances known
+	for (;;)
+		{
+		EdgeList NextEdges;
+
+#if	TRACE
+		Log("\nTop of main loop\n");
+		Log("Edges: ");
+		Edges.LogMe();
+		Log("MDs:\n");
+		ListEIs(EIs, uNodeCount);
+#endif
+
+	// For all edges
+		const unsigned uEdgeCount = Edges.GetCount();
+		if (0 == uEdgeCount)
+			break;
+		for (unsigned n = 0; n < uEdgeCount; ++n)
+			{
+			unsigned uNodeFrom;
+			unsigned uNodeTo;
+			Edges.GetEdge(n, &uNodeFrom, &uNodeTo);
+
+			CalcInfo(tree, uNodeFrom, uNodeTo, EIs);
+#if	TRACE
+			Log("Edge %u -> %u\n", uNodeFrom, uNodeTo);
+#endif
+		// Knowing uNodeFrom->uNodeTo may complete the set of records an
+		// edge pointing at uNodeFrom depends on; queue any such edge that
+		// is not yet known for the next pass.
+			const unsigned uNeighborCount = tree.GetNeighborCount(uNodeFrom);
+			for (unsigned i = 0; i < uNeighborCount; ++i)
+				{
+				const unsigned uNeighborIndex = tree.GetNeighbor(uNodeFrom, i);
+				if (!Known(tree, EIs, uNeighborIndex, uNodeFrom) &&
+				  AllKnownOut(tree, EIs, uNeighborIndex, uNodeFrom))
+					NextEdges.Add(uNeighborIndex, uNodeFrom);
+				}
+			}
+		Edges.Copy(NextEdges);
+		}
+
+#if	TRACE
+	ListEIs(EIs, uNodeCount);
+#endif
+
+	switch (RootMethod)
+		{
+	case ROOT_MidLongestSpan:
+		RootByMidLongestSpan(tree, EIs, ptruNode1, ptruNode2,
+		  ptrdLength1, ptrdLength2);
+		break;
+
+	case ROOT_MinAvgLeafDist:
+		RootByMinAvgLeafDist(tree, EIs, ptruNode1, ptruNode2,
+		  ptrdLength1, ptrdLength2);
+		break;
+
+	default:
+		Quit("Invalid RootMethod=%d", RootMethod);
+		}
+
+	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
+		delete[] EIs[uNodeIndex];
+	delete[] EIs;
+	}
+
+// Root at the midpoint of the longest leaf-to-leaf path ("span").
+//
+// Step 1 finds the leaf for which (length of its own edge + maximum
+// distance to a leaf beyond its neighbor) is largest; that value is the
+// span length. Step 2 walks from that leaf along the recorded
+// m_uMaxStep links until the accumulated length reaches half the span.
+//
+// Outputs: *ptruNode1 / *ptruNode2 are the end points of the edge that
+// contains the midpoint (Node1 being the one nearer the starting leaf),
+// *ptrdLength1 is the distance from Node1 to the midpoint and
+// *ptrdLength2 the remainder of the edge.
+static void RootByMidLongestSpan(const Tree &tree, EdgeInfo **EIs,
+  unsigned *ptruNode1, unsigned *ptruNode2,
+  double *ptrdLength1, double *ptrdLength2)
+	{
+	const unsigned uNodeCount = tree.GetNodeCount();
+
+	unsigned uLeaf1 = NULL_NEIGHBOR;
+// NOTE(review): uMostDistantLeaf is assigned below but never read.
+	unsigned uMostDistantLeaf = NULL_NEIGHBOR;
+	double dMaxDist = -VERY_LARGE_DOUBLE;
+	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
+		{
+		if (!tree.IsLeaf(uNodeIndex))
+			continue;
+
+		const unsigned uNode2 = tree.GetNeighbor1(uNodeIndex);
+		if (NULL_NEIGHBOR == uNode2)
+			Quit("RootByMidLongestSpan: internal error 0");
+		const double dEdgeLength = tree.GetEdgeLength(uNodeIndex, uNode2);
+	// Slot 0 of a leaf's records is expected to describe the edge
+	// leaf->GetNeighbor1(leaf); the checks below verify that.
+		const EdgeInfo &EI = EIs[uNodeIndex][0];
+		if (!EI.m_bSet)
+			Quit("RootByMidLongestSpan: internal error 1");
+		if (EI.m_uNode1 != uNodeIndex || EI.m_uNode2 != uNode2)
+			Quit("RootByMidLongestSpan: internal error 2");
+	// Longest path that starts at this leaf.
+		const double dSpanLength = dEdgeLength + EI.m_dMaxDistToLeaf;
+		if (dSpanLength > dMaxDist)
+			{
+			dMaxDist = dSpanLength;
+			uLeaf1 = uNodeIndex;
+			uMostDistantLeaf = EI.m_uMostDistantLeaf;
+			}
+		}
+	
+	if (NULL_NEIGHBOR == uLeaf1)
+		Quit("RootByMidLongestSpan: internal error 3");
+
+// The midpoint lies half the span length away from the starting leaf.
+	const double dTreeHeight = dMaxDist/2.0;
+	unsigned uNode1 = uLeaf1;
+	unsigned uNode2 = tree.GetNeighbor1(uLeaf1);
+	double dAccumSpanLength = 0;
+
+#if	TRACE
+	Log("RootByMidLongestSpan: span=%u", uLeaf1);
+#endif
+
+// Walk along the span one edge at a time.
+	for (;;)
+		{
+		const double dEdgeLength = tree.GetEdgeLength(uNode1, uNode2);
+#if	TRACE
+		Log("->%u(%g;%g)", uNode2, dEdgeLength, dAccumSpanLength);
+#endif
+	// Midpoint falls on (or at the far end of) the current edge.
+		if (dAccumSpanLength + dEdgeLength >= dTreeHeight)
+			{
+			*ptruNode1 = uNode1;
+			*ptruNode2 = uNode2;
+			*ptrdLength1 = dTreeHeight - dAccumSpanLength;
+			*ptrdLength2 = dEdgeLength - *ptrdLength1;
+#if	TRACE
+			{
+			const EdgeInfo &EI = EIs[uLeaf1][0];
+			Log("...\n");
+			Log("Midpoint: Leaf1=%u Leaf2=%u Node1=%u Node2=%u Length1=%g Length2=%g\n",
+			  uLeaf1, EI.m_uMostDistantLeaf, *ptruNode1, *ptruNode2, *ptrdLength1, *ptrdLength2);
+			}
+#endif
+			return;
+			}
+
+	// Reaching a leaf without passing the midpoint should be impossible.
+		if (tree.IsLeaf(uNode2))
+			Quit("RootByMidLongestSpan: internal error 4");
+
+		dAccumSpanLength += dEdgeLength;
+		const unsigned uSub = tree.GetNeighborSubscript(uNode1, uNode2);
+		const EdgeInfo &EI = EIs[uNode1][uSub];
+		if (!EI.m_bSet)
+			Quit("RootByMidLongestSpan: internal error 5");
+
+	// Advance one edge towards the most distant leaf.
+		uNode1 = uNode2;
+		uNode2 = EI.m_uMaxStep;
+		}
+	}
+
+/***
+Root by balancing average distance to leaves.
+The root is a point p such that the average
+distance to leaves to the left of p is the
+same as that to the right.
+
+This is the method used by CLUSTALW, which
+was originally used in PROFILEWEIGHT:
+
+	Thompson et al. (1994) CABIOS (10) 1, 19-29.
+***/
+
+// Root on the edge, and at the point on it, where the average distance
+// to the leaves on one side equals the average distance to the leaves
+// on the other side. When several edges admit such a point, the one
+// giving the smallest tree height (largest distance from the root point
+// to any leaf) is chosen; on a tie the edge visited first is kept.
+//
+// Outputs: *ptruNode1 / *ptruNode2 are the end points of the chosen
+// edge, *ptrdLength1 is the distance from Node1 to the root point and
+// *ptrdLength2 the distance from the root point to Node2. Quits if no
+// edge qualifies or if the EdgeInfo records are inconsistent.
+static void RootByMinAvgLeafDist(const Tree &tree, EdgeInfo **EIs,
+  unsigned *ptruNode1, unsigned *ptruNode2,
+  double *ptrdLength1, double *ptrdLength2)
+	{
+	const unsigned uNodeCount = tree.GetNodeCount();
+	const unsigned uLeafCount = tree.GetLeafCount();
+	unsigned uNode1 = NULL_NEIGHBOR;
+	unsigned uNode2 = NULL_NEIGHBOR;
+	double dMinHeight = VERY_LARGE_DOUBLE;
+	double dBestLength1 = VERY_LARGE_DOUBLE;
+	double dBestLength2 = VERY_LARGE_DOUBLE;
+
+	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
+		{
+		const unsigned uNeighborCount = tree.GetNeighborCount(uNodeIndex);
+		for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)
+			{
+			const unsigned uNeighborIndex = tree.GetNeighbor(uNodeIndex, uSub);
+
+		// Avoid visiting same edge a second time in reversed order.
+			if (uNeighborIndex < uNodeIndex)
+				continue;
+
+			const unsigned uSubRev = tree.GetNeighborSubscript(uNeighborIndex, uNodeIndex);
+			if (NULL_NEIGHBOR == uSubRev)
+				Quit("RootByMinAvgLeafDist, internal error 1");
+
+		// Get info for edges Node1->Node2 and Node2->Node1 (reversed)
+			const EdgeInfo &EI = EIs[uNodeIndex][uSub];
+			const EdgeInfo &EIRev = EIs[uNeighborIndex][uSubRev];
+
+			if (EI.m_uNode1 != uNodeIndex || EI.m_uNode2 != uNeighborIndex ||
+			  EIRev.m_uNode1 != uNeighborIndex || EIRev.m_uNode2 != uNodeIndex)
+				Quit("RootByMinAvgLeafDist, internal error 2");
+		// Both directions are used below, so both must have been set.
+			if (!EI.m_bSet || !EIRev.m_bSet)
+				Quit("RootByMinAvgLeafDist, internal error 3");
+			if (uLeafCount != EI.m_uLeafCount + EIRev.m_uLeafCount)
+				Quit("RootByMinAvgLeafDist, internal error 4");
+
+			const double dEdgeLength = tree.GetEdgeLength(uNodeIndex, uNeighborIndex);
+			if (dEdgeLength != tree.GetEdgeLength(uNeighborIndex, uNodeIndex))
+				Quit("RootByMinAvgLeafDist, internal error 5");
+
+		// Consider point p on edge 12 in tree (1=Node, 2=Neighbor).
+		//
+		//	-----         ----
+		//	     |       |
+		//	     1----p--2
+		//	     |       |
+		//	-----         ----
+		//
+		// Define:
+		//    ADL1 = average distance from 1 to the leaves to its left.
+		//    ADR2 = average distance from 2 to the leaves to its right.
+		//    ADLp = average distance to leaves to left of point p.
+		//    ADRp = average distance to leaves to right of point p.
+		//    L = edge length = distance 12
+		//    x = distance 1p
+		// So distance p2 = L - x.
+		// Average distance from p to leaves on left of p is:
+		//		ADLp = ADL1 + x
+		// Average distance from p to leaves on right of p is:
+		//		ADRp = ADR2 + (L - x)
+		// To be a root, we require these two distances to be equal,
+		//		ADLp = ADRp
+		//		ADL1 + x = ADR2 + (L - x)
+		// Solving for x,
+		//		x = (ADR2 - ADL1 + L)/2
+		// If 0 <= x <= L, we can place the root on edge 12.
+		//
+		// An EdgeInfo record describes the leaves beyond the END node of
+		// its edge, measured from that end node. The leaves to the left
+		// of 1 are therefore described by the reversed edge 2->1 (EIRev)
+		// and the leaves to the right of 2 by the edge 1->2 (EI).
+
+			const double ADL1 = EIRev.m_dTotalDistToLeaves / EIRev.m_uLeafCount;
+			const double ADR2 = EI.m_dTotalDistToLeaves / EI.m_uLeafCount;
+
+			const double x = (ADR2 - ADL1 + dEdgeLength)/2.0;
+			if (x >= 0 && x <= dEdgeLength)
+				{
+			// dLength1 is measured from Node1, dLength2 from Node2.
+				const double dLength1 = x;
+				const double dLength2 = dEdgeLength - x;
+			// Height of the rooted tree = distance from p to the most
+			// distant leaf on either side.
+				const double dHeight1 = EIRev.m_dMaxDistToLeaf + dLength1;
+				const double dHeight2 = EI.m_dMaxDistToLeaf + dLength2;
+				const double dHeight = dHeight1 >= dHeight2 ? dHeight1 : dHeight2;
+#if	TRACE
+				Log("Candidate root Node1=%u Node2=%u Height=%g\n",
+				  uNodeIndex, uNeighborIndex, dHeight);
+#endif
+				if (dHeight < dMinHeight)
+					{
+					uNode1 = uNodeIndex;
+					uNode2 = uNeighborIndex;
+					dBestLength1 = dLength1;
+					dBestLength2 = dLength2;
+					dMinHeight = dHeight;
+					}
+				}
+			}
+		}
+
+	if (NULL_NEIGHBOR == uNode1 || NULL_NEIGHBOR == uNode2)
+		Quit("RootByMinAvgLeafDist, internal error 6");
+
+#if	TRACE
+	Log("Best root Node1=%u Node2=%u Length1=%g Length2=%g Height=%g\n",
+	  uNode1, uNode2, dBestLength1, dBestLength2, dMinHeight);
+#endif
+
+	*ptruNode1 = uNode1;
+	*ptruNode2 = uNode2;
+	*ptrdLength1 = dBestLength1;
+	*ptrdLength2 = dBestLength2;
+	}
+
+// Re-root an already rooted tree using Method. With ROOT_Pseudo the
+// existing root (the one assigned by clustering) is kept; otherwise the
+// current root is deleted and the tree is rooted again. Quits if the
+// tree is not rooted on entry.
+void FixRoot(Tree &tree, ROOT Method)
+	{
+	if (!tree.IsRooted())
+		Quit("FixRoot: expecting rooted tree");
+
+	if (Method != ROOT_Pseudo)
+		{
+		tree.UnrootByDeletingRoot();
+		tree.RootUnrootedTree(Method);
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/phy4.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/phy4.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/phy4.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,295 @@
+#include "muscle.h"
+#include "tree.h"
+#include <stdio.h>
+
+#define	TRACE	0
+
+// Cut a rooted tree at height dMaxHeight. A non-root node is reported
+// when its own height is at most dMaxHeight while its parent's height
+// exceeds it. The reported node indexes go to Subtrees[] (in increasing
+// node index order) and their number to *ptruSubtreeCount.
+void ClusterByHeight(const Tree &tree, double dMaxHeight, unsigned Subtrees[],
+  unsigned *ptruSubtreeCount)
+	{
+	if (!tree.IsRooted())
+		Quit("ClusterByHeight: requires rooted tree");
+
+#if	TRACE
+	Log("ClusterByHeight, max height=%g\n", dMaxHeight);
+#endif
+
+	const unsigned uTotalNodes = tree.GetNodeCount();
+	unsigned uFound = 0;
+	for (unsigned uNode = 0; uNode < uTotalNodes; ++uNode)
+		{
+		if (!tree.IsRoot(uNode))
+			{
+			const unsigned uAbove = tree.GetParent(uNode);
+			const double dNodeHeight = tree.GetNodeHeight(uNode);
+			const double dAboveHeight = tree.GetNodeHeight(uAbove);
+
+#if	TRACE
+			Log("Node %3u  Height %5.2f  ParentHeight %5.2f\n",
+			  uNode, dNodeHeight, dAboveHeight);
+#endif
+		// The cut passes through the edge between uNode and its parent.
+			const bool bCutHere =
+			  dAboveHeight > dMaxHeight && dNodeHeight <= dMaxHeight;
+			if (bCutHere)
+				{
+				Subtrees[uFound] = uNode;
+#if	TRACE
+				Log("Subtree[%u]=%u\n", uFound, uNode);
+#endif
+				++uFound;
+				}
+			}
+		}
+	*ptruSubtreeCount = uFound;
+	}
+
+// One refinement step for ClusterBySubfamCount. Subfams[0..uCount-1]
+// holds the current subfamily nodes. The non-leaf entry that owns the
+// highest child is replaced by its left child, and its right child is
+// stored in Subfams[uCount], so the list grows by one entry. Quits if
+// no entry has children.
+static void ClusterBySubfamCount_Iteration(const Tree &tree, unsigned Subfams[],
+  unsigned uCount)
+	{
+	int iBest = -1;
+	double dBestHeight = -1e20;
+
+	for (int i = 0; i < (int) uCount; ++i)
+		{
+		const unsigned uCandidate = Subfams[i];
+		if (!tree.IsLeaf(uCandidate))
+			{
+		// Left child is examined first, then right; only a strictly
+		// greater height replaces the current best.
+			for (int iSide = 0; iSide < 2; ++iSide)
+				{
+				const unsigned uChild = (0 == iSide) ?
+				  tree.GetLeft(uCandidate) : tree.GetRight(uCandidate);
+				const double dChildHeight = tree.GetNodeHeight(uChild);
+				if (dChildHeight > dBestHeight)
+					{
+					dBestHeight = dChildHeight;
+					iBest = i;
+					}
+				}
+			}
+		}
+
+	if (iBest == -1)
+		Quit("CBSFCIter: failed to find highest child");
+
+	const unsigned uSplit = Subfams[iBest];
+	const unsigned uLeftChild = tree.GetLeft(uSplit);
+	const unsigned uRightChild = tree.GetRight(uSplit);
+
+// The parent's slot is reused for the left child; the right child is
+// appended.
+	Subfams[iBest] = uLeftChild;
+	Subfams[uCount] = uRightChild;
+
+#if	TRACE
+	{
+	Log("Iter %3u:", uCount);
+	for (unsigned n = 0; n < uCount; ++n)
+		Log(" %u", Subfams[n]);
+	Log("\n");
+	}
+#endif
+	}
+
+// Split a tree with N leaves into k subfamilies by cutting it with a
+// horizontal line at some height. Every internal node defines one
+// possible cut height, so visiting the internal nodes in decreasing
+// order of height enumerates all distinct cuts. Visiting a node means
+// lowering the line to just ("infinitesimally") below that node: the
+// node leaves the list of subfamilies and its two children join it.
+// A parent must be visited before its children, so zero edge lengths
+// may need care. N is assumed small and the code is a plain O(N^2)
+// scan; keeping the nodes sorted by height would scale better.
+//
+// Subfams[] receives the subfamily node indexes and *ptruSubfamCount
+// their number: 0 for an empty tree, the leaf count (with nodes
+// 0..leaves-1 reported) when uSubfamCount is not smaller than the leaf
+// count, and uSubfamCount otherwise.
+void ClusterBySubfamCount(const Tree &tree, unsigned uSubfamCount,
+  unsigned Subfams[], unsigned *ptruSubfamCount)
+	{
+	const unsigned uTotalNodes = tree.GetNodeCount();
+
+// Empty tree: nothing to report.
+	if (uTotalNodes == 0)
+		{
+		*ptruSubfamCount = 0;
+		return;
+		}
+
+// At least as many subfamilies requested as there are leaves:
+// every leaf is its own subfamily.
+	const unsigned uLeaves = (uTotalNodes + 1)/2;
+	if (uSubfamCount >= uLeaves)
+		{
+		unsigned uLeaf = 0;
+		while (uLeaf < uLeaves)
+			{
+			Subfams[uLeaf] = uLeaf;
+			++uLeaf;
+			}
+		*ptruSubfamCount = uLeaves;
+		return;
+		}
+
+// Start from the root alone and split once per additional subfamily.
+	Subfams[0] = tree.GetRootNodeIndex();
+	unsigned uHave = 1;
+	while (uHave < uSubfamCount)
+		{
+		ClusterBySubfamCount_Iteration(tree, Subfams, uHave);
+		++uHave;
+		}
+
+	*ptruSubfamCount = uSubfamCount;
+	}
+
+// Append the leaves below (or at) uNodeIndex to Leaves[], left subtree
+// before right subtree. uLeafCount is both the next free position in
+// Leaves[] on entry and the updated count on return.
+static void GetLeavesRecurse(const Tree &tree, unsigned uNodeIndex,
+  unsigned Leaves[], unsigned &uLeafCount /* in-out */)
+	{
+	if (!tree.IsLeaf(uNodeIndex))
+		{
+		const unsigned uLeftChild = tree.GetLeft(uNodeIndex);
+		const unsigned uRightChild = tree.GetRight(uNodeIndex);
+		GetLeavesRecurse(tree, uLeftChild, Leaves, uLeafCount);
+		GetLeavesRecurse(tree, uRightChild, Leaves, uLeafCount);
+		return;
+		}
+
+	Leaves[uLeafCount++] = uNodeIndex;
+	}
+
+// Collect the leaves of the subtree rooted at uNodeIndex into Leaves[]
+// and store how many were found in *ptruLeafCount.
+void GetLeaves(const Tree &tree, unsigned uNodeIndex, unsigned Leaves[],
+  unsigned *ptruLeafCount)
+	{
+	unsigned uFound = 0;
+	GetLeavesRecurse(tree, uNodeIndex, Leaves, uFound);
+	*ptruLeafCount = uFound;
+	}
+
+// Rebuild this tree as a pruned copy of 'tree' whose leaves are the
+// uSubfamCount nodes listed in Subfams[].
+//
+// Pruned node numbering: leaves take indexes 0..uSubfamCount-1 in the
+// order given by Subfams[]; the ancestors reached by walking from each
+// subfamily node towards the root take the following indexes in order
+// of first visit, and the root of 'tree' takes the last one. Leaves are
+// named "Subfam_1", "Subfam_2", ... Each pruned internal node copies the
+// left/right children and edge lengths of the unpruned node it stands
+// for. Quits if 'tree' is not rooted or if the number of internal nodes
+// found does not match 2*uSubfamCount - 1 nodes in total.
+void Tree::PruneTree(const Tree &tree, unsigned Subfams[],
+  unsigned uSubfamCount)
+	{
+	if (!tree.IsRooted())
+		Quit("Tree::PruneTree: requires rooted tree");
+
+	Clear();
+
+	m_uNodeCount = 2*uSubfamCount - 1;
+	InitCache(m_uNodeCount);
+
+	const unsigned uUnprunedNodeCount = tree.GetNodeCount();
+
+// Index translation tables in both directions; NULL_NEIGHBOR marks
+// "no counterpart (yet)".
+	unsigned *uUnprunedToPrunedIndex = new unsigned[uUnprunedNodeCount];
+	unsigned *uPrunedToUnprunedIndex = new unsigned[m_uNodeCount];
+
+	for (unsigned n = 0; n < uUnprunedNodeCount; ++n)
+		uUnprunedToPrunedIndex[n] = NULL_NEIGHBOR;
+
+	for (unsigned n = 0; n < m_uNodeCount; ++n)
+		uPrunedToUnprunedIndex[n] = NULL_NEIGHBOR;
+
+// Create mapping between unpruned and pruned node indexes
+	unsigned uInternalNodeIndex = uSubfamCount;
+	for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
+		{
+		unsigned uUnprunedNodeIndex = Subfams[uSubfamIndex];
+		uUnprunedToPrunedIndex[uUnprunedNodeIndex] = uSubfamIndex;
+		uPrunedToUnprunedIndex[uSubfamIndex] = uUnprunedNodeIndex;
+	// Climb towards the root, numbering each ancestor the first time
+	// it is met. The root itself is numbered after the loop.
+		for (;;)
+			{
+			uUnprunedNodeIndex = tree.GetParent(uUnprunedNodeIndex);
+			if (tree.IsRoot(uUnprunedNodeIndex))
+				break;
+
+		// Already visited this node?
+			if (NULL_NEIGHBOR != uUnprunedToPrunedIndex[uUnprunedNodeIndex])
+				break;
+
+			uUnprunedToPrunedIndex[uUnprunedNodeIndex] = uInternalNodeIndex;
+			uPrunedToUnprunedIndex[uInternalNodeIndex] = uUnprunedNodeIndex;
+
+			++uInternalNodeIndex;
+			}
+		}
+
+	const unsigned uUnprunedRootIndex = tree.GetRootNodeIndex();
+	uUnprunedToPrunedIndex[uUnprunedRootIndex] = uInternalNodeIndex;
+	uPrunedToUnprunedIndex[uInternalNodeIndex] = uUnprunedRootIndex;
+
+#if	TRACE
+	{
+	Log("Pruned to unpruned:\n");
+	for (unsigned i = 0; i < m_uNodeCount; ++i)
+		Log(" [%u]=%u", i, uPrunedToUnprunedIndex[i]);
+	Log("\n");
+	Log("Unpruned to pruned:\n");
+	for (unsigned i = 0; i < uUnprunedNodeCount; ++i)
+		{
+		unsigned n = uUnprunedToPrunedIndex[i];
+		if (n != NULL_NEIGHBOR)
+			Log(" [%u]=%u", i, n);
+		}
+	Log("\n");
+	}
+#endif
+
+// The root must have received the last pruned index.
+	if (uInternalNodeIndex != m_uNodeCount - 1)
+		Quit("Tree::PruneTree, Internal error");
+
+// Nodes 0, 1 ... are the leaves
+	for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
+		{
+		char szName[32];
+		sprintf(szName, "Subfam_%u", uSubfamIndex + 1);
+		m_ptrName[uSubfamIndex] = strsave(szName);
+		}
+
+// Wire up every internal node from its unpruned counterpart.
+	for (unsigned uPrunedNodeIndex = uSubfamCount; uPrunedNodeIndex < m_uNodeCount;
+	  ++uPrunedNodeIndex)
+		{
+		unsigned uUnprunedNodeIndex = uPrunedToUnprunedIndex[uPrunedNodeIndex];
+
+		const unsigned uUnprunedLeft = tree.GetLeft(uUnprunedNodeIndex);
+		const unsigned uUnprunedRight = tree.GetRight(uUnprunedNodeIndex);
+
+		const unsigned uPrunedLeft = uUnprunedToPrunedIndex[uUnprunedLeft];
+		const unsigned uPrunedRight = uUnprunedToPrunedIndex[uUnprunedRight];
+
+		const double dLeftLength =
+		  tree.GetEdgeLength(uUnprunedNodeIndex, uUnprunedLeft);
+		const double dRightLength =
+		  tree.GetEdgeLength(uUnprunedNodeIndex, uUnprunedRight);
+
+	// Neighbor 1 is the parent link, neighbors 2 and 3 the children.
+		m_uNeighbor2[uPrunedNodeIndex] = uPrunedLeft;
+		m_uNeighbor3[uPrunedNodeIndex] = uPrunedRight;
+
+		m_dEdgeLength1[uPrunedLeft] = dLeftLength;
+		m_dEdgeLength1[uPrunedRight] = dRightLength;
+
+		m_uNeighbor1[uPrunedLeft] = uPrunedNodeIndex;
+		m_uNeighbor1[uPrunedRight] = uPrunedNodeIndex;
+
+		m_bHasEdgeLength1[uPrunedLeft] = true;
+		m_bHasEdgeLength1[uPrunedRight] = true;
+
+		m_dEdgeLength2[uPrunedNodeIndex] = dLeftLength;
+		m_dEdgeLength3[uPrunedNodeIndex] = dRightLength;
+
+		m_bHasEdgeLength2[uPrunedNodeIndex] = true;
+		m_bHasEdgeLength3[uPrunedNodeIndex] = true;
+		}
+
+	m_uRootNodeIndex = uUnprunedToPrunedIndex[uUnprunedRootIndex];
+
+	m_bRooted = true;
+
+	Validate();
+
+// Release both translation tables (the second one used to be leaked).
+	delete[] uUnprunedToPrunedIndex;
+	delete[] uPrunedToUnprunedIndex;
+	}
+
+// Translate uCount leaf node indexes into their leaf ids:
+// Ids[i] receives the id of leaf Leaves[i].
+void LeafIndexesToIds(const Tree &tree, const unsigned Leaves[], unsigned uCount,
+  unsigned Ids[])
+	{
+	unsigned i = 0;
+	while (i < uCount)
+		{
+		const unsigned uLeaf = Leaves[i];
+		Ids[i] = tree.GetLeafId(uLeaf);
+		++i;
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/phyfromclust.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/phyfromclust.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/phyfromclust.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,95 @@
+#include "muscle.h"
+#include "tree.h"
+#include "clust.h"
+
+// Allocate the per-node arrays with room for uCacheCount nodes and put
+// every slot in its "empty" state: no neighbors, no edge lengths, no
+// height, no name, and the dInsane / uInsane markers as values.
+// The previous contents of the array pointers are not released here.
+void Tree::InitCache(unsigned uCacheCount)
+	{
+	m_uCacheCount = uCacheCount;
+
+	m_uNeighbor1 = new unsigned[m_uCacheCount];
+	m_uNeighbor2 = new unsigned[m_uCacheCount];
+	m_uNeighbor3 = new unsigned[m_uCacheCount];
+
+	m_Ids = new unsigned[m_uCacheCount];
+
+	m_dEdgeLength1 = new double[m_uCacheCount];
+	m_dEdgeLength2 = new double[m_uCacheCount];
+	m_dEdgeLength3 = new double[m_uCacheCount];
+	m_dHeight = new double[m_uCacheCount];
+
+	m_bHasEdgeLength1 = new bool[m_uCacheCount];
+	m_bHasEdgeLength2 = new bool[m_uCacheCount];
+	m_bHasEdgeLength3 = new bool[m_uCacheCount];
+	m_bHasHeight = new bool[m_uCacheCount];
+
+	m_ptrName = new char *[m_uCacheCount];
+
+// Initialize every allocated slot. The bound is the allocation size,
+// not m_uNodeCount, so the loop can neither run past the arrays nor
+// leave part of them uninitialized when the two values differ.
+	for (unsigned uNodeIndex = 0; uNodeIndex < m_uCacheCount; ++uNodeIndex)
+		{
+		m_uNeighbor1[uNodeIndex] = NULL_NEIGHBOR;
+		m_uNeighbor2[uNodeIndex] = NULL_NEIGHBOR;
+		m_uNeighbor3[uNodeIndex] = NULL_NEIGHBOR;
+		m_bHasEdgeLength1[uNodeIndex] = false;
+		m_bHasEdgeLength2[uNodeIndex] = false;
+		m_bHasEdgeLength3[uNodeIndex] = false;
+		m_bHasHeight[uNodeIndex] = false;
+		m_dEdgeLength1[uNodeIndex] = dInsane;
+		m_dEdgeLength2[uNodeIndex] = dInsane;
+		m_dEdgeLength3[uNodeIndex] = dInsane;
+		m_dHeight[uNodeIndex] = dInsane;
+		m_ptrName[uNodeIndex] = 0;
+		m_Ids[uNodeIndex] = uInsane;
+		}
+	}
+
+// Rebuild this tree from the clustering result C.
+// Node indexes are taken over unchanged from C. Leaves receive a copy
+// of their name and their id; every internal node is linked to its
+// left and right child, with the child's length from C used as the
+// length of the connecting edge (stored on both ends of the edge).
+void Tree::FromClust(Clust &C)
+	{
+	Clear();
+
+	m_uNodeCount = C.GetNodeCount();
+	InitCache(m_uNodeCount);
+
+// Cluster is always rooted. An unrooted cluster
+// is represented by a pseudo-root, which we fix later.
+	m_bRooted = true;
+	const unsigned uRoot = C.GetRootNodeIndex();
+	m_uRootNodeIndex = uRoot;
+// The root has no parent link (neighbor 1) and hence no parent edge.
+	m_uNeighbor1[uRoot] = NULL_NEIGHBOR;
+	m_bHasEdgeLength1[uRoot] = false;
+
+	for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
+		{
+	// Leaves only carry a name and an id; their parent link is filled
+	// in when the parent is processed.
+		if (C.IsLeaf(uNodeIndex))
+			{
+			const char *ptrName = C.GetNodeName(uNodeIndex);
+			m_ptrName[uNodeIndex] = strsave(ptrName);
+			m_Ids[uNodeIndex] = C.GetNodeId(uNodeIndex);
+			continue;
+			}
+
+		const unsigned uLeft = C.GetLeftIndex(uNodeIndex);
+		const unsigned uRight = C.GetRightIndex(uNodeIndex);
+
+		const double dLeftLength = C.GetLength(uLeft);
+		const double dRightLength = C.GetLength(uRight);
+
+	// Neighbors 2 and 3 of an internal node are its children...
+		m_uNeighbor2[uNodeIndex] = uLeft;
+		m_uNeighbor3[uNodeIndex] = uRight;
+
+	// ...and neighbor 1 of each child is this node.
+		m_dEdgeLength1[uLeft] = dLeftLength;
+		m_dEdgeLength1[uRight] = dRightLength;
+
+		m_uNeighbor1[uLeft] = uNodeIndex;
+		m_uNeighbor1[uRight] = uNodeIndex;
+
+		m_bHasEdgeLength1[uLeft] = true;
+		m_bHasEdgeLength1[uRight] = true;
+
+	// Mirror the same lengths on the parent's side of each edge.
+		m_dEdgeLength2[uNodeIndex] = dLeftLength;
+		m_dEdgeLength3[uNodeIndex] = dRightLength;
+
+		m_bHasEdgeLength2[uNodeIndex] = true;
+		m_bHasEdgeLength3[uNodeIndex] = true;
+		}
+	Validate();
+	}

Added: trunk/packages/muscle/branches/upstream/current/phyfromfile.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/phyfromfile.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/phyfromfile.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,269 @@
+#include "muscle.h"
+#include "tree.h"
+#include "textfile.h"
+
+#define TRACE 0
+
+// Tokens in Newick files are:
+//		( ) : , ;
+//		string
+//		'string'
+//		"string"
+//		[ comment ]
+//
+// We can't safely distinguish between identifiers and floating point
+// numbers at the lexical level (because identifiers may be numeric,
+// or start with digits), so both edge lengths and identifiers are
+// returned as strings.
+
+// Return the name of a Newick token type (the enumerator name without
+// its NTT_ prefix), or "??" for a value not listed here.
+const char *Tree::NTTStr(NEWICK_TOKEN_TYPE NTT) const
+	{
+	switch (NTT)
+		{
+	case NTT_Unknown:
+		return "Unknown";
+	case NTT_Lparen:
+		return "Lparen";
+	case NTT_Rparen:
+		return "Rparen";
+	case NTT_Colon:
+		return "Colon";
+	case NTT_Comma:
+		return "Comma";
+	case NTT_Semicolon:
+		return "Semicolon";
+	case NTT_String:
+		return "String";
+	case NTT_SingleQuotedString:
+		return "SingleQuotedString";
+	case NTT_DoubleQuotedString:
+		return "DoubleQuotedString";
+	case NTT_Comment:
+		return "Comment";
+		}
+	return "??";
+	}
+
+// Read the next Newick token from File into szToken, a buffer of
+// uBytes bytes, and return its type.
+//
+// Leading white space is skipped. The punctuation characters
+// ( ) : ; , are returned as single-character tokens. Anything else is
+// a string: unquoted strings end at punctuation (which is pushed back)
+// or white space; quoted strings end at the matching quote, which is
+// not stored, and are reported as NTT_String. A [comment] is skipped
+// and the token following it is returned instead. If the input ends
+// inside a token, the type determined so far is returned.
+// Quits if uBytes is too small to hold the token.
+NEWICK_TOKEN_TYPE Tree::GetToken(TextFile &File, char szToken[], unsigned uBytes) const
+	{
+// Room is needed for at least one character plus the terminating nul.
+// This also keeps the unsigned "uBytes - 2" capacity test below from
+// wrapping around.
+	if (uBytes < 2)
+		Quit("Tree::GetToken: input buffer too small, uBytes=%u", uBytes);
+
+// Skip leading white space
+	File.SkipWhite();
+
+	char c;
+	File.GetCharX(c);
+
+// In case a single-character token
+	szToken[0] = c;
+	szToken[1] = 0;
+
+	unsigned uBytesCopied = 0;
+	NEWICK_TOKEN_TYPE TT;
+	switch (c)
+		{
+	case '(':
+		return NTT_Lparen;
+
+	case ')':
+		return NTT_Rparen;
+
+	case ':':
+		return NTT_Colon;
+
+	case ';':
+		return NTT_Semicolon;
+
+	case ',':
+		return NTT_Comma;
+
+// For quoted strings the opening quote is dropped and the first
+// character of the contents is fetched here.
+	case '\'':
+		TT = NTT_SingleQuotedString;
+		File.GetCharX(c);
+		break;
+
+	case '"':
+		TT = NTT_DoubleQuotedString;
+		File.GetCharX(c);
+		break;
+
+	case '[':
+		TT = NTT_Comment;
+		break;
+
+	default:
+		TT = NTT_String;
+		break;
+		}
+
+// Each pass stores the current character (comments are not stored),
+// reads the next one and decides whether the token ends there.
+	for (;;)
+		{
+		if (TT != NTT_Comment)
+			{
+			if (uBytesCopied < uBytes - 2)
+				{
+				szToken[uBytesCopied++] = c;
+				szToken[uBytesCopied] = 0;
+				}
+			else
+				Quit("Tree::GetToken: input buffer too small, token so far='%s'", szToken);
+			}
+		bool bEof = File.GetChar(c);
+		if (bEof)
+			return TT;
+
+		switch (TT)
+			{
+		case NTT_String:
+			if (0 != strchr("():;,", c))
+				{
+				File.PushBack(c);
+				return NTT_String;
+				}
+		// <ctype.h> functions require a value representable as
+		// unsigned char; a plain char may be negative.
+			if (isspace((unsigned char) c))
+				return NTT_String;
+			break;
+
+		case NTT_SingleQuotedString:
+			if ('\'' == c)
+				return NTT_String;
+			break;
+
+		case NTT_DoubleQuotedString:
+			if ('"' == c)
+				return NTT_String;
+			break;
+
+		case NTT_Comment:
+		// End of comment: deliver whatever token comes next.
+			if (']' == c)
+				return GetToken(File, szToken, uBytes);
+			break;
+
+		default:
+			Quit("Tree::GetToken, invalid TT=%u", TT);
+			}
+		}
+	}
+
+// NOTE: this hack must come after definition of Tree::GetToken.
+#if	TRACE
+#define GetToken	GetTokenVerbose
+#endif
+
+// Build this tree from a Newick description read from File.
+// The tree is created as rooted and the first group is parsed into
+// node 0. A ';' after that group means the tree really is rooted; a ','
+// means a third group follows, in which case the tree is converted to
+// an unrooted one and the third group is attached to node 0. Any other
+// token is an error.
+void Tree::FromFile(TextFile &File)
+	{
+	CreateRooted();
+
+	double dLength;
+	bool bHasLength = GetGroupFromFile(File, 0, &dLength);
+
+// Token after the first group decides between rooted and unrooted.
+	char szNext[16];
+	const NEWICK_TOKEN_TYPE NextType = GetToken(File, szNext, sizeof(szNext));
+
+	if (NTT_Semicolon != NextType)
+		{
+		if (NTT_Comma != NextType)
+			Quit("Tree::FromFile, expected ';' or ',', got '%s'", szNext);
+
+	// Unrooted: read the third group hanging off node 0.
+		const unsigned uThird = UnrootFromFile();
+		bHasLength = GetGroupFromFile(File, uThird, &dLength);
+		if (bHasLength)
+			SetEdgeLength(0, uThird, dLength);
+		}
+	else if (bHasLength)
+		{
+	// Rooted: a length on the root group has no edge to belong to.
+		Log(" *** Warning *** edge length on root group in Newick file %s\n",
+		  File.GetFileName());
+		}
+
+	Validate();
+	}
+
+// Return true if edge length for this group.
+// Parse one Newick group from File into node uNodeIndex.
+// A group is either a leaf name or "(left,right)"; in the latter case
+// two child nodes are appended below uNodeIndex and parsed recursively.
+// The group may be followed by ":length".
+//
+// Returns true and stores the length in *ptrdEdgeLength if an edge
+// length followed the group; returns false, leaving *ptrdEdgeLength
+// untouched, otherwise. Quits on a syntax error.
+bool Tree::GetGroupFromFile(TextFile &File, unsigned uNodeIndex,
+  double *ptrdEdgeLength)
+	{
+	char szToken[1024];
+	NEWICK_TOKEN_TYPE NTT = GetToken(File, szToken, sizeof(szToken));
+
+// Group is either leaf name or (left, right).
+	if (NTT_String == NTT)
+		{
+		SetLeafName(uNodeIndex, szToken);
+#if	TRACE
+		Log("Group is leaf '%s'\n", szToken);
+#endif
+		}
+	else if (NTT_Lparen == NTT)
+		{
+	// The right child is taken to be the node directly after the left
+	// one. NOTE(review): relies on AppendBranch creating both children
+	// with consecutive indexes - confirm.
+		const unsigned uLeft = AppendBranch(uNodeIndex);
+		const unsigned uRight = uLeft + 1;
+
+	// Left sub-group...
+#if	TRACE
+		Log("Got '(', group is compound, expect left sub-group\n");
+#endif
+		double dEdgeLength;
+		bool bLeftLength = GetGroupFromFile(File, uLeft, &dEdgeLength);
+#if	TRACE
+		if (bLeftLength)
+			Log("Edge length for left sub-group: %.3g\n", dEdgeLength);
+		else
+			Log("No edge length for left sub-group\n");
+#endif
+		if (bLeftLength)
+			SetEdgeLength(uNodeIndex, uLeft, dEdgeLength);
+
+	// ... then comma ...
+#if	TRACE
+		Log("Expect comma\n");
+#endif
+		NTT = GetToken(File, szToken, sizeof(szToken));
+		if (NTT_Comma != NTT)
+			Quit("Tree::GetGroupFromFile, expected ',', got '%s'", szToken);
+
+	// ...then right sub-group...
+#if	TRACE
+		Log("Expect right sub-group\n");
+#endif
+		bool bRightLength = GetGroupFromFile(File, uRight, &dEdgeLength);
+		if (bRightLength)
+			SetEdgeLength(uNodeIndex, uRight, dEdgeLength);
+#if	TRACE
+		if (bRightLength)
+			Log("Edge length for right sub-group: %.3g\n", dEdgeLength);
+		else
+			Log("No edge length for right sub-group\n");
+#endif
+
+	// ... then closing parenthesis.
+#if	TRACE
+		Log("Expect closing parenthesis (or comma if > 2-ary)\n");
+#endif
+		NTT = GetToken(File, szToken, sizeof(szToken));
+		if (NTT_Rparen == NTT)
+			;
+		else if (NTT_Comma == NTT)
+			{
+		// A third sub-group follows: hand the comma back to the input
+		// and return without looking for an edge length, leaving the
+		// rest of the group to the caller.
+			File.PushBack(',');
+			return false;
+			}
+		else
+			Quit("Tree::GetGroupFromFile, expected ')' or ',', got '%s'", szToken);
+		}
+	else
+		Quit("Tree::GetGroupFromFile, expected '(' or leaf name, got '%s'",
+		  szToken);
+
+// Group may optionally be followed by edge length.
+	File.SkipWhite();
+	char c;
+	File.GetCharX(c);
+#if	TRACE
+	Log("Character following group, could be colon, is '%c'\n", c);
+#endif
+	if (':' == c)
+		{
+		NTT = GetToken(File, szToken, sizeof(szToken));
+		if (NTT_String != NTT)
+			Quit("Tree::GetGroupFromFile, expected edge length, got '%s'", szToken);
+	// atof() reports no errors: a non-numeric token yields 0.
+		*ptrdEdgeLength = atof(szToken);
+		return true;
+		}
+// Not a colon: leave the character for whoever reads next.
+	File.PushBack(c);
+	return false;
+	}

Added: trunk/packages/muscle/branches/upstream/current/physeq.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/physeq.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/physeq.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,128 @@
+#include "muscle.h"
+#include "msa.h"
+#include "textfile.h"
+
+const int BLOCKSIZE = 60;
+
+// Return a character that is safe to emit in a sequence name: the
+// punctuation characters ( ) [ ] : ; , and anything that is not
+// printable are replaced by '_'; every other character is returned as is.
+static char FixChar(char c)
+	{
+	switch (c)
+		{
+	case '(':
+	case ')':
+	case '[':
+	case ']':
+	case ':':
+	case ';':
+	case ',':
+		return '_';
+		}
+// The <ctype.h> classifiers are only defined for values representable as
+// unsigned char (or EOF); plain char may be negative, so convert first.
+	if (!isprint((unsigned char) c))
+		return '_';
+	return c;
+	}
+
+// Sanitize a NUL-terminated name in place by passing each of its
+// characters through FixChar.
+static void FixName(char Name[])
+	{
+	for (char *ptrChar = Name; 0 != *ptrChar; ++ptrChar)
+		*ptrChar = FixChar(*ptrChar);
+	}
+
+// Write the alignment to File in sequential layout.
+//
+// The first line is "<sequence count> <column count>". Each sequence then
+// follows in turn: its name (truncated to 10 characters, sanitized by
+// FixName and padded to width 10) and then its columns, letters
+// upper-cased, in space-separated groups of 10. The first line of a
+// sequence carries BLOCKSIZE - 10 columns after the name, every later
+// line carries BLOCKSIZE columns.
+// Only the header line is written when the alignment has no columns.
+void MSA::ToPhySequentialFile(TextFile &File) const
+	{
+	const unsigned SeqCount = GetSeqCount();
+	const unsigned ColCount = GetColCount();
+
+// Both counts are unsigned, so they are formatted with %u.
+	File.PutFormat("%u %u\n", SeqCount, ColCount);
+
+	if (0 == ColCount)
+		return;
+
+	for (unsigned Seq = 0; Seq < SeqCount; ++Seq)
+		{
+		char Name[11];
+		const char *ptrName = GetSeqName(Seq);
+		size_t n = strlen(ptrName);
+		if (n > 10)
+			n = 10;
+		memcpy(Name, ptrName, n);
+		Name[n] = 0;
+		FixName(Name);
+		File.PutFormat("%-10.10s", Name);
+
+		unsigned BlockIndex = 0;
+		unsigned Col = 0;
+		for (;;)
+			{
+			const unsigned MaxCols = (BlockIndex == 0) ? (BLOCKSIZE - 10) : BLOCKSIZE;
+			for (unsigned ColsThisBlock = 0; ColsThisBlock < MaxCols; ++ColsThisBlock)
+				{
+				if (Col == ColCount)
+					break;
+			// A space precedes every group of 10, except the first group
+			// of a continuation line.
+				if (ColsThisBlock%10 == 0 && (BlockIndex == 0 || ColsThisBlock > 0))
+					File.PutChar(' ');
+				char c = GetChar(Seq, Col);
+			// <ctype.h> functions require an unsigned char value.
+				if (isalpha((unsigned char) c))
+					c = (char) toupper((unsigned char) c);
+				File.PutChar(c);
+				++Col;
+				}
+			File.PutChar('\n');
+			if (Col == ColCount)
+				break;
+			++BlockIndex;
+			}
+		}
+	}
+
+// Write the alignment to File in interleaved layout.
+//
+// The first line is "<sequence count> <column count>". The columns are
+// then written in blocks; within a block every sequence contributes one
+// line. Lines of the first block start with the sequence name (truncated
+// to 10 characters, sanitized by FixName and padded to width 10) and
+// carry BLOCKSIZE - 10 columns; lines of later blocks carry BLOCKSIZE
+// columns. Letters are upper-cased and columns are written in
+// space-separated groups of 10. Blocks are separated by an empty line.
+// Only the header line is written when there are no sequences or no
+// columns.
+void MSA::ToPhyInterleavedFile(TextFile &File) const
+	{
+	const unsigned SeqCount = GetSeqCount();
+	const unsigned ColCount = GetColCount();
+
+// Both counts are unsigned, so they are formatted with %u.
+	File.PutFormat("%u %u\n", SeqCount, ColCount);
+
+// Col is only advanced inside the per-sequence loop, so with zero
+// sequences the block loop below would never reach ColCount.
+	if (0 == SeqCount || 0 == ColCount)
+		return;
+
+	unsigned Col = 0;
+	for (;;)
+		{
+		const unsigned ColBlockStart = Col;
+		const unsigned MaxCols = (ColBlockStart == 0) ? (BLOCKSIZE - 10) : BLOCKSIZE;
+
+		for (unsigned Seq = 0; Seq < SeqCount; ++Seq)
+			{
+		// Names appear in the first block only.
+			if (0 == ColBlockStart)
+				{
+				char Name[11];
+				const char *ptrName = GetSeqName(Seq);
+				size_t n = strlen(ptrName);
+				if (n > 10)
+					n = 10;
+				memcpy(Name, ptrName, n);
+				Name[n] = 0;
+				FixName(Name);
+				File.PutFormat("%-10.10s", Name);
+				}
+
+		// Every sequence restarts at the first column of this block.
+			Col = ColBlockStart;
+			for (unsigned ColsThisBlock = 0; ColsThisBlock < MaxCols; ++ColsThisBlock)
+				{
+				if (Col == ColCount)
+					break;
+				if (ColsThisBlock%10 == 0 && (0 == ColBlockStart || ColsThisBlock > 0))
+					File.PutChar(' ');
+				char c = GetChar(Seq, Col);
+			// <ctype.h> functions require an unsigned char value.
+				if (isalpha((unsigned char) c))
+					c = (char) toupper((unsigned char) c);
+				File.PutChar(c);
+				++Col;
+				}
+			File.PutChar('\n');
+			}
+		if (Col == ColCount)
+			break;
+		File.PutChar('\n');
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/phytofile.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/phytofile.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/phytofile.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,86 @@
+#include "muscle.h"
+#include "tree.h"
+#include "textfile.h"
+
+// Return the lowest node index that is not a leaf, or NULL_NEIGHBOR when
+// every node of the tree is a leaf.
+unsigned Tree::GetAnyNonLeafNode() const
+	{
+	unsigned uCandidate = 0;
+	while (uCandidate < m_uNodeCount)
+		{
+		if (!IsLeaf(uCandidate))
+			return uCandidate;
+		++uCandidate;
+		}
+	return NULL_NEIGHBOR;
+	}
+
+// Write the tree to File in parenthesized form, terminated by ';'.
+//
+// A rooted tree is written from its root by ToFileNodeRooted. An unrooted
+// tree is written as a three-way group around an arbitrary internal node,
+// each of whose three neighbors is written by ToFileNodeUnrooted.
+// Quits if an unrooted tree has no internal node to start from.
+void Tree::ToFile(TextFile &File) const
+	{
+	if (IsRooted())
+		{
+		ToFileNodeRooted(File, m_uRootNodeIndex);
+		File.PutString(";\n");
+		return;
+		}
+
+// Unrooted.
+	const unsigned uNodeIndex = GetAnyNonLeafNode();
+
+// GetAnyNonLeafNode reports "none" as NULL_NEIGHBOR, which must not be
+// used as an index into the neighbor arrays.
+	if (NULL_NEIGHBOR == uNodeIndex)
+		Quit("Tree::ToFile, unrooted tree has no internal node");
+
+	File.PutString("(\n");
+	ToFileNodeUnrooted(File, m_uNeighbor1[uNodeIndex], uNodeIndex);
+	File.PutString(",\n");
+	ToFileNodeUnrooted(File, m_uNeighbor2[uNodeIndex], uNodeIndex);
+	File.PutString(",\n");
+	ToFileNodeUnrooted(File, m_uNeighbor3[uNodeIndex], uNodeIndex);
+	File.PutString(");\n");
+	}
+
+// Write the subtree that hangs off uNodeIndex when the tree is viewed
+// from uParent. A leaf is written as its name; an internal node as a
+// parenthesized pair of its two other neighbors. The length of the edge
+// to uParent, if one is set, is appended as ":<length>".
+void Tree::ToFileNodeUnrooted(TextFile &File, unsigned uNodeIndex, unsigned uParent) const
+	{
+	assert(!IsRooted());
+
+	if (IsLeaf(uNodeIndex))
+		File.PutString(GetName(uNodeIndex));
+	else
+		{
+		const unsigned uFirst = GetFirstNeighbor(uNodeIndex, uParent);
+		File.PutString("(\n");
+		ToFileNodeUnrooted(File, uFirst, uNodeIndex);
+		File.PutString(",\n");
+		const unsigned uSecond = GetSecondNeighbor(uNodeIndex, uParent);
+		ToFileNodeUnrooted(File, uSecond, uNodeIndex);
+		File.PutString(")");
+		}
+
+	if (HasEdgeLength(uNodeIndex, uParent))
+		{
+		const double dLength = GetEdgeLength(uNodeIndex, uParent);
+		File.PutFormat(":%g", dLength);
+		}
+	File.PutString("\n");
+	}
+
+// Write the subtree rooted at uNodeIndex. Internal nodes (and the root,
+// even when it is a leaf) are wrapped in parentheses; a leaf is written
+// as its name. For every node except the root, the length of the edge to
+// its parent is appended as ":<length>" when one is set.
+void Tree::ToFileNodeRooted(TextFile &File, unsigned uNodeIndex) const
+	{
+	assert(IsRooted());
+
+	const bool bLeaf = IsLeaf(uNodeIndex);
+	const bool bRoot = IsRoot(uNodeIndex);
+	const bool bParens = bRoot || !bLeaf;
+
+	if (bParens)
+		File.PutString("(\n");
+
+	if (bLeaf)
+		File.PutString(GetName(uNodeIndex));
+	else
+		{
+		ToFileNodeRooted(File, GetLeft(uNodeIndex));
+		File.PutString(",\n");
+		ToFileNodeRooted(File, GetRight(uNodeIndex));
+		}
+
+	if (bParens)
+		File.PutString(")");
+
+	if (!bRoot)
+		{
+		const unsigned uAbove = GetParent(uNodeIndex);
+		if (HasEdgeLength(uNodeIndex, uAbove))
+			File.PutFormat(":%g", GetEdgeLength(uNodeIndex, uAbove));
+		}
+	File.PutString("\n");
+	}

Added: trunk/packages/muscle/branches/upstream/current/posgap.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/posgap.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/posgap.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,141 @@
+#include "muscle.h"
+
+// Pascarella and Argos gap factors
+// after Table 1 in Thompson et al. ClustalW NAR paper.
+// One factor per amino acid letter index; PAFactor averages these,
+// weighted by the letter counts of a profile column.
+// NOTE(review): the last two entries are labelled Y then W. Confirm that
+// this matches the letter-index order of the amino acid alphabet defined
+// elsewhere (alpha.h); the two values differ, so the order matters here.
+static double PAFFacs[20] =
+	{
+	1.13,		// A
+	1.13,		// C
+	0.96,		// D
+	1.31,		// E
+	1.20,		// F
+	0.61,		// G
+	1.00,		// H
+	1.32,		// I
+	0.96,		// K
+	1.21,		// L
+	1.29,		// M
+	0.62,		// N
+	0.74,		// P
+	1.07,		// Q
+	0.72,		// R
+	0.76,		// S
+	0.89,		// T
+	1.25,		// V
+	1.00,		// Y
+	1.23,		// W
+	};
+
+// (Not used: does not appear to work well).
+// Count-weighted mean of the PAFFacs entries over the 20 letters of
+// fcCounts. Returns 0.5 when the counts sum to zero. Quits unless the
+// current alphabet is amino acid.
+SCORE PAFactor(const FCOUNT fcCounts[])
+	{
+	if (g_Alpha != ALPHA_Amino)
+		Quit("PAFFactor: requires amino acid sequence");
+
+	double dWeightedSum = 0;
+	FCOUNT fTotal = 0;
+	unsigned uLetter = 0;
+	while (uLetter < 20)
+		{
+		const FCOUNT fThis = fcCounts[uLetter];
+		dWeightedSum += fThis*PAFFacs[uLetter];
+		fTotal += fThis;
+		++uLetter;
+		}
+
+	if (0 != fTotal)
+		return (SCORE) (dWeightedSum/fTotal);
+	return 0.5;
+	}
+
+// Per-letter flag, indexed by amino acid letter index: true for the
+// letters that IsHydrophilic accepts (D E G K N P Q R S).
+// The trailing comment on each entry names the letter it is meant for.
+static bool Hydrophilic[20] =
+	{
+	false,		// A
+	false,		// C
+	true,		// D
+	true,		// E
+	false,		// F
+	true,		// G
+	false,		// H
+	false,		// I
+	true,		// K
+	false,		// L
+	false,		// M
+	true,		// N
+	true,		// P
+	true,		// Q
+	true,		// R
+	true,		// S
+	false,		// T
+	false,		// V
+	false,		// Y
+	false,		// W
+	};
+
+// True when every letter with a positive count in fcCounts is flagged in
+// the Hydrophilic table (so also true when all 20 counts are zero).
+// Quits unless the current alphabet is amino acid.
+bool IsHydrophilic(const FCOUNT fcCounts[])
+	{
+	if (g_Alpha != ALPHA_Amino)
+		Quit("IsHydrophilic: requires amino acid sequence");
+
+	for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
+		{
+		if (Hydrophilic[uLetter])
+			continue;
+	// A letter outside the table that is present disqualifies the column.
+		if (fcCounts[uLetter] > 0)
+			return false;
+		}
+	return true;
+	}
+
+// Integer-count overload: true when every letter with a non-zero count
+// in uCounts is flagged in the Hydrophilic table.
+// Quits unless the current alphabet is amino acid.
+bool IsHydrophilic(const unsigned uCounts[])
+	{
+	if (g_Alpha != ALPHA_Amino)
+		Quit("IsHydrophilic: requires amino acid sequence");
+
+	unsigned uLetter = 0;
+	while (uLetter < 20)
+		{
+		const bool bPresent = (uCounts[uLetter] > 0);
+		if (bPresent && !Hydrophilic[uLetter])
+			return false;
+		++uLetter;
+		}
+	return true;
+	}
+
+// Per-letter flag, indexed by amino acid letter index: true for the
+// letters that IsHydrophobic accepts.
+//
+// Candidate letters and how different sources classify them:
+// LIVCATMFYWHK
+// Venn		Pascarella	B&T		Me
+// L		y			y		y
+// I		y			y		y
+// V		y			y		y
+// C		y			n
+// A		y			y		y
+// T		N			n
+// M		y			y		y
+// F		y			y		y
+// Y		n			n
+// W		y			n
+// H		n			n
+// K		n			n
+//
+// NOTE(review): the table below is true for A C F H I L M T V Y W, i.e.
+// the whole Venn list except K, which is more than the letters marked
+// in the "Me" column above. Confirm which of the two is intended.
+static bool Hydrophobic[20] =
+	{
+	true,		// A
+	true,		// C
+	false,		// D
+	false,		// E
+	true,		// F
+	false,		// G
+	true,		// H
+	true,		// I
+	false,		// K
+	true,		// L
+	true,		// M
+	false,		// N
+	false,		// P
+	false,		// Q
+	false,		// R
+	false,		// S
+	true,		// T
+	true,		// V
+	true,		// Y
+	true,		// W
+	};
+
+// True when every letter with a positive count in fcCounts is flagged in
+// the Hydrophobic table (so also true when all 20 counts are zero).
+// Quits unless the current alphabet is amino acid.
+bool IsHydrophobic(const FCOUNT fcCounts[])
+	{
+	if (g_Alpha != ALPHA_Amino)
+		Quit("IsHydrophobic: requires amino acid sequence");
+
+	for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
+		{
+		if (Hydrophobic[uLetter])
+			continue;
+	// A letter outside the table that is present disqualifies the column.
+		if (fcCounts[uLetter] > 0.0)
+			return false;
+		}
+	return true;
+	}

Added: trunk/packages/muscle/branches/upstream/current/ppscore.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/ppscore.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/ppscore.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,93 @@
+#include "muscle.h"
+#include "textfile.h"
+#include "msa.h"
+#include "tree.h"
+#include "profile.h"
+#include "objscore.h"
+
+bool g_bTracePPScore = false;
+MSA *g_ptrPPScoreMSA1 = 0;
+MSA *g_ptrPPScoreMSA2 = 0;
+
+// Number the sequences of msa 0..n-1, build a tree for it with the
+// second-stage cluster/distance settings, install that tree via
+// SetMuscleTree and return the profile computed by ProfileFromMSA.
+static ProfPos *ProfileFromMSALocal(MSA &msa, Tree &tree)
+	{
+	const unsigned uCount = msa.GetSeqCount();
+	unsigned uId = 0;
+	while (uId < uCount)
+		{
+		msa.SetSeqId(uId, uId);
+		++uId;
+		}
+
+	TreeFromMSA(msa, tree, g_Cluster2, g_Distance2, g_Root1);
+	SetMuscleTree(tree);
+
+	ProfPos *Prof = ProfileFromMSA(msa);
+	return Prof;
+	}
+
+// Score two alignments of equal length against each other as profiles.
+//
+// Reads the alignments named by -in1 and -in2, selects the alphabet from
+// g_SeqType (guessing from the first alignment when set to auto), builds
+// a profile for each and reports ObjScoreDP_Profs of the two profiles to
+// the log and to stdout. Quits if either file name is missing, if the
+// alignments differ in column count or if the sequence type is invalid.
+void PPScore()
+	{
+	if (0 == g_pstrFileName1 || 0 == g_pstrFileName2)
+		Quit("-ppscore needs -in1 and -in2");
+
+	SetSeqWeightMethod(g_SeqWeight1);
+
+	TextFile file1(g_pstrFileName1);
+	TextFile file2(g_pstrFileName2);
+
+	MSA msa1;
+	MSA msa2;
+
+	msa1.FromFile(file1);
+	msa2.FromFile(file2);
+
+	const unsigned uLength1 = msa1.GetColCount();
+	const unsigned uLength2 = msa2.GetColCount();
+
+	if (uLength1 != uLength2)
+		Quit("Profiles must have the same length");
+
+	ALPHA Alpha = ALPHA_Undefined;
+	switch (g_SeqType)
+		{
+	case SEQTYPE_Auto:
+		Alpha = msa1.GuessAlpha();
+		break;
+
+	case SEQTYPE_Protein:
+		Alpha = ALPHA_Amino;
+		break;
+
+	case SEQTYPE_DNA:
+		Alpha = ALPHA_DNA;
+		break;
+
+	case SEQTYPE_RNA:
+		Alpha = ALPHA_RNA;
+		break;
+
+	default:
+		Quit("Invalid SeqType");
+		}
+	SetAlpha(Alpha);
+
+	msa1.FixAlpha();
+	msa2.FixAlpha();
+
+	if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)
+		SetPPScore(PPSCORE_SPN);
+
+	const unsigned uSeqCount1 = msa1.GetSeqCount();
+	const unsigned uSeqCount2 = msa2.GetSeqCount();
+	const unsigned uMaxSeqCount = (uSeqCount1 > uSeqCount2 ? uSeqCount1 : uSeqCount2);
+	MSA::SetIdCount(uMaxSeqCount);
+
+	Tree tree1;
+	Tree tree2;
+	ProfPos *Prof1 = ProfileFromMSALocal(msa1, tree1);
+	ProfPos *Prof2 = ProfileFromMSALocal(msa2, tree2);
+
+// The trace globals are pointed at locals of this function for the
+// duration of the scoring call; remember what they held before.
+	const bool bSavedTrace = g_bTracePPScore;
+	MSA *ptrSavedMSA1 = g_ptrPPScoreMSA1;
+	MSA *ptrSavedMSA2 = g_ptrPPScoreMSA2;
+
+	g_bTracePPScore = true;
+	g_ptrPPScoreMSA1 = &msa1;
+	g_ptrPPScoreMSA2 = &msa2;
+
+	SCORE Score = ObjScoreDP_Profs(Prof1, Prof2, uLength1);
+
+// msa1 and msa2 are about to go out of scope: do not leave the globals
+// pointing at them.
+	g_bTracePPScore = bSavedTrace;
+	g_ptrPPScoreMSA1 = ptrSavedMSA1;
+	g_ptrPPScoreMSA2 = ptrSavedMSA2;
+
+	Log("Score=%.4g\n", Score);
+	printf("Score=%.4g\n", Score);
+
+// ProfileFromMSA allocates each profile with new[].
+	delete[] Prof1;
+	delete[] Prof2;
+	}

Added: trunk/packages/muscle/branches/upstream/current/profdb.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/profdb.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/profdb.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,54 @@
+#include "muscle.h"
+#include "textfile.h"
+#include "seqvect.h"
+#include "distfunc.h"
+#include "msa.h"
+#include "tree.h"
+#include "clust.h"
+#include "profile.h"
+#include "clustsetmsa.h"
+
+// Align a database of sequences to a profile, one sequence at a time.
+//
+// The profile is the alignment read from the first input file (-in1);
+// the database is read as FASTA from the second (-in2). Each database
+// sequence is aligned to the current profile with ProfileProfile and the
+// result becomes the profile for the next sequence. The final alignment
+// is written to the output file. Quits if an input file name is missing
+// or if either input contains no sequences.
+void ProfDB()
+	{
+// Same precondition that Profile() and PPScore() check before opening
+// their inputs.
+	if (0 == g_pstrFileName1 || 0 == g_pstrFileName2)
+		Quit("ProfDB needs -in1 and -in2");
+
+	SetOutputFileName(g_pstrOutFileName);
+	SetInputFileName(g_pstrFileName2);
+	SetStartTime();
+
+	TextFile file1(g_pstrFileName1);
+	TextFile file2(g_pstrFileName2);
+
+	SetMaxIters(g_uMaxIters);
+	SetSeqWeightMethod(g_SeqWeight1);
+
+// The profile is read through file1; there is no need to open the same
+// file a second time.
+	MSA msa1;
+	msa1.FromFile(file1);
+
+	const unsigned uSeqCount1 = msa1.GetSeqCount();
+	if (0 == uSeqCount1)
+		Quit("No sequences in input alignment");
+
+	SeqVect v;
+	v.FromFASTAFile(file2);
+	const unsigned uSeqCount2 = v.Length();
+	if (0 == uSeqCount2)
+		Quit("No sequences in input sequence database");
+
+	MSA::SetIdCount(uSeqCount1 + uSeqCount2);
+	SetProgressDesc("Align sequence database to profile");
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount2; ++uSeqIndex)
+		{
+		Progress(uSeqIndex, uSeqCount2);
+		Seq &s = *(v[uSeqIndex]);
+		s.SetId(0);
+		MSA msaTmp;
+		msaTmp.FromSeq(s);
+		MSA msaOut;
+		ProfileProfile(msa1, msaTmp, msaOut);
+	// The combined alignment is the profile for the next sequence.
+		msa1.Copy(msaOut);
+		}
+	ProgressStepsDone();
+
+	TextFile fileOut(g_pstrOutFileName, true);
+	msa1.ToFile(fileOut);
+	}

Added: trunk/packages/muscle/branches/upstream/current/profile.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/profile.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/profile.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,180 @@
+#include "muscle.h"
+#include "textfile.h"
+#include "msa.h"
+#include "tree.h"
+#include "profile.h"
+#include "objscore.h"
+
+// Assign sequence ids 0..n-1 to msa, compute a tree for it (second-stage
+// cluster and distance settings), make that tree current with
+// SetMuscleTree and return the profile that ProfileFromMSA builds.
+static ProfPos *ProfileFromMSALocal(MSA &msa, Tree &tree)
+	{
+	for (unsigned uId = 0, uCount = msa.GetSeqCount(); uId < uCount; ++uId)
+		msa.SetSeqId(uId, uId);
+
+	TreeFromMSA(msa, tree, g_Cluster2, g_Distance2, g_Root1);
+	SetMuscleTree(tree);
+
+	ProfPos *ptrProfile = ProfileFromMSA(msa);
+	return ptrProfile;
+	}
+
+// Align two alignments to each other and store the combined alignment
+// in msaOut.
+//
+// Selects the alphabet from g_SeqType (guessing from msa1 when set to
+// auto), builds a profile for each input, aligns the two profiles with
+// AlignTwoProfs and applies the resulting path to the two alignments.
+// msa1 and msa2 are modified: FixAlpha is applied and their sequence ids
+// are renumbered. Quits on an invalid sequence type.
+void ProfileProfile(MSA &msa1, MSA &msa2, MSA &msaOut)
+	{
+	ALPHA Alpha = ALPHA_Undefined;
+	switch (g_SeqType)
+		{
+	case SEQTYPE_Auto:
+		Alpha = msa1.GuessAlpha();
+		break;
+
+	case SEQTYPE_Protein:
+		Alpha = ALPHA_Amino;
+		break;
+
+	case SEQTYPE_DNA:
+		Alpha = ALPHA_DNA;
+		break;
+
+	case SEQTYPE_RNA:
+		Alpha = ALPHA_RNA;
+		break;
+
+	default:
+		Quit("Invalid SeqType");
+		}
+	SetAlpha(Alpha);
+
+	msa1.FixAlpha();
+	msa2.FixAlpha();
+
+	if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)
+		SetPPScore(PPSCORE_SPN);
+
+	const unsigned uLength1 = msa1.GetColCount();
+	const unsigned uLength2 = msa2.GetColCount();
+
+	Tree tree1;
+	Tree tree2;
+	ProfPos *Prof1 = ProfileFromMSALocal(msa1, tree1);
+	ProfPos *Prof2 = ProfileFromMSALocal(msa2, tree2);
+
+	PWPath Path;
+	ProfPos *ProfOut = 0;
+	unsigned uLengthOut = 0;
+	AlignTwoProfs(Prof1, uLength1, 1.0, Prof2, uLength2, 1.0, Path, &ProfOut, &uLengthOut);
+
+	AlignTwoMSAsGivenPath(Path, msa1, msa2, msaOut);
+
+// ProfileFromMSA allocates each input profile with new[]; without this
+// every call leaks both of them.
+	delete[] Prof1;
+	delete[] Prof2;
+// NOTE(review): ProfOut is presumably also heap-allocated by
+// AlignTwoProfs and leaked here -- confirm how it is allocated before
+// releasing it.
+	}
+
+// Do profile-profile alignment.
+//
+// Reads the two alignments named by -in1 and -in2, selects the alphabet
+// from g_SeqType (guessing from the first alignment when set to auto),
+// aligns the two alignments to each other with ProfileProfile and hands
+// the combined alignment to MuscleOutput. Quits if either file name is
+// missing or the sequence type is invalid.
+void Profile()
+	{
+	if (0 == g_pstrFileName1 || 0 == g_pstrFileName2)
+		Quit("-profile needs -in1 and -in2");
+
+	SetSeqWeightMethod(g_SeqWeight1);
+
+	TextFile file1(g_pstrFileName1);
+	TextFile file2(g_pstrFileName2);
+
+	MSA msa1;
+	MSA msa2;
+	MSA msaOut;
+
+	msa1.FromFile(file1);
+	msa2.FromFile(file2);
+
+	ALPHA Alpha = ALPHA_Undefined;
+	switch (g_SeqType)
+		{
+	case SEQTYPE_Auto:
+		Alpha = msa1.GuessAlpha();
+		break;
+
+	case SEQTYPE_Protein:
+		Alpha = ALPHA_Amino;
+		break;
+
+	case SEQTYPE_DNA:
+		Alpha = ALPHA_DNA;
+		break;
+
+	case SEQTYPE_RNA:
+		Alpha = ALPHA_RNA;
+		break;
+
+	default:
+		Quit("Invalid seq type");
+		}
+	SetAlpha(Alpha);
+	msa1.FixAlpha();
+	msa2.FixAlpha();
+	SetPPScore();
+
+// The id space is sized for the sequences of both inputs together.
+	const unsigned uSeqCount1 = msa1.GetSeqCount();
+	const unsigned uSeqCount2 = msa2.GetSeqCount();
+	const unsigned uSumSeqCount = uSeqCount1 + uSeqCount2;
+	MSA::SetIdCount(uSumSeqCount);
+
+	ProfileProfile(msa1, msa2, msaOut);
+
+	MuscleOutput(msaOut);
+	}

Added: trunk/packages/muscle/branches/upstream/current/profile.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/profile.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/profile.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,127 @@
+#ifndef FastProf2_h
+#define FastProf2_h
+
+#include "msa.h"
+#include "pwpath.h"
+#include <math.h>	// for log function
+
+class DiagList;
+class WeightList;
+
+// One position (alignment column) of a profile, as filled in by
+// ProfileFromMSA.
+struct ProfPos
+	{
+	// True if the column consists of gaps only (MSA::IsGapColumn).
+	bool m_bAllGaps;
+	// Letter indexes ordered by descending count (see SortCounts).
+	unsigned m_uSortOrder[21];
+	// Weighted letter counts from MSA::GetFractionalWeightedCounts.
+	FCOUNT m_fcCounts[20];
+	// Transition frequencies reported by GetFractionalWeightedCounts.
+	// NOTE(review): presumably L = letter, G = gap for this column and a
+	// neighboring one -- confirm against GetFractionalWeightedCounts.
+	FCOUNT m_LL;
+	FCOUNT m_LG;
+	FCOUNT m_GL;
+	FCOUNT m_GG;
+	// m_AAScores[i] = sum over j of m_fcCounts[j] * ScoreMatrix[i][j].
+	SCORE m_AAScores[20];
+	// Consensus residue group of the column, or RESIDUE_GROUP_MULTIPLE.
+	unsigned m_uResidueGroup;
+	// Occupancy value reported by GetFractionalWeightedCounts.
+	FCOUNT m_fOcc;
+	// 1 - (gap-start fraction) and 1 - (gap-end fraction) of the column.
+	FCOUNT m_fcStartOcc;
+	FCOUNT m_fcEndOcc;
+	// Half of the global gap-open score scaled by start / end occupancy.
+	SCORE m_scoreGapOpen;
+	SCORE m_scoreGapClose;
+#if	DOUBLE_AFFINE
+	// Same as above for the second gap-open score (g_scoreGapOpen2).
+	SCORE m_scoreGapOpen2;
+	SCORE m_scoreGapClose2;
+#endif
+//	SCORE m_scoreGapExtend;
+	};
+
+// Working state kept for one node of the guide tree during progressive
+// alignment: the node's alignment and its profile, the pair-wise path
+// and edit strings, plus the profile length and the node's weight.
+struct ProgNode
+	{
+	ProgNode()
+		{
+		m_Prof = 0;
+		m_EstringL = 0;
+		m_EstringR = 0;
+	// Give the scalar members a defined value as well, so that a node
+	// which has not been filled in yet can be inspected safely.
+		m_uLength = 0;
+		m_Weight = 0;
+		}
+	MSA m_MSA;
+	ProfPos *m_Prof;
+	PWPath m_Path;
+	short *m_EstringL;
+	short *m_EstringR;
+	unsigned m_uLength;
+	WEIGHT m_Weight;
+	};
+
+// Residue group of each letter index; RESIDUE_GROUP_MULTIPLE marks a
+// profile position whose letters do not share a single group.
+extern unsigned ResidueGroup[];
+const unsigned RESIDUE_GROUP_MULTIPLE = (unsigned) ~0;
+
+// Substitution score matrix currently in effect.
+extern PTR_SCOREMATRIX g_ptrScoreMatrix;
+
+// Builds a profile with one ProfPos per column of the alignment. The
+// array is allocated with new[] (see profilefrommsa.cpp).
+ProfPos *ProfileFromMSA(const MSA &a);
+
+// Profile-profile alignment and scoring.
+SCORE TraceBack(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,
+  PWPath &Path);
+SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path);
+void ProgressiveAlign(const SeqVect &v, const Tree &tree, MSA &a);
+SCORE MSAPairSP(const MSA &msa1, const MSA &msa2);
+
+void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB,
+  MSA &msaCombined);
+
+// Logs a profile; when ptrMSA is given, the residues of each column are
+// logged next to the corresponding profile position.
+void ListProfile(const ProfPos *Prof, unsigned uLength, const MSA *ptrMSA = 0);
+SCORE ScoreProfPos2(const ProfPos &PPA, const ProfPos &PPB);
+SCORE FastScorePath2(const ProfPos *PA, unsigned uLengthA,
+  const ProfPos *PB, unsigned uLengthB, const PWPath &Path);
+// Defined in posgap.cpp.
+bool IsHydrophilic(const FCOUNT fcCounts[]);
+int PAM200_Letter(unsigned uLetter1, unsigned uLetter2);
+SCORE AverageMatchScore(const PWPath &Path, unsigned uEdgeIndex,
+  unsigned uWindowLength);
+void WindowSmooth(const SCORE Score[], unsigned uCount, unsigned uWindowLength,
+  SCORE SmoothScore[], double dCeil = 9e29);
+SCORE FastScoreMSA_LA(const MSA &msa, SCORE MatchScore[] = 0);
+SCORE FastScoreMSA_NS(const MSA &msa, SCORE MatchScore[] = 0);
+SCORE FastScoreMSA_SP(const MSA &msa, SCORE MatchScore[] = 0);
+
+// Refinement and tree-related entry points.
+bool RefineMSA(MSA &msa, const Tree &tree);
+SCORE MSAQScore(const MSA &msa, SCORE MatchScore[] = 0);
+bool RefineBiParts(MSA &msa, const Tree &tree, bool R);
+void FindAnchorCols(const MSA &msa, unsigned AnchorCols[],
+  unsigned *ptruAnchorColCount);
+double PctIdToHeight(double dPctId);
+double PctIdToHeightKimura(double dPctId);
+double PctIdToHeightMAFFT(double dPctId);
+double PctIdToMAFFTDist(double dPctId);
+bool RefineBlocks(MSA &msa, const Tree &tree);
+bool RefineSubfams(MSA &msaIn, const Tree &tree, unsigned uIters);
+void SetMuscleTree(const Tree &tree);
+void CalcClustalWWeights(const Tree &tree, WEIGHT Weights[]);
+void RealignDiffs(const MSA &msaIn, const Tree &Diffs,
+  const unsigned IdToDiffsTreeNodeIndex[], MSA &msaOut);
+void RealignDiffsE(const MSA &msaIn, const SeqVect &v,
+  const Tree &NewTree, const Tree &OldTree,
+  const unsigned uNewNodeIndexToOldNodeIndex[],
+  MSA &msaOut, ProgNode *OldProgNodes);
+void RefineTree(MSA &msa, Tree &tree);
+void RefineTreeE(MSA &msa, const SeqVect &v, Tree &tree, ProgNode *ProgNodes);
+void SetScoreMatrix();
+// Defined in posgap.cpp.
+extern bool IsHydrophobic(const FCOUNT fcCounts[]);
+void Hydro(ProfPos *Prof, unsigned uLength);
+void SetTermGaps(const ProfPos *Prof, unsigned uLength);
+
+// Macros to simulate 2D matrices on top of flat arrays.
+// NAME(PLA, PLB) expands to NAME_[(PLB)*uPrefixCountA + (PLA)], so each
+// macro requires both the flat array NAME_ and a variable uPrefixCountA
+// to be in scope where it is used; PLA is the fastest-varying index.
+#define DPL(PLA, PLB)	DPL_[(PLB)*uPrefixCountA + (PLA)]
+#define DPM(PLA, PLB)	DPM_[(PLB)*uPrefixCountA + (PLA)]
+#define DPD(PLA, PLB)	DPD_[(PLB)*uPrefixCountA + (PLA)]
+#define DPE(PLA, PLB)	DPE_[(PLB)*uPrefixCountA + (PLA)]
+#define DPI(PLA, PLB)	DPI_[(PLB)*uPrefixCountA + (PLA)]
+#define DPJ(PLA, PLB)	DPJ_[(PLB)*uPrefixCountA + (PLA)]
+#define DPU(PLA, PLB)	DPU_[(PLB)*uPrefixCountA + (PLA)]
+#define TBM(PLA, PLB)	TBM_[(PLB)*uPrefixCountA + (PLA)]
+#define TBD(PLA, PLB)	TBD_[(PLB)*uPrefixCountA + (PLA)]
+#define TBE(PLA, PLB)	TBE_[(PLB)*uPrefixCountA + (PLA)]
+#define TBI(PLA, PLB)	TBI_[(PLB)*uPrefixCountA + (PLA)]
+#define TBJ(PLA, PLB)	TBJ_[(PLB)*uPrefixCountA + (PLA)]
+
+// Variants of the score for a pair of profile positions, one per
+// profile scoring scheme (LA, NS, SP, SPN).
+SCORE ScoreProfPos2LA(const ProfPos &PPA, const ProfPos &PPB);
+SCORE ScoreProfPos2NS(const ProfPos &PPA, const ProfPos &PPB);
+SCORE ScoreProfPos2SP(const ProfPos &PPA, const ProfPos &PPB);
+SCORE ScoreProfPos2SPN(const ProfPos &PPA, const ProfPos &PPB);
+
+#endif // FastProf2_h

Added: trunk/packages/muscle/branches/upstream/current/profilefrommsa.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/profilefrommsa.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/profilefrommsa.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,318 @@
+#include "muscle.h"
+#include "msa.h"
+#include "profile.h"
+
+#define TRACE	0
+
+// Log a fractional count in a 7-character field: values strictly
+// between -0.00001 and 0.00001 are logged as blanks, anything else as
+// two spaces followed by "%5.3f".
+static void LogF(FCOUNT f)
+	{
+	const bool bNearZero = (f > -0.00001 && f < 0.00001);
+	if (!bNearZero)
+		{
+		Log("  %5.3f", f);
+		return;
+		}
+	Log("       ");
+	}
+
+// Format a score as "%5.1f", or as "    *" when its magnitude exceeds
+// 1e10. The result points either at a string literal or at a static
+// buffer that the next call overwrites.
+static const char *LocalScoreToStr(SCORE s)
+	{
+	static char str[16];
+	const bool bOutOfRange = (s < -1e10 || s > 1e10);
+	if (!bOutOfRange)
+		{
+		sprintf(str, "%5.1f", s);
+		return str;
+		}
+	return "    *";
+	}
+
+#if	DOUBLE_AFFINE || SINGLE_AFFINE
+// Log column uColIndex of *ptrMSA as two spaces followed by one
+// character per sequence. Does nothing when ptrMSA is null.
+static void LogMSAColumn(const MSA *ptrMSA, unsigned uColIndex)
+	{
+	if (0 == ptrMSA)
+		return;
+	const unsigned uSeqCount = ptrMSA->GetSeqCount();
+	Log("  ");
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		Log("%c", ptrMSA->GetChar(uSeqIndex, uColIndex));
+	}
+
+// Log the letter-count table of a profile listing: a heading with one
+// column per letter of the current alphabet, then for every position
+// its residue group ("-" for RESIDUE_GROUP_MULTIPLE) and its non-zero
+// letter counts, followed by the MSA column when ptrMSA is given.
+static void LogProfileCounts(const ProfPos *Prof, unsigned uLength, const MSA *ptrMSA)
+	{
+	Log("\n");
+	Log("  Pos G");
+	for (unsigned n = 0; n < g_AlphaSize; ++n)
+		Log("     %c", LetterExToChar(n));
+	Log("\n");
+	Log("  --- -");
+	for (unsigned n = 0; n < g_AlphaSize; ++n)
+		Log(" -----");
+	Log("\n");
+
+	for (unsigned n = 0; n < uLength; ++n)
+		{
+		const ProfPos &PP = Prof[n];
+		Log("%5u", n);
+		if (RESIDUE_GROUP_MULTIPLE == PP.m_uResidueGroup)
+			Log(" -");
+		else
+			Log(" %u", PP.m_uResidueGroup);
+
+		for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
+			{
+			FCOUNT f = PP.m_fcCounts[uLetter];
+			if (f == 0.0)
+				Log("      ");
+			else
+				Log(" %5.3f", f);
+			}
+		LogMSAColumn(ptrMSA, n);
+		Log("\n");
+		}
+	}
+#endif
+
+#if	DOUBLE_AFFINE
+// Log a profile as two tables: occupancy, transition frequencies and
+// (negated) gap open/close scores for both gap penalties per position,
+// then the letter counts per position. When ptrMSA is given, each row is
+// followed by the residues of that alignment column.
+void ListProfile(const ProfPos *Prof, unsigned uLength, const MSA *ptrMSA)
+	{
+	Log("  Pos  Occ     LL     LG     GL     GG     Open  Close  Open2  Clos2\n");
+	Log("  ---  ---     --     --     --     --     ----  -----  -----  -----\n");
+	for (unsigned n = 0; n < uLength; ++n)
+		{
+		const ProfPos &PP = Prof[n];
+		Log("%5u", n);
+		LogF(PP.m_fOcc);
+		LogF(PP.m_LL);
+		LogF(PP.m_LG);
+		LogF(PP.m_GL);
+		LogF(PP.m_GG);
+		Log("  %s", LocalScoreToStr(-PP.m_scoreGapOpen));
+		Log("  %s", LocalScoreToStr(-PP.m_scoreGapClose));
+		Log("  %s", LocalScoreToStr(-PP.m_scoreGapOpen2));
+		Log("  %s", LocalScoreToStr(-PP.m_scoreGapClose2));
+		LogMSAColumn(ptrMSA, n);
+		Log("\n");
+		}
+
+	LogProfileCounts(Prof, uLength, ptrMSA);
+	}
+#endif	// DOUBLE_AFFINE
+
+#if SINGLE_AFFINE
+// Log a profile as two tables: occupancy, transition frequencies and
+// (negated) gap open/close scores per position, then the letter counts
+// per position. When ptrMSA is given, each row is followed by the
+// residues of that alignment column.
+void ListProfile(const ProfPos *Prof, unsigned uLength, const MSA *ptrMSA)
+	{
+	Log("  Pos  Occ     LL     LG     GL     GG     Open  Close\n");
+	Log("  ---  ---     --     --     --     --     ----  -----\n");
+	for (unsigned n = 0; n < uLength; ++n)
+		{
+		const ProfPos &PP = Prof[n];
+		Log("%5u", n);
+		LogF(PP.m_fOcc);
+		LogF(PP.m_LL);
+		LogF(PP.m_LG);
+		LogF(PP.m_GL);
+		LogF(PP.m_GG);
+		Log("  %5.1f", -PP.m_scoreGapOpen);
+		Log("  %5.1f", -PP.m_scoreGapClose);
+		LogMSAColumn(ptrMSA, n);
+		Log("\n");
+		}
+
+	LogProfileCounts(Prof, uLength, ptrMSA);
+	}
+#endif
+
+// Fill SortOrder[0 .. g_AlphaSize-1] with letter indexes ordered by
+// descending count. Letters with equal counts keep their index order,
+// because neighbors are only exchanged when strictly out of order.
+void SortCounts(const FCOUNT fcCounts[], unsigned SortOrder[])
+	{
+	static unsigned InitialSortOrder[MAX_ALPHA] =
+		{
+		0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
+		};
+	memcpy(SortOrder, InitialSortOrder, g_AlphaSize*sizeof(unsigned));
+
+// Bubble sort: repeat passes until one completes without an exchange.
+	bool bAny = true;
+	while (bAny)
+		{
+		bAny = false;
+	// "n + 1 < size" rather than "n < size - 1": the size is unsigned, so
+	// the subtraction would wrap around if the alphabet size were 0.
+		for (unsigned n = 0; n + 1 < g_AlphaSize; ++n)
+			{
+			unsigned i1 = SortOrder[n];
+			unsigned i2 = SortOrder[n+1];
+			if (fcCounts[i1] < fcCounts[i2])
+				{
+				SortOrder[n+1] = i1;
+				SortOrder[n] = i2;
+				bAny = true;
+				}
+			}
+		}
+	}
+
+// Return the residue group shared by all amino acid letters that have a
+// non-zero count, or RESIDUE_GROUP_MULTIPLE when they span more than one
+// group or when no letter has a count.
+static unsigned AminoGroupFromFCounts(const FCOUNT fcCounts[])
+	{
+	unsigned uCommonGroup = RESIDUE_GROUP_MULTIPLE;
+	bool bSeenLetter = false;
+	for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
+		{
+		if (0 == fcCounts[uLetter])
+			continue;
+
+		const unsigned uGroup = ResidueGroup[uLetter];
+		if (!bSeenLetter)
+			{
+		// First letter present: its group is the candidate.
+			bSeenLetter = true;
+			uCommonGroup = uGroup;
+			}
+		else if (uGroup != uCommonGroup)
+			return RESIDUE_GROUP_MULTIPLE;
+		}
+	return uCommonGroup;
+	}
+
+// Nucleotide counterpart of AminoGroupFromFCounts: every one of the four
+// letters is its own group, so the result is the index of the only
+// letter with a non-zero count, or RESIDUE_GROUP_MULTIPLE when several
+// letters (or none) have a count.
+static unsigned NucleoGroupFromFCounts(const FCOUNT fcCounts[])
+	{
+	unsigned uCommonGroup = RESIDUE_GROUP_MULTIPLE;
+	bool bSeenLetter = false;
+	for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
+		{
+		if (0 == fcCounts[uLetter])
+			continue;
+
+		if (!bSeenLetter)
+			{
+			bSeenLetter = true;
+			uCommonGroup = uLetter;
+			}
+		else if (uLetter != uCommonGroup)
+			return RESIDUE_GROUP_MULTIPLE;
+		}
+	return uCommonGroup;
+	}
+
+// Dispatch on the current alphabet to the matching group function.
+// Quits for any alphabet other than amino acid, DNA or RNA.
+unsigned ResidueGroupFromFCounts(const FCOUNT fcCounts[])
+	{
+	if (ALPHA_Amino == g_Alpha)
+		return AminoGroupFromFCounts(fcCounts);
+
+	if (ALPHA_DNA == g_Alpha || ALPHA_RNA == g_Alpha)
+		return NucleoGroupFromFCounts(fcCounts);
+
+	Quit("ResidueGroupFromFCounts: bad alpha");
+	return 0;
+	}
+
+// Build a profile with one ProfPos per column of the alignment.
+//
+// Sequence weights are (re)computed on the alignment first. For each
+// column the function records whether it is all gaps, the weighted
+// letter counts, occupancy and transition frequencies reported by
+// GetFractionalWeightedCounts, the letters sorted by count, the residue
+// group, the score of the column against each letter, and gap open/close
+// scores scaled by the column's start/end occupancy.
+//
+// Returns an array of GetColCount() elements allocated with new[]; the
+// caller is expected to release it with delete[].
+ProfPos *ProfileFromMSA(const MSA &a)
+	{
+	const unsigned uColCount = a.GetColCount();
+
+// Yuck -- cast away const (inconsistent design here).
+	SetMSAWeightsMuscle((MSA &) a);
+
+	ProfPos *Pos = new ProfPos[uColCount];
+
+	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
+		{
+		ProfPos &PP = Pos[uColIndex];
+
+		PP.m_bAllGaps = a.IsGapColumn(uColIndex);
+
+		FCOUNT fcGapStart;
+		FCOUNT fcGapEnd;
+		FCOUNT fcGapExtend;
+		FCOUNT fOcc;
+		a.GetFractionalWeightedCounts(uColIndex, g_bNormalizeCounts, PP.m_fcCounts,
+		  &fcGapStart, &fcGapEnd, &fcGapExtend, &fOcc,
+		  &PP.m_LL, &PP.m_LG, &PP.m_GL, &PP.m_GG);
+		PP.m_fOcc = fOcc;
+
+		SortCounts(PP.m_fcCounts, PP.m_uSortOrder);
+
+		PP.m_uResidueGroup = ResidueGroupFromFCounts(PP.m_fcCounts);
+
+	// Score of this column against each letter i: the count-weighted
+	// sum of the substitution scores of i with every letter j.
+		for (unsigned i = 0; i < g_AlphaSize; ++i)
+			{
+			SCORE scoreSum = 0;
+			for (unsigned j = 0; j < g_AlphaSize; ++j)
+				scoreSum += PP.m_fcCounts[j]*(*g_ptrScoreMatrix)[i][j];
+			PP.m_AAScores[i] = scoreSum;
+			}
+
+		SCORE sStartOcc = (SCORE) (1.0 - fcGapStart);
+		SCORE sEndOcc = (SCORE) (1.0 - fcGapEnd);
+
+		PP.m_fcStartOcc = sStartOcc;
+		PP.m_fcEndOcc = sEndOcc;
+
+	// The gap-open score is split in half between open and close.
+		PP.m_scoreGapOpen = sStartOcc*g_scoreGapOpen/2;
+		PP.m_scoreGapClose = sEndOcc*g_scoreGapOpen/2;
+#if	DOUBLE_AFFINE
+		PP.m_scoreGapOpen2 = sStartOcc*g_scoreGapOpen2/2;
+		PP.m_scoreGapClose2 = sEndOcc*g_scoreGapOpen2/2;
+#endif
+//		PP.m_scoreGapExtend = (SCORE) ((1.0 - fcGapExtend)*scoreGapExtend);
+
+#if	PAF
+		if (ALPHA_Amino == g_Alpha && sStartOcc > 0.5)
+			{
+			extern SCORE PAFactor(const FCOUNT fcCounts[]);
+			SCORE paf = PAFactor(PP.m_fcCounts);
+			PP.m_scoreGapOpen *= paf;
+			PP.m_scoreGapClose *= paf;
+			}
+#endif
+		}
+
+#if	HYDRO
+	if (ALPHA_Amino == g_Alpha)
+		Hydro(Pos, uColCount);
+#endif
+
+#if	TRACE
+	{
+	Log("ProfileFromMSA\n");
+	ListProfile(Pos, uColCount, &a);
+	}
+#endif
+	return Pos;
+	}

Added: trunk/packages/muscle/branches/upstream/current/progalign.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/progalign.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/progalign.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,206 @@
+#include "muscle.h"
+#include "tree.h"
+#include "seqvect.h"
+#include "profile.h"
+#include "msa.h"
+#include "pwpath.h"
+#include "distfunc.h"
+#include "textfile.h"
+#include "estring.h"
+
+#define TRACE		0
+#define VALIDATE	0
+#define TRACE_LENGTH_DELTA	0
+
+// Write the names of the leaves that GetLeaves() reports for node
+// uNodeIndex to the log, separated by commas (no trailing newline).
+static void LogLeafNames(const Tree &tree, unsigned uNodeIndex)
+	{
+	// Scratch array sized by the total node count of the tree.
+	unsigned *LeafNodeIndexes = new unsigned[tree.GetNodeCount()];
+	unsigned uFound;
+	GetLeaves(tree, uNodeIndex, LeafNodeIndexes, &uFound);
+	for (unsigned n = 0; n < uFound; ++n)
+		{
+		if (n != 0)
+			Log(",");
+		Log("%s", tree.GetLeafName(LeafNodeIndexes[n]));
+		}
+	delete[] LeafNodeIndexes;
+	}
+
+ProgNode *ProgressiveAlignE(const SeqVect &v, const Tree &GuideTree, MSA &a)	// Align v along GuideTree into a; returns the ProgNode array allocated here (not freed here)
+	{
+	assert(GuideTree.IsRooted());
+
+#if	TRACE
+	Log("GuideTree:\n");
+	GuideTree.LogMe();
+#endif
+
+	const unsigned uSeqCount = v.Length();
+	const unsigned uNodeCount = 2*uSeqCount - 1;	// one node per sequence plus uSeqCount - 1 joins
+	const unsigned uIterCount = uSeqCount - 1;	// NOTE(review): not referenced again in this function
+
+	WEIGHT *Weights = new WEIGHT[uSeqCount];	// indexed by sequence id; released before returning
+	CalcClustalWWeights(GuideTree, Weights);
+
+	ProgNode *ProgNodes = new ProgNode[uNodeCount];	// indexed by guide tree node index; this is the return value
+
+	unsigned uJoin = 0;	// number of internal nodes processed so far, for progress output
+	unsigned uTreeNodeIndex = GuideTree.FirstDepthFirstNode();
+	SetProgressDesc("Align node");
+	do
+		{
+		if (GuideTree.IsLeaf(uTreeNodeIndex))
+			{
+			if (uTreeNodeIndex >= uNodeCount)
+				Quit("TreeNodeIndex=%u NodeCount=%u\n", uTreeNodeIndex, uNodeCount);
+			ProgNode &Node = ProgNodes[uTreeNodeIndex];
+			unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex);
+			if (uId >= uSeqCount)
+				Quit("Seq index out of range");
+			const Seq &s = *(v[uId]);
+			Node.m_MSA.FromSeq(s);	// leaf: a one-sequence alignment
+			Node.m_MSA.SetSeqId(0, uId);
+			Node.m_uLength = Node.m_MSA.GetColCount();
+			Node.m_Weight = Weights[uId];
+		// TODO: Term gaps settable
+			Node.m_Prof = ProfileFromMSA(Node.m_MSA);
+			Node.m_EstringL = 0;	// leaves carry no e-strings
+			Node.m_EstringR = 0;
+#if	TRACE
+			Log("Leaf id=%u\n", uId);
+			Log("MSA=\n");
+			Node.m_MSA.LogMe();
+			Log("Profile (from MSA)=\n");
+			ListProfile(Node.m_Prof, Node.m_uLength, &Node.m_MSA);
+#endif
+			}
+		else
+			{
+			Progress(uJoin, uSeqCount - 1);
+			++uJoin;
+
+			const unsigned uMergeNodeIndex = uTreeNodeIndex;
+			ProgNode &Parent = ProgNodes[uMergeNodeIndex];
+
+			const unsigned uLeft = GuideTree.GetLeft(uTreeNodeIndex);
+			const unsigned uRight = GuideTree.GetRight(uTreeNodeIndex);
+
+			if (g_bVerbose)
+				{
+				Log("Align: (");
+				LogLeafNames(GuideTree, uLeft);
+				Log(") (");
+				LogLeafNames(GuideTree, uRight);
+				Log(")\n");
+				}
+
+			ProgNode &Node1 = ProgNodes[uLeft];	// children are expected to be filled in already (depth-first visiting order) -- TODO confirm against Tree
+			ProgNode &Node2 = ProgNodes[uRight];
+
+#if	TRACE
+			Log("AlignTwoMSAs:\n");
+#endif
+			AlignTwoProfs(	// aligns the child profiles; outputs the path plus the parent's profile and length
+			  Node1.m_Prof, Node1.m_uLength, Node1.m_Weight,
+			  Node2.m_Prof, Node2.m_uLength, Node2.m_Weight,
+			  Parent.m_Path,
+			  &Parent.m_Prof, &Parent.m_uLength);
+#if	TRACE_LENGTH_DELTA
+			{
+			unsigned L = Node1.m_uLength;
+			unsigned R = Node2.m_uLength;
+			unsigned P = Parent.m_Path.GetEdgeCount();
+			unsigned Max = L > R ? L : R;
+			unsigned d = P - Max;
+			Log("LD%u;%u;%u;%u\n", L, R, P, d);
+			}
+#endif
+			PathToEstrings(Parent.m_Path, &Parent.m_EstringL, &Parent.m_EstringR);
+
+			Parent.m_Weight = Node1.m_Weight + Node2.m_Weight;
+
+#if	VALIDATE
+			{
+#if	TRACE
+			Log("AlignTwoMSAs:\n");
+#endif
+			PWPath TmpPath;
+			AlignTwoMSAs(Node1.m_MSA, Node2.m_MSA, Parent.m_MSA, TmpPath);	// cross-check: redo the join on the MSAs and compare with the profile-based result
+			ProfPos *P1 = ProfileFromMSA(Node1.m_MSA, true);
+			ProfPos *P2 = ProfileFromMSA(Node2.m_MSA, true);
+			unsigned uLength = Parent.m_MSA.GetColCount();
+			ProfPos *TmpProf = ProfileFromMSA(Parent.m_MSA, true);
+
+#if	TRACE
+			Log("Node1 MSA=\n");
+			Node1.m_MSA.LogMe();
+
+			Log("Node1 prof=\n");
+			ListProfile(Node1.m_Prof, Node1.m_MSA.GetColCount(), &Node1.m_MSA);
+			Log("Node1 prof (from MSA)=\n");
+			ListProfile(P1, Node1.m_MSA.GetColCount(), &Node1.m_MSA);
+
+			AssertProfsEq(Node1.m_Prof, Node1.m_uLength, P1, Node1.m_MSA.GetColCount());
+
+			Log("Node2 prof=\n");
+			ListProfile(Node2.m_Prof, Node2.m_MSA.GetColCount(), &Node2.m_MSA);
+
+			Log("Node2 MSA=\n");
+			Node2.m_MSA.LogMe();
+
+			Log("Node2 prof (from MSA)=\n");
+			ListProfile(P2, Node2.m_MSA.GetColCount(), &Node2.m_MSA);
+
+			AssertProfsEq(Node2.m_Prof, Node2.m_uLength, P2, Node2.m_MSA.GetColCount());
+
+			TmpPath.AssertEqual(Parent.m_Path);
+
+			Log("Parent MSA=\n");
+			Parent.m_MSA.LogMe();
+
+			Log("Parent prof=\n");
+			ListProfile(Parent.m_Prof, Parent.m_uLength, &Parent.m_MSA);
+
+			Log("Parent prof (from MSA)=\n");
+			ListProfile(TmpProf, Parent.m_MSA.GetColCount(), &Parent.m_MSA);
+
+#endif	// TRACE
+			AssertProfsEq(Parent.m_Prof, Parent.m_uLength,
+			  TmpProf, Parent.m_MSA.GetColCount());
+			delete[] P1;
+			delete[] P2;
+			delete[] TmpProf;
+			}
+#endif	// VALIDATE
+
+			Node1.m_MSA.Clear();	// child alignments are dropped once the join is done
+			Node2.m_MSA.Clear();
+
+		// Don't delete profiles, may need them for tree refinement.
+			//delete[] Node1.m_Prof;
+			//delete[] Node2.m_Prof;
+			//Node1.m_Prof = 0;
+			//Node2.m_Prof = 0;
+			}
+		uTreeNodeIndex = GuideTree.NextDepthFirstNode(uTreeNodeIndex);
+		}
+	while (NULL_NEIGHBOR != uTreeNodeIndex);	// NULL_NEIGHBOR ends the traversal
+	ProgressStepsDone();
+
+	if (g_bBrenner)
+		MakeRootMSABrenner((SeqVect &) v, GuideTree, ProgNodes, a);	// NOTE(review): cast removes const from v
+	else
+		MakeRootMSA(v, GuideTree, ProgNodes, a);
+
+#if	VALIDATE
+	{
+	unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
+	const ProgNode &RootProgNode = ProgNodes[uRootNodeIndex];
+	AssertMSAEq(a, RootProgNode.m_MSA);
+	}
+#endif
+
+	delete[] Weights;
+	return ProgNodes;	// ownership of the array passes to the caller
+	}

Added: trunk/packages/muscle/branches/upstream/current/progress.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/progress.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/progress.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,186 @@
+#include "muscle.h"
+#include <stdio.h>
+#include <time.h>
+
+// Functions that provide visible feedback to the user
+// that progress is being made.
+
+static unsigned g_uIter = 0;		// Main MUSCLE iteration 1, 2.. (set by SetIter, bumped by IncIter)
+static unsigned g_uLocalMaxIters = 0;	// Max iters (set by SetMaxIters)
+static FILE *g_fProgress = stderr;	// Stream for progress output; defaults to standard error
+static char g_strFileName[32];		// Input file name as produced by NameFromPath in SetInputFileName
+static time_t g_tLocalStart;				// Start time recorded by SetStartTime
+static char g_strDesc[32];			// Description of the current step (set by SetProgressDesc, truncated to fit)
+static bool g_bWipeDesc = false;	// True after ProgressStepsDone: next Progress line pads over a longer previous description
+static int g_nPrevDescLength;	// Length of the description at the last ProgressStepsDone
+static unsigned g_uTotalSteps;	// Step total from the most recent Progress(step, total) call
+
+// Return the current memory use in MB, truncated to a whole number.
+// When a limit is set (g_uMaxMB != 0) and the use exceeds it, report the
+// problem on stderr, call SaveCurrentAlignment() and exit with
+// EXIT_FatalError instead of returning.
+double GetCheckMemUseMB()
+	{
+	const unsigned uUsedMB = (unsigned) GetMemUseMB();
+	const bool bOverLimit = (g_uMaxMB != 0 && uUsedMB > g_uMaxMB);
+	if (bOverLimit)
+		{
+		fprintf(stderr, "\n\n*** MAX MEMORY %u MB EXCEEDED***\n", g_uMaxMB);
+		fprintf(stderr, "Memory allocated so far %u MB, physical RAM %u MB\n",
+		  uUsedMB, (unsigned) GetRAMSizeMB());
+		fprintf(stderr, "Use -maxmb <n> option to increase limit, where <n> is in MB.\n");
+		SaveCurrentAlignment();
+		exit(EXIT_FatalError);
+		}
+	return uUsedMB;
+	}
+
+// Seconds elapsed since the time recorded in g_tLocalStart, formatted
+// by SecsToStr().
+const char *ElapsedTimeAsStr()
+	{
+	const time_t tNow = time(0);
+	return SecsToStr((unsigned long) (tNow - g_tLocalStart));
+	}
+
+// Format the peak memory use seen so far as "<peak> MB(<pct>%)", where
+// <pct> is the peak as a percentage of GetRAMSizeMB(), capped at 100.
+// MB is the current use; the peak over all calls is what gets printed.
+// Returns "" when MB is negative. Otherwise the result points to a static
+// buffer that the next call overwrites.
+const char *MemToStr(double MB)
+	{
+	if (MB < 0)
+		return "";
+
+	// Large enough for realistic values; snprintf below truncates rather
+	// than overruns if a value is ever wider than expected. (A 9-byte
+	// buffer is already too small for "100 MB(10%)".)
+	static char Str[64];
+	static double MaxMB = 0;
+	static double RAMMB = 0;
+
+	// RAM size is looked up once and cached.
+	if (RAMMB == 0)
+		RAMMB = GetRAMSizeMB();
+
+	if (MB > MaxMB)
+		MaxMB = MB;
+	double Pct = (MaxMB*100.0)/RAMMB;
+	if (Pct > 100)
+		Pct = 100;
+	snprintf(Str, sizeof(Str), "%.0f MB(%.0f%%)", MaxMB, Pct);
+	return Str;
+	}
+
+void SetInputFileName(const char *pstrFileName)	// Record the input file's name for SetSeqStats output
+	{
+	NameFromPath(pstrFileName, g_strFileName, sizeof(g_strFileName));	// result goes into the 32-byte g_strFileName; presumably the name without its directory -- verify in NameFromPath
+	}
+
+// Print the sequence count and the maximum and average lengths on the
+// progress stream, prefixed by the input file name. When g_bVerbose is set
+// the same figures also go to the log. Nothing is written when g_bQuiet.
+void SetSeqStats(unsigned uSeqCount, unsigned uMaxL, unsigned uAvgL)
+	{
+	if (!g_bQuiet)
+		{
+		fprintf(g_fProgress, "%s %u seqs, max length %u, avg  length %u\n",
+		  g_strFileName, uSeqCount, uMaxL, uAvgL);
+		if (g_bVerbose)
+			Log("%u seqs, max length %u, avg  length %u\n",
+			  uSeqCount, uMaxL, uAvgL);
+		}
+	}
+
+void SetStartTime()	// Record "now" as the reference point for ElapsedTimeAsStr()
+	{
+	time(&g_tLocalStart);
+	}
+
+unsigned long GetStartTime()	// Return the time recorded by SetStartTime(), as an unsigned long
+	{
+	return (unsigned long) g_tLocalStart;
+	}
+
+void SetIter(unsigned uIter)	// Set the iteration number shown in progress lines
+	{
+	g_uIter = uIter;
+	}
+
+void IncIter()	// Advance the iteration number shown in progress lines by one
+	{
+	++g_uIter;
+	}
+
+void SetMaxIters(unsigned uMaxIters)	// Store the iteration limit in g_uLocalMaxIters
+	{
+	g_uLocalMaxIters = uMaxIters;
+	}
+
+void SetProgressDesc(const char szDesc[])	// Set the description printed by later Progress(step, total) lines
+	{
+	strncpy(g_strDesc, szDesc, sizeof(g_strDesc));	// strncpy alone leaves no terminator when szDesc fills the buffer...
+	g_strDesc[sizeof(g_strDesc) - 1] = 0;	// ...so terminate explicitly; longer descriptions are truncated
+	}
+
+// Write n spaces to the progress stream; nothing is written when n <= 0.
+static void Wipe(int n)
+	{
+	for (int nLeft = n; nLeft > 0; --nLeft)
+		fprintf(g_fProgress, " ");
+	}
+
+// Print a one-off printf-style message on the progress stream, preceded by
+// the elapsed time and the peak memory use. CheckMaxTime() is called first,
+// even when output is suppressed by g_bQuiet.
+// szFormat and the following arguments are as for printf; the formatted
+// message is truncated if it does not fit in the 4096-byte work buffer.
+void Progress(const char *szFormat, ...)
+	{
+	CheckMaxTime();
+
+	if (g_bQuiet)
+		return;
+
+	double MB = GetCheckMemUseMB();
+
+	char szStr[4096];
+	va_list ArgList;
+	va_start(ArgList, szFormat);
+	// Bounded formatting: an over-long message must not overrun szStr.
+	vsnprintf(szStr, sizeof(szStr), szFormat, ArgList);
+	va_end(ArgList);
+
+	fprintf(g_fProgress, "\n%8.8s  %12s                    %s",
+	  ElapsedTimeAsStr(),
+	  MemToStr(MB),
+	  szStr);
+
+	fprintf(g_fProgress, "\n");
+	fflush(g_fProgress);
+	}
+
+// Print a progress line for step uStep (zero-based) out of uTotalSteps:
+// elapsed time, peak memory use, iteration number, percentage done and the
+// current description. The line ends with '\r' so that the next progress
+// line overwrites it. CheckMaxTime() is called first, even when output is
+// suppressed by g_bQuiet. uTotalSteps is remembered for ProgressStepsDone().
+void Progress(unsigned uStep, unsigned uTotalSteps)
+	{
+	CheckMaxTime();
+
+	if (g_bQuiet)
+		return;
+
+	// With no steps at all there is nothing left to do: show 100% rather
+	// than dividing by zero and printing "nan"/"inf".
+	double dPct = (0 == uTotalSteps) ? 100.0 : ((uStep + 1)*100.0)/uTotalSteps;
+	double MB = GetCheckMemUseMB();
+	fprintf(g_fProgress, "%8.8s  %12s  Iter %3u  %6.2f%%  %s",
+	  ElapsedTimeAsStr(),
+	  MemToStr(MB),
+	  g_uIter,
+	  dPct,
+	  g_strDesc);
+
+	if (g_bWipeDesc)
+		{
+		// Pad with spaces so a longer previous description is erased.
+		int n = g_nPrevDescLength - (int) strlen(g_strDesc);
+		Wipe(n);
+		g_bWipeDesc = false;
+		}
+
+	fprintf(g_fProgress, "\r");
+
+	g_uTotalSteps = uTotalSteps;
+	}
+
+void ProgressStepsDone()	// Finish the current run of Progress(step, total) lines
+	{
+	CheckMaxTime();
+
+	if (g_bVerbose)	// the log entry is written even when g_bQuiet suppresses the progress stream
+		{
+		double MB = GetCheckMemUseMB();
+		Log("Elapsed time %8.8s  Peak memory use %12s  Iteration %3u %s\n",
+		 ElapsedTimeAsStr(),
+		 MemToStr(MB),
+		 g_uIter,
+		 g_strDesc);
+		}
+
+	if (g_bQuiet)
+		return;
+
+	Progress(g_uTotalSteps - 1, g_uTotalSteps);	// reprint the line for the last step of the remembered total
+	fprintf(g_fProgress, "\n");	// move off the '\r'-terminated line so it stays visible
+	g_bWipeDesc = true;	// ask the next Progress line to erase leftovers of this description
+	g_nPrevDescLength = (int) strlen(g_strDesc);
+	}

Added: trunk/packages/muscle/branches/upstream/current/progressivealign.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/progressivealign.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/progressivealign.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,76 @@
+#include "muscle.h"
+#include <math.h>
+#include "tree.h"
+#include "seqvect.h"
+#include "profile.h"
+#include "msa.h"
+#include "pwpath.h"
+#include "distfunc.h"
+
+#define TRACE 0
+
+void ProgressiveAlign(const SeqVect &v, const Tree &GuideTree, MSA &a)	// Align v along GuideTree by joining child MSAs with AlignTwoMSAs; the root alignment is copied into a
+	{
+	assert(GuideTree.IsRooted());
+
+#if	TRACE
+	Log("GuideTree:\n");
+	GuideTree.LogMe();
+#endif
+
+	const unsigned uSeqCount = v.Length();
+	const unsigned uNodeCount = 2*uSeqCount - 1;	// one node per sequence plus uSeqCount - 1 joins
+
+	ProgNode *ProgNodes = new ProgNode[uNodeCount];	// indexed by guide tree node index; freed before returning
+
+	unsigned uJoin = 0;	// number of internal nodes processed so far, for progress output
+	unsigned uTreeNodeIndex = GuideTree.FirstDepthFirstNode();
+	SetProgressDesc("Align node");
+	do
+		{
+		if (GuideTree.IsLeaf(uTreeNodeIndex))
+			{
+			if (uTreeNodeIndex >= uNodeCount)
+				Quit("TreeNodeIndex=%u NodeCount=%u\n", uTreeNodeIndex, uNodeCount);
+			ProgNode &Node = ProgNodes[uTreeNodeIndex];
+			unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex);
+			if (uId >= uSeqCount)
+				Quit("Seq index out of range");
+			const Seq &s = *(v[uId]);
+			Node.m_MSA.FromSeq(s);	// leaf: a one-sequence alignment
+			Node.m_MSA.SetSeqId(0, uId);
+			Node.m_uLength = Node.m_MSA.GetColCount();
+			}
+		else
+			{
+			Progress(uJoin, uSeqCount - 1);
+			++uJoin;
+
+			const unsigned uMergeNodeIndex = uTreeNodeIndex;
+			ProgNode &Parent = ProgNodes[uMergeNodeIndex];
+
+			const unsigned uLeft = GuideTree.GetLeft(uTreeNodeIndex);
+			const unsigned uRight = GuideTree.GetRight(uTreeNodeIndex);
+
+			ProgNode &Node1 = ProgNodes[uLeft];	// children are expected to be filled in already (depth-first visiting order) -- TODO confirm against Tree
+			ProgNode &Node2 = ProgNodes[uRight];
+
+			PWPath Path;	// filled by AlignTwoMSAs, not used afterwards
+			AlignTwoMSAs(Node1.m_MSA, Node2.m_MSA, Parent.m_MSA, Path);
+			Parent.m_uLength = Parent.m_MSA.GetColCount();
+
+			Node1.m_MSA.Clear();	// child alignments are dropped once the join is done
+			Node2.m_MSA.Clear();
+			}
+		uTreeNodeIndex = GuideTree.NextDepthFirstNode(uTreeNodeIndex);
+		}
+	while (NULL_NEIGHBOR != uTreeNodeIndex);	// NULL_NEIGHBOR ends the traversal
+	ProgressStepsDone();
+
+	unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
+	const ProgNode &RootProgNode = ProgNodes[uRootNodeIndex];
+	a.Copy(RootProgNode.m_MSA);	// copy out before the node array is destroyed
+
+	delete[] ProgNodes;
+	ProgNodes = 0;
+	}

Added: trunk/packages/muscle/branches/upstream/current/pwpath.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/pwpath.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/pwpath.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,386 @@
+#include "muscle.h"
+#include "pwpath.h"
+#include "seq.h"
+#include "textfile.h"
+#include "msa.h"
+
+PWPath::PWPath()	// Construct an empty path; no edge storage is allocated yet
+	{
+	m_uArraySize = 0;
+	m_uEdgeCount = 0;
+	m_Edges = 0;
+	}
+
+PWPath::~PWPath()	// Release the edge array
+	{
+	Clear();
+	}
+
+void PWPath::Clear()	// Free the edge array and reset to the empty state; safe to call repeatedly
+	{
+	delete[] m_Edges;	// no-op when m_Edges is already null
+	m_Edges = 0;
+	m_uArraySize = 0;
+	m_uEdgeCount = 0;
+	}
+
+// Enlarge the edge array by uAdditionalEdgeCount slots. The m_uEdgeCount
+// edges stored so far are carried over; the old array is released.
+void PWPath::ExpandPath(unsigned uAdditionalEdgeCount)
+	{
+	const unsigned uNewSize = m_uArraySize + uAdditionalEdgeCount;
+	PWEdge *NewEdges = new PWEdge[uNewSize];
+	if (m_uEdgeCount > 0)
+		memcpy(NewEdges, m_Edges, m_uEdgeCount*sizeof(PWEdge));
+	delete[] m_Edges;
+	m_Edges = NewEdges;
+	m_uArraySize = uNewSize;
+	}
+
+// Add Edge at the end of the path. The array grows by 200 slots when it
+// has not been allocated yet or is down to its last free slot.
+void PWPath::AppendEdge(const PWEdge &Edge)
+	{
+	const bool bNeedRoom =
+	  (0 == m_uArraySize) || (m_uEdgeCount + 1 == m_uArraySize);
+	if (bNeedRoom)
+		ExpandPath(200);
+
+	m_Edges[m_uEdgeCount] = Edge;
+	m_uEdgeCount += 1;
+	}
+
+// Convenience overload: build an edge from its three fields and append it.
+void PWPath::AppendEdge(char cType, unsigned uPrefixLengthA, unsigned uPrefixLengthB)
+	{
+	PWEdge NewEdge;
+	NewEdge.cType = cType;
+	NewEdge.uPrefixLengthA = uPrefixLengthA;
+	NewEdge.uPrefixLengthB = uPrefixLengthB;
+	AppendEdge(NewEdge);
+	}
+
+// Insert Edge at the front of the path, shifting the existing edges up by
+// one. The array grows by 1000 slots when it has not been allocated yet or
+// is down to its last free slot.
+void PWPath::PrependEdge(const PWEdge &Edge)
+	{
+	const bool bNeedRoom =
+	  (0 == m_uArraySize) || (m_uEdgeCount + 1 == m_uArraySize);
+	if (bNeedRoom)
+		ExpandPath(1000);
+	// Source and destination overlap, hence memmove.
+	if (m_uEdgeCount > 0)
+		memmove(m_Edges + 1, m_Edges, sizeof(PWEdge)*m_uEdgeCount);
+	m_Edges[0] = Edge;
+	m_uEdgeCount += 1;
+	}
+
+const PWEdge &PWPath::GetEdge(unsigned uEdgeIndex) const	// Return the edge at zero-based position uEdgeIndex
+	{
+	assert(uEdgeIndex < m_uEdgeCount);	// range is checked only when assertions are compiled in
+	return m_Edges[uEdgeIndex];
+	}
+
+// Check that consecutive edges are consistent with each other. Starting
+// from the prefix lengths of the first edge, each later edge must advance
+// them as its type dictates: 'M' adds one to both A and B, 'D' adds one to
+// A only, 'I' adds one to B only. Quit() is called at the first mismatch.
+// An empty path passes. The first edge's own prefix lengths are taken as
+// given, and edges of any other type are passed over without a check.
+void PWPath::Validate() const
+	{
+	const unsigned uEdgeCount = GetEdgeCount();
+	if (0 == uEdgeCount)
+		return;
+
+	const PWEdge &FirstEdge = GetEdge(0);
+	unsigned uPrefixLengthA = FirstEdge.uPrefixLengthA;
+	unsigned uPrefixLengthB = FirstEdge.uPrefixLengthB;
+	for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
+		{
+		const PWEdge &Edge = GetEdge(uEdgeIndex);
+		switch (Edge.cType)
+			{
+		case 'M':
+			if (uPrefixLengthA + 1 != Edge.uPrefixLengthA)
+				Quit("PWPath::Validate MA %u", uPrefixLengthA);
+			if (uPrefixLengthB + 1 != Edge.uPrefixLengthB)
+				Quit("PWPath::Validate MB %u", uPrefixLengthB);
+			++uPrefixLengthA;
+			++uPrefixLengthB;
+			break;
+		case 'D':
+			if (uPrefixLengthA + 1 != Edge.uPrefixLengthA)
+				Quit("PWPath::Validate DA %u", uPrefixLengthA);
+			if (uPrefixLengthB != Edge.uPrefixLengthB)
+				Quit("PWPath::Validate DB %u", uPrefixLengthB);
+			++uPrefixLengthA;
+			break;
+		case 'I':
+			if (uPrefixLengthA != Edge.uPrefixLengthA)
+				Quit("PWPath::Validate IA %u", uPrefixLengthA);
+			if (uPrefixLengthB + 1 != Edge.uPrefixLengthB)
+				Quit("PWPath::Validate IB %u", uPrefixLengthB);
+			++uPrefixLengthB;
+			break;
+		default:
+			// Other edge types are not checked.
+			break;
+			}
+		}
+	}
+
+// Write the path to the log as space-separated "<type><prefixA>.<prefixB>"
+// items. A newline follows the edges at index 10, 20, 30, ... and the last
+// edge. Nothing is written for an empty path.
+void PWPath::LogMe() const
+	{
+	const unsigned uEdgeCount = GetEdgeCount();
+	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
+		{
+		const PWEdge &Edge = GetEdge(uEdgeIndex);
+		if (uEdgeIndex > 0)
+			Log(" ");
+		// Prefix lengths are unsigned, so %u (as in AssertEqual), not %d.
+		Log("%c%u.%u",
+		  Edge.cType,
+		  Edge.uPrefixLengthA,
+		  Edge.uPrefixLengthB);
+		if ((uEdgeIndex > 0 && uEdgeIndex%10 == 0) ||
+		 uEdgeIndex == uEdgeCount - 1)
+			Log("\n");
+		}
+	}
+
+// Replace the contents of this path with a copy of the edges of Path.
+// Copying a path onto itself leaves it unchanged.
+void PWPath::Copy(const PWPath &Path)
+	{
+	// Clear() below would otherwise throw away the edges we are about
+	// to read.
+	if (this == &Path)
+		return;
+
+	Clear();
+	const unsigned uEdgeCount = Path.GetEdgeCount();
+	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
+		{
+		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
+		AppendEdge(Edge);
+		}
+	}
+
+void PWPath::FromMSAPair(const MSA &msaA, const MSA &msaB)	// Rebuild this path from two alignments that have the same number of columns
+	{
+	const unsigned uColCount = msaA.GetColCount();
+	if (uColCount != msaB.GetColCount())
+		Quit("PWPath::FromMSAPair, lengths differ");
+
+	Clear();
+
+	unsigned uPrefixLengthA = 0;	// columns of A that are not all-gap, counted so far
+	unsigned uPrefixLengthB = 0;	// same for B
+	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
+		{
+		bool bIsGapA = msaA.IsGapColumn(uColIndex);
+		bool bIsGapB = msaB.IsGapColumn(uColIndex);
+
+		PWEdge Edge;
+		char cType;
+		if (!bIsGapA && !bIsGapB)
+			{
+			cType = 'M';	// both sides have letters
+			++uPrefixLengthA;
+			++uPrefixLengthB;
+			}
+		else if (bIsGapA && !bIsGapB)
+			{
+			cType = 'I';	// gap column in A, letters in B
+			++uPrefixLengthB;
+			}
+		else if (!bIsGapA && bIsGapB)
+			{
+			cType = 'D';	// letters in A, gap column in B
+			++uPrefixLengthA;
+			}
+		else
+			{
+			assert(bIsGapB && bIsGapA);
+			continue;	// gap column on both sides: no edge is emitted
+			}
+
+		Edge.cType = cType;
+		Edge.uPrefixLengthA = uPrefixLengthA;
+		Edge.uPrefixLengthB = uPrefixLengthB;
+		AppendEdge(Edge);
+		}
+	}
+
+// Very similar to HMMPath::FromFile, should consolidate.
+// Replace this path with one read from File. The expected token stream is
+// the one ToFile() writes:
+//	Path
+//	edges <count>
+//	<index> <type> <prefixLengthA> <prefixLengthB>	(count times)
+//	//
+// Indexes must run 0, 1, 2, ...; type must be one of M, D, I or S.
+// Any deviation ends the program via Quit().
+void PWPath::FromFile(TextFile &File)
+	{
+	Clear();
+	char szToken[1024];
+	File.GetTokenX(szToken, sizeof(szToken));
+	if (0 != strcmp(szToken, "Path"))
+		Quit("Invalid path file (Path)");
+
+	File.GetTokenX(szToken, sizeof(szToken));
+	if (0 != strcmp(szToken, "edges"))
+		Quit("Invalid path file (edges)");
+
+	File.GetTokenX(szToken, sizeof(szToken));
+	if (!IsValidInteger(szToken))
+		Quit("Invalid path file (edges value)");
+
+	const unsigned uEdgeCount = (unsigned) atoi(szToken);
+	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
+		{
+	// index
+		File.GetTokenX(szToken, sizeof(szToken));
+		if (!IsValidInteger(szToken))
+			Quit("Invalid path file, invalid index '%s'", szToken);
+		unsigned n = (unsigned) atoi(szToken);
+		if (n != uEdgeIndex)
+			Quit("Invalid path file, expecting edge %u got %u", uEdgeIndex, n);
+
+	// type
+		File.GetTokenX(szToken, sizeof(szToken));
+		if (1 != strlen(szToken))
+			Quit("Invalid path file, expecting state, got '%s'", szToken);
+		const char cType = szToken[0];
+		if ('M' != cType && 'D' != cType && cType != 'I' && 'S' != cType)
+			Quit("Invalid path file, expecting state, got '%c'", cType);
+
+	// prefix length A
+		File.GetTokenX(szToken, sizeof(szToken));
+		if (!IsValidInteger(szToken))
+			Quit("Invalid path file, bad prefix length A '%s'", szToken);
+		const unsigned uPrefixLengthA = (unsigned) atoi(szToken);
+
+	// prefix length B
+		File.GetTokenX(szToken, sizeof(szToken));
+		if (!IsValidInteger(szToken))
+			Quit("Invalid path file, bad prefix length B '%s'", szToken);
+		const unsigned uPrefixLengthB = (unsigned) atoi(szToken);
+
+		PWEdge Edge;
+		Edge.cType = cType;
+		Edge.uPrefixLengthA = uPrefixLengthA;
+		Edge.uPrefixLengthB = uPrefixLengthB;
+		AppendEdge(Edge);
+		}
+	// Terminator written by ToFile()
+	File.GetTokenX(szToken, sizeof(szToken));
+	if (0 != strcmp(szToken, "//"))
+		Quit("Invalid path file (//)");
+	}
+
+void PWPath::ToFile(TextFile &File) const	// Write the path as text in the layout that FromFile() parses
+	{
+	const unsigned uEdgeCount = GetEdgeCount();
+
+	File.PutString("Path\n");
+	File.PutFormat("edges %u\n", uEdgeCount);
+	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
+		{
+		const PWEdge &Edge = GetEdge(uEdgeIndex);
+		File.PutFormat("%u %c %u %u\n",	/* one line per edge: index, type, prefix length A, prefix length B */
+		  uEdgeIndex,
+		  Edge.cType,
+		  Edge.uPrefixLengthA,
+		  Edge.uPrefixLengthB);
+		}
+	File.PutString("//\n");
+	}
+
+void PWPath::AssertEqual(const PWPath &Path) const	// Quit() unless both paths hold the same edges in the same order; both paths are logged first
+	{
+	const unsigned uEdgeCount = GetEdgeCount();
+	if (uEdgeCount != Path.GetEdgeCount())
+		{
+		Log("PWPath::AssertEqual, this=\n");
+		LogMe();
+		Log("\nOther path=\n");
+		Path.LogMe();
+		Log("\n");
+		Quit("PWPath::AssertEqual, Edge count different %u %u\n",
+		  uEdgeCount, Path.GetEdgeCount());
+		}
+
+	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
+		{
+		const PWEdge &e1 = GetEdge(uEdgeIndex);
+		const PWEdge &e2 = Path.GetEdge(uEdgeIndex);
+		if (e1.cType != e2.cType || e1.uPrefixLengthA != e2.uPrefixLengthA ||	// any differing field counts as a mismatch
+		  e1.uPrefixLengthB != e2.uPrefixLengthB)
+			{
+			Log("PWPath::AssertEqual, this=\n");
+			LogMe();
+			Log("\nOther path=\n");
+			Path.LogMe();
+			Log("\n");
+			Log("This edge %c%u.%u, other edge %c%u.%u\n",
+			  e1.cType, e1.uPrefixLengthA, e1.uPrefixLengthB,
+			  e2.cType, e2.uPrefixLengthA, e2.uPrefixLengthB);
+			Quit("PWPath::AssertEqual, edge %u different\n", uEdgeIndex);
+			}
+		}
+	}
+
+// True when both paths have the same number of edges and every pair of
+// corresponding edges agrees in type and in both prefix lengths.
+bool PWPath::Equal(const PWPath &Path) const
+	{
+	const unsigned uCount = GetEdgeCount();
+	if (Path.GetEdgeCount() != uCount)
+		return false;
+
+	for (unsigned i = 0; i < uCount; ++i)
+		{
+		// PWEdge::Equal compares exactly these three fields.
+		if (!GetEdge(i).Equal(Path.GetEdge(i)))
+			return false;
+		}
+	return true;
+	}
+
+// Number of edges of type 'M' in the path.
+unsigned PWPath::GetMatchCount() const
+	{
+	const unsigned uTotal = GetEdgeCount();
+	unsigned uFound = 0;
+	for (unsigned i = 0; i < uTotal; ++i)
+		if (GetEdge(i).cType == 'M')
+			++uFound;
+	return uFound;
+	}
+
+// Number of edges of type 'I' in the path.
+unsigned PWPath::GetInsertCount() const
+	{
+	const unsigned uTotal = GetEdgeCount();
+	unsigned uFound = 0;
+	for (unsigned i = 0; i < uTotal; ++i)
+		if (GetEdge(i).cType == 'I')
+			++uFound;
+	return uFound;
+	}
+
+// Number of edges of type 'D' in the path.
+unsigned PWPath::GetDeleteCount() const
+	{
+	const unsigned uTotal = GetEdgeCount();
+	unsigned uFound = 0;
+	for (unsigned i = 0; i < uTotal; ++i)
+		if (GetEdge(i).cType == 'D')
+			++uFound;
+	return uFound;
+	}
+
+void PWPath::FromStr(const char Str[])	// Rebuild this path from a string of edge types, e.g. "DMDMIM"; prefix lengths are computed from zero
+	{
+	Clear();
+	unsigned uPrefixLengthA = 0;
+	unsigned uPrefixLengthB = 0;
+	while (char c = *Str++)	// stops at the terminating NUL
+		{
+		switch (c)
+			{
+		case 'M':
+			++uPrefixLengthA;
+			++uPrefixLengthB;
+			break;
+		case 'D':
+			++uPrefixLengthA;
+			break;
+		case 'I':
+			++uPrefixLengthB;
+			break;
+		default:
+			Quit("PWPath::FromStr, invalid state %c", c);
+			}
+		AppendEdge(c, uPrefixLengthA, uPrefixLengthB);	// prefix lengths already include this edge
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/pwpath.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/pwpath.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/pwpath.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,100 @@
+#ifndef	PWPath_h
+#define PWPath_h
+
+/***
+Each PWEdge in a PWPath specifies a column in a pair-wise (PW) alignment.
+"Path" is by analogy with the path through an HMM.
+Edge types are:
+
+	'M'		LetterA + LetterB
+	'D'		LetterA + GapB
+	'I'		GapA + LetterB
+
+The mnemonic is Match, Delete, Insert (with respect to A).
+Here is a global alignment of sequences A and B.
+
+	A:	AMQT-F
+	B:	-M-TIF
+
+The path for this example is:
+
+	Edge	cType	uPrefixLengthA	uPrefixLengthB
+	0		D		1				0
+	1		M		2				1
+	2		D		3				1
+	3		M		4				2
+	4		I		4				3
+	5		M		5				4
+
+Given the starting positions in each alignment (e.g., column zero for
+a global alignment), the prefix length fields are redundant; they are
+included only for convenience and as a sanity check, we are not trying
+to optimize for speed or space here. We use prefix lengths rather than
+column indexes because of the problem of representing the special case
+of a gap in the first position.
+***/
+
+class Seq;
+class MSA;
+class SatchmoParams;
+class PW;
+class TextFile;
+class PWScore;
+
+class PWEdge	// One column of a pair-wise alignment
+	{
+public:
+	char cType;	// 'M', 'D' or 'I' (PWPath::FromFile also accepts 'S')
+	unsigned uPrefixLengthA;	// prefix length in A up to and including this edge
+	unsigned uPrefixLengthB;	// prefix length in B up to and including this edge
+
+	bool Equal(const PWEdge &e) const	// true when all three fields match
+		{
+		return uPrefixLengthA == e.uPrefixLengthA &&
+		  uPrefixLengthB == e.uPrefixLengthB &&
+		  cType == e.cType;
+		}
+	};
+
+class PWPath	// Growable sequence of PWEdge objects describing a pair-wise alignment
+	{
+// Disable compiler defaults
+private:
+	PWPath &operator=(const PWPath &rhs);	// declared private so paths cannot be assigned; use Copy()
+	PWPath(const PWPath &rhs);	// declared private so paths cannot be copy-constructed
+
+public:
+	PWPath();
+	virtual ~PWPath();
+
+public:
+	void Clear();	// free storage, leaving an empty path
+	void FromStr(const char Str[]);	// build from a string of M/D/I characters
+	void Copy(const PWPath &Path);	// replace contents with a copy of Path
+	void AppendEdge(const PWEdge &Edge);
+	void AppendEdge(char cType, unsigned uPrefixLengthA, unsigned uPrefixLengthB);
+	void PrependEdge(const PWEdge &Edge);
+	unsigned GetEdgeCount() const { return m_uEdgeCount; }
+	const PWEdge &GetEdge(unsigned uEdgeIndex) const;	// uEdgeIndex is zero-based
+	void Validate(const PWScore &PWS) const;
+	void Validate() const;	// Quit() if consecutive edges are inconsistent
+	void LogMe() const;
+	void FromFile(TextFile &File);	// reads the layout written by ToFile
+	void ToFile(TextFile &File) const;
+	void FromMSAPair(const MSA &msaA, const MSA &msaB);	// derive the path from two equal-length alignments
+	void AssertEqual(const PWPath &Path) const;	// Quit() if the paths differ
+	bool Equal(const PWPath &Path) const;
+	unsigned GetMatchCount() const;	// number of 'M' edges
+	unsigned GetDeleteCount() const;	// number of 'D' edges
+	unsigned GetInsertCount() const;	// number of 'I' edges
+
+private:
+	void ExpandPath(unsigned uAdditionalEdgeCount);	// grow m_Edges by the given number of slots
+
+private:
+	unsigned m_uEdgeCount;	// edges in use
+	unsigned m_uArraySize;	// slots allocated in m_Edges
+	PWEdge *m_Edges;	// heap array owned by this object; null when empty
+	};
+
+#endif	// PWPath_h

Added: trunk/packages/muscle/branches/upstream/current/readmx.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/readmx.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/readmx.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,156 @@
+#include "muscle.h"
+#include "textfile.h"
+
+#define TRACE	0
+
+const int MAX_LINE = 4096;	// size of the line buffer used by ReadMx
+const int MAX_HEADINGS = 32;	// capacity of Heading[]
+static char Heading[MAX_HEADINGS];	// column header characters from the most recent ReadMx call
+static unsigned HeadingCount = 0;	// number of entries of Heading[] in use
+static float Mx[32][32];	// matrix filled in and returned (by address) by ReadMx
+
+static void LogMx()	// Write the first 20 rows and columns of Mx to the log as a table
+	{
+	Log("Matrix\n");
+	Log("     ");
+	for (int i = 0; i < 20; ++i)	// column heading row
+		Log("    %c", LetterToChar(i));
+	Log("\n");
+
+	for (int i = 0; i < 20; ++i)
+		{
+		Log("%c    ", LetterToChar(i));	// row label
+		for (int j = 0; j < 20; ++j)
+			Log("%5.1f", Mx[i][j]);
+		Log("\n");
+		}
+	Log("\n");
+	}
+
+// Return the position of c among the column headings held in Heading[].
+// Calls Quit() when c is not one of them.
+static unsigned MxCharToLetter(char c)
+	{
+	unsigned uIndex = 0;
+	while (uIndex < HeadingCount && Heading[uIndex] != c)
+		++uIndex;
+	if (uIndex < HeadingCount)
+		return uIndex;
+	Quit("Letter '%c' has no heading", c);
+	return 0;
+	}
+
+// Read a substitution matrix from File into the static table Mx and return
+// a pointer to that table (each call overwrites the previous result).
+// Expected layout: optional lines starting with '#', then a header line that
+// starts with a space and lists the column characters, then data rows that
+// start with the row character followed by whitespace-separated values.
+// A trailing '*' heading is dropped. Values are stored at
+// Mx[CharToLetter(row)][CharToLetter(column)] for residue characters only.
+// Malformed input ends the program via Quit(); an asymmetric matrix only
+// produces a warning.
+PTR_SCOREMATRIX ReadMx(TextFile &File)
+	{
+// Find column headers
+	char Line[MAX_LINE];
+	for (;;)
+		{
+		bool EndOfFile = File.GetLine(Line, sizeof(Line));
+		if (EndOfFile)
+			Quit("Premature EOF in matrix file");
+
+		if (Line[0] == '#')
+			continue;
+		else if (Line[0] == ' ')
+			break;
+		else
+			Quit("Invalid line in matrix file: '%s'", Line);
+		}
+
+// Read column headers
+	HeadingCount = 0;
+	for (char *p = Line; *p; ++p)
+		{
+		char c = *p;
+		if (isspace((unsigned char) c))
+			continue;
+		// Heading[] has room for MAX_HEADINGS characters only.
+		if (HeadingCount >= (unsigned) MAX_HEADINGS)
+			Quit("Error in matrix file: > %d headers, line='%s'", MAX_HEADINGS, Line);
+		Heading[HeadingCount++] = c;
+		}
+
+	if (HeadingCount > 0 && Heading[HeadingCount-1] == '*')
+		--HeadingCount;
+
+	if (HeadingCount < 20)
+		Quit("Error in matrix file: < 20 headers, line='%s'", Line);
+
+#if TRACE
+	{
+	Log("ReadMx\n");
+	Log("%u headings: ", HeadingCount);
+	for (unsigned i = 0; i < HeadingCount; ++i)
+		Log("%c", Heading[i]);
+	Log("\n");
+	}
+#endif
+
+// Zero out matrix
+	for (int i = 0; i < MAX_ALPHA; ++i)
+		for (int j = 0; j < MAX_ALPHA; ++j)
+			Mx[i][j] = 0.0;
+
+// Read data lines
+	for (unsigned RowIndex = 0; RowIndex < HeadingCount; ++RowIndex)
+		{
+		bool EndOfFile = File.GetTrimLine(Line, sizeof(Line));
+		if (EndOfFile)
+			Quit("Premature EOF in matrix file");
+
+#if	TRACE
+		Log("Line=%s\n", Line);
+#endif
+		if (Line[0] == '#')
+			continue;
+
+		char c = Line[0];
+#if	TRACE
+		Log("Row char=%c\n", c);
+#endif
+		if (!IsResidueChar(c))
+			continue;
+
+		unsigned RowLetter = CharToLetter(c);
+#if	TRACE
+		Log("Row letter = %u\n", RowLetter);
+#endif
+
+		char *p = Line + 1;
+		// Points at the terminating NUL of Line.
+		char *maxp = Line + strlen(Line);
+	// NOTE(review): only HeadingCount - 1 columns are read, so the last
+	// heading's column is never stored -- confirm this is intended.
+		for (unsigned Col = 0; Col < HeadingCount - 1; ++Col)
+			{
+			while (isspace((unsigned char) *p))
+				++p;
+			// Nothing left on the line: the row is too short.
+			if (p >= maxp)
+				Quit("Too few fields in line of matrix file: '%s'", Line);
+			char *Value = p;
+			// Stop at the NUL as well as at whitespace so the scan
+			// cannot run off the end of the buffer.
+			while (*p != 0 && !isspace((unsigned char) *p))
+				++p;
+			float v = (float) atof(Value);
+			char HeaderChar = Heading[Col];
+			if (IsResidueChar(HeaderChar))
+				{
+				unsigned ColLetter = CharToLetter(HeaderChar);
+				Mx[RowLetter][ColLetter] = v;
+				}
+			if (*p != 0)
+				++p;
+			}
+		}
+
+// Sanity check for symmetry
+	for (int i = 0; i < 20; ++i)
+		for (int j = 0; j < i; ++j)
+			{
+			if (Mx[i][j] != Mx[j][i])
+				{
+				// i and j are letter indexes; LetterToChar turns
+				// them into printable characters for %c.
+				Warning("Matrix is not symmetrical, %c->%c=%g, %c->%c=%g",
+				  LetterToChar(i),
+				  LetterToChar(j),
+				  Mx[i][j],
+				  LetterToChar(j),
+				  LetterToChar(i),
+				  Mx[j][i]);
+				goto ExitLoop;
+				}
+			}
+ExitLoop:;
+
+	if (g_bVerbose)
+		LogMx();
+
+	return &Mx;
+	}

Added: trunk/packages/muscle/branches/upstream/current/realigndiffs.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/realigndiffs.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/realigndiffs.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,115 @@
+#include "muscle.h"
+#include "msa.h"
+#include "tree.h"
+#include "profile.h"
+#include "pwpath.h"
+
+#define TRACE	0
+
+// Progressive alignment according to a diffs tree.
+
+static void MakeNode(const MSA &msaIn, const Tree &Diffs, unsigned uDiffsNodeIndex,	// Fill Node.m_MSA with the sequences of msaIn that map to diffs-tree node uDiffsNodeIndex (Diffs itself is not read here)
+   const unsigned IdToDiffsTreeNodeIndex[], ProgNode &Node)
+	{
+	const unsigned uSeqCount = msaIn.GetSeqCount();
+
+	unsigned *Ids = new unsigned[uSeqCount];	// ids selected for this node; at most uSeqCount of them
+
+	unsigned uSeqsInDiffCount = 0;
+	for (unsigned uId = 0; uId < uSeqCount; ++uId)
+		{
+		if (IdToDiffsTreeNodeIndex[uId] == uDiffsNodeIndex)
+			{
+			Ids[uSeqsInDiffCount] = uId;
+			++uSeqsInDiffCount;
+			}
+		}
+	if (0 == uSeqsInDiffCount)
+		Quit("MakeNode: no seqs in diff");
+
+	MSASubsetByIds(msaIn, Ids, uSeqsInDiffCount, Node.m_MSA);
+
+#if	DEBUG
+	ValidateMuscleIds(Node.m_MSA);
+#endif
+
+	DeleteGappedCols(Node.m_MSA);	// presumably drops columns that became all-gap in the subset -- verify in DeleteGappedCols
+	delete[] Ids;
+	}
+
+void RealignDiffs(const MSA &msaIn, const Tree &Diffs,	// Re-align msaIn along the Diffs tree: leaves are subsets of msaIn, internal nodes are joined with AlignTwoMSAs; result goes to msaOut
+  const unsigned IdToDiffsTreeNodeIndex[], MSA &msaOut)
+	{
+	assert(Diffs.IsRooted());
+
+#if	TRACE
+	Log("RealignDiffs\n");
+	Log("Diff tree:\n");
+	Diffs.LogMe();
+#endif
+
+	const unsigned uNodeCount = Diffs.GetNodeCount();
+	if (uNodeCount%2 == 0)
+		Quit("RealignDiffs: Expected odd number of nodes");
+
+	const unsigned uMergeCount = (uNodeCount - 1)/2;	// used as the step total for progress output
+
+	ProgNode *ProgNodes = new ProgNode[uNodeCount];	// indexed by diffs-tree node index; freed before returning
+
+	unsigned uJoin = 0;	// number of internal nodes processed so far
+	SetProgressDesc("Refine tree");
+	for (unsigned uDiffsNodeIndex = Diffs.FirstDepthFirstNode();
+	  NULL_NEIGHBOR != uDiffsNodeIndex;
+	  uDiffsNodeIndex = Diffs.NextDepthFirstNode(uDiffsNodeIndex))
+		{
+		if (Diffs.IsLeaf(uDiffsNodeIndex))
+			{
+			assert(uDiffsNodeIndex < uNodeCount);
+			if (uDiffsNodeIndex >= uNodeCount)	// same check as the assert, kept for builds without assertions
+				Quit("TreeNodeIndex=%u NodeCount=%u\n", uDiffsNodeIndex, uNodeCount);
+
+			ProgNode &Node = ProgNodes[uDiffsNodeIndex];
+			MakeNode(msaIn, Diffs, uDiffsNodeIndex, IdToDiffsTreeNodeIndex, Node);
+
+			Node.m_uLength = Node.m_MSA.GetColCount();
+			}
+		else
+			{
+			Progress(uJoin, uMergeCount);
+			++uJoin;
+			const unsigned uMergeNodeIndex = uDiffsNodeIndex;
+			ProgNode &Parent = ProgNodes[uMergeNodeIndex];
+
+			const unsigned uLeft = Diffs.GetLeft(uDiffsNodeIndex);
+			const unsigned uRight = Diffs.GetRight(uDiffsNodeIndex);
+
+			ProgNode &Node1 = ProgNodes[uLeft];	// children are expected to be filled in already (depth-first visiting order) -- TODO confirm against Tree
+			ProgNode &Node2 = ProgNodes[uRight];
+
+			PWPath Path;	// filled by AlignTwoMSAs, not used afterwards
+			AlignTwoMSAs(Node1.m_MSA, Node2.m_MSA, Parent.m_MSA, Path);
+
+#if	TRACE
+			{
+			Log("Combined:\n");
+			Parent.m_MSA.LogMe();
+			}
+#endif
+
+			Node1.m_MSA.Clear();	// child alignments are dropped once the join is done
+			Node2.m_MSA.Clear();
+			}
+		}
+	ProgressStepsDone();
+
+	unsigned uRootNodeIndex = Diffs.GetRootNodeIndex();
+	const ProgNode &RootProgNode = ProgNodes[uRootNodeIndex];
+	msaOut.Copy(RootProgNode.m_MSA);	// copy out before the node array is destroyed
+
+#if	DEBUG
+	AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);
+#endif
+
+	delete[] ProgNodes;
+	ProgNodes = 0;
+	}

Added: trunk/packages/muscle/branches/upstream/current/realigndiffse.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/realigndiffse.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/realigndiffse.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,142 @@
+#include "muscle.h"
+#include "msa.h"
+#include "tree.h"
+#include "profile.h"
+#include "pwpath.h"
+#include "seqvect.h"
+#include "estring.h"
+
+#define TRACE		0
+
+// Free the heap buffers held by a ProgNode (its profile and both edit
+// strings). Each pointer is zeroed right after it is released, so calling
+// this again on the same node only deletes null pointers.
+void DeleteProgNode(ProgNode &Node)
+	{
+	delete[] Node.m_Prof;
+	Node.m_Prof = 0;
+
+	delete[] Node.m_EstringL;
+	Node.m_EstringL = 0;
+
+	delete[] Node.m_EstringR;
+	Node.m_EstringR = 0;
+	}
+
+// Hand the profile and edit strings of OldNode over to NewNode.
+// With bSwapLR set, the left and right edit strings trade places on the
+// way across. Length and weight are copied. OldNode's three pointers are
+// zeroed afterwards, leaving NewNode as the only holder of the buffers.
+static void MakeNode(ProgNode &OldNode, ProgNode &NewNode, bool bSwapLR)
+	{
+	NewNode.m_EstringL = bSwapLR ? OldNode.m_EstringR : OldNode.m_EstringL;
+	NewNode.m_EstringR = bSwapLR ? OldNode.m_EstringL : OldNode.m_EstringR;
+
+	NewNode.m_Prof = OldNode.m_Prof;
+	NewNode.m_uLength = OldNode.m_uLength;
+	NewNode.m_Weight = OldNode.m_Weight;
+
+	// The buffers now belong to NewNode only.
+	OldNode.m_Prof = 0;
+	OldNode.m_EstringL = 0;
+	OldNode.m_EstringR = 0;
+	}
+
+// Rebuild the root alignment for NewTree, re-using per-node data from a
+// previous progressive alignment where possible.
+//
+//   msaIn        - only referenced by the DEBUG consistency check below.
+//   v            - input sequences, passed on to MakeRootMSA[Brenner].
+//   NewTree      - tree to align along.
+//   OldTree      - tree that OldProgNodes was built for.
+//   uNewNodeIndexToOldNodeIndex
+//                - for each NewTree node, either the matching OldTree node
+//                  index or NODE_CHANGED.
+//   msaOut       - receives the root alignment.
+//   OldProgNodes - nodes of the previous alignment; buffers of every node
+//                  that is re-used are moved out of this array (MakeNode
+//                  zeroes the old pointers).
+void RealignDiffsE(const MSA &msaIn, const SeqVect &v,
+  const Tree &NewTree, const Tree &OldTree, 
+  const unsigned uNewNodeIndexToOldNodeIndex[],
+  MSA &msaOut, ProgNode *OldProgNodes)
+	{
+	assert(OldProgNodes != 0);
+
+	const unsigned uNodeCount = NewTree.GetNodeCount();
+	if (uNodeCount%2 == 0)
+		Quit("RealignDiffs: Expected odd number of nodes");
+
+	const unsigned uMergeCount = (uNodeCount - 1)/2;
+	ProgNode *NewProgNodes = new ProgNode[uNodeCount];
+
+// Pass 1: for every node that has a counterpart in the old tree, move the
+// old node's data into the new node array. NODE_CHANGED nodes are left
+// for pass 2.
+	for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
+		{
+		if (NODE_CHANGED == uNewNodeIndexToOldNodeIndex[uNewNodeIndex])
+			continue;
+
+		unsigned uOldNodeIndex = uNewNodeIndexToOldNodeIndex[uNewNodeIndex];
+		assert(uNewNodeIndex < uNodeCount);
+		assert(uOldNodeIndex < uNodeCount);
+
+		ProgNode &NewNode = NewProgNodes[uNewNodeIndex];
+		ProgNode &OldNode = OldProgNodes[uOldNodeIndex];
+		bool bSwapLR = false;
+		if (!NewTree.IsLeaf(uNewNodeIndex))
+			{
+		// An internal node may have its children in the opposite order in
+		// the new tree; if so the left/right edit strings must be swapped.
+			unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);
+			unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);
+			unsigned uOld = uNewNodeIndexToOldNodeIndex[uNewNodeIndex];
+			unsigned uOldLeft = OldTree.GetLeft(uOld);
+			unsigned uOldRight = OldTree.GetRight(uOld);
+			assert(uOldLeft < uNodeCount && uOldRight < uNodeCount);
+			if (uOldLeft != uNewNodeIndexToOldNodeIndex[uNewLeft])
+				{
+				assert(uOldLeft == uNewNodeIndexToOldNodeIndex[uNewRight]);
+				bSwapLR = true;
+				}
+			}
+		MakeNode(OldNode, NewNode, bSwapLR);
+#if	TRACE
+		Log("MakeNode old=%u new=%u swap=%d length=%u weight=%.3g\n",
+		  uOldNodeIndex, uNewNodeIndex, bSwapLR, NewNode.m_uLength, NewNode.m_Weight);
+#endif
+		}
+
+// Pass 2: visit the new tree depth-first and align the profiles of the
+// two children for every node flagged NODE_CHANGED.
+	unsigned uJoin = 0;
+	SetProgressDesc("Refine tree");
+	for (unsigned uNewNodeIndex = NewTree.FirstDepthFirstNode();
+	  NULL_NEIGHBOR != uNewNodeIndex;
+	  uNewNodeIndex = NewTree.NextDepthFirstNode(uNewNodeIndex))
+		{
+		if (NODE_CHANGED != uNewNodeIndexToOldNodeIndex[uNewNodeIndex])
+			continue;
+
+		Progress(uJoin, uMergeCount - 1);
+		++uJoin;
+
+		const unsigned uMergeNodeIndex = uNewNodeIndex;
+		ProgNode &Parent = NewProgNodes[uMergeNodeIndex];
+
+		const unsigned uLeft = NewTree.GetLeft(uNewNodeIndex);
+		const unsigned uRight = NewTree.GetRight(uNewNodeIndex);
+
+		ProgNode &Node1 = NewProgNodes[uLeft];
+		ProgNode &Node2 = NewProgNodes[uRight];
+
+		AlignTwoProfs(
+			Node1.m_Prof, Node1.m_uLength, Node1.m_Weight,
+			Node2.m_Prof, Node2.m_uLength, Node2.m_Weight,
+			Parent.m_Path,
+			&Parent.m_Prof, &Parent.m_uLength);
+		PathToEstrings(Parent.m_Path, &Parent.m_EstringL, &Parent.m_EstringR);
+
+		Parent.m_Weight = Node1.m_Weight + Node2.m_Weight;
+
+	// The child profiles are not needed once the parent profile exists.
+		delete[] Node1.m_Prof;
+		delete[] Node2.m_Prof;
+
+		Node1.m_Prof = 0;
+		Node2.m_Prof = 0;
+		}
+
+	ProgressStepsDone();
+
+// NOTE(review): the cast below drops const from v for the Brenner variant.
+	if (g_bBrenner)
+		MakeRootMSABrenner((SeqVect &) v, NewTree, NewProgNodes, msaOut);
+	else
+		MakeRootMSA(v, NewTree, NewProgNodes, msaOut);
+
+#if	DEBUG
+	AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);
+#endif
+
+// Release whatever buffers the new nodes still hold, then the array.
+	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
+		DeleteProgNode(NewProgNodes[uNodeIndex]);
+
+	delete[] NewProgNodes;
+	}

Added: trunk/packages/muscle/branches/upstream/current/refine.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/refine.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/refine.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,79 @@
+#include "muscle.h"
+#include "textfile.h"
+#include "seqvect.h"
+#include "distfunc.h"
+#include "msa.h"
+#include "tree.h"
+#include "clust.h"
+#include "profile.h"
+#include "clustsetmsa.h"
+
+// Entry point for refining an existing alignment: read the alignment
+// named by g_pstrInFileName, select the alphabet, estimate a guide tree
+// from the alignment, refine it (vertical refinement when g_bAnchors is
+// set, horizontal otherwise) and write the result via MuscleOutput.
+void Refine()
+	{
+	SetOutputFileName(g_pstrOutFileName);
+	SetInputFileName(g_pstrInFileName);
+	SetStartTime();
+
+	SetMaxIters(g_uMaxIters);
+	SetSeqWeightMethod(g_SeqWeight1);
+
+	TextFile fileIn(g_pstrInFileName);
+	MSA msa;
+	msa.FromFile(fileIn);
+
+	const unsigned uSeqCount = msa.GetSeqCount();
+	if (0 == uSeqCount)
+		Quit("No sequences in input file");
+
+// Map the requested sequence type to an alphabet; SEQTYPE_Auto asks the
+// alignment itself to guess.
+	ALPHA Alpha = ALPHA_Undefined;
+	switch (g_SeqType)
+		{
+	case SEQTYPE_Auto:
+		Alpha = msa.GuessAlpha();
+		break;
+
+	case SEQTYPE_Protein:
+		Alpha = ALPHA_Amino;
+		break;
+
+	case SEQTYPE_DNA:
+		Alpha = ALPHA_DNA;
+		break;
+
+	case SEQTYPE_RNA:
+		Alpha = ALPHA_RNA;
+		break;
+
+	default:
+		Quit("Invalid SeqType");
+		}
+	SetAlpha(Alpha);
+	msa.FixAlpha();
+
+// Nucleotide input switches the profile-profile score to PPSCORE_SPN.
+	if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)
+		SetPPScore(PPSCORE_SPN);
+
+	MSA::SetIdCount(uSeqCount);
+
+// Initialize sequence ids.
+// From this point on, ids must somehow propagate from here.
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		msa.SetSeqId(uSeqIndex, uSeqIndex);
+	SetMuscleInputMSA(msa);
+
+	Tree GuideTree;
+	TreeFromMSA(msa, GuideTree, g_Cluster2, g_Distance2, g_Root2);
+	SetMuscleTree(GuideTree);
+
+	if (g_bAnchors)
+		RefineVert(msa, GuideTree, g_uMaxIters);
+	else
+		RefineHoriz(msa, GuideTree, g_uMaxIters, false, false);
+
+	ValidateMuscleIds(msa);
+	ValidateMuscleIds(GuideTree);
+
+//	TextFile fileOut(g_pstrOutFileName, true);
+//	msa.ToFile(fileOut);
+	MuscleOutput(msa);
+	}

Added: trunk/packages/muscle/branches/upstream/current/refinehoriz.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/refinehoriz.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/refinehoriz.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,288 @@
+#include "muscle.h"
+#include "tree.h"
+#include "msa.h"
+#include "pwpath.h"
+#include "profile.h"
+#include "scorehistory.h"
+#include "objscore.h"
+
+unsigned g_uRefineHeightSubtree;
+unsigned g_uRefineHeightSubtreeTotal;
+
+#define TRACE			0
+#define DIFFOBJSCORE	0
+
+// Split msaIn into the two sequence groups given by Leaves1/Leaves2 (leaf
+// node indexes of tree), re-align the two groups against each other and
+// keep the new alignment only if it scores strictly better.
+//
+//   msaIn            - alignment to refine; overwritten when accepted.
+//   Leaves1, uCount1 - leaf node indexes of the first group.
+//   Leaves2, uCount2 - leaf node indexes of the second group.
+//   ptrscoreBefore,
+//   ptrscoreAfter    - receive the objective scores; both are set to 0
+//                      when no scoring was needed (nothing to align or
+//                      the path did not change).
+//   bLockLeft,
+//   bLockRight       - passed through to AlignTwoMSAs.
+//
+// Returns true if the re-alignment was accepted.
+static bool TryRealign(MSA &msaIn, const Tree &tree, const unsigned Leaves1[],
+  unsigned uCount1, const unsigned Leaves2[], unsigned uCount2,
+  SCORE *ptrscoreBefore, SCORE *ptrscoreAfter,
+  bool bLockLeft, bool bLockRight)
+	{
+#if	TRACE
+	Log("TryRealign, msaIn=\n");
+	msaIn.LogMe();
+#endif
+
+	const unsigned uSeqCount = msaIn.GetSeqCount();
+
+	unsigned *Ids1 = new unsigned[uSeqCount];
+	unsigned *Ids2 = new unsigned[uSeqCount];
+
+	LeafIndexesToIds(tree, Leaves1, uCount1, Ids1);
+	LeafIndexesToIds(tree, Leaves2, uCount2, Ids2);
+
+	MSA msa1;
+	MSA msa2;
+
+	MSASubsetByIds(msaIn, Ids1, uCount1, msa1);
+	MSASubsetByIds(msaIn, Ids2, uCount2, msa2);
+
+#if	DEBUG
+	ValidateMuscleIds(msa1);
+	ValidateMuscleIds(msa2);
+#endif
+
+// Computing the objective score may be expensive for
+// large numbers of sequences. As a speed optimization,
+// we check whether the alignment changes. If it does
+// not change, there is no need to compute the objective
+// score. We test for the alignment changing by comparing
+// the Viterbi paths before and after re-aligning.
+	PWPath pathBefore;
+	pathBefore.FromMSAPair(msa1, msa2);
+
+	DeleteGappedCols(msa1);
+	DeleteGappedCols(msa2);
+
+	if (0 == msa1.GetColCount() || 0 == msa2.GetColCount())
+		{
+	// Nothing to align. Report neutral scores so the caller never reads
+	// uninitialized values, and release the id buffers before leaving.
+		*ptrscoreBefore = 0;
+		*ptrscoreAfter = 0;
+		delete[] Ids1;
+		delete[] Ids2;
+		return false;
+		}
+
+	MSA msaRealigned;
+	PWPath pathAfter;
+
+	AlignTwoMSAs(msa1, msa2, msaRealigned, pathAfter, bLockLeft, bLockRight);
+
+	bool bAnyChanges = !pathAfter.Equal(pathBefore);
+	unsigned uDiffCount1;
+	unsigned uDiffCount2;
+// NOTE(review): fixed-size scratch buffers; whether DiffPaths can write
+// more than 10000 entries is not visible here -- confirm against DiffPaths.
+	static unsigned Edges1[10000];
+	static unsigned Edges2[10000];
+	DiffPaths(pathBefore, pathAfter, Edges1, &uDiffCount1, Edges2, &uDiffCount2);
+
+#if	TRACE
+	Log("TryRealign, msa1=\n");
+	msa1.LogMe();
+	Log("\nmsa2=\n");
+	msa2.LogMe();
+	Log("\nRealigned (changes %s)=\n", bAnyChanges ? "TRUE" : "FALSE");
+	msaRealigned.LogMe();
+#endif
+
+	if (!bAnyChanges)
+		{
+		*ptrscoreBefore = 0;
+		*ptrscoreAfter = 0;
+		delete[] Ids1;
+		delete[] Ids2;
+		return false;
+		}
+
+	SetMSAWeightsMuscle(msaIn);
+	SetMSAWeightsMuscle(msaRealigned);
+
+#if	DIFFOBJSCORE
+	const SCORE scoreDiff = DiffObjScore(msaIn, pathBefore, Edges1, uDiffCount1,
+	  msaRealigned, pathAfter, Edges2, uDiffCount2);
+	bool bAccept = (scoreDiff > 0);
+	*ptrscoreBefore = 0;
+	*ptrscoreAfter = scoreDiff;
+	//const SCORE scoreBefore = ObjScoreIds(msaIn, Ids1, uCount1, Ids2, uCount2);
+	//const SCORE scoreAfter = ObjScoreIds(msaRealigned, Ids1, uCount1, Ids2, uCount2);
+	//Log("Diff = %.3g %.3g\n", scoreDiff, scoreAfter - scoreBefore);
+#else
+	const SCORE scoreBefore = ObjScoreIds(msaIn, Ids1, uCount1, Ids2, uCount2);
+	const SCORE scoreAfter = ObjScoreIds(msaRealigned, Ids1, uCount1, Ids2, uCount2);
+
+	bool bAccept = (scoreAfter > scoreBefore);
+
+#if	TRACE
+	Log("Score %g -> %g Accept %s\n", scoreBefore, scoreAfter, bAccept ? "TRUE" : "FALSE");
+#endif
+
+	*ptrscoreBefore = scoreBefore;
+	*ptrscoreAfter = scoreAfter;
+#endif
+
+	if (bAccept)
+		msaIn.Copy(msaRealigned);
+	delete[] Ids1;
+	delete[] Ids2;
+	return bAccept;
+	}
+
+// One sweep over the internal nodes of tree, in the order given by
+// InternalNodeIndexes. For each node the tree is split at the edge to the
+// node's right (bRight) or left child, and TryRealign is asked to re-align
+// the two resulting groups of leaves.
+//
+//   bReversed       - only used by the TRACE log output.
+//   uIter           - current iteration, used as index into History.
+//   History         - records the best score per (iteration, node, side);
+//                     SetScore returning true ends the sweep early.
+//   ptrbAnyChanges  - set to true if at least one re-alignment was accepted
+//                     before the sweep ended.
+//   ptrbOscillating - set to true if History reported a repeated score.
+static void RefineHeightParts(MSA &msaIn, const Tree &tree,
+ const unsigned InternalNodeIndexes[], bool bReversed, bool bRight,
+ unsigned uIter, 
+ ScoreHistory &History,
+ bool *ptrbAnyChanges, bool *ptrbOscillating, bool bLockLeft, bool bLockRight)
+	{
+	*ptrbOscillating = false;
+
+	const unsigned uSeqCount = msaIn.GetSeqCount();
+	const unsigned uInternalNodeCount = uSeqCount - 1;
+
+	unsigned *Leaves1 = new unsigned[uSeqCount];
+	unsigned *Leaves2 = new unsigned[uSeqCount];
+
+	const unsigned uRootNodeIndex = tree.GetRootNodeIndex();
+	bool bAnyAccepted = false;
+	for (unsigned i = 0; i < uInternalNodeCount; ++i)
+		{
+		const unsigned uInternalNodeIndex = InternalNodeIndexes[i];
+		unsigned uNeighborNodeIndex;
+	// The root is skipped on the left-hand sweep.
+		if (tree.IsRoot(uInternalNodeIndex) && !bRight)
+			continue;
+		else if (bRight)
+			uNeighborNodeIndex = tree.GetRight(uInternalNodeIndex);
+		else
+			uNeighborNodeIndex = tree.GetLeft(uInternalNodeIndex);
+
+		g_uTreeSplitNode1 = uInternalNodeIndex;
+		g_uTreeSplitNode2 = uNeighborNodeIndex;
+
+		unsigned uCount1;
+		unsigned uCount2;
+
+	// Group 1: leaves below the neighbor; group 2: all remaining leaves.
+		GetLeaves(tree, uNeighborNodeIndex, Leaves1, &uCount1);
+		GetLeavesExcluding(tree, uRootNodeIndex, uNeighborNodeIndex,
+		  Leaves2, &uCount2);
+
+#if	TRACE
+		Log("\nRefineHeightParts node %u\n", uInternalNodeIndex);
+		Log("Group1=");
+		for (unsigned n = 0; n < uCount1; ++n)
+			Log(" %u(%s)", Leaves1[n], tree.GetName(Leaves1[n]));
+		Log("\n");
+		Log("Group2=");
+		for (unsigned n = 0; n < uCount2; ++n)
+			Log(" %u(%s)", Leaves2[n], tree.GetName(Leaves2[n]));
+		Log("\n");
+#endif
+
+	// Start from neutral scores: TryRealign has an early-exit path that
+	// does not write them, and they are read unconditionally below.
+		SCORE scoreBefore = 0;
+		SCORE scoreAfter = 0;
+		bool bAccepted = TryRealign(msaIn, tree, Leaves1, uCount1, Leaves2, uCount2,
+		  &scoreBefore, &scoreAfter, bLockLeft, bLockRight);
+		SetCurrentAlignment(msaIn);
+
+		++g_uRefineHeightSubtree;
+		Progress(g_uRefineHeightSubtree, g_uRefineHeightSubtreeTotal);
+
+#if	TRACE
+		if (uIter > 0)
+			Log("Before %g %g\n", scoreBefore,
+			  History.GetScore(uIter - 1, uInternalNodeIndex, bReversed, bRight));
+#endif
+		SCORE scoreMax = scoreAfter > scoreBefore? scoreAfter : scoreBefore;
+		bool bRepeated = History.SetScore(uIter, uInternalNodeIndex, bRight, scoreMax);
+		if (bRepeated)
+			{
+			*ptrbOscillating = true;
+			break;
+			}
+
+		if (bAccepted)
+			bAnyAccepted = true;
+		}
+
+	delete[] Leaves1;
+	delete[] Leaves2;
+
+	*ptrbAnyChanges = bAnyAccepted;
+	}
+
+// Refine msaIn by repeatedly re-aligning the bipartitions defined by the
+// internal nodes of a rooted tree. Each iteration makes two sweeps over
+// the internal nodes (right children first, then left children); the node
+// order is reversed on odd iterations. Iteration stops after uIters
+// rounds, when a round accepts no change, or when RefineHeightParts
+// reports oscillation.
+// Returns true if any change was recorded. Alignments with fewer than
+// three sequences are left untouched.
+bool RefineHoriz(MSA &msaIn, const Tree &tree, unsigned uIters, bool bLockLeft,
+  bool bLockRight)
+	{
+#if	TRACE
+	tree.LogMe();
+#endif
+
+	if (!tree.IsRooted())
+		Quit("RefineHeight: requires rooted tree");
+
+	const unsigned uSeqCount = msaIn.GetSeqCount();
+	if (uSeqCount < 3)
+		return false;
+
+	const unsigned uInternalNodeCount = uSeqCount - 1;
+	unsigned *Forward = new unsigned[uInternalNodeCount];
+	unsigned *Backward = new unsigned[uInternalNodeCount];
+
+	GetInternalNodesInHeightOrder(tree, Forward);
+
+	ScoreHistory History(uIters, 2*uSeqCount - 1);
+
+	// Backward is Forward in reverse order.
+	for (unsigned n = 0; n < uInternalNodeCount; ++n)
+		Backward[uInternalNodeCount - 1 - n] = Forward[n];
+
+	bool bAnyChangesAnyIter = false;
+	for (unsigned uIter = 0; uIter < uIters; ++uIter)
+		{
+		IncIter();
+		SetProgressDesc("Refine biparts");
+		g_uRefineHeightSubtree = 0;
+		g_uRefineHeightSubtreeTotal = uInternalNodeCount*2 - 1;
+
+		const bool bReverse = (uIter%2 != 0);
+		const unsigned *Internals = bReverse ? Backward : Forward;
+
+		bool bChangedThisIter = false;
+		bool bOscillating = false;
+		// Sweep 0 splits at right children, sweep 1 at left children.
+		for (unsigned uSweep = 0; uSweep < 2 && !bOscillating; ++uSweep)
+			{
+			const bool bRight = (0 == uSweep);
+			bool bChangedThisSweep = false;
+			RefineHeightParts(msaIn, tree, Internals, bReverse, bRight,
+			  uIter, History, &bChangedThisSweep, &bOscillating,
+			  bLockLeft, bLockRight);
+
+			// A sweep that ended in oscillation is not counted as a change.
+			if (!bOscillating && bChangedThisSweep)
+				{
+				bChangedThisIter = true;
+				bAnyChangesAnyIter = true;
+				}
+			}
+
+		ProgressStepsDone();
+		if (bOscillating || !bChangedThisIter)
+			break;
+		}
+
+	delete[] Forward;
+	delete[] Backward;
+
+	return bAnyChangesAnyIter;
+	}

Added: trunk/packages/muscle/branches/upstream/current/refinesubfams.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/refinesubfams.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/refinesubfams.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,212 @@
+#include "muscle.h"
+#include "msa.h"
+#include "tree.h"
+#include "clust.h"
+#include "profile.h"
+#include "pwpath.h"
+
+#define TRACE 0
+
+static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[],
+  unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa);
+
+// Pick the subfamilies of a tree.
+// The tree is first clustered by height (dMaxHeight); if that produces
+// more than uMaxSubfamCount clusters, it is clustered again so that the
+// count limit is respected.
+// On return *ptrptrSubfams points to a new[]-allocated array of node
+// indexes, one per subfamily (the caller must delete[] it), and
+// *ptruSubfamCount holds the number of entries used.
+static void GetSubfams(const Tree &tree, double dMaxHeight,
+  unsigned uMaxSubfamCount, unsigned **ptrptrSubfams, unsigned *ptruSubfamCount)
+	{
+	// Sized for the worst case of one entry per tree node.
+	unsigned *NodeIndexes = new unsigned[tree.GetNodeCount()];
+	unsigned uFound;
+
+	ClusterByHeight(tree, dMaxHeight, NodeIndexes, &uFound);
+	if (uFound > uMaxSubfamCount)
+		ClusterBySubfamCount(tree, uMaxSubfamCount, NodeIndexes, &uFound);
+
+	*ptrptrSubfams = NodeIndexes;
+	*ptruSubfamCount = uFound;
+	}
+
+// Write a table to the log listing, for each subfamily (numbered from 1),
+// the names of the leaves found below its node.
+static void LogSubfams(const Tree &tree, const unsigned Subfams[],
+  unsigned uSubfamCount)
+	{
+	const unsigned uNodeCount = tree.GetNodeCount();
+
+	Log("%u subfamilies found\n", uSubfamCount);
+	Log("Subfam  Sequence\n");
+	Log("------  --------\n");
+
+	unsigned *LeafBuffer = new unsigned[uNodeCount];
+	for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
+		{
+		unsigned uLeafCount;
+		GetLeaves(tree, Subfams[uSubfamIndex], LeafBuffer, &uLeafCount);
+
+		const unsigned uSubfamNumber = uSubfamIndex + 1;
+		for (unsigned n = 0; n < uLeafCount; ++n)
+			{
+			const unsigned uLeafNodeIndex = LeafBuffer[n];
+			Log("%6u  %s\n", uSubfamNumber, tree.GetLeafName(uLeafNodeIndex));
+			}
+		Log("\n");
+		}
+	delete[] LeafBuffer;
+	}
+
+// Refine each subfamily of the alignment separately, then, if any
+// subfamily changed, rebuild the full alignment by progressively aligning
+// the subfamily alignments along tree.
+//
+//   msa    - alignment to refine; replaced when any subfamily changed.
+//   tree   - guide tree whose leaves correspond to the sequences of msa.
+//   uIters - iteration limit passed to RefineVert / RefineHoriz.
+//
+// Returns true if any subfamily alignment was changed. Alignments with
+// fewer than three sequences are left untouched.
+bool RefineSubfams(MSA &msa, const Tree &tree, unsigned uIters)
+	{
+	const unsigned uSeqCount = msa.GetSeqCount();
+	if (uSeqCount < 3)
+		return false;
+
+	const double dMaxHeight = 0.6;
+	const unsigned uMaxSubfamCount = 16;
+
+	unsigned *Subfams;
+	unsigned uSubfamCount;
+	GetSubfams(tree, dMaxHeight, uMaxSubfamCount, &Subfams, &uSubfamCount);
+	assert(uSubfamCount <= uSeqCount);
+
+	if (g_bVerbose)
+		LogSubfams(tree, Subfams, uSubfamCount);
+
+	MSA *SubfamMSAs = new MSA[uSubfamCount];
+	unsigned *Leaves = new unsigned[uSeqCount];
+	unsigned *Ids = new unsigned[uSeqCount];
+
+	bool bAnyChanges = false;
+	for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
+		{
+		unsigned uSubfam = Subfams[uSubfamIndex];
+		unsigned uLeafCount;
+		GetLeaves(tree, uSubfam, Leaves, &uLeafCount);
+		assert(uLeafCount <= uSeqCount);
+
+		LeafIndexesToIds(tree, Leaves, uLeafCount, Ids);
+
+		MSA &msaSubfam = SubfamMSAs[uSubfamIndex];
+		MSASubsetByIds(msa, Ids, uLeafCount, msaSubfam);
+		DeleteGappedCols(msaSubfam);
+
+#if	TRACE
+		Log("Subfam %u MSA=\n", uSubfamIndex);
+		msaSubfam.LogMe();
+#endif
+
+	// Subfamilies of one or two sequences are kept as extracted.
+		if (msaSubfam.GetSeqCount() <= 2)
+			continue;
+
+	// TODO /////////////////////////////////////////
+	// Try using existing tree, may actually hurt to
+	// re-estimate, may also be a waste of CPU & mem.
+	/////////////////////////////////////////////////
+		Tree SubfamTree;
+		TreeFromMSA(msaSubfam, SubfamTree, g_Cluster2, g_Distance2, g_Root2);
+
+		bool bAnyChangesThisSubfam;
+		if (g_bAnchors)
+			bAnyChangesThisSubfam = RefineVert(msaSubfam, SubfamTree, uIters);
+		else
+			bAnyChangesThisSubfam = RefineHoriz(msaSubfam, SubfamTree, uIters, false, false);
+#if	TRACE
+		Log("Subfam %u Changed %d\n", uSubfamIndex, bAnyChangesThisSubfam);
+#endif
+		if (bAnyChangesThisSubfam)
+			bAnyChanges = true;
+		}
+
+	if (bAnyChanges)
+		ProgressiveAlignSubfams(tree, Subfams, uSubfamCount, SubfamMSAs, msa);
+
+	delete[] Ids;
+	delete[] Leaves;
+	delete[] Subfams;
+	delete[] SubfamMSAs;
+
+	return bAnyChanges;
+	}
+
+// Combine the subfamily alignments into one alignment by walking tree
+// depth-first and aligning the two child alignments of every internal
+// node whose children are both available.
+//
+//   tree         - guide tree.
+//   Subfams      - node index in tree of each subfamily.
+//   uSubfamCount - number of subfamilies.
+//   SubfamMSAs   - alignment of each subfamily, parallel to Subfams.
+//   msa          - receives a copy of the alignment built at the root.
+static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[],
+  unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa)
+	{
+	const unsigned uNodeCount = tree.GetNodeCount();
+
+	bool *Ready = new bool[uNodeCount];
+	MSA **MSAs = new MSA *[uNodeCount];
+	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
+		{
+		Ready[uNodeIndex] = false;
+		MSAs[uNodeIndex] = 0;
+		}
+
+// Seed the subfamily nodes with copies of their alignments.
+	for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
+		{
+		unsigned uNodeIndex = Subfams[uSubfamIndex];
+		Ready[uNodeIndex] = true;
+		MSA *ptrMSA = new MSA;
+	// TODO: Wasteful copy, needs re-design
+		ptrMSA->Copy(SubfamMSAs[uSubfamIndex]);
+		MSAs[uNodeIndex] = ptrMSA;
+		}
+
+	for (unsigned uNodeIndex = tree.FirstDepthFirstNode();
+	  NULL_NEIGHBOR != uNodeIndex;
+	  uNodeIndex = tree.NextDepthFirstNode(uNodeIndex))
+		{
+		if (tree.IsLeaf(uNodeIndex))
+			continue;
+
+	// Nodes whose children are not both ready are skipped (for example
+	// internal nodes inside a subfamily).
+		unsigned uRight = tree.GetRight(uNodeIndex);
+		unsigned uLeft = tree.GetLeft(uNodeIndex);
+		if (!Ready[uRight] || !Ready[uLeft])
+			continue;
+
+		MSA *ptrLeft = MSAs[uLeft];
+		MSA *ptrRight = MSAs[uRight];
+		assert(ptrLeft != 0 && ptrRight != 0);
+
+		MSA *ptrParent = new MSA;
+
+		PWPath Path;
+		AlignTwoMSAs(*ptrLeft, *ptrRight, *ptrParent, Path);
+
+		MSAs[uNodeIndex] = ptrParent;
+		Ready[uNodeIndex] = true;
+		Ready[uLeft] = false;
+		Ready[uRight] = false;
+
+	// The children are consumed once the parent alignment exists.
+		delete MSAs[uLeft];
+		delete MSAs[uRight];
+		MSAs[uLeft] = 0;
+		MSAs[uRight] = 0;
+		}
+
+#if	DEBUG
+	{
+	unsigned uReadyCount = 0;
+	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
+		{
+		if (Ready[uNodeIndex])
+			{
+			assert(tree.IsRoot(uNodeIndex));
+			++uReadyCount;
+			assert(0 != MSAs[uNodeIndex]);
+			}
+		else
+			assert(0 == MSAs[uNodeIndex]);
+		}
+	assert(1 == uReadyCount);
+	}
+#endif
+
+	const unsigned uRoot = tree.GetRootNodeIndex();
+	MSA *ptrRootAlignment = MSAs[uRoot];
+
+	msa.Copy(*ptrRootAlignment);
+
+// Release every alignment still held (normally only the root; deleting a
+// null pointer is a no-op) and then the two bookkeeping arrays, which
+// were previously leaked.
+	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
+		{
+		delete MSAs[uNodeIndex];
+		MSAs[uNodeIndex] = 0;
+		}
+	delete[] MSAs;
+	delete[] Ready;
+
+#if	TRACE
+	Log("After refine subfamilies, root alignment=\n");
+	msa.LogMe();
+#endif
+	}

Added: trunk/packages/muscle/branches/upstream/current/refinetree.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/refinetree.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/refinetree.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,59 @@
+#include "muscle.h"
+#include "msa.h"
+#include "tree.h"
+#include "profile.h"
+#include <stdio.h>
+
+// Alternate between re-estimating the tree from the current alignment and
+// re-aligning along the parts of the tree that changed. The loop runs at
+// most g_uMaxTreeRefineIters times and ends early once the number of
+// differences is zero or no longer shrinks. On return tree holds the most
+// recently estimated tree and msa the most recent alignment.
+// Quits if the leaf count of tree differs from the sequence count of msa;
+// does nothing for fewer than three sequences.
+void RefineTree(MSA &msa, Tree &tree)
+	{
+	const unsigned uSeqCount = msa.GetSeqCount();
+	if (tree.GetLeafCount() != uSeqCount)
+		Quit("Refine tree, tree has different number of nodes");
+
+	if (uSeqCount < 3)
+		return;
+
+#if	DEBUG
+	ValidateMuscleIds(msa);
+	ValidateMuscleIds(tree);
+#endif
+
+	unsigned *IdToDiffsLeaf = new unsigned[uSeqCount];
+	unsigned uPrevDiffsCount = uSeqCount;
+	Tree NewTree;
+	for (unsigned uIter = 0; uIter < g_uMaxTreeRefineIters; ++uIter)
+		{
+		TreeFromMSA(msa, NewTree, g_Cluster2, g_Distance2, g_Root2);
+
+#if	DEBUG
+		ValidateMuscleIds(NewTree);
+#endif
+
+		Tree Diffs;
+		DiffTrees(NewTree, tree, Diffs, IdToDiffsLeaf);
+
+		// The new tree is adopted whether or not we re-align below.
+		tree.Copy(NewTree);
+
+		const unsigned uDiffsNodeCount = Diffs.GetNodeCount();
+		const unsigned uDiffsCount = (uDiffsNodeCount - 1)/2;
+
+		const bool bFewerDiffs =
+		  (0 != uDiffsCount && uDiffsCount < uPrevDiffsCount);
+		if (!bFewerDiffs)
+			{
+			ProgressStepsDone();
+			break;
+			}
+		uPrevDiffsCount = uDiffsCount;
+
+		MSA msaRealigned;
+		RealignDiffs(msa, Diffs, IdToDiffsLeaf, msaRealigned);
+
+#if	DEBUG
+		ValidateMuscleIds(msaRealigned);
+#endif
+
+		msa.Copy(msaRealigned);
+		SetCurrentAlignment(msa);
+		}
+
+	delete[] IdToDiffsLeaf;
+	}

Added: trunk/packages/muscle/branches/upstream/current/refinetreee.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/refinetreee.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/refinetreee.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,51 @@
+#include "muscle.h"
+#include "msa.h"
+#include "tree.h"
+#include "profile.h"
+#include <stdio.h>
+
+#define TRACE	0
+
+// Single-pass tree refinement that re-uses the ProgNodes of the previous
+// progressive alignment. A new tree is estimated from msa and compared
+// with tree; only when the root of the new tree is flagged NODE_CHANGED
+// are the alignment and the tree replaced.
+// Quits if the leaf count of tree differs from the sequence count of msa;
+// does nothing for fewer than three sequences.
+void RefineTreeE(MSA &msa, const SeqVect &v, Tree &tree, ProgNode *ProgNodes)
+	{
+	const unsigned uSeqCount = msa.GetSeqCount();
+	if (tree.GetLeafCount() != uSeqCount)
+		Quit("Refine tree, tree has different number of nodes");
+
+	if (uSeqCount < 3)
+		return;
+
+#if	DEBUG
+	ValidateMuscleIds(msa);
+	ValidateMuscleIds(tree);
+#endif
+
+	const unsigned uNodeCount = tree.GetNodeCount();
+	unsigned *NewToOld = new unsigned[uNodeCount];
+
+	Tree NewTree;
+	TreeFromMSA(msa, NewTree, g_Cluster2, g_Distance2, g_Root2);
+
+#if	DEBUG
+	ValidateMuscleIds(NewTree);
+#endif
+
+	DiffTreesE(NewTree, tree, NewToOld);
+
+	const unsigned uNewRoot = NewTree.GetRootNodeIndex();
+	const bool bRootChanged = (NODE_CHANGED == NewToOld[uNewRoot]);
+	if (bRootChanged)
+		{
+		MSA msaRealigned;
+		RealignDiffsE(msa, v, NewTree, tree, NewToOld, msaRealigned, ProgNodes);
+		tree.Copy(NewTree);
+		msa.Copy(msaRealigned);
+#if	DEBUG
+		ValidateMuscleIds(msaRealigned);
+#endif
+		}
+
+	delete[] NewToOld;
+
+	SetCurrentAlignment(msa);
+	ProgressStepsDone();
+	}

Added: trunk/packages/muscle/branches/upstream/current/refinevert.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/refinevert.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/refinevert.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,159 @@
+#include "muscle.h"
+#include "profile.h"
+#include "msa.h"
+#include "pwpath.h"
+#include "seqvect.h"
+#include "clust.h"
+#include "tree.h"
+
+#define TRACE 0
+
+// A block of alignment columns delimited by anchor ("best") columns.
+// As filled in by ColsToRanges and consumed by RefineVert, the block
+// starts at column m_uBestColLeft and spans
+// m_uBestColRight - m_uBestColLeft columns.
+struct Range
+	{
+	unsigned m_uBestColLeft;	// first column of the block
+	unsigned m_uBestColRight;	// column just past the end of the block
+	};
+
+// Log how much dynamic-programming area the anchor columns save: the
+// squared length of the whole alignment is compared with the sum of the
+// squared lengths of the individual ranges.
+// Only logs when both g_bVerbose and g_bAnchors are set.
+//
+//   uColCount       - number of columns in the alignment.
+//   uAnchorColCount - number of anchor columns found (logged as-is).
+//   Ranges          - column ranges between anchors.
+//   uRangeCount     - number of entries in Ranges.
+static void ListVertSavings(unsigned uColCount, unsigned uAnchorColCount,
+  const Range *Ranges, unsigned uRangeCount)
+	{
+	if (!g_bVerbose || !g_bAnchors)
+		return;
+
+// Areas are computed in double: squaring in unsigned arithmetic wraps
+// around for alignments of 65536 columns or more.
+	const double dColCount = (double) uColCount;
+	const double dTotalArea = dColCount*dColCount;
+	double dArea = 0.0;
+	for (unsigned i = 0; i < uRangeCount; ++i)
+		{
+		const double dLength =
+		  (double) (Ranges[i].m_uBestColRight - Ranges[i].m_uBestColLeft);
+		dArea += dLength*dLength;
+		}
+
+// Guard against 0/0 for an empty alignment.
+	const double dPct = (dTotalArea > 0.0) ?
+	  (dTotalArea - dArea)*100.0/dTotalArea : 0.0;
+	Log("Anchor columns found       %u\n", uAnchorColCount);
+	Log("DP area saved by anchors   %-4.1f%%\n", dPct);
+	}
+
+// Turn a list of N anchor columns into the N+1 column blocks they
+// delimit. Block k runs from anchor k-1 (column 0 for the first block) up
+// to anchor k (uColCount for the last block). Ranges must have room for
+// uBestColCount + 1 entries.
+static void ColsToRanges(const unsigned BestCols[], unsigned uBestColCount,
+  unsigned uColCount, Range Ranges[])
+	{
+	const unsigned uRangeCount = uBestColCount + 1;
+	for (unsigned uRange = 0; uRange < uRangeCount; ++uRange)
+		{
+		// Left edge: the previous anchor, or the start of the alignment.
+		const unsigned uLeft = (0 == uRange) ? 0 : BestCols[uRange - 1];
+
+		// Right edge: this block's anchor, or the end of the alignment.
+		const unsigned uRight =
+		  (uRange < uBestColCount) ? BestCols[uRange] : uColCount;
+
+		Range &r = Ranges[uRange];
+		r.m_uBestColLeft = uLeft;
+		r.m_uBestColRight = uRight;
+		}
+	}
+
+// Refine an alignment block by block: anchor columns split msaIn into
+// vertical blocks, each block of two or more columns is refined with
+// RefineHoriz, and the blocks are concatenated again.
+//
+//   msaIn  - alignment to refine; replaced by the concatenated result
+//            only if some block reported a change.
+//   tree   - guide tree passed to RefineHoriz for every block.
+//   uIters - iteration limit passed to RefineHoriz.
+//
+// Return true if any changes made. Alignments with fewer than three
+// columns or fewer than three sequences are left untouched.
+bool RefineVert(MSA &msaIn, const Tree &tree, unsigned uIters)
+	{
+	bool bAnyChanges = false;
+
+	const unsigned uColCountIn = msaIn.GetColCount();
+	const unsigned uSeqCountIn = msaIn.GetSeqCount();
+
+	if (uColCountIn < 3 || uSeqCountIn < 3)
+		return false;
+
+	unsigned *AnchorCols = new unsigned[uColCountIn];
+	unsigned uAnchorColCount;
+	SetMSAWeightsMuscle(msaIn);
+	FindAnchorCols(msaIn, AnchorCols, &uAnchorColCount);
+
+// N anchor columns delimit N+1 blocks.
+	const unsigned uRangeCount = uAnchorColCount + 1;
+	Range *Ranges = new Range[uRangeCount];
+
+#if	TRACE
+	Log("%u ranges\n", uRangeCount);
+#endif
+
+	ColsToRanges(AnchorCols, uAnchorColCount, uColCountIn, Ranges);
+	ListVertSavings(uColCountIn, uAnchorColCount, Ranges, uRangeCount);
+
+#if	TRACE
+	{
+	Log("Anchor cols: ");
+	for (unsigned i = 0; i < uAnchorColCount; ++i)
+		Log(" %u", AnchorCols[i]);
+	Log("\n");
+
+	Log("Ranges:\n");
+	for (unsigned i = 0; i < uRangeCount; ++i)
+		Log("%4u - %4u\n", Ranges[i].m_uBestColLeft, Ranges[i].m_uBestColRight);
+	}
+#endif
+
+	delete[] AnchorCols;
+
+// Output starts with zero columns and the same names/ids as the input;
+// refined blocks are appended to it one by one.
+	MSA msaOut;
+	msaOut.SetSize(uSeqCountIn, 0);
+
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCountIn; ++uSeqIndex)
+		{
+		const char *ptrName = msaIn.GetSeqName(uSeqIndex);
+		unsigned uId = msaIn.GetSeqId(uSeqIndex);
+		msaOut.SetSeqName(uSeqIndex, ptrName);
+		msaOut.SetSeqId(uSeqIndex, uId);
+		}
+
+	for (unsigned uRangeIndex = 0; uRangeIndex < uRangeCount; ++uRangeIndex)
+		{
+		MSA msaRange;
+
+		const Range &r = Ranges[uRangeIndex];
+
+		const unsigned uFromColIndex = r.m_uBestColLeft;
+		const unsigned uRangeColCount = r.m_uBestColRight - uFromColIndex;
+
+	// Empty blocks contribute nothing; single-column blocks are copied
+	// through without refinement.
+		if (0 == uRangeColCount)
+			continue;
+		else if (1 == uRangeColCount)
+			{
+			MSAFromColRange(msaIn, uFromColIndex, 1, msaRange);
+			MSAAppend(msaOut, msaRange);
+			continue;
+			}
+		MSAFromColRange(msaIn, uFromColIndex, uRangeColCount, msaRange);
+
+#if	TRACE
+		Log("\n-------------\n");
+		Log("Range %u - %u count=%u\n", r.m_uBestColLeft, r.m_uBestColRight, uRangeColCount);
+		Log("Before:\n");
+		msaRange.LogMe();
+#endif
+
+	// Every block edge except the outer edges of the first and last
+	// block is locked.
+		bool bLockLeft = (0 != uRangeIndex);
+		bool bLockRight = (uRangeCount - 1 != uRangeIndex);
+		bool bAnyChangesThisBlock = RefineHoriz(msaRange, tree, uIters, bLockLeft, bLockRight);
+		bAnyChanges = (bAnyChanges || bAnyChangesThisBlock);
+
+#if	TRACE
+		Log("After:\n");
+		msaRange.LogMe();
+#endif
+
+		MSAAppend(msaOut, msaRange);
+
+#if	TRACE
+		Log("msaOut after Cat:\n");
+		msaOut.LogMe();
+#endif
+		}
+
+#if	DEBUG
+// Sanity check
+	AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);
+#endif
+
+	delete[] Ranges;
+	if (bAnyChanges)
+		msaIn.Copy(msaOut);
+	return bAnyChanges;
+	}

Added: trunk/packages/muscle/branches/upstream/current/refinew.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/refinew.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/refinew.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,227 @@
+#include "muscle.h"
+#include "msa.h"
+#include "seqvect.h"
+#include "textfile.h"
+
+#define MEMDEBUG	0
+
+#if	MEMDEBUG
+#include <crtdbg.h>
+#endif
+
+void MUSCLE(SeqVect &v, MSA &msaOut);
+
+// Append the columns of msa2 to the right of msa1.
+// Rows are matched by sequence id: for every sequence of msa1, the
+// characters of the msa2 row with the same id are written after msa1's
+// existing columns; if msa2 has no such row, gap characters ('-') are
+// written instead.
+void AppendMSA(MSA &msa1, const MSA &msa2)
+	{
+	const unsigned uRowCount = msa1.GetSeqCount();
+
+	const unsigned uOldColCount = msa1.GetColCount();
+	const unsigned uAddedColCount = msa2.GetColCount();
+
+	for (unsigned uRow = 0; uRow < uRowCount; ++uRow)
+		{
+		const unsigned uId = msa1.GetSeqId(uRow);
+
+		unsigned uRow2;
+		const bool bInMsa2 = msa2.GetSeqIndex(uId, &uRow2);
+
+		for (unsigned uCol = 0; uCol < uAddedColCount; ++uCol)
+			{
+			char c = '-';
+			if (bInMsa2)
+				c = msa2.GetChar(uRow2, uCol);
+			msa1.SetChar(uRow, uOldColCount + uCol, c);
+			}
+		}
+	}
+
+// Build sequence s from row uSeqIndex of msa, using columns uColFrom
+// through uColTo (both inclusive) with gap characters removed. The name
+// and id of the row are carried over.
+static void SeqFromMSACols(const MSA &msa, unsigned uSeqIndex, unsigned uColFrom,
+  unsigned uColTo, Seq &s)
+	{
+	s.Clear();
+	s.SetName(msa.GetSeqName(uSeqIndex));
+	s.SetId(msa.GetSeqId(uSeqIndex));
+
+	for (unsigned uCol = uColFrom; uCol <= uColTo; ++uCol)
+		{
+		const char cLetter = msa.GetChar(uSeqIndex, uCol);
+		if (IsGapChar(cLetter))
+			continue;
+		s.AppendChar(cLetter);
+		}
+	}
+
+// Fill v with one ungapped sequence per row of msa, each taken from
+// columns uColFrom through uColTo (inclusive). Any previous content of v
+// is discarded.
+static void SeqVectFromMSACols(const MSA &msa, unsigned uColFrom, unsigned uColTo,
+  SeqVect &v)
+	{
+	v.Clear();
+
+	const unsigned uRowCount = msa.GetSeqCount();
+	for (unsigned uRow = 0; uRow != uRowCount; ++uRow)
+		{
+		Seq seqRow;
+		SeqFromMSACols(msa, uRow, uColFrom, uColTo, seqRow);
+		v.AppendSeq(seqRow);
+		}
+	}
+
+// Window-based refinement: cut msaIn into windows of g_uRefineWindow
+// columns, re-align the ungapped sequences of each window from scratch
+// with MUSCLE() and append the results to msaOut.
+//
+// Globals read: g_uRefineWindow, g_uWindowFrom, g_uWindowTo,
+// g_uWindowOffset, g_uSaveWindow. g_uWindowTo is overwritten with the
+// last window index when it is 0 on entry.
+// NOTE(review): g_uRefineWindow == 0 would divide by zero below, and an
+// input with zero columns makes uWindowCount - 1 wrap -- confirm callers
+// rule both out.
+void RefineW(const MSA &msaIn, MSA &msaOut)
+	{
+	const unsigned uSeqCount = msaIn.GetSeqCount();
+	const unsigned uColCount = msaIn.GetColCount();
+
+// Reserve same nr seqs, 20% more cols
+	const unsigned uReserveColCount = (uColCount*120)/100;
+	msaOut.SetSize(uSeqCount, uReserveColCount);
+
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		msaOut.SetSeqName(uSeqIndex, msaIn.GetSeqName(uSeqIndex));
+		msaOut.SetSeqId(uSeqIndex, msaIn.GetSeqId(uSeqIndex));
+		}
+
+// Number of windows, rounding up so a partial last window is included.
+	const unsigned uWindowCount = (uColCount + g_uRefineWindow - 1)/g_uRefineWindow;
+	if (0 == g_uWindowTo)
+		g_uWindowTo = uWindowCount - 1;
+
+#if	MEMDEBUG
+	_CrtSetBreakAlloc(1560);
+#endif
+
+// Columns before the window offset are taken from the input unchanged.
+// NOTE(review): msaTmp in this block is never used.
+	if (g_uWindowOffset > 0)
+		{
+		MSA msaTmp;
+		MSAFromColRange(msaIn, 0, g_uWindowOffset, msaOut);
+		}
+
+	fprintf(stderr, "\n");
+	for (unsigned uWindowIndex = g_uWindowFrom; uWindowIndex <= g_uWindowTo; ++uWindowIndex)
+		{
+	// NOTE(review): %d is used with unsigned arguments here and in the
+	// sprintf calls below.
+		fprintf(stderr, "Window %d of %d    \r", uWindowIndex, uWindowCount);
+	// Inclusive column range of this window, clipped to the alignment.
+		const unsigned uColFrom = g_uWindowOffset + uWindowIndex*g_uRefineWindow;
+		unsigned uColTo = uColFrom + g_uRefineWindow - 1;
+		if (uColTo >= uColCount)
+			uColTo = uColCount - 1;
+		assert(uColTo >= uColFrom);
+
+		SeqVect v;
+		SeqVectFromMSACols(msaIn, uColFrom, uColTo, v);
+
+#if	MEMDEBUG
+		_CrtMemState s1;
+		_CrtMemCheckpoint(&s1);
+#endif
+
+		MSA msaTmp;
+		MUSCLE(v, msaTmp);
+		AppendMSA(msaOut, msaTmp);
+	// Optionally dump the input alignment, input sequences and output
+	// alignment of one selected window to files in the current directory.
+		if (uWindowIndex == g_uSaveWindow)
+			{
+			MSA msaInTmp;
+		// NOTE(review): uOutCols is never used.
+			unsigned uOutCols = msaOut.GetColCount();
+			unsigned un = uColTo - uColFrom + 1;
+			MSAFromColRange(msaIn, uColFrom, un, msaInTmp);
+
+			char fn[256];
+			sprintf(fn, "win%d_inaln.tmp", uWindowIndex);
+			TextFile fIn(fn, true);
+			msaInTmp.ToFile(fIn);
+
+			sprintf(fn, "win%d_inseqs.tmp", uWindowIndex);
+			TextFile fv(fn, true);
+			v.ToFile(fv);
+
+			sprintf(fn, "win%d_outaln.tmp", uWindowIndex);
+			TextFile fOut(fn, true);
+			msaTmp.ToFile(fOut);
+			}
+
+#if	MEMDEBUG
+	// Memory-debug build: dump allocation statistics for the first window
+	// processed and exit.
+		void FreeDPMemSPN();
+		FreeDPMemSPN();
+
+		_CrtMemState s2;
+		_CrtMemCheckpoint(&s2);
+
+		_CrtMemState s;
+		_CrtMemDifference(&s, &s1, &s2);
+
+		_CrtMemDumpStatistics(&s);
+		_CrtMemDumpAllObjectsSince(&s1);
+		exit(1);
+#endif
+//#if	DEBUG
+//		AssertMSAEqIgnoreCaseAndGaps(msaInTmp, msaTmp);
+//#endif
+		}
+	fprintf(stderr, "\n");
+
+//	AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);//@@uncomment!
+	}
+
+// Entry point for window-based refinement: read the alignment named by
+// g_pstrInFileName, assign sequence ids, select the alphabet, run RefineW
+// and write the result via MuscleOutput.
+void DoRefineW()
+	{
+	SetOutputFileName(g_pstrOutFileName);
+	SetInputFileName(g_pstrInFileName);
+	SetStartTime();
+
+	SetMaxIters(g_uMaxIters);
+	SetSeqWeightMethod(g_SeqWeight1);
+
+	TextFile fileIn(g_pstrInFileName);
+	MSA msa;
+	msa.FromFile(fileIn);
+
+	const unsigned uSeqCount = msa.GetSeqCount();
+	if (0 == uSeqCount)
+		Quit("No sequences in input file");
+
+	MSA::SetIdCount(uSeqCount);
+
+// Initialize sequence ids.
+// From this point on, ids must somehow propagate from here.
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		msa.SetSeqId(uSeqIndex, uSeqIndex);
+	SetMuscleInputMSA(msa);
+
+// Map the requested sequence type to an alphabet; SEQTYPE_Auto asks the
+// alignment itself to guess.
+	ALPHA Alpha = ALPHA_Undefined;
+	switch (g_SeqType)
+		{
+	case SEQTYPE_Auto:
+		Alpha = msa.GuessAlpha();
+		break;
+
+	case SEQTYPE_Protein:
+		Alpha = ALPHA_Amino;
+		break;
+
+	case SEQTYPE_DNA:
+		Alpha = ALPHA_DNA;
+		break;
+
+	case SEQTYPE_RNA:
+		Alpha = ALPHA_RNA;
+		break;
+
+	default:
+		Quit("Invalid SeqType");
+		}
+	SetAlpha(Alpha);
+	msa.FixAlpha();
+
+// Nucleotide input switches the profile-profile score to PPSCORE_SPN.
+	if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)
+		SetPPScore(PPSCORE_SPN);
+
+	MSA msaOut;
+	RefineW(msa, msaOut);
+
+//	ValidateMuscleIds(msa);
+
+//	TextFile fileOut(g_pstrOutFileName, true);
+//	msaOut.ToFile(fileOut);
+	MuscleOutput(msaOut);
+	}

Added: trunk/packages/muscle/branches/upstream/current/savebest.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/savebest.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/savebest.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,66 @@
+#include "muscle.h"
+#include "msa.h"
+#include "textfile.h"
+#include <time.h>
+
+static MSA *ptrBestMSA;
+static const char *pstrOutputFileName;
+
+// Remember the path that SaveCurrentAlignment() will write to.
+// Only the pointer is stored; the string itself is not copied.
+void SetOutputFileName(const char *out)
+	{
+	pstrOutputFileName = out;
+	}
+
+// Register the alignment that SaveCurrentAlignment() will write.
+// Only the address of msa is stored (no copy is made), so the object must
+// still exist when the save happens.
+void SetCurrentAlignment(MSA &msa)
+	{
+	ptrBestMSA = &msa;
+	}
+
+// Write the alignment registered with SetCurrentAlignment() to the file
+// registered with SetOutputFileName(), in FASTA format.
+// Exits the process with EXIT_FatalError if no alignment or no file name
+// has been registered, or if the function is re-entered while a save is
+// already in progress.
+void SaveCurrentAlignment()
+	{
+	static bool bCalled = false;
+	if (bCalled)
+		{
+		fprintf(stderr,
+		  "\nRecursive call to SaveCurrentAlignment, giving up attempt to save.\n");
+		exit(EXIT_FatalError);
+		}
+// Arm the re-entrancy guard for the duration of the save, so that a nested
+// call is detected by the check above.
+	bCalled = true;
+
+	if (0 == ptrBestMSA)
+		{
+		fprintf(stderr, "\nAlignment not completed, cannot save.\n");
+		Log("Alignment not completed, cannot save.\n");
+		exit(EXIT_FatalError);
+		}
+
+	if (0 == pstrOutputFileName)
+		{
+		fprintf(stderr, "\nOutput file name not specified, cannot save.\n");
+		exit(EXIT_FatalError);
+		}
+
+	fprintf(stderr, "\nSaving current alignment ...\n");
+
+	TextFile fileOut(pstrOutputFileName, true);
+	ptrBestMSA->ToFASTAFile(fileOut);
+
+	fprintf(stderr, "Current alignment saved to \"%s\".\n", pstrOutputFileName);
+	Log("Current alignment saved to \"%s\".\n", pstrOutputFileName);
+
+// Save completed; a later, non-nested call is allowed again.
+	bCalled = false;
+	}
+
+// Enforce the time limit g_ulMaxSecs (0 means no limit).
+// When more than g_ulMaxSecs seconds have elapsed since GetStartTime(),
+// logs the event, saves the current alignment and exits with EXIT_Success.
+void CheckMaxTime()
+	{
+	if (0 == g_ulMaxSecs)
+		return;
+
+	time_t Now = time(0);
+	time_t ElapsedSecs = Now - GetStartTime();
+	if (ElapsedSecs <= (time_t) g_ulMaxSecs)
+		return;
+
+// time_t has no portable printf conversion; print it as unsigned long.
+	Log("Max time %s exceeded, elapsed seconds = %lu\n",
+	  MaxSecsToStr(), (unsigned long) ElapsedSecs);
+
+	SaveCurrentAlignment();
+	exit(EXIT_Success);
+	}

Added: trunk/packages/muscle/branches/upstream/current/scoregaps.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/scoregaps.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/scoregaps.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,201 @@
+#include "muscle.h"
+#include "msa.h"
+#include "objscore.h"
+
+#define TRACE	0
+
+// One run of consecutive gap columns in a sequence, kept in a singly
+// linked list (per-sequence list or the free list).
+struct GAPINFO
+	{
+	GAPINFO *Next;		// next record in the list, 0 at the end
+	unsigned Start;		// first column of the gap
+	unsigned End;		// last column of the gap (inclusive)
+	};
+
+static GAPINFO **g_Gaps;
+static GAPINFO *g_FreeList;
+static unsigned g_MaxSeqCount;
+static unsigned g_MaxColCount;
+static unsigned g_ColCount;
+static bool *g_ColDiff;
+
+// Hand out one GAPINFO record from the free list, refilling the list with a
+// freshly allocated batch when it is empty. Start and End of the returned
+// record are left unset.
+static GAPINFO *NewGapInfo()
+	{
+	if (g_FreeList == 0)
+		{
+		const int BATCH = 256;
+		GAPINFO *Batch = new GAPINFO[BATCH];
+	// Thread the batch into a list, working from the last record back
+	// to the first so that Batch[0] ends up at the head.
+		GAPINFO *Head = 0;
+		for (int i = BATCH - 1; i >= 0; --i)
+			{
+			Batch[i].Next = Head;
+			Head = &Batch[i];
+			}
+		g_FreeList = Head;
+		}
+	GAPINFO *Node = g_FreeList;
+	g_FreeList = Node->Next;
+	return Node;
+	}
+
+// Return a record to the free list by pushing it onto the front.
+static void FreeGapInfo(GAPINFO *GI)
+	{
+	GI->Next = g_FreeList;
+	g_FreeList = GI;
+	}
+
+// Record every maximal run of gap columns in sequence SeqIndex that
+// contains at least one column flagged in g_ColDiff. Each such run is
+// pushed onto the front of g_Gaps[SeqIndex].
+// TODO: This could be much faster, no need to look
+// at all columns.
+static void FindIntersectingGaps(const MSA &msa, unsigned SeqIndex)
+	{
+	const unsigned ColCount = msa.GetColCount();
+	bool InsideRun = false;
+	bool RunIntersects = false;
+	unsigned RunStart = uInsane;
+// Col == ColCount is a sentinel "non-gap" column that closes a run which
+// extends to the end of the alignment.
+	for (unsigned Col = 0; Col <= ColCount; ++Col)
+		{
+		const bool AtEnd = (Col == ColCount);
+		const bool IsGapCol = !AtEnd && msa.IsGap(SeqIndex, Col);
+		if (IsGapCol)
+			{
+			if (!InsideRun)
+				{
+				RunStart = Col;
+				InsideRun = true;
+				}
+			if (g_ColDiff[Col])
+				RunIntersects = true;
+			continue;
+			}
+
+		if (!InsideRun)
+			continue;
+
+	// A run has just ended at column Col - 1.
+		InsideRun = false;
+		if (RunIntersects)
+			{
+			GAPINFO *Node = NewGapInfo();
+			Node->Start = RunStart;
+			Node->End = Col - 1;
+			Node->Next = g_Gaps[SeqIndex];
+			g_Gaps[SeqIndex] = Node;
+			}
+		RunIntersects = false;
+		}
+	}
+
+// Affine penalty for a gap of the given length: gap-open plus gap-extend
+// for each column after the first. With DOUBLE_AFFINE the larger of the
+// two affine penalties is returned. A zero-length gap costs nothing.
+// Term is not used.
+static SCORE Penalty(unsigned Length, bool Term)
+	{
+	if (Length == 0)
+		return 0;
+	SCORE Best = g_scoreGapOpen + g_scoreGapExtend*(Length - 1);
+#if	DOUBLE_AFFINE
+	const SCORE Alt = g_scoreGapOpen2 + g_scoreGapExtend2*(Length - 1);
+	if (!(Best > Alt))
+		Best = Alt;
+#endif
+	return Best;
+	}
+
+//static SCORE ScorePair(unsigned Seq1, unsigned Seq2)
+//	{
+//#if	TRACE
+//	{
+//	Log("ScorePair(%d,%d)\n", Seq1, Seq2);
+//	Log("Gaps seq 1: ");
+//	for (GAPINFO *GI = g_Gaps[Seq1]; GI; GI = GI->Next)
+//		Log(" %d-%d", GI->Start, GI->End);
+//	Log("\n");
+//	Log("Gaps seq 2: ");
+//	for (GAPINFO *GI = g_Gaps[Seq2]; GI; GI = GI->Next)
+//		Log(" %d-%d", GI->Start, GI->End);
+//	Log("\n");
+//	}
+//#endif
+//	return 0;
+//	}
+
+// Weighted sum over all sequence pairs of ScoreSeqPairGaps().
+//	msa				alignment to score
+//	DiffCols		column indexes to flag in g_ColDiff, each < column count
+//	DiffColCount	number of entries in DiffCols
+// As a side effect the per-sequence lists in g_Gaps are rebuilt to hold the
+// gaps that intersect a flagged column. The records of the previous call
+// are returned to the free list first so they are reused, not leaked.
+SCORE ScoreGaps(const MSA &msa, const unsigned DiffCols[], unsigned DiffColCount)
+	{
+#if	TRACE
+	{
+	Log("ScoreGaps\n");
+	Log("DiffCols ");
+	for (unsigned i = 0; i < DiffColCount; ++i)
+		Log(" %u", DiffCols[i]);
+	Log("\n");
+	Log("msa=\n");
+	msa.LogMe();
+	Log("\n");
+	}
+#endif
+	const unsigned SeqCount = msa.GetSeqCount();
+	const unsigned ColCount = msa.GetColCount();
+	g_ColCount = ColCount;
+
+// Number of leading g_Gaps entries that hold valid list heads left over
+// from the previous call.
+	static unsigned s_UsedSeqCount = 0;
+
+// Recycle the previous call's gap records before the list heads are
+// discarded below.
+	for (unsigned SeqIndex = 0; SeqIndex < s_UsedSeqCount; ++SeqIndex)
+		{
+		GAPINFO *GI = g_Gaps[SeqIndex];
+		while (0 != GI)
+			{
+			GAPINFO *Next = GI->Next;
+			FreeGapInfo(GI);
+			GI = Next;
+			}
+		}
+	s_UsedSeqCount = 0;
+
+	if (SeqCount > g_MaxSeqCount)
+		{
+		delete[] g_Gaps;
+		g_MaxSeqCount = SeqCount + 256;
+		g_Gaps = new GAPINFO *[g_MaxSeqCount];
+		}
+	memset(g_Gaps, 0, SeqCount*sizeof(GAPINFO *));
+	s_UsedSeqCount = SeqCount;
+
+	if (ColCount > g_MaxColCount)
+		{
+		delete[] g_ColDiff;
+		g_MaxColCount = ColCount + 256;
+		g_ColDiff = new bool[g_MaxColCount];
+		}
+
+	memset(g_ColDiff, 0, g_ColCount*sizeof(bool));
+	for (unsigned i = 0; i < DiffColCount; ++i)
+		{
+		unsigned Col = DiffCols[i];
+		assert(Col < ColCount);
+		g_ColDiff[Col] = true;
+		}
+
+	for (unsigned SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
+		FindIntersectingGaps(msa, SeqIndex);
+
+#if	TRACE
+	{
+	Log("\n");
+	Log("Intersecting gaps:\n");
+	Log("      ");
+	for (unsigned Col = 0; Col < ColCount; ++Col)
+		Log("%c", g_ColDiff[Col] ? '*' : ' ');
+	Log("\n");
+	Log("      ");
+	for (unsigned Col = 0; Col < ColCount; ++Col)
+		Log("%d", Col%10);
+	Log("\n");
+	for (unsigned Seq = 0; Seq < SeqCount; ++Seq)
+		{
+		Log("%3d:  ", Seq);
+		for (unsigned Col = 0; Col < ColCount; ++Col)
+			Log("%c", msa.GetChar(Seq, Col));
+		Log("  :: ");
+		for (GAPINFO *GI = g_Gaps[Seq]; GI; GI = GI->Next)
+			Log(" (%d,%d)", GI->Start, GI->End);
+		Log("  >%s\n", msa.GetSeqName(Seq));
+		}
+	Log("\n");
+	}
+#endif
+
+// Sum the pair scores over all unordered pairs, each scaled by the
+// product of the two sequence weights.
+	SCORE Score = 0;
+	for (unsigned Seq1 = 0; Seq1 < SeqCount; ++Seq1)
+		{
+		const WEIGHT w1 = msa.GetSeqWeight(Seq1);
+		for (unsigned Seq2 = Seq1 + 1; Seq2 < SeqCount; ++Seq2)
+			{
+			const WEIGHT w2 = msa.GetSeqWeight(Seq2);
+//			const SCORE Pair = ScorePair(Seq1, Seq2);
+			const SCORE Pair = ScoreSeqPairGaps(msa, Seq1, msa, Seq2);
+			Score += w1*w2*Pair;
+#if	TRACE
+			Log("Seq1=%u Seq2=%u ScorePair=%.4g w1=%.4g w2=%.4g Sum=%.4g\n",
+			  Seq1, Seq2, Pair, w1, w2, Score);
+#endif
+			}
+		}
+
+	return Score;
+	}

Added: trunk/packages/muscle/branches/upstream/current/scorehistory.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/scorehistory.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/scorehistory.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,101 @@
+#include "muscle.h"
+#include "scorehistory.h"
+#include <stdio.h>
+
+#define TRACE	0
+
+// Allocate a table of uIters rows with two score slots per node, plus a
+// parallel table of "score has been set" flags. All flags start false;
+// the scores themselves are left uninitialized.
+ScoreHistory::ScoreHistory(unsigned uIters, unsigned uNodeCount)
+	{
+	m_uIters = uIters;
+	m_uNodeCount = uNodeCount;
+
+	const unsigned uSlotCount = uNodeCount*2;
+	m_Score = new SCORE *[uIters];
+	m_bScoreSet = new bool *[uIters];
+	for (unsigned uIter = 0; uIter != uIters; ++uIter)
+		{
+		SCORE *Scores = new SCORE[uSlotCount];
+		bool *Flags = new bool[uSlotCount];
+		memset(Flags, 0, uSlotCount*sizeof(bool));
+		m_Score[uIter] = Scores;
+		m_bScoreSet[uIter] = Flags;
+		}
+	}
+
+// Release every per-iteration row, then the two row tables.
+ScoreHistory::~ScoreHistory()
+	{
+	for (unsigned uIter = 0; uIter != m_uIters; ++uIter)
+		{
+		delete[] m_bScoreSet[uIter];
+		delete[] m_Score[uIter];
+		}
+	delete[] m_bScoreSet;
+	delete[] m_Score;
+	}
+
+// Record Score for (uIter, uNodeIndex, bRight), unless the same score was
+// already recorded for that node/side in one of the iterations
+// 0 .. uIter-2 (the immediately preceding iteration is not examined).
+// Returns true, without recording, when such a repeat is found; in that
+// case ProgressStepsDone() is called. Returns false after recording.
+// Quits on an out-of-range index or when an earlier iteration examined
+// here has no score for this slot.
+bool ScoreHistory::SetScore(unsigned uIter, unsigned uNodeIndex, bool bRight, SCORE Score)
+	{
+#if	TRACE
+	Log("ScoreHistory::SetScore(Iter=%u Node=%u Right=%d Score=%g)\n",
+	  uIter, uNodeIndex, bRight, Score);
+#endif
+	if (uIter >= m_uIters)
+		Quit("ScoreHistory::SetScore-1");
+	if (uNodeIndex >= m_uNodeCount)
+		Quit("ScoreHistory::SetScore-2");
+
+// Slot layout: two consecutive entries per node, left then right.
+	const unsigned uSlot = 2*uNodeIndex + (bRight ? 1 : 0);
+	for (unsigned uPrev = 0; uPrev + 1 < uIter; ++uPrev)
+		{
+		if (!m_bScoreSet[uPrev][uSlot])
+			{
+			LogMe();
+			Quit("ScoreHistory::SetScore-3");
+			}
+		if (m_Score[uPrev][uSlot] != Score)
+			continue;
+
+		ProgressStepsDone();
+#if	TRACE
+		Log("Oscillating\n");
+#endif
+		return true;
+		}
+	m_bScoreSet[uIter][uSlot] = true;
+	m_Score[uIter][uSlot] = Score;
+	return false;
+	}
+
+// Write the recorded scores to the log, one line per set slot, stopping at
+// the first iteration in which no slot has been set.
+void ScoreHistory::LogMe() const
+	{
+	Log("ScoreHistory\n");
+	Log("Iter  Node  Right      Score\n");
+	Log("----  ----  -----  ---------\n");
+	const unsigned uSlotCount = m_uNodeCount*2;
+	for (unsigned uIter = 0; uIter < m_uIters; ++uIter)
+		{
+		const bool *Flags = m_bScoreSet[uIter];
+		const SCORE *Scores = m_Score[uIter];
+
+	// Look for the first set slot in this iteration.
+		unsigned uFirstSet = 0;
+		while (uFirstSet < uSlotCount && !Flags[uFirstSet])
+			++uFirstSet;
+		if (uFirstSet == uSlotCount)
+			return;
+
+		for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
+			{
+			const unsigned uLeft = 2*uNodeIndex;
+			const unsigned uRight = uLeft + 1;
+			if (Flags[uLeft])
+				Log("%4u  %4u         F  %9.3f\n", uIter, uNodeIndex, Scores[uLeft]);
+			if (Flags[uRight])
+				Log("%4u  %4u         T  %9.3f\n", uIter, uNodeIndex, Scores[uRight]);
+			}
+		}
+	}
+
+// Return the score recorded for (uIter, uNodeIndex, bRight); quits if no
+// score was recorded for that slot. bReverse is not used. The indexes are
+// not range-checked here.
+SCORE ScoreHistory::GetScore(unsigned uIter, unsigned uNodeIndex,
+  bool bReverse, bool bRight) const
+	{
+	const unsigned uIndex = uNodeIndex*2 + bRight;
+	if (!m_bScoreSet[uIter][uIndex])
+		Quit("ScoreHistory::GetScore");
+	return m_Score[uIter][uIndex];
+	}

Added: trunk/packages/muscle/branches/upstream/current/scorehistory.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/scorehistory.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/scorehistory.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,21 @@
+#ifndef ScoreHistory_h
+#define ScoreHistory_h
+
+// Table of scores indexed by iteration and by (node, left/right side),
+// with a parallel table of flags telling which entries have been set.
+class ScoreHistory
+	{
+public:
+// Allocates uIters rows of 2*uInternalNodeCount slots each.
+	ScoreHistory(unsigned uIters, unsigned uInternalNodeCount);
+	~ScoreHistory();
+// Records a score; returns true instead of recording when the same score
+// was found in an earlier iteration examined by the implementation.
+	bool SetScore(unsigned uIter, unsigned uInternalNodeIndex, bool bRight, SCORE Score);
+// Writes the set entries to the log.
+	void LogMe() const;
+// Returns a previously recorded score; quits if it was never set.
+	SCORE GetScore(unsigned uIter, unsigned uInternalNodeIndex, bool bReversed,
+	  bool bRight) const;
+
+private:
+	SCORE **m_Score;		// [iteration][2*node + right] scores
+	bool **m_bScoreSet;		// same layout; true where a score was stored
+	unsigned m_uIters;		// number of rows
+	unsigned m_uNodeCount;	// number of nodes (two slots per node)
+	};
+
+#endif	// ScoreHistory_h

Added: trunk/packages/muscle/branches/upstream/current/scorepp.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/scorepp.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/scorepp.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,104 @@
+#include "muscle.h"
+#include "profile.h"
+
+// Consensus character for one profile position.
+// Returns '-' when no letter has a positive count; otherwise the character
+// of the letter with the highest count (lowest letter index on ties),
+// passed through UnalignChar() when more than one letter is present.
+char ConsensusChar(const ProfPos &PP)
+	{
+	unsigned uBestLetter = 0;
+	FCOUNT fcBest = PP.m_fcCounts[0];
+	unsigned uLettersPresent = 0;
+	for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
+		{
+		const FCOUNT fc = PP.m_fcCounts[uLetter];
+		if (fc > 0)
+			++uLettersPresent;
+		if (fc > fcBest)
+			{
+			fcBest = fc;
+			uBestLetter = uLetter;
+			}
+		}
+	if (uLettersPresent == 0)
+		return '-';
+	const char c = LetterToChar(uBestLetter);
+	if (uLettersPresent > 1)
+		return UnalignChar(c);
+	return c;
+	}
+
+// Log-based profile-position score.
+// Sums count(A, letter) * PPB.m_AAScores[letter] over PPA's letters in
+// sort order, stopping at the first zero count (at most 20 letters).
+// Returns -2.5 when the sum is zero; otherwise (log(sum) - g_scoreCenter)
+// scaled by the product of the two m_fOcc values.
+SCORE ScoreProfPos2LA(const ProfPos &PPA, const ProfPos &PPB)
+	{
+	SCORE Sum = 0;
+	unsigned n = 0;
+	while (n < 20)
+		{
+		const unsigned uLetter = PPA.m_uSortOrder[n];
+		const FCOUNT fc = PPA.m_fcCounts[uLetter];
+		if (fc == 0)
+			break;
+		Sum += fc*PPB.m_AAScores[uLetter];
+		++n;
+		}
+	if (Sum == 0)
+		return -2.5;
+	SCORE logSum = logf(Sum);
+	return (SCORE) ((logSum - g_scoreCenter)*(PPA.m_fOcc * PPB.m_fOcc));
+	}
+
+// Sums count(A, letter) * PPB.m_AAScores[letter] over PPA's letters in
+// sort order, stopping at the first zero count (at most 20 letters), and
+// returns the sum minus g_scoreCenter.
+SCORE ScoreProfPos2NS(const ProfPos &PPA, const ProfPos &PPB)
+	{
+	SCORE Sum = 0;
+	unsigned n = 0;
+	while (n < 20)
+		{
+		const unsigned uLetter = PPA.m_uSortOrder[n];
+		const FCOUNT fc = PPA.m_fcCounts[uLetter];
+		if (fc == 0)
+			break;
+		Sum += fc*PPB.m_AAScores[uLetter];
+		++n;
+		}
+	return Sum - g_scoreCenter;
+	}
+
+// Sums count(A, letter) * PPB.m_AAScores[letter] over PPA's letters in
+// sort order, stopping at the first zero count (at most 20 letters), and
+// returns the sum minus g_scoreCenter.
+SCORE ScoreProfPos2SP(const ProfPos &PPA, const ProfPos &PPB)
+	{
+	SCORE Sum = 0;
+	unsigned n = 0;
+	while (n < 20)
+		{
+		const unsigned uLetter = PPA.m_uSortOrder[n];
+		const FCOUNT fc = PPA.m_fcCounts[uLetter];
+		if (fc == 0)
+			break;
+		Sum += fc*PPB.m_AAScores[uLetter];
+		++n;
+		}
+	return Sum - g_scoreCenter;
+	}
+
+// Same sum as the 20-letter variants but limited to the first 4 entries of
+// PPA's sort order; returns the sum minus g_scoreCenter.
+SCORE ScoreProfPos2SPN(const ProfPos &PPA, const ProfPos &PPB)
+	{
+	SCORE Sum = 0;
+	unsigned n = 0;
+	while (n < 4)
+		{
+		const unsigned uLetter = PPA.m_uSortOrder[n];
+		const FCOUNT fc = PPA.m_fcCounts[uLetter];
+		if (fc == 0)
+			break;
+		Sum += fc*PPB.m_AAScores[uLetter];
+		++n;
+		}
+	return Sum - g_scoreCenter;
+	}
+
+// Dispatch on g_PPScore to the matching profile-position scoring function.
+// Note the naming: PPSCORE_SP selects the NS variant and PPSCORE_SV selects
+// the SP variant. Quits on any other value.
+SCORE ScoreProfPos2(const ProfPos &PPA, const ProfPos &PPB)
+	{
+	if (g_PPScore == PPSCORE_SP)
+		return ScoreProfPos2NS(PPA, PPB);
+	if (g_PPScore == PPSCORE_LE)
+		return ScoreProfPos2LA(PPA, PPB);
+	if (g_PPScore == PPSCORE_SV)
+		return ScoreProfPos2SP(PPA, PPB);
+	if (g_PPScore == PPSCORE_SPN)
+		return ScoreProfPos2SPN(PPA, PPB);
+
+	Quit("Invalid g_PPScore");
+	return 0;
+	}

Added: trunk/packages/muscle/branches/upstream/current/seq.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/seq.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/seq.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,342 @@
+#include "muscle.h"
+#include "seq.h"
+#include "textfile.h"
+#include "msa.h"
+//#include <ctype.h>
+
+const size_t MAX_FASTA_LINE = 16000;
+
+// Replace the sequence's name with a private copy of ptrName.
+// The copy is made before the old name is released, so passing the
+// sequence's own current name (s.SetName(s.GetName())) is safe.
+void Seq::SetName(const char *ptrName)
+	{
+	size_t n = strlen(ptrName) + 1;
+	char *ptrNewName = new char[n];
+	strcpy(ptrNewName, ptrName);
+	delete[] m_ptrName;
+	m_ptrName = ptrNewName;
+	}
+
+// Write the sequence as a FASTA record: a ">name" line followed by the
+// characters in lines of at most 60, always ending with a newline.
+void Seq::ToFASTAFile(TextFile &File) const
+	{
+	const unsigned LINE_LENGTH = 60;
+
+	File.PutFormat(">%s\n", m_ptrName);
+	const unsigned uLength = Length();
+	unsigned uOnLine = 0;
+	for (unsigned uPos = 0; uPos != uLength; ++uPos)
+		{
+		if (uOnLine == LINE_LENGTH)
+			{
+			File.PutString("\n");
+			uOnLine = 0;
+			}
+		File.PutChar(at(uPos));
+		++uOnLine;
+		}
+	File.PutString("\n");
+	}
+
+// Read the next FASTA record from File into this sequence.
+// The previous contents (characters, name, id) are discarded first.
+// White space and gap characters in the data lines are skipped; printable
+// characters that are not residues are replaced by the wildcard with a
+// warning; any other byte is a fatal error. Stored letters are upper-cased.
+// Return true on end-of-file
+bool Seq::FromFASTAFile(TextFile &File)
+	{
+	Clear();
+
+	char szLine[MAX_FASTA_LINE];
+	bool bEof = File.GetLine(szLine, sizeof(szLine));
+	if (bEof)
+		return true;
+	if ('>' != szLine[0])
+		Quit("Expecting '>' in FASTA file %s line %u",
+		  File.GetFileName(), File.GetLineNr());
+
+	size_t n = strlen(szLine);
+	if (1 == n)
+		Quit("Missing annotation following '>' in FASTA file %s line %u",
+		  File.GetFileName(), File.GetLineNr());
+
+// n counts the leading '>', which is dropped, so n bytes are exactly
+// enough for the name plus its terminating NUL.
+	m_ptrName = new char[n];
+	strcpy(m_ptrName, szLine + 1);
+
+	TEXTFILEPOS Pos = File.GetPos();
+	for (;;)
+		{
+		bEof = File.GetLine(szLine, sizeof(szLine));
+		if (bEof)
+			{
+			if (0 == size())
+				{
+				Quit("Empty sequence in FASTA file %s line %u",
+				  File.GetFileName(), File.GetLineNr());
+				return true;
+				}
+			return false;
+			}
+		if ('>' == szLine[0])
+			{
+			if (0 == size())
+				Quit("Empty sequence in FASTA file %s line %u",
+				  File.GetFileName(), File.GetLineNr());
+		// Rewind to beginning of this line, it's the start of the
+		// next sequence.
+			File.SetPos(Pos);
+			return false;
+			}
+		const char *ptrChar = szLine;
+		while (char c = *ptrChar++)
+			{
+		// The <ctype.h> functions require a value representable as
+		// unsigned char; a plain char may be negative for bytes >= 0x80.
+			const unsigned char uc = (unsigned char) c;
+			if (isspace(uc))
+				continue;
+			if (IsGapChar(c))
+				continue;
+			if (!IsResidueChar(c))
+				{
+				if (isprint(uc))
+					{
+					char w = GetWildcardChar();
+					Warning("Invalid residue '%c' in FASTA file %s line %d, replaced by '%c'",
+					  c, File.GetFileName(), File.GetLineNr(), w);
+					c = w;
+					}
+				else
+					Quit("Invalid byte hex %02x in FASTA file %s line %d",
+					  (unsigned char) c, File.GetFileName(), File.GetLineNr());
+				}
+			c = (char) toupper((unsigned char) c);
+			push_back(c);
+			}
+		Pos = File.GetPos();
+		}
+	}
+
+// Fill msa with a single row holding this sequence's non-gap characters,
+// in order, and give that row this sequence's name.
+// NOTE(review): the MSA is sized to one column and then written at
+// increasing column indexes; presumably MSA::SetChar grows the row --
+// confirm against msa.cpp.
+void Seq::ExtractUngapped(MSA &msa) const
+	{
+	msa.Clear();
+	const unsigned uLength = Length();
+	msa.SetSize(1, 1);
+	unsigned uOutCol = 0;
+	for (unsigned uPos = 0; uPos != uLength; ++uPos)
+		{
+		const char c = at(uPos);
+		if (IsGapChar(c))
+			continue;
+		msa.SetChar(0, uOutCol, c);
+		++uOutCol;
+		}
+	msa.SetSeqName(0, m_ptrName);
+	}
+
+// Make this sequence a copy of rhs: characters, name and id.
+// The previous name is released (not leaked), and copying a sequence onto
+// itself leaves it unchanged.
+void Seq::Copy(const Seq &rhs)
+	{
+	if (this == &rhs)
+		return;
+
+	clear();
+	const unsigned uLength = rhs.Length();
+	for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)
+		push_back(rhs.at(uColIndex));
+
+// Build the new name before releasing the old one.
+	const char *ptrName = rhs.GetName();
+	size_t n = strlen(ptrName) + 1;
+	char *ptrNewName = new char[n];
+	strcpy(ptrNewName, ptrName);
+	delete[] m_ptrName;
+	m_ptrName = ptrNewName;
+
+	SetId(rhs.GetId());
+	}
+
+// Make this sequence hold the characters of rhs in reverse order, with a
+// copy of rhs's name. The id is not copied. The previous name is released
+// (not leaked).
+void Seq::CopyReversed(const Seq &rhs)
+	{
+	clear();
+	const unsigned uLength = rhs.Length();
+// Index of the last character; only used when uLength > 0.
+	const unsigned uBase = rhs.Length() - 1;
+	for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)
+		push_back(rhs.at(uBase - uColIndex));
+
+// Build the new name before releasing the old one, so the name is still
+// readable if rhs is this object.
+	const char *ptrName = rhs.GetName();
+	size_t n = strlen(ptrName) + 1;
+	char *ptrNewName = new char[n];
+	strcpy(ptrNewName, ptrName);
+	delete[] m_ptrName;
+	m_ptrName = ptrNewName;
+	}
+
+// Remove every gap character, keeping the remaining characters in order.
+// Done as a single compaction pass followed by one resize, which avoids
+// using an iterator after erase() has invalidated it and avoids shifting
+// the tail once per removed character.
+void Seq::StripGaps()
+	{
+	const size_t uCount = size();
+	size_t uKept = 0;
+	for (size_t uPos = 0; uPos < uCount; ++uPos)
+		{
+		const char c = (*this)[uPos];
+		if (!IsGapChar(c))
+			(*this)[uKept++] = c;
+		}
+	resize(uKept);
+	}
+
+// Remove every white-space and gap character, keeping the remaining
+// characters in order. Done as a single compaction pass followed by one
+// resize, which avoids using an iterator after erase() has invalidated it.
+void Seq::StripGapsAndWhitespace()
+	{
+	const size_t uCount = size();
+	size_t uKept = 0;
+	for (size_t uPos = 0; uPos < uCount; ++uPos)
+		{
+		const char c = (*this)[uPos];
+	// isspace() requires a value representable as unsigned char.
+		if (isspace((unsigned char) c) || IsGapChar(c))
+			continue;
+		(*this)[uKept++] = c;
+		}
+	resize(uKept);
+	}
+
+// Convert every lower-case character of the sequence to upper case, in
+// place. Other characters are left untouched.
+void Seq::ToUpper()
+	{
+	for (CharVect::iterator p = begin(); p != end(); ++p)
+		{
+	// The <ctype.h> functions require a value representable as
+	// unsigned char; a plain char may be negative.
+		const unsigned char c = (unsigned char) *p;
+		if (islower(c))
+			*p = (char) toupper(c);
+		}
+	}
+
+// Letter code (via CharToLetter) of the character at position uIndex.
+// The index is checked by assert only.
+unsigned Seq::GetLetter(unsigned uIndex) const
+	{
+	assert(uIndex < Length());
+	const char cAtIndex = operator[](uIndex);
+	const unsigned uLetter = CharToLetter(cAtIndex);
+	return uLetter;
+	}
+
+// Position-by-position comparison ignoring letter case. Lengths must
+// match; where this sequence has a gap the other must have a gap too
+// (any gap character), elsewhere the upper-cased characters must be equal.
+bool Seq::EqIgnoreCase(const Seq &s) const
+	{
+	const unsigned uLength = Length();
+	if (s.Length() != uLength)
+		return false;
+	for (unsigned uPos = 0; uPos != uLength; ++uPos)
+		{
+		const char cThis = at(uPos);
+		const char cOther = s.at(uPos);
+		if (IsGapChar(cThis))
+			{
+			if (!IsGapChar(cOther))
+				return false;
+			continue;
+			}
+		if (toupper(cThis) != toupper(cOther))
+			return false;
+		}
+	return true;
+	}
+
+// Exact comparison: same length and identical character at every position.
+// Names and ids are not compared.
+bool Seq::Eq(const Seq &s) const
+	{
+	const unsigned uLength = Length();
+	if (s.Length() != uLength)
+		return false;
+
+// Advance past the common prefix; equal iff it covers the whole sequence.
+	unsigned uPos = 0;
+	while (uPos < uLength && at(uPos) == s.at(uPos))
+		++uPos;
+	return uPos == uLength;
+	}
+
+// Compare the two sequences after conceptually removing all gap characters
+// and ignoring letter case. Returns true when the ungapped, upper-cased
+// character streams are identical.
+bool Seq::EqIgnoreCaseAndGaps(const Seq &s) const
+	{
+	const unsigned uThisLength = Length();
+	const unsigned uOtherLength = s.Length();
+
+	unsigned uThisPos = 0;
+	unsigned uOtherPos = 0;
+
+	while (uThisPos != uThisLength || uOtherPos != uOtherLength)
+		{
+	// Next non-gap character in this sequence, upper-cased,
+	// or -1 if the end is reached first.
+		int cThis = -1;
+		while (uThisPos != uThisLength)
+			{
+			const int c = at(uThisPos++);
+			if (!IsGapChar(c))
+				{
+				cThis = toupper(c);
+				break;
+				}
+			}
+
+	// Next non-gap character in s, upper-cased,
+	// or -1 if the end is reached first.
+		int cOther = -1;
+		while (uOtherPos != uOtherLength)
+			{
+			const int c = s.at(uOtherPos++);
+			if (!IsGapChar(c))
+				{
+				cOther = toupper(c);
+				break;
+				}
+			}
+
+	// Compare characters at corresponding ungapped positions.
+		if (cThis != cOther)
+			return false;
+		}
+	return true;
+	}
+
+// Number of characters in the sequence that are not gap characters.
+unsigned Seq::GetUngappedLength() const
+	{
+	const unsigned uLength = Length();
+	unsigned uGapCount = 0;
+	for (unsigned uPos = 0; uPos != uLength; ++uPos)
+		{
+		if (IsGapChar(at(uPos)))
+			++uGapCount;
+		}
+	return uLength - uGapCount;
+	}
+
+// Write the sequence to the log as ">name" followed by all characters on
+// a single line.
+void Seq::LogMe() const
+	{
+	Log(">%s\n", m_ptrName);
+	for (CharVect::const_iterator p = begin(); p != end(); ++p)
+		Log("%c", *p);
+	Log("\n");
+	}
+
+// Replace the sequence's characters with those of the NUL-terminated
+// string pstrSeq and its name with a copy of pstrName. The id is left
+// unchanged. The previous name is released (not leaked).
+void Seq::FromString(const char *pstrSeq, const char *pstrName)
+	{
+	clear();
+	const unsigned uLength = (unsigned) strlen(pstrSeq);
+	for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)
+		push_back(pstrSeq[uColIndex]);
+
+// Build the new name before releasing the old one, so pstrName may be the
+// sequence's own current name.
+	size_t n = strlen(pstrName) + 1;
+	char *ptrNewName = new char[n];
+	strcpy(ptrNewName, pstrName);
+	delete[] m_ptrName;
+	m_ptrName = ptrNewName;
+	}
+
+// True when the sequence contains at least one gap character.
+bool Seq::HasGap() const
+	{
+	const unsigned uLength = Length();
+	unsigned uPos = 0;
+	while (uPos < uLength && !IsGapChar(at(uPos)))
+		++uPos;
+	return uPos < uLength;
+	}
+
+// Replace every character that is not a residue character by the wildcard
+// character, reporting each replacement through InvalidLetterWarning().
+void Seq::FixAlpha()
+	{
+	const unsigned uLength = Length();
+	for (unsigned uPos = 0; uPos != uLength; ++uPos)
+		{
+		const char c = operator[](uPos);
+		if (IsResidueChar(c))
+			continue;
+
+		const char w = GetWildcardChar();
+		InvalidLetterWarning(c, w);
+		operator[](uPos) = w;
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/seq.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/seq.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/seq.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,85 @@
+#ifndef Seq_h
+#define Seq_h
+
+#include <vector>
+
+class TextFile;
+class MSA;
+
+typedef std::vector<char> CharVect;
+
+// A single named sequence. The characters (residues and possibly gap
+// characters) are the elements of the underlying std::vector<char>; the
+// name is a heap-allocated C string owned by the object; the id is
+// uInsane until SetId() is called.
+class Seq : public CharVect
+	{
+public:
+	Seq()
+		{
+		m_ptrName = 0;
+	// Mark the id as "not set" so that GetId() reliably reports a
+	// missing id instead of reading an uninitialized member.
+		m_uId = uInsane;
+	// Start with moderate size to avoid
+	// thrashing the heap.
+		reserve(200);
+		}
+	virtual ~Seq()
+		{
+		delete[] m_ptrName;
+		}
+
+private:
+// Not implemented; prevent use of copy c'tor and assignment.
+	Seq(const Seq &);
+	Seq &operator=(const Seq &);
+
+public:
+// Discard characters and name and mark the id as not set.
+	void Clear()
+		{
+		clear();
+		delete[] m_ptrName;
+		m_ptrName = 0;
+		m_uId = uInsane;
+		}
+// Name of the sequence, or 0 if none has been set. The pointer remains
+// owned by the sequence.
+	const char *GetName() const
+		{
+		return m_ptrName;
+		}
+// Id of the sequence; quits if no id has been set.
+	unsigned GetId() const
+		{
+		if (uInsane == m_uId)
+			Quit("Seq::GetId, id not set");
+		return m_uId;
+		}
+	void SetId(unsigned uId) { m_uId = uId; }
+
+	bool FromFASTAFile(TextFile &File);
+	void ToFASTAFile(TextFile &File) const;
+	void ExtractUngapped(MSA &msa) const;
+
+	void FromString(const char *pstrSeq, const char *pstrName);
+	void Copy(const Seq &rhs);
+	void CopyReversed(const Seq &rhs);
+	void StripGaps();
+	void StripGapsAndWhitespace();
+	void ToUpper();
+	void SetName(const char *ptrName);
+	unsigned GetLetter(unsigned uIndex) const;
+	unsigned Length() const { return (unsigned) size(); }
+	bool Eq(const Seq &s) const;
+	bool EqIgnoreCase(const Seq &s) const;
+	bool EqIgnoreCaseAndGaps(const Seq &s) const;
+	bool HasGap() const;
+	unsigned GetUngappedLength() const;
+	void LogMe() const;
+// Unchecked element access.
+	char GetChar(unsigned uIndex) const { return operator[](uIndex); }
+	void SetChar(unsigned uIndex, char c) { operator[](uIndex) = c; }
+	void AppendChar(char c) { push_back(c); }
+	void FixAlpha();
+
+// On non-Windows builds at() is defined here as unchecked access via
+// operator[], hiding std::vector::at.
+#ifndef	_WIN32
+	reference at(size_type i) { return operator[](i); }
+	const_reference at(size_type i) const { return operator[](i); }
+#endif
+
+private:
+	char *m_ptrName;	// owned, allocated with new[]; 0 when unset
+	unsigned m_uId;		// uInsane when unset
+	};
+
+#endif	// Seq.h

Added: trunk/packages/muscle/branches/upstream/current/seqvect.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/seqvect.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/seqvect.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,290 @@
+#include "muscle.h"
+#include "seqvect.h"
+#include "textfile.h"
+#include "msa.h"
+
+const size_t MAX_FASTA_LINE = 16000;
+
+// The vector owns the Seq objects it points to; Clear() deletes them.
+SeqVect::~SeqVect()
+	{
+	Clear();
+	}
+
+// Delete every owned Seq and empty the vector.
+// The pointers must be dropped as well as deleted: leaving them in place
+// would expose dangling pointers to later use and to a second delete in
+// the destructor.
+void SeqVect::Clear()
+	{
+	for (size_t n = 0; n < size(); ++n)
+		delete (*this)[n];
+	clear();
+	}
+
+// Write all sequences to File as consecutive FASTA records, in order.
+void SeqVect::ToFASTAFile(TextFile &File) const
+	{
+	for (SeqVectBase::const_iterator p = begin(); p != end(); ++p)
+		(*p)->ToFASTAFile(File);
+	}
+
+// Replace the contents with the sequences read from File, using
+// GetFastaSeq() on the underlying stdio stream until it returns 0.
+// The buffers returned by GetFastaSeq() are released here with delete[].
+void SeqVect::FromFASTAFile(TextFile &File)
+	{
+	Clear();
+
+	FILE *f = File.GetStdioFile();
+	char *Label;
+	unsigned uLength;
+	char *SeqData;
+	while ((SeqData = GetFastaSeq(f, &uLength, &Label)) != 0)
+		{
+		Seq *ptrNewSeq = new Seq;
+		for (const char *p = SeqData; p != SeqData + uLength; ++p)
+			ptrNewSeq->push_back(*p);
+
+		ptrNewSeq->SetName(Label);
+		push_back(ptrNewSeq);
+
+		delete[] SeqData;
+		delete[] Label;
+		}
+	}
+
+// Build msa from the sequences: one row per sequence, as wide as the
+// longest sequence, with shorter sequences padded on the right with '.'.
+// An empty vector just clears msa.
+void SeqVect::PadToMSA(MSA &msa)
+	{
+	const unsigned uSeqCount = Length();
+	if (uSeqCount == 0)
+		{
+		msa.Clear();
+		return;
+		}
+
+// Width of the alignment is the longest sequence length.
+	unsigned uWidth = 0;
+	for (unsigned i = 0; i != uSeqCount; ++i)
+		{
+		const unsigned uLen = at(i)->Length();
+		if (uLen > uWidth)
+			uWidth = uLen;
+		}
+
+	msa.SetSize(uSeqCount, uWidth);
+	for (unsigned i = 0; i != uSeqCount; ++i)
+		{
+		const Seq *ptrRow = at(i);
+		msa.SetSeqName(i, ptrRow->GetName());
+		const unsigned uLen = ptrRow->Length();
+		for (unsigned uCol = 0; uCol < uWidth; ++uCol)
+			{
+			const char c = (uCol < uLen) ? ptrRow->at(uCol) : '.';
+			msa.SetChar(i, uCol, c);
+			}
+		}
+	}
+
+// Make this vector hold deep copies of all sequences in rhs.
+// The sequences held before the call are deleted, not just forgotten, and
+// copying a vector onto itself leaves it unchanged.
+void SeqVect::Copy(const SeqVect &rhs)
+	{
+	if (this == &rhs)
+		return;
+
+// Release the sequences currently owned before dropping the pointers.
+	for (size_t n = 0; n < size(); ++n)
+		delete (*this)[n];
+	clear();
+
+	unsigned uSeqCount = rhs.Length();
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		Seq *ptrSeq = rhs.at(uSeqIndex);
+		Seq *ptrSeqCopy = new Seq;
+		ptrSeqCopy->Copy(*ptrSeq);
+		push_back(ptrSeqCopy);
+		}
+	}
+
+// Apply Seq::StripGaps() to every sequence.
+void SeqVect::StripGaps()
+	{
+	for (SeqVectBase::iterator p = begin(); p != end(); ++p)
+		(*p)->StripGaps();
+	}
+
+// Apply Seq::StripGapsAndWhitespace() to every sequence.
+void SeqVect::StripGapsAndWhitespace()
+	{
+	for (SeqVectBase::iterator p = begin(); p != end(); ++p)
+		(*p)->StripGapsAndWhitespace();
+	}
+
+// Apply Seq::ToUpper() to every sequence.
+void SeqVect::ToUpper()
+	{
+	for (SeqVectBase::iterator p = begin(); p != end(); ++p)
+		(*p)->ToUpper();
+	}
+
+// Look up a sequence by name, ignoring case (stricmp). On success stores
+// the index of the first match in *ptruIndex and returns true; otherwise
+// returns false and leaves *ptruIndex untouched.
+bool SeqVect::FindName(const char *ptrName, unsigned *ptruIndex) const
+	{
+	const unsigned uSeqCount = Length();
+	unsigned uSeqIndex = 0;
+	while (uSeqIndex < uSeqCount &&
+	  0 != stricmp(at(uSeqIndex)->GetName(), ptrName))
+		++uSeqIndex;
+
+	if (uSeqIndex == uSeqCount)
+		return false;
+	*ptruIndex = uSeqIndex;
+	return true;
+	}
+
+// Append a deep copy of s (made with Seq::Copy); the vector owns the copy.
+void SeqVect::AppendSeq(const Seq &s)
+	{
+	Seq *ptrSeqCopy = new Seq;
+	ptrSeqCopy->Copy(s);
+	push_back(ptrSeqCopy);
+	}
+
+// Write every sequence to the log, in order.
+void SeqVect::LogMe() const
+	{
+	for (SeqVectBase::const_iterator p = begin(); p != end(); ++p)
+		{
+		const Seq *ptrSeq = *p;
+		ptrSeq->LogMe();
+		}
+	}
+
+// Name of the sequence at uSeqIndex (index checked by assert only).
+const char *SeqVect::GetSeqName(unsigned uSeqIndex) const
+	{
+	assert(uSeqIndex < size());
+	return at(uSeqIndex)->GetName();
+	}
+
+// Id of the sequence at uSeqIndex (index checked by assert only).
+unsigned SeqVect::GetSeqId(unsigned uSeqIndex) const
+	{
+	assert(uSeqIndex < size());
+	return at(uSeqIndex)->GetId();
+	}
+
+// Id of the first sequence whose name equals Name exactly (case-sensitive
+// strcmp). Quits if there is no such sequence.
+unsigned SeqVect::GetSeqIdFromName(const char *Name) const
+	{
+	const unsigned uSeqCount = GetSeqCount();
+	for (unsigned uSeqIndex = 0; uSeqIndex != uSeqCount; ++uSeqIndex)
+		{
+		if (0 != strcmp(Name, GetSeqName(uSeqIndex)))
+			continue;
+		return GetSeqId(uSeqIndex);
+		}
+	Quit("SeqVect::GetSeqIdFromName(%s): not found", Name);
+	return 0;
+	}
+
+// Return the first sequence whose id equals uId. Quits if there is none.
+Seq &SeqVect::GetSeqById(unsigned uId)
+	{
+	const unsigned uSeqCount = GetSeqCount();
+	for (unsigned i = 0; i < uSeqCount; ++i)
+		{
+		if (GetSeqId(i) == uId)
+			return GetSeq(i);
+		}
+// The message names this function and prints the unsigned id with %u.
+	Quit("SeqVect::GetSeqById(%u): not found", uId);
+// Not reached if Quit() does not return; present only because the
+// function must return a reference.
+	return (Seq &) *((Seq *) 0);
+	}
+
+// Length of the sequence at uSeqIndex (index checked by assert only).
+unsigned SeqVect::GetSeqLength(unsigned uSeqIndex) const
+	{
+	assert(uSeqIndex < size());
+	return at(uSeqIndex)->Length();
+	}
+
+// Mutable reference to the sequence at uSeqIndex (index checked by assert
+// only). The vector keeps ownership.
+Seq &SeqVect::GetSeq(unsigned uSeqIndex)
+	{
+	assert(uSeqIndex < size());
+	Seq *ptrSeq = at(uSeqIndex);
+	return *ptrSeq;
+	}
+
+// Read-only reference to the sequence at uSeqIndex (index checked by
+// assert only).
+const Seq &SeqVect::GetSeq(unsigned uSeqIndex) const
+	{
+	assert(uSeqIndex < size());
+	const Seq *ptrSeq = at(uSeqIndex);
+	return *ptrSeq;
+	}
+
+// Set the id of the sequence at uSeqIndex (index checked by assert only).
+void SeqVect::SetSeqId(unsigned uSeqIndex, unsigned uId)
+	{
+	assert(uSeqIndex < size());
+	at(uSeqIndex)->SetId(uId);
+	}
+
+// Guess the alphabet from the first CHAR_COUNT non-gap characters, taken
+// sequence by sequence in order.
+// If at least MIN_NUCLEO_PCT percent of them are DNA letters the guess is
+// DNA; otherwise, if at least that share are RNA letters, RNA; otherwise
+// amino. With no sequences or no non-gap characters the guess is amino.
+ALPHA SeqVect::GuessAlpha() const
+	{
+	const unsigned CHAR_COUNT = 100;
+	const unsigned MIN_NUCLEO_PCT = 95;
+
+	const unsigned uSeqCount = GetSeqCount();
+	if (uSeqCount == 0)
+		return ALPHA_Amino;
+
+	unsigned uDNACount = 0;
+	unsigned uRNACount = 0;
+	unsigned uTotal = 0;
+	for (unsigned uSeqIndex = 0;
+	  uSeqIndex < uSeqCount && uTotal < CHAR_COUNT; ++uSeqIndex)
+		{
+		const Seq &s = GetSeq(uSeqIndex);
+		const unsigned uSeqLength = s.Length();
+		for (unsigned uPos = 0;
+		  uPos < uSeqLength && uTotal < CHAR_COUNT; ++uPos)
+			{
+			const char c = s.at(uPos);
+			if (IsGapChar(c))
+				continue;
+			if (IsDNA(c))
+				++uDNACount;
+			if (IsRNA(c))
+				++uRNACount;
+			++uTotal;
+			}
+		}
+
+	if (uTotal == 0)
+		return ALPHA_Amino;
+	if ((uDNACount*100)/uTotal >= MIN_NUCLEO_PCT)
+		return ALPHA_DNA;
+	if ((uRNACount*100)/uTotal >= MIN_NUCLEO_PCT)
+		return ALPHA_RNA;
+	return ALPHA_Amino;
+	}
+
+// Apply Seq::FixAlpha() to every sequence, bracketed by
+// ClearInvalidLetterWarning() before and ReportInvalidLetters() after.
+void SeqVect::FixAlpha()
+	{
+	ClearInvalidLetterWarning();
+	for (SeqVectBase::iterator p = begin(); p != end(); ++p)
+		(*p)->FixAlpha();
+	ReportInvalidLetters();
+	}

Added: trunk/packages/muscle/branches/upstream/current/seqvect.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/seqvect.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/seqvect.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,63 @@
+#ifndef SeqVect_h
+#define SeqVect_h
+
+#include <vector>
+#include "seq.h"
+
+typedef std::vector<Seq *> SeqVectBase;
+
+// A vector of pointers to heap-allocated Seq objects. The destructor
+// deletes the sequences, so the vector owns everything it points to.
+class SeqVect : public SeqVectBase
+	{
+public:
+	SeqVect() {}
+	virtual ~SeqVect();
+
+private:
+// Not implemented; prevent use of copy c'tor and assignment.
+	SeqVect(const SeqVect &);
+	SeqVect &operator=(const SeqVect &);
+
+public:
+// FromFile / ToFile are aliases for the FASTA readers and writers.
+	void FromFile(TextFile &File)
+		{
+		FromFASTAFile(File);
+		}
+
+	void FromFASTAFile(TextFile &File);
+	void ToFASTAFile(TextFile &File) const;
+
+	void ToFile(TextFile &File) const
+		{
+		ToFASTAFile(File);
+		}
+
+	void PadToMSA(MSA &msa);
+	void Copy(const SeqVect &rhs);
+	void StripGaps();
+	void StripGapsAndWhitespace();
+	void ToUpper();
+	void Clear();
+// Length() and GetSeqCount() both return the number of sequences.
+	unsigned Length() const { return (unsigned) size(); }
+	unsigned GetSeqCount() const { return (unsigned) size(); }
+	void AppendSeq(const Seq &s);
+	bool FindName(const char *ptrName, unsigned *ptruIndex) const;
+	void LogMe() const;
+	const char *GetSeqName(unsigned uSeqIndex) const;
+	unsigned GetSeqId(unsigned uSeqIndex) const;
+	unsigned GetSeqIdFromName(const char *Name) const;
+	unsigned GetSeqLength(unsigned uSeqIndex) const;
+	void SetSeqId(unsigned uSeqIndex, unsigned uId);
+	Seq &GetSeq(unsigned uIndex);
+	Seq &GetSeqById(unsigned uId);
+	const Seq &GetSeq(unsigned uIndex) const;
+
+	ALPHA GuessAlpha() const;
+	void FixAlpha();
+
+// On non-Windows builds at() is defined here as unchecked access via
+// operator[], hiding std::vector::at.
+#ifndef	_WIN32
+	reference at(size_type i) { return operator[](i); }
+	const_reference at(size_type i) const { return operator[](i); }
+#endif
+	};
+
+#endif	// SeqVect_h

Added: trunk/packages/muscle/branches/upstream/current/setblosumweights.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/setblosumweights.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/setblosumweights.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,131 @@
+/***
+Code for implementing HMMer's "BLOSUM weighting" algorithm.
+
+The algorithm was deduced by reverse-engineering the HMMer code.
+
+The HMMer documentation refers to BLOSUM weighting as "Henikoff
+simple filter weighting"
+
+The name BLOSUM implied to me that HMMer would be using a
+substitution probability matrix to compute distances, but this
+turned out not to be the case.
+
+It is notable, not to say puzzling, that the HMMer BLOSUM weighting
+algorithm is guaranteed to produce an integral NIC (number-of-independent-
+counts, also known as effective sequence count). Presumably Eddy must
+have known this, though he doesn't comment on it and he computes & stores
+the value in a float.
+
+Here's the algorithm:
+
+Distances between two sequences are based on the average of a simple 
+binary equal (one) / not equal (zero) at each position. The only thing
+that has  anything to do with BLOSUM in this calculation is an obscure
+(to me) constant  value of 0.62. The sequences are clustered using this
+distance. If the pairwise identity (fraction of  identical positions)
+is less than 0.62, they get assigned to disjoint clusters, the final
+number of disjoint clusters is the NIC. This makes some intuitive sense:
+I would interpret this by saying that if a set of sequences are close
+enough they count as one sequence. The weight for each sequence within a
+disjoint cluster is then determined to be 1 / (clustersize), from which it
+follows that the sum of all weights is equal to the number of disjoint
+clusters and is thus guaranteed to be an integer value. It is exactly this
+sum that HMMer uses for the NIC, by default.
+
+The individual BLOSUM sequence weights are not used for anything else in
+HMMer, unless you specify that BLOSUM weighting should override the default
+GSC  weighting. GSC weighting uses a different clustering algorithm to
+determine  relative weights. The BLOSUM NIC is then distributed over the
+GSC tree according to those relative weights.
+***/
+
+#include "muscle.h"
+#include "msa.h"
+#include "cluster.h"
+#include "distfunc.h"
+
+// Give every leaf in the subtree rooted at ptrNode the same weight.
+// A null node is ignored, so callers may pass a missing child directly.
+void MSA::SetBLOSUMSubtreeWeight(const ClusterNode *ptrNode, double dWeight) const
+	{
+	if (0 == ptrNode)
+		return;
+
+	const ClusterNode *ptrLeftChild = ptrNode->GetLeft();
+	const ClusterNode *ptrRightChild = ptrNode->GetRight();
+	const bool bIsLeaf = (0 == ptrRightChild && 0 == ptrLeftChild);
+
+// Internal node: hand the same weight down both branches.
+	if (!bIsLeaf)
+		{
+		SetBLOSUMSubtreeWeight(ptrLeftChild, dWeight);
+		SetBLOSUMSubtreeWeight(ptrRightChild, dWeight);
+		return;
+		}
+
+// Leaf: the node's index selects the slot in m_Weights.
+	const unsigned uSlot = ptrNode->GetIndex();
+	const WEIGHT wLeaf = DoubleToWeight(dWeight);
+	m_Weights[uSlot] = wLeaf;
+	}
+
+// Walk the cluster tree from ptrNode downwards and find the topmost
+// nodes whose weight is below the dMinDist threshold. All leaves under
+// such a node are treated as one group and each is given the weight
+// 1/N, where N is the cluster size of that node. The purpose is to
+// avoid sample bias from closely related sequences in the input
+// alignment.
+// According to the original author, the weight at a node is the
+// distance between the two closest sequences in its left and right
+// subtrees; the author notes that it is unclear why a minimum is used
+// here rather than a maximum or an average, either of which would seem
+// more natural.
+// Return value is the number of groups found under ptrNode (zero for
+// a null node).
+unsigned MSA::SetBLOSUMNodeWeight(const ClusterNode *ptrNode, double dMinDist) const
+	{
+	if (0 == ptrNode)
+		return 0;
+
+	const bool bIsGroup = (ptrNode->GetWeight() < dMinDist);
+	if (bIsGroup)
+		{
+	// The whole subtree counts as a single group.
+		const unsigned uMembers = ptrNode->GetClusterSize();
+		assert(uMembers > 0);
+		SetBLOSUMSubtreeWeight(ptrNode, 1.0 / uMembers);
+		return 1;
+		}
+
+// Too diverse to be one group: count the groups in each branch.
+	const ClusterNode *ptrLeftChild = ptrNode->GetLeft();
+	const ClusterNode *ptrRightChild = ptrNode->GetRight();
+	const unsigned uGroupsLeft = SetBLOSUMNodeWeight(ptrLeftChild, dMinDist);
+	const unsigned uGroupsRight = SetBLOSUMNodeWeight(ptrRightChild, dMinDist);
+	return uGroupsLeft + uGroupsRight;
+	}
+
+// Compute BLOSUM-style sequence weights for this alignment.
+// Builds a pairwise distance function (1 - fractional identity),
+// clusters it into BlosumCluster, then assigns group weights.
+// Return value is the group count, which is HMMer's "effective
+// sequence count".
+unsigned MSA::CalcBLOSUMWeights(ClusterTree &BlosumCluster) const
+	{
+	DistFunc DF;
+	const unsigned uSeqCount = GetSeqCount();
+	DF.SetCount(uSeqCount);
+
+// Only the upper triangle (uSeq1 < uSeq2) is filled in.
+	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
+		{
+		for (unsigned uSeq2 = uSeq1 + 1; uSeq2 < uSeqCount; ++uSeq2)
+			{
+			const double dIdentity = GetPctIdentityPair(uSeq1, uSeq2);
+			assert(dIdentity >= 0.0 && dIdentity <= 1.0);
+			const float fDist = (float) (1.0 - dIdentity);
+			DF.SetDist(uSeq1, uSeq2, fDist);
+			}
+		}
+
+	BlosumCluster.Create(DF);
+
+// The identity threshold BLOSUM_DIST is converted to a distance threshold.
+	return SetBLOSUMNodeWeight(BlosumCluster.GetRoot(), 1.0 - BLOSUM_DIST);
+	}

Added: trunk/packages/muscle/branches/upstream/current/setgscweights.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/setgscweights.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/setgscweights.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,195 @@
+/***
+Gerstein/Sonnhammer/Chothia ad hoc sequence weighting.
+The algorithm was deduced by reverse-engineering the
+HMMer code.
+
+I used an alternative representation that I prefer over
+HMMer's. The HMMer code is full of tree manipulations
+that do something to the left child and then the equivalent
+thing to the right child. It was clear that there must be
+a re-formulation that does everything once for each node,
+which would reduce the number of operations expressed
+in the code by a factor of two. This gives a more elegant
+and less error-prone way to code it.
+
+These notes explain the correspondence between my design
+and Eddy's.
+
+HMMer stores a data structure phylo_s for each non-leaf
+node in the cluster tree. This structure contains the
+following fields:
+
+	diff		Weight of the node
+	lblen		Left branch length
+	rblen		Right branch length
+
+The lblen and rblen branch lengths are calculated as:
+
+	this.lblen = this.diff - left.diff
+	this.rblen = this.diff - right.diff
+
+My code stores one ClusterNode data structure per node
+in the cluster tree, including leaves. I store only the
+weight. I can recover the HMMer branch length fields
+in a trivial O(1) calculation as follows:
+
+	lblen = Node.GetWeight() - Node.GetLeft()->GetWeight()
+	rblen = Node.GetWeight() - Node.GetRight()->GetWeight()
+
+For the GSC weights calculation, HMMer constructs the
+following vectors, which have entries for all nodes,
+including leaves:
+
+	lwt		Left weight
+	rwt		Right weight
+
+The "left weight" is calculated as the sum of the weights in
+all the nodes reachable through the left branch, including
+the node itself. (This is not immediately obvious from the
+code, which does the calculation using branch lengths rather
+than weights, but this is an equivalent, and to my mind clearer,
+statement of what they are). Similarly, the "right weight" is
+the sum of all weights reachable via the right branch. I define
+the "cluster weight" to be the summed weight of all nodes in the
+subtree under the node, including the node itself. I provide
+a function Node.GetClusterWeight() which calculates the cluster
+weight using a O(ln N) recursion through the tree. The lwt and
+rwt values can be recovered as follows:
+
+	lwt		= Node.GetLeft()->GetClusterWeight()
+			+ Node.GetWeight()
+
+	rwt		= Node.GetRight()->GetClusterWeight()
+			+ Node.GetWeight()
+
+HMMer calculates a further vector fwt as follows.
+
+	this.fwt = parent.fwt * parent.lwt / (parent.lwt + parent.rwt)
+
+This applies to nodes reached via a left branch, for nodes reached
+via a right branch:
+
+	this.fwt = parent.fwt * parent.rwt / (parent.lwt + parent.rwt)
+
+The values of fwt at the leaf nodes are the final GSC weights.
+We derive the various terms using our equivalents.
+
+	parent.lwt	= Parent.GetLeft()->GetClusterWeight()
+				+ Parent.GetWeight()
+
+	parent.rwt	= Parent.GetRight()->GetClusterWeight()
+				+ Parent.GetWeight()
+
+	parent.lwt + parent.rwt =
+				{ Parent.GetLeft()->GetClusterWeight()
+				+ Parent.GetRight()->GetClusterWeight()
+				+ Parent.GetWeight() }
+				+ Parent.GetWeight()
+
+We recognize the term {...} as the cluster weight of the
+parent, so
+
+	parent.lwt + parent.rwt
+				= Parent.GetClusterWeight()
+				+ Parent.GetWeight()
+
+As you would expect, repeating this exercise for parent.rwt gives
+exactly the same expression.
+
+The GSC weights (fwt) are stored in the Weight2 field of the cluster
+tree, the Weight field stores the original (BLOSUM) weights used
+as input to this algorithm.
+***/
+
+#include "muscle.h"
+#include "msa.h"
+#include "cluster.h"
+#include "distfunc.h"
+
+// Copy the Weight2 value stored at each leaf of the subtree rooted at
+// ptrNode into this MSA's m_Weights array. A null node is ignored.
+void MSA::SetSubtreeWeight2(const ClusterNode *ptrNode) const
+	{
+	if (0 == ptrNode)
+		return;
+
+	const ClusterNode *ptrLeftChild = ptrNode->GetLeft();
+	const ClusterNode *ptrRightChild = ptrNode->GetRight();
+	const bool bIsLeaf = (0 == ptrRightChild && 0 == ptrLeftChild);
+
+// Internal node: nothing to store here, descend into both branches.
+	if (!bIsLeaf)
+		{
+		SetSubtreeWeight2(ptrLeftChild);
+		SetSubtreeWeight2(ptrRightChild);
+		return;
+		}
+
+// Leaf: the node's index selects the slot in m_Weights.
+	const unsigned uSlot = ptrNode->GetIndex();
+	const double dLeafWeight2 = ptrNode->GetWeight2();
+	const WEIGHT wLeaf = DoubleToWeight(dLeafWeight2);
+	m_Weights[uSlot] = wLeaf;
+	}
+
+// Derive the GSC weight (Weight2) of ptrNode from its parent's Weight2,
+// store it in the node, then repeat for both children. The node must
+// have a parent, so the caller starts with the children of the root
+// after setting the root's Weight2. A null node is ignored.
+void MSA::SetSubtreeGSCWeight(ClusterNode *ptrNode) const
+	{
+	if (0 == ptrNode)
+		return;
+
+	ClusterNode *ptrUp = ptrNode->GetParent();
+	const double dUpWeight2 = ptrUp->GetWeight2();
+	const double dUpClusterWeight = ptrUp->GetClusterWeight();
+
+	double dShare;
+	if (0.0 != dUpClusterWeight)
+		{
+	// Split the parent's Weight2 in proportion to
+	// (cluster weight + parent weight).
+	// Cluster weights could be cached: each node's cluster weight is
+	// computed twice, so caching would give a x2 improvement, but
+	// weighting is not expensive enough to matter.
+		const double dNodeClusterWeight = ptrNode->GetClusterWeight();
+		const double dUpWeight = ptrUp->GetWeight();
+
+		const double dNumer = dNodeClusterWeight + dUpWeight;
+		const double dDenom = dUpClusterWeight + dUpWeight;
+		dShare = dUpWeight2*(dNumer/dDenom);
+		}
+	else
+		{
+	// Parent cluster weight is zero: split by cluster size instead.
+		const double dNodeSize = ptrNode->GetClusterSize();
+		const double dUpSize = ptrUp->GetClusterSize();
+		dShare = dUpWeight2*dNodeSize/dUpSize;
+		}
+	ptrNode->SetWeight2(dShare);
+
+	SetSubtreeGSCWeight(ptrNode->GetLeft());
+	SetSubtreeGSCWeight(ptrNode->GetRight());
+	}
+
+// Compute GSC sequence weights for this alignment.
+// Builds the BLOSUM cluster tree, distributes a total Weight2 of 1.0
+// from the root down to the leaves, then copies the leaf values into
+// m_Weights.
+void MSA::SetGSCWeights() const
+	{
+	ClusterTree CT;
+	CalcBLOSUMWeights(CT);
+
+	ClusterNode *ptrRoot = CT.GetRoot();
+
+// SetBLOSUMNodeWeight accepts a null root, so an empty tree is treated
+// as possible here too: with no nodes there is nothing to weight.
+	if (0 == ptrRoot)
+		return;
+
+// Calculate weights and store in tree.
+	ptrRoot->SetWeight2(1.0);
+	SetSubtreeGSCWeight(ptrRoot->GetLeft());
+	SetSubtreeGSCWeight(ptrRoot->GetRight());
+
+// Copy weights from tree to MSA.
+	SetSubtreeWeight2(ptrRoot);
+	}
+ 
+// Write one log line per sequence giving its weight and name, followed
+// by the sum of all weights.
+void MSA::ListWeights() const
+	{
+	Log("Weights:\n");
+
+	WEIGHT wSum = 0;
+	const unsigned uRows = GetSeqCount();
+	unsigned uRow = 0;
+	while (uRow < uRows)
+		{
+		wSum += GetSeqWeight(uRow);
+		Log("%6.3f %s\n", GetSeqWeight(uRow), GetSeqName(uRow));
+		++uRow;
+		}
+
+	Log("Total weights = %6.3f, should be 1.0\n", wSum);
+	}

Added: trunk/packages/muscle/branches/upstream/current/setnewhandler.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/setnewhandler.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/setnewhandler.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,22 @@
+#include "muscle.h"
+#include <stdio.h>
+#include <new>
+
+const int ONE_MB = 1024*1024;
+const size_t RESERVE_BYTES = 8*ONE_MB;
+static void *EmergencyReserve = 0;
+
+// New-handler installed by SetNewHandler(). Releases the emergency
+// reserve so the shutdown code has memory to work with, reports the
+// failure, saves the current alignment and exits with EXIT_FatalError.
+void OnOutOfMemory()
+	{
+// Uninstall the handler first. If an allocation fails again while we
+// are saving the alignment, operator new then throws std::bad_alloc
+// instead of re-entering this function.
+	std::set_new_handler(0);
+
+// Clear the pointer after freeing so the reserve can never be
+// released twice.
+	free(EmergencyReserve);
+	EmergencyReserve = 0;
+
+	fprintf(stderr, "\n*** OUT OF MEMORY ***\n");
+	fprintf(stderr, "Memory allocated so far %g MB\n", GetMemUseMB());
+	SaveCurrentAlignment();
+	exit(EXIT_FatalError);
+	}
+
+// Set aside RESERVE_BYTES of emergency memory and install
+// OnOutOfMemory as the new-handler. Safe to call more than once: an
+// existing reserve is kept rather than leaked.
+// If the reserve itself cannot be allocated the pointer stays null,
+// which OnOutOfMemory tolerates because free(0) is a no-op.
+void SetNewHandler()
+	{
+	if (0 == EmergencyReserve)
+		EmergencyReserve = malloc(RESERVE_BYTES);
+	std::set_new_handler(OnOutOfMemory);
+	}

Added: trunk/packages/muscle/branches/upstream/current/spfast.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/spfast.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/spfast.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,269 @@
+#include "muscle.h"
+#include "profile.h"
+
+#define TRACE	0
+
+// Gap state of one sequence at one column, written as a two-letter
+// code: first letter is the previous column, second letter is this
+// column; L = letter, G = gap. ObjScoreSPCol computes the value as
+// (gap in this column) + 2*(gap in previous column).
+enum
+	{
+	LL = 0,
+	LG = 1,
+	GL = 2,
+	GG = 3,
+	};
+
+// Two-letter name of a gap type, used for trace output.
+// Calls Quit() if GapType is not one of the four enum values.
+// Returns const char * because the result points at a string literal,
+// which must not be modified.
+static const char *GapTypeToStr(int GapType)
+	{
+	switch (GapType)
+		{
+	case LL: return "LL";
+	case LG: return "LG";
+	case GL: return "GL";
+	case GG: return "GG";
+	default: break;
+		}
+	Quit("Invalid gap type");
+	return "?";
+	}
+
+static SCORE GapScoreMatrix[4][4];
+
+// Fill GapScoreMatrix from the current value of g_scoreGapOpen.
+// Entry [a][b] is the gap-open score charged to a pair of sequences
+// whose gap types in a column are a and b. The matrix must be
+// symmetrical; Quit() is called if it is not.
+static void InitGapScoreMatrix()
+	{
+	const SCORE Damping = (SCORE) 0.2;
+	const SCORE Open = g_scoreGapOpen;
+	const SCORE DampedOpen = Damping*g_scoreGapOpen;	// approximation!
+
+// Most pairs cost nothing, so start from an all-zero matrix.
+	for (int i = 0; i < 4; ++i)
+		for (int j = 0; j < 4; ++j)
+			GapScoreMatrix[i][j] = 0;
+
+// Pairs involving LG that are charged a full gap open.
+	GapScoreMatrix[LL][LG] = Open;
+	GapScoreMatrix[LG][LL] = Open;
+	GapScoreMatrix[LG][GL] = Open;
+	GapScoreMatrix[GL][LG] = Open;
+
+// LG against GG is charged a damped gap open.
+	GapScoreMatrix[LG][GG] = DampedOpen;
+	GapScoreMatrix[GG][LG] = DampedOpen;
+
+// Sanity check: every entry must equal its mirror image.
+	for (int i = 0; i < 4; ++i)
+		for (int j = i + 1; j < 4; ++j)
+			if (GapScoreMatrix[j][i] != GapScoreMatrix[i][j])
+				Quit("GapScoreMatrix not symmetrical");
+	}
+
+// Brute-force sum-of-pairs letter score for one column: visits every
+// pair of distinct sequences and adds w1*w2*score(letter1, letter2).
+// Sequences whose letter code in this column is 20 or more are skipped.
+// Serves as a cross-check for the frequency-based calculation when
+// TRACE is enabled.
+static SCORE SPColBrute(const MSA &msa, unsigned uColIndex)
+	{
+	const unsigned uSeqCount = msa.GetSeqCount();
+	SCORE Total = 0;
+	for (unsigned uSeqA = 0; uSeqA < uSeqCount; ++uSeqA)
+		{
+		const WEIGHT wA = msa.GetSeqWeight(uSeqA);
+		const unsigned uLetterA = msa.GetLetterEx(uSeqA, uColIndex);
+		if (uLetterA < 20)
+			{
+			for (unsigned uSeqB = 0; uSeqB < uSeqA; ++uSeqB)
+				{
+				const WEIGHT wB = msa.GetSeqWeight(uSeqB);
+				const unsigned uLetterB = msa.GetLetterEx(uSeqB, uColIndex);
+				if (uLetterB < 20)
+					{
+					const SCORE PairScore = wA*wB*(*g_ptrScoreMatrix)[uLetterA][uLetterB];
+#if	TRACE
+					Log("Check %c %c w1=%.3g w2=%.3g Mx=%.3g t=%.3g\n",
+					  LetterToCharAmino(uLetterA),
+					  LetterToCharAmino(uLetterB),
+					  wA,
+					  wB,
+					  (*g_ptrScoreMatrix)[uLetterA][uLetterB],
+					  PairScore);
+#endif
+					Total += PairScore;
+					}
+				}
+			}
+		}
+	return Total;
+	}
+
+// Gap-open part of the column score, computed from gap-type frequencies.
+// Freqs[] has four entries indexed by gap type (LL, LG, GL, GG).
+// Returns the sum over all ordered pairs (i, j) of
+// Freqs[i]*Freqs[j]*GapScoreMatrix[i][j], evaluated as the diagonal
+// terms plus twice the lower triangle; this relies on GapScoreMatrix
+// being symmetrical. The result still contains each sequence paired
+// with itself; ObjScoreSPCol subtracts those terms afterwards.
+static SCORE SPGapFreqs(const FCOUNT Freqs[])
+	{
+#if TRACE
+	Log("Freqs=");
+	for (unsigned i = 0; i < 4; ++i)
+		if (Freqs[i] != 0)
+			Log(" %s=%.3g", GapTypeToStr(i), Freqs[i]);
+	Log("\n");
+#endif
+
+	SCORE TotalOffDiag = 0;
+	SCORE TotalDiag = 0;
+	for (unsigned i = 0; i < 4; ++i)
+		{
+		const FCOUNT fi = Freqs[i];
+	// A zero frequency contributes nothing to either total.
+		if (0 == fi)
+			continue;
+		const float *Row = GapScoreMatrix[i];
+	// Diagonal term: gap type i paired with itself.
+		SCORE diagt = fi*fi*Row[i];
+		TotalDiag += diagt;
+#if	TRACE
+		Log("SPFGaps %s %s + Mx=%.3g TotalDiag += %.3g\n",
+		  GapTypeToStr(i),
+		  GapTypeToStr(i),
+		  Row[i],
+		  diagt);
+#endif
+	// Lower triangle only (j < i); doubled in the return statement.
+		SCORE Sum = 0;
+		for (unsigned j = 0; j < i; ++j)
+			{
+			SCORE t = Freqs[j]*Row[j];
+#if	TRACE
+			if (Freqs[j] != 0)
+				Log("SPFGaps %s %s + Mx=%.3g Sum += %.3g\n",
+				  GapTypeToStr(i),
+				  GapTypeToStr(j),
+				  Row[j],
+				  fi*t);
+#endif
+			Sum += t;
+			}
+		TotalOffDiag += fi*Sum;
+		}
+#if TRACE
+	Log("SPFGap TotalOffDiag=%.3g + TotalDiag=%.3g = %.3g\n",
+	  TotalOffDiag, TotalDiag, TotalOffDiag + TotalDiag);
+#endif
+	return TotalOffDiag*2 + TotalDiag;
+	}
+
+// Letter part of the column score, computed from letter frequencies.
+// Freqs[] has 20 entries indexed by letter code.
+// Returns the sum over all ordered pairs (i, j) of
+// Freqs[i]*Freqs[j]*(*g_ptrScoreMatrix)[i][j], evaluated as the
+// diagonal terms plus twice the lower triangle; this relies on the
+// score matrix being symmetrical. The result still contains each
+// sequence paired with itself; ObjScoreSPCol subtracts those terms
+// afterwards.
+static SCORE SPFreqs(const FCOUNT Freqs[])
+	{
+#if TRACE
+	Log("Freqs=");
+	for (unsigned i = 0; i < 20; ++i)
+		if (Freqs[i] != 0)
+			Log(" %c=%.3g", LetterToCharAmino(i), Freqs[i]);
+	Log("\n");
+#endif
+
+	SCORE TotalOffDiag = 0;
+	SCORE TotalDiag = 0;
+	for (unsigned i = 0; i < 20; ++i)
+		{
+		const FCOUNT fi = Freqs[i];
+	// A zero frequency contributes nothing to either total.
+		if (0 == fi)
+			continue;
+		const float *Row = (*g_ptrScoreMatrix)[i];
+	// Diagonal term: letter i paired with itself.
+		SCORE diagt = fi*fi*Row[i];
+		TotalDiag += diagt;
+#if	TRACE
+		Log("SPF %c %c + Mx=%.3g TotalDiag += %.3g\n",
+		  LetterToCharAmino(i),
+		  LetterToCharAmino(i),
+		  Row[i],
+		  diagt);
+#endif
+	// Lower triangle only (j < i); doubled in the return statement.
+		SCORE Sum = 0;
+		for (unsigned j = 0; j < i; ++j)
+			{
+			SCORE t = Freqs[j]*Row[j];
+#if	TRACE
+			if (Freqs[j] != 0)
+				Log("SPF %c %c + Mx=%.3g Sum += %.3g\n",
+				  LetterToCharAmino(i),
+				  LetterToCharAmino(j),
+				  Row[j],
+				  fi*t);
+#endif
+			Sum += t;
+			}
+		TotalOffDiag += fi*Sum;
+		}
+#if TRACE
+	Log("SPF TotalOffDiag=%.3g + TotalDiag=%.3g = %.3g\n",
+	  TotalOffDiag, TotalDiag, TotalOffDiag + TotalDiag);
+#endif
+	return TotalOffDiag*2 + TotalDiag;
+	}
+
+// Score of one column, computed from weighted frequencies rather than
+// by visiting every pair of sequences.
+// Accumulates the weighted letter frequencies and gap-type frequencies
+// of the column, scores them with SPFreqs and SPGapFreqs, and subtracts
+// the terms in which a sequence is paired with itself.
+// The value returned counts each pair of distinct sequences twice;
+// ObjScoreSPDimer halves the total.
+// GapScoreMatrix must have been initialized before this is called.
+static SCORE ObjScoreSPCol(const MSA &msa, unsigned uColIndex)
+	{
+	FCOUNT Freqs[20];
+	FCOUNT GapFreqs[4];
+
+	memset(Freqs, 0, sizeof(Freqs));
+	memset(GapFreqs, 0, sizeof(GapFreqs));
+
+	const unsigned uSeqCount = msa.GetSeqCount();
+#if	TRACE
+	Log("Weights=");
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		Log(" %u=%.3g", uSeqIndex, msa.GetSeqWeight(uSeqIndex));
+	Log("\n");
+#endif
+// Self-pair terms (w*w*M[x][x]) that the frequency products include
+// but which must not be part of the final score.
+	SCORE SelfOverCount = 0;
+	SCORE GapSelfOverCount = 0;
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		WEIGHT w = msa.GetSeqWeight(uSeqIndex);
+
+	// Gap type: bit 0 = gap in this column, bit 1 = gap in the previous
+	// column. The first column is treated as having no gap before it.
+		bool bGapThisCol = msa.IsGap(uSeqIndex, uColIndex);
+		bool bGapPrevCol = (uColIndex == 0 ? false : msa.IsGap(uSeqIndex, uColIndex - 1));
+		int GapType = bGapThisCol + 2*bGapPrevCol;
+		assert(GapType >= 0 && GapType < 4);
+		GapFreqs[GapType] += w;
+		SCORE gapt = w*w*GapScoreMatrix[GapType][GapType];
+		GapSelfOverCount += gapt;
+
+	// Gapped positions contribute to the gap score only.
+		if (bGapThisCol)
+			continue;
+		unsigned uLetter = msa.GetLetterEx(uSeqIndex, uColIndex);
+	// Letter codes of 20 or more have no entry in Freqs[] and are skipped
+	// (presumably wildcard / non-standard residues -- confirm).
+		if (uLetter >= 20)
+			continue;
+		Freqs[uLetter] += w;
+		SCORE t = w*w*(*g_ptrScoreMatrix)[uLetter][uLetter];
+#if	TRACE
+		Log("FastCol compute freqs & SelfOverCount %c w=%.3g M=%.3g SelfOverCount += %.3g\n",
+		  LetterToCharAmino(uLetter), w, (*g_ptrScoreMatrix)[uLetter][uLetter], t);
+#endif
+		SelfOverCount += t;
+		}
+	SCORE SPF = SPFreqs(Freqs);
+	SCORE Col = SPF - SelfOverCount;
+
+	SCORE SPFGaps = SPGapFreqs(GapFreqs);
+	SCORE ColGaps = SPFGaps - GapSelfOverCount;
+#if	TRACE
+	Log("SPF=%.3g - SelfOverCount=%.3g = %.3g\n", SPF, SelfOverCount, Col);
+	Log("SPFGaps=%.3g - GapsSelfOverCount=%.3g = %.3g\n", SPFGaps, GapSelfOverCount, ColGaps);
+#endif
+	return Col + ColGaps;
+	}
+
+// Sum-of-pairs objective score of an alignment using the fast
+// frequency-based column scoring in ObjScoreSPCol.
+// Returns half the sum of the column scores, because each column score
+// counts every pair of sequences twice.
+SCORE ObjScoreSPDimer(const MSA &msa)
+	{
+// The gap score matrix is rebuilt on every call. The original code
+// guarded this with a static flag that was never set to true, so it
+// already ran every time; the dead flag has been removed. Rebuilding is
+// cheap and keeps the matrix in step with the current g_scoreGapOpen.
+	InitGapScoreMatrix();
+
+	SCORE Total = 0;
+	const unsigned uColCount = msa.GetColCount();
+	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
+		{
+		SCORE Col = ObjScoreSPCol(msa, uColIndex);
+#if	TRACE
+		{
+		SCORE ColCheck = SPColBrute(msa, uColIndex);
+		Log("FastCol=%.3g CheckCol=%.3g\n", Col, ColCheck);
+		}
+#endif
+		Total += Col;
+		}
+#if TRACE
+	Log("Total/2 = %.3g (final result from fast)\n", Total/2);
+#endif
+	return Total/2;
+	}

Added: trunk/packages/muscle/branches/upstream/current/sptest.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/sptest.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/sptest.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,176 @@
+#include "muscle.h"
+#include "objscore.h"
+#include "msa.h"
+#include "textfile.h"
+#include "pwpath.h"
+
+const unsigned INDELS = 1;
+
+// Pick two different random positions in Str that both hold 'M'.
+// Candidates are drawn with rand() from the range 1 .. L-2, so the
+// first and last characters are never chosen. The caller must make
+// sure that at least two 'M' characters exist in that range; otherwise
+// this function does not return.
+static void GetPos(const char Str[], unsigned L, int *pi1, int *pi2)
+	{
+	int iFirst;
+	do
+		iFirst = rand()%(L-2) + 1;
+	while (Str[iFirst] != 'M');
+
+	int iSecond;
+	do
+		iSecond = rand()%(L-2) + 1;
+	while (iSecond == iFirst || Str[iSecond] != 'M');
+
+	*pi1 = iFirst;
+	*pi2 = iSecond;
+	}
+
+// Build a random pairwise path string of length
+// uSeqLength + uIndelCount in Str. The path starts as all 'M'; for each
+// indel one interior 'M' is replaced by 'D' and another by 'I'.
+// Str must have room for uSeqLength + uIndelCount + 1 characters
+// (the path plus a terminating zero). The path is written to the log.
+static void MakePath(unsigned uSeqLength, unsigned uIndelCount, char Str[])
+	{
+// Each indel consumes two interior 'M' positions (the first and last
+// characters are never used). With fewer than uIndelCount + 2 columns
+// GetPos would divide by zero or never find a free position.
+	if (uIndelCount > 0 && uSeqLength < uIndelCount + 2)
+		Quit("MakePath: sequence too short for requested indels");
+
+	unsigned uPathLength = uSeqLength + uIndelCount;
+	for (unsigned i = 0; i < uPathLength; ++i)
+		Str[i] = 'M';
+
+	for (unsigned i = 0; i < uIndelCount; ++i)
+		{
+		int i1, i2;
+		GetPos(Str, uPathLength, &i1, &i2);
+		Str[i1] = 'D';
+		Str[i2] = 'I';
+		}
+
+	Str[uPathLength] = 0;
+	Log("MakePath=%s\n", Str);
+	}
+
+// Developer test comparing the SP and XP objective scores.
+// Reads two alignments of equal column count from hard-coded files
+// under c:\tmp, aligns them to each other along two random paths
+// (A and B), and logs ObjScoreSP of each combined alignment next to
+// ObjScoreXP of the corresponding pair of sub-alignments, together with
+// the A-B differences. All sequence weights are set to 1.0.
+void SPTest()
+	{
+// Capacities of the fixed-size scratch arrays used below.
+	const unsigned uPathBufferBytes = 1024;
+	const unsigned uMaxSeqs = 1024;
+
+	SetPPScore(PPSCORE_SV);
+
+	SetListFileName("c:\\tmp\\muscle.log", false);
+
+	TextFile file1("c:\\tmp\\msa1.afa");
+	TextFile file2("c:\\tmp\\msa2.afa");
+
+	MSA msa1;
+	MSA msa2;
+
+	msa1.FromFile(file1);
+	msa2.FromFile(file2);
+
+	Log("msa1=\n");
+	msa1.LogMe();
+	Log("msa2=\n");
+	msa2.LogMe();
+
+	const unsigned uColCount = msa1.GetColCount();
+	if (msa2.GetColCount() != uColCount)
+		Quit("Different lengths");
+
+// MakePath writes uColCount + INDELS characters plus a terminating zero.
+	if (uColCount + INDELS + 1 > uPathBufferBytes)
+		Quit("SPTest: alignment too long for path buffer");
+
+	const unsigned uSeqCount1 = msa1.GetSeqCount();
+	const unsigned uSeqCount2 = msa2.GetSeqCount();
+	const unsigned uSeqCount = uSeqCount1 + uSeqCount2;
+
+// Seqs1 / Seqs2 below hold one entry per sequence.
+	if (uSeqCount1 > uMaxSeqs || uSeqCount2 > uMaxSeqs)
+		Quit("SPTest: too many sequences");
+
+	MSA::SetIdCount(uSeqCount);
+
+// Ids: msa1 gets 0 .. uSeqCount1-1, msa2 continues from uSeqCount1.
+	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
+		{
+		msa1.SetSeqWeight(uSeqIndex1, 1.0);
+		msa1.SetSeqId(uSeqIndex1, uSeqIndex1);
+		}
+
+	for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
+		{
+		msa2.SetSeqWeight(uSeqIndex2, 1.0);
+		msa2.SetSeqId(uSeqIndex2, uSeqCount1 + uSeqIndex2);
+		}
+
+	MSA alnA;
+	MSA alnB;
+
+	char strPathA[uPathBufferBytes];
+	char strPathB[uPathBufferBytes];
+	MakePath(uColCount, INDELS, strPathA);
+	MakePath(uColCount, INDELS, strPathB);
+
+	PWPath PathA;
+	PWPath PathB;
+	PathA.FromStr(strPathA);
+	PathB.FromStr(strPathB);
+
+	Log("PathA=\n");
+	PathA.LogMe();
+	Log("PathB=\n");
+	PathB.LogMe();
+
+	AlignTwoMSAsGivenPath(PathA, msa1, msa2, alnA);
+	AlignTwoMSAsGivenPath(PathB, msa1, msa2, alnB);
+
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		alnA.SetSeqWeight(uSeqIndex, 1.0);
+		alnB.SetSeqWeight(uSeqIndex, 1.0);
+		}
+
+// Row indexes of the msa1 rows and the msa2 rows within alnA / alnB.
+	unsigned Seqs1[uMaxSeqs];
+	unsigned Seqs2[uMaxSeqs];
+
+	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
+		Seqs1[uSeqIndex1] = uSeqIndex1;
+
+	for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
+		Seqs2[uSeqIndex2] = uSeqCount1 + uSeqIndex2;
+
+	MSA msaA1;
+	MSA msaA2;
+	MSA msaB1;
+	MSA msaB2;
+	MSAFromSeqSubset(alnA, Seqs1, uSeqCount1, msaA1);
+	MSAFromSeqSubset(alnB, Seqs1, uSeqCount1, msaB1);
+	MSAFromSeqSubset(alnA, Seqs2, uSeqCount2, msaA2);
+	MSAFromSeqSubset(alnB, Seqs2, uSeqCount2, msaB2);
+
+	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
+		{
+		msaA1.SetSeqWeight(uSeqIndex1, 1.0);
+		msaB1.SetSeqWeight(uSeqIndex1, 1.0);
+		}
+
+	for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
+		{
+		msaA2.SetSeqWeight(uSeqIndex2, 1.0);
+		msaB2.SetSeqWeight(uSeqIndex2, 1.0);
+		}
+
+	Log("msaA1=\n");
+	msaA1.LogMe();
+
+	Log("msaB1=\n");
+	msaB1.LogMe();
+
+	Log("msaA2=\n");
+	msaA2.LogMe();
+
+	Log("msaB2=\n");
+	msaB2.LogMe();
+
+	Log("alnA=\n");
+	alnA.LogMe();
+
+	Log("AlnB=\n");
+	alnB.LogMe();
+
+	Log("\nSPA\n---\n");
+	SCORE SPA = ObjScoreSP(alnA);
+	Log("\nSPB\n---\n");
+	SCORE SPB = ObjScoreSP(alnB);
+
+	Log("\nXPA\n---\n");
+	SCORE XPA = ObjScoreXP(msaA1, msaA2);
+	Log("\nXPB\n---\n");
+	SCORE XPB = ObjScoreXP(msaB1, msaB2);
+
+	Log("SPA=%.4g SPB=%.4g Diff=%.4g\n", SPA, SPB, SPA - SPB);
+	Log("XPA=%.4g XPB=%.4g Diff=%.4g\n", XPA, XPB, XPA - XPB);
+	}

Added: trunk/packages/muscle/branches/upstream/current/stabilize.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/stabilize.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/stabilize.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,20 @@
+#include "muscle.h"
+#include "msa.h"
+
+// Build msaStable from msa with the rows reordered so that the
+// sequence with id N becomes row N. Only the sequence names and the
+// alignment characters are copied.
+void Stabilize(const MSA &msa, MSA &msaStable)
+	{
+	const unsigned uRows = msa.GetSeqCount();
+	const unsigned uCols = msa.GetColCount();
+	msaStable.SetSize(uRows, uCols);
+
+	for (unsigned uTargetRow = 0; uTargetRow < uRows; ++uTargetRow)
+		{
+	// The target row number doubles as the sequence id to look up.
+		const unsigned uSourceRow = msa.GetSeqIndex(uTargetRow);
+		msaStable.SetSeqName(uTargetRow, msa.GetSeqName(uSourceRow));
+
+		unsigned uCol = 0;
+		while (uCol < uCols)
+			{
+			msaStable.SetChar(uTargetRow, uCol, msa.GetChar(uSourceRow, uCol));
+			++uCol;
+			}
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/subfam.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/subfam.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/subfam.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,384 @@
+#include "muscle.h"
+#include "tree.h"
+#include "textfile.h"	// for test only
+#include "msa.h"
+#include "seqvect.h"
+#include "profile.h"
+#ifndef _MSC_VER
+#include <unistd.h>	//	for unlink
+#endif
+
+#define TRACE	0
+
+/***
+Find subfamilies from tree by following criteria:
+
+(a) number of leaves <= max,
+(b) is monophyletic, i.e. most recent common ancestor is parent
+of no more than one subfamily.
+***/
+
+// Post-order walk of the tree that collects subfamily root nodes.
+// Returns the number of leaves under uNodeIndex. When a node has more
+// than uMaxLeafCount leaves, each child that is itself within the limit
+// is appended to SubFams[]. When the whole tree fits within the limit,
+// the root is recorded as the only subfamily.
+static unsigned SubFamRecurse(const Tree &tree, unsigned uNodeIndex, unsigned uMaxLeafCount,
+  unsigned SubFams[], unsigned &uSubFamCount)
+	{
+	if (tree.IsLeaf(uNodeIndex))
+		return 1;
+
+	const unsigned uLeftChild = tree.GetLeft(uNodeIndex);
+	const unsigned uRightChild = tree.GetRight(uNodeIndex);
+	const unsigned uLeavesLeft =
+	  SubFamRecurse(tree, uLeftChild, uMaxLeafCount, SubFams, uSubFamCount);
+	const unsigned uLeavesRight =
+	  SubFamRecurse(tree, uRightChild, uMaxLeafCount, SubFams, uSubFamCount);
+	const unsigned uLeavesHere = uLeavesLeft + uLeavesRight;
+
+	if (uLeavesHere <= uMaxLeafCount)
+		{
+	// Small enough to be absorbed by an ancestor, unless this is the root.
+		if (tree.IsRoot(uNodeIndex))
+			{
+			if (uSubFamCount != 0)
+				Quit("Error in SubFamRecurse");
+			SubFams[uSubFamCount++] = uNodeIndex;
+			}
+		return uLeavesHere;
+		}
+
+// Too big: any child within the limit becomes a subfamily of its own.
+	if (uLeavesLeft <= uMaxLeafCount)
+		SubFams[uSubFamCount++] = uLeftChild;
+	if (uLeavesRight <= uMaxLeafCount)
+		SubFams[uSubFamCount++] = uRightChild;
+
+	return uLeavesHere;
+	}
+
+// Find the subfamilies of a tree.
+// On return SubFams[0 .. *ptruSubFamCount-1] holds the node index of
+// the root of each subfamily; each subfamily has at most uMaxLeafCount
+// leaves. SubFams[] must be large enough to hold every subfamily found.
+void SubFam(const Tree &tree, unsigned uMaxLeafCount, unsigned SubFams[], unsigned *ptruSubFamCount)
+	{
+	unsigned &uCount = *ptruSubFamCount;
+	uCount = 0;
+
+	const unsigned uRoot = tree.GetRootNodeIndex();
+	SubFamRecurse(tree, uRoot, uMaxLeafCount, SubFams, uCount);
+
+#if	TRACE
+	{
+	Log("\n");
+	Log("Tree:\n");
+	tree.LogMe();
+	//void DrawTree(const Tree &tree);
+	//DrawTree(tree);
+	Log("\n");
+	Log("%d subfams:\n", *ptruSubFamCount);
+	for (unsigned i = 0; i < *ptruSubFamCount; ++i)
+		Log("  %d=%d", i, SubFams[i]);
+	Log("\n");
+	}
+#endif
+	}
+
+//unsigned SubFams[9999];
+//unsigned uSubFamCount;
+//
+//static unsigned DistFromRoot(const Tree &tree, unsigned uNodeIndex)
+//	{
+//	const unsigned uRoot = tree.GetRootNodeIndex();
+//	unsigned uDist = 0;
+//	while (uNodeIndex != uRoot)
+//		{
+//		++uDist;
+//		uNodeIndex = tree.GetParent(uNodeIndex);
+//		}
+//	return uDist;
+//	}
+//
+//static void DrawNode(const Tree &tree, unsigned uNodeIndex)
+//	{
+//	if (!tree.IsLeaf(uNodeIndex))
+//		DrawNode(tree, tree.GetLeft(uNodeIndex));
+//
+//	unsigned uDist = DistFromRoot(tree, uNodeIndex);
+//	for (unsigned i = 0; i < 5*uDist; ++i)
+//		Log(" ");
+//	Log("%d", uNodeIndex);
+//	for (unsigned i = 0; i < uSubFamCount; ++i)
+//		if (uNodeIndex == SubFams[i])
+//			{
+//			Log("*");
+//			break;
+//			}
+//	Log("\n");
+//
+//	if (!tree.IsLeaf(uNodeIndex))
+//		DrawNode(tree, tree.GetRight(uNodeIndex));
+//	}
+//
+//static void DrawTree(const Tree &tree)
+//	{
+//	unsigned uRoot = tree.GetRootNodeIndex();
+//	DrawNode(tree, uRoot);
+//	}
+//
+//void TestSubFams(const char *FileName)
+//	{
+//	Tree tree;
+//	TextFile f(FileName);
+//	tree.FromFile(f);
+//	SubFam(tree, 5, SubFams, &uSubFamCount);
+//	DrawTree(tree);
+//	}
+
+// Mark every node strictly below uNodeIndex as lying inside a
+// subfamily. The node uNodeIndex itself is not marked.
+static void SetInFam(const Tree &tree, unsigned uNodeIndex, bool NodeInSubFam[])
+	{
+	if (tree.IsLeaf(uNodeIndex))
+		return;
+
+	unsigned Children[2];
+	Children[0] = tree.GetLeft(uNodeIndex);
+	Children[1] = tree.GetRight(uNodeIndex);
+
+	for (unsigned k = 0; k < 2; ++k)
+		NodeInSubFam[Children[k]] = true;
+
+	for (unsigned k = 0; k < 2; ++k)
+		SetInFam(tree, Children[k], NodeInSubFam);
+	}
+
+// Align the sequences under one subfamily node with an external
+// program. The sequences of the leaves under uNodeIndex are written to
+// a temporary FASTA file, "probcons" is run on it through the shell,
+// and its output is read into msaOut. Sequence ids in msaOut are then
+// restored by looking up each sequence name in vAll.
+// Calls Quit() if the external command fails or does not return one
+// row per leaf.
+// NOTE(review): the temporary file names are fixed and relative to the
+// current directory, so concurrent runs in one directory would collide.
+void AlignSubFam(SeqVect &vAll, const Tree &GuideTree, unsigned uNodeIndex,
+  MSA &msaOut)
+	{
+	const unsigned uSeqCount = vAll.GetSeqCount();
+
+	const char *InTmp = "asf_in.tmp";
+	const char *OutTmp = "asf_out.tmp";
+
+	unsigned *Leaves = new unsigned[uSeqCount];
+	unsigned uLeafCount;
+	GetLeaves(GuideTree, uNodeIndex, Leaves, &uLeafCount);
+
+	SeqVect v;
+	for (unsigned i = 0; i < uLeafCount; ++i)
+		{
+		unsigned uLeafNodeIndex = Leaves[i];
+		unsigned uId = GuideTree.GetLeafId(uLeafNodeIndex);
+		Seq &s = vAll.GetSeqById(uId);
+		v.AppendSeq(s);
+		}
+
+#if	TRACE
+	{
+	Log("Align subfam[node=%d, size=%d] ", uNodeIndex, uLeafCount);
+	for (unsigned i = 0; i < uLeafCount; ++i)
+		Log(" %s", v.GetSeqName(i));
+	Log("\n");
+	}
+#endif
+
+	TextFile fIn(InTmp, true);
+
+	v.ToFASTAFile(fIn);
+	fIn.Close();
+
+// Both file names are short literals, so the command fits in CmdLine.
+	char CmdLine[4096];
+	sprintf(CmdLine, "probcons %s > %s 2> /dev/null", InTmp, OutTmp);
+//	sprintf(CmdLine, "muscle -in %s -out %s -maxiters 1", InTmp, OutTmp);
+
+// A non-zero status means the aligner did not run or reported failure,
+// in which case the output file cannot be trusted.
+	const int iStatus = system(CmdLine);
+	if (0 != iStatus)
+		{
+		unlink(InTmp);
+		unlink(OutTmp);
+		delete[] Leaves;
+		Quit("AlignSubFam: external aligner command failed");
+		}
+
+	TextFile fOut(OutTmp);
+	msaOut.FromFile(fOut);
+
+// The loop below indexes msaOut by leaf number, so the row count must
+// match the number of sequences that were submitted.
+	if (msaOut.GetSeqCount() != uLeafCount)
+		{
+		unlink(InTmp);
+		unlink(OutTmp);
+		delete[] Leaves;
+		Quit("AlignSubFam: external aligner returned wrong number of sequences");
+		}
+
+	for (unsigned uSeqIndex = 0; uSeqIndex < uLeafCount; ++uSeqIndex)
+		{
+		const char *Name = msaOut.GetSeqName(uSeqIndex);
+		unsigned uId = vAll.GetSeqIdFromName(Name);
+		msaOut.SetSeqId(uSeqIndex, uId);
+		}
+
+	unlink(InTmp);
+	unlink(OutTmp);
+
+	delete[] Leaves;
+	}
+
+// Top-level driver for subfamily-based alignment.
+// Reads the input FASTA file, builds a guide tree and a first
+// progressive alignment, re-estimates the tree from that alignment,
+// splits it into subfamilies with SubFam(), aligns each subfamily with
+// AlignSubFam(), merges the subfamily alignments up the tree with
+// AlignTwoMSAs(), and writes the root alignment to the output file.
+// All inputs come from global settings (g_pstrInFileName, g_SeqType, ...).
+void ProgAlignSubFams()
+	{
+	MSA msaOut;
+
+	SetOutputFileName(g_pstrOutFileName);
+	SetInputFileName(g_pstrInFileName);
+
+	SetMaxIters(g_uMaxIters);
+	SetSeqWeightMethod(g_SeqWeight1);
+
+	TextFile fileIn(g_pstrInFileName);
+	SeqVect v;
+	v.FromFASTAFile(fileIn);
+	const unsigned uSeqCount = v.Length();
+
+	if (0 == uSeqCount)
+		Quit("No sequences in input file");
+
+// Choose the alphabet, either from the user's setting or by guessing
+// from the sequences.
+	ALPHA Alpha = ALPHA_Undefined;
+	switch (g_SeqType)
+		{
+	case SEQTYPE_Auto:
+		Alpha = v.GuessAlpha();
+		break;
+
+	case SEQTYPE_Protein:
+		Alpha = ALPHA_Amino;
+		break;
+
+	case SEQTYPE_DNA:
+		Alpha = ALPHA_DNA;
+		break;
+
+	case SEQTYPE_RNA:
+		Alpha = ALPHA_RNA;
+		break;
+
+	default:
+		Quit("Invalid seq type");
+		}
+	SetAlpha(Alpha);
+	v.FixAlpha();
+
+// Nucleotide input switches the profile score and the first-pass
+// distance measure.
+	if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)
+		{
+		SetPPScore(PPSCORE_SPN);
+		g_Distance1 = DISTANCE_Kmer4_6;
+		}
+
+// Longest and total sequence length, for the statistics reported below.
+	unsigned uMaxL = 0;
+	unsigned uTotL = 0;
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		{
+		unsigned L = v.GetSeq(uSeqIndex).Length();
+		uTotL += L;
+		if (L > uMaxL)
+			uMaxL = L;
+		}
+
+	SetIter(1);
+	g_bDiags = g_bDiags1;
+	SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);
+
+	MSA::SetIdCount(uSeqCount);
+
+// Initialize sequence ids.
+// From this point on, ids must somehow propagate from here.
+	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
+		v.SetSeqId(uSeqIndex, uSeqIndex);
+
+	if (uSeqCount > 1)
+		MHackStart(v);
+
+// NOTE(review): an empty input was already rejected by the Quit() call
+// above, so this branch looks unreachable (assuming Quit does not
+// return) -- confirm.
+	if (0 == uSeqCount)
+		{
+		msaOut.Clear();
+		return;
+		}
+
+// NOTE(review): msaOut is a local and nothing in this function writes
+// it to the output file, so for a single amino acid sequence the
+// function returns without producing output -- confirm this is intended.
+	if (1 == uSeqCount && ALPHA_Amino == Alpha)
+		{
+		const Seq &s = v.GetSeq(0);
+		msaOut.FromSeq(s);
+		return;
+		}
+
+// First pass: guide tree from the unaligned sequences, then a
+// progressive alignment along it.
+	Tree GuideTree;
+	TreeFromSeqVect(v, GuideTree, g_Cluster1, g_Distance1, g_Root1);
+	SetMuscleTree(GuideTree);
+
+	MSA msa;
+	if (g_bLow)
+		{
+		ProgNode *ProgNodes = 0;
+		ProgNodes = ProgressiveAlignE(v, GuideTree, msa);
+		delete[] ProgNodes;
+		}
+	else
+		ProgressiveAlign(v, GuideTree, msa);
+	SetCurrentAlignment(msa);
+// Second pass: re-estimate the guide tree from the first alignment.
+	TreeFromMSA(msa, GuideTree, g_Cluster2, g_Distance2, g_Root2);
+	SetMuscleTree(GuideTree);
+
+// Subfamily root nodes of the new tree.
+	unsigned *SubFams = new unsigned[uSeqCount];
+	unsigned uSubFamCount;
+	SubFam(GuideTree, g_uMaxSubFamCount, SubFams, &uSubFamCount);
+
+	SetProgressDesc("Align node");
+// The per-node arrays below are sized for 2N-1 nodes.
+	const unsigned uNodeCount = 2*uSeqCount - 1;
+
+	ProgNode *ProgNodes = new ProgNode[uNodeCount];
+// NodeIsSubFam[n]: node n is the root of a subfamily.
+// NodeInSubFam[n]: node n lies below the root of a subfamily.
+	bool *NodeIsSubFam = new bool[uNodeCount];
+	bool *NodeInSubFam = new bool[uNodeCount];
+
+	for (unsigned i = 0; i < uNodeCount; ++i)
+		{
+		NodeIsSubFam[i] = false;
+		NodeInSubFam[i] = false;
+		}
+
+	for (unsigned i = 0; i < uSubFamCount; ++i)
+		{
+		unsigned uNodeIndex = SubFams[i];
+		assert(uNodeIndex < uNodeCount);
+		NodeIsSubFam[uNodeIndex] = true;
+		SetInFam(GuideTree, uNodeIndex, NodeInSubFam);
+		}
+
+// Visit the nodes in depth-first order. Subfamily roots are aligned
+// externally; nodes above the subfamilies merge the alignments of their
+// two children; nodes inside a subfamily need no work.
+	unsigned uJoin = 0;
+	unsigned uTreeNodeIndex = GuideTree.FirstDepthFirstNode();
+	do
+		{
+		if (NodeIsSubFam[uTreeNodeIndex])
+			{
+#if	TRACE
+			Log("Node %d: align subfam\n", uTreeNodeIndex);
+#endif
+			ProgNode &Node = ProgNodes[uTreeNodeIndex];
+			AlignSubFam(v, GuideTree, uTreeNodeIndex, Node.m_MSA);
+			Node.m_uLength = Node.m_MSA.GetColCount();
+			}
+		else if (!NodeInSubFam[uTreeNodeIndex])
+			{
+#if	TRACE
+			Log("Node %d: align two subfams\n", uTreeNodeIndex);
+#endif
+		// NOTE(review): uSubFamCount - 1 wraps around if no subfamily
+		// was found (unsigned arithmetic) -- confirm that cannot happen.
+			Progress(uJoin, uSubFamCount - 1);
+			++uJoin;
+
+			const unsigned uMergeNodeIndex = uTreeNodeIndex;
+			ProgNode &Parent = ProgNodes[uMergeNodeIndex];
+
+			const unsigned uLeft = GuideTree.GetLeft(uTreeNodeIndex);
+			const unsigned uRight = GuideTree.GetRight(uTreeNodeIndex);
+
+			ProgNode &Node1 = ProgNodes[uLeft];
+			ProgNode &Node2 = ProgNodes[uRight];
+
+			PWPath Path;
+			AlignTwoMSAs(Node1.m_MSA, Node2.m_MSA, Parent.m_MSA, Path);
+			Parent.m_uLength = Parent.m_MSA.GetColCount();
+
+		// The child alignments are no longer needed once merged.
+			Node1.m_MSA.Clear();
+			Node2.m_MSA.Clear();
+			}
+		else
+			{
+#if	TRACE
+			Log("Node %d: in subfam\n", uTreeNodeIndex);
+#endif
+			;
+			}
+		uTreeNodeIndex = GuideTree.NextDepthFirstNode(uTreeNodeIndex);
+		}
+	while (NULL_NEIGHBOR != uTreeNodeIndex);
+	ProgressStepsDone();
+
+// The alignment held at the root node is the final result.
+	unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
+	ProgNode &RootProgNode = ProgNodes[uRootNodeIndex];
+
+	TextFile fOut(g_pstrOutFileName, true);
+	MHackEnd(RootProgNode.m_MSA);
+	RootProgNode.m_MSA.ToFile(fOut);
+
+	delete[] NodeInSubFam;
+	delete[] NodeIsSubFam;
+	delete[] ProgNodes;
+	delete[] SubFams;
+
+	ProgNodes = 0;
+	NodeInSubFam = 0;
+	NodeIsSubFam = 0;
+	SubFams = 0;
+	}

Added: trunk/packages/muscle/branches/upstream/current/subfams.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/subfams.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/subfams.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,65 @@
+#include "muscle.h"
+#include "distfunc.h"
+
+// Sentinel values used by the shortest-path helpers in this file.
+// NOTE(review): INFINITY is also a macro in C99/C++11 <math.h>; if muscle.h
+// ends up including that header this declaration will clash -- confirm.
+const float INFINITY = float(1e29);
+const unsigned NILL = uInsane;
+
+// Per-node working arrays, indexed by node index. They are never allocated
+// anywhere in this file as it stands.
+static float *ShortestPathEstimate;
+static unsigned *Predecessor;
+
+// Find the pair of distinct nodes (i < j) with the largest DF.GetDist(i, j)
+// and return their indexes through ptrIndex1 / ptrIndex2. When several pairs
+// tie, the first one met in row-major order wins. Quits if DF holds fewer
+// than two nodes.
+static void GetMostDistantPair(DistFunc &DF, unsigned *ptrIndex1, unsigned *ptrIndex2)
+	{
+	const unsigned uCount = DF.GetCount();
+	if (uCount < 2)
+		Quit("GetMostDistantPair: < 2 seqs");
+
+	unsigned uBestFirst = uInsane;
+	unsigned uBestSecond = uInsane;
+	float fBestDist = -1;
+	for (unsigned uFirst = 0; uFirst < uCount; ++uFirst)
+		for (unsigned uSecond = uFirst + 1; uSecond < uCount; ++uSecond)
+			{
+			const float fDist = DF.GetDist(uFirst, uSecond);
+			if (!(fDist > fBestDist))
+				continue;
+			fBestDist = fDist;
+			uBestFirst = uFirst;
+			uBestSecond = uSecond;
+			}
+
+	assert(uBestFirst != uInsane);
+	assert(uBestSecond != uInsane);
+
+	*ptrIndex1 = uBestFirst;
+	*ptrIndex2 = uBestSecond;
+	}
+
+// Prepare the working arrays for a shortest-path search starting at node
+// uIndex: every node gets estimate INFINITY and predecessor NILL, then the
+// source node gets estimate 0.
+// ShortestPathEstimate and Predecessor must already hold at least
+// DF.GetCount() entries.
+static void InitializeSingleSource(DistFunc &DF, unsigned uIndex)
+	{
+	// The node count has to come from the distance function, otherwise the
+	// loop below is dead and both arrays stay uninitialized.
+	const unsigned uNodeCount = DF.GetCount();
+
+	for (unsigned i = 0; i < uNodeCount; ++i)
+		{
+		ShortestPathEstimate[i] = INFINITY;
+		Predecessor[i] = NILL;
+		}
+	ShortestPathEstimate[uIndex] = 0;
+	}
+
+// Edge relaxation: if going to v through u is cheaper than the current
+// estimate for v, store the cheaper estimate and record u as v's
+// predecessor.
+static void Relax(DistFunc &DF, unsigned u, unsigned v)
+	{
+	const float fEdge = DF.GetDist(u, v);
+	const float fViaU = ShortestPathEstimate[u] + fEdge;
+	if (!(ShortestPathEstimate[v] > fViaU))
+		return;
+	ShortestPathEstimate[v] = fViaU;
+	Predecessor[v] = u;
+	}
+
+// Unimplemented stub: the body is empty, so calling this does nothing and
+// neither argument is used.
+void ShortestPath(DistFunc &DF, unsigned uIndex)
+	{
+	}

Added: trunk/packages/muscle/branches/upstream/current/sw.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/sw.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/sw.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,206 @@
+#include "muscle.h"
+#include <math.h>
+#include "pwpath.h"
+#include "profile.h"
+#include <stdio.h>
+
+// Textbook Smith-Waterman affine gap implementation.
+
+#define	TRACE	0
+
+// Format a score for the DP matrix dump: MINUS_INFINITY is shown as "     *",
+// anything else as "%6.2f".
+// The result points either at a string literal or at a static buffer that
+// the next call overwrites, so use it before calling again.
+static const char *LocalScoreToStr(SCORE s)
+	{
+	static char str[16];
+	if (MINUS_INFINITY == s)
+		return "     *";
+	// Bounded write: a large finite score needs more than 15 characters in
+	// %6.2f form and must not run past the end of the buffer.
+	snprintf(str, sizeof(str), "%6.2f", s);
+	return str;
+	}
+
+// Log one DP matrix as a table: one column per prefix length of B, one row
+// per prefix length of A, each labelled with the prefix length and the
+// consensus character of the profile position that ends the prefix (blank
+// for the empty prefix).
+// NOTE(review): DPM(...) is not defined in this file; it presumably is a
+// macro that indexes DPM_ using uPrefixCountA/uPrefixCountB, which is why the
+// parameter names matter here -- confirm against profile.h.
+static void ListDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,
+  unsigned uPrefixCountA, unsigned uPrefixCountB)
+	{
+	// Header row.
+	Log("        ");
+	for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+		{
+		char c = ' ';
+		if (uPrefixLengthB > 0)
+			c = ConsensusChar(PB[uPrefixLengthB - 1]);
+		Log(" %4u:%c", uPrefixLengthB, c);
+		}
+	Log("\n");
+	// One line per prefix of A.
+	for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
+		{
+		char c = ' ';
+		if (uPrefixLengthA > 0)
+			c = ConsensusChar(PA[uPrefixLengthA - 1]);
+		Log("%4u:%c  ", uPrefixLengthA, c);
+		for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+			Log(" %s", LocalScoreToStr(DPM(uPrefixLengthA, uPrefixLengthB)));
+		Log("\n");
+		}
+	}
+
+// Smith-Waterman local alignment of profile PA (uLengthA positions) against
+// profile PB (uLengthB positions) with affine gaps.
+//
+// Three (uLengthA+1) x (uLengthB+1) matrices are filled, indexed by prefix
+// lengths of A and B:
+//   M  last column is LetterA+LetterB
+//   D  last column is LetterA+GapB
+//   I  last column is GapA+LetterB
+// The best-scoring M cell is remembered and handed to TraceBackSW, which
+// builds Path. The return value is that best score.
+// Both lengths must be > 0 (asserted).
+SCORE SW(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, PWPath &Path)
+	{
+	assert(uLengthB > 0 && uLengthA > 0);
+
+	const unsigned uPrefixCountA = uLengthA + 1;
+	const unsigned uPrefixCountB = uLengthB + 1;
+
+// Allocate DP matrices
+	const size_t LM = uPrefixCountA*uPrefixCountB;
+	SCORE *DPM_ = new SCORE[LM];
+	SCORE *DPD_ = new SCORE[LM];
+	SCORE *DPI_ = new SCORE[LM];
+
+	DPM(0, 0) = 0;
+	DPD(0, 0) = MINUS_INFINITY;
+	DPI(0, 0) = MINUS_INFINITY;
+
+	DPM(1, 0) = MINUS_INFINITY;
+	DPD(1, 0) = MINUS_INFINITY;
+	DPI(1, 0) = MINUS_INFINITY;
+
+	DPM(0, 1) = MINUS_INFINITY;
+	DPD(0, 1) = MINUS_INFINITY;
+	DPI(0, 1) = MINUS_INFINITY;
+
+// Empty prefix of B is special case
+	for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
+		{
+	// M=LetterA+LetterB, impossible with empty prefix
+		DPM(uPrefixLengthA, 0) = MINUS_INFINITY;
+
+	// D=LetterA+GapB, never optimal in local alignment with gap penalties
+		DPD(uPrefixLengthA, 0) = MINUS_INFINITY;
+
+	// I=GapA+LetterB, impossible with empty prefix
+		DPI(uPrefixLengthA, 0) = MINUS_INFINITY;
+		}
+
+// Empty prefix of A is special case
+	for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+		{
+	// M=LetterA+LetterB, impossible with empty prefix
+		DPM(0, uPrefixLengthB) = MINUS_INFINITY;
+
+	// D=LetterA+GapB, impossible with empty prefix
+		DPD(0, uPrefixLengthB) = MINUS_INFINITY;
+
+	// I=GapA+LetterB, never optimal in local alignment with gap penalties
+		DPI(0, uPrefixLengthB) = MINUS_INFINITY;
+		}
+
+// Best M cell seen so far; this is where the traceback starts.
+	SCORE scoreMax = MINUS_INFINITY;
+	unsigned uPrefixLengthAMax = uInsane;
+	unsigned uPrefixLengthBMax = uInsane;
+
+// ============
+// Main DP loop
+// ============
+// scoreGapCloseA / scoreGapCloseB hold the gap-close score of the previous
+// position in A / B (MINUS_INFINITY before the first position).
+	SCORE scoreGapCloseB = MINUS_INFINITY;
+	for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
+		{
+		const ProfPos &PPB = PB[uPrefixLengthB - 1];
+
+		SCORE scoreGapCloseA = MINUS_INFINITY;
+		for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
+			{
+			const ProfPos &PPA = PA[uPrefixLengthA - 1];
+
+			{
+		// Match M=LetterA+LetterB
+			SCORE scoreLL = ScoreProfPos2(PPA, PPB);
+
+			SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1);
+			SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseA;
+			SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseB;
+
+			SCORE scoreBest;
+			if (scoreMM >= scoreDM && scoreMM >= scoreIM)
+				scoreBest = scoreMM;
+			else if (scoreDM >= scoreMM && scoreDM >= scoreIM)
+				scoreBest = scoreDM;
+			else
+				{
+				assert(scoreIM >= scoreMM && scoreIM >= scoreDM);
+				scoreBest = scoreIM;
+				}
+		// Local alignment: a negative running score is dropped and the
+		// alignment restarts at this cell.
+			if (scoreBest < 0)
+				scoreBest = 0;
+			scoreBest += scoreLL;
+			if (scoreBest > scoreMax)
+				{
+				scoreMax = scoreBest;
+				uPrefixLengthAMax = uPrefixLengthA;
+				uPrefixLengthBMax = uPrefixLengthB;
+				}
+			DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest;
+			}
+
+			{
+		// Delete D=LetterA+GapB
+			SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) +
+			  PA[uPrefixLengthA-1].m_scoreGapOpen;
+			SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB);
+
+			SCORE scoreBest;
+			if (scoreMD >= scoreDD)
+				scoreBest = scoreMD;
+			else
+				{
+				assert(scoreDD >= scoreMD);
+				scoreBest = scoreDD;
+				}
+			DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;
+			}
+
+		// Insert I=GapA+LetterB
+			{
+			SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) +
+			  PB[uPrefixLengthB - 1].m_scoreGapOpen;
+			SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1);
+
+			SCORE scoreBest;
+			if (scoreMI >= scoreII)
+				scoreBest = scoreMI;
+			else
+				{
+				assert(scoreII > scoreMI);
+				scoreBest = scoreII;
+				}
+			DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;
+			}
+
+			scoreGapCloseA = PPA.m_scoreGapClose;
+			}
+		scoreGapCloseB = PPB.m_scoreGapClose;
+		}
+
+#if TRACE
+// ListDP takes the matrix dimensions (prefix counts); the loop variables
+// uPrefixLengthA/B are not in scope at this point.
+	Log("DPM:\n");
+	ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("DPD:\n");
+	ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);
+	Log("DPI:\n");
+	ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);
+#endif
+
+	assert(scoreMax == DPM(uPrefixLengthAMax, uPrefixLengthBMax));
+	TraceBackSW(PA, uLengthA, PB, uLengthB, DPM_, DPD_, DPI_,
+	  uPrefixLengthAMax, uPrefixLengthBMax, Path);
+
+#if	TRACE
+	SCORE scorePath = FastScorePath2(PA, uLengthA, PB, uLengthB, Path);
+	Path.LogMe();
+	Log("Score = %s Path = %s\n", LocalScoreToStr(scoreMax), LocalScoreToStr(scorePath));
+#endif
+
+	delete[] DPM_;
+	delete[] DPD_;
+	delete[] DPI_;
+
+	return scoreMax;
+	}

Added: trunk/packages/muscle/branches/upstream/current/termgaps.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/termgaps.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/termgaps.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,36 @@
+#include "muscle.h"
+#include "profile.h"
+
+// Adjust the terminal gap scores of a profile according to g_TermGaps.
+// Only the gap-open score of the first position and the gap-close score of
+// the last position are touched, and a score of MINUS_INFINITY is always
+// left alone. The last position is only adjusted when uLength > 1.
+//   TERMGAPS_Full  no change
+//   TERMGAPS_Half  the two scores are set to 0
+//   TERMGAPS_Ext   the two scores have their sign flipped
+// Prof is declared const but is modified in place through a cast.
+// Does nothing for an empty profile; quits on an unknown g_TermGaps.
+void SetTermGaps(const ProfPos *Prof, unsigned uLength)
+	{
+	if (0 == uLength)
+		return;
+
+	ProfPos *First = (ProfPos *) Prof;
+	ProfPos *Last = (ProfPos *) (Prof + uLength - 1);
+
+	switch (g_TermGaps)
+		{
+	case TERMGAPS_Full:
+		break;
+
+	case TERMGAPS_Half:
+	// -infinity check for lock left/right
+		if (First->m_scoreGapOpen != MINUS_INFINITY)
+			First->m_scoreGapOpen = 0;
+
+		if (uLength > 1 && Last->m_scoreGapClose != MINUS_INFINITY)
+			Last->m_scoreGapClose = 0;
+	// Stop here: running on into the Ext case would only negate the zeros
+	// just stored.
+		break;
+
+	case TERMGAPS_Ext:
+		if (First->m_scoreGapOpen != MINUS_INFINITY)
+			First->m_scoreGapOpen *= -1;
+
+		if (uLength > 1 && Last->m_scoreGapClose != MINUS_INFINITY)
+			Last->m_scoreGapClose *= -1;
+		break;
+
+	default:
+		Quit("Invalid g_TermGaps");
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/textfile.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/textfile.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/textfile.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,345 @@
+#include "muscle.h"
+#include "textfile.h"
+#include <errno.h>
+
+// Open szFileName for writing (bWrite true, mode "wb") or for reading
+// (mode "rb"). The name "-" selects stdout / stdin instead of a file on
+// disk. Quits if the file cannot be opened.
+TextFile::TextFile(const char szFileName[], bool bWrite)
+	{
+	const bool bStdStream = (0 == strcmp(szFileName, "-"));
+
+	FILE *ptrFile;
+	if (bStdStream)
+		ptrFile = bWrite ? stdout : stdin;
+	else
+		ptrFile = fopen(szFileName, bWrite ? "wb" : "rb");
+
+	if (0 == ptrFile)
+		Quit("Cannot open '%s' errno=%d\n", szFileName, errno);
+	Init(ptrFile, szFileName);
+	}
+
+// Common constructor body: remember the stream, keep a private copy of the
+// file name, and reset the position tracking (line 1, column 0, "at start
+// of line", nothing pushed back). DEBUG builds switch the stream to
+// unbuffered mode.
+void TextFile::Init(FILE *ptrFile, const char *ptrFileName)
+	{
+	m_ptrName = strdup(ptrFileName);
+	m_ptrFile = ptrFile;
+	m_cPushedBack = -1;
+	m_bLastCharWasEOL = true;
+	m_uColNr = 0;
+	m_uLineNr = 1;
+#if	DEBUG
+	setbuf(m_ptrFile, 0);
+#endif
+	}
+
+// Wrap an already-open stdio stream. ptrFileName is the name reported by
+// GetFileName(); a null pointer is treated as "-".
+TextFile::TextFile(FILE *ptrFile, const char *ptrFileName)
+	{
+	// Pass the caller's name through instead of always recording "-".
+	Init(ptrFile, ptrFileName ? ptrFileName : "-");
+	}
+
+// Close the stream unless it is absent or one of the standard streams, then
+// release the copied file name.
+TextFile::~TextFile()
+	{
+	const bool bStdStream =
+	  (stdin == m_ptrFile || stdout == m_ptrFile || stderr == m_ptrFile);
+	if (0 != m_ptrFile && !bStdStream)
+		fclose(m_ptrFile);
+	free(m_ptrName);
+	}
+
+// Read one line into szLine (capacity uBytes, always zero-terminated).
+// Carriage returns are dropped and the newline is not stored.
+// Returns true if end-of-file was hit before a newline, false otherwise.
+// Quits if uBytes is 0 or the line does not fit.
+bool TextFile::GetLine(char szLine[], unsigned uBytes)
+	{
+	if (0 == uBytes)
+		Quit("TextFile::GetLine, buffer zero size");
+
+	memset(szLine, 0, uBytes);
+
+	const unsigned uMaxChars = uBytes - 1;
+	unsigned uStored = 0;
+	char ch;
+	while (!GetChar(ch))
+		{
+		if ('\n' == ch)
+			return false;
+		if ('\r' == ch)
+			continue;
+		if (uStored >= uMaxChars)
+			Quit("TextFile::GetLine: input buffer too small, line %u",
+			  m_uLineNr);
+		else
+			szLine[uStored++] = ch;
+		}
+	return true;
+	}
+
+// Like GetLine, but each line is passed through TrimBlanks and lines that
+// end up empty are skipped. Returns true on end-of-file, false once a
+// non-empty line has been stored in szLine.
+bool TextFile::GetTrimLine(char szLine[], unsigned uBytes)
+	{
+	do
+		{
+		if (GetLine(szLine, uBytes))
+			return true;
+		TrimBlanks(szLine);
+		}
+	while (0 == szLine[0]);
+	return false;
+	}
+
+// Seek back to the start of the file and restore the same position-tracking
+// state that Init() sets up: line 1, column 0, "at start of line", and no
+// pushed-back character.
+void TextFile::Rewind()
+	{
+	fseek(m_ptrFile, 0, SEEK_SET);
+	m_uLineNr = 1;
+	m_uColNr = 0;
+	m_bLastCharWasEOL = true;
+	// A character pushed back before the rewind belongs to the old position
+	// and must not be returned ahead of the first character of the file.
+	m_cPushedBack = -1;
+	}
+
+// Write one character and keep the line/column counters in step: a newline
+// starts a new line at column 1, anything else advances the column.
+void TextFile::PutChar(char c)
+	{
+	int i = fputc(c, m_ptrFile);
+	// fputc reports the character written as an unsigned char value, so a
+	// plain char with the high bit set must be converted before comparing.
+	assert(i == (int) (unsigned char) c);
+	if ('\n' == c)
+		{
+		++m_uLineNr;
+		m_uColNr = 1;
+		}
+	else
+		++m_uColNr;
+	}
+
+// Write a zero-terminated string to the stream. A write failure is only
+// caught by the assert; the line/column counters are not updated.
+void TextFile::PutString(const char szLine[])
+	{
+	const int iResult = fputs(szLine, m_ptrFile);
+	assert(iResult >= 0);
+	}
+
+// printf-style output: format into a 4096-byte buffer and write the result
+// with PutString. Quits if the formatted text does not fit in the buffer.
+void TextFile::PutFormat(const char szFormat[], ...)
+	{
+	char szStr[4096];
+	va_list ArgList;
+	va_start(ArgList, szFormat);
+	// Bounded formatting so that long output cannot overrun szStr.
+	const int iLength = vsnprintf(szStr, sizeof(szStr), szFormat, ArgList);
+	va_end(ArgList);
+	if (iLength < 0 || (size_t) iLength >= sizeof(szStr))
+		Quit("TextFile::PutFormat: formatted text too long");
+	PutString(szStr);
+	}
+
+// GetLine variant for callers that require a line: quits on end-of-file.
+void TextFile::GetLineX(char szLine[], unsigned uBytes)
+	{
+	if (GetLine(szLine, uBytes))
+		Quit("end-of-file in GetLineX");
+	}
+
+// Read the next token into szToken (capacity uBytes, zero-terminated).
+// Leading white space is skipped. Every character listed in szCharTokens is
+// a token by itself; any other token runs until white space, one of those
+// special characters (which is pushed back for the next call) or
+// end-of-file.
+// Returns true if end-of-file was reached, either before any token or while
+// reading one (szToken is terminated in the latter case); false otherwise.
+// Quits if the token does not fit in szToken.
+bool TextFile::GetToken(char szToken[], unsigned uBytes, const char szCharTokens[])
+	{
+// Skip leading white space
+	char c;
+	for (;;)
+		{
+		bool bEof = GetChar(c);
+		if (bEof)
+			return true;
+	// <ctype.h> functions need an unsigned char value; a plain char with
+	// the high bit set may be negative.
+		if (!isspace((unsigned char) c))
+			break;
+		}
+
+// Check for special case single-character tokens
+	if (0 != strchr(szCharTokens, c))
+		{
+		assert(uBytes >= 2);
+		szToken[0] = c;
+		szToken[1] = 0;
+		return false;
+		}
+
+// Loop until token terminated by white space, EOF or special
+	unsigned uBytesCopied = 0;
+	for (;;)
+		{
+		if (uBytesCopied < uBytes - 1)
+			szToken[uBytesCopied++] = c;
+		else
+			Quit("TextFile::GetToken: input buffer too small, line %u",
+			  m_uLineNr);
+		bool bEof = GetChar(c);
+		if (bEof)
+			{
+			szToken[uBytesCopied] = 0;
+			return true;
+			}
+	// Check for special case single-character tokens
+		if (0 != strchr(szCharTokens, c))
+			{
+		// Push back as an unsigned value so that byte 0xFF cannot be
+		// mistaken for the -1 "nothing pushed back" marker.
+			PushBack((unsigned char) c);
+			assert(uBytesCopied > 0 && uBytesCopied < uBytes);
+			szToken[uBytesCopied] = 0;
+			return false;
+			}
+		if (isspace((unsigned char) c))
+			{
+			assert(uBytesCopied > 0 && uBytesCopied < uBytes);
+			szToken[uBytesCopied] = 0;
+			return false;
+			}
+		}
+	}
+
+// GetToken variant for callers that require a token: quits when GetToken
+// reports end-of-file.
+void TextFile::GetTokenX(char szToken[], unsigned uBytes, const char szCharTokens[])
+	{
+	if (GetToken(szToken, uBytes, szCharTokens))
+		Quit("End-of-file in GetTokenX");
+	}
+
+// Consume characters up to and including the next newline, or up to
+// end-of-file. Debug builds assert that everything skipped is white space.
+void TextFile::Skip()
+	{
+	for (;;)
+		{
+		char c;
+		bool bEof = GetChar(c);
+		if (bEof || '\n' == c)
+			return;
+		// isspace needs an unsigned char value; plain char may be negative.
+		assert(isspace((unsigned char) c));
+		}
+	}
+
+#ifdef _WIN32
+
+// Win32 variant: capture the current stream position (via fgetpos) together
+// with the line and column counters. The fpos_t value is narrowed to
+// unsigned for storage in TEXTFILEPOS.
+TEXTFILEPOS TextFile::GetPos()
+	{
+	fpos_t Where;
+	int iResult = fgetpos(m_ptrFile, &Where);
+	assert(0 == iResult);
+	assert(Where >= 0);
+
+	TEXTFILEPOS Here;
+	Here.uColNr = m_uColNr;
+	Here.uLineNr = m_uLineNr;
+	Here.uOffset = (unsigned) Where;
+	return Here;
+	}
+
+// Win32 variant: move the stream to a position previously returned by
+// GetPos (via fsetpos) and restore the saved line and column counters.
+void TextFile::SetPos(TEXTFILEPOS Pos)
+	{
+	fpos_t Where = (fpos_t) Pos.uOffset;
+	int iResult = fsetpos(m_ptrFile, &Where);
+	assert(0 == iResult);
+	m_uColNr = Pos.uColNr;
+	m_uLineNr = Pos.uLineNr;
+	}
+
+#else
+
+// Non-Win32 variant: capture the current stream offset (ftell) together with
+// the line and column counters. The ftell result is stored unchecked.
+TEXTFILEPOS TextFile::GetPos()
+	{
+	TEXTFILEPOS Here;
+	Here.uColNr = m_uColNr;
+	Here.uLineNr = m_uLineNr;
+	Here.uOffset = ftell(m_ptrFile);
+	return Here;
+	}
+
+// Non-Win32 variant: seek to the offset saved in Pos and restore the saved
+// line and column counters. The fseek result is not checked.
+void TextFile::SetPos(TEXTFILEPOS Pos)
+	{
+	fseek(m_ptrFile, Pos.uOffset, SEEK_SET);
+	m_uColNr = Pos.uColNr;
+	m_uLineNr = Pos.uLineNr;
+	}
+
+#endif
+
+// Read the next character into c.
+// Returns true on end-of-file (c is then left untouched), false when a
+// character was delivered. Quits on a read error.
+// A character stored by PushBack is delivered first; the line/column
+// counters are not changed in that case.
+bool TextFile::GetChar(char &c)
+	{
+	// -1 means "nothing pushed back".
+	if (-1 != m_cPushedBack)
+		{
+		c = (char) m_cPushedBack;
+		m_cPushedBack = -1;
+		return false;
+		}
+
+	int ic = fgetc(m_ptrFile);
+	if (ic < 0)
+		{
+		if (feof(m_ptrFile))
+			{
+		// Hack to fix up a non-empty text file that is missing
+		// and end-of-line character in the last line.
+		// One '\n' is synthesized; the flag set below makes the next call
+		// report end-of-file.
+			if (!m_bLastCharWasEOL && m_uLineNr > 0)
+				{
+				c = '\n';
+				m_bLastCharWasEOL = true;
+				return false;
+				}
+			return true;
+			}
+		Quit("TextFile::GetChar, error %s", strerror(errno));
+		}
+	c = (char) ic;
+	// Position tracking: a newline starts a new line at column 1.
+	if ('\n' == c)
+		{
+		m_bLastCharWasEOL = true;
+		++m_uLineNr;
+		m_uColNr = 1;
+		}
+	else
+		{
+		m_bLastCharWasEOL = false;
+		++m_uColNr;
+		}
+	return false;
+	}
+
+// GetChar variant for callers that require a character: quits on
+// end-of-file.
+void TextFile::GetCharX(char &c)
+	{
+	if (GetChar(c))
+		Quit("End-of-file in GetCharX");
+	}
+
+// Read characters until one that is not white space is found and return it
+// in c. Quits on end-of-file.
+void TextFile::GetNonblankChar(char &c)
+	{
+	do
+		{
+		bool bEof = GetChar(c);
+		if (bEof)
+			Quit("End-of-file in GetNonblankChar");
+		}
+	// isspace needs an unsigned char value; plain char may be negative.
+	while (isspace((unsigned char) c));
+	}
+
+// Discard the rest of the current line, newline included. Does nothing when
+// the last character read was already a newline. Quits on end-of-file.
+void TextFile::SkipLine()
+	{
+	if (m_bLastCharWasEOL)
+		return;
+	char ch;
+	do
+		{
+		if (GetChar(ch))
+			Quit("End-of-file in SkipLine");
+		}
+	while ('\n' != ch);
+	}
+
+// Discard white space; the first character that is not white space is
+// pushed back so that the next read returns it. Quits on end-of-file.
+void TextFile::SkipWhite()
+	{
+	for (;;)
+		{
+		char c;
+		bool bEof = GetChar(c);
+		if (bEof)
+			Quit("End-of-file in SkipWhite");
+		// isspace needs an unsigned char value; plain char may be negative.
+		if (!isspace((unsigned char) c))
+			{
+			// Push back as an unsigned value so that byte 0xFF cannot be
+			// mistaken for the -1 "nothing pushed back" marker.
+			PushBack((unsigned char) c);
+			break;
+			}
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/textfile.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/textfile.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/textfile.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,69 @@
+#ifndef	TextFile_h
+#define TextFile_h
+
+#include <stdio.h>
+
+// Saved read/write position of a TextFile, as produced by TextFile::GetPos
+// and consumed by TextFile::SetPos.
+struct TEXTFILEPOS
+	{
+	unsigned uOffset;	// stream offset (from ftell, or fgetpos on Win32)
+	unsigned uLineNr;	// TextFile line counter at that offset
+	unsigned uColNr;	// TextFile column counter at that offset
+	};
+
+// NOTE(review): not referenced by the TextFile class declared below;
+// presumably a suggested line-buffer size for callers -- confirm.
+const unsigned TextFileBufferSize = 256;
+
+// Thin wrapper around a stdio stream for character, token and line oriented
+// text I/O. It keeps line and column counters and supports pushing back a
+// single character.
+class TextFile
+	{
+private:
+// no default c'tor, not implemented
+	TextFile();
+
+public:
+	virtual ~TextFile();
+
+// Open a file by name ("-" selects stdin/stdout), or wrap an open stream.
+	TextFile(const char szFileName[], bool bWrite = false);
+	TextFile(FILE *ptrFile, const char *ptrFileName = "-");
+
+// Close the stream now. Safe to call more than once: the pointer is cleared
+// after the first call and a null stream is never passed to fclose.
+// Unlike the destructor this does not special-case the standard streams.
+	void Close() { if (m_ptrFile) fclose(m_ptrFile); m_ptrFile = 0; }
+
+// Line input. The bool versions return true on end-of-file; the X versions
+// quit instead.
+	bool GetLine(char szLine[], unsigned uBytes);
+	bool GetTrimLine(char szLine[], unsigned uBytes);
+	void GetLineX(char szLine[], unsigned uBytes);
+
+// Token input; each character in szCharTokens is a token by itself.
+	bool GetToken(char szToken[], unsigned uBytes, const char szCharTokens[] = "{}");
+	void GetTokenX(char szToken[], unsigned uBytes, const char szCharTokens[] = "{}");
+
+// Skipping and positioning.
+	void Skip();
+	void SkipLine();
+	void SkipWhite();
+	void Rewind();
+	TEXTFILEPOS GetPos();
+	void SetPos(TEXTFILEPOS Pos);
+
+// Character input. GetChar returns true on end-of-file; the other two quit.
+	bool GetChar(char &c);
+	void GetCharX(char &c);
+	void GetNonblankChar(char &c);
+
+	unsigned GetLineNr() { return m_uLineNr; }
+
+// Output.
+	void PutString(const char szLine[]);
+	void PutFormat(const char szFormat[], ...);
+	void PutChar(char c);
+
+	const char *GetFileName() { return m_ptrName; }
+
+// Store one character to be returned by the next GetChar. Only one slot
+// exists, and the value -1 means "empty".
+	void PushBack(int c) { m_cPushedBack = c; }
+
+	FILE *GetStdioFile() const { return m_ptrFile; }
+
+private:
+	void Init(FILE *ptrFile, const char *ptrFileName);
+
+private:
+	FILE *m_ptrFile;		// underlying stream, 0 after Close()
+	unsigned m_uLineNr;		// current line number, starts at 1
+	unsigned m_uColNr;		// current column number
+	char *m_ptrName;		// strdup'ed file name, freed by the destructor
+	bool m_bLastCharWasEOL;	// true if the last character read was '\n'
+	int m_cPushedBack;		// pushed-back character, or -1 if none
+	};
+
+#endif // TextFile_h

Added: trunk/packages/muscle/branches/upstream/current/threewaywt.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/threewaywt.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/threewaywt.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,342 @@
+#include "muscle.h"
+#include "tree.h"
+#include <math.h>
+
+#define TRACE	0
+
+/***
+Sequence weights derived from a tree using Gotoh's
+three-way method.
+
+	Gotoh (1995) CABIOS 11(5), 543-51.
+
+Each edge e is assigned a weight w(e).
+
+Consider first a tree with three leaves A,B and C
+having branch lengths a, b and c, as follows.
+
+            B
+            |
+            b
+            |
+    A---a---R---c---C
+
+The internal node is denoted by R.
+
+Define:
+
+	S = (bc + ca + ab)
+	x = bc(a + b)(a + c)
+	y = a(b + c)FS
+
+Here F is a tunable normalization factor which is
+approximately 1.0. Then the edge weight for AR
+is computed as:
+
+	w(AR) = sqrt(x/y)
+
+Similar expressions for the other edges follow by
+symmetry.
+
+For a tree with more than three edges, the weight
+of an edge that ends in a leaf is computed from
+the three-way tree that includes the edge and
+its two neighbors. The weight of an internal edge
+is computed as the product of the weights for that
+edge derived from the two three-way subtrees that
+include that edge.
+
+For example, consider the following tree.
+
+       B
+       |
+    A--R--V--C
+          |
+          D
+
+Here, w(RV) is computed as the product of the
+two values for w(RV) derived from the three-way
+trees with leaves ABV and RCD respectively.
+
+The calculation is done using "Gotoh lengths",
+not the real edge lengths.
+
+The Gotoh length G of a directed edge is calculated
+recursively as:
+
+	G = d + LR/(L + R)
+
+where d is the length of the edge, and L and R are
+the Gotoh lengths of the left and right edges adjoining
+the terminal end of the edge. If the edge terminates on
+a leaf, then G=d.
+
+Pairwise sequence weights are computed as the
+product of edge weights on the path that connects
+their leaves.
+
+If the tree is split into two subtrees by deleting
+a given edge e, then the pairwise weights factorize.
+For operations on profiles formed from the two
+subtrees, it is possible to assign a weight to a
+sequence as the product of edge weights on a path
+from e to its leaf.
+***/
+
+// The xxxUnrooted functions present a rooted tree as
+// if it had been unrooted by deleting the root node.
+// First neighbor of uNode1 other than uNode2, with the root node treated as
+// if it were not there.
+// If the two nodes share an edge, the tree's own first neighbor is used,
+// replaced by the root's other neighbor when it turns out to be the root.
+// If they do not share an edge they must both be children of the root; the
+// result is then uNode1's first neighbor other than the root.
+// Quits if either node is the root or the nodes are not adjacent in the
+// unrooted sense.
+static unsigned GetFirstNeighborUnrooted(const Tree &tree, unsigned uNode1,
+  unsigned uNode2)
+	{
+	if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))
+		Quit("GetFirstNeighborUnrooted, should never be called with root");
+
+	if (tree.IsEdge(uNode1, uNode2))
+		{
+		const unsigned uCandidate = tree.GetFirstNeighbor(uNode1, uNode2);
+		if (!tree.IsRoot(uCandidate))
+			return uCandidate;
+		return tree.GetFirstNeighbor(uCandidate, uNode1);
+		}
+
+	const bool bJoinedThroughRoot = tree.IsRoot(tree.GetParent(uNode1)) &&
+	  tree.IsRoot(tree.GetParent(uNode2));
+	if (!bJoinedThroughRoot)
+		Quit("GetFirstNeighborUnrooted, not edge");
+	return tree.GetFirstNeighbor(uNode1, tree.GetRootNodeIndex());
+	}
+
+// Second neighbor of uNode1 other than uNode2, with the root node treated
+// as if it were not there.
+// If the two nodes share an edge, the tree's own second neighbor is used,
+// replaced by the root's other neighbor when it turns out to be the root.
+// If they do not share an edge they must both be children of the root; the
+// result is then uNode1's second neighbor other than the root.
+// Quits if either node is the root or the nodes are not adjacent in the
+// unrooted sense. The messages name this function so that a failure here is
+// not attributed to GetFirstNeighborUnrooted.
+static unsigned GetSecondNeighborUnrooted(const Tree &tree, unsigned uNode1,
+  unsigned uNode2)
+	{
+	if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))
+		Quit("GetSecondNeighborUnrooted, should never be called with root");
+	if (!tree.IsEdge(uNode1, uNode2))
+		{
+		if (!tree.IsRoot(tree.GetParent(uNode1)) ||
+		  !tree.IsRoot(tree.GetParent(uNode2)))
+			Quit("GetSecondNeighborUnrooted, not edge");
+		const unsigned uRoot = tree.GetRootNodeIndex();
+		return tree.GetSecondNeighbor(uNode1, uRoot);
+		}
+
+	unsigned uNeighbor = tree.GetSecondNeighbor(uNode1, uNode2);
+	// Step over the root to its neighbor on the far side from uNode1.
+	if (tree.IsRoot(uNeighbor))
+		return tree.GetFirstNeighbor(uNeighbor, uNode1);
+	return uNeighbor;
+	}
+
+// Neighbor of uNode1 in slot uSub, with the root treated as if it were not
+// there: when that slot holds the root, the root's first neighbor other
+// than uNode1 is returned instead.
+static unsigned GetNeighborUnrooted(const Tree &tree, unsigned uNode1,
+  unsigned uSub)
+	{
+	const unsigned uAdjacent = tree.GetNeighbor(uNode1, uSub);
+	return tree.IsRoot(uAdjacent) ?
+	  tree.GetFirstNeighbor(uAdjacent, uNode1) : uAdjacent;
+	}
+
+// Slot (0..2) of uNode1 under which uNode2 appears as a neighbor once the
+// root is ignored. For a real edge the tree's own subscript is returned;
+// otherwise both nodes must be children of the root and the slot is found
+// by probing GetNeighborUnrooted. Quits if uNode2 is not such a neighbor.
+static unsigned GetNeighborSubscriptUnrooted(const Tree &tree, unsigned uNode1,
+  unsigned uNode2)
+	{
+	if (tree.IsEdge(uNode1, uNode2))
+		return tree.GetNeighborSubscript(uNode1, uNode2);
+
+	const bool bJoinedThroughRoot = tree.IsRoot(tree.GetParent(uNode1)) &&
+	  tree.IsRoot(tree.GetParent(uNode2));
+	if (!bJoinedThroughRoot)
+		Quit("GetNeighborSubscriptUnrooted, not edge");
+
+	unsigned uSlot = 0;
+	while (uSlot < 3)
+		{
+		if (uNode2 == GetNeighborUnrooted(tree, uNode1, uSlot))
+			return uSlot;
+		++uSlot;
+		}
+	Quit("GetNeighborSubscriptUnrooted, not a neighbor");
+	return NULL_NEIGHBOR;
+	}
+
+// Length of the edge between uNode1 and uNode2 with the root ignored.
+// For a real edge this is the tree's edge length. For the two children of
+// the root it is the sum of their edge lengths to the root.
+// Quits if either node is the root or the nodes are not adjacent in the
+// unrooted sense.
+static double GetEdgeLengthUnrooted(const Tree &tree, unsigned uNode1,
+  unsigned uNode2)
+	{
+	if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))
+		Quit("GetEdgeLengthUnrooted, should never be called with root");
+
+	if (tree.IsEdge(uNode1, uNode2))
+		return tree.GetEdgeLength(uNode1, uNode2);
+
+	const bool bJoinedThroughRoot = tree.IsRoot(tree.GetParent(uNode1)) &&
+	  tree.IsRoot(tree.GetParent(uNode2));
+	if (!bJoinedThroughRoot)
+		Quit("GetEdgeLengthUnrooted, not edge");
+
+	const unsigned uRoot = tree.GetRootNodeIndex();
+	return tree.GetEdgeLength(uNode1, uRoot) +
+	  tree.GetEdgeLength(uNode2, uRoot);
+	}
+
+// Gotoh length of the directed edge R->A: the (non-negative) edge length d
+// plus L*R/(L + R), where L and R are the Gotoh lengths of the two edges
+// leaving A away from R. For a leaf A the result is just d. The combined
+// term is taken as 0 when L + R is 0.
+double GetGotohLength(const Tree &tree, unsigned R, unsigned A)
+	{
+	const double dRaw = GetEdgeLengthUnrooted(tree, R, A);
+
+	// Negative edge lengths are clamped to zero.
+	const double dEdge = (dRaw < 0) ? 0 : dRaw;
+
+	if (tree.IsLeaf(A))
+		return dEdge;
+
+	const unsigned uLeft = GetFirstNeighborUnrooted(tree, A, R);
+	const unsigned uRight = GetSecondNeighborUnrooted(tree, A, R);
+	const double dLeft = GetGotohLength(tree, A, uLeft);
+	const double dRight = GetGotohLength(tree, A, uRight);
+	const double dTotal = dLeft + dRight;
+
+	double dCombined = 0;
+	if (!(dTotal == 0))
+		dCombined = (dLeft*dRight)/dTotal;
+	return dEdge + dCombined;
+	}
+
+// Weight of edge A-R in the three-way subtree with internal node R and
+// neighbors A, B, C, computed from the Gotoh lengths a, b, c of R->A, R->B
+// and R->C as sqrt(x/y) with
+//   S = bc + ca + ab,  x = bc(a + b)(a + c),  y = a(b + c)FS
+// and F fixed at 1.0. When y is below 0.001 the weight is taken as 1.0,
+// which also avoids dividing by zero.
+// Quits if R is a leaf.
+static double GotohWeightThreeWay(const Tree &tree, unsigned A,
+  unsigned B, unsigned C, unsigned R)
+	{
+	const double F = 1.0;
+
+	if (tree.IsLeaf(R))
+		Quit("GotohThreeWay: R must be internal node");
+
+	const double a = GetGotohLength(tree, R, A);
+	const double b = GetGotohLength(tree, R, B);
+	const double c = GetGotohLength(tree, R, C);
+
+	const double S = b*c + c*a + a*b;
+	const double x = b*c*(a + b)*(a + c);
+	const double y = a*(b + c)*F*S;
+
+	if (!(y < 0.001))
+		return sqrt(x/y);
+	return 1.0;
+	}
+
+// Contribution to an edge weight from the three-way subtree centred on
+// uCentre, where uOther is the node at the far end of the edge. A leaf has
+// no such subtree and contributes 1.0.
+static double GotohWeightAtEnd(const Tree &tree, unsigned uCentre,
+  unsigned uOther)
+	{
+	if (tree.IsLeaf(uCentre))
+		return 1.0;
+	const unsigned B = GetFirstNeighborUnrooted(tree, uCentre, uOther);
+	const unsigned C = GetSecondNeighborUnrooted(tree, uCentre, uOther);
+	return GotohWeightThreeWay(tree, uOther, B, C, uCentre);
+	}
+
+// Weight of the edge between the two nodes: the product of the three-way
+// weights obtained at each end of the edge (1.0 for an end that is a leaf).
+static double GotohWeightEdge(const Tree &tree, unsigned uNodeIndex1,
+  unsigned uNodeIndex2)
+	{
+	const double w1 = GotohWeightAtEnd(tree, uNodeIndex1, uNodeIndex2);
+	const double w2 = GotohWeightAtEnd(tree, uNodeIndex2, uNodeIndex1);
+	return w1*w2;
+	}
+
+// Fill EdgeWeights[node][slot] with the Gotoh weight of every edge of the
+// tree viewed as unrooted. The root node is skipped, and each edge is
+// computed once and stored under both of its end nodes. EdgeWeights must
+// provide three slots for every node.
+// DEBUG builds also verify that the neighbor slots are mutually consistent
+// and that the weight is the same when computed from the other end.
+void CalcThreeWayEdgeWeights(const Tree &tree, WEIGHT **EdgeWeights)
+	{
+	const unsigned uNodeCount = tree.GetNodeCount();
+	for (unsigned uFrom = 0; uFrom < uNodeCount; ++uFrom)
+		{
+		if (tree.IsRoot(uFrom))
+			continue;
+		for (unsigned uFromSub = 0; uFromSub < 3; ++uFromSub)
+			{
+			const unsigned uTo = GetNeighborUnrooted(tree, uFrom, uFromSub);
+
+		// Skip empty slots, and edges already handled from their
+		// lower-numbered end.
+			if (NULL_NEIGHBOR == uTo || uTo < uFrom)
+				continue;
+
+			const WEIGHT wEdge = (WEIGHT) GotohWeightEdge(tree, uFrom, uTo);
+			const unsigned uToSub = GetNeighborSubscriptUnrooted(tree, uTo, uFrom);
+#if	DEBUG
+			{
+			assert(uTo == GetNeighborUnrooted(tree, uFrom, uFromSub));
+			assert(uFrom == GetNeighborUnrooted(tree, uTo, uToSub));
+			const WEIGHT wReverse = (WEIGHT) GotohWeightEdge(tree, uTo, uFrom);
+			if (!BTEq(wEdge, wReverse))
+				Quit("CalcThreeWayWeights: rev check failed %g %g",
+				  wEdge, wReverse);
+			}
+#endif
+			EdgeWeights[uFrom][uFromSub] = wEdge;
+			EdgeWeights[uTo][uToSub] = wEdge;
+			}
+		}
+	}
+
+// Walk the subtree reached through the directed edge uNode1->uNode2 and
+// store a weight for every leaf in it, indexed by leaf id.
+// At an internal node the running value dPathWeight is multiplied by the
+// edge length and passed down both branches; at a leaf the stored weight is
+// dPathWeight plus the edge length.
+// Quits if either node is the root.
+// NOTE(review): this uses edge lengths, not the weights computed by
+// CalcThreeWayEdgeWeights, and mixes a product with a final sum; with the
+// starting value 0.0 used by CalcThreeWayWeights the product stays 0.
+// Confirm this is intended.
+static void SetSeqWeights(const Tree &tree, unsigned uNode1, unsigned uNode2,
+  double dPathWeight, WEIGHT *Weights)
+	{
+	if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))
+		Quit("SetSeqWeights, should never be called with root");
+
+	const double dEdge = GetEdgeLengthUnrooted(tree, uNode1, uNode2);
+	if (!tree.IsLeaf(uNode2))
+		{
+		const unsigned uLeft = GetFirstNeighborUnrooted(tree, uNode2, uNode1);
+		const unsigned uRight = GetSecondNeighborUnrooted(tree, uNode2, uNode1);
+		const double dDownstream = dPathWeight*dEdge;
+		SetSeqWeights(tree, uNode2, uLeft, dDownstream, Weights);
+		SetSeqWeights(tree, uNode2, uRight, dDownstream, Weights);
+		return;
+		}
+
+	const unsigned uLeafId = tree.GetLeafId(uNode2);
+	Weights[uLeafId] = (WEIGHT) (dPathWeight + dEdge);
+	}
+
+// Compute sequence weights for the leaves on both sides of the edge
+// uNode1-uNode2 and store them in Weights, indexed by leaf id.
+// If one end of the edge is the root it is first replaced by the root's
+// neighbor on the far side. A per-node table of edge weights is built with
+// CalcThreeWayEdgeWeights, the leaf weights are set by SetSeqWeights walking
+// away from the edge in both directions, and the table is released again.
+// NOTE(review): apart from the TRACE dump, the edge-weight table is not
+// read after it is built -- confirm whether SetSeqWeights was meant to use
+// it.
+void CalcThreeWayWeights(const Tree &tree, unsigned uNode1, unsigned uNode2,
+  WEIGHT *Weights)
+	{
+#if	TRACE
+	Log("CalcThreeWayEdgeWeights\n");
+	tree.LogMe();
+#endif
+
+	if (tree.IsRoot(uNode1))
+		uNode1 = tree.GetFirstNeighbor(uNode1, uNode2);
+	else if (tree.IsRoot(uNode2))
+		uNode2 = tree.GetFirstNeighbor(uNode2, uNode1);
+
+	// One row of three neighbor slots per node.
+	const unsigned uNodeCount = tree.GetNodeCount();
+	WEIGHT **EdgeWeights = new WEIGHT *[uNodeCount];
+	for (unsigned uRow = 0; uRow < uNodeCount; ++uRow)
+		EdgeWeights[uRow] = new WEIGHT[3];
+
+	CalcThreeWayEdgeWeights(tree, EdgeWeights);
+
+#if	TRACE
+	{
+	Log("Node1  Node2  Length   Gotoh  EdgeWt\n");
+	Log("-----  -----  ------  ------  ------\n");
+	for (unsigned uFrom = 0; uFrom < uNodeCount; ++uFrom)
+		{
+		if (tree.IsRoot(uFrom))
+			continue;
+		for (unsigned uFromSub = 0; uFromSub < 3; ++uFromSub)
+			{
+			const unsigned uTo = GetNeighborUnrooted(tree, uFrom, uFromSub);
+			if (NULL_NEIGHBOR == uTo || uTo < uFrom)
+				continue;
+			const WEIGHT ew = EdgeWeights[uFrom][uFromSub];
+			const double d = GetEdgeLengthUnrooted(tree, uFrom, uTo);
+			const double g = GetGotohLength(tree, uFrom, uTo);
+			Log("%5u  %5u  %6.3f  %6.3f  %6.3f\n", uFrom, uTo, d, g, ew);
+			}
+		}
+	}
+#endif
+
+	SetSeqWeights(tree, uNode1, uNode2, 0.0, Weights);
+	SetSeqWeights(tree, uNode2, uNode1, 0.0, Weights);
+
+	for (unsigned uRow = 0; uRow < uNodeCount; ++uRow)
+		delete[] EdgeWeights[uRow];
+	delete[] EdgeWeights;
+	}

Added: trunk/packages/muscle/branches/upstream/current/timing.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/timing.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/timing.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,24 @@
+#if	WIN32
+
+typedef unsigned __int64 TICKS;
+
+#pragma warning(disable:4035)
+// Read the processor's cycle counter. The two emitted bytes 0x0f 0x31 are
+// the x86 RDTSC opcode. There is no return statement: the function relies
+// on the value the instruction leaves in registers, which is why warning
+// 4035 (no return value) is disabled just above.
+inline TICKS GetClockTicks()
+	{
+	_asm
+		{
+		_emit	0x0f
+		_emit	0x31
+		}
+	}
+
+#define	StartTimer()	__int64 t1__ = GetClockTicks()
+
+#define	GetElapsedTicks()	(GetClockTicks() - t1__)
+
+// Convert a tick count to seconds using a fixed rate of 2.5e9 ticks per
+// second.
+static double TicksToSecs(TICKS t)
+	{
+	const double dTicksPerSec = 2.5e9;
+	const __int64 iTicks = (__int64) t;
+	return iTicks/dTicksPerSec;
+	}
+
+#endif	// WIN32

Added: trunk/packages/muscle/branches/upstream/current/traceback.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/traceback.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/traceback.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,208 @@
+#include "muscle.h"
+#include "profile.h"
+#include "pwpath.h"
+#include <math.h>
+
+#define TRACE	0
+
+#define EQ(a, b)	(fabs(a-b) < 0.1)
+
+// Rebuild the alignment path from filled DP matrices.
+//
+// DPM_, DPD_ and DPI_ are the M (letter/letter), D (letter in A, gap in B)
+// and I (gap in A, letter in B) matrices for profiles PA (uLengthA
+// positions) and PB (uLengthB positions), indexed by prefix lengths.
+// Starting from the final cell, the state with the best score is chosen
+// (D and I get the gap-close score of the last position added first). Then,
+// one edge at a time, each candidate predecessor score is recomputed and
+// the one that matches the stored score (within the EQ tolerance) is
+// followed, until the start state 'S' is reached. Edges are prepended to
+// Path, so Path ends up in forward order.
+// Returns the best score of the final cell. Quits if no predecessor matches.
+// Both lengths must be > 0 (asserted).
+SCORE TraceBack(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,
+  PWPath &Path)
+	{
+#if	TRACE
+	Log("\n");
+	Log("TraceBack LengthA=%u LengthB=%u\n", uLengthA, uLengthB);
+#endif
+	assert(uLengthB > 0 && uLengthA > 0);
+
+	const unsigned uPrefixCountA = uLengthA + 1;
+	const unsigned uPrefixCountB = uLengthB + 1;
+
+	Path.Clear();
+
+	unsigned uPrefixLengthA = uLengthA;
+	unsigned uPrefixLengthB = uLengthB;
+
+	const SCORE scoreM = DPM(uPrefixLengthA, uPrefixLengthB);
+	SCORE scoreD = DPD(uPrefixLengthA, uPrefixLengthB);
+	SCORE scoreI = DPI(uPrefixLengthA, uPrefixLengthB);
+
+	const ProfPos &LastPPA = PA[uLengthA - 1];
+	const ProfPos &LastPPB = PB[uLengthB - 1];
+
+	// An alignment that ends in a gap still has to pay for closing it.
+	scoreD += LastPPA.m_scoreGapClose;
+	scoreI += LastPPB.m_scoreGapClose;
+
+	// Pick the state of the last column; ties go to M, then D.
+	char cEdgeType = cInsane;
+	SCORE scoreMax;
+	if (scoreM >= scoreD && scoreM >= scoreI)
+		{
+		scoreMax = scoreM;
+		cEdgeType = 'M';
+		}
+	else if (scoreD >= scoreM && scoreD >= scoreI)
+		{
+		scoreMax = scoreD;
+		cEdgeType = 'D';
+		}
+	else
+		{
+		assert(scoreI >= scoreM && scoreI >= scoreD);
+		scoreMax = scoreI;
+		cEdgeType = 'I';
+		}
+
+	for (;;)
+		{
+		// 'S' is the start state: the whole path has been recovered.
+		if ('S' == cEdgeType)
+			break;
+
+		PWEdge Edge;
+		Edge.cType = cEdgeType;
+		Edge.uPrefixLengthA = uPrefixLengthA;
+		Edge.uPrefixLengthB = uPrefixLengthB;
+		Path.PrependEdge(Edge);
+
+		char cPrevEdgeType;
+		unsigned uPrevPrefixLengthA = uPrefixLengthA;
+		unsigned uPrevPrefixLengthB = uPrefixLengthB;
+
+		switch (cEdgeType)
+			{
+		case 'M':
+			{
+			assert(uPrefixLengthA > 0);
+			assert(uPrefixLengthB > 0);
+			const ProfPos &PPA = PA[uPrefixLengthA - 1];
+			const ProfPos &PPB = PB[uPrefixLengthB - 1];
+
+			const SCORE Score = DPM(uPrefixLengthA, uPrefixLengthB);
+			const SCORE scoreMatch = ScoreProfPos2(PPA, PPB);
+
+			// Start->M is only possible for the very first column.
+			SCORE scoreSM;
+			if (1 == uPrefixLengthA && 1 == uPrefixLengthB)
+				scoreSM = scoreMatch;
+			else
+				scoreSM = MINUS_INFINITY;
+
+			SCORE scoreMM = MINUS_INFINITY;
+			SCORE scoreDM = MINUS_INFINITY;
+			SCORE scoreIM = MINUS_INFINITY;
+			if (uPrefixLengthA > 1 && uPrefixLengthB > 1)
+				scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1) + scoreMatch;
+			if (uPrefixLengthA > 1)
+				{
+				SCORE scoreTransDM = PA[uPrefixLengthA-2].m_scoreGapClose;
+				scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreTransDM + scoreMatch;
+				}
+			if (uPrefixLengthB > 1)
+				{
+				SCORE scoreTransIM = PB[uPrefixLengthB-2].m_scoreGapClose;
+				scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreTransIM + scoreMatch;
+				}
+
+			// Follow the first predecessor whose score reproduces this cell.
+			if (EQ(scoreMM, Score))
+				cPrevEdgeType = 'M';
+			else if (EQ(scoreDM, Score))
+				cPrevEdgeType = 'D';
+			else if (EQ(scoreIM, Score))
+				cPrevEdgeType = 'I';
+			else if (EQ(scoreSM, Score))
+				cPrevEdgeType = 'S';
+			else
+				Quit("TraceBack: failed to match M score=%g M=%g D=%g I=%g S=%g",
+				  Score, scoreMM, scoreDM, scoreIM, scoreSM);
+
+			// A match consumes one position of each profile.
+			--uPrevPrefixLengthA;
+			--uPrevPrefixLengthB;
+			break;
+			}
+
+		case 'D':
+			{
+			assert(uPrefixLengthA > 0);
+			const SCORE Score = DPD(uPrefixLengthA, uPrefixLengthB);
+
+			SCORE scoreMD = MINUS_INFINITY;
+			SCORE scoreDD = MINUS_INFINITY;
+			SCORE scoreSD = MINUS_INFINITY;
+			// Start->D is only considered while no letter of B has been used.
+			if (uPrefixLengthB == 0)
+				{
+				if (uPrefixLengthA == 1)
+					scoreSD = PA[0].m_scoreGapOpen;
+				else
+					scoreSD = DPD(uPrefixLengthA - 1, 0);
+				}
+			if (uPrefixLengthA > 1)
+				{
+				const ProfPos &PPA = PA[uPrefixLengthA - 1];
+				SCORE scoreTransMD = PPA.m_scoreGapOpen;
+				scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) + scoreTransMD;
+				scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB);
+				}
+
+			if (EQ(Score, scoreMD))
+				cPrevEdgeType = 'M';
+			else if (EQ(Score, scoreDD))
+				cPrevEdgeType = 'D';
+			else if (EQ(Score, scoreSD))
+				cPrevEdgeType = 'S';
+			else
+				Quit("TraceBack: failed to match D");
+
+			// A delete consumes one position of A only.
+			--uPrevPrefixLengthA;
+			break;
+			}
+
+		case 'I':
+			{
+			assert(uPrefixLengthB > 0);
+			const SCORE Score = DPI(uPrefixLengthA, uPrefixLengthB);
+
+			SCORE scoreMI = MINUS_INFINITY;
+			SCORE scoreII = MINUS_INFINITY;
+			SCORE scoreSI = MINUS_INFINITY;
+			// Start->I is only considered while no letter of A has been used.
+			if (uPrefixLengthA == 0)
+				{
+				if (uPrefixLengthB == 1)
+					scoreSI = PB[0].m_scoreGapOpen;
+				else
+					scoreSI = DPI(0, uPrefixLengthB - 1);
+				}
+			if (uPrefixLengthB > 1)
+				{
+				const ProfPos &PPB = PB[uPrefixLengthB - 1];
+				SCORE scoreTransMI = PPB.m_scoreGapOpen;
+				scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) + scoreTransMI;
+				scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1);
+				}
+
+			if (EQ(Score, scoreMI))
+				cPrevEdgeType = 'M';
+			else if (EQ(Score, scoreII))
+				cPrevEdgeType = 'I';
+			else if (EQ(Score, scoreSI))
+				cPrevEdgeType = 'S';
+			else
+				Quit("TraceBack: failed to match I");
+
+			// An insert consumes one position of B only.
+			--uPrevPrefixLengthB;
+			break;
+			}
+
+		default:
+			assert(false);
+			}
+#if	TRACE
+		Log("Edge %c%c%u.%u", cPrevEdgeType, cEdgeType, uPrefixLengthA, uPrefixLengthB);
+		Log("\n");
+#endif
+		// Step back to the predecessor cell and state.
+		cEdgeType = cPrevEdgeType;
+		uPrefixLengthA = uPrevPrefixLengthA;
+		uPrefixLengthB = uPrevPrefixLengthB;
+		}
+
+	return scoreMax;
+	}

Added: trunk/packages/muscle/branches/upstream/current/tracebackopt.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/tracebackopt.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/tracebackopt.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,73 @@
+#include "muscle.h"
+#include "pwpath.h"
+// Convert a run-length encoded trace-back matrix into an explicit PWPath.
+void TraceBackToPath(int **TraceBack, unsigned uLengthA,
+  unsigned uLengthB, PWPath &Path)
+	{
+// Encoding, as consumed below: TraceBack[a][b] == 0 means the edge that
+// ends at prefix lengths (a,b) is an 'M' edge; k > 0 means a run of k 'D'
+// edges (each shortens prefix A by one); k < 0 means a run of -k 'I' edges
+// (each shortens prefix B by one). A run that does not end at (0,0) is
+// always followed by a single 'M' edge.
+// Bounds are checked by assert only, so the matrix contents are trusted.
+	Path.Clear();
+
+	PWEdge Edge;
+	Edge.uPrefixLengthA = uLengthA;
+	Edge.uPrefixLengthB = uLengthB;
+
+// Walk from (uLengthA, uLengthB) back to (0,0). Edges are met last-to-first,
+// so each is prepended to leave Path in forward order.
+	while (0 != Edge.uPrefixLengthA || 0 != Edge.uPrefixLengthB)
+		{
+		int iRun = TraceBack[Edge.uPrefixLengthA][Edge.uPrefixLengthB];
+#if	TRACE
+		Log("TraceBack[%u][%u] = %d\n",
+		  Edge.uPrefixLengthA, Edge.uPrefixLengthB, iRun);
+#endif
+		if (iRun > 0)
+			{
+		// Positive count: that many 'D' edges.
+			Edge.cType = 'D';
+			for ( ; iRun > 0; --iRun)
+				{
+				assert(Edge.uPrefixLengthA > 0);
+
+				Path.PrependEdge(Edge);
+				--(Edge.uPrefixLengthA);
+				}
+			}
+		else if (iRun < 0)
+			{
+		// Negative count: that many 'I' edges.
+			Edge.cType = 'I';
+			for ( ; iRun < 0; ++iRun)
+				{
+				assert(Edge.uPrefixLengthB > 0);
+
+				Path.PrependEdge(Edge);
+				--(Edge.uPrefixLengthB);
+				}
+			}
+
+	// A gap run can use up what is left of both prefixes; then we are done.
+	// (With iRun == 0 nothing has moved since the loop test, so this
+	// cannot fire.)
+		if (0 == Edge.uPrefixLengthA && 0 == Edge.uPrefixLengthB)
+			break;
+
+	// Otherwise the preceding edge is an 'M', which shortens both prefixes.
+		assert(Edge.uPrefixLengthA > 0);
+		assert(Edge.uPrefixLengthB > 0);
+
+		Edge.cType = 'M';
+		Path.PrependEdge(Edge);
+		--(Edge.uPrefixLengthA);
+		--(Edge.uPrefixLengthB);
+		}
+
+#if	TRACE
+	Log("TraceBackToPath ");
+	Path.LogMe();
+#endif
+	}

Added: trunk/packages/muscle/branches/upstream/current/tracebacksw.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/tracebacksw.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/tracebacksw.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,186 @@
+#include "muscle.h"
+#include "profile.h"
+#include "pwpath.h"
+#include <math.h>
+
+#define TRACE	0
+// Approximate equality of two scores (absolute tolerance 0.1). Arguments are parenthesized so that expressions such as EQ(x, a + b) expand correctly.
+#define EQ(a, b)	(fabs((a) - (b)) < 0.1)
+// Rebuild into Path the alignment path that ends at DP cell (uPrefixLengthAMax, uPrefixLengthBMax). NOTE(review): "SW" presumably means Smith-Waterman (local alignment) — confirm.
+void TraceBackSW(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
+  unsigned uLengthB, const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,
+  unsigned uPrefixLengthAMax, unsigned uPrefixLengthBMax, PWPath &Path)
+	{
+#if	TRACE
+	Log("\n");
+	Log("TraceBackSW LengthA=%u LengthB=%u PLAMax=%u PLBMax=%u\n",
+	  uLengthA, uLengthB, uPrefixLengthAMax, uPrefixLengthBMax);
+#endif
+	assert(uLengthB > 0 && uLengthA > 0);
+// uPrefixCountA/B are not named again below; presumably the DPM/DPD/DPI macros (defined elsewhere) index DPM_/DPD_/DPI_ with them — confirm.
+	const unsigned uPrefixCountA = uLengthA + 1;
+	const unsigned uPrefixCountB = uLengthB + 1;
+
+	Path.Clear();
+
+	unsigned uPrefixLengthA = uPrefixLengthAMax;
+	unsigned uPrefixLengthB = uPrefixLengthBMax;
+// The walk starts in state 'M' at the caller-supplied cell. scoreMax is assigned here but not read again in this function.
+	SCORE scoreMax = DPM(uPrefixLengthA, uPrefixLengthB);
+	char cEdgeType = 'M';
+// Each pass prepends the current edge, then chooses as predecessor the state whose candidate score reproduces the stored cell score (within EQ); 'S' ends the walk.
+	for (;;)
+		{
+		if ('S' == cEdgeType)
+			break;
+
+		PWEdge Edge;
+		Edge.cType = cEdgeType;
+		Edge.uPrefixLengthA = uPrefixLengthA;
+		Edge.uPrefixLengthB = uPrefixLengthB;
+		Path.PrependEdge(Edge);
+
+		char cPrevEdgeType;
+		unsigned uPrevPrefixLengthA = uPrefixLengthA;
+		unsigned uPrevPrefixLengthB = uPrefixLengthB;
+
+		switch (cEdgeType)
+			{
+		case 'M':
+			{
+			assert(uPrefixLengthA > 0);
+			assert(uPrefixLengthB > 0);
+			const ProfPos &PPA = PA[uPrefixLengthA - 1];
+			const ProfPos &PPB = PB[uPrefixLengthB - 1];
+
+			const SCORE Score = DPM(uPrefixLengthA, uPrefixLengthB);
+			const SCORE scoreMatch = ScoreProfPos2(PPA, PPB);
+		// scoreSM is a candidate only at cell (1,1); everywhere else it is MINUS_INFINITY.
+			SCORE scoreSM;
+			if (1 == uPrefixLengthA && 1 == uPrefixLengthB)
+				scoreSM = scoreMatch;
+			else
+				scoreSM = MINUS_INFINITY;
+
+			SCORE scoreMM = MINUS_INFINITY;
+			SCORE scoreDM = MINUS_INFINITY;
+			SCORE scoreIM = MINUS_INFINITY;
+			if (uPrefixLengthA > 1 && uPrefixLengthB > 1)
+				{
+				SCORE scoreTrans = DPM(uPrefixLengthA-1, uPrefixLengthB-1);
+				scoreMM = scoreTrans + scoreMatch;
+				}
+			if (uPrefixLengthA > 1)
+				{
+				SCORE scoreTransDM = PA[uPrefixLengthA-2].m_scoreGapClose;
+				scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreTransDM + scoreMatch;
+				}
+			if (uPrefixLengthB > 1)
+				{
+				SCORE scoreTransIM = PB[uPrefixLengthB-2].m_scoreGapClose;
+				scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreTransIM + scoreMatch;
+				}
+		// Candidates are tested in the fixed order M, D, I, S; the first that matches wins.
+			if (EQ(scoreMM, Score))
+				cPrevEdgeType = 'M';
+			else if (EQ(scoreDM, Score))
+				cPrevEdgeType = 'D';
+			else if (EQ(scoreIM, Score))
+				cPrevEdgeType = 'I';
+			else if (EQ(scoreSM, Score))
+				cPrevEdgeType = 'S';
+			else if (EQ(scoreMatch, Score))	// cell holds the match score alone: the path starts here, at any cell
+				cPrevEdgeType = 'S';
+			else
+				Quit("TraceBack2: failed to match M score=%g M=%g D=%g I=%g S=%g",
+				  Score, scoreMM, scoreDM, scoreIM, scoreSM);
+
+			--uPrevPrefixLengthA;	// an 'M' edge steps back in both A and B
+			--uPrevPrefixLengthB;
+			break;
+			}
+
+		case 'D':
+			{
+			assert(uPrefixLengthA > 0);
+			const SCORE Score = DPD(uPrefixLengthA, uPrefixLengthB);
+
+			SCORE scoreMD = MINUS_INFINITY;
+			SCORE scoreDD = MINUS_INFINITY;
+			SCORE scoreSD = MINUS_INFINITY;
+			if (uPrefixLengthB == 0)	// scoreSD is a candidate only when prefix B is empty
+				{
+				if (uPrefixLengthA == 1)
+					scoreSD = PA[0].m_scoreGapOpen;
+				else
+					scoreSD = DPD(uPrefixLengthA - 1, 0);
+				}
+			if (uPrefixLengthA > 1)
+				{
+				const ProfPos &PPA = PA[uPrefixLengthA - 1];
+				SCORE scoreTransMD = PPA.m_scoreGapOpen;
+				scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) + scoreTransMD;
+				scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB);
+				}
+
+			if (EQ(Score, scoreMD))
+				cPrevEdgeType = 'M';
+			else if (EQ(Score, scoreDD))
+				cPrevEdgeType = 'D';
+			else if (EQ(Score, scoreSD))
+				cPrevEdgeType = 'S';
+			else
+				Quit("TraceBack2: failed to match D");
+
+			--uPrevPrefixLengthA;	// a 'D' edge steps back in A only
+			break;
+			}
+
+		case 'I':
+			{
+			assert(uPrefixLengthB > 0);
+			const SCORE Score = DPI(uPrefixLengthA, uPrefixLengthB);
+
+			SCORE scoreMI = MINUS_INFINITY;
+			SCORE scoreII = MINUS_INFINITY;
+			SCORE scoreSI = MINUS_INFINITY;
+			if (uPrefixLengthA == 0)	// scoreSI is a candidate only when prefix A is empty
+				{
+				if (uPrefixLengthB == 1)
+					scoreSI = PB[0].m_scoreGapOpen;
+				else
+					scoreSI = DPI(0, uPrefixLengthB - 1);
+				}
+			if (uPrefixLengthB > 1)
+				{
+				const ProfPos &PPB = PB[uPrefixLengthB - 1];
+				SCORE scoreTransMI = PPB.m_scoreGapOpen;
+				scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) + scoreTransMI;
+				scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1);
+				}
+
+			if (EQ(Score, scoreMI))
+				cPrevEdgeType = 'M';
+			else if (EQ(Score, scoreII))
+				cPrevEdgeType = 'I';
+			else if (EQ(Score, scoreSI))
+				cPrevEdgeType = 'S';
+			else
+				Quit("TraceBack2: failed to match I");
+
+			--uPrevPrefixLengthB;	// an 'I' edge steps back in B only
+			break;
+			}
+
+		default:
+			assert(false);
+			}
+#if	TRACE
+		Log("Edge %c%c%u.%u", cPrevEdgeType, cEdgeType, uPrefixLengthA, uPrefixLengthB);
+		Log("\n");
+#endif
+		cEdgeType = cPrevEdgeType;
+		uPrefixLengthA = uPrevPrefixLengthA;
+		uPrefixLengthB = uPrevPrefixLengthB;
+		}
+	}

Added: trunk/packages/muscle/branches/upstream/current/tree.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/tree.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/tree.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,339 @@
+#ifndef tree_h
+#define tree_h
+
+#include <limits.h>
+
+class Clust;
+
+const unsigned NULL_NEIGHBOR = UINT_MAX;
+// Token kinds for the tree-file tokenizer (Tree::GetToken); the name suggests the Newick format — confirm.
+enum NEWICK_TOKEN_TYPE
+	{
+	NTT_Unknown,
+
+// Returned from Tree::GetToken:
+	NTT_Lparen,
+	NTT_Rparen,
+	NTT_Colon,
+	NTT_Comma,
+	NTT_Semicolon,
+	NTT_String,
+
+// Following are never returned from Tree::GetToken:
+	NTT_SingleQuotedString,
+	NTT_DoubleQuotedString,
+	NTT_Comment
+	};
+// Tree stored as parallel arrays indexed by node; each node has three neighbor slots (NULL_NEIGHBOR = empty). In a rooted tree slot 1 is the parent, slot 2 the left child, slot 3 the right child.
+class Tree
+	{
+public:
+	Tree()
+		{
+	// Empty tree: no nodes, no arrays allocated, unrooted.
+		m_uNodeCount = 0;
+		m_uCacheCount = 0;
+		m_uNeighbor1 = m_uNeighbor2 = m_uNeighbor3 = 0;
+		m_dEdgeLength1 = m_dEdgeLength2 = m_dEdgeLength3 = 0;
+		m_dHeight = 0;
+		m_bHasEdgeLength1 = m_bHasEdgeLength2 = m_bHasEdgeLength3 = 0;
+		m_bHasHeight = 0;
+		m_ptrName = 0;
+		m_Ids = 0;
+	// m_bRooted and m_uRootNodeIndex used to stay indeterminate until the
+	// first Clear(), although IsRooted() and GetLeafCount() read m_bRooted.
+		m_bRooted = false;
+		m_uRootNodeIndex = 0;
+		}
+	virtual ~Tree()
+		{
+		Clear();
+		}
+
+	void Clear()
+		{
+		for (unsigned n = 0; n < m_uNodeCount; ++n)
+			free(m_ptrName[n]);
+	// Names are released with free(); the arrays themselves with delete[].
+		m_uNodeCount = 0;
+		m_uCacheCount = 0;
+
+		delete[] m_uNeighbor1;
+		delete[] m_uNeighbor2;
+		delete[] m_uNeighbor3;
+		delete[] m_dEdgeLength1;
+		delete[] m_dEdgeLength2;
+		delete[] m_dEdgeLength3;
+		delete[] m_bHasEdgeLength1;
+		delete[] m_bHasEdgeLength2;
+		delete[] m_bHasEdgeLength3;
+		delete[] m_ptrName;
+		delete[] m_Ids;
+		delete[] m_bHasHeight;
+		delete[] m_dHeight;
+	// Null every pointer so a repeated Clear() (the destructor calls it too) cannot delete[] an array twice.
+		m_uNeighbor1 = 0;
+		m_uNeighbor2 = 0;
+		m_uNeighbor3 = 0;
+		m_dEdgeLength1 = 0;
+		m_dEdgeLength2 = 0;
+		m_dEdgeLength3 = 0;
+		m_ptrName = 0;
+		m_Ids = 0;
+		m_uRootNodeIndex = 0;
+		m_bHasHeight = 0;
+		m_dHeight = 0;
+		m_bHasEdgeLength1 = m_bHasEdgeLength2 = m_bHasEdgeLength3 = 0;
+		m_bRooted = false;
+		}
+
+// Creation and manipulation
+	void CreateRooted();
+	void CreateUnrooted(double dEdgeLength);
+
+	void FromFile(TextFile &File);
+	void FromClust(Clust &C);
+
+	void Copy(const Tree &tree);
+
+	void Create(unsigned uLeafCount, unsigned uRoot, const unsigned Left[],
+	  const unsigned Right[], const float LeftLength[], const float RightLength[],
+	  const unsigned LeafIds[], char *LeafNames[]);
+	unsigned AppendBranch(unsigned uExistingNodeIndex);
+	void SetLeafName(unsigned uNodeIndex, const char *ptrName);
+	void SetLeafId(unsigned uNodeIndex, unsigned uId);
+	void SetEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2,
+	  double dLength);
+
+	void RootUnrootedTree(unsigned uNodeIndex1, unsigned uNodeIndex2);
+	void RootUnrootedTree(ROOT Method);
+	void UnrootByDeletingRoot();
+
+// Saving to file
+	void ToFile(TextFile &File) const;
+
+// Accessor functions
+	unsigned GetNodeCount() const
+		{
+		return m_uNodeCount;
+		}
+
+	unsigned GetLeafCount() const
+		{
+		if (m_bRooted)
+			{
+			assert(m_uNodeCount%2 == 1);
+			return (m_uNodeCount + 1)/2;
+			}
+		else
+			{
+			assert(m_uNodeCount%2 == 0);
+			return (m_uNodeCount + 2)/2;
+			}
+		}
+
+	unsigned GetNeighbor(unsigned uNodeIndex, unsigned uNeighborSubscript) const;
+
+	unsigned GetNeighbor1(unsigned uNodeIndex) const
+		{
+		assert(uNodeIndex < m_uNodeCount);
+		return m_uNeighbor1[uNodeIndex];
+		}
+
+	unsigned GetNeighbor2(unsigned uNodeIndex) const
+		{
+		assert(uNodeIndex < m_uNodeCount);
+		return m_uNeighbor2[uNodeIndex];
+		}
+
+	unsigned GetNeighbor3(unsigned uNodeIndex) const
+		{
+		assert(uNodeIndex < m_uNodeCount);
+		return m_uNeighbor3[uNodeIndex];
+		}
+
+	unsigned GetParent(unsigned uNodeIndex) const
+		{
+		assert(m_bRooted && uNodeIndex < m_uNodeCount);
+		return m_uNeighbor1[uNodeIndex];
+		}
+
+	bool IsRooted() const
+		{
+		return m_bRooted;
+		}
+
+	unsigned GetLeft(unsigned uNodeIndex) const
+		{
+		assert(m_bRooted && uNodeIndex < m_uNodeCount);
+		return m_uNeighbor2[uNodeIndex];
+		}
+
+	unsigned GetRight(unsigned uNodeIndex) const
+		{
+		assert(m_bRooted && uNodeIndex < m_uNodeCount);
+		return m_uNeighbor3[uNodeIndex];
+		}
+
+	const char *GetName(unsigned uNodeIndex) const
+		{
+		assert(uNodeIndex < m_uNodeCount);
+		return m_ptrName[uNodeIndex];
+		}
+
+	unsigned GetRootNodeIndex() const
+		{
+		assert(m_bRooted);
+		return m_uRootNodeIndex;
+		}
+
+	unsigned GetNeighborCount(unsigned uNodeIndex) const
+		{
+		const unsigned n1 = m_uNeighbor1[uNodeIndex];
+		const unsigned n2 = m_uNeighbor2[uNodeIndex];
+		const unsigned n3 = m_uNeighbor3[uNodeIndex];
+		return (NULL_NEIGHBOR != n1) + (NULL_NEIGHBOR != n2) + (NULL_NEIGHBOR != n3);
+		}
+
+	bool IsLeaf(unsigned uNodeIndex) const
+		{
+		assert(uNodeIndex < m_uNodeCount);
+		if (1 == m_uNodeCount)
+			return true;
+		return 1 == GetNeighborCount(uNodeIndex);
+		}
+
+	bool IsRoot(unsigned uNodeIndex) const
+		{
+		return IsRooted() && m_uRootNodeIndex == uNodeIndex;
+		}
+
+	unsigned GetLeafId(unsigned uNodeIndex) const;
+	unsigned GetLeafNodeIndex(const char *ptrName) const;
+	bool IsEdge(unsigned uNodeIndex1, unsigned uNodeIndex2) const;
+	bool HasEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2) const;
+	double GetEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2) const;
+	const char *GetLeafName(unsigned uNodeIndex) const;
+	unsigned GetNeighborSubscript(unsigned uNodeIndex, unsigned uNeighborIndex) const;
+	double GetNodeHeight(unsigned uNodeIndex) const;
+
+// Depth-first traversal
+	unsigned FirstDepthFirstNode() const;
+	unsigned NextDepthFirstNode(unsigned uNodeIndex) const;
+
+	unsigned FirstDepthFirstNodeR() const;
+	unsigned NextDepthFirstNodeR(unsigned uNodeIndex) const;
+
+// Equivalent of GetLeft/Right in unrooted tree, works in rooted tree too.
+	unsigned GetFirstNeighbor(unsigned uNodeIndex, unsigned uNeighborIndex) const;
+	unsigned GetSecondNeighbor(unsigned uNodeIndex, unsigned uNeighborIndex) const;
+
+// Getting parent node in unrooted tree defined iff leaf
+	unsigned GetLeafParent(unsigned uNodeIndex) const;
+
+// Misc
+	const char *NTTStr(NEWICK_TOKEN_TYPE NTT) const;
+	void FindCenterByLongestSpan(unsigned *ptrNodeIndex1,
+	  unsigned *ptrNodeIndex2) const;
+	void PruneTree(const Tree &tree, unsigned Subfams[],
+	  unsigned uSubfamCount);
+	unsigned LeafIndexToNodeIndex(unsigned uLeafIndex) const;
+
+// Debugging & trouble-shooting support
+	void Validate() const;
+	void ValidateNode(unsigned uNodeIndex) const;
+	void AssertAreNeighbors(unsigned uNodeIndex1, unsigned uNodeIndex2) const;
+	void LogMe() const;
+
+private:
+	unsigned UnrootFromFile();
+	NEWICK_TOKEN_TYPE GetTokenVerbose(TextFile &File, char szToken[],
+	  unsigned uBytes) const
+		{
+		NEWICK_TOKEN_TYPE NTT = GetToken(File, szToken, uBytes);
+		Log("GetToken %10.10s  %s\n", NTTStr(NTT), szToken);
+		return NTT;
+		}
+
+	void InitCache(unsigned uCacheCount);
+	void ExpandCache();
+	NEWICK_TOKEN_TYPE GetToken(TextFile &File, char szToken[], unsigned uBytes) const;
+	bool GetGroupFromFile(TextFile &File, unsigned uNodeIndex, double *ptrdEdgeLength);
+	unsigned GetLeafCountUnrooted(unsigned uNodeIndex1, unsigned uNodeIndex2,
+	  double *ptrdTotalDistance) const;
+	void ToFileNodeRooted(TextFile &File, unsigned uNodeIndex) const;
+	void ToFileNodeUnrooted(TextFile &File, unsigned uNodeIndex, unsigned uParent) const;
+	void OrientParent(unsigned uNodeIndex, unsigned uParentNodeIndex);
+	double FromClustNode(const Clust &C, unsigned uClustNodeIndex, unsigned uPhyNodeIndex);
+	unsigned GetAnyNonLeafNode() const;
+
+// Yuck. Data is made public for the convenience of Tree::Copy.
+// There has to be a better way.
+public:
+	unsigned m_uNodeCount;
+	unsigned m_uCacheCount;
+	unsigned *m_uNeighbor1;
+	unsigned *m_uNeighbor2;
+	unsigned *m_uNeighbor3;
+	double *m_dEdgeLength1;
+	double *m_dEdgeLength2;
+	double *m_dEdgeLength3;
+	double *m_dHeight;
+	bool *m_bHasEdgeLength1;
+	bool *m_bHasEdgeLength2;
+	bool *m_bHasEdgeLength3;
+	bool *m_bHasHeight;
+	unsigned *m_Ids;
+	char **m_ptrName;
+	bool m_bRooted;
+	unsigned m_uRootNodeIndex;
+	};
+// State object handed to PhyEnumBiParts / PhyEnumBiPartsR; a fresh one is not yet initialised and names no nodes.
+struct PhyEnumEdgeState
+	{
+	PhyEnumEdgeState() :
+	  m_bInit(false),
+	  m_uNodeIndex1(NULL_NEIGHBOR),
+	  m_uNodeIndex2(NULL_NEIGHBOR)
+		{
+		}
+	bool m_bInit;
+	unsigned m_uNodeIndex1;
+	unsigned m_uNodeIndex2;
+	};
+// (unsigned) (~0) is UINT_MAX, i.e. the same value as NULL_NEIGHBOR above.
+const unsigned NODE_CHANGED = (unsigned) (~0);
+// Free functions that take a Tree; none of them is defined in this header.
+extern bool PhyEnumBiParts(const Tree &tree, PhyEnumEdgeState &ES,
+  unsigned Leaves1[], unsigned *ptruCount1,
+  unsigned Leaves2[], unsigned *ptruCount2);
+extern bool PhyEnumBiPartsR(const Tree &tree, PhyEnumEdgeState &ES,
+  unsigned Leaves1[], unsigned *ptruCount1,
+  unsigned Leaves2[], unsigned *ptruCount2);
+extern void ClusterByHeight(const Tree &tree, double dMaxHeight, unsigned Subtrees[],
+  unsigned *ptruSubtreeCount);
+void ClusterBySubfamCount(const Tree &tree, unsigned uSubfamCount,
+  unsigned Subfams[], unsigned *ptruSubfamCount);
+void GetLeaves(const Tree &tree, unsigned uNodeIndex, unsigned Leaves[],
+  unsigned *ptruLeafCount);
+void GetLeavesExcluding(const Tree &tree, unsigned uNodeIndex,
+  unsigned uExclude, unsigned Leaves[], unsigned *ptruCount);
+void GetInternalNodesInHeightOrder(const Tree &tree, unsigned NodeIndexes[]);
+void ApplyMinEdgeLength(Tree &tree, double dMinEdgeLength);
+void LeafIndexesToLeafNames(const Tree &tree, const unsigned Leaves[], unsigned uCount,
+ char *Names[]);
+void LeafIndexesToIds(const Tree &tree, const unsigned Leaves[], unsigned uCount,
+ unsigned Ids[]);
+void MSASeqSubset(const MSA &msaIn, char *Names[], unsigned uSeqCount,
+  MSA &msaOut);
+void DiffTrees(const Tree &Tree1, const Tree &Tree2, Tree &Diffs,
+  unsigned IdToDiffsLeafNodeIndex[]);
+void DiffTreesE(const Tree &NewTree, const Tree &OldTree,
+  unsigned NewNodeIndexToOldNodeIndex[]);
+void FindRoot(const Tree &tree, unsigned *ptruNode1, unsigned *ptruNode2,
+  double *ptrdLength1, double *ptrdLength2,
+  ROOT RootMethod);
+void FixRoot(Tree &tree, ROOT RootMethod);
+
+#endif // tree_h

Added: trunk/packages/muscle/branches/upstream/current/treefrommsa.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/treefrommsa.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/treefrommsa.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,55 @@
+#include "muscle.h"
+#include "msa.h"
+#include "tree.h"
+#include "clust.h"
+#include "clustsetmsa.h"
+#include "distcalc.h"
+// Build tree from msa through the Clust class; TreeFromMSA takes this route for CLUSTER_NeighborJoining.
+static void TreeFromMSA_NJ(const MSA &msa, Tree &tree, CLUSTER Cluster,
+  DISTANCE Distance)
+	{
+// Distance object, and the MSA-backed set that Clust::Create is given.
+	MSADist DistFn(Distance);
+	ClustSetMSA SeqSet(msa, DistFn);
+	Clust Clusterer;
+	Clusterer.Create(SeqSet, Cluster);
+// Hand the finished clustering to the caller's Tree.
+	tree.FromClust(Clusterer);
+	}
+// Build tree from msa with UPGMA2, after translating the CLUSTER_UPGMA* method into a LINKAGE rule.
+static void TreeFromMSA_UPGMA(const MSA &msa, Tree &tree, CLUSTER Cluster,
+  DISTANCE Distance)
+	{
+// Method -> linkage:
+//   CLUSTER_UPGMA -> Avg, CLUSTER_UPGMAMin -> Min,
+//   CLUSTER_UPGMAMax -> Max, CLUSTER_UPGMB -> Biased.
+// Any other method is reported through Quit.
+	LINKAGE Linkage = LINKAGE_Undefined;
+	if (CLUSTER_UPGMA == Cluster)
+		Linkage = LINKAGE_Avg;
+	else if (CLUSTER_UPGMAMin == Cluster)
+		Linkage = LINKAGE_Min;
+	else if (CLUSTER_UPGMAMax == Cluster)
+		Linkage = LINKAGE_Max;
+	else if (CLUSTER_UPGMB == Cluster)
+		Linkage = LINKAGE_Biased;
+	else
+		Quit("TreeFromMSA_UPGMA, CLUSTER_%u not supported", Cluster);
+
+// MsaDist is initialised from the alignment and the distance measure;
+// UPGMA2 reads distances from it and fills in tree.
+	DistCalcMSA MsaDist;
+	MsaDist.Init(msa, Distance);
+
+	UPGMA2(MsaDist, tree, Linkage);
+	}
+// Build tree from msa: the Clust route for CLUSTER_NeighborJoining, UPGMA2 for every other method; FixRoot(tree, Root) is applied afterwards.
+void TreeFromMSA(const MSA &msa, Tree &tree, CLUSTER Cluster,
+  DISTANCE Distance, ROOT Root)
+	{
+	if (CLUSTER_NeighborJoining != Cluster)
+		TreeFromMSA_UPGMA(msa, tree, Cluster, Distance);
+	else
+		TreeFromMSA_NJ(msa, tree, Cluster, Distance);
+	FixRoot(tree, Root);
+	}

Added: trunk/packages/muscle/branches/upstream/current/types.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/types.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/types.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,117 @@
+#ifndef types_h
+#define types_h
+
+typedef unsigned char byte;
+typedef unsigned short ushort;
+
+typedef float SCOREMATRIX[32][32];
+typedef SCOREMATRIX *PTR_SCOREMATRIX;
+
+class MSA;
+class Seq;
+class ClusterTree;
+class DistFunc;
+class TextFile;
+class PWPath;
+class Tree;
+class SeqVect;
+class DistCalc;
+
+struct ProgNode;
+struct ProfPos;
+
+#if	SINGLE_AFFINE
+// Compress M, D and I trace-back matrices into 4 bits (BIT_XY presumably means "state X precedes state Y" — confirm).
+enum
+	{
+	BIT_MM = 0x00,
+	BIT_DM = 0x01,
+	BIT_IM = 0x02,
+	BIT_xM = 0x03,	// bits 0-1 hold the ?M code; presumably the mask — confirm
+
+	BIT_DD = 0x00,
+	BIT_MD = 0x04,
+	//  ID not allowed
+	BIT_xD = 0x04,	// bit 2 holds the ?D code
+
+	BIT_II = 0x00,
+	BIT_MI = 0x08,
+	//  DI not allowed
+	BIT_xI = 0x08,	// bit 3 holds the ?I code
+	};
+
+#endif
+
+#if	DOUBLE_AFFINE
+// Compress M, D, E, I and J trace-back matrices into 7 bits
+enum
+	{
+	BIT_MM = 0x00,
+	BIT_DM = 0x01,
+	BIT_EM = 0x02,
+	BIT_IM = 0x03,
+	BIT_JM = 0x04,
+	BIT_xM = 0x07,	// bits 0-2 hold the ?M code; presumably the mask — confirm
+
+	BIT_DD = 0x00,
+	BIT_MD = 0x08,
+	// [EIJ]D not allowed
+	BIT_xD = 0x08,
+
+	BIT_EE = 0x00,
+	BIT_ME = 0x10,
+	//  [DIJ]E not allowed
+	BIT_xE = 0x10,
+
+	BIT_II = 0x00,
+	BIT_MI = 0x20,
+	//  [EDJ]I not allowed
+	BIT_xI = 0x20,
+
+	BIT_JJ = 0x00,
+	BIT_MJ = 0x40,
+	//  [EDI]J not allowed
+	BIT_xJ = 0x40,
+	};
+#endif
+// Status codes; presumably used as the process exit status — confirm against callers.
+enum EXIT
+	{
+	EXIT_Success = 0,
+	EXIT_NotStarted = 1,
+	EXIT_FatalError = 2,
+	EXIT_Except = 3,
+	};
+// Outcome of comparing a node with the old tree. NOTE(review): NODECMP_Undefined and NODECMP_Same are both 0, so the two cannot be told apart.
+enum NODECMP
+	{
+	NODECMP_Undefined = 0,
+	NODECMP_Same = 0,		// equivalent to node in old tree
+	NODECMP_Diff = 1,		// equivalent & parent is changed
+	NODECMP_Changed = 2		// no equivalent node in old tree
+	};
+// NOTE(review): s, c and e are defined three times below with no #undef in this header; presumably enums.h #undefs them at its end — confirm.
+// Declare enums using macro hacks (see enums.h).
+#define s(t)	enum t { t##_Undefined = 0,
+#define c(t, x)	t##_##x,
+#define e(t)	};
+#include "enums.h"
+
+// Declare conversion function XXXToStr(XXX x)
+// for each enum type XXX.
+#define	s(t)	const char *t##ToStr(t x);
+#define c(t, x)	/* empty */
+#define e(t)	/* empty */
+#include "enums.h"
+
+// Declare conversion function StrToXXX(const char *Str)
+// for each enum type XXX.
+#define	s(t)	t StrTo##t(const char *Str);
+#define c(t, x)	/* empty */
+#define e(t)	/* empty */
+#include "enums.h"
+
+const char *BoolToStr(bool b);
+const char *SecsToStr(unsigned long Secs);
+
+#endif // types_h

Added: trunk/packages/muscle/branches/upstream/current/typetostr.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/typetostr.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/typetostr.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,58 @@
+#include "muscle.h"
+#include <stdio.h>
+// Format Secs as "hh:mm:ss" (hours are not wrapped at 24). Returns a pointer to a static buffer that the next call overwrites.
+const char *SecsToStr(unsigned long Secs)
+	{
+// 32 bytes covers the longest result for a 64-bit count (16 hour digits + ":mm:ss" + NUL).
+	static char Str[32];
+	const unsigned long hh = Secs/(60*60);
+	const unsigned long mm = (Secs/60)%60;
+	const unsigned long ss = Secs%60;
+// %lu matches the unsigned long arguments; the previous "%02d" with long
+// arguments was a format/argument mismatch (undefined behaviour).
+	sprintf(Str, "%02lu:%02lu:%02lu", hh, mm, ss);
+	return Str;
+	}
+// Text for a bool: "True" or "False" (string literals, nothing to free).
+const char *BoolToStr(bool b)
+	{
+	return (!b) ? "False" : "True";
+	}
+// Text for a score in "%8g" format; scores at or below MINUS_INFINITY are shown as a right-aligned "*".
+const char *ScoreToStr(SCORE Score)
+	{
+// Results rotate through a ring of 16 static slots of 16 bytes each, so
+// several calls inside one printf-like argument list give distinct strings.
+	enum { SLOT_COUNT = 16, SLOT_BYTES = 16 };
+	static char Ring[SLOT_COUNT][SLOT_BYTES];
+	static int iSlot = 0;
+
+	if (Score <= MINUS_INFINITY)
+		return "       *";
+
+	iSlot = (iSlot + 1)%SLOT_COUNT;
+	sprintf(Ring[iSlot], "%8g", Score);
+	return Ring[iSlot];
+	}
+// Left-justified version of ScoreToStr: "%.3g" text, or "*" for scores at
+// or below MINUS_INFINITY.
+const char *ScoreToStrL(SCORE Score)
+	{
+// Results rotate through a private ring of 16 static slots of 16 bytes, so
+// several calls inside one printf-like argument list give distinct strings.
+	enum { SLOT_COUNT = 16, SLOT_BYTES = 16 };
+	static char Ring[SLOT_COUNT][SLOT_BYTES];
+	static int iSlot = 0;
+
+	if (Score <= MINUS_INFINITY)
+		return "*";
+
+	iSlot = (iSlot + 1)%SLOT_COUNT;
+	sprintf(Ring[iSlot], "%.3g", Score);
+	return Ring[iSlot];
+	}
+// Text for a weight: forwards to ScoreToStr, so the result has the same format and lives in the same static storage.
+const char *WeightToStr(WEIGHT w)
+	{
+	return ScoreToStr(w);
+	}

Added: trunk/packages/muscle/branches/upstream/current/unixio.h
===================================================================
--- trunk/packages/muscle/branches/upstream/current/unixio.h	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/unixio.h	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,11 @@
+#ifdef	WIN32
+#include <fcntl.h>
+#include <io.h>
+#else
+#include <fcntl.h>
+#include <unistd.h>
+#endif
+
+#if	!defined(WIN32) && !defined(O_BINARY)
+#define	O_BINARY	0
+#endif

Added: trunk/packages/muscle/branches/upstream/current/upgma2.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/upgma2.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/upgma2.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,395 @@
+#include "muscle.h"
+#include "tree.h"
+#include "distcalc.h"
+
+// UPGMA clustering in O(N^2) time and space.
+
+#define	TRACE	0
+
+#define	MIN(x, y)	((x) < (y) ? (x) : (y))
+#define	MAX(x, y)	((x) > (y) ? (x) : (y))
+#define	AVG(x, y)	(((x) + (y))/2)
+
+static unsigned g_uLeafCount;
+static unsigned g_uTriangleSize;
+static unsigned g_uInternalNodeCount;
+static unsigned g_uInternalNodeIndex;
+
+// Triangular distance matrix is g_Dist, which is allocated
+// as a one-dimensional vector of length g_uTriangleSize.
+// TriangleSubscript(i,j) maps row,column=i,j to the subscript
+// into this vector.
+// Row / column coordinates are a bit messy.
+// Initially they are leaf indexes 0..N-1.
+// But each time we create a new node (=new cluster, new subtree),
+// we re-use one of the two rows that become available (the children
+// of the new node). This saves memory.
+// We keep track of this through the g_uNodeIndex vector.
+static dist_t *g_Dist;
+
+// Distance to nearest neighbor in row i of distance matrix.
+// Subscript is distance matrix row.
+static dist_t *g_MinDist;
+
+// Nearest neighbor to row i of distance matrix.
+// Subscript is distance matrix row.
+static unsigned *g_uNearestNeighbor;
+
+// Node index of row i in distance matrix.
+// Node indexes are 0..N-1 for leaves, N..2N-2 for internal nodes.
+// Subscript is distance matrix row.
+static unsigned *g_uNodeIndex;
+
+// The following vectors are defined on internal nodes,
+// subscripts are internal node index 0..N-2.
+// For g_uLeft/Right, value is the node index 0 .. 2N-2
+// because a child can be internal or leaf.
+static unsigned *g_uLeft;
+static unsigned *g_uRight;
+static dist_t *g_Height;
+static dist_t *g_LeftLength;
+static dist_t *g_RightLength;
+// Subscript into the packed triangular vector g_Dist for the pair of rows (uIndex1, uIndex2); the order of the two arguments does not matter.
+static inline unsigned TriangleSubscript(unsigned uIndex1, unsigned uIndex2)
+	{
+#if	DEBUG
+	if (uIndex1 >= g_uLeafCount || uIndex2 >= g_uLeafCount)
+		Quit("TriangleSubscript(%u,%u) %u", uIndex1, uIndex2, g_uLeafCount);
+#endif
+// Order the pair so that uHi >= uLo: the row for uHi starts at
+// uHi*(uHi - 1)/2 and uLo is the offset within that row.
+	const unsigned uHi = (uIndex1 >= uIndex2) ? uIndex1 : uIndex2;
+	const unsigned uLo = (uIndex1 >= uIndex2) ? uIndex2 : uIndex1;
+	const unsigned v = uLo + (uHi*(uHi - 1))/2;
+	assert(v < (g_uLeafCount*(g_uLeafCount - 1))/2);
+	return v;
+	}
+// Write the current clustering state to the log: distance matrix, per-row nearest neighbors, and internal nodes 0..g_uInternalNodeIndex.
+static void ListState()
+	{
+// Column headings: node index of every row still in use.
+	Log("Dist matrix\n");
+	Log("     ");
+	for (unsigned uCol = 0; uCol < g_uLeafCount; ++uCol)
+		if (uInsane != g_uNodeIndex[uCol])
+			Log("  %5u", g_uNodeIndex[uCol]);
+	Log("\n");
+
+// Matrix body; rows and columns whose node index is uInsane are skipped.
+	for (unsigned uRow = 0; uRow < g_uLeafCount; ++uRow)
+		{
+		if (uInsane == g_uNodeIndex[uRow])
+			continue;
+
+		Log("%5u  ", g_uNodeIndex[uRow]);
+		for (unsigned uCol = 0; uCol < g_uLeafCount; ++uCol)
+			{
+			if (uInsane == g_uNodeIndex[uCol])
+				continue;
+
+		// The diagonal is printed as padding rather than looked up.
+			if (uRow != uCol)
+				Log("%5.2g  ", g_Dist[TriangleSubscript(uRow, uCol)]);
+			else
+				Log("       ");
+			}
+		Log("\n");
+		}
+
+// Nearest-neighbor table, one line per row still in use.
+	Log("\n");
+	Log("    i   Node   NrNb      Dist\n");
+	Log("-----  -----  -----  --------\n");
+	for (unsigned uRow = 0; uRow < g_uLeafCount; ++uRow)
+		{
+		if (uInsane != g_uNodeIndex[uRow])
+			Log("%5u  %5u  %5u  %8.3f\n",
+			  uRow,
+			  g_uNodeIndex[uRow],
+			  g_uNearestNeighbor[uRow],
+			  g_MinDist[uRow]);
+		}
+
+// Internal nodes, indexes 0 through g_uInternalNodeIndex inclusive.
+	Log("\n");
+	Log(" Node      L      R  Height  LLength  RLength\n");
+	Log("-----  -----  -----  ------  -------  -------\n");
+	for (unsigned uNode = 0; uNode <= g_uInternalNodeIndex; ++uNode)
+		Log("%5u  %5u  %5u  %6.2g  %6.2g  %6.2g\n",
+		  uNode,
+		  g_uLeft[uNode],
+		  g_uRight[uNode],
+		  g_Height[uNode],
+		  g_LeftLength[uNode],
+		  g_RightLength[uNode]);
+	}
+// Cluster the items described by DC by repeatedly joining the two closest rows, then build tree via Tree::Create. Linkage selects how distances to a joined row are derived from its two children.
+void UPGMA2(const DistCalc &DC, Tree &tree, LINKAGE Linkage)
+	{
+	g_uLeafCount = DC.GetCount();
+
+	g_uTriangleSize = (g_uLeafCount*(g_uLeafCount - 1))/2;
+	g_uInternalNodeCount = g_uLeafCount - 1;
+// Working state lives in file-scope statics; every array allocated below is released again before returning.
+	g_Dist = new dist_t[g_uTriangleSize];
+
+	g_uNodeIndex = new unsigned[g_uLeafCount];
+	g_uNearestNeighbor = new unsigned[g_uLeafCount];
+	g_MinDist = new dist_t[g_uLeafCount];
+	unsigned *Ids = new unsigned [g_uLeafCount];
+	char **Names = new char *[g_uLeafCount];
+
+	g_uLeft = new unsigned[g_uInternalNodeCount];
+	g_uRight = new unsigned[g_uInternalNodeCount];
+	g_Height = new dist_t[g_uInternalNodeCount];
+	g_LeftLength = new dist_t[g_uInternalNodeCount];
+	g_RightLength = new dist_t[g_uInternalNodeCount];
+
+	for (unsigned i = 0; i < g_uLeafCount; ++i)
+		{
+		g_MinDist[i] = BIG_DIST;
+		g_uNodeIndex[i] = i;
+		g_uNearestNeighbor[i] = uInsane;
+		Ids[i] = DC.GetId(i);
+		Names[i] = strsave(DC.GetName(i));
+		}
+
+	for (unsigned i = 0; i < g_uInternalNodeCount; ++i)
+		{
+		g_uLeft[i] = uInsane;
+		g_uRight[i] = uInsane;
+		g_LeftLength[i] = BIG_DIST;
+		g_RightLength[i] = BIG_DIST;
+		g_Height[i] = BIG_DIST;
+		}
+
+// Compute initial NxN triangular distance matrix.
+// Store minimum distance for each full (not triangular) row.
+// Loop from 1, not 0, because "row" is 0, 1 ... i-1,
+// so nothing to do when i=0.
+	for (unsigned i = 1; i < g_uLeafCount; ++i)
+		{
+		dist_t *Row = g_Dist + TriangleSubscript(i, 0);
+		DC.CalcDistRange(i, Row);
+		for (unsigned j = 0; j < i; ++j)
+			{
+			const dist_t d = Row[j];
+			if (d < g_MinDist[i])
+				{
+				g_MinDist[i] = d;
+				g_uNearestNeighbor[i] = j;
+				}
+			if (d < g_MinDist[j])
+				{
+				g_MinDist[j] = d;
+				g_uNearestNeighbor[j] = i;
+				}
+			}
+		}
+
+#if	TRACE
+	Log("Initial state:\n");
+	ListState();
+#endif
+
+	for (g_uInternalNodeIndex = 0; g_uInternalNodeIndex < g_uLeafCount - 1;
+	  ++g_uInternalNodeIndex)
+		{
+#if	TRACE
+		Log("\n");
+		Log("Internal node index %5u\n", g_uInternalNodeIndex);
+		Log("-------------------------\n");
+#endif
+
+	// Find nearest neighbors
+		unsigned Lmin = uInsane;
+		unsigned Rmin = uInsane;
+		dist_t dtMinDist = BIG_DIST;
+		for (unsigned j = 0; j < g_uLeafCount; ++j)
+			{
+			if (uInsane == g_uNodeIndex[j])
+				continue;
+
+			dist_t d = g_MinDist[j];
+			if (d < dtMinDist)
+				{
+				dtMinDist = d;
+				Lmin = j;
+				Rmin = g_uNearestNeighbor[j];
+				assert(uInsane != Rmin);
+				assert(uInsane != g_uNodeIndex[Rmin]);
+				}
+			}
+
+		assert(Lmin != uInsane);
+		assert(Rmin != uInsane);
+		assert(dtMinDist != BIG_DIST);
+
+#if	TRACE
+		Log("Nearest neighbors Lmin %u[=%u] Rmin %u[=%u] dist %.3g\n",
+		  Lmin,
+		  g_uNodeIndex[Lmin],
+		  Rmin,
+		  g_uNodeIndex[Rmin],
+		  dtMinDist);
+#endif
+
+	// Compute distances to new node
+	// New node overwrites row currently assigned to Lmin
+		dist_t dtNewMinDist = BIG_DIST;
+		unsigned uNewNearestNeighbor = uInsane;
+		for (unsigned j = 0; j < g_uLeafCount; ++j)
+			{
+			if (j == Lmin || j == Rmin)
+				continue;
+			if (uInsane == g_uNodeIndex[j])
+				continue;
+
+			const unsigned vL = TriangleSubscript(Lmin, j);
+			const unsigned vR = TriangleSubscript(Rmin, j);
+			const dist_t dL = g_Dist[vL];
+			const dist_t dR = g_Dist[vR];
+			dist_t dtNewDist;
+
+			switch (Linkage)
+				{
+			case LINKAGE_Avg:
+				dtNewDist = AVG(dL, dR);
+				break;
+
+			case LINKAGE_Min:
+				dtNewDist = MIN(dL, dR);
+				break;
+
+			case LINKAGE_Max:
+				dtNewDist = MAX(dL, dR);
+				break;
+
+			case LINKAGE_Biased:
+				dtNewDist = g_dSUEFF*AVG(dL, dR) + (1 - g_dSUEFF)*MIN(dL, dR);
+				break;
+
+			default:
+				Quit("UPGMA2: Invalid LINKAGE_%u", Linkage);
+				}
+
+		// Nasty special case.
+		// If nearest neighbor of j is Lmin or Rmin, then make the new
+		// node (which overwrites the row currently occupied by Lmin)
+		// the nearest neighbor. This situation can occur when there are
+		// equal distances in the matrix. If we don't make this fix,
+		// the nearest neighbor pointer for j would become invalid.
+		// (We don't need to test for == Lmin, because in that case
+		// the net change needed is zero due to the change in row
+		// numbering).
+			if (g_uNearestNeighbor[j] == Rmin)
+				g_uNearestNeighbor[j] = Lmin;	// g_MinDist[j] is left as it was
+
+#if	TRACE
+			Log("New dist to %u = (%u/%.3g + %u/%.3g)/2 = %.3g\n",
+			  j, Lmin, dL, Rmin, dR, dtNewDist);
+#endif
+			g_Dist[vL] = dtNewDist;
+			if (dtNewDist < dtNewMinDist)
+				{
+				dtNewMinDist = dtNewDist;
+				uNewNearestNeighbor = j;
+				}
+			}
+	// NOTE(review): the first operand of each assert below repeats the loop condition, so neither assert can fail; "== g_uLeafCount - 2" (last pass) was presumably intended — confirm.
+		assert(g_uInternalNodeIndex < g_uLeafCount - 1 || BIG_DIST != dtNewMinDist);
+		assert(g_uInternalNodeIndex < g_uLeafCount - 1 || uInsane != uNewNearestNeighbor);
+
+		const unsigned v = TriangleSubscript(Lmin, Rmin);
+		const dist_t dLR = g_Dist[v];
+		const dist_t dHeightNew = dLR/2;
+		const unsigned uLeft = g_uNodeIndex[Lmin];
+		const unsigned uRight = g_uNodeIndex[Rmin];
+		const dist_t HeightLeft =
+		  uLeft < g_uLeafCount ? 0 : g_Height[uLeft - g_uLeafCount];
+		const dist_t HeightRight =
+		  uRight < g_uLeafCount ? 0 : g_Height[uRight - g_uLeafCount];
+
+		g_uLeft[g_uInternalNodeIndex] = uLeft;
+		g_uRight[g_uInternalNodeIndex] = uRight;
+		g_LeftLength[g_uInternalNodeIndex] = dHeightNew - HeightLeft;
+		g_RightLength[g_uInternalNodeIndex] = dHeightNew - HeightRight;
+		g_Height[g_uInternalNodeIndex] = dHeightNew;
+
+	// Row for left child overwritten by row for new node
+		g_uNodeIndex[Lmin] = g_uLeafCount + g_uInternalNodeIndex;
+		g_uNearestNeighbor[Lmin] = uNewNearestNeighbor;
+		g_MinDist[Lmin] = dtNewMinDist;
+
+	// Delete row for right child
+		g_uNodeIndex[Rmin] = uInsane;
+
+#if	TRACE
+		Log("\nInternalNodeIndex=%u Lmin=%u Rmin=%u\n",
+		  g_uInternalNodeIndex, Lmin, Rmin);
+		ListState();
+#endif
+		}
+// The root is the internal node created last (index g_uLeafCount - 2). NOTE(review): this wraps around for fewer than two leaves; presumably callers guarantee at least two — confirm.
+	unsigned uRoot = g_uLeafCount - 2;
+	tree.Create(g_uLeafCount, uRoot, g_uLeft, g_uRight, g_LeftLength, g_RightLength,
+	  Ids, Names);
+
+#if	TRACE
+	tree.LogMe();
+#endif
+
+	delete[] g_Dist;
+
+	delete[] g_uNodeIndex;
+	delete[] g_uNearestNeighbor;
+	delete[] g_MinDist;
+	delete[] g_Height;
+
+	delete[] g_uLeft;
+	delete[] g_uRight;
+	delete[] g_LeftLength;
+	delete[] g_RightLength;
+	
+	for (unsigned i = 0; i < g_uLeafCount; ++i)
+		free(Names[i]);
+	delete[] Names;
+	delete[] Ids;
+	}
+// DistCalc over a fixed, symmetric 5x5 matrix; Test() below feeds it to UPGMA2. Every item is named "name" and has its index as id.
+class DistCalcTest : public DistCalc
+	{
+	virtual void CalcDistRange(unsigned i, dist_t Dist[]) const
+		{
+		static const dist_t TestDist[5][5] =
+			{
+			{  0,  2, 14, 14, 20 },
+			{  2,  0, 14, 14, 20 },
+			{ 14, 14,  0,  4, 20 },
+			{ 14, 14,  4,  0, 20 },
+			{ 20, 20, 20, 20,  0 },
+			};
+		for (unsigned uCol = 0; uCol < i; ++uCol)
+			Dist[uCol] = TestDist[i][uCol];
+		}
+	virtual unsigned GetCount() const
+		{
+		return 5;
+		}
+	virtual unsigned GetId(unsigned i) const
+		{
+		return i;
+		}
+	virtual const char *GetName(unsigned i) const
+		{
+		return "name";
+		}
+	};
+// Ad-hoc driver: sets the list (log) file, then runs UPGMA2 with average linkage on DistCalcTest. NOTE(review): the log path is a hard-coded Windows location.
+void Test()
+	{
+	SetListFileName("c:\\tmp\\lobster.log", false);
+	DistCalcTest DC;
+	Tree tree;
+	UPGMA2(DC, tree, LINKAGE_Avg);
+	}

Added: trunk/packages/muscle/branches/upstream/current/usage.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/usage.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/usage.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,47 @@
+#include "muscle.h"
+#include <stdio.h>
+
+// Write the version banner, URL, licence line and citation to stderr (first call only).
+void Credits()
+	{
+	static bool s_bShown = false;
+	if (s_bShown)
+		return;
+	s_bShown = true;
+	fprintf(stderr, "\n" MUSCLE_LONG_VERSION "\n\n");
+	fprintf(stderr, "http://www.drive5.com/muscle\n"
+	  "This software is donated to the public domain.\n"
+	  "Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.\n\n");
+	}
+
+void Usage()	// Print the credits banner, then the command-line help text, to stderr.
+	{
+	Credits();
+	fprintf(stderr,	// the text below is a printf format string, hence the doubled %% in the -maxmb line
+"\n"
+"Basic usage\n"
+"\n"
+"    muscle -in <inputfile> -out <outputfile>\n"
+"\n"
+"Common options (for a complete list please see the User Guide):\n"
+"\n"
+"    -in <inputfile>    Input file in FASTA format (default stdin)\n"
+"    -out <outputfile>  Output alignment in FASTA format (default stdout)\n"
+"    -diags             Find diagonals (faster for similar sequences)\n"
+"    -maxiters <n>      Maximum number of iterations (integer, default 16)\n"
+"    -maxhours <h>      Maximum time to iterate in hours (default no limit)\n"
+"    -maxmb <m>         Maximum memory to allocate in Mb (default 80%% of RAM)\n"
+"    -html              Write output in HTML format (default FASTA)\n"
+"    -msf               Write output in GCG MSF format (default FASTA)\n"
+"    -clw               Write output in CLUSTALW format (default FASTA)\n"
+"    -clwstrict         As -clw, with 'CLUSTAL W (1.81)' header\n"
+"    -log[a] <logfile>  Log to file (append if -loga, overwrite if -log)\n"
+"    -quiet             Do not write progress messages to stderr\n"
+"    -stable            Output sequences in input order (default is -group)\n"
+"    -group             Group sequences by similarity (this is the default)\n"
+"    -version           Display version information and exit\n"
+"\n"
+"Without refinement (very fast, avg accuracy similar to T-Coffee): -maxiters 2\n"
+"Fastest possible (amino acids): -maxiters 1 -diags -sv -distance1 kbit20_3\n"
+"Fastest possible (nucleotides): -maxiters 1 -diags\n");
+	}

Added: trunk/packages/muscle/branches/upstream/current/validateids.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/validateids.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/validateids.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,105 @@
+#include "muscle.h"
+#include "msa.h"
+#include "tree.h"
+#include "seqvect.h"
+
+#if	DEBUG
+static SeqVect *g_ptrMuscleSeqVect = 0;
+static MSA MuscleInputMSA;
+
+void SetMuscleInputMSA(MSA &msa)	// Keep a copy of msa as the reference for later id/name checks.
+	{
+	MuscleInputMSA.Copy(msa);
+	}
+
+void SetMuscleSeqVect(SeqVect &v)	// Register v as the reference for later id/name checks.
+	{
+	g_ptrMuscleSeqVect = &v;	// only the address is stored, so v must stay alive while checks run
+	}
+
+// Quit() unless each sequence name in msa equals the registered SeqVect's name for that sequence's id.
+void ValidateMuscleIdsSeqVect(const MSA &msa)
+	{
+	const unsigned uRowCount = msa.GetSeqCount();
+	for (unsigned uRow = 0; uRow != uRowCount; ++uRow)
+		{
+		const char *szActual = msa.GetSeqName(uRow);
+		const char *szExpected = g_ptrMuscleSeqVect->GetSeqName(msa.GetSeqId(uRow));
+		if (strcmp(szActual, szExpected) != 0)
+			Quit("ValidateMuscleIdsSeqVect, names don't match");
+		}
+	}
+
+// Compare each name in msa with the saved input MSA's name for the same id; log both and Quit() on a mismatch.
+void ValidateMuscleIdsMSA(const MSA &msa)
+	{
+	const unsigned uRowCount = msa.GetSeqCount();
+	for (unsigned uRow = 0; uRow != uRowCount; ++uRow)
+		{
+		const unsigned uId = msa.GetSeqId(uRow);
+		const char *szActual = msa.GetSeqName(uRow);
+		const char *szExpected = MuscleInputMSA.GetSeqName(uId);
+		if (0 == strcmp(szActual, szExpected))
+			continue;
+		Log("Input MSA:\n");
+		MuscleInputMSA.LogMe();
+		Log("MSA being tested:\n");
+		msa.LogMe();
+		Log("Id=%u\n", uId);
+		Log("Input name=%s\n", szExpected);
+		Log("Test name=%s\n", szActual);
+		Quit("ValidateMuscleIdsMSA, names don't match");
+		}
+	}
+
+// Check msa against the registered SeqVect if there is one, else against the saved input MSA.
+void ValidateMuscleIds(const MSA &msa)
+	{
+	if (0 == g_ptrMuscleSeqVect && 0 == MuscleInputMSA.GetSeqCount())
+		Quit("ValidateMuscleIds, ptrMuscleSeqVect=0 && 0 == MuscleInputMSA.SeqCount()");
+	else if (0 != g_ptrMuscleSeqVect)
+		ValidateMuscleIdsSeqVect(msa);
+	else
+		ValidateMuscleIdsMSA(msa);
+	}
+
+// Quit() unless each leaf name in tree equals the registered SeqVect's name for that leaf's id.
+void ValidateMuscleIdsSeqVect(const Tree &tree)
+	{
+	const unsigned uNodes = tree.GetNodeCount();
+	for (unsigned uNode = 0; uNode != uNodes; ++uNode)
+		if (tree.IsLeaf(uNode))
+			{
+			const unsigned uId = tree.GetLeafId(uNode);
+			const char *szLeafName = tree.GetLeafName(uNode);
+			const char *szExpected = g_ptrMuscleSeqVect->GetSeqName(uId);
+			if (strcmp(szLeafName, szExpected) != 0)
+				Quit("ValidateMuscleIds: names don't match");
+			}
+	}
+
+// Quit() unless each leaf name in tree equals the saved input MSA's name for that leaf's id.
+void ValidateMuscleIdsMSA(const Tree &tree)
+	{
+	const unsigned uNodes = tree.GetNodeCount();
+	for (unsigned uNode = 0; uNode != uNodes; ++uNode)
+		if (tree.IsLeaf(uNode))
+			{
+			const unsigned uId = tree.GetLeafId(uNode);
+			const char *szLeafName = tree.GetLeafName(uNode);
+			const char *szExpected = MuscleInputMSA.GetSeqName(uId);
+			if (strcmp(szLeafName, szExpected) != 0)
+				Quit("ValidateMuscleIds: names don't match");
+			}
+	}
+
+void ValidateMuscleIds(const Tree &tree)	// Tree overload: SeqVect check if one is registered, else input-MSA check.
+	{
+	if (0 == g_ptrMuscleSeqVect && 0 == MuscleInputMSA.GetSeqCount())
+		Quit("ValidateMuscleIds, ptrMuscleSeqVect=0 && 0 == MuscleInputMSA.SeqCount");
+	else if (0 != g_ptrMuscleSeqVect)
+		ValidateMuscleIdsSeqVect(tree);
+	else
+		ValidateMuscleIdsMSA(tree);
+	}
+#endif

Added: trunk/packages/muscle/branches/upstream/current/vtml2.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/vtml2.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/vtml2.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,145 @@
+#include "muscle.h"
+
+// Note: We use 32x32 arrays rather than 20x20 as this may give the compiler
+// optimizer an opportunity to make subscript arithmetic more efficient
+// (multiplying by 32 is same as shifting left by 5 bits).
+// v() casts one table entry to float; ROW() builds one brace-enclosed row of 20 entries.
+#define v(x)	((float) (x))
+#define ROW(A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y) \
+	{ v(A), v(C), v(D), v(E), v(F), v(G), v(H), v(I), v(K), v(L), v(M), v(N), v(P), v(Q), \
+	v(R), v(S), v(T), v(V), v(W), v(Y) },
+
+
+//         A        C        D        E        F        G        H        I        K        L
+//         M        N        P        Q        R        S        T        V        W        Y
+// VTML200
+float VTML_LA[32][32] =
+	{
+ROW( 2.25080, 1.31180, 0.82704, 0.88740, 0.55520, 1.09860, 0.71673, 0.80805, 0.81213, 0.68712,
+     0.79105, 0.86777, 0.99328, 0.86644, 0.72821, 1.33924, 1.20373, 1.05956, 0.38107, 0.54373) // A
+
+ROW( 1.31180,15.79469, 0.39862, 0.42329, 0.49882, 0.65541, 0.67100, 0.97185, 0.46414, 0.55673,
+     0.90230, 0.63236, 0.54479, 0.47895, 0.56465, 1.18490, 0.99069, 1.21604, 0.28988, 0.91338) // C
+
+ROW( 0.82704, 0.39862, 4.18833, 2.06850, 0.25194, 0.90937, 1.01617, 0.32860, 1.03391, 0.31300,
+     0.42498, 1.80888, 0.81307, 1.20043, 0.63712, 1.03001, 0.88191, 0.43557, 0.26313, 0.37947) // D
+
+ROW( 0.88740, 0.42329, 2.06850, 3.08354, 0.33456, 0.77183, 0.94536, 0.43151, 1.35989, 0.45579,
+     0.53423, 1.15745, 0.82832, 1.66752, 0.84500, 0.98693, 0.88132, 0.54047, 0.24519, 0.52025) // E
+
+ROW( 0.55520, 0.49882, 0.25194, 0.33456, 6.08351, 0.30140, 1.02191, 1.10969, 0.37069, 1.50587,
+     1.41207, 0.42850, 0.41706, 0.48113, 0.41970, 0.56867, 0.57172, 0.91256, 2.02494, 3.44675) // F
+
+ROW( 1.09860, 0.65541, 0.90937, 0.77183, 0.30140, 5.62829, 0.64191, 0.28432, 0.67874, 0.30549,
+     0.37739, 1.01012, 0.60851, 0.65996, 0.63660, 1.03448, 0.68435, 0.40728, 0.36034, 0.35679) // G
+
+ROW( 0.71673, 0.67100, 1.01617, 0.94536, 1.02191, 0.64191, 6.05494, 0.50783, 1.03822, 0.60887,
+     0.55685, 1.28619, 0.72275, 1.41503, 1.24635, 0.93344, 0.83543, 0.54817, 0.81780, 1.81552) // H
+
+ROW( 0.80805, 0.97185, 0.32860, 0.43151, 1.10969, 0.28432, 0.50783, 3.03766, 0.49310, 1.88886,
+     1.75039, 0.44246, 0.44431, 0.53213, 0.48153, 0.55603, 0.88168, 2.37367, 0.68494, 0.70035) // I
+
+ROW( 0.81213, 0.46414, 1.03391, 1.35989, 0.37069, 0.67874, 1.03822, 0.49310, 2.72883, 0.52739,
+     0.68244, 1.15671, 0.82911, 1.51333, 2.33521, 0.93858, 0.92730, 0.55467, 0.39944, 0.52549) // K
+
+ROW( 0.68712, 0.55673, 0.31300, 0.45579, 1.50587, 0.30549, 0.60887, 1.88886, 0.52739, 3.08540,
+     2.14480, 0.43539, 0.53630, 0.62771, 0.53025, 0.53468, 0.69924, 1.50372, 0.82822, 0.89854) // L
+
+ROW( 0.79105, 0.90230, 0.42498, 0.53423, 1.41207, 0.37739, 0.55685, 1.75039, 0.68244, 2.14480,
+     4.04057, 0.55603, 0.48415, 0.76770, 0.66775, 0.62409, 0.87759, 1.42742, 0.52278, 0.72067) // M
+
+ROW( 0.86777, 0.63236, 1.80888, 1.15745, 0.42850, 1.01012, 1.28619, 0.44246, 1.15671, 0.43539,
+     0.55603, 3.36000, 0.69602, 1.13490, 0.98603, 1.31366, 1.11252, 0.50603, 0.35810, 0.68349) // N
+
+ROW( 0.99328, 0.54479, 0.81307, 0.82832, 0.41706, 0.60851, 0.72275, 0.44431, 0.82911, 0.53630,
+     0.48415, 0.69602, 7.24709, 0.90276, 0.74827, 1.03719, 0.83014, 0.56795, 0.37867, 0.33127) // P
+
+ROW( 0.86644, 0.47895, 1.20043, 1.66752, 0.48113, 0.65996, 1.41503, 0.53213, 1.51333, 0.62771,
+     0.76770, 1.13490, 0.90276, 2.86937, 1.50116, 0.99561, 0.93103, 0.61085, 0.29926, 0.51971) // Q
+
+ROW( 0.72821, 0.56465, 0.63712, 0.84500, 0.41970, 0.63660, 1.24635, 0.48153, 2.33521, 0.53025,
+     0.66775, 0.98603, 0.74827, 1.50116, 4.28698, 0.84662, 0.80673, 0.51422, 0.47569, 0.59592) // R
+
+ROW( 1.33924, 1.18490, 1.03001, 0.98693, 0.56867, 1.03448, 0.93344, 0.55603, 0.93858, 0.53468,
+     0.62409, 1.31366, 1.03719, 0.99561, 0.84662, 2.13816, 1.52911, 0.67767, 0.45129, 0.66767) // S
+
+ROW( 1.20373, 0.99069, 0.88191, 0.88132, 0.57172, 0.68435, 0.83543, 0.88168, 0.92730, 0.69924,
+     0.87759, 1.11252, 0.83014, 0.93103, 0.80673, 1.52911, 2.58221, 0.98702, 0.31541, 0.57954) // T
+
+ROW( 1.05956, 1.21604, 0.43557, 0.54047, 0.91256, 0.40728, 0.54817, 2.37367, 0.55467, 1.50372,
+     1.42742, 0.50603, 0.56795, 0.61085, 0.51422, 0.67767, 0.98702, 2.65580, 0.43419, 0.63805) // V
+
+ROW( 0.38107, 0.28988, 0.26313, 0.24519, 2.02494, 0.36034, 0.81780, 0.68494, 0.39944, 0.82822,
+     0.52278, 0.35810, 0.37867, 0.29926, 0.47569, 0.45129, 0.31541, 0.43419,31.39564, 2.51433) // W
+
+ROW( 0.54373, 0.91338, 0.37947, 0.52025, 3.44675, 0.35679, 1.81552, 0.70035, 0.52549, 0.89854,
+     0.72067, 0.68349, 0.33127, 0.51971, 0.59592, 0.66767, 0.57954, 0.63805, 2.51433, 7.50693) // Y
+	};
+
+const float VTML_SP_CENTER = (float) 22.0;
+// Re-define v() to add VTML_SP_CENTER to each entry; ROW() now takes a 21st column, X.
+#undef	ROW
+#undef	v
+#define v(x)	((float) ((x) + VTML_SP_CENTER))
+#define ROW(A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y, X) \
+	{ v(A), v(C), v(D), v(E), v(F), v(G), v(H), v(I), v(K), v(L), v(M), v(N), v(P), v(Q), \
+	v(R), v(S), v(T), v(V), v(W), v(Y), v(X) },
+
+// VTML 240
+float VTML_SP[32][32] =
+	{
+//     A    C    D    E    F    G    H    I    K    L    M    N    P    Q    R    S    T    V    W    Y  X
+ROW(  58,  23, -12,  -7, -44,  10, -23, -14, -14, -27, -17,  -8,   1,  -9, -22,  23,  15,   5, -74, -45, 0) // A
+ROW(  23, 224, -67, -63, -50, -30, -29,   1, -56, -41,  -6, -33, -44, -53, -43,  15,   2,  18, -93,  -6, 0) // C
+ROW( -12, -67, 111,  59,-104,  -4,   4, -84,   6, -88, -65,  48, -13,  18, -29,   5,  -7, -63,-105, -73, 0) // D
+ROW(  -7, -63,  59,  85, -83, -17,  -1, -63,  25, -60, -47,  15, -12,  40,  -8,   1,  -7, -47,-108, -51, 0) // E
+ROW( -44, -50,-104, -83, 144, -93,   4,  12, -74,  36,  30, -64, -67, -56, -65, -43, -41,  -3,  63, 104, 0) // F
+ROW(  10, -30,  -4, -17, -93, 140, -32, -95, -27, -91, -75,   4, -36, -29, -32,   5, -26, -68, -80, -79, 0) // G
+ROW( -23, -29,   4,  -1,   4, -32, 137, -50,   6, -37, -42,  21, -23,  27,  19,  -4, -12, -44, -13,  48, 0) // H
+ROW( -14,   1, -84, -63,  12, -95, -50,  86, -53,  53,  47, -62, -60, -47, -55, -43,  -8,  69, -27, -24, 0) // I
+ROW( -14, -56,   6,  25, -74, -27,   6, -53,  75, -48, -30,  13, -12,  34,  68,  -3,  -4, -44, -71, -49, 0) // K
+ROW( -27, -41, -88, -60,  36, -91, -37,  53, -48,  88,  62, -63, -48, -36, -48, -47, -25,  36, -11,  -4, 0) // L
+ROW( -17,  -6, -65, -47,  30, -75, -42,  47, -30,  62, 103, -45, -54, -21, -31, -35,  -9,  31, -46, -20, 0) // M
+ROW(  -8, -33,  48,  15, -64,   4,  21, -62,  13, -63, -45,  89, -25,  12,   2,  22,  10, -51, -79, -29, 0) // N
+ROW(   1, -44, -13, -12, -67, -36, -23, -60, -12, -48, -54, -25, 160,  -6, -20,   5, -12, -42, -76, -83, 0) // P
+ROW(  -9, -53,  18,  40, -56, -29,  27, -47,  34, -36, -21,  12,  -6,  75,  34,   1,  -4, -37, -92, -48, 0) // Q
+ROW( -22, -43, -29,  -8, -65, -32,  19, -55,  68, -48, -31,   2, -20,  34, 113, -10, -14, -49, -58, -39, 0) // R
+ROW(  23,  15,   5,   1, -43,   5,  -4, -43,  -3, -47, -35,  22,   5,   1, -10,  53,  32, -28, -62, -31, 0) // S
+ROW(  15,   2,  -7,  -7, -41, -26, -12,  -8,  -4, -25,  -9,  10, -12,  -4, -14,  32,  68,   0, -87, -40, 0) // T
+ROW(   5,  18, -63, -47,  -3, -68, -44,  69, -44,  36,  31, -51, -42, -37, -49, -28,   0,  74, -61, -32, 0) // V
+ROW( -74, -93,-105,-108,  63, -80, -13, -27, -71, -11, -46, -79, -76, -92, -58, -62, -87, -61, 289,  81, 0) // W
+ROW( -45,  -6, -73, -51, 104, -79,  48, -24, -49,  -4, -20, -29, -83, -48, -39, -31, -40, -32,  81, 162, 0) // Y
+ROW(   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0) // X
+	};
+// v() is now a plain cast to float (no centre offset); RNC() builds one 21-entry row with it.
+#undef	v
+#define v(x)	((float) (x))
+#define RNC(A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y, X) \
+	{ v(A), v(C), v(D), v(E), v(F), v(G), v(H), v(I), v(K), v(L), v(M), v(N), v(P), v(Q), \
+	v(R), v(S), v(T), v(V), v(W), v(Y), v(X) },
+
+float VTML_SPNoCenter[32][32] =
+	{
+//     A    C    D    E    F    G    H    I    K    L    M    N    P    Q    R    S    T    V    W    Y  X
+RNC(  58,  23, -12,  -7, -44,  10, -23, -14, -14, -27, -17,  -8,   1,  -9, -22,  23,  15,   5, -74, -45, 0) // A
+RNC(  23, 224, -67, -63, -50, -30, -29,   1, -56, -41,  -6, -33, -44, -53, -43,  15,   2,  18, -93,  -6, 0) // C
+RNC( -12, -67, 111,  59,-104,  -4,   4, -84,   6, -88, -65,  48, -13,  18, -29,   5,  -7, -63,-105, -73, 0) // D
+RNC(  -7, -63,  59,  85, -83, -17,  -1, -63,  25, -60, -47,  15, -12,  40,  -8,   1,  -7, -47,-108, -51, 0) // E
+RNC( -44, -50,-104, -83, 144, -93,   4,  12, -74,  36,  30, -64, -67, -56, -65, -43, -41,  -3,  63, 104, 0) // F
+RNC(  10, -30,  -4, -17, -93, 140, -32, -95, -27, -91, -75,   4, -36, -29, -32,   5, -26, -68, -80, -79, 0) // G
+RNC( -23, -29,   4,  -1,   4, -32, 137, -50,   6, -37, -42,  21, -23,  27,  19,  -4, -12, -44, -13,  48, 0) // H
+RNC( -14,   1, -84, -63,  12, -95, -50,  86, -53,  53,  47, -62, -60, -47, -55, -43,  -8,  69, -27, -24, 0) // I
+RNC( -14, -56,   6,  25, -74, -27,   6, -53,  75, -48, -30,  13, -12,  34,  68,  -3,  -4, -44, -71, -49, 0) // K
+RNC( -27, -41, -88, -60,  36, -91, -37,  53, -48,  88,  62, -63, -48, -36, -48, -47, -25,  36, -11,  -4, 0) // L
+RNC( -17,  -6, -65, -47,  30, -75, -42,  47, -30,  62, 103, -45, -54, -21, -31, -35,  -9,  31, -46, -20, 0) // M
+RNC(  -8, -33,  48,  15, -64,   4,  21, -62,  13, -63, -45,  89, -25,  12,   2,  22,  10, -51, -79, -29, 0) // N
+RNC(   1, -44, -13, -12, -67, -36, -23, -60, -12, -48, -54, -25, 160,  -6, -20,   5, -12, -42, -76, -83, 0) // P
+RNC(  -9, -53,  18,  40, -56, -29,  27, -47,  34, -36, -21,  12,  -6,  75,  34,   1,  -4, -37, -92, -48, 0) // Q
+RNC( -22, -43, -29,  -8, -65, -32,  19, -55,  68, -48, -31,   2, -20,  34, 113, -10, -14, -49, -58, -39, 0) // R
+RNC(  23,  15,   5,   1, -43,   5,  -4, -43,  -3, -47, -35,  22,   5,   1, -10,  53,  32, -28, -62, -31, 0) // S
+RNC(  15,   2,  -7,  -7, -41, -26, -12,  -8,  -4, -25,  -9,  10, -12,  -4, -14,  32,  68,   0, -87, -40, 0) // T
+RNC(   5,  18, -63, -47,  -3, -68, -44,  69, -44,  36,  31, -51, -42, -37, -49, -28,   0,  74, -61, -32, 0) // V
+RNC( -74, -93,-105,-108,  63, -80, -13, -27, -71, -11, -46, -79, -76, -92, -58, -62, -87, -61, 289,  81, 0) // W
+RNC( -45,  -6, -73, -51, 104, -79,  48, -24, -49,  -4, -20, -29, -83, -48, -39, -31, -40, -32,  81, 162, 0) // Y
+RNC(   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0) // X
+	};

Added: trunk/packages/muscle/branches/upstream/current/writescorefile.cpp
===================================================================
--- trunk/packages/muscle/branches/upstream/current/writescorefile.cpp	2006-07-10 12:54:50 UTC (rev 81)
+++ trunk/packages/muscle/branches/upstream/current/writescorefile.cpp	2006-08-07 00:08:59 UTC (rev 82)
@@ -0,0 +1,69 @@
+#include "muscle.h"
+#include "msa.h"
+#include <errno.h>
+
+extern float VTML_SP[32][32];
+extern float NUC_SP[32][32];
+
+// Mean VTML_SP (amino) or NUC_SP (DNA/RNA) entry over all letter pairs in column uCol; 0 if there are none.
+static double GetColScore(const MSA &msa, unsigned uCol)
+	{
+	const unsigned uSeqCount = msa.GetSeqCount();
+	unsigned uPairCount = 0;
+	double dSum = 0.0;
+	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
+		{
+		if (msa.IsGap(uSeq1, uCol))
+			continue;
+		const unsigned uLetter1 = msa.GetLetterEx(uSeq1, uCol);
+		if (uLetter1 >= g_AlphaSize)	// skipped, like gaps
+			continue;
+		for (unsigned uSeq2 = uSeq1 + 1; uSeq2 < uSeqCount; ++uSeq2)
+			{
+			if (msa.IsGap(uSeq2, uCol))
+				continue;
+			const unsigned uLetter2 = msa.GetLetterEx(uSeq2, uCol);
+			if (uLetter2 >= g_AlphaSize)
+				continue;
+			// Start from 0 so dSum never absorbs an indeterminate value if Quit() were to return.
+			double Score = 0.0;
+			switch (g_Alpha)
+				{
+			case ALPHA_Amino:
+				Score = VTML_SP[uLetter1][uLetter2];
+				break;
+			case ALPHA_DNA:
+			case ALPHA_RNA:
+				Score = NUC_SP[uLetter1][uLetter2];
+				break;
+			default:
+				Quit("GetColScore: invalid alpha=%d", g_Alpha);
+				}
+			dSum += Score;
+			++uPairCount;
+			}
+		}
+	return (0 == uPairCount) ? 0.0 : dSum / uPairCount;	// avoid 0/0 when no pair was scored
+	}
+
+// Write one line per alignment column to g_pstrScoreFileName: the column
+// score followed by that column's character from every sequence.
+void WriteScoreFile(const MSA &msa)
+	{
+	FILE *f = fopen(g_pstrScoreFileName, "w");
+	if (0 == f)
+		Quit("Cannot open score file '%s' errno=%d", g_pstrScoreFileName, errno);
+	const unsigned uColCount = msa.GetColCount();
+	const unsigned uSeqCount = msa.GetSeqCount();
+	for (unsigned uCol = 0; uCol < uColCount; ++uCol)
+		{
+		fprintf(f, "%10.3f  ", GetColScore(msa, uCol));
+		for (unsigned uSeq = 0; uSeq < uSeqCount; ++uSeq)
+			fputc(msa.GetChar(uSeq, uCol), f);
+		fputc('\n', f);
+		}
+	// Buffered output can fail as late as the final flush, so check both the stream's error flag and fclose().
+	const int iWriteError = ferror(f);
+	if (0 != fclose(f) || 0 != iWriteError)
+		Quit("Error writing score file '%s' errno=%d", g_pstrScoreFileName, errno);
+	}